Merge branch 'lemmsh-upload_more_than_pdf'

Merging PR #30
This commit is contained in:
Cervinko Cera 2016-08-07 18:47:00 +02:00
commit fee60bb21d
5 changed files with 254 additions and 27 deletions

99
cps/book_formats.py Normal file
View File

@ -0,0 +1,99 @@
"""Dispatch uploaded book files to format-specific metadata extractors.

Every optional dependency is imported defensively.  A module-level ``use_*``
flag records the outcome of each import so the rest of the module can fall
back to default behaviour instead of failing.
"""
__author__ = 'lemmsh'

import logging
import os

import uploader

logger = logging.getLogger("book_formats")

# wand is needed to generate cover images for PDF uploads.
# NOTE: this flag is True when the feature is *unavailable*.
try:
    from wand.image import Image
    use_generic_pdf_cover = False
except ImportError as e:
    logger.warning('cannot import Image, generating pdf covers for pdf uploads will not work: %s', e)
    use_generic_pdf_cover = True

# PyPDF2 is needed to read PDF document metadata.
try:
    from PyPDF2 import PdfFileReader
    use_pdf_meta = True
except ImportError as e:
    logger.warning('cannot import PyPDF2, extracting pdf metadata will not work: %s', e)
    use_pdf_meta = False

# Project-local EPUB metadata reader.
try:
    import epub
    use_epub_meta = True
except ImportError as e:
    logger.warning('cannot import epub, extracting epub metadata will not work: %s', e)
    use_epub_meta = False

# Project-local FB2 metadata reader.
try:
    import fb2
    use_fb2_meta = True
except ImportError as e:
    logger.warning('cannot import fb2, extracting fb2 metadata will not work: %s', e)
    use_fb2_meta = False
def process(tmp_file_path, original_file_name, original_file_extension):
    """Extract metadata from an uploaded file, chosen by its extension.

    :param tmp_file_path: path of the stored upload
    :param original_file_name: upload's file name without extension
    :param original_file_extension: upload's extension including the dot;
        compared case-insensitively
    :return: the ``BookMeta`` built by the matching extractor, or the
        result of ``default_meta`` when the format is not supported, the
        extractor is unavailable, or extraction raises
    """
    try:
        # Kept inside the ``try`` so a malformed extension value also
        # falls back to the default metadata.
        extension = original_file_extension.upper()
        if extension == ".PDF":
            return pdf_meta(tmp_file_path, original_file_name, original_file_extension)
        if extension == ".EPUB" and use_epub_meta:
            return epub.get_epub_info(tmp_file_path, original_file_name, original_file_extension)
        if extension == ".FB2" and use_fb2_meta:
            return fb2.get_fb2_info(tmp_file_path, original_file_name, original_file_extension)
    except Exception as e:
        # Deliberately best-effort: a broken file must not abort the upload.
        logger.warning('cannot parse metadata, using default: %s', e)
    return default_meta(tmp_file_path, original_file_name, original_file_extension)
def default_meta(tmp_file_path, original_file_name, original_file_extension):
    """Build placeholder metadata for a file whose content was not parsed.

    The title is the original file name, the author is ``"Unknown"``, there
    is no cover, and every other text field is empty.

    :return: ``uploader.BookMeta``
    """
    placeholder = dict(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=original_file_name,
        author="Unknown",
        cover=None,
        description="",
        tags="",
        series="",
        series_id="",
    )
    return uploader.BookMeta(**placeholder)
def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
    """Build ``BookMeta`` for a PDF upload.

    Author, title and subject are read from the PDF document info when
    PyPDF2 is available (``use_pdf_meta``).  Any field that is missing or
    empty falls back to the same defaults used when PyPDF2 is absent:
    ``"Unknown"`` author, the original file name as title, and an empty
    description.

    :param tmp_file_path: path of the stored PDF
    :param original_file_name: upload's file name without extension
    :param original_file_extension: upload's extension including the dot
    :return: ``uploader.BookMeta`` whose ``cover`` is whatever
        ``pdf_preview`` returns
    """
    author = title = subject = None
    if use_pdf_meta:
        with open(tmp_file_path, 'rb') as pdf_file:
            doc_info = PdfFileReader(pdf_file).getDocumentInfo()
            if doc_info is not None:
                # Read the fields before the file is closed, in case the
                # reader resolves them lazily from the stream.
                author = doc_info.author
                title = doc_info.title
                subject = doc_info.subject

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title or original_file_name,
        author=author or "Unknown",
        # The second argument is the directory for the cover image, so pass
        # the directory of the stored file rather than the book's name.
        cover=pdf_preview(tmp_file_path, os.path.dirname(tmp_file_path)),
        description=subject or "",
        tags="",
        series="",
        series_id="")
def pdf_preview(tmp_file_path, tmp_dir):
    """Render a JPEG cover image for a PDF and return its path.

    :param tmp_file_path: path of the stored PDF
    :param tmp_dir: directory joined in front of the cover file name; it
        has no effect when ``tmp_file_path`` is absolute, because
        ``os.path.join`` discards everything before an absolute component
    :return: path of the written image, or ``None`` when wand is not
        available (``use_generic_pdf_cover``)
    """
    if use_generic_pdf_cover:
        return None

    cover_file_name = os.path.splitext(tmp_file_path)[0] + ".cover.jpg"
    # Compute the destination once so the returned path is always the path
    # that was actually written.
    cover_path = os.path.join(tmp_dir, cover_file_name)
    # "[0]" is ImageMagick's page selector: only the first page is rendered.
    with Image(filename=tmp_file_path + "[0]", resolution=150) as img:
        img.compression_quality = 88
        img.save(filename=cover_path)
    return cover_path

67
cps/epub.py Normal file
View File

@ -0,0 +1,67 @@
import zipfile
from lxml import etree
import os
import uploader
def extractCover(zip, coverFile, tmp_file_name):
    """Copy the cover image out of an EPUB archive next to the temp file.

    :param zip: open archive object exposing ``read(name)``
    :param coverFile: cover href from the manifest, or ``None``
    :param tmp_file_name: path of the stored upload; its extension-less
        form is used as the prefix of the output file name
    :return: path of the written cover file, or ``None`` when
        ``coverFile`` is ``None``
    """
    if coverFile is None:
        return None

    # NOTE(review): the href is resolved against a fixed "OPS/" folder;
    # archives that keep their package file elsewhere will raise here.
    cover_data = zip.read("OPS/" + coverFile)
    prefix = os.path.splitext(tmp_file_name)[0]
    # Use only the final path component of the href: a value such as
    # "images/cover.jpg" or "../x" must not redirect the output into a
    # different (possibly nonexistent) directory.
    tmp_cover_name = prefix + "." + os.path.basename(coverFile)
    with open(tmp_cover_name, 'wb') as image:
        image.write(cover_data)
    return tmp_cover_name
def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
    """Build ``BookMeta`` from the package metadata of an EPUB file.

    The package document is located through ``META-INF/container.xml``.
    Title, description and creator are read from its Dublin Core
    metadata, and the manifest item with id ``cover`` (if any) is
    extracted as the cover image.

    :param tmp_file_path: path of the stored EPUB
    :param original_file_name: upload's file name without extension; used
        as the title when the package declares none
    :param original_file_extension: upload's extension including the dot
    :return: ``uploader.BookMeta``
    :raises: whatever ``zipfile``/``lxml`` raise for a malformed archive,
        and ``IndexError`` when the container or metadata node is missing
    """
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }

    # The context manager closes the archive even when parsing fails.
    with zipfile.ZipFile(tmp_file_path) as epub_zip:
        container = etree.fromstring(epub_zip.read('META-INF/container.xml'))
        package_path = container.xpath('n:rootfiles/n:rootfile/@full-path', namespaces=ns)[0]
        package = etree.fromstring(epub_zip.read(package_path))
        metadata = package.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]

        epub_metadata = {}
        for field in ('title', 'description', 'creator'):
            values = metadata.xpath('dc:%s/text()' % field, namespaces=ns)
            # None marks "absent" so each field can get its own fallback.
            epub_metadata[field] = values[0] if values else None

        coversection = package.xpath("/pkg:package/pkg:manifest/pkg:item[@id='cover']/@href", namespaces=ns)
        if coversection:
            coverfile = extractCover(epub_zip, coversection[0], tmp_file_path)
        else:
            coverfile = None

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        # A missing title falls back to the uploaded file's name.
        title=epub_metadata['title'] or original_file_name,
        author=epub_metadata['creator'] or "Unknown",
        cover=coverfile,
        description=epub_metadata['description'] or "Unknown",
        tags="",
        series="",
        series_id="")

35
cps/fb2.py Normal file
View File

@ -0,0 +1,35 @@
from lxml import etree
import os
import uploader
def get_fb2_info(tmp_file_path, original_file_name, original_file_extension):
    """Build ``BookMeta`` from the description section of an FB2 file.

    Authors come from ``title-info/author`` (first, middle and last name,
    joined with spaces; several authors joined with ``", "``), the title
    from ``title-info/book-title`` and the description from
    ``publish-info/book-name``.  Every element is optional: missing name
    parts are skipped, a missing title falls back to the original file
    name, a missing author to ``"Unknown"`` and a missing description to
    an empty string.

    :param tmp_file_path: path of the stored FB2 file
    :param original_file_name: upload's file name without extension
    :param original_file_extension: upload's extension including the dot
    :return: ``uploader.BookMeta`` with no cover
    :raises: whatever ``lxml`` raises for a document that is not valid XML
    """
    ns = {
        'fb': 'http://www.gribuser.ru/xml/fictionbook/2.0',
        'l': 'http://www.w3.org/1999/xlink',
    }

    # Binary mode hands lxml the raw bytes, so the encoding declared in the
    # XML prolog is honoured; ``with`` closes the file on any outcome.
    with open(tmp_file_path, 'rb') as fb2_file:
        tree = etree.fromstring(fb2_file.read())

    def first_text(node, path):
        """Return the first text node under ``path``, or None if absent."""
        found = node.xpath(path + '/text()', namespaces=ns)
        return found[0] if found else None

    def get_author(element):
        """Join whichever of first/middle/last name the author declares."""
        parts = [first_text(element, 'fb:' + tag)
                 for tag in ('first-name', 'middle-name', 'last-name')]
        return ' '.join(part for part in parts if part)

    title_info = '/fb:FictionBook/fb:description/fb:title-info'
    authors = tree.xpath(title_info + '/fb:author', namespaces=ns)
    author = ", ".join(name for name in map(get_author, authors) if name) or "Unknown"

    # ``unicode`` is the Python 2 builtin, as used throughout this code base.
    raw_title = first_text(tree, title_info + '/fb:book-title')
    title = unicode(raw_title) if raw_title is not None else original_file_name

    raw_description = first_text(tree, '/fb:FictionBook/fb:description/fb:publish-info/fb:book-name')
    description = unicode(raw_description) if raw_description is not None else ""

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=None,
        description=description,
        tags="",
        series="",
        series_id="")

30
cps/uploader.py Normal file
View File

@ -0,0 +1,30 @@
import os
import hashlib
from collections import namedtuple
import book_formats
# Directory in which uploads are stored while their metadata is extracted.
tmp_dir = "/tmp/calibre-web"

# Immutable record describing one uploaded book; upload() returns one
# (rtype: BookMeta).
BookMeta = namedtuple(
    'BookMeta',
    [
        'file_path',
        'extension',
        'title',
        'author',
        'cover',
        'description',
        'tags',
        'series',
        'series_id',
    ],
)
def upload(file):
    """Store an uploaded file in ``tmp_dir`` and extract its metadata.

    The file is saved under the MD5 hex digest of its file name.

    :param file: upload object exposing ``filename`` and ``save(path)``
    :rtype: BookMeta
    :raises OSError: when ``tmp_dir`` cannot be created
    """
    if not os.path.isdir(tmp_dir):
        try:
            os.mkdir(tmp_dir)
        except OSError:
            # Another request may have created the directory between the
            # check and the mkdir; only a genuine failure is re-raised.
            if not os.path.isdir(tmp_dir):
                raise

    filename = file.filename
    filename_root, file_extension = os.path.splitext(filename)

    # hashlib needs bytes: hashing a text file name directly fails for
    # non-ASCII names.  ASCII names produce the same digest as before.
    if isinstance(filename, bytes):
        name_bytes = filename
    else:
        name_bytes = filename.encode('utf-8')
    # NOTE(review): the temp name depends only on the file name, so two
    # simultaneous uploads with the same name share one temp path.
    tmp_file_path = os.path.join(tmp_dir, hashlib.md5(name_bytes).hexdigest())

    file.save(tmp_file_path)
    return book_formats.process(tmp_file_path, filename_root, file_extension)

View File

@ -25,11 +25,6 @@ from sqlalchemy.sql import *
import json import json
import datetime import datetime
from uuid import uuid4 from uuid import uuid4
try:
from wand.image import Image
use_generic_pdf_cover = False
except ImportError, e:
use_generic_pdf_cover = True
from shutil import copyfile from shutil import copyfile
class ReverseProxied(object): class ReverseProxied(object):
@ -78,6 +73,9 @@ file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter) file_handler.setFormatter(formatter)
app.logger.addHandler(file_handler) app.logger.addHandler(file_handler)
app.logger.info('Starting Calibre Web...') app.logger.info('Starting Calibre Web...')
logging.getLogger("book_formats").addHandler(file_handler)
logging.getLogger("book_formats").setLevel(logging.INFO)
Principal(app) Principal(app)
@ -1123,6 +1121,9 @@ def edit_book(book_id):
else: else:
return render_template('edit_book.html', book=book, authors=author_names, cc=cc) return render_template('edit_book.html', book=book, authors=author_names, cc=cc)
import uploader
from shutil import move
@app.route("/upload", methods = ["GET", "POST"]) @app.route("/upload", methods = ["GET", "POST"])
@login_required @login_required
@upload_required @upload_required
@ -1134,20 +1135,16 @@ def upload():
db.session.connection().connection.connection.create_function('uuid4', 0, lambda : str(uuid4())) db.session.connection().connection.connection.create_function('uuid4', 0, lambda : str(uuid4()))
if request.method == 'POST' and 'btn-upload' in request.files: if request.method == 'POST' and 'btn-upload' in request.files:
file = request.files['btn-upload'] file = request.files['btn-upload']
filename = file.filename meta = uploader.upload(file)
filename_root, fileextension = os.path.splitext(filename)
if fileextension.upper() == ".PDF": title = meta.title
title = filename_root author = meta.author
author = "Unknown"
else:
flash("Upload is only available for PDF files", category="error")
return redirect(url_for('index', _external=True))
title_dir = helper.get_valid_filename(title, False) title_dir = helper.get_valid_filename(title, False)
author_dir = helper.get_valid_filename(author.decode('utf-8'), False) author_dir = helper.get_valid_filename(author.decode('utf-8'), False)
data_name = title_dir data_name = title_dir
filepath = config.DB_ROOT + "/" + author_dir + "/" + title_dir filepath = config.DB_ROOT + "/" + author_dir + "/" + title_dir
saved_filename = filepath + "/" + data_name + fileextension saved_filename = filepath + "/" + data_name + meta.extension
if not os.path.exists(filepath): if not os.path.exists(filepath):
try: try:
os.makedirs(filepath) os.makedirs(filepath)
@ -1155,21 +1152,20 @@ def upload():
flash("Failed to create path %s (Permission denied)." % filepath, category="error") flash("Failed to create path %s (Permission denied)." % filepath, category="error")
return redirect(url_for('index', _external=True)) return redirect(url_for('index', _external=True))
try: try:
file.save(saved_filename) move(meta.file_path, saved_filename)
except OSError: except OSError:
flash("Failed to store file %s (Permission denied)." % saved_filename, category="error") flash("Failed to store file %s (Permission denied)." % saved_filename, category="error")
return redirect(url_for('index', _external=True)) return redirect(url_for('index', _external=True))
file_size = os.path.getsize(saved_filename) file_size = os.path.getsize(saved_filename)
if meta.cover is None:
has_cover = 0 has_cover = 0
if fileextension.upper() == ".PDF":
if use_generic_pdf_cover:
basedir = os.path.dirname(__file__) basedir = os.path.dirname(__file__)
copyfile(os.path.join(basedir, "static/generic_cover.jpg"), os.path.join(filepath, "cover.jpg")) copyfile(os.path.join(basedir, "static/generic_cover.jpg"), os.path.join(filepath, "cover.jpg"))
else: else:
with Image(filename=saved_filename + "[0]", resolution=150) as img:
img.compression_quality = 88
img.save(filename=os.path.join(filepath, "cover.jpg"))
has_cover = 1 has_cover = 1
move(meta.cover, os.path.join(filepath, "cover.jpg"))
is_author = db.session.query(db.Authors).filter(db.Authors.name == author).first() is_author = db.session.query(db.Authors).filter(db.Authors.name == author).first()
if is_author: if is_author:
db_author = is_author db_author = is_author
@ -1179,7 +1175,7 @@ def upload():
path = os.path.join(author_dir, title_dir) path = os.path.join(author_dir, title_dir)
db_book = db.Books(title, "", "", datetime.datetime.now(), datetime.datetime(101, 01,01), 1, datetime.datetime.now(), path, has_cover, db_author, []) db_book = db.Books(title, "", "", datetime.datetime.now(), datetime.datetime(101, 01,01), 1, datetime.datetime.now(), path, has_cover, db_author, [])
db_book.authors.append(db_author) db_book.authors.append(db_author)
db_data = db.Data(db_book, fileextension.upper()[1:], file_size, data_name) db_data = db.Data(db_book, meta.extension.upper()[1:], file_size, data_name)
db_book.data.append(db_data) db_book.data.append(db_data)
db.session.add(db_book) db.session.add(db_book)