From 2d0af0ab496b84c97e1564d97eb5415f9ec6236b Mon Sep 17 00:00:00 2001 From: Wulf Rajek <40003252+AnonTester@users.noreply.github.com> Date: Tue, 5 Apr 2022 01:26:35 +0100 Subject: [PATCH 1/2] Add pubdate, publisher and identifiers metadata #2163 --- cps/comic.py | 8 +++++-- cps/constants.py | 2 +- cps/editbooks.py | 17 ++++++++++++++- cps/epub.py | 22 ++++++++++++++++++-- cps/fb2.py | 4 +++- cps/uploader.py | 54 +++++++----------------------------------------- 6 files changed, 53 insertions(+), 54 deletions(-) diff --git a/cps/comic.py b/cps/comic.py index 2549579e..8f3a6f61 100644 --- a/cps/comic.py +++ b/cps/comic.py @@ -130,7 +130,9 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r series=loaded_metadata.series or "", series_id=loaded_metadata.issue or "", languages=loaded_metadata.language, - publisher="") + publisher="", + pubdate="", + identifiers=[]) return BookMeta( file_path=tmp_file_path, @@ -143,4 +145,6 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r series="", series_id="", languages="", - publisher="") + publisher="", + pubdate="", + identifiers=[]) diff --git a/cps/constants.py b/cps/constants.py index f40d16b0..762336dd 100644 --- a/cps/constants.py +++ b/cps/constants.py @@ -152,7 +152,7 @@ def selected_roles(dictionary): # :rtype: BookMeta BookMeta = namedtuple('BookMeta', 'file_path, extension, title, author, cover, description, tags, series, ' - 'series_id, languages, publisher') + 'series_id, languages, publisher, pubdate, identifiers') STABLE_VERSION = {'version': '0.6.19 Beta'} diff --git a/cps/editbooks.py b/cps/editbooks.py index c07e5d24..db87d891 100755 --- a/cps/editbooks.py +++ b/cps/editbooks.py @@ -983,8 +983,13 @@ def create_book_on_upload(modify_date, meta): # combine path and normalize path from Windows systems path = os.path.join(author_dir, title_dir).replace('\\', '/') + if meta.pubdate != "": + pubdate = datetime.strptime(meta.pubdate[:10], "%Y-%m-%d") + else: + pubdate = datetime(101, 1, 1) + # Calibre adds books with utc as timezone - db_book = db.Books(title, "", sort_authors, datetime.utcnow(), datetime(101, 1, 1), + db_book = db.Books(title, "", sort_authors, datetime.utcnow(), pubdate, '1', datetime.utcnow(), path, meta.cover, db_author, [], "") modify_date |= modify_database_object(input_authors, db_book.authors, db.Authors, calibre_db.session, @@ -1017,6 +1022,16 @@ def create_book_on_upload(modify_date, meta): # flush content, get db_book.id available calibre_db.session.flush() + + # Handle identifiers now that db_book.id is available + identifier_list = [] + for type_key, type_value in meta.identifiers: + identifier_list.append(db.Identifiers(type_value, type_key, db_book.id)) + modification, warning = modify_identifiers(identifier_list, db_book.identifiers, calibre_db.session) + if warning: + flash(_("Identifiers are not Case Sensitive, Overwriting Old Identifier"), category="warning") + modify_date |= modification + return db_book, input_authors, title_dir, renamed_authors diff --git a/cps/epub.py b/cps/epub.py index 80c12c35..d358d038 100644 --- a/cps/epub.py +++ b/cps/epub.py @@ -63,13 +63,15 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension): epub_metadata = {} - for s in ['title', 'description', 'creator', 'language', 'subject']: + for s in ['title', 'description', 'creator', 'language', 'subject', 'publisher', 'date']: tmp = p.xpath('dc:%s/text()' % s, namespaces=ns) if len(tmp) > 0: if s == 'creator': epub_metadata[s] = ' & '.join(split_authors(tmp)) elif s == 'subject': epub_metadata[s] = ', '.join(tmp) + elif s == 'date': + epub_metadata[s] = tmp[0][:10] else: epub_metadata[s] = tmp[0] else: @@ -78,6 +80,12 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension): if epub_metadata['subject'] == 'Unknown': epub_metadata['subject'] = '' + if epub_metadata['publisher'] == u'Unknown': + epub_metadata['publisher'] = '' + + if epub_metadata['date'] == u'Unknown': + epub_metadata['date'] = '' + if epub_metadata['description'] == u'Unknown': description = tree.xpath("//*[local-name() = 'description']/text()") if len(description) > 0: @@ -92,6 +100,14 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension): cover_file = parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path) + identifiers = [] + for node in p.xpath('dc:identifier', namespaces=ns): + identifier_name=node.attrib.values()[-1]; + identifier_value=node.text; + if identifier_name in ('uuid','calibre'): + continue; + identifiers.append( [identifier_name, identifier_value] ) + if not epub_metadata['title']: title = original_file_name else: @@ -108,7 +124,9 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension): series=epub_metadata['series'].encode('utf-8').decode('utf-8'), series_id=epub_metadata['series_id'].encode('utf-8').decode('utf-8'), languages=epub_metadata['language'], - publisher="") + publisher=epub_metadata['publisher'].encode('utf-8').decode('utf-8'), + pubdate=epub_metadata['date'], + identifiers=identifiers) def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path): diff --git a/cps/fb2.py b/cps/fb2.py index 21586736..c4b89fd6 100644 --- a/cps/fb2.py +++ b/cps/fb2.py @@ -77,4 +77,6 @@ def get_fb2_info(tmp_file_path, original_file_extension): series="", series_id="", languages="", - publisher="") + publisher="", + pubdate="", + identifiers=[]) diff --git a/cps/uploader.py b/cps/uploader.py index 992d188c..7a0359b3 100644 --- a/cps/uploader.py +++ b/cps/uploader.py @@ -107,52 +107,10 @@ def default_meta(tmp_file_path, original_file_name, original_file_extension): series="", series_id="", languages="", - publisher="") - - -def parse_xmp(pdf_file): - """ - Parse XMP Metadata and prepare for BookMeta object - """ - try: - xmp_info = pdf_file.getXmpMetadata() - except Exception as ex: - log.debug('Can not read XMP metadata {}'.format(ex)) - return None - - if xmp_info: - try: - xmp_author = xmp_info.dc_creator # list - except AttributeError: - xmp_author = [''] - - if xmp_info.dc_title: - xmp_title = xmp_info.dc_title['x-default'] - else: - xmp_title = '' - - if xmp_info.dc_description: - xmp_description = xmp_info.dc_description['x-default'] - else: - xmp_description = '' - - languages = [] - try: - for i in xmp_info.dc_language: - #calibre-web currently only takes one language. - languages.append(isoLanguages.get_lang3(i)) - except AttributeError: - languages.append('') - - xmp_tags = ', '.join(xmp_info.dc_subject) - xmp_publisher = ', '.join(xmp_info.dc_publisher) - - return {'author': xmp_author, - 'title': xmp_title, - 'subject': xmp_description, - 'tags': xmp_tags, 'languages': languages, - 'publisher': xmp_publisher - } + publisher="", + pubdate="", + identifiers=[] + ) def parse_xmp(pdf_file): @@ -251,7 +209,9 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension): series="", series_id="", languages=','.join(languages), - publisher=publisher) + publisher=publisher, + pubdate="", + identifiers=[]) def pdf_preview(tmp_file_path, tmp_dir): From adf6728f142378d1b3b2c81acae21d02858d6de3 Mon Sep 17 00:00:00 2001 From: Wulf Rajek <40003252+AnonTester@users.noreply.github.com> Date: Tue, 12 Apr 2022 00:22:05 +0100 Subject: [PATCH 2/2] Gracefully deal with incorrect dates --- cps/editbooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cps/editbooks.py b/cps/editbooks.py index db87d891..b14f79e0 100755 --- a/cps/editbooks.py +++ b/cps/editbooks.py @@ -983,9 +983,9 @@ def create_book_on_upload(modify_date, meta): # combine path and normalize path from Windows systems path = os.path.join(author_dir, title_dir).replace('\\', '/') - if meta.pubdate != "": + try: pubdate = datetime.strptime(meta.pubdate[:10], "%Y-%m-%d") - else: + except: pubdate = datetime(101, 1, 1) # Calibre adds books with utc as timezone