ISO language parsing of XMP data

This commit is contained in:
dickreckard 2020-09-11 10:49:45 +00:00
parent 22466d6b98
commit 65929c02bc

View File

@ -22,7 +22,7 @@ import hashlib
from tempfile import gettempdir from tempfile import gettempdir
from flask_babel import gettext as _ from flask_babel import gettext as _
from . import logger, comic from . import logger, comic, isoLanguages
from .constants import BookMeta from .constants import BookMeta
from .helper import split_authors from .helper import split_authors
@ -118,8 +118,9 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
doc_info = None doc_info = None
if use_pdf_meta: if use_pdf_meta:
with open(tmp_file_path, 'rb') as f: with open(tmp_file_path, 'rb') as f:
doc_info = PdfFileReader(f).getDocumentInfo() pdf_file = PdfFileReader(f)
xmp_info = PdfFileReader(f).getXmpMetadata() doc_info = pdf_file.getDocumentInfo()
xmp_info = pdf_file.getXmpMetadata()
if xmp_info: if xmp_info:
xmp_author = xmp_info.dc_creator xmp_author = xmp_info.dc_creator
if xmp_info.dc_title: if xmp_info.dc_title:
@ -130,25 +131,26 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
xmp_description = xmp_info.dc_description['x-default'] xmp_description = xmp_info.dc_description['x-default']
else: else:
xmp_description = '' xmp_description = ''
if xmp_info.dc_subject:
xmp_tags = ', '.join(xmp_info.dc_subject) xmp_tags = ', '.join(xmp_info.dc_subject)
else: xmp_language = xmp_info.dc_language[0]
xmp_tags = ''
if xmp_info.dc_language:
xmp_language = ', '.join(xmp_info.dc_language)
else:
xmp_language=''
if xmp_info.dc_publisher:
xmp_publisher = ', '.join(xmp_info.dc_publisher) xmp_publisher = ', '.join(xmp_info.dc_publisher)
else:
xmp_publisher=''
if xmp_info or doc_info: if xmp_info or doc_info:
author = xmp_author or split_authors([doc_info.author]) or u'Unknown' author = xmp_author or split_authors([doc_info.author]) or u'Unknown'
title = xmp_title or doc_info.title or original_file_name title = xmp_title or doc_info.title or original_file_name
subject = xmp_description or doc_info.subject subject = xmp_description or doc_info.subject
publisher = xmp_publisher publisher = xmp_publisher
tags = xmp_tags or doc_info['/Keywords'] tags = xmp_tags or doc_info['/Keywords']
language = xmp_language if xmp_language :
lang = xmp_language.split('-', 1)[0].lower()
if len(lang) == 2:
language = isoLanguages.get(part1=lang).name
elif len(lang) == 3:
language = isoLanguages.get(part3=lang).name
else:
language = ''
else:
language = ''
else: else:
author = u'Unknown' author = u'Unknown'
title = original_file_name title = original_file_name