Make PyPDF2 the preferred PDF metadata extractor again
This commit is contained in:
parent
e5c8a7ce50
commit
ec8844c7d4
|
@ -37,16 +37,15 @@ except (ImportError, RuntimeError) as e:
|
||||||
use_generic_pdf_cover = True
|
use_generic_pdf_cover = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PyPDF3 import PdfFileReader
|
from PyPDF2 import PdfFileReader
|
||||||
from PyPDF3 import __version__ as PyPdfVersion
|
|
||||||
use_pdf_meta = True
|
use_pdf_meta = True
|
||||||
except ImportError as ex:
|
except ImportError as ex:
|
||||||
|
log.debug('PyPDF2 is recommended for best performance in metadata extracting from pdf files: %s', ex)
|
||||||
try:
|
try:
|
||||||
from PyPDF2 import PdfFileReader
|
from PyPDF3 import PdfFileReader
|
||||||
from PyPDF2 import __version__ as PyPdfVersion
|
|
||||||
use_pdf_meta = True
|
use_pdf_meta = True
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', ex, e)
|
log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', e)
|
||||||
use_pdf_meta = False
|
use_pdf_meta = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -114,7 +113,7 @@ def parse_xmp(pdf_file):
|
||||||
try:
|
try:
|
||||||
xmp_info = pdf_file.getXmpMetadata()
|
xmp_info = pdf_file.getXmpMetadata()
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
log.debug('Can not read XMP metadata {}'.format(ex))
|
log.debug('Can not read PDF XMP metadata {}'.format(ex))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if xmp_info:
|
if xmp_info:
|
||||||
|
@ -160,6 +159,10 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
|
||||||
with open(tmp_file_path, 'rb') as f:
|
with open(tmp_file_path, 'rb') as f:
|
||||||
pdf_file = PdfFileReader(f)
|
pdf_file = PdfFileReader(f)
|
||||||
doc_info = pdf_file.getDocumentInfo()
|
doc_info = pdf_file.getDocumentInfo()
|
||||||
|
try:
|
||||||
|
doc_info = pdf_file.getDocumentInfo()
|
||||||
|
except Exception as exc:
|
||||||
|
log.debug('Can not read PDF DocumentInfo {}'.format(exc))
|
||||||
xmp_info = parse_xmp(pdf_file)
|
xmp_info = parse_xmp(pdf_file)
|
||||||
|
|
||||||
if xmp_info:
|
if xmp_info:
|
||||||
|
|
|
@ -7,7 +7,7 @@ Flask-Principal>=0.3.2,<0.5.1
|
||||||
backports_abc>=0.4
|
backports_abc>=0.4
|
||||||
Flask>=1.0.2,<2.1.0
|
Flask>=1.0.2,<2.1.0
|
||||||
iso-639>=0.4.5,<0.5.0
|
iso-639>=0.4.5,<0.5.0
|
||||||
PyPDF3>=1.0.0,<1.0.7
|
PyPDF2>=1.20,<2.11.0
|
||||||
pytz>=2016.10
|
pytz>=2016.10
|
||||||
requests>=2.11.1,<2.28.0
|
requests>=2.11.1,<2.28.0
|
||||||
SQLAlchemy>=1.3.0,<1.5.0
|
SQLAlchemy>=1.3.0,<1.5.0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user