From 5167ee520eb2f6cd91acd71755e5a5e15d6ccc2d Mon Sep 17 00:00:00 2001 From: Ozzieisaacs Date: Tue, 22 Oct 2019 19:11:40 +0200 Subject: [PATCH] Solve cropped image pdf import (#1068) --- cps/uploader.py | 62 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/cps/uploader.py b/cps/uploader.py index b790acd1..914e6e34 100644 --- a/cps/uploader.py +++ b/cps/uploader.py @@ -20,6 +20,7 @@ from __future__ import division, print_function, unicode_literals import os import hashlib +import struct from tempfile import gettempdir from flask_babel import gettext as _ @@ -148,6 +149,23 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension): def CMYKInvert(img): return Image.merge(img.mode,[ImageOps.invert(b.convert('L')) for b in img.split()]) +def tiff_header_for_CCITT(width, height, img_size, CCITT_group=4): + tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h' + return struct.pack(tiff_header_struct, + b'II', # Byte order indication: Little endian + 42, # Version number (always 42) + 8, # Offset to first IFD + 8, # Number of tags in IFD + 256, 4, 1, width, # ImageWidth, LONG, 1, width + 257, 4, 1, height, # ImageLength, LONG, 1, length + 258, 3, 1, 1, # BitsPerSample, SHORT, 1, 1 + 259, 3, 1, CCITT_group, # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding + 262, 3, 1, 0, # PhotometricInterpretation, SHORT, 1, 0 = WhiteIsZero + 273, 4, 1, struct.calcsize(tiff_header_struct), # StripOffsets, LONG, 1, len of header + 278, 4, 1, height, # RowsPerStrip, LONG, 1, length + 279, 4, 1, img_size, # StripByteCounts, LONG, 1, size of image + 0 # last IFD + ) def pdf_preview(tmp_file_path, tmp_dir): if use_generic_pdf_cover: @@ -157,12 +175,14 @@ def pdf_preview(tmp_file_path, tmp_dir): try: input1 = PdfFileReader(open(tmp_file_path, 'rb'), strict=False) page0 = input1.getPage(0) + mediaBox = page0['/MediaBox'] + box = page0['/CropBox'] if '/CropBox' in page0 else mediaBox xObject = 
page0['/Resources']['/XObject'].getObject() for obj in xObject: if xObject[obj]['/Subtype'] == '/Image': size = (xObject[obj]['/Width'], xObject[obj]['/Height']) - data = xObject[obj]._data # xObject[obj].getData() + data = xObject[obj]._data mode = "P" if xObject[obj]['/ColorSpace'] == '/DeviceRGB': mode = "RGB" @@ -180,20 +200,52 @@ def pdf_preview(tmp_file_path, tmp_dir): img = open(cover_file_name, "wb") img.write(data) img.close() + # Post processing + img2 = Image.open(cover_file_name) + width, height = img2.size if mode == 'CMYK': - img2 = Image.open(cover_file_name)# .convert('RGB') img2 = CMYKInvert(img2) - img2.save(cover_file_name) + img2 = img2.crop((box[0]/mediaBox[2]*width, + box[1]/mediaBox[3]*height, + box[2]/mediaBox[2]*width, + box[3]/mediaBox[3]*height)) + img2.save(cover_file_name) return cover_file_name elif xObject[obj]['/Filter'] == '/JPXDecode': cover_file_name = os.path.splitext(tmp_file_path)[0] + ".cover.jp2" img = open(cover_file_name, "wb") img.write(data) img.close() + # Post processing + img2 = Image.open(cover_file_name) + width, height = img2.size if mode == 'CMYK': - img2 = Image.open(cover_file_name)# .convert('RGB') img2 = CMYKInvert(img2) - img2.save(cover_file_name) + img2 = img2.crop((box[0]/mediaBox[2]*width, + box[1]/mediaBox[3]*height, + box[2]/mediaBox[2]*width, + box[3]/mediaBox[3]*height)) + img2.save(cover_file_name) + return cover_file_name + elif xObject[obj]['/Filter'] == '/CCITTFaxDecode': + if xObject[obj]['/DecodeParms']['/K'] == -1: + CCITT_group = 4 + else: + CCITT_group = 3 + width = xObject[obj]['/Width'] + height = xObject[obj]['/Height'] + img_size = len(data) + tiff_header = tiff_header_for_CCITT(width, height, img_size, CCITT_group) + cover_file_name_tiff = os.path.splitext(tmp_file_path)[0] + obj[1:] + '.tiff' + cover_file_name = os.path.splitext(tmp_file_path)[0] + obj[1:] + '.jpg' + img = open(cover_file_name_tiff, "wb") + img.write(tiff_header + data) + img.close() + # Post processing + img2 = 
Image.open(cover_file_name_tiff) + if img2.mode == '1': + img2 = ImageOps.invert(img2.convert('RGB')) + img2.save(cover_file_name) return cover_file_name else: img = Image.frombytes(mode, size, data)