Add PyPDF2 to vendor

parent 2b2d485730
commit 829e926770
5  vendor/PyPDF2/__init__.py  (vendored, executable file)
@@ -0,0 +1,5 @@
from .pdf import PdfFileReader, PdfFileWriter
from .merger import PdfFileMerger
from .pagerange import PageRange, parse_filename_page_ranges
from ._version import __version__
__all__ = ["pdf", "PdfFileMerger"]
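The package's public surface, for reference -- a minimal sketch, not part of this diff, assuming vendor/ is added to sys.path so the vendored copy imports as PyPDF2:

    # Assumption: vendor/ is on sys.path, so the vendored copy imports as PyPDF2.
    import PyPDF2
    from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger, PageRange
    print(PyPDF2.__version__)   # '1.26.0', re-exported from _version.py below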
1  vendor/PyPDF2/_version.py  (vendored, executable file)
@@ -0,0 +1 @@
__version__ = '1.26.0'
362  vendor/PyPDF2/filters.py  (vendored, executable file)
@@ -0,0 +1,362 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


"""
Implementation of stream filters for PDF.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

from .utils import PdfReadError, ord_, chr_
from sys import version_info
if version_info < ( 3, 0 ):
    from cStringIO import StringIO
else:
    from io import StringIO
import struct

try:
    import zlib

    def decompress(data):
        return zlib.decompress(data)

    def compress(data):
        return zlib.compress(data)

except ImportError:
    # Unable to import zlib. Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        retval = Array.CreateInstance(System.Byte, len(buf))
        for i in range(len(buf)):
            retval[i] = ord(buf[i])
        return retval

    def _bytearr_to_string(bytes):
        retval = ""
        for i in range(bytes.Length):
            retval += chr(bytes[i])
        return retval

    def _read_bytes(stream):
        ms = IO.MemoryStream()
        buf = Array.CreateInstance(System.Byte, 2048)
        while True:
            bytes = stream.Read(buf, 0, buf.Length)
            if bytes == 0:
                break
            else:
                ms.Write(buf, 0, bytes)
        retval = ms.ToArray()
        ms.Close()
        return retval

    def decompress(data):
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        ms.Write(bytes, 0, bytes.Length)
        ms.Position = 0  # fseek 0
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
        bytes = _read_bytes(gz)
        retval = _bytearr_to_string(bytes)
        gz.Close()
        return retval

    def compress(data):
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
        gz.Write(bytes, 0, bytes.Length)
        gz.Close()
        ms.Position = 0  # fseek 0
        bytes = ms.ToArray()
        retval = _bytearr_to_string(bytes)
        ms.Close()
        return retval


class FlateDecode(object):
    def decode(data, decodeParms):
        data = decompress(data)
        predictor = 1
        if decodeParms:
            try:
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass  # usually an array with a null object was read

        # predictor 1 == no predictor
        if predictor != 1:
            columns = decodeParms["/Columns"]
            # PNG prediction:
            if predictor >= 10 and predictor <= 15:
                output = StringIO()
                # PNG prediction can vary from row to row
                rowlength = columns + 1
                assert len(data) % rowlength == 0
                prev_rowdata = (0,) * rowlength
                for row in range(len(data) // rowlength):
                    rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
                    filterByte = rowdata[0]
                    if filterByte == 0:
                        pass
                    elif filterByte == 1:
                        for i in range(2, rowlength):
                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
                    elif filterByte == 2:
                        for i in range(1, rowlength):
                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                    else:
                        # unsupported PNG filter
                        raise PdfReadError("Unsupported PNG filter %r" % filterByte)
                    prev_rowdata = rowdata
                    output.write(''.join([chr(x) for x in rowdata[1:]]))
                data = output.getvalue()
            else:
                # unsupported predictor
                raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
        return data
    decode = staticmethod(decode)

    def encode(data):
        return compress(data)
    encode = staticmethod(encode)


class ASCIIHexDecode(object):
    def decode(data, decodeParms=None):
        retval = ""
        char = ""
        x = 0
        while True:
            c = data[x]
            if c == ">":
                break
            elif c.isspace():
                x += 1
                continue
            char += c
            if len(char) == 2:
                retval += chr(int(char, base=16))
                char = ""
            x += 1
        assert char == ""
        return retval
    decode = staticmethod(decode)


class LZWDecode(object):
    """Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
    """
    class decoder(object):
        def __init__(self, data):
            self.STOP=257
            self.CLEARDICT=256
            self.data=data
            self.bytepos=0
            self.bitpos=0
            self.dict=[""]*4096
            for i in range(256):
                self.dict[i]=chr(i)
            self.resetDict()

        def resetDict(self):
            self.dictlen=258
            self.bitspercode=9

        def nextCode(self):
            fillbits=self.bitspercode
            value=0
            while fillbits>0 :
                if self.bytepos >= len(self.data):
                    return -1
                nextbits=ord(self.data[self.bytepos])
                bitsfromhere=8-self.bitpos
                if bitsfromhere>fillbits:
                    bitsfromhere=fillbits
                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
                           (0xff >> (8-bitsfromhere))) <<
                          (fillbits-bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
                if self.bitpos >=8:
                    self.bitpos=0
                    self.bytepos = self.bytepos+1
            return value

        def decode(self):
            """ algorithm derived from:
            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
            and the PDFReference
            """
            cW = self.CLEARDICT;
            baos=""
            while True:
                pW = cW;
                cW = self.nextCode();
                if cW == -1:
                    raise PdfReadError("Missed the stop code in LZWDecode!")
                if cW == self.STOP:
                    break;
                elif cW == self.CLEARDICT:
                    self.resetDict();
                elif pW == self.CLEARDICT:
                    baos+=self.dict[cW]
                else:
                    if cW < self.dictlen:
                        baos += self.dict[cW]
                        p=self.dict[pW]+self.dict[cW][0]
                        self.dict[self.dictlen]=p
                        self.dictlen+=1
                    else:
                        p=self.dict[pW]+self.dict[pW][0]
                        baos+=p
                        self.dict[self.dictlen] = p;
                        self.dictlen+=1
                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                            self.bitspercode < 12):
                        self.bitspercode+=1
            return baos

    @staticmethod
    def decode(data,decodeParams=None):
        return LZWDecode.decoder(data).decode()


class ASCII85Decode(object):
    def decode(data, decodeParms=None):
        if version_info < ( 3, 0 ):
            retval = ""
            group = []
            x = 0
            hitEod = False
            # remove all whitespace from data
            data = [y for y in data if not (y in ' \n\r\t')]
            while not hitEod:
                c = data[x]
                if len(retval) == 0 and c == "<" and data[x+1] == "~":
                    x += 2
                    continue
                #elif c.isspace():
                #    x += 1
                #    continue
                elif c == 'z':
                    assert len(group) == 0
                    retval += '\x00\x00\x00\x00'
                    x += 1
                    continue
                elif c == "~" and data[x+1] == ">":
                    if len(group) != 0:
                        # cannot have a final group of just 1 char
                        assert len(group) > 1
                        cnt = len(group) - 1
                        group += [ 85, 85, 85 ]
                        hitEod = cnt
                    else:
                        break
                else:
                    c = ord(c) - 33
                    assert c >= 0 and c < 85
                    group += [ c ]
                if len(group) >= 5:
                    b = group[0] * (85**4) + \
                        group[1] * (85**3) + \
                        group[2] * (85**2) + \
                        group[3] * 85 + \
                        group[4]
                    assert b < (2**32 - 1)
                    c4 = chr((b >> 0) % 256)
                    c3 = chr((b >> 8) % 256)
                    c2 = chr((b >> 16) % 256)
                    c1 = chr(b >> 24)
                    retval += (c1 + c2 + c3 + c4)
                    if hitEod:
                        retval = retval[:-4+hitEod]
                    group = []
                x += 1
            return retval
        else:
            if isinstance(data, str):
                data = data.encode('ascii')
            n = b = 0
            out = bytearray()
            for c in data:
                if ord('!') <= c and c <= ord('u'):
                    n += 1
                    b = b*85+(c-33)
                    if n == 5:
                        out += struct.pack(b'>L',b)
                        n = b = 0
                elif c == ord('z'):
                    assert n == 0
                    out += b'\0\0\0\0'
                elif c == ord('~'):
                    if n:
                        for _ in range(5-n):
                            b = b*85+84
                        out += struct.pack(b'>L',b)[:n-1]
                    break
            return bytes(out)
    decode = staticmethod(decode)


def decodeStreamData(stream):
    from .generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    # If there is not data to decode we should not try to decode the data.
    if data:
        for filterType in filters:
            if filterType == "/FlateDecode" or filterType == "/Fl":
                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
                data = ASCIIHexDecode.decode(data)
            elif filterType == "/LZWDecode" or filterType == "/LZW":
                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCII85Decode" or filterType == "/A85":
                data = ASCII85Decode.decode(data)
            elif filterType == "/Crypt":
                decodeParams = stream.get("/DecodeParams", {})
                if "/Name" not in decodeParams and "/Type" not in decodeParams:
                    pass
                else:
                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
            else:
                # unsupported filter
                raise NotImplementedError("unsupported filter %s" % filterType)
    return data
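decodeStreamData() above dispatches on the stream's /Filter name(s), and FlateDecode wraps zlib when it is available. A round-trip sketch, not part of this diff; it assumes the zlib branch (CPython) and that the vendored package imports as PyPDF2:

    from PyPDF2.filters import FlateDecode

    raw = b"hello hello hello"
    packed = FlateDecode.encode(raw)                # zlib.compress under the hood
    # decodeParms=None leaves predictor == 1 (no predictor), so this is a pure inflate
    assert FlateDecode.decode(packed, None) == raw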
1226  vendor/PyPDF2/generic.py  (vendored, executable file)
File diff suppressed because it is too large.
553  vendor/PyPDF2/merger.py  (vendored, executable file)
@@ -0,0 +1,553 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from .generic import *
from .utils import isString, str_
from .pdf import PdfFileReader, PdfFileWriter
from .pagerange import PageRange
from sys import version_info
if version_info < ( 3, 0 ):
    from cStringIO import StringIO
    StreamIO = StringIO
else:
    from io import BytesIO
    from io import FileIO as file
    StreamIO = BytesIO


class _MergedPage(object):
    """
    _MergedPage is used internally by PdfFileMerger to collect necessary
    information on each page that is being merged.
    """
    def __init__(self, pagedata, src, id):
        self.src = src
        self.pagedata = pagedata
        self.out_pagedata = None
        self.id = id


class PdfFileMerger(object):
    """
    Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
    into a single PDF. It can concatenate, slice, insert, or any combination
    of the above.

    See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
    and :meth:`write()<write>` for usage information.

    :param bool strict: Determines whether user should be warned of all
            problems and also causes some correctable problems to be fatal.
            Defaults to ``True``.
    """

    def __init__(self, strict=True):
        self.inputs = []
        self.pages = []
        self.output = PdfFileWriter()
        self.bookmarks = []
        self.named_dests = []
        self.id_count = 0
        self.strict = strict

    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Merges the pages from the given file into the output file at the
        specified page number.

        :param int position: The *page number* to insert this file. File will
            be inserted after the given number.

        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.

        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.

        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        """

        # This parameter is passed to self.inputs.append and means
        # that the stream used was created in this method.
        my_file = False

        # If the fileobj parameter is a string, assume it is a path
        # and create a file object at that location. If it is a file,
        # copy the file's contents into a BytesIO (or StreamIO) stream object; if
        # it is a PdfFileReader, copy that reader's stream into a
        # BytesIO (or StreamIO) stream.
        # If fileobj is none of the above types, it is not modified
        decryption_key = None
        if isString(fileobj):
            fileobj = file(fileobj, 'rb')
            my_file = True
        elif isinstance(fileobj, file):
            fileobj.seek(0)
            filecontent = fileobj.read()
            fileobj = StreamIO(filecontent)
            my_file = True
        elif isinstance(fileobj, PdfFileReader):
            orig_tell = fileobj.stream.tell()
            fileobj.stream.seek(0)
            filecontent = StreamIO(fileobj.stream.read())
            fileobj.stream.seek(orig_tell)  # reset the stream to its original location
            fileobj = filecontent
            if hasattr(fileobj, '_decryption_key'):
                decryption_key = fileobj._decryption_key
            my_file = True

        # Create a new PdfFileReader instance using the stream
        # (either file or BytesIO or StringIO) created above
        pdfr = PdfFileReader(fileobj, strict=self.strict)
        if decryption_key is not None:
            pdfr._decryption_key = decryption_key

        # Find the range of pages to merge.
        if pages == None:
            pages = (0, pdfr.getNumPages())
        elif isinstance(pages, PageRange):
            pages = pages.indices(pdfr.getNumPages())
        elif not isinstance(pages, tuple):
            raise TypeError('"pages" must be a tuple of (start, stop[, step])')

        srcpages = []
        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)

        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline

        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests

        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)

            id = self.id_count
            self.id_count += 1

            mp = _MergedPage(pg, pdfr, id)

            srcpages.append(mp)

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)

        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages

        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))

    def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
        all pages onto the end of the file instead of specifying a position.

        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.

        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.

        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        """

        self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)

    def write(self, fileobj):
        """
        Writes all data that has been merged to the given output file.

        :param fileobj: Output file. Can be a filename or any kind of
            file-like object.
        """
        my_file = False
        if isString(fileobj):
            fileobj = file(fileobj, 'wb')
            my_file = True

        # Add pages to the PdfFileWriter
        # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
        for page in self.pages:
            self.output.addPage(page.pagedata)
            page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
            #idnum = self.output._objects.index(self.output._pages.getObject()["/Kids"][-1].getObject()) + 1
            #page.out_pagedata = IndirectObject(idnum, 0, self.output)

        # Once all pages are added, create bookmarks to point at those pages
        self._write_dests()
        self._write_bookmarks()

        # Write the output to the file
        self.output.write(fileobj)

        if my_file:
            fileobj.close()

    def close(self):
        """
        Shuts all file descriptors (input and output) and clears all memory
        usage.
        """
        self.pages = []
        for fo, pdfr, mine in self.inputs:
            if mine:
                fo.close()

        self.inputs = []
        self.output = None

    def addMetadata(self, infos):
        """
        Add custom metadata to the output.

        :param dict infos: a Python dictionary where each key is a field
            and each value is your new metadata.
            Example: ``{u'/Title': u'My title'}``
        """
        self.output.addMetadata(infos)

    def setPageLayout(self, layout):
        """
        Set the page layout

        :param str layout: The page layout to be used

        Valid layouts are:
             /NoLayout        Layout explicitly not specified
             /SinglePage      Show one page at a time
             /OneColumn       Show one column at a time
             /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
             /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
             /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
             /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
        """
        self.output.setPageLayout(layout)

    def setPageMode(self, mode):
        """
        Set the page mode.

        :param str mode: The page mode to use.

        Valid modes are:
            /UseNone         Do not show outlines or thumbnails panels
            /UseOutlines     Show outlines (aka bookmarks) panel
            /UseThumbs       Show page thumbnails panel
            /FullScreen      Fullscreen view
            /UseOC           Show Optional Content Group (OCG) panel
            /UseAttachments  Show attachments panel
        """
        self.output.setPageMode(mode)

    def _trim_dests(self, pdf, dests, pages):
        """
        Removes any named destinations that are not a part of the specified
        page set.
        """
        new_dests = []
        prev_header_added = True
        for k, o in list(dests.items()):
            for j in range(*pages):
                if pdf.getPage(j).getObject() == o['/Page'].getObject():
                    o[NameObject('/Page')] = o['/Page'].getObject()
                    assert str_(k) == str_(o['/Title'])
                    new_dests.append(o)
                    break
        return new_dests

    def _trim_outline(self, pdf, outline, pages):
        """
        Removes any outline/bookmark entries that are not a part of the
        specified page set.
        """
        new_outline = []
        prev_header_added = True
        for i, o in enumerate(outline):
            if isinstance(o, list):
                sub = self._trim_outline(pdf, o, pages)
                if sub:
                    if not prev_header_added:
                        new_outline.append(outline[i-1])
                    new_outline.append(sub)
            else:
                prev_header_added = False
                for j in range(*pages):
                    if pdf.getPage(j).getObject() == o['/Page'].getObject():
                        o[NameObject('/Page')] = o['/Page'].getObject()
                        new_outline.append(o)
                        prev_header_added = True
                        break
        return new_outline

    def _write_dests(self):
        dests = self.named_dests

        for v in dests:
            pageno = None
            pdf = None
            if '/Page' in v:
                for i, p in enumerate(self.pages):
                    if p.id == v['/Page']:
                        v[NameObject('/Page')] = p.out_pagedata
                        pageno = i
                        pdf = p.src
                        break
            if pageno != None:
                self.output.addNamedDestinationObject(v)

    def _write_bookmarks(self, bookmarks=None, parent=None):

        if bookmarks == None:
            bookmarks = self.bookmarks

        last_added = None
        for b in bookmarks:
            if isinstance(b, list):
                self._write_bookmarks(b, last_added)
                continue

            pageno = None
            pdf = None
            if '/Page' in b:
                for i, p in enumerate(self.pages):
                    if p.id == b['/Page']:
                        #b[NameObject('/Page')] = p.out_pagedata
                        args = [NumberObject(p.id), NameObject(b['/Type'])]
                        #nothing more to add
                        #if b['/Type'] == '/Fit' or b['/Type'] == '/FitB'
                        if b['/Type'] == '/FitH' or b['/Type'] == '/FitBH':
                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
                                args.append(FloatObject(b['/Top']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Top']
                        elif b['/Type'] == '/FitV' or b['/Type'] == '/FitBV':
                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
                                args.append(FloatObject(b['/Left']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Left']
                        elif b['/Type'] == '/XYZ':
                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
                                args.append(FloatObject(b['/Left']))
                            else:
                                args.append(FloatObject(0))
                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
                                args.append(FloatObject(b['/Top']))
                            else:
                                args.append(FloatObject(0))
                            if '/Zoom' in b and not isinstance(b['/Zoom'], NullObject):
                                args.append(FloatObject(b['/Zoom']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Top'], b['/Zoom'], b['/Left']
                        elif b['/Type'] == '/FitR':
                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
                                args.append(FloatObject(b['/Left']))
                            else:
                                args.append(FloatObject(0))
                            if '/Bottom' in b and not isinstance(b['/Bottom'], NullObject):
                                args.append(FloatObject(b['/Bottom']))
                            else:
                                args.append(FloatObject(0))
                            if '/Right' in b and not isinstance(b['/Right'], NullObject):
                                args.append(FloatObject(b['/Right']))
                            else:
                                args.append(FloatObject(0))
                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
                                args.append(FloatObject(b['/Top']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']

                        b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})

                        pageno = i
                        pdf = p.src
                        break
            if pageno != None:
                del b['/Page'], b['/Type']
                last_added = self.output.addBookmarkDict(b, parent)

    def _associate_dests_to_pages(self, pages):
        for nd in self.named_dests:
            pageno = None
            np = nd['/Page']

            if isinstance(np, NumberObject):
                continue

            for p in pages:
                if np.getObject() == p.pagedata.getObject():
                    pageno = p.id

            if pageno != None:
                nd[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))

    def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
        if bookmarks == None:
            bookmarks = self.bookmarks

        for b in bookmarks:
            if isinstance(b, list):
                self._associate_bookmarks_to_pages(pages, b)
                continue

            pageno = None
            bp = b['/Page']

            if isinstance(bp, NumberObject):
                continue

            for p in pages:
                if bp.getObject() == p.pagedata.getObject():
                    pageno = p.id

            if pageno != None:
                b[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))

    def findBookmark(self, bookmark, root=None):
        if root == None:
            root = self.bookmarks

        for i, b in enumerate(root):
            if isinstance(b, list):
                res = self.findBookmark(bookmark, b)
                if res:
                    return [i] + res
            elif b == bookmark or b['/Title'] == bookmark:
                return [i]

        return None

    def addBookmark(self, title, pagenum, parent=None):
        """
        Add a bookmark to this PDF file.

        :param str title: Title to use for this bookmark.
        :param int pagenum: Page number this bookmark will point to.
        :param parent: A reference to a parent bookmark to create nested
            bookmarks.
        """
        if parent == None:
            iloc = [len(self.bookmarks)-1]
        elif isinstance(parent, list):
            iloc = parent
        else:
            iloc = self.findBookmark(parent)

        dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))

        if parent == None:
            self.bookmarks.append(dest)
        else:
            bmparent = self.bookmarks
            for i in iloc[:-1]:
                bmparent = bmparent[i]
            npos = iloc[-1]+1
            if npos < len(bmparent) and isinstance(bmparent[npos], list):
                bmparent[npos].append(dest)
            else:
                bmparent.insert(npos, [dest])
        return dest

    def addNamedDestination(self, title, pagenum):
        """
        Add a destination to the output.

        :param str title: Title to use
        :param int pagenum: Page number this destination points at.
        """

        dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
        self.named_dests.append(dest)


class OutlinesObject(list):
    def __init__(self, pdf, tree, parent=None):
        list.__init__(self)
        self.tree = tree
        self.pdf = pdf
        self.parent = parent

    def remove(self, index):
        obj = self[index]
        del self[index]
        self.tree.removeChild(obj)

    def add(self, title, pagenum):
        pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
        action = DictionaryObject()
        action.update({
            NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S') : NameObject('/GoTo')
        })
        actionRef = self.pdf._addObject(action)
        bookmark = TreeObject()

        bookmark.update({
            NameObject('/A'): actionRef,
            NameObject('/Title'): createStringObject(title),
        })

        self.pdf._addObject(bookmark)

        self.tree.addChild(bookmark)

    def removeAll(self):
        for child in [x for x in self.tree.children()]:
            self.tree.removeChild(child)
            self.pop()
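A usage sketch of the PdfFileMerger flow described in the docstrings above -- not part of this diff; the filenames are hypothetical and the import assumes the vendored package is on sys.path:

    from PyPDF2 import PdfFileMerger, PageRange

    merger = PdfFileMerger(strict=False)            # strict=True (the default) makes more problems fatal
    merger.append("a.pdf", bookmark="Part A")       # all pages, bookmarked at the start
    merger.append("b.pdf", pages=PageRange(":3"))   # only the first three pages of b.pdf
    merger.merge(0, "cover.pdf")                    # insert at page position 0
    merger.write("combined.pdf")
    merger.close()                                  # closes the input streams merge() opened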
152  vendor/PyPDF2/pagerange.py  (vendored, executable file)
@@ -0,0 +1,152 @@
#!/usr/bin/env python
"""
Representation and utils for ranges of PDF file pages.

Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
"""

import re
from .utils import isString

_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# groups:         12     34       5 6     7 8


class ParseError(Exception):
    pass


PAGE_RANGE_HELP = """Remember, page indices start with zero.
        Page range expression examples:
            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
"""


class PageRange(object):
    """
    A slice-like representation of a range of page indices,
    i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])

    # Just formatting this when there is __doc__ for __init__
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        return isinstance(input, slice) or \
               isinstance(input, PageRange) or \
               (isString(input)
                and bool(re.match(PAGE_RANGE_RE, input)))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step == None:
            if s.start != None and s.stop == s.start + 1:
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ':'.join("" if i == None else str(i) for i in indices)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)


PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.


def parse_filename_page_ranges(args):
    """
    Given a list of filenames and page ranges, return a list of
    (filename, page_range) pairs.
    First arg must be a filename; other ags are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.
    """
    pairs = []
    pdf_filename = None
    did_page_range = False
    for arg in args + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError("The first argument must be a filename, " \
                                 "not a page range.")

            pairs.append( (pdf_filename, PageRange(arg)) )
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )

            pdf_filename = arg
            did_page_range = False
    return pairs
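A few concrete parses of the notation documented in PAGE_RANGE_HELP above -- not part of this diff; same import assumption as before:

    from PyPDF2 import PageRange

    assert PageRange("1:10:2").to_slice() == slice(1, 10, 2)
    assert PageRange("3").to_slice() == slice(3, 4)        # a bare int is a one-page range
    assert str(PageRange(slice(0, 3))) == "0:3"            # round-trips through str()
    assert list(range(*PageRange("::-1").indices(4))) == [3, 2, 1, 0]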
3004  vendor/PyPDF2/pdf.py  (vendored, executable file)
File diff suppressed because it is too large.
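Although pdf.py's diff is suppressed, merger.py above already shows the reader/writer API it defines: PdfFileReader with getNumPages()/getPage() and a strict flag, PdfFileWriter with addPage()/write(). A minimal read sketch on that basis -- not part of this diff; input.pdf is hypothetical:

    from PyPDF2 import PdfFileReader

    with open("input.pdf", "rb") as fh:
        reader = PdfFileReader(fh, strict=False)
        for i in range(reader.getNumPages()):
            page = reader.getPage(i)   # the same page objects PdfFileMerger.merge() iterates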
295  vendor/PyPDF2/utils.py  (vendored, executable file)
@@ -0,0 +1,295 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"


import sys

try:
    import __builtin__ as builtins
except ImportError:  # Py3
    import builtins


xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)

bytes_type = type(bytes())  # Works the same in Python 2.X and 3.X
string_type = getattr(builtins, "unicode", str)
int_types = (int, long) if sys.version_info[0] < 3 else (int,)


# Make basic type tests more consistent
def isString(s):
    """Test if arg is a string. Compatible with Python 2 and 3."""
    return isinstance(s, _basestring)


def isInt(n):
    """Test if arg is an int. Compatible with Python 2 and 3."""
    return isinstance(n, int_types)


def isBytes(b):
    """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
    return isinstance(b, bytes_type)


#custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
    file = filename.replace("/", "\\").rsplit("\\", 1)[1]  # find the file name
    return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)


def readUntilWhitespace(stream, maxchars=None):
    """
    Reads non-whitespace characters and returns them.
    Stops upon encountering whitespace or when maxchars is reached.
    """
    txt = b_("")
    while True:
        tok = stream.read(1)
        if tok.isspace() or not tok:
            break
        txt += tok
        if len(txt) == maxchars:
            break
    return txt


def readNonWhitespace(stream):
    """
    Finds and reads the next non-whitespace character (ignores whitespace).
    """
    tok = WHITESPACES[0]
    while tok in WHITESPACES:
        tok = stream.read(1)
    return tok


def skipOverWhitespace(stream):
    """
    Similar to readNonWhitespace, but returns a Boolean if more than
    one whitespace character was read.
    """
    tok = WHITESPACES[0]
    cnt = 0;
    while tok in WHITESPACES:
        tok = stream.read(1)
        cnt+=1
    return (cnt > 1)


def skipOverComment(stream):
    tok = stream.read(1)
    stream.seek(-1, 1)
    if tok == b_('%'):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)


def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Reads until the regular expression pattern matched (ignore the match)
    Raise PdfStreamError on premature end-of-file.
    :param bool ignore_eof: If true, ignore end-of-line and return immediately
    """
    name = b_('')
    while True:
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof == True:
                return name
            else:
                raise PdfStreamError("Stream has ended unexpectedly")
        m = regex.search(tok)
        if m is not None:
            name += tok[:m.start()]
            stream.seek(m.start()-len(tok), 1)
            break
        name += tok
    return name


class ConvertFunctionsToVirtualList(object):
    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        if isinstance(index, slice):
            indices = xrange_fn(*index.indices(len(self)))
            cls = type(self)
            return cls(indices.__len__, lambda idx: self[indices[idx]])
        if not isInt(index):
            raise TypeError("sequence indices must be integers")
        len_self = len(self)
        if index < 0:
            # support negative indexes
            index = len_self + index
        if index < 0 or index >= len_self:
            raise IndexError("sequence index out of range")
        return self.getFunction(index)


def RC4_encrypt(key, plaintext):
    S = [i for i in range(256)]
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % len(key)])) % 256
        S[i], S[j] = S[j], S[i]
    i, j = 0, 0
    retval = b_("")
    for x in range(len(plaintext)):
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        retval += b_(chr(ord_(plaintext[x]) ^ t))
    return retval


def matrixMultiply(a, b):
    return [[sum([float(i)*float(j)
                  for i, j in zip(row, col)]
                 ) for col in zip(*b)]
            for row in a]


def markLocation(stream):
    """Creates text file showing current location in context."""
    # Mainly for debugging
    RADIUS = 5000
    stream.seek(-RADIUS, 1)
    outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
    outputDoc.write(stream.read(RADIUS))
    outputDoc.write('HERE')
    outputDoc.write(stream.read(RADIUS))
    outputDoc.close()
    stream.seek(-RADIUS, 1)


class PyPdfError(Exception):
    pass


class PdfReadError(PyPdfError):
    pass


class PageSizeNotDefinedError(PyPdfError):
    pass


class PdfReadWarning(UserWarning):
    pass


class PdfStreamError(PdfReadError):
    pass


if sys.version_info[0] < 3:
    def b_(s):
        return s
else:
    B_CACHE = {}

    def b_(s):
        bc = B_CACHE
        if s in bc:
            return bc[s]
        if type(s) == bytes:
            return s
        else:
            r = s.encode('latin-1')
            if len(s) < 2:
                bc[s] = r
            return r


def u_(s):
    if sys.version_info[0] < 3:
        return unicode(s, 'unicode_escape')
    else:
        return s


def str_(b):
    if sys.version_info[0] < 3:
        return b
    else:
        if type(b) == bytes:
            return b.decode('latin-1')
        else:
            return b


def ord_(b):
    if sys.version_info[0] < 3 or type(b) == str:
        return ord(b)
    else:
        return b


def chr_(c):
    if sys.version_info[0] < 3:
        return c
    else:
        return chr(c)


def barray(b):
    if sys.version_info[0] < 3:
        return b
    else:
        return bytearray(b)


def hexencode(b):
    if sys.version_info[0] < 3:
        return b.encode('hex')
    else:
        import codecs
        coder = codecs.getencoder('hex_codec')
        return coder(b)[0]


def hexStr(num):
    return hex(num).replace('L', '')


WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
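RC4_encrypt() above is plain RC4, which is symmetric, so the same call decrypts; b_() coerces str to latin-1 bytes on Python 3. A sketch -- not part of this diff; same import assumption as before:

    from PyPDF2.utils import RC4_encrypt, b_

    key = b_("secret")
    ct = RC4_encrypt(key, b_("attack at dawn"))
    assert RC4_encrypt(key, ct) == b_("attack at dawn")   # the same keystream XORed twice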
358
vendor/PyPDF2/xmp.py
vendored
Executable file
358
vendor/PyPDF2/xmp.py
vendored
Executable file
|
@ -0,0 +1,358 @@
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
import decimal
|
||||||
|
from .generic import PdfObject
|
||||||
|
from xml.dom import getDOMImplementation
|
||||||
|
from xml.dom.minidom import parseString
|
||||||
|
from .utils import u_
|
||||||
|
|
||||||
|
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
|
||||||
|
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
|
||||||
|
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
|
||||||
|
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
|
||||||
|
|
||||||
|
# What is the PDFX namespace, you might ask? I might ask that too. It's
|
||||||
|
# a completely undocumented namespace used to place "custom metadata"
|
||||||
|
# properties, which are arbitrary metadata properties with no semantic or
|
||||||
|
# documented meaning. Elements in the namespace are key/value-style storage,
|
||||||
|
# where the element name is the key and the content is the value. The keys
|
||||||
|
# are transformed into valid XML identifiers by substituting an invalid
|
||||||
|
# identifier character with \u2182 followed by the unicode hex ID of the
|
||||||
|
# original character. A key like "my car" is therefore "my\u21820020car".
|
||||||
|
#
|
||||||
|
# \u2182, in case you're wondering, is the unicode character
|
||||||
|
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
|
||||||
|
# escaping characters.
|
||||||
|
#
|
||||||
|
# Intentional users of the pdfx namespace should be shot on sight. A
|
||||||
|
# custom data schema and sensical XML elements could be used instead, as is
|
||||||
|
# suggested by Adobe's own documentation on XMP (under "Extensibility of
|
||||||
|
# Schemas").
|
||||||
|
#
|
||||||
|
# Information presented here on the /pdfx/ schema is a result of limited
|
||||||
|
# reverse engineering, and does not constitute a full specification.
|
||||||
|
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
||||||
|
|
||||||
|
iso8601 = re.compile("""
|
||||||
|
(?P<year>[0-9]{4})
|
||||||
|
(-
|
||||||
|
(?P<month>[0-9]{2})
|
||||||
|
(-
|
||||||
|
(?P<day>[0-9]+)
|
||||||
|
(T
|
||||||
|
(?P<hour>[0-9]{2}):
|
||||||
|
(?P<minute>[0-9]{2})
|
||||||
|
(:(?P<second>[0-9]{2}(.[0-9]+)?))?
|
||||||
|
(?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
""", re.VERBOSE)
|
||||||
|
|
||||||
|
|
||||||
|
class XmpInformation(PdfObject):
|
||||||
|
"""
|
||||||
|
An object that represents Adobe XMP metadata.
|
||||||
|
Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, stream):
|
||||||
|
self.stream = stream
|
||||||
|
docRoot = parseString(self.stream.getData())
|
||||||
|
self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
|
||||||
|
self.cache = {}
|
||||||
|
|
||||||
|
def writeToStream(self, stream, encryption_key):
|
||||||
|
self.stream.writeToStream(stream, encryption_key)
|
||||||
|
|
||||||
|
def getElement(self, aboutUri, namespace, name):
|
||||||
|
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||||
|
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||||
|
attr = desc.getAttributeNodeNS(namespace, name)
|
||||||
|
if attr != None:
|
||||||
|
yield attr
|
||||||
|
for element in desc.getElementsByTagNameNS(namespace, name):
|
||||||
|
yield element
|
||||||
|
|
||||||
|
def getNodesInNamespace(self, aboutUri, namespace):
|
||||||
|
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||||
|
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||||
|
for i in range(desc.attributes.length):
|
||||||
|
attr = desc.attributes.item(i)
|
||||||
|
if attr.namespaceURI == namespace:
|
||||||
|
yield attr
|
||||||
|
for child in desc.childNodes:
|
||||||
|
if child.namespaceURI == namespace:
|
||||||
|
yield child

    def _getText(self, element):
        text = ""
        for child in element.childNodes:
            if child.nodeType == child.TEXT_NODE:
                text += child.data
        return text

    def _converter_string(value):
        return value

    def _converter_date(value):
        m = iso8601.match(value)
        year = int(m.group("year"))
        month = int(m.group("month") or "1")
        day = int(m.group("day") or "1")
        hour = int(m.group("hour") or "0")
        minute = int(m.group("minute") or "0")
        second = decimal.Decimal(m.group("second") or "0")
        seconds = int(second.to_integral(decimal.ROUND_FLOOR))
        # The fractional part of the seconds field, scaled to microseconds.
        microseconds = int((second - seconds) * 1000000)
        tzd = m.group("tzd") or "Z"
        dt = datetime.datetime(year, month, day, hour, minute, seconds, microseconds)
        if tzd != "Z":
            # Normalize to UTC by applying the inverse of the zone offset.
            tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
            tzd_hours *= -1
            if tzd_hours < 0:
                tzd_minutes *= -1
            dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
        return dt
    _test_converter_date = staticmethod(_converter_date)
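
    # Illustrative only (hypothetical input): _converter_date parses an
    # ISO 8601 string and returns a naive datetime normalized to UTC, e.g.
    #
    #     XmpInformation._test_converter_date("2006-06-16T12:34:56-05:00")
    #     # -> datetime.datetime(2006, 6, 16, 17, 34, 56)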

    def _getter_bag(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
                if len(bags):
                    for bag in bags:
                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_seq(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
                if len(seqs):
                    for seq in seqs:
                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
                else:
                    value = converter(self._getText(element))
                    retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_langalt(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = {}
            for element in self.getElement("", namespace, name):
                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
                if len(alts):
                    for alt in alts:
                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval[item.getAttribute("xml:lang")] = value
                else:
                    retval["x-default"] = converter(self._getText(element))
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_single(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            value = None
            for element in self.getElement("", namespace, name):
                if element.nodeType == element.ATTRIBUTE_NODE:
                    value = element.nodeValue
                else:
                    value = self._getText(element)
                break
            if value is not None:
                value = converter(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = value
            return value
        return get

    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
    """
    Contributors to the resource (other than the authors): an unordered
    array of names.
    """

    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
    """
    Text describing the extent or scope of the resource.
    """

    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
    """
    A sorted array of names of the authors of the resource, listed in order
    of precedence.
    """

    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
    """
    A sorted array of dates (datetime.datetime instances) of significance to
    the resource. The dates and times are in UTC.
    """

    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the content of the
    resource.
    """

    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
    """
    The MIME type of the resource.
    """

    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
    """
    Unique identifier of the resource.
    """

    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
    """
    An unordered array specifying the languages used in the resource.
    """

    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
    """
    An unordered array of publisher names.
    """

    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
    """
    An unordered array of text descriptions of relationships to other
    documents.
    """

    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the rights the
    user has to this resource.
    """

    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
    """
    Unique identifier of the work from which this resource was derived.
    """

    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
    """
    An unordered array of descriptive phrases or keywords that specify the
    topic of the content of the resource.
    """

    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
    """
    A language-keyed dictionary of the title of the resource.
    """

    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
    """
    An unordered array of textual descriptions of the document type.
    """

    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
    """
    An unformatted text string representing document keywords.
    """

    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
    """
    The PDF file version, for example "1.0" or "1.3".
    """

    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
    """
    The name of the tool that created the PDF document.
    """

    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
    """
    The date and time the resource was originally created, returned as a
    UTC datetime.datetime object.
    """

    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
    """
    The date and time the resource was last modified, returned as a UTC
    datetime.datetime object.
    """

    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
    """
    The date and time that any metadata for this resource was last changed,
    returned as a UTC datetime.datetime object.
    """

    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
    """
    The name of the first known tool used to create the resource.
    """

    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
    """
    The common identifier for all versions and renditions of this resource.
    """

    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
    """
    An identifier for a specific incarnation of a document, updated each
    time a file is saved.
    """

    def custom_properties(self):
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # See the documentation about PDFX_NAMESPACE earlier in
                    # this file: \u2182 marks an escaped key character.
                    idx = key.find(u_("\u2182"))
                    if idx == -1:
                        break
                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._getText(node)
                self._custom_properties[key] = value
        return self._custom_properties

    custom_properties = property(custom_properties)
    """
    Retrieves custom metadata properties defined in the undocumented pdfx
    metadata schema.

    :return: a dictionary of key/value items for custom metadata properties.
    :rtype: dict
    """