Add PyPDF2 to vendor

parent 2b2d485730
commit 829e926770
5  vendor/PyPDF2/__init__.py  (vendored, executable file)
@@ -0,0 +1,5 @@
from .pdf import PdfFileReader, PdfFileWriter
from .merger import PdfFileMerger
from .pagerange import PageRange, parse_filename_page_ranges
from ._version import __version__
__all__ = ["pdf", "PdfFileMerger"]
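The package's public surface, for reference -- a minimal sketch, not part of this diff, assuming vendor/ is added to sys.path so the vendored copy imports as PyPDF2:

    # Assumption: vendor/ is on sys.path, so the vendored copy imports as PyPDF2.
    import PyPDF2
    from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger, PageRange
    print(PyPDF2.__version__)   # '1.26.0', re-exported from _version.py below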
1  vendor/PyPDF2/_version.py  (vendored, executable file)
@@ -0,0 +1 @@
__version__ = '1.26.0'
362  vendor/PyPDF2/filters.py  (vendored, executable file)
@@ -0,0 +1,362 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


"""
Implementation of stream filters for PDF.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

from .utils import PdfReadError, ord_, chr_
from sys import version_info
if version_info < ( 3, 0 ):
    from cStringIO import StringIO
else:
    from io import StringIO
import struct

try:
    import zlib

    def decompress(data):
        return zlib.decompress(data)

    def compress(data):
        return zlib.compress(data)

except ImportError:
    # Unable to import zlib. Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        retval = Array.CreateInstance(System.Byte, len(buf))
        for i in range(len(buf)):
            retval[i] = ord(buf[i])
        return retval

    def _bytearr_to_string(bytes):
        retval = ""
        for i in range(bytes.Length):
            retval += chr(bytes[i])
        return retval

    def _read_bytes(stream):
        ms = IO.MemoryStream()
        buf = Array.CreateInstance(System.Byte, 2048)
        while True:
            bytes = stream.Read(buf, 0, buf.Length)
            if bytes == 0:
                break
            else:
                ms.Write(buf, 0, bytes)
        retval = ms.ToArray()
        ms.Close()
        return retval

    def decompress(data):
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        ms.Write(bytes, 0, bytes.Length)
        ms.Position = 0  # fseek 0
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
        bytes = _read_bytes(gz)
        retval = _bytearr_to_string(bytes)
        gz.Close()
        return retval

    def compress(data):
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
        gz.Write(bytes, 0, bytes.Length)
        gz.Close()
        ms.Position = 0  # fseek 0
        bytes = ms.ToArray()
        retval = _bytearr_to_string(bytes)
        ms.Close()
        return retval


class FlateDecode(object):
    def decode(data, decodeParms):
        data = decompress(data)
        predictor = 1
        if decodeParms:
            try:
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass  # usually an array with a null object was read

        # predictor 1 == no predictor
        if predictor != 1:
            columns = decodeParms["/Columns"]
            # PNG prediction:
            if predictor >= 10 and predictor <= 15:
                output = StringIO()
                # PNG prediction can vary from row to row
                rowlength = columns + 1
                assert len(data) % rowlength == 0
                prev_rowdata = (0,) * rowlength
                for row in range(len(data) // rowlength):
                    rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
                    filterByte = rowdata[0]
                    if filterByte == 0:
                        pass
                    elif filterByte == 1:
                        for i in range(2, rowlength):
                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
                    elif filterByte == 2:
                        for i in range(1, rowlength):
                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                    else:
                        # unsupported PNG filter
                        raise PdfReadError("Unsupported PNG filter %r" % filterByte)
                    prev_rowdata = rowdata
                    output.write(''.join([chr(x) for x in rowdata[1:]]))
                data = output.getvalue()
            else:
                # unsupported predictor
                raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
        return data
    decode = staticmethod(decode)

    def encode(data):
        return compress(data)
    encode = staticmethod(encode)


class ASCIIHexDecode(object):
    def decode(data, decodeParms=None):
        retval = ""
        char = ""
        x = 0
        while True:
            c = data[x]
            if c == ">":
                break
            elif c.isspace():
                x += 1
                continue
            char += c
            if len(char) == 2:
                retval += chr(int(char, base=16))
                char = ""
            x += 1
        assert char == ""
        return retval
    decode = staticmethod(decode)


class LZWDecode(object):
    """Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
    """
    class decoder(object):
        def __init__(self, data):
            self.STOP=257
            self.CLEARDICT=256
            self.data=data
            self.bytepos=0
            self.bitpos=0
            self.dict=[""]*4096
            for i in range(256):
                self.dict[i]=chr(i)
            self.resetDict()

        def resetDict(self):
            self.dictlen=258
            self.bitspercode=9

        def nextCode(self):
            fillbits=self.bitspercode
            value=0
            while fillbits>0 :
                if self.bytepos >= len(self.data):
                    return -1
                nextbits=ord(self.data[self.bytepos])
                bitsfromhere=8-self.bitpos
                if bitsfromhere>fillbits:
                    bitsfromhere=fillbits
                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
                           (0xff >> (8-bitsfromhere))) <<
                          (fillbits-bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
                if self.bitpos >=8:
                    self.bitpos=0
                    self.bytepos = self.bytepos+1
            return value

        def decode(self):
            """ algorithm derived from:
            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
            and the PDFReference
            """
            cW = self.CLEARDICT;
            baos=""
            while True:
                pW = cW;
                cW = self.nextCode();
                if cW == -1:
                    raise PdfReadError("Missed the stop code in LZWDecode!")
                if cW == self.STOP:
                    break;
                elif cW == self.CLEARDICT:
                    self.resetDict();
                elif pW == self.CLEARDICT:
                    baos+=self.dict[cW]
                else:
                    if cW < self.dictlen:
                        baos += self.dict[cW]
                        p=self.dict[pW]+self.dict[cW][0]
                        self.dict[self.dictlen]=p
                        self.dictlen+=1
                    else:
                        p=self.dict[pW]+self.dict[pW][0]
                        baos+=p
                        self.dict[self.dictlen] = p;
                        self.dictlen+=1
                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                            self.bitspercode < 12):
                        self.bitspercode+=1
            return baos

    @staticmethod
    def decode(data,decodeParams=None):
        return LZWDecode.decoder(data).decode()


class ASCII85Decode(object):
    def decode(data, decodeParms=None):
        if version_info < ( 3, 0 ):
            retval = ""
            group = []
            x = 0
            hitEod = False
            # remove all whitespace from data
            data = [y for y in data if not (y in ' \n\r\t')]
            while not hitEod:
                c = data[x]
                if len(retval) == 0 and c == "<" and data[x+1] == "~":
                    x += 2
                    continue
                #elif c.isspace():
                #    x += 1
                #    continue
                elif c == 'z':
                    assert len(group) == 0
                    retval += '\x00\x00\x00\x00'
                    x += 1
                    continue
                elif c == "~" and data[x+1] == ">":
                    if len(group) != 0:
                        # cannot have a final group of just 1 char
                        assert len(group) > 1
                        cnt = len(group) - 1
                        group += [ 85, 85, 85 ]
                        hitEod = cnt
                    else:
                        break
                else:
                    c = ord(c) - 33
                    assert c >= 0 and c < 85
                    group += [ c ]
                if len(group) >= 5:
                    b = group[0] * (85**4) + \
                        group[1] * (85**3) + \
                        group[2] * (85**2) + \
                        group[3] * 85 + \
                        group[4]
                    assert b < (2**32 - 1)
                    c4 = chr((b >> 0) % 256)
                    c3 = chr((b >> 8) % 256)
                    c2 = chr((b >> 16) % 256)
                    c1 = chr(b >> 24)
                    retval += (c1 + c2 + c3 + c4)
                    if hitEod:
                        retval = retval[:-4+hitEod]
                    group = []
                x += 1
            return retval
        else:
            if isinstance(data, str):
                data = data.encode('ascii')
            n = b = 0
            out = bytearray()
            for c in data:
                if ord('!') <= c and c <= ord('u'):
                    n += 1
                    b = b*85+(c-33)
                    if n == 5:
                        out += struct.pack(b'>L',b)
                        n = b = 0
                elif c == ord('z'):
                    assert n == 0
                    out += b'\0\0\0\0'
                elif c == ord('~'):
                    if n:
                        for _ in range(5-n):
                            b = b*85+84
                        out += struct.pack(b'>L',b)[:n-1]
                    break
            return bytes(out)
    decode = staticmethod(decode)


def decodeStreamData(stream):
    from .generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    # If there is not data to decode we should not try to decode the data.
    if data:
        for filterType in filters:
            if filterType == "/FlateDecode" or filterType == "/Fl":
                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
                data = ASCIIHexDecode.decode(data)
            elif filterType == "/LZWDecode" or filterType == "/LZW":
                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCII85Decode" or filterType == "/A85":
                data = ASCII85Decode.decode(data)
            elif filterType == "/Crypt":
                decodeParams = stream.get("/DecodeParams", {})
                if "/Name" not in decodeParams and "/Type" not in decodeParams:
                    pass
                else:
                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
            else:
                # unsupported filter
                raise NotImplementedError("unsupported filter %s" % filterType)
    return data
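decodeStreamData() above dispatches on the stream's /Filter name(s), and FlateDecode wraps zlib when it is available. A round-trip sketch, not part of this diff; it assumes the zlib branch (CPython) and that the vendored package imports as PyPDF2:

    from PyPDF2.filters import FlateDecode

    raw = b"hello hello hello"
    packed = FlateDecode.encode(raw)                # zlib.compress under the hood
    # decodeParms=None leaves predictor == 1 (no predictor), so this is a pure inflate
    assert FlateDecode.decode(packed, None) == raw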
1226  vendor/PyPDF2/generic.py  (vendored, executable file)
File diff suppressed because it is too large.
553  vendor/PyPDF2/merger.py  (vendored, executable file)
@@ -0,0 +1,553 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from .generic import *
from .utils import isString, str_
from .pdf import PdfFileReader, PdfFileWriter
from .pagerange import PageRange
from sys import version_info
if version_info < ( 3, 0 ):
    from cStringIO import StringIO
    StreamIO = StringIO
else:
    from io import BytesIO
    from io import FileIO as file
    StreamIO = BytesIO


class _MergedPage(object):
    """
    _MergedPage is used internally by PdfFileMerger to collect necessary
    information on each page that is being merged.
    """
    def __init__(self, pagedata, src, id):
        self.src = src
        self.pagedata = pagedata
        self.out_pagedata = None
        self.id = id


class PdfFileMerger(object):
    """
    Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
    into a single PDF. It can concatenate, slice, insert, or any combination
    of the above.

    See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
    and :meth:`write()<write>` for usage information.

    :param bool strict: Determines whether user should be warned of all
            problems and also causes some correctable problems to be fatal.
            Defaults to ``True``.
    """

    def __init__(self, strict=True):
        self.inputs = []
        self.pages = []
        self.output = PdfFileWriter()
        self.bookmarks = []
        self.named_dests = []
        self.id_count = 0
        self.strict = strict

    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Merges the pages from the given file into the output file at the
        specified page number.

        :param int position: The *page number* to insert this file. File will
            be inserted after the given number.

        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.

        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.

        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        """

        # This parameter is passed to self.inputs.append and means
        # that the stream used was created in this method.
        my_file = False

        # If the fileobj parameter is a string, assume it is a path
        # and create a file object at that location. If it is a file,
        # copy the file's contents into a BytesIO (or StreamIO) stream object; if
        # it is a PdfFileReader, copy that reader's stream into a
        # BytesIO (or StreamIO) stream.
        # If fileobj is none of the above types, it is not modified
        decryption_key = None
        if isString(fileobj):
            fileobj = file(fileobj, 'rb')
            my_file = True
        elif isinstance(fileobj, file):
            fileobj.seek(0)
            filecontent = fileobj.read()
            fileobj = StreamIO(filecontent)
            my_file = True
        elif isinstance(fileobj, PdfFileReader):
            orig_tell = fileobj.stream.tell()
            fileobj.stream.seek(0)
            filecontent = StreamIO(fileobj.stream.read())
            fileobj.stream.seek(orig_tell)  # reset the stream to its original location
            fileobj = filecontent
            if hasattr(fileobj, '_decryption_key'):
                decryption_key = fileobj._decryption_key
            my_file = True

        # Create a new PdfFileReader instance using the stream
        # (either file or BytesIO or StringIO) created above
        pdfr = PdfFileReader(fileobj, strict=self.strict)
        if decryption_key is not None:
            pdfr._decryption_key = decryption_key

        # Find the range of pages to merge.
        if pages == None:
            pages = (0, pdfr.getNumPages())
        elif isinstance(pages, PageRange):
            pages = pages.indices(pdfr.getNumPages())
        elif not isinstance(pages, tuple):
            raise TypeError('"pages" must be a tuple of (start, stop[, step])')

        srcpages = []
        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)

        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline

        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests

        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)

            id = self.id_count
            self.id_count += 1

            mp = _MergedPage(pg, pdfr, id)

            srcpages.append(mp)

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)

        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages

        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))

    def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
        all pages onto the end of the file instead of specifying a position.

        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.

        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.

        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        """

        self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)

    def write(self, fileobj):
        """
        Writes all data that has been merged to the given output file.

        :param fileobj: Output file. Can be a filename or any kind of
            file-like object.
        """
        my_file = False
        if isString(fileobj):
            fileobj = file(fileobj, 'wb')
            my_file = True

        # Add pages to the PdfFileWriter
        # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
        for page in self.pages:
            self.output.addPage(page.pagedata)
            page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
            #idnum = self.output._objects.index(self.output._pages.getObject()["/Kids"][-1].getObject()) + 1
            #page.out_pagedata = IndirectObject(idnum, 0, self.output)

        # Once all pages are added, create bookmarks to point at those pages
        self._write_dests()
        self._write_bookmarks()

        # Write the output to the file
        self.output.write(fileobj)

        if my_file:
            fileobj.close()

    def close(self):
        """
        Shuts all file descriptors (input and output) and clears all memory
        usage.
        """
        self.pages = []
        for fo, pdfr, mine in self.inputs:
            if mine:
                fo.close()

        self.inputs = []
        self.output = None

    def addMetadata(self, infos):
        """
        Add custom metadata to the output.

        :param dict infos: a Python dictionary where each key is a field
            and each value is your new metadata.
            Example: ``{u'/Title': u'My title'}``
        """
        self.output.addMetadata(infos)

    def setPageLayout(self, layout):
        """
        Set the page layout

        :param str layout: The page layout to be used

        Valid layouts are:
             /NoLayout        Layout explicitly not specified
             /SinglePage      Show one page at a time
             /OneColumn       Show one column at a time
             /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
             /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
             /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
             /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
        """
        self.output.setPageLayout(layout)

    def setPageMode(self, mode):
        """
        Set the page mode.

        :param str mode: The page mode to use.

        Valid modes are:
            /UseNone         Do not show outlines or thumbnails panels
            /UseOutlines     Show outlines (aka bookmarks) panel
            /UseThumbs       Show page thumbnails panel
            /FullScreen      Fullscreen view
            /UseOC           Show Optional Content Group (OCG) panel
            /UseAttachments  Show attachments panel
        """
        self.output.setPageMode(mode)

    def _trim_dests(self, pdf, dests, pages):
        """
        Removes any named destinations that are not a part of the specified
        page set.
        """
        new_dests = []
        prev_header_added = True
        for k, o in list(dests.items()):
            for j in range(*pages):
                if pdf.getPage(j).getObject() == o['/Page'].getObject():
                    o[NameObject('/Page')] = o['/Page'].getObject()
                    assert str_(k) == str_(o['/Title'])
                    new_dests.append(o)
                    break
        return new_dests

    def _trim_outline(self, pdf, outline, pages):
        """
        Removes any outline/bookmark entries that are not a part of the
        specified page set.
        """
        new_outline = []
        prev_header_added = True
        for i, o in enumerate(outline):
            if isinstance(o, list):
                sub = self._trim_outline(pdf, o, pages)
                if sub:
                    if not prev_header_added:
                        new_outline.append(outline[i-1])
                    new_outline.append(sub)
            else:
                prev_header_added = False
                for j in range(*pages):
                    if pdf.getPage(j).getObject() == o['/Page'].getObject():
                        o[NameObject('/Page')] = o['/Page'].getObject()
                        new_outline.append(o)
                        prev_header_added = True
                        break
        return new_outline

    def _write_dests(self):
        dests = self.named_dests

        for v in dests:
            pageno = None
            pdf = None
            if '/Page' in v:
                for i, p in enumerate(self.pages):
                    if p.id == v['/Page']:
                        v[NameObject('/Page')] = p.out_pagedata
                        pageno = i
                        pdf = p.src
                        break
            if pageno != None:
                self.output.addNamedDestinationObject(v)

    def _write_bookmarks(self, bookmarks=None, parent=None):

        if bookmarks == None:
            bookmarks = self.bookmarks

        last_added = None
        for b in bookmarks:
            if isinstance(b, list):
                self._write_bookmarks(b, last_added)
                continue

            pageno = None
            pdf = None
            if '/Page' in b:
                for i, p in enumerate(self.pages):
                    if p.id == b['/Page']:
                        #b[NameObject('/Page')] = p.out_pagedata
                        args = [NumberObject(p.id), NameObject(b['/Type'])]
                        #nothing more to add
                        #if b['/Type'] == '/Fit' or b['/Type'] == '/FitB'
                        if b['/Type'] == '/FitH' or b['/Type'] == '/FitBH':
                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
                                args.append(FloatObject(b['/Top']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Top']
                        elif b['/Type'] == '/FitV' or b['/Type'] == '/FitBV':
                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
                                args.append(FloatObject(b['/Left']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Left']
                        elif b['/Type'] == '/XYZ':
                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
                                args.append(FloatObject(b['/Left']))
                            else:
                                args.append(FloatObject(0))
                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
                                args.append(FloatObject(b['/Top']))
                            else:
                                args.append(FloatObject(0))
                            if '/Zoom' in b and not isinstance(b['/Zoom'], NullObject):
                                args.append(FloatObject(b['/Zoom']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Top'], b['/Zoom'], b['/Left']
                        elif b['/Type'] == '/FitR':
                            if '/Left' in b and not isinstance(b['/Left'], NullObject):
                                args.append(FloatObject(b['/Left']))
                            else:
                                args.append(FloatObject(0))
                            if '/Bottom' in b and not isinstance(b['/Bottom'], NullObject):
                                args.append(FloatObject(b['/Bottom']))
                            else:
                                args.append(FloatObject(0))
                            if '/Right' in b and not isinstance(b['/Right'], NullObject):
                                args.append(FloatObject(b['/Right']))
                            else:
                                args.append(FloatObject(0))
                            if '/Top' in b and not isinstance(b['/Top'], NullObject):
                                args.append(FloatObject(b['/Top']))
                            else:
                                args.append(FloatObject(0))
                            del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']

                        b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})

                        pageno = i
                        pdf = p.src
                        break
            if pageno != None:
                del b['/Page'], b['/Type']
                last_added = self.output.addBookmarkDict(b, parent)

    def _associate_dests_to_pages(self, pages):
        for nd in self.named_dests:
            pageno = None
            np = nd['/Page']

            if isinstance(np, NumberObject):
                continue

            for p in pages:
                if np.getObject() == p.pagedata.getObject():
                    pageno = p.id

            if pageno != None:
                nd[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))

    def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
        if bookmarks == None:
            bookmarks = self.bookmarks

        for b in bookmarks:
            if isinstance(b, list):
                self._associate_bookmarks_to_pages(pages, b)
                continue

            pageno = None
            bp = b['/Page']

            if isinstance(bp, NumberObject):
                continue

            for p in pages:
                if bp.getObject() == p.pagedata.getObject():
                    pageno = p.id

            if pageno != None:
                b[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))

    def findBookmark(self, bookmark, root=None):
        if root == None:
            root = self.bookmarks

        for i, b in enumerate(root):
            if isinstance(b, list):
                res = self.findBookmark(bookmark, b)
                if res:
                    return [i] + res
            elif b == bookmark or b['/Title'] == bookmark:
                return [i]

        return None

    def addBookmark(self, title, pagenum, parent=None):
        """
        Add a bookmark to this PDF file.

        :param str title: Title to use for this bookmark.
        :param int pagenum: Page number this bookmark will point to.
        :param parent: A reference to a parent bookmark to create nested
            bookmarks.
        """
        if parent == None:
            iloc = [len(self.bookmarks)-1]
        elif isinstance(parent, list):
            iloc = parent
        else:
            iloc = self.findBookmark(parent)

        dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))

        if parent == None:
            self.bookmarks.append(dest)
        else:
            bmparent = self.bookmarks
            for i in iloc[:-1]:
                bmparent = bmparent[i]
            npos = iloc[-1]+1
            if npos < len(bmparent) and isinstance(bmparent[npos], list):
                bmparent[npos].append(dest)
            else:
                bmparent.insert(npos, [dest])
        return dest

    def addNamedDestination(self, title, pagenum):
        """
        Add a destination to the output.

        :param str title: Title to use
        :param int pagenum: Page number this destination points at.
        """

        dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
        self.named_dests.append(dest)


class OutlinesObject(list):
    def __init__(self, pdf, tree, parent=None):
        list.__init__(self)
        self.tree = tree
        self.pdf = pdf
        self.parent = parent

    def remove(self, index):
        obj = self[index]
        del self[index]
        self.tree.removeChild(obj)

    def add(self, title, pagenum):
        pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
        action = DictionaryObject()
        action.update({
            NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S') : NameObject('/GoTo')
        })
        actionRef = self.pdf._addObject(action)
        bookmark = TreeObject()

        bookmark.update({
            NameObject('/A'): actionRef,
            NameObject('/Title'): createStringObject(title),
        })

        self.pdf._addObject(bookmark)

        self.tree.addChild(bookmark)

    def removeAll(self):
        for child in [x for x in self.tree.children()]:
            self.tree.removeChild(child)
            self.pop()
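A usage sketch of the PdfFileMerger flow described in the docstrings above -- not part of this diff; the filenames are hypothetical and the import assumes the vendored package is on sys.path:

    from PyPDF2 import PdfFileMerger, PageRange

    merger = PdfFileMerger(strict=False)            # strict=True (the default) makes more problems fatal
    merger.append("a.pdf", bookmark="Part A")       # all pages, bookmarked at the start
    merger.append("b.pdf", pages=PageRange(":3"))   # only the first three pages of b.pdf
    merger.merge(0, "cover.pdf")                    # insert at page position 0
    merger.write("combined.pdf")
    merger.close()                                  # closes the input streams merge() opened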
152  vendor/PyPDF2/pagerange.py  (vendored, executable file)
@@ -0,0 +1,152 @@
#!/usr/bin/env python
"""
Representation and utils for ranges of PDF file pages.

Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
"""

import re
from .utils import isString

_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# groups:         12     34       5 6     7 8


class ParseError(Exception):
    pass


PAGE_RANGE_HELP = """Remember, page indices start with zero.
        Page range expression examples:
            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
"""


class PageRange(object):
    """
    A slice-like representation of a range of page indices,
    i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])

    # Just formatting this when there is __doc__ for __init__
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        return isinstance(input, slice) or \
               isinstance(input, PageRange) or \
               (isString(input)
                and bool(re.match(PAGE_RANGE_RE, input)))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step == None:
            if s.start != None and s.stop == s.start + 1:
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ':'.join("" if i == None else str(i) for i in indices)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)


PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.


def parse_filename_page_ranges(args):
    """
    Given a list of filenames and page ranges, return a list of
    (filename, page_range) pairs.
    First arg must be a filename; other ags are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.
    """
    pairs = []
    pdf_filename = None
    did_page_range = False
    for arg in args + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError("The first argument must be a filename, " \
                                 "not a page range.")

            pairs.append( (pdf_filename, PageRange(arg)) )
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )

            pdf_filename = arg
            did_page_range = False
    return pairs
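A few concrete parses of the notation documented in PAGE_RANGE_HELP above -- not part of this diff; same import assumption as before:

    from PyPDF2 import PageRange

    assert PageRange("1:10:2").to_slice() == slice(1, 10, 2)
    assert PageRange("3").to_slice() == slice(3, 4)        # a bare int is a one-page range
    assert str(PageRange(slice(0, 3))) == "0:3"            # round-trips through str()
    assert list(range(*PageRange("::-1").indices(4))) == [3, 2, 1, 0]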
3004  vendor/PyPDF2/pdf.py  (vendored, executable file)
File diff suppressed because it is too large.
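Although pdf.py's diff is suppressed, merger.py above already shows the reader/writer API it defines: PdfFileReader with getNumPages()/getPage() and a strict flag, PdfFileWriter with addPage()/write(). A minimal read sketch on that basis -- not part of this diff; input.pdf is hypothetical:

    from PyPDF2 import PdfFileReader

    with open("input.pdf", "rb") as fh:
        reader = PdfFileReader(fh, strict=False)
        for i in range(reader.getNumPages()):
            page = reader.getPage(i)   # the same page objects PdfFileMerger.merge() iterates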
295  vendor/PyPDF2/utils.py  (vendored, executable file)
@@ -0,0 +1,295 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"


import sys

try:
    import __builtin__ as builtins
except ImportError:  # Py3
    import builtins


xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)

bytes_type = type(bytes())  # Works the same in Python 2.X and 3.X
string_type = getattr(builtins, "unicode", str)
int_types = (int, long) if sys.version_info[0] < 3 else (int,)


# Make basic type tests more consistent
def isString(s):
    """Test if arg is a string. Compatible with Python 2 and 3."""
    return isinstance(s, _basestring)


def isInt(n):
    """Test if arg is an int. Compatible with Python 2 and 3."""
    return isinstance(n, int_types)


def isBytes(b):
    """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
    return isinstance(b, bytes_type)


#custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
    file = filename.replace("/", "\\").rsplit("\\", 1)[1]  # find the file name
    return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)


def readUntilWhitespace(stream, maxchars=None):
    """
    Reads non-whitespace characters and returns them.
    Stops upon encountering whitespace or when maxchars is reached.
    """
    txt = b_("")
    while True:
        tok = stream.read(1)
        if tok.isspace() or not tok:
            break
        txt += tok
        if len(txt) == maxchars:
            break
    return txt


def readNonWhitespace(stream):
    """
    Finds and reads the next non-whitespace character (ignores whitespace).
    """
    tok = WHITESPACES[0]
    while tok in WHITESPACES:
        tok = stream.read(1)
    return tok


def skipOverWhitespace(stream):
    """
    Similar to readNonWhitespace, but returns a Boolean if more than
    one whitespace character was read.
    """
    tok = WHITESPACES[0]
    cnt = 0;
    while tok in WHITESPACES:
        tok = stream.read(1)
        cnt+=1
    return (cnt > 1)


def skipOverComment(stream):
    tok = stream.read(1)
    stream.seek(-1, 1)
    if tok == b_('%'):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)


def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Reads until the regular expression pattern matched (ignore the match)
    Raise PdfStreamError on premature end-of-file.
    :param bool ignore_eof: If true, ignore end-of-line and return immediately
    """
    name = b_('')
    while True:
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof == True:
                return name
            else:
                raise PdfStreamError("Stream has ended unexpectedly")
        m = regex.search(tok)
        if m is not None:
            name += tok[:m.start()]
            stream.seek(m.start()-len(tok), 1)
            break
        name += tok
    return name


class ConvertFunctionsToVirtualList(object):
    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        if isinstance(index, slice):
            indices = xrange_fn(*index.indices(len(self)))
            cls = type(self)
            return cls(indices.__len__, lambda idx: self[indices[idx]])
        if not isInt(index):
            raise TypeError("sequence indices must be integers")
        len_self = len(self)
        if index < 0:
            # support negative indexes
            index = len_self + index
        if index < 0 or index >= len_self:
            raise IndexError("sequence index out of range")
        return self.getFunction(index)


def RC4_encrypt(key, plaintext):
    S = [i for i in range(256)]
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % len(key)])) % 256
        S[i], S[j] = S[j], S[i]
    i, j = 0, 0
    retval = b_("")
    for x in range(len(plaintext)):
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        retval += b_(chr(ord_(plaintext[x]) ^ t))
    return retval


def matrixMultiply(a, b):
    return [[sum([float(i)*float(j)
                  for i, j in zip(row, col)]
                 ) for col in zip(*b)]
            for row in a]


def markLocation(stream):
    """Creates text file showing current location in context."""
    # Mainly for debugging
    RADIUS = 5000
    stream.seek(-RADIUS, 1)
    outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
    outputDoc.write(stream.read(RADIUS))
    outputDoc.write('HERE')
    outputDoc.write(stream.read(RADIUS))
    outputDoc.close()
    stream.seek(-RADIUS, 1)


class PyPdfError(Exception):
    pass


class PdfReadError(PyPdfError):
    pass


class PageSizeNotDefinedError(PyPdfError):
    pass


class PdfReadWarning(UserWarning):
    pass


class PdfStreamError(PdfReadError):
    pass


if sys.version_info[0] < 3:
    def b_(s):
        return s
else:
    B_CACHE = {}

    def b_(s):
        bc = B_CACHE
        if s in bc:
            return bc[s]
        if type(s) == bytes:
            return s
        else:
            r = s.encode('latin-1')
            if len(s) < 2:
                bc[s] = r
            return r


def u_(s):
    if sys.version_info[0] < 3:
        return unicode(s, 'unicode_escape')
    else:
        return s


def str_(b):
    if sys.version_info[0] < 3:
        return b
    else:
        if type(b) == bytes:
            return b.decode('latin-1')
        else:
            return b


def ord_(b):
    if sys.version_info[0] < 3 or type(b) == str:
        return ord(b)
    else:
        return b


def chr_(c):
    if sys.version_info[0] < 3:
        return c
    else:
        return chr(c)


def barray(b):
    if sys.version_info[0] < 3:
        return b
    else:
        return bytearray(b)


def hexencode(b):
    if sys.version_info[0] < 3:
        return b.encode('hex')
    else:
        import codecs
        coder = codecs.getencoder('hex_codec')
        return coder(b)[0]


def hexStr(num):
    return hex(num).replace('L', '')


WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
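RC4_encrypt() above is plain RC4, which is symmetric, so the same call decrypts; b_() coerces str to latin-1 bytes on Python 3. A sketch -- not part of this diff; same import assumption as before:

    from PyPDF2.utils import RC4_encrypt, b_

    key = b_("secret")
    ct = RC4_encrypt(key, b_("attack at dawn"))
    assert RC4_encrypt(key, ct) == b_("attack at dawn")   # the same keystream XORed twice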
358
vendor/PyPDF2/xmp.py
vendored
Executable file
358
vendor/PyPDF2/xmp.py
vendored
Executable file
|
@ -0,0 +1,358 @@
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
import decimal
|
||||||
|
from .generic import PdfObject
|
||||||
|
from xml.dom import getDOMImplementation
|
||||||
|
from xml.dom.minidom import parseString
|
||||||
|
from .utils import u_
|
||||||
|
|
||||||
|
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
|
||||||
|
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
|
||||||
|
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
|
||||||
|
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
|
||||||
|
|
||||||
|
# What is the PDFX namespace, you might ask? I might ask that too. It's
|
||||||
|
# a completely undocumented namespace used to place "custom metadata"
|
||||||
|
# properties, which are arbitrary metadata properties with no semantic or
|
||||||
|
# documented meaning. Elements in the namespace are key/value-style storage,
|
||||||
|
# where the element name is the key and the content is the value. The keys
|
||||||
|
# are transformed into valid XML identifiers by substituting an invalid
|
||||||
|
# identifier character with \u2182 followed by the unicode hex ID of the
|
||||||
|
# original character. A key like "my car" is therefore "my\u21820020car".
|
||||||
|
#
|
||||||
|
# \u2182, in case you're wondering, is the unicode character
|
||||||
|
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
|
||||||
|
# escaping characters.
|
||||||
|
#
|
||||||
|
# Intentional users of the pdfx namespace should be shot on sight. A
|
||||||
|
# custom data schema and sensical XML elements could be used instead, as is
|
||||||
|
# suggested by Adobe's own documentation on XMP (under "Extensibility of
|
||||||
|
# Schemas").
|
||||||
|
#
|
||||||
|
# Information presented here on the /pdfx/ schema is a result of limited
|
||||||
|
# reverse engineering, and does not constitute a full specification.
|
||||||
|
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
||||||
|
|
||||||
|
iso8601 = re.compile("""
|
||||||
|
(?P<year>[0-9]{4})
|
||||||
|
(-
|
||||||
|
(?P<month>[0-9]{2})
|
||||||
|
(-
|
||||||
|
(?P<day>[0-9]+)
|
||||||
|
(T
|
||||||
|
(?P<hour>[0-9]{2}):
|
||||||
|
(?P<minute>[0-9]{2})
|
||||||
|
(:(?P<second>[0-9]{2}(.[0-9]+)?))?
|
||||||
|
(?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
""", re.VERBOSE)
|
||||||
|
|
||||||
|
|
||||||
|
class XmpInformation(PdfObject):
|
||||||
|
"""
|
||||||
|
An object that represents Adobe XMP metadata.
|
||||||
|
Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, stream):
|
||||||
|
self.stream = stream
|
||||||
|
docRoot = parseString(self.stream.getData())
|
||||||
|
self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
|
||||||
|
self.cache = {}
|
||||||
|
|
||||||
|
def writeToStream(self, stream, encryption_key):
|
||||||
|
self.stream.writeToStream(stream, encryption_key)
|
||||||
|
|
||||||
|
def getElement(self, aboutUri, namespace, name):
|
||||||
|
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||||
|
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||||
|
attr = desc.getAttributeNodeNS(namespace, name)
|
||||||
|
if attr != None:
|
||||||
|
yield attr
|
||||||
|
for element in desc.getElementsByTagNameNS(namespace, name):
|
||||||
|
yield element
|
||||||
|
|
||||||
|
def getNodesInNamespace(self, aboutUri, namespace):
|
||||||
|
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||||
|
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||||
|
for i in range(desc.attributes.length):
|
||||||
|
attr = desc.attributes.item(i)
|
||||||
|
if attr.namespaceURI == namespace:
|
||||||
|
yield attr
|
||||||
|
for child in desc.childNodes:
|
||||||
|
if child.namespaceURI == namespace:
|
||||||
|
yield child

    def _getText(self, element):
        text = ""
        for child in element.childNodes:
            if child.nodeType == child.TEXT_NODE:
                text += child.data
        return text

    def _converter_string(value):
        return value

    def _converter_date(value):
        m = iso8601.match(value)
        year = int(m.group("year"))
        month = int(m.group("month") or "1")
        day = int(m.group("day") or "1")
        hour = int(m.group("hour") or "0")
        minute = int(m.group("minute") or "0")
        second = decimal.Decimal(m.group("second") or "0")
        seconds = int(second.to_integral(decimal.ROUND_FLOOR))
        # The fractional part of the seconds field, scaled to microseconds.
        microseconds = int((second - seconds) * 1000000)
        tzd = m.group("tzd") or "Z"
        dt = datetime.datetime(year, month, day, hour, minute, seconds, microseconds)
        if tzd != "Z":
            # Normalize to UTC by applying the inverse of the zone offset.
            tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
            tzd_hours *= -1
            if tzd_hours < 0:
                tzd_minutes *= -1
            dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
        return dt
    _test_converter_date = staticmethod(_converter_date)
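
    # Illustrative only (hypothetical input): _converter_date parses an
    # ISO 8601 string and returns a naive datetime normalized to UTC, e.g.
    #
    #     XmpInformation._test_converter_date("2006-06-16T12:34:56-05:00")
    #     # -> datetime.datetime(2006, 6, 16, 17, 34, 56)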

    def _getter_bag(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
                if len(bags):
                    for bag in bags:
                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_seq(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
                if len(seqs):
                    for seq in seqs:
                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
                else:
                    value = converter(self._getText(element))
                    retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_langalt(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            retval = {}
            for element in self.getElement("", namespace, name):
                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
                if len(alts):
                    for alt in alts:
                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval[item.getAttribute("xml:lang")] = value
                else:
                    retval["x-default"] = converter(self._getText(element))
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_single(namespace, name, converter):
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached:
                return cached
            value = None
            for element in self.getElement("", namespace, name):
                if element.nodeType == element.ATTRIBUTE_NODE:
                    value = element.nodeValue
                else:
                    value = self._getText(element)
                break
            if value is not None:
                value = converter(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = value
            return value
        return get

    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
    """
    Contributors to the resource (other than the authors): an unordered
    array of names.
    """

    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
    """
    Text describing the extent or scope of the resource.
    """

    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
    """
    A sorted array of names of the authors of the resource, listed in order
    of precedence.
    """

    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
    """
    A sorted array of dates (datetime.datetime instances) of significance to
    the resource. The dates and times are in UTC.
    """

    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the content of the
    resource.
    """

    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
    """
    The MIME type of the resource.
    """

    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
    """
    Unique identifier of the resource.
    """

    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
    """
    An unordered array specifying the languages used in the resource.
    """

    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
    """
    An unordered array of publisher names.
    """

    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
    """
    An unordered array of text descriptions of relationships to other
    documents.
    """

    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
    """
    A language-keyed dictionary of textual descriptions of the rights the
    user has to this resource.
    """

    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
    """
    Unique identifier of the work from which this resource was derived.
    """

    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
    """
    An unordered array of descriptive phrases or keywords that specify the
    topic of the content of the resource.
    """

    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
    """
    A language-keyed dictionary of the title of the resource.
    """

    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
    """
    An unordered array of textual descriptions of the document type.
    """

    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
    """
    An unformatted text string representing document keywords.
    """

    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
    """
    The PDF file version, for example "1.0" or "1.3".
    """

    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
    """
    The name of the tool that created the PDF document.
    """

    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
    """
    The date and time the resource was originally created, returned as a
    UTC datetime.datetime object.
    """

    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
    """
    The date and time the resource was last modified, returned as a UTC
    datetime.datetime object.
    """

    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
    """
    The date and time that any metadata for this resource was last changed,
    returned as a UTC datetime.datetime object.
    """

    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
    """
    The name of the first known tool used to create the resource.
    """

    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
    """
    The common identifier for all versions and renditions of this resource.
    """

    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
    """
    An identifier for a specific incarnation of a document, updated each
    time a file is saved.
    """

    def custom_properties(self):
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # See the documentation about PDFX_NAMESPACE earlier in
                    # this file: \u2182 marks an escaped key character.
                    idx = key.find(u_("\u2182"))
                    if idx == -1:
                        break
                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._getText(node)
                self._custom_properties[key] = value
        return self._custom_properties

    custom_properties = property(custom_properties)
    """
    Retrieves custom metadata properties defined in the undocumented pdfx
    metadata schema.

    :return: a dictionary of key/value items for custom metadata properties.
    :rtype: dict
    """