calibre-web/vendor/PyPDF2/utils.py

296 lines
7.6 KiB
Python
Raw Normal View History

2016-08-07 20:12:32 +00:00
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import sys
try:
import __builtin__ as builtins
except ImportError: # Py3
import builtins
xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)
bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
string_type = getattr(builtins, "unicode", str)
int_types = (int, long) if sys.version_info[0] < 3 else (int,)
# Make basic type tests more consistent
def isString(s):
"""Test if arg is a string. Compatible with Python 2 and 3."""
return isinstance(s, _basestring)
def isInt(n):
"""Test if arg is an int. Compatible with Python 2 and 3."""
return isinstance(n, int_types)
def isBytes(b):
"""Test if arg is a bytes instance. Compatible with Python 2 and 3."""
return isinstance(b, bytes_type)
#custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
def readUntilWhitespace(stream, maxchars=None):
"""
Reads non-whitespace characters and returns them.
Stops upon encountering whitespace or when maxchars is reached.
"""
txt = b_("")
while True:
tok = stream.read(1)
if tok.isspace() or not tok:
break
txt += tok
if len(txt) == maxchars:
break
return txt
def readNonWhitespace(stream):
"""
Finds and reads the next non-whitespace character (ignores whitespace).
"""
tok = WHITESPACES[0]
while tok in WHITESPACES:
tok = stream.read(1)
return tok
def skipOverWhitespace(stream):
"""
Similar to readNonWhitespace, but returns a Boolean if more than
one whitespace character was read.
"""
tok = WHITESPACES[0]
cnt = 0;
while tok in WHITESPACES:
tok = stream.read(1)
cnt+=1
return (cnt > 1)
def skipOverComment(stream):
tok = stream.read(1)
stream.seek(-1, 1)
if tok == b_('%'):
while tok not in (b_('\n'), b_('\r')):
tok = stream.read(1)
def readUntilRegex(stream, regex, ignore_eof=False):
"""
Reads until the regular expression pattern matched (ignore the match)
Raise PdfStreamError on premature end-of-file.
:param bool ignore_eof: If true, ignore end-of-line and return immediately
"""
name = b_('')
while True:
tok = stream.read(16)
if not tok:
# stream has truncated prematurely
if ignore_eof == True:
return name
else:
raise PdfStreamError("Stream has ended unexpectedly")
m = regex.search(tok)
if m is not None:
name += tok[:m.start()]
stream.seek(m.start()-len(tok), 1)
break
name += tok
return name
class ConvertFunctionsToVirtualList(object):
def __init__(self, lengthFunction, getFunction):
self.lengthFunction = lengthFunction
self.getFunction = getFunction
def __len__(self):
return self.lengthFunction()
def __getitem__(self, index):
if isinstance(index, slice):
indices = xrange_fn(*index.indices(len(self)))
cls = type(self)
return cls(indices.__len__, lambda idx: self[indices[idx]])
if not isInt(index):
raise TypeError("sequence indices must be integers")
len_self = len(self)
if index < 0:
# support negative indexes
index = len_self + index
if index < 0 or index >= len_self:
raise IndexError("sequence index out of range")
return self.getFunction(index)
def RC4_encrypt(key, plaintext):
S = [i for i in range(256)]
j = 0
for i in range(256):
j = (j + S[i] + ord_(key[i % len(key)])) % 256
S[i], S[j] = S[j], S[i]
i, j = 0, 0
retval = b_("")
for x in range(len(plaintext)):
i = (i + 1) % 256
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
t = S[(S[i] + S[j]) % 256]
retval += b_(chr(ord_(plaintext[x]) ^ t))
return retval
def matrixMultiply(a, b):
return [[sum([float(i)*float(j)
for i, j in zip(row, col)]
) for col in zip(*b)]
for row in a]
def markLocation(stream):
"""Creates text file showing current location in context."""
# Mainly for debugging
RADIUS = 5000
stream.seek(-RADIUS, 1)
outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
outputDoc.write(stream.read(RADIUS))
outputDoc.write('HERE')
outputDoc.write(stream.read(RADIUS))
outputDoc.close()
stream.seek(-RADIUS, 1)
class PyPdfError(Exception):
pass
class PdfReadError(PyPdfError):
pass
class PageSizeNotDefinedError(PyPdfError):
pass
class PdfReadWarning(UserWarning):
pass
class PdfStreamError(PdfReadError):
pass
if sys.version_info[0] < 3:
def b_(s):
return s
else:
B_CACHE = {}
def b_(s):
bc = B_CACHE
if s in bc:
return bc[s]
if type(s) == bytes:
return s
else:
r = s.encode('latin-1')
if len(s) < 2:
bc[s] = r
return r
def u_(s):
if sys.version_info[0] < 3:
return unicode(s, 'unicode_escape')
else:
return s
def str_(b):
if sys.version_info[0] < 3:
return b
else:
if type(b) == bytes:
return b.decode('latin-1')
else:
return b
def ord_(b):
if sys.version_info[0] < 3 or type(b) == str:
return ord(b)
else:
return b
def chr_(c):
if sys.version_info[0] < 3:
return c
else:
return chr(c)
def barray(b):
if sys.version_info[0] < 3:
return b
else:
return bytearray(b)
def hexencode(b):
if sys.version_info[0] < 3:
return b.encode('hex')
else:
import codecs
coder = codecs.getencoder('hex_codec')
return coder(b)[0]
def hexStr(num):
return hex(num).replace('L', '')
WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]