calibre-web/vendor/babel/messages/jslexer.py

# -*- coding: utf-8 -*-
"""
    babel.messages.jslexer
    ~~~~~~~~~~~~~~~~~~~~~~

    A simple JavaScript 1.5 lexer which is used for the JavaScript
    extractor.

    :copyright: (c) 2013 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""

from operator import itemgetter
import re
from babel._compat import unichr

# Fixed-text JavaScript operators recognised by the 'operator' rule below.
# ``/`` and ``/=`` are not listed: a slash may also open a regular expression
# literal, so it is matched separately through ``division_re``/``regex_re``.
operators = [
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
]
# Longest first, so that the alternation built from this list prefers
# ``>>>=`` over ``>>>``, ``>>`` and ``>``.  The sort is stable, so operators
# of equal length keep their listed order.
operators.sort(key=len, reverse=True)

# Single-character escapes (the character following a backslash) mapped to
# the character they stand for.
escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

# Ordered ``(token_type, compiled_pattern)`` pairs; the first pattern that
# matches at the current position wins.  A token type of ``None`` means the
# matched text is skipped without producing a token.
#
# Flags are passed through the ``flags`` argument rather than as trailing
# inline groups such as ``(?u)``: global inline flags that are not at the very
# start of a pattern are rejected by newer versions of the ``re`` module.
rules = [
    (None, re.compile(r'\s+', re.UNICODE)),
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    ('name', re.compile(r'(\$+\w*|[^\W\d]\w*)', re.UNICODE)),
    # The hexadecimal alternative has to be tried first; otherwise the
    # decimal alternative matches the leading ``0`` on its own and the hex
    # branch can never be reached.
    ('number', re.compile(r'''(
        (0[xX][a-fA-F0-9]+) |
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)?
    )''', re.VERBOSE)),
    ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)'  |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL))
]

# ``/`` or ``/=`` used as a division operator.
division_re = re.compile(r'/=?')
# A regular expression literal including its trailing flag letters.
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
# Any single line terminator; used for counting lines.
line_re = re.compile(r'(\r\n|\n|\r)')
# A backslash immediately followed by a line terminator.
line_join_re = re.compile(r'\\' + line_re.pattern)
# One to four hexadecimal digits, as found after ``\u`` in a string.
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')


class Token(tuple):
    """A ``(type, value, lineno)`` triple, as returned by `tokenize`.

    The three fields are available both by index and through the read-only
    attributes ``type``, ``value`` and ``lineno``.
    """
    # No per-instance ``__dict__``; the tuple itself holds all the data.
    __slots__ = ()

    def __new__(cls, type, value, lineno):
        fields = (type, value, lineno)
        return super(Token, cls).__new__(cls, fields)

    @property
    def type(self):
        return self[0]

    @property
    def value(self):
        return self[1]

    @property
    def lineno(self):
        return self[2]


def indicates_division(token):
    """Tell the tokenizer whether a ``/`` right after `token` is a division.

    Returns ``True`` when `token` is a name, number, string or regexp token,
    or one of the operators ``)``, ``]``, ``}``, ``++`` and ``--``; returns
    ``False`` for every other token.
    """
    kind = token.type
    if kind != 'operator':
        return kind in ('name', 'number', 'string', 'regexp')
    return token.value in (')', ']', '}', '++', '--')


def unquote_string(string):
    """Unquote a string with JavaScript rules.  The string has to start with
    string delimiters (``'`` or ``"``.)

    :param string: the string literal including its surrounding quotes.
    :return: the unescaped contents of the literal as a unicode string.
    :raises AssertionError: if `string` is empty or is not enclosed in a
                            matching pair of ``'`` or ``"`` characters.
    """
    assert string and string[0] == string[-1] and string[0] in '"\'', \
        'string provided is not properly delimited'
    # Strip the delimiters and drop the backslash of every
    # backslash-newline pair; the line terminator itself is kept.
    string = line_join_re.sub('\\1', string[1:-1])
    result = []
    add = result.append
    pos = 0
    length = len(string)

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # A backslash that is the very last character has nothing left to
        # escape.  Keep it as-is instead of indexing past the end of the
        # string.
        if escape_pos + 1 >= length:
            add('\\')
            pos = length
            break

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes.  Try to consume up to four hexadecimal
        # characters and interpret them as a unicode code point.  If that
        # is not possible, put all the consumed characters into the
        # string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(unichr(int(escaped_value, 16)))
                    except ValueError:
                        # Not convertible; fall through and emit the
                        # consumed characters literally below.
                        pass
                    else:
                        # backslash + 'u' + four hex digits
                        pos = escape_pos + 6
                        continue
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    # whatever follows the last escape sequence
    if pos < length:
        add(string[pos:])

    return u''.join(result)


def tokenize(source):
    """Tokenize a JavaScript source.  Returns a generator of tokens.

    :param source: the JavaScript source code as a string.
    :return: a generator yielding `Token` objects in source order.  Text
             matched by a rule whose type is ``None`` is skipped, and
             characters that no rule matches are skipped one at a time.
    """
    may_divide = False
    pos = 0
    lineno = 1
    end = len(source)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide` which is determined by the last
        # processed significant token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # woops. invalid syntax. jump one char ahead and try again.
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            # Comments are as insignificant as whitespace when deciding
            # whether a following ``/`` is a division, so they must not
            # reset the state left behind by the previous real token
            # (``a /* c */ / b`` is a division, not a regexp).
            if token_type not in ('linecomment', 'multilinecomment'):
                may_divide = indicates_division(token)
            yield token
        # Every token is reported with the line it starts on; advance the
        # counter by the line terminators contained in the matched text.
        lineno += len(line_re.findall(token_value))
        pos = match.end()
Translation of UI (German and English) Bugfix for feeds - removed categories related and up - load new books now working - category random now working login page is free of non-accessible elements boolean custom column is visible in UI books with only certain languages can be shown bookshelves can be deleted from UI Anonymous user view is more restricted Added browse of series in sidebar Dependencies in vendor folder are updated to newer versions (license files are now present) Bugfix editing author names Made upload on Windows working 2016-11-09 18:24:33 +00:00			`# -- coding: utf-8 --`
			`"""`
			`babel.messages.jslexer`
			`~~~~~~~~~~~~~~~~~~~~~~`

			`A simple JavaScript 1.5 lexer which is used for the JavaScript`
			`extractor.`

			`:copyright: (c) 2013 by the Babel Team.`
			`:license: BSD, see LICENSE for more details.`
			`"""`

			`from operator import itemgetter`
			`import re`
			`from babel._compat import unichr`

			`operators = [`
			`'+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',`
			`'+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',`
			`'>>>=', '&', '&=', '\|', '\|=', '&&', '\|\|', '^', '^=', '(', ')',`
			`'[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'`
			`]`
			`operators.sort(key=lambda a: -len(a))`

			`escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}`

			`rules = [`
			`(None, re.compile(r'\s+(?u)')),`
			`(None, re.compile(r'<!--.*')),`
			`('linecomment', re.compile(r'//.*')),`
			`('multilinecomment', re.compile(r'/\.?\*/(?us)')),`
			`('name', re.compile(r'(\$+\w\|[^\W\d]\w)(?u)')),`
			`('number', re.compile(r'''(?x)(`
			`(?:0\|[1-9]\d*)`
			`(\.\d+)?`
			`([eE][-+]?\d+)? \|`
			`(0x[a-fA-F0-9]+)`
			`)''')),`
			`('operator', re.compile(r'(%s)' % '\|'.join(map(re.escape, operators)))),`
			`('string', re.compile(r'''(?xs)(`
			`'(?:[^'\\](?:\\.[^'\\])*)' \|`
			`"(?:[^"\\](?:\\.[^"\\])*)"`
			`)'''))`
			`]`

			`division_re = re.compile(r'/=?')`
			`regex_re = re.compile(r'/(?:[^/\\](?:\\.[^/\\]))/[a-zA-Z](?s)')`
			`line_re = re.compile(r'(\r\n\|\n\|\r)')`
			`line_join_re = re.compile(r'\\' + line_re.pattern)`
			`uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')`


			`class Token(tuple):`
			"""Represents a token as returned by `tokenize`."""
			`__slots__ = ()`

			`def __new__(cls, type, value, lineno):`
			`return tuple.__new__(cls, (type, value, lineno))`

			`type = property(itemgetter(0))`
			`value = property(itemgetter(1))`
			`lineno = property(itemgetter(2))`


			`def indicates_division(token):`
			`"""A helper function that helps the tokenizer to decide if the current`
			`token may be followed by a division operator.`
			`"""`
			`if token.type == 'operator':`
			`return token.value in (')', ']', '}', '++', '--')`
			`return token.type in ('name', 'number', 'string', 'regexp')`


			`def unquote_string(string):`
			`"""Unquote a string with JavaScript rules. The string has to start with`
			string delimiters (``'`` or ``"``.)
			`"""`
			`assert string and string[0] == string[-1] and string[0] in '"\'', \`
			`'string provided is not properly delimited'`
			`string = line_join_re.sub('\\1', string[1:-1])`
			`result = []`
			`add = result.append`
			`pos = 0`

			`while 1:`
			`# scan for the next escape`
			`escape_pos = string.find('\\', pos)`
			`if escape_pos < 0:`
			`break`
			`add(string[pos:escape_pos])`

			`# check which character is escaped`
			`next_char = string[escape_pos + 1]`
			`if next_char in escapes:`
			`add(escapes[next_char])`

			`# unicode escapes. trie to consume up to four characters of`
			`# hexadecimal characters and try to interpret them as unicode`
			`# character point. If there is no such character point, put`
			`# all the consumed characters into the string.`
			`elif next_char in 'uU':`
			`escaped = uni_escape_re.match(string, escape_pos + 2)`
			`if escaped is not None:`
			`escaped_value = escaped.group()`
			`if len(escaped_value) == 4:`
			`try:`
			`add(unichr(int(escaped_value, 16)))`
			`except ValueError:`
			`pass`
			`else:`
			`pos = escape_pos + 6`
			`continue`
			`add(next_char + escaped_value)`
			`pos = escaped.end()`
			`continue`
			`else:`
			`add(next_char)`

			`# bogus escape. Just remove the backslash.`
			`else:`
			`add(next_char)`
			`pos = escape_pos + 2`

			`if pos < len(string):`
			`add(string[pos:])`

			`return u''.join(result)`


			`def tokenize(source):`
			`"""Tokenize a JavaScript source. Returns a generator of tokens.`
			`"""`
			`may_divide = False`
			`pos = 0`
			`lineno = 1`
			`end = len(source)`

			`while pos < end:`
			`# handle regular rules first`
			`for token_type, rule in rules:`
			`match = rule.match(source, pos)`
			`if match is not None:`
			`break`
			`# if we don't have a match we don't give up yet, but check for`
			`# division operators or regular expression literals, based on`
			# the status of `may_divide` which is determined by the last
			# processed non-whitespace token using `indicates_division`.
			`else:`
			`if may_divide:`
			`match = division_re.match(source, pos)`
			`token_type = 'operator'`
			`else:`
			`match = regex_re.match(source, pos)`
			`token_type = 'regexp'`
			`if match is None:`
			`# woops. invalid syntax. jump one char ahead and try again.`
			`pos += 1`
			`continue`

			`token_value = match.group()`
			`if token_type is not None:`
			`token = Token(token_type, token_value, lineno)`
			`may_divide = indicates_division(token)`
			`yield token`
			`lineno += len(line_re.findall(token_value))`
			`pos = match.end()`