bundle: update (2026-01-18)

2026-01-18 01:20:18 +00:00
parent 83c22af578
commit 03c5c75177
149 changed files with 38486 additions and 0 deletions
--- a/extensions/botbox3000/deps/tinycss2/tokenizer.py
+++ b/extensions/botbox3000/deps/tinycss2/tokenizer.py
@@ -0,0 +1,423 @@
+import re
+import sys
+
+from webencodings import ascii_lower
+
+from .ast import (  # isort: skip
+    AtKeywordToken, Comment, CurlyBracketsBlock, DimensionToken, FunctionBlock,
+    HashToken, IdentToken, LiteralToken, NumberToken, ParenthesesBlock, ParseError,
+    PercentageToken, SquareBracketsBlock, StringToken, UnicodeRangeToken, URLToken,
+    WhitespaceToken)
+from .serializer import serialize_string_value, serialize_url
+
+# A CSS number: optional sign, optional "digits then dot" prefix (group 1),
+# mandatory digits, optional exponent (group 2). The tokenizer treats a
+# match where neither group participated as an integer.
+_NUMBER_RE = re.compile(r'[-+]?([0-9]*\.)?[0-9]+([eE][+-]?[0-9]+)?')
+# The hexadecimal form of an escape, matched just after the backslash: one
+# to six hex digits (group 1) and at most one trailing whitespace character,
+# which is part of the match.
+_HEX_ESCAPE_RE = re.compile(r'([0-9A-Fa-f]{1,6})[ \n\t]?')
+
+
+def parse_component_value_list(css, skip_comments=False):
+    """Parse a list of component values.
+
+    The input is tokenized in one pass. The content of blocks (``{}``,
+    ``[]``, ``()``) and functions is collected in the list handed to the
+    block object, so the result is a tree. Problems found in the input are
+    reported by appending a :class:`~tinycss2.ast.ParseError` to the output.
+
+    :type css: :obj:`str`
+    :param css: A CSS string.
+    :type skip_comments: :obj:`bool`
+    :param skip_comments:
+        Ignore CSS comments.
+        The return values (and recursively its blocks and functions)
+        will not contain any :class:`~tinycss2.ast.Comment` object.
+    :returns: A list of :term:`component values`.
+
+    """
+    # Preprocess the input: NUL becomes U+FFFD, and CRLF, CR and form feed
+    # all become a plain LF so that only '\n' has to be handled below.
+    css = (css.replace('\0', '\uFFFD')
+           # This turns out to be faster than a regexp:
+           .replace('\r\n', '\n').replace('\r', '\n').replace('\f', '\n'))
+    length = len(css)
+    token_start_pos = pos = 0  # Character index in the css source.
+    line = 1  # First line is line 1.
+    last_newline = -1
+    # ``root`` is the list that is returned; ``tokens`` is the list currently
+    # being filled, which is ``root`` or the content list of an open block.
+    root = tokens = []
+    end_char = None  # Pop the stack when encountering this character.
+    stack = []  # Stack of nested blocks: (tokens, end_char) tuples.
+
+    while pos < length:
+        # Account for newlines contained in the previous token (the span
+        # from its start to the current position): ``newline`` is the last
+        # of them, and count() adds the ones that come before it.
+        newline = css.rfind('\n', token_start_pos, pos)
+        if newline != -1:
+            line += 1 + css.count('\n', token_start_pos, newline)
+            last_newline = newline
+        # First character in a line is in column 1.
+        column = pos - last_newline
+        token_start_pos = pos
+        c = css[pos]
+
+        # The branches of this first chain all end with ``continue``, except
+        # when nothing matches: control then falls through to the number and
+        # punctuation handling further down.
+        if c in ' \n\t':
+            # A run of whitespace is a single token.
+            pos += 1
+            while css.startswith((' ', '\n', '\t'), pos):
+                pos += 1
+            value = css[token_start_pos:pos]
+            tokens.append(WhitespaceToken(line, column, value))
+            continue
+        elif (c in 'Uu' and pos + 2 < length and css[pos + 1] == '+' and
+              css[pos + 2] in '0123456789abcdefABCDEF?'):
+            # 'U+' followed by a hex digit or '?': unicode-range token.
+            start, end, pos = _consume_unicode_range(css, pos + 2)
+            tokens.append(UnicodeRangeToken(line, column, start, end))
+            continue
+        elif css.startswith('-->', pos):  # Check before identifiers
+            tokens.append(LiteralToken(line, column, '-->'))
+            pos += 3
+            continue
+        elif _is_ident_start(css, pos):
+            value, pos = _consume_ident(css, pos)
+            if not css.startswith('(', pos):  # Not a function
+                tokens.append(IdentToken(line, column, value))
+                continue
+            pos += 1  # Skip the '('
+            try:
+                is_url = ascii_lower(value) == 'url'
+            except UnicodeEncodeError:
+                # NOTE(review): presumably raised for names ascii_lower
+                # cannot encode; such a name is treated as not being 'url'.
+                is_url = False
+            if is_url:
+                # Look past leading whitespace without consuming it yet.
+                url_pos = pos
+                while css.startswith((' ', '\n', '\t'), url_pos):
+                    url_pos += 1
+                # Only an unquoted URL (or EOF) gives a URL token. When a
+                # quote follows, control falls through and 'url(' opens a
+                # regular function block, like any other function.
+                if url_pos >= length or css[url_pos] not in ('"', "'"):
+                    value, pos, error = _consume_url(css, pos)
+                    if value is not None:
+                        repr = f'url({serialize_url(value)})'
+                        if error is not None:
+                            # The input ended early: trim the end of the
+                            # serialization. NOTE(review): presumably this
+                            # drops the closing quote and/or parenthesis
+                            # that the source did not contain - confirm
+                            # against serialize_url.
+                            error_key = error[0]
+                            if error_key == 'eof-in-string':
+                                repr = repr[:-2]
+                            else:
+                                assert error_key == 'eof-in-url'
+                                repr = repr[:-1]
+                        tokens.append(URLToken(line, column, value, repr))
+                    if error is not None:
+                        tokens.append(ParseError(line, column, *error))
+                    continue
+            # Open a function block: following tokens go to its argument
+            # list until the matching ')' pops the stack.
+            arguments = []
+            tokens.append(FunctionBlock(line, column, value, arguments))
+            stack.append((tokens, end_char))
+            end_char = ')'
+            tokens = arguments
+            continue
+
+        # Numbers are tried before the single-character tokens below.
+        match = _NUMBER_RE.match(css, pos)
+        if match:
+            pos = match.end()
+            repr_ = css[token_start_pos:pos]
+            value = float(repr_)
+            # Neither the "digits and dot" group nor the exponent group
+            # matched: the number is written as an integer.
+            int_value = int(repr_) if not any(match.groups()) else None
+            if pos < length and _is_ident_start(css, pos):
+                # A number directly followed by an identifier is a dimension.
+                unit, pos = _consume_ident(css, pos)
+                tokens.append(DimensionToken(
+                    line, column, value, int_value, repr_, unit))
+            elif css.startswith('%', pos):
+                pos += 1
+                tokens.append(PercentageToken(line, column, value, int_value, repr_))
+            else:
+                tokens.append(NumberToken(line, column, value, int_value, repr_))
+        elif c == '@':
+            pos += 1
+            if pos < length and _is_ident_start(css, pos):
+                value, pos = _consume_ident(css, pos)
+                tokens.append(AtKeywordToken(line, column, value))
+            else:
+                # A lone '@' is an ordinary literal.
+                tokens.append(LiteralToken(line, column, '@'))
+        elif c == '#':
+            pos += 1
+            # A hash token needs a name character or a valid escape next.
+            if pos < length and (
+                    css[pos] in '0123456789abcdefghijklmnopqrstuvwxyz'
+                                '-_ABCDEFGHIJKLMNOPQRSTUVWXYZ' or
+                    ord(css[pos]) > 0x7F or  # Non-ASCII
+                    # Valid escape:
+                    (css[pos] == '\\' and not css.startswith('\\\n', pos))):
+                # Must be evaluated before the name is consumed: records
+                # whether the text after '#' starts an identifier.
+                is_identifier = _is_ident_start(css, pos)
+                value, pos = _consume_ident(css, pos)
+                tokens.append(HashToken(line, column, value, is_identifier))
+            else:
+                tokens.append(LiteralToken(line, column, '#'))
+        # The three opening brackets work alike: append the block to the
+        # current list, remember the current list and closing character on
+        # the stack, then collect following tokens in the block's content.
+        elif c == '{':
+            content = []
+            tokens.append(CurlyBracketsBlock(line, column, content))
+            stack.append((tokens, end_char))
+            end_char = '}'
+            tokens = content
+            pos += 1
+        elif c == '[':
+            content = []
+            tokens.append(SquareBracketsBlock(line, column, content))
+            stack.append((tokens, end_char))
+            end_char = ']'
+            tokens = content
+            pos += 1
+        elif c == '(':
+            content = []
+            tokens.append(ParenthesesBlock(line, column, content))
+            stack.append((tokens, end_char))
+            end_char = ')'
+            tokens = content
+            pos += 1
+        elif c == end_char:  # Matching }, ] or )
+            # The top-level end_char is None (never equal to a character),
+            # so we never get here if the stack is empty.
+            tokens, end_char = stack.pop()
+            pos += 1
+        elif c in '}])':
+            # A closing bracket that does not close the innermost open block.
+            tokens.append(ParseError(line, column, c, 'Unmatched ' + c))
+            pos += 1
+        elif c in ('"', "'"):
+            value, pos, error = _consume_quoted_string(css, pos)
+            if value is not None:
+                repr = f'"{serialize_string_value(value)}"'
+                if error is not None:
+                    # The string was cut short: drop the closing quote
+                    # from the serialization.
+                    repr = repr[:-1]
+                tokens.append(StringToken(line, column, value, repr))
+            if error is not None:
+                tokens.append(ParseError(line, column, *error))
+        elif css.startswith('/*', pos):  # Comment
+            pos = css.find('*/', pos + 2)
+            if pos == -1:
+                # Unterminated comment: it extends to the end of the input,
+                # so tokenization stops here.
+                if not skip_comments:
+                    tokens.append(Comment(line, column, css[token_start_pos + 2:]))
+                break
+            if not skip_comments:
+                tokens.append(Comment(line, column, css[token_start_pos + 2:pos]))
+            pos += 2
+        elif css.startswith('<!--', pos):
+            tokens.append(LiteralToken(line, column, '<!--'))
+            pos += 4
+        elif css.startswith('||', pos):
+            tokens.append(LiteralToken(line, column, '||'))
+            pos += 2
+        elif c in '~|^$*':
+            # These characters merge with a following '=' into one literal
+            # ('~=', '|=', '^=', '$=', '*=').
+            pos += 1
+            if css.startswith('=', pos):
+                pos += 1
+                tokens.append(LiteralToken(line, column, c + '='))
+            else:
+                tokens.append(LiteralToken(line, column, c))
+        else:
+            # Any other character is a one-character literal.
+            tokens.append(LiteralToken(line, column, c))
+            pos += 1
+    # Blocks still open at the end of the input are left as they are: their
+    # objects are already part of the tree rooted at ``root``.
+    return root
+
+
+def _is_name_start(css, pos):
+    """Tell whether ``css[pos]`` is a name-start code point.
+
+    Name-start code points are the ASCII letters, the underscore and every
+    non-ASCII character.
+
+    """
+    # https://www.w3.org/TR/css-syntax-3/#name-start-code-point
+    char = css[pos]
+    return (
+        'a' <= char <= 'z' or 'A' <= char <= 'Z' or char == '_' or
+        char > '\x7f')
+
+
+def _is_ident_start(css, pos):
+    """Tell whether a CSS identifier starts at ``css[pos]``.
+
+    This is the case for a name-start code point, for a valid escape, and
+    for a hyphen followed by a name-start code point, by another hyphen or
+    by a valid escape. An escape is valid when its backslash is not
+    followed by a newline.
+
+    """
+    # https://drafts.csswg.org/css-syntax/#would-start-an-identifier
+    if _is_name_start(css, pos):
+        return True
+    first = css[pos]
+    if first == '\\':
+        return not css.startswith('\\\n', pos)
+    if first != '-':
+        return False
+    # A hyphen alone is not enough: look at what follows it.
+    after = pos + 1
+    if after < len(css) and (css[after] == '-' or _is_name_start(css, after)):
+        return True
+    return css.startswith('\\', after) and not css.startswith('\\\n', after)
+
+
+def _consume_ident(css, pos):
+    """Consume a name and return ``(unescaped_value, new_pos)``.
+
+    The caller is expected to have checked with :func:`_is_ident_start`
+    that an identifier starts at ``pos``. Consumption stops at the first
+    character that is neither a name character nor a valid escape.
+
+    """
+    # http://dev.w3.org/csswg/css-syntax/#consume-a-name
+    end = len(css)
+    pieces = []
+    run_start = pos  # Start of the pending run of unescaped characters.
+    while pos < end:
+        char = css[pos]
+        if char == '\\':
+            if css.startswith('\\\n', pos):
+                # Backslash-newline is not a valid escape: the name ends.
+                break
+            # Flush the literal run, then add the unescaped character.
+            pieces.append(css[run_start:pos])
+            unescaped, pos = _consume_escape(css, pos + 1)
+            pieces.append(unescaped)
+            run_start = pos
+        elif ord(char) > 0x7F or char in '-_' or char.isalnum():
+            # Non-ASCII, hyphen, underscore, ASCII letter or ASCII digit.
+            pos += 1
+        else:
+            break
+    pieces.append(css[run_start:pos])
+    return ''.join(pieces), pos
+
+
+def _consume_quoted_string(css, pos):
+    """Consume a quoted string and return ``(value, new_pos, error)``.
+
+    ``css[pos]`` must be the opening quote. ``error`` is ``None`` for a
+    properly closed string. At the end of the input, the text read so far
+    is returned together with an ``'eof-in-string'`` error. On an unescaped
+    newline, ``value`` is ``None``, the error is ``'bad-string'`` and
+    ``new_pos`` is the position of that newline.
+
+    """
+    # https://drafts.csswg.org/css-syntax/#consume-a-string-token
+    quote = css[pos]
+    assert quote in ('"', "'")
+    end = len(css)
+    pos += 1
+    pieces = []
+    run_start = pos  # Start of the pending run of unescaped characters.
+    while pos < end:
+        char = css[pos]
+        if char == '\n':  # Unescaped newline
+            return None, pos, ('bad-string', 'Bad string token')
+        if char == quote:
+            pieces.append(css[run_start:pos])
+            return ''.join(pieces), pos + 1, None
+        if char != '\\':
+            pos += 1
+            continue
+        # Backslash: flush the literal run, then handle what follows.
+        pieces.append(css[run_start:pos])
+        pos += 1
+        if pos < end:
+            if css[pos] == '\n':  # Escaped newlines are dropped
+                pos += 1
+            else:
+                unescaped, pos = _consume_escape(css, pos)
+                pieces.append(unescaped)
+        # Otherwise the backslash was the last character: nothing to add.
+        run_start = pos
+    # The input ended before the closing quote.
+    pieces.append(css[run_start:pos])
+    return ''.join(pieces), pos, ('eof-in-string', 'EOF in string')
+
+
+def _consume_escape(css, pos):
+    r"""Return (unescaped_char, new_pos).
+
+    Assumes a valid escape: pos is just after '\' and not followed by '\n'.
+
+    A hexadecimal escape (one to six hex digits, plus one optional
+    whitespace character that is consumed with it) gives the character
+    with that code point. U+FFFD is returned instead when the number is
+    zero, designates a surrogate, or is above the maximum code point.
+    Any other escaped character stands for itself, and an escape at the
+    very end of the input gives U+FFFD.
+
+    """
+    # https://drafts.csswg.org/css-syntax/#consume-an-escaped-character
+    hex_match = _HEX_ESCAPE_RE.match(css, pos)
+    if hex_match:
+        codepoint = int(hex_match.group(1), 16)
+        # Surrogates are excluded as well: chr() would accept them, but
+        # the result is a lone surrogate that cannot be encoded as UTF-8.
+        if (codepoint == 0 or 0xD800 <= codepoint <= 0xDFFF or
+                codepoint > sys.maxunicode):
+            return '\uFFFD', hex_match.end()
+        return chr(codepoint), hex_match.end()
+    elif pos < len(css):
+        return css[pos], pos + 1
+    else:
+        return '\uFFFD', pos
+
+
+def _consume_url(css, pos):
+    """Consume a URL and return ``(value, new_pos, error)``.
+
+    The given pos is assumed to be just after the '(' of 'url('.
+
+    ``value`` is the unescaped URL, or ``None`` for a bad URL. ``error`` is
+    ``None`` on success, otherwise a ``(kind, message)`` tuple whose kind
+    is ``'eof-in-url'``, ``'eof-in-string'`` or ``'bad-url'``. For a bad
+    URL, ``new_pos`` is just after the ')' that ends it, or at the end of
+    the input when there is none.
+
+    """
+    error = None
+    length = len(css)
+    # https://drafts.csswg.org/css-syntax/#consume-a-url-token
+    # Skip whitespace
+    while css.startswith((' ', '\n', '\t'), pos):
+        pos += 1
+    if pos >= length:  # EOF
+        return '', pos, ('eof-in-url', 'EOF in URL')
+    c = css[pos]
+    if c in ('"', "'"):
+        value, pos, error = _consume_quoted_string(css, pos)
+    elif c == ')':
+        return '', pos + 1, error
+    else:
+        chunks = []
+        start_pos = pos
+        while 1:
+            if pos >= length:  # EOF
+                chunks.append(css[start_pos:pos])
+                return ''.join(chunks), pos, ('eof-in-url', 'EOF in URL')
+            c = css[pos]
+            if c == ')':
+                chunks.append(css[start_pos:pos])
+                pos += 1
+                return ''.join(chunks), pos, error
+            elif c in ' \n\t':
+                # Whitespace ends the URL text; only more whitespace and
+                # the closing ')' may follow (checked after the loop).
+                chunks.append(css[start_pos:pos])
+                value = ''.join(chunks)
+                pos += 1
+                break
+            elif c == '\\' and not css.startswith('\\\n', pos):
+                # Valid escape
+                chunks.append(css[start_pos:pos])
+                c, pos = _consume_escape(css, pos + 1)
+                chunks.append(c)
+                start_pos = pos
+            elif (c in
+                  # Valid escapes are handled above, so a backslash that
+                  # gets here starts an invalid escape (backslash followed
+                  # by a newline), which makes the URL bad.
+                  '"\'(\\'
+                  # https://drafts.csswg.org/css-syntax/#non-printable-character
+                  '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0e'
+                  '\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19'
+                  '\x1a\x1b\x1c\x1d\x1e\x1f\x7f'):
+                value = None  # Parse error
+                pos += 1
+                break
+            else:
+                pos += 1
+
+    if value is not None:
+        while css.startswith((' ', '\n', '\t'), pos):
+            pos += 1
+        if pos < length:
+            if css[pos] == ')':
+                return value, pos + 1, error
+            # Anything else after the URL text makes it a bad URL.
+        else:
+            if error is None:
+                error = ('eof-in-url', 'EOF in URL')
+            return value, pos, error
+
+    # https://drafts.csswg.org/css-syntax/#consume-the-remnants-of-a-bad-url0
+    while pos < length:
+        c = css[pos]
+        if c == ')':
+            pos += 1
+            break
+        if c == '\\' and not css.startswith('\\\n', pos):
+            # Valid escape: skip the backslash together with the escaped
+            # character, whatever it is. This keeps an escaped ')' from
+            # ending the bad URL, and keeps an escaped backslash from
+            # being read as the start of another escape.
+            pos = min(pos + 2, length)
+        else:
+            pos += 1
+    return None, pos, ('bad-url', 'bad URL token')
+
+
+def _consume_unicode_range(css, pos):
+    """Consume a unicode range and return ``(start, end, new_pos)``.
+
+    The given pos is assumed to be just after the '+' of 'U+' or 'u+'.
+    ``start`` and ``end`` are integers. Three forms are read: a single
+    value (``26``), a value with trailing ``?`` wildcards (``4??``) and an
+    explicit range (``0-7F``).
+
+    """
+    # https://drafts.csswg.org/css-syntax/#consume-a-unicode-range-token
+    hex_digits = '0123456789abcdefABCDEF'
+    total = len(css)
+
+    def scan(index, limit, allowed):
+        """Return the first index in [index, limit) not in ``allowed``."""
+        while index < limit and css[index] in allowed:
+            index += 1
+        return index
+
+    # Hex digits and question marks share a budget of six characters.
+    limit = min(pos + 6, total)
+    digits_end = scan(pos, limit, hex_digits)
+    prefix = css[pos:digits_end]
+    pos = scan(digits_end, limit, '?')
+    wildcards = pos - digits_end
+
+    if wildcards:
+        # Each '?' stands for any hex digit: 0 at the start, F at the end.
+        first = prefix + '0' * wildcards
+        last = prefix + 'F' * wildcards
+    elif (pos + 1 < total and css[pos] == '-' and
+          css[pos + 1] in hex_digits):
+        # Explicit range: up to six more hex digits after the hyphen.
+        range_start = pos + 1
+        pos = scan(range_start, min(range_start + 6, total), hex_digits)
+        first = prefix
+        last = css[range_start:pos]
+    else:
+        first = last = prefix
+    return int(first, 16), int(last, 16), pos