# Forked from cssselect2, 1.2.1, BSD License """Parse CSS declarations.""" from tinycss2 import parse_component_value_list __all__ = ["parse"] SUPPORTED_PSEUDO_ELEMENTS = { # As per CSS Pseudo-Elements Module Level 4 "first-line", "first-letter", "prefix", "postfix", "selection", "target-text", "spelling-error", "grammar-error", "before", "after", "marker", "placeholder", "file-selector-button", # As per CSS Generated Content for Paged Media Module "footnote-call", "footnote-marker", # As per CSS Scoping Module Level 1 "content", "shadow", } def parse(input, namespaces=None, forgiving=False, relative=False): """Yield tinycss2 selectors found in given ``input``. :param input: A string, or an iterable of tinycss2 component values. """ if isinstance(input, str): input = parse_component_value_list(input) tokens = TokenStream(input) namespaces = namespaces or {} try: yield parse_selector(tokens, namespaces, relative) except SelectorError as exception: if forgiving: return raise exception while 1: next = tokens.next() if next is None: return elif next == ",": try: yield parse_selector(tokens, namespaces, relative) except SelectorError as exception: if not forgiving: raise exception else: if not forgiving: raise SelectorError(next, f"unexpected {next.type} token.") def parse_selector(tokens, namespaces, relative=False): tokens.skip_whitespace_and_comment() if relative: peek = tokens.peek() if peek in (">", "+", "~"): initial_combinator = peek.value tokens.next() else: initial_combinator = " " tokens.skip_whitespace_and_comment() result, pseudo_element = parse_compound_selector(tokens, namespaces) while 1: has_whitespace = tokens.skip_whitespace() while tokens.skip_comment(): has_whitespace = tokens.skip_whitespace() or has_whitespace selector = Selector(result, pseudo_element) if relative: selector = RelativeSelector(initial_combinator, selector) if pseudo_element is not None: return selector peek = tokens.peek() if peek is None or peek == ",": return selector elif peek in (">", "+", "~"): combinator = peek.value tokens.next() elif has_whitespace: combinator = " " else: return selector compound, pseudo_element = parse_compound_selector(tokens, namespaces) result = CombinedSelector(result, combinator, compound) def parse_compound_selector(tokens, namespaces): type_selectors = parse_type_selector(tokens, namespaces) simple_selectors = type_selectors if type_selectors is not None else [] while 1: simple_selector, pseudo_element = parse_simple_selector(tokens, namespaces) if pseudo_element is not None or simple_selector is None: break simple_selectors.append(simple_selector) if simple_selectors or (type_selectors, pseudo_element) != (None, None): return CompoundSelector(simple_selectors), pseudo_element peek = tokens.peek() peek_type = peek.type if peek else "EOF" raise SelectorError(peek, f"expected a compound selector, got {peek_type}") def parse_type_selector(tokens, namespaces): tokens.skip_whitespace() qualified_name = parse_qualified_name(tokens, namespaces) if qualified_name is None: return None simple_selectors = [] namespace, local_name = qualified_name if local_name is not None: simple_selectors.append(LocalNameSelector(local_name)) if namespace is not None: simple_selectors.append(NamespaceSelector(namespace)) return simple_selectors def parse_simple_selector(tokens, namespaces): peek = tokens.peek() if peek is None: return None, None if peek.type == "hash" and peek.is_identifier: tokens.next() return IDSelector(peek.value), None elif peek == ".": tokens.next() next = tokens.next() if next is None or next.type != "ident": raise SelectorError(next, f"Expected a class name, got {next}") return ClassSelector(next.value), None elif peek.type == "[] block": tokens.next() attr = parse_attribute_selector(TokenStream(peek.content), namespaces) return attr, None elif peek == ":": tokens.next() next = tokens.next() if next == ":": next = tokens.next() if next is None or next.type != "ident": raise SelectorError(next, f"Expected a pseudo-element name, got {next}") value = next.lower_value if value not in SUPPORTED_PSEUDO_ELEMENTS: raise SelectorError( next, f"Expected a supported pseudo-element, got {value}" ) return None, value elif next is not None and next.type == "ident": name = next.lower_value if name in ("before", "after", "first-line", "first-letter"): return None, name else: return PseudoClassSelector(name), None elif next is not None and next.type == "function": name = next.lower_name if name in ("is", "where", "not", "has"): return parse_logical_combination(next, namespaces, name), None else: return (FunctionalPseudoClassSelector(name, next.arguments), None) else: raise SelectorError(next, f"unexpected {next} token.") else: return None, None def parse_logical_combination(matches_any_token, namespaces, name): forgiving = True relative = False if name == "is": selector_class = MatchesAnySelector elif name == "where": selector_class = SpecificityAdjustmentSelector elif name == "not": forgiving = False selector_class = NegationSelector elif name == "has": relative = True selector_class = RelationalSelector selectors = [ selector for selector in parse( matches_any_token.arguments, namespaces, forgiving, relative ) if selector.pseudo_element is None ] return selector_class(selectors) def parse_attribute_selector(tokens, namespaces): tokens.skip_whitespace() qualified_name = parse_qualified_name(tokens, namespaces, is_attribute=True) if qualified_name is None: next = tokens.next() raise SelectorError(next, f"expected attribute name, got {next}") namespace, local_name = qualified_name tokens.skip_whitespace() peek = tokens.peek() if peek is None: operator = None value = None elif peek in ("=", "~=", "|=", "^=", "$=", "*="): operator = peek.value tokens.next() tokens.skip_whitespace() next = tokens.next() if next is None or next.type not in ("ident", "string"): next_type = "None" if next is None else next.type raise SelectorError(next, f"expected attribute value, got {next_type}") value = next.value else: raise SelectorError(peek, f"expected attribute selector operator, got {peek}") tokens.skip_whitespace() next = tokens.next() case_sensitive = None if next is not None: if next.type == "ident" and next.value.lower() == "i": case_sensitive = False elif next.type == "ident" and next.value.lower() == "s": case_sensitive = True else: raise SelectorError(next, f"expected ], got {next.type}") return AttributeSelector(namespace, local_name, operator, value, case_sensitive) def parse_qualified_name(tokens, namespaces, is_attribute=False): """Return ``(namespace, local)`` for given tokens. Can also return ``None`` for a wildcard. The empty string for ``namespace`` means "no namespace". """ peek = tokens.peek() if peek is None: return None if peek.type == "ident": first_ident = tokens.next() peek = tokens.peek() if peek != "|": namespace = "" if is_attribute else namespaces.get(None, None) return namespace, (first_ident.value, first_ident.lower_value) tokens.next() namespace = namespaces.get(first_ident.value) if namespace is None: raise SelectorError( first_ident, f"undefined namespace prefix: {first_ident.value}" ) elif peek == "*": next = tokens.next() peek = tokens.peek() if peek != "|": if is_attribute: raise SelectorError(next, f"expected local name, got {next.type}") return namespaces.get(None, None), None tokens.next() namespace = None elif peek == "|": tokens.next() namespace = "" else: return None # If we get here, we just consumed '|' and set ``namespace`` next = tokens.next() if next.type == "ident": return namespace, (next.value, next.lower_value) elif next == "*" and not is_attribute: return namespace, None else: raise SelectorError(next, f"expected local name, got {next.type}") class SelectorError(ValueError): """A specialized ``ValueError`` for invalid selectors.""" class TokenStream: def __init__(self, tokens): self.tokens = iter(tokens) self.peeked = [] # In reversed order def next(self): if self.peeked: return self.peeked.pop() else: return next(self.tokens, None) def peek(self): if not self.peeked: self.peeked.append(next(self.tokens, None)) return self.peeked[-1] def skip(self, skip_types): found = False while 1: peek = self.peek() if peek is None or peek.type not in skip_types: break self.next() found = True return found def skip_whitespace(self): return self.skip(["whitespace"]) def skip_comment(self): return self.skip(["comment"]) def skip_whitespace_and_comment(self): return self.skip(["comment", "whitespace"]) class Selector: def __init__(self, tree, pseudo_element=None): self.parsed_tree = tree self.pseudo_element = pseudo_element if pseudo_element is None: #: Tuple of 3 integers: http://www.w3.org/TR/selectors/#specificity self.specificity = tree.specificity else: a, b, c = tree.specificity self.specificity = a, b, c + 1 def __repr__(self): pseudo = f"::{self.pseudo_element}" if self.pseudo_element else "" return f"{self.parsed_tree!r}{pseudo}" class RelativeSelector: def __init__(self, combinator, selector): self.combinator = combinator self.selector = selector @property def specificity(self): return self.selector.specificity @property def pseudo_element(self): return self.selector.pseudo_element def __repr__(self): return ( f"{self.selector!r}" if self.combinator == " " else f"{self.combinator} {self.selector!r}" ) class CombinedSelector: def __init__(self, left, combinator, right): #: Combined or compound selector self.left = left # One of `` `` (a single space), ``>``, ``+`` or ``~``. self.combinator = combinator #: compound selector self.right = right @property def specificity(self): a1, b1, c1 = self.left.specificity a2, b2, c2 = self.right.specificity return a1 + a2, b1 + b2, c1 + c2 def __repr__(self): return f"{self.left!r}{self.combinator}{self.right!r}" class CompoundSelector: def __init__(self, simple_selectors): self.simple_selectors = simple_selectors @property def specificity(self): if self.simple_selectors: # zip(*foo) turns [(a1, b1, c1), (a2, b2, c2), ...] # into [(a1, a2, ...), (b1, b2, ...), (c1, c2, ...)] return tuple( map(sum, zip(*(sel.specificity for sel in self.simple_selectors))) ) else: return 0, 0, 0 def __repr__(self): return "".join(map(repr, self.simple_selectors)) class LocalNameSelector: specificity = 0, 0, 1 def __init__(self, local_name): self.local_name, self.lower_local_name = local_name def __repr__(self): return self.local_name class NamespaceSelector: specificity = 0, 0, 0 def __init__(self, namespace): #: The namespace URL as a string, #: or the empty string for elements not in any namespace. self.namespace = namespace def __repr__(self): if self.namespace == "": return "|" else: return f"{{{self.namespace}}}|" class IDSelector: specificity = 1, 0, 0 def __init__(self, ident): self.ident = ident def __repr__(self): return f"#{self.ident}" class ClassSelector: specificity = 0, 1, 0 def __init__(self, class_name): self.class_name = class_name def __repr__(self): return f".{self.class_name}" class AttributeSelector: specificity = 0, 1, 0 def __init__(self, namespace, name, operator, value, case_sensitive): self.namespace = namespace self.name, self.lower_name = name #: A string like ``=`` or ``~=``, or None for ``[attr]`` selectors self.operator = operator #: A string, or None for ``[attr]`` selectors self.value = value #: ``True`` if case-sensitive, ``False`` if case-insensitive, ``None`` #: if depends on the document language self.case_sensitive = case_sensitive def __repr__(self): namespace = "*|" if self.namespace is None else f"{{{self.namespace}}}" case_sensitive = ( "" if self.case_sensitive is None else f" {'s' if self.case_sensitive else 'i'}" ) return f"[{namespace}{self.name}{self.operator}{self.value!r}{case_sensitive}]" class PseudoClassSelector: specificity = 0, 1, 0 def __init__(self, name): self.name = name def __repr__(self): return ":" + self.name class FunctionalPseudoClassSelector: specificity = 0, 1, 0 def __init__(self, name, arguments): self.name = name self.arguments = arguments def __repr__(self): return f":{self.name}{tuple(self.arguments)!r}" class NegationSelector: def __init__(self, selector_list): self.selector_list = selector_list @property def specificity(self): if self.selector_list: return max(selector.specificity for selector in self.selector_list) else: return (0, 0, 0) def __repr__(self): return f":not({', '.join(repr(sel) for sel in self.selector_list)})" class RelationalSelector: def __init__(self, selector_list): self.selector_list = selector_list @property def specificity(self): if self.selector_list: return max(selector.specificity for selector in self.selector_list) else: return (0, 0, 0) def __repr__(self): return f":has({', '.join(repr(sel) for sel in self.selector_list)})" class MatchesAnySelector: def __init__(self, selector_list): self.selector_list = selector_list @property def specificity(self): if self.selector_list: return max(selector.specificity for selector in self.selector_list) else: return (0, 0, 0) def __repr__(self): return f":is({', '.join(repr(sel) for sel in self.selector_list)})" class SpecificityAdjustmentSelector: def __init__(self, selector_list): self.selector_list = selector_list @property def specificity(self): return (0, 0, 0) def __repr__(self): return f":where({', '.join(repr(sel) for sel in self.selector_list)})"