Browse Source

Intermediate changes

robot-piglet 1 year ago
parent
commit
1f0341f177

+ 1 - 1
contrib/python/hypothesis/py3/.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hypothesis
-Version: 6.92.9
+Version: 6.93.0
 Summary: A library for property-based testing
 Home-page: https://hypothesis.works
 Author: David R. MacIver and Zac Hatfield-Dodds

+ 51 - 29
contrib/python/hypothesis/py3/hypothesis/extra/lark.py

@@ -34,6 +34,7 @@ from hypothesis import strategies as st
 from hypothesis.errors import InvalidArgument
 from hypothesis.internal.conjecture.utils import calc_label_from_name
 from hypothesis.internal.validation import check_type
+from hypothesis.strategies._internal.regex import IncompatibleWithAlphabet
 from hypothesis.strategies._internal.utils import cacheable, defines_strategy
 
 __all__ = ["from_lark"]
@@ -59,7 +60,7 @@ class LarkStrategy(st.SearchStrategy):
     See ``from_lark`` for details.
     """
 
-    def __init__(self, grammar, start, explicit):
+    def __init__(self, grammar, start, explicit, alphabet):
         assert isinstance(grammar, lark.lark.Lark)
         if start is None:
             start = grammar.options.start
@@ -86,38 +87,66 @@ class LarkStrategy(st.SearchStrategy):
             t = r.origin
             self.names_to_symbols[t.name] = t
 
+        disallowed = set()
+        self.terminal_strategies = {}
         for t in terminals:
             self.names_to_symbols[t.name] = Terminal(t.name)
-
-        self.start = st.sampled_from([self.names_to_symbols[s] for s in start])
+            s = st.from_regex(t.pattern.to_regexp(), fullmatch=True, alphabet=alphabet)
+            try:
+                s.validate()
+            except IncompatibleWithAlphabet:
+                disallowed.add(t.name)
+            else:
+                self.terminal_strategies[t.name] = s
 
         self.ignored_symbols = tuple(self.names_to_symbols[n] for n in ignore_names)
 
-        self.terminal_strategies = {
-            t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
-            for t in terminals
-        }
-        unknown_explicit = set(explicit) - get_terminal_names(
-            terminals, rules, ignore_names
-        )
-        if unknown_explicit:
+        all_terminals = get_terminal_names(terminals, rules, ignore_names)
+        if unknown_explicit := sorted(set(explicit) - all_terminals):
+            raise InvalidArgument(
+                "The following arguments were passed as explicit_strategies, but "
+                f"there is no {unknown_explicit} terminal production in this grammar."
+            )
+        if missing_declared := sorted(
+            all_terminals - {t.name for t in terminals} - set(explicit)
+        ):
             raise InvalidArgument(
-                "The following arguments were passed as explicit_strategies, "
-                "but there is no such terminal production in this grammar: "
-                + repr(sorted(unknown_explicit))
+                f"Undefined terminal{'s' * (len(missing_declared) > 1)} "
+                f"{sorted(missing_declared)!r}. Generation does not currently "
+                "support use of %declare unless you pass `explicit`, a dict of "
+                f"names-to-strategies, such as `{{{missing_declared[0]!r}: "
+                'st.just("")}}`'
             )
         self.terminal_strategies.update(explicit)
 
         nonterminals = {}
 
         for rule in rules:
-            nonterminals.setdefault(rule.origin.name, []).append(tuple(rule.expansion))
-
-        for v in nonterminals.values():
-            v.sort(key=len)
+            if disallowed.isdisjoint(r.name for r in rule.expansion):
+                nonterminals.setdefault(rule.origin.name, []).append(
+                    tuple(rule.expansion)
+                )
+
+        allowed_rules = {*self.terminal_strategies, *nonterminals}
+        while dict(nonterminals) != (
+            nonterminals := {
+                k: clean
+                for k, v in nonterminals.items()
+                if (clean := [x for x in v if all(r.name in allowed_rules for r in x)])
+            }
+        ):
+            allowed_rules = {*self.terminal_strategies, *nonterminals}
+
+        if set(start).isdisjoint(allowed_rules):
+            raise InvalidArgument(
+                f"No start rule {tuple(start)} is allowed by {alphabet=}"
+            )
+        self.start = st.sampled_from(
+            [self.names_to_symbols[s] for s in start if s in allowed_rules]
+        )
 
         self.nonterminal_strategies = {
-            k: st.sampled_from(v) for k, v in nonterminals.items()
+            k: st.sampled_from(sorted(v, key=len)) for k, v in nonterminals.items()
         }
 
         self.__rule_labels = {}
@@ -138,15 +167,7 @@ class LarkStrategy(st.SearchStrategy):
 
     def draw_symbol(self, data, symbol, draw_state):
         if isinstance(symbol, Terminal):
-            try:
-                strategy = self.terminal_strategies[symbol.name]
-            except KeyError:
-                raise InvalidArgument(
-                    "Undefined terminal %r. Generation does not currently support "
-                    "use of %%declare unless you pass `explicit`, a dict of "
-                    'names-to-strategies, such as `{%r: st.just("")}`'
-                    % (symbol.name, symbol.name)
-                ) from None
+            strategy = self.terminal_strategies[symbol.name]
             draw_state.append(data.draw(strategy))
         else:
             assert isinstance(symbol, NonTerminal)
@@ -181,6 +202,7 @@ def from_lark(
     *,
     start: Optional[str] = None,
     explicit: Optional[Dict[str, st.SearchStrategy[str]]] = None,
+    alphabet: st.SearchStrategy[str] = st.characters(codec="utf-8"),
 ) -> st.SearchStrategy[str]:
     """A strategy for strings accepted by the given context-free grammar.
 
@@ -214,4 +236,4 @@ def from_lark(
             k: v.map(check_explicit(f"explicit[{k!r}]={v!r}"))
             for k, v in explicit.items()
         }
-    return LarkStrategy(grammar, start, explicit)
+    return LarkStrategy(grammar, start, explicit, alphabet)

+ 38 - 10
contrib/python/hypothesis/py3/hypothesis/strategies/_internal/regex.py

@@ -63,6 +63,10 @@ GROUP_CACHE_STRATEGY: st.SearchStrategy[dict] = st.shared(
 )
 
 
+class IncompatibleWithAlphabet(InvalidArgument):
+    pass
+
+
 @st.composite
 def update_group(draw, group_name, strategy):
     cache = draw(GROUP_CACHE_STRATEGY)
@@ -176,11 +180,8 @@ class CharactersBuilder:
         else:
             raise NotImplementedError(f"Unknown character category: {category}")
 
-    def add_char(self, char, *, check=True):
+    def add_char(self, c):
         """Add given char to the whitelist."""
-        c = self.code_to_char(char)
-        if check and chars_not_in_alphabet(self._alphabet, c):
-            raise InvalidArgument(f"Literal {c!r} is not in the specified alphabet")
         self._whitelist_chars.add(c)
         if (
             self._ignorecase
@@ -363,7 +364,7 @@ def _strategy(codes, context, is_unicode, *, alphabet):
                 if i + 1 < j:
                     chars = empty.join(to_char(charcode) for _, charcode in codes[i:j])
                     if invalid := chars_not_in_alphabet(alphabet, chars):
-                        raise InvalidArgument(
+                        raise IncompatibleWithAlphabet(
                             f"Literal {chars!r} contains characters {invalid!r} "
                             f"which are not in the specified alphabet"
                         )
@@ -389,7 +390,9 @@ def _strategy(codes, context, is_unicode, *, alphabet):
             # Regex 'a' (single char)
             c = to_char(value)
             if chars_not_in_alphabet(alphabet, c):
-                raise InvalidArgument(f"Literal {c!r} is not in the specified alphabet")
+                raise IncompatibleWithAlphabet(
+                    f"Literal {c!r} is not in the specified alphabet"
+                )
             if (
                 context.flags & re.IGNORECASE
                 and c != c.swapcase()
@@ -451,12 +454,28 @@ def _strategy(codes, context, is_unicode, *, alphabet):
                     pass
                 elif charset_code == sre.LITERAL:
                     # Regex '[a]' (single char)
-                    builder.add_char(charset_value)
+                    c = builder.code_to_char(charset_value)
+                    if chars_not_in_alphabet(builder._alphabet, c):
+                        raise IncompatibleWithAlphabet(
+                            f"Literal {c!r} is not in the specified alphabet"
+                        )
+                    builder.add_char(c)
                 elif charset_code == sre.RANGE:
                     # Regex '[a-z]' (char range)
                     low, high = charset_value
-                    for char_code in range(low, high + 1):
-                        builder.add_char(char_code, check=char_code in (low, high))
+                    chars = empty.join(map(builder.code_to_char, range(low, high + 1)))
+                    if len(chars) == len(
+                        invalid := set(chars_not_in_alphabet(alphabet, chars))
+                    ):
+                        raise IncompatibleWithAlphabet(
+                            f"Charset '[{chr(low)}-{chr(high)}]' contains characters {invalid!r} "
+                            f"which are not in the specified alphabet"
+                        )
+                    for c in chars:
+                        if isinstance(c, int):
+                            c = int_to_byte(c)
+                        if c not in invalid:
+                            builder.add_char(c)
                 elif charset_code == sre.CATEGORY:
                     # Regex '[\w]' (char category)
                     builder.add_category(charset_value)
@@ -515,7 +534,16 @@ def _strategy(codes, context, is_unicode, *, alphabet):
 
         elif code == sre.BRANCH:
             # Regex 'a|b|c' (branch)
-            return st.one_of([recurse(branch) for branch in value[1]])
+            branches = []
+            errors = []
+            for branch in value[1]:
+                try:
+                    branches.append(recurse(branch))
+                except IncompatibleWithAlphabet as e:
+                    errors.append(str(e))
+            if errors and not branches:
+                raise IncompatibleWithAlphabet("\n".join(errors))
+            return st.one_of(branches)
 
         elif code in [sre.MIN_REPEAT, sre.MAX_REPEAT, POSSESSIVE_REPEAT]:
             # Regexes 'a?', 'a*', 'a+' and their non-greedy variants

+ 1 - 1
contrib/python/hypothesis/py3/hypothesis/version.py

@@ -8,5 +8,5 @@
 # v. 2.0. If a copy of the MPL was not distributed with this file, You can
 # obtain one at https://mozilla.org/MPL/2.0/.
 
-__version_info__ = (6, 92, 9)
+__version_info__ = (6, 93, 0)
 __version__ = ".".join(map(str, __version_info__))

+ 1 - 1
contrib/python/hypothesis/py3/ya.make

@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(6.92.9)
+VERSION(6.93.0)
 
 LICENSE(MPL-2.0)
 

+ 2 - 2
yt/yt/build/ya.make

@@ -2,8 +2,8 @@ LIBRARY()
 
 INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
 
-SET(YT_VERSION_MAJOR 23)
-SET(YT_VERSION_MINOR 3)
+SET(YT_VERSION_MAJOR 24)
+SET(YT_VERSION_MINOR 1)
 
 DEFAULT(YT_VERSION_PATCH 0)
 DEFAULT(YT_VERSION_BRANCH "local")