Browse Source

Intermediate changes
commit_hash:eed9ab338cab60f205ce140295c0393eaee72cfe

robot-piglet 5 months ago
parent
commit
9180a474dc

+ 2 - 2
contrib/libs/croaring/include/roaring/memory.h

@@ -1,12 +1,12 @@
 #ifndef INCLUDE_ROARING_MEMORY_H_
 #define INCLUDE_ROARING_MEMORY_H_
 
+#include <stddef.h>  // for size_t
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stddef.h>  // for size_t
-
 typedef void* (*roaring_malloc_p)(size_t);
 typedef void* (*roaring_realloc_p)(void*, size_t);
 typedef void* (*roaring_calloc_p)(size_t, size_t);

+ 2 - 0
contrib/libs/croaring/include/roaring/portability.h

@@ -585,6 +585,8 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
 
 #if defined(__GNUC__) || defined(__clang__)
 #define CROARING_DEPRECATED __attribute__((deprecated))
+#elif defined(_MSC_VER)
+#define CROARING_DEPRECATED __declspec(deprecated)
 #else
 #define CROARING_DEPRECATED
 #endif  // defined(__GNUC__) || defined(__clang__)

+ 9 - 0
contrib/libs/croaring/include/roaring/roaring64.h

@@ -1,6 +1,7 @@
 #ifndef ROARING64_H
 #define ROARING64_H
 
+#include <roaring.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -92,6 +93,14 @@ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args,
         &((const uint64_t[]){0, __VA_ARGS__})[1])
 #endif
 
+/**
+ * Create a new bitmap by moving containers from a 32 bit roaring bitmap.
+ *
+ * After calling this function, the original bitmap will be empty, and the
+ * returned bitmap will contain all the values from the original bitmap.
+ */
+roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32(roaring_bitmap_t *r);
+
 /**
  * Create a new bitmap containing all the values in [min, max) that are at a
  * distance k*step from min.

+ 2 - 2
contrib/libs/croaring/include/roaring/roaring_version.h

@@ -2,11 +2,11 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
 #ifndef ROARING_INCLUDE_ROARING_VERSION
 #define ROARING_INCLUDE_ROARING_VERSION
-#define ROARING_VERSION "4.1.1"
+#define ROARING_VERSION "4.1.2"
 enum {
     ROARING_VERSION_MAJOR = 4,
     ROARING_VERSION_MINOR = 1,
-    ROARING_VERSION_REVISION = 1
+    ROARING_VERSION_REVISION = 2
 };
 #endif // ROARING_INCLUDE_ROARING_VERSION
 // clang-format on

+ 39 - 16
contrib/libs/croaring/src/roaring64.c

@@ -178,6 +178,43 @@ roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) {
     return result;
 }
 
+/**
+ * Steal the containers from a 32-bit bitmap and insert them into a 64-bit
+ * bitmap (with an offset)
+ *
+ * `high_bits` supplies the upper 32 bits of every key inserted into `dst`;
+ * the 16-bit key of each container in `src` supplies the next 16 bits.
+ *
+ * After calling this function, `src` is left with a size of zero, and `dst`
+ * has had one leaf inserted for every container that was in `src`. Nothing
+ * is returned, and `src` itself is not freed here.
+ */
+static void move_from_roaring32_offset(roaring64_bitmap_t *dst,
+                                       roaring_bitmap_t *src,
+                                       uint32_t high_bits) {
+    // high_bits occupy bits 32..63 of the 64-bit key.
+    uint64_t key_base = ((uint64_t)high_bits) << 32;
+    uint32_t r32_size = ra_get_size(&src->high_low_container);
+    for (uint32_t i = 0; i < r32_size; ++i) {
+        uint16_t key = ra_get_key_at_index(&src->high_low_container, i);
+        uint8_t typecode;
+        container_t *container = ra_get_container_at_index(
+            &src->high_low_container, (uint16_t)i, &typecode);
+
+        uint8_t high48[ART_KEY_BYTES];
+        // The container key lands in bits 16..31; the low 16 bits stay zero.
+        uint64_t high48_bits = key_base | ((uint64_t)key << 16);
+        // presumably writes the high 48 bits into the ART key buffer --
+        // confirm against split_key
+        split_key(high48_bits, high48);
+        // The container pointer is handed to the new leaf as-is; no copy is
+        // made in this function.
+        leaf_t *leaf = create_leaf(container, typecode);
+        art_insert(&dst->art, high48, (art_val_t *)leaf);
+    }
+    // We stole all the containers, so leave behind a size of zero
+    // (presumably so that freeing src afterwards does not release the
+    // containers now referenced from dst -- confirm against
+    // roaring_bitmap_free).
+    src->high_low_container.size = 0;
+}
+
+/**
+ * Create a new 64-bit bitmap and move every container of `bitmap32` into it,
+ * using zero for the upper 32 key bits (see move_from_roaring32_offset).
+ *
+ * `bitmap32` is left with a size of zero but is not freed by this function.
+ * Returns the newly created 64-bit bitmap.
+ *
+ * NOTE(review): `result` is passed on without a NULL check -- confirm that
+ * roaring64_bitmap_create() cannot fail, or that callers tolerate it.
+ */
+roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32(
+    roaring_bitmap_t *bitmap32) {
+    roaring64_bitmap_t *result = roaring64_bitmap_create();
+
+    // high_bits == 0: keys keep the values they had in the 32-bit bitmap.
+    move_from_roaring32_offset(result, bitmap32, 0);
+
+    return result;
+}
+
 roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max,
                                                 uint64_t step) {
     if (step == 0 || max <= min) {
@@ -1947,22 +1984,8 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(
         read_bytes += bitmap32_size;
 
         // Insert all containers of the 32-bit bitmap into the 64-bit bitmap.
-        uint32_t r32_size = ra_get_size(&bitmap32->high_low_container);
-        for (size_t i = 0; i < r32_size; ++i) {
-            uint16_t key16 =
-                ra_get_key_at_index(&bitmap32->high_low_container, (uint16_t)i);
-            uint8_t typecode;
-            container_t *container = ra_get_container_at_index(
-                &bitmap32->high_low_container, (uint16_t)i, &typecode);
-
-            uint64_t high48_bits =
-                (((uint64_t)high32) << 32) | (((uint64_t)key16) << 16);
-            uint8_t high48[ART_KEY_BYTES];
-            split_key(high48_bits, high48);
-            leaf_t *leaf = create_leaf(container, typecode);
-            art_insert(&r->art, high48, (art_val_t *)leaf);
-        }
-        roaring_bitmap_free_without_containers(bitmap32);
+        move_from_roaring32_offset(r, bitmap32, high32);
+        roaring_bitmap_free(bitmap32);
     }
     return r;
 }

+ 2 - 2
contrib/libs/croaring/ya.make

@@ -10,9 +10,9 @@ LICENSE(
 
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
-VERSION(4.1.1)
+VERSION(4.1.2)
 
-ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.1.1.tar.gz)
+ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.1.2.tar.gz)
 
 ADDINCL(
     GLOBAL contrib/libs/croaring/include

+ 1 - 1
contrib/python/hypothesis/py3/.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hypothesis
-Version: 6.111.2
+Version: 6.112.0
 Summary: A library for property-based testing
 Home-page: https://hypothesis.works
 Author: David R. MacIver and Zac Hatfield-Dodds

+ 58 - 28
contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py

@@ -111,11 +111,12 @@ class FloatKWargs(TypedDict):
 class StringKWargs(TypedDict):
     intervals: IntervalSet
     min_size: int
-    max_size: Optional[int]
+    max_size: int
 
 
 class BytesKWargs(TypedDict):
-    size: int
+    min_size: int
+    max_size: int
 
 
 class BooleanKWargs(TypedDict):
@@ -206,7 +207,7 @@ NASTY_FLOATS.extend([-x for x in NASTY_FLOATS])
 FLOAT_INIT_LOGIC_CACHE = LRUCache(4096)
 POOLED_KWARGS_CACHE = LRUCache(4096)
 
-DRAW_STRING_DEFAULT_MAX_SIZE = 10**10  # "arbitrarily large"
+COLLECTION_DEFAULT_MAX_SIZE = 10**10  # "arbitrarily large"
 
 
 class Example:
@@ -1036,7 +1037,7 @@ class IRNode:
             return self.value == (minimal_char * self.kwargs["min_size"])
         if self.ir_type == "bytes":
             # smallest size and all-zero value.
-            return len(self.value) == self.kwargs["size"] and not any(self.value)
+            return len(self.value) == self.kwargs["min_size"] and not any(self.value)
 
         raise NotImplementedError(f"unhandled ir_type {self.ir_type}")
 
@@ -1095,7 +1096,9 @@ def ir_value_permitted(value, ir_type, kwargs):
             return False
         return all(ord(c) in kwargs["intervals"] for c in value)
     elif ir_type == "bytes":
-        return len(value) == kwargs["size"]
+        if len(value) < kwargs["min_size"]:
+            return False
+        return kwargs["max_size"] is None or len(value) <= kwargs["max_size"]
     elif ir_type == "boolean":
         if kwargs["p"] <= 2 ** (-64):
             return value is False
@@ -1314,7 +1317,7 @@ class PrimitiveProvider(abc.ABC):
         intervals: IntervalSet,
         *,
         min_size: int = 0,
-        max_size: Optional[int] = None,
+        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
         forced: Optional[str] = None,
         fake_forced: bool = False,
     ) -> str:
@@ -1322,7 +1325,12 @@ class PrimitiveProvider(abc.ABC):
 
     @abc.abstractmethod
     def draw_bytes(
-        self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False
+        self,
+        min_size: int = 0,
+        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
+        *,
+        forced: Optional[bytes] = None,
+        fake_forced: bool = False,
     ) -> bytes:
         raise NotImplementedError
 
@@ -1606,14 +1614,10 @@ class HypothesisProvider(PrimitiveProvider):
         intervals: IntervalSet,
         *,
         min_size: int = 0,
-        max_size: Optional[int] = None,
+        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
         forced: Optional[str] = None,
         fake_forced: bool = False,
     ) -> str:
-        if max_size is None:
-            max_size = DRAW_STRING_DEFAULT_MAX_SIZE
-
-        assert forced is None or min_size <= len(forced) <= max_size
         assert self._cd is not None
 
         average_size = min(
@@ -1663,17 +1667,40 @@ class HypothesisProvider(PrimitiveProvider):
         return "".join(chars)
 
     def draw_bytes(
-        self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False
+        self,
+        min_size: int = 0,
+        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
+        *,
+        forced: Optional[bytes] = None,
+        fake_forced: bool = False,
     ) -> bytes:
-        forced_i = None
-        if forced is not None:
-            forced_i = int_from_bytes(forced)
-            size = len(forced)
-
         assert self._cd is not None
-        return self._cd.draw_bits(
-            8 * size, forced=forced_i, fake_forced=fake_forced
-        ).to_bytes(size, "big")
+
+        buf = bytearray()
+        average_size = min(
+            max(min_size * 2, min_size + 5),
+            0.5 * (min_size + max_size),
+        )
+        elements = many(
+            self._cd,
+            min_size=min_size,
+            max_size=max_size,
+            average_size=average_size,
+            forced=None if forced is None else len(forced),
+            fake_forced=fake_forced,
+            observe=False,
+        )
+        while elements.more():
+            forced_i: Optional[int] = None
+            if forced is not None:
+                # implicit conversion from bytes to int by indexing here
+                forced_i = forced[elements.count - 1]
+
+            buf += self._cd.draw_bits(
+                8, forced=forced_i, fake_forced=fake_forced
+            ).to_bytes(1, "big")
+
+        return bytes(buf)
 
     def _draw_float(
         self,
@@ -2216,12 +2243,13 @@ class ConjectureData:
         intervals: IntervalSet,
         *,
         min_size: int = 0,
-        max_size: Optional[int] = None,
+        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
         forced: Optional[str] = None,
         fake_forced: bool = False,
         observe: bool = True,
     ) -> str:
-        assert forced is None or min_size <= len(forced)
+        assert forced is None or min_size <= len(forced) <= max_size
+        assert min_size >= 0
 
         kwargs: StringKWargs = self._pooled_kwargs(
             "string",
@@ -2255,17 +2283,19 @@ class ConjectureData:
 
     def draw_bytes(
         self,
-        # TODO move to min_size and max_size here.
-        size: int,
+        min_size: int = 0,
+        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
         *,
         forced: Optional[bytes] = None,
         fake_forced: bool = False,
         observe: bool = True,
     ) -> bytes:
-        assert forced is None or len(forced) == size
-        assert size >= 0
+        assert forced is None or min_size <= len(forced) <= max_size
+        assert min_size >= 0
 
-        kwargs: BytesKWargs = self._pooled_kwargs("bytes", {"size": size})
+        kwargs: BytesKWargs = self._pooled_kwargs(
+            "bytes", {"min_size": min_size, "max_size": max_size}
+        )
 
         if self.ir_tree_nodes is not None and observe:
             node_value = self._pop_ir_tree_node("bytes", kwargs, forced=forced)

+ 35 - 36
contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py

@@ -146,9 +146,31 @@ class Conclusion:
 MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000
 
 
-def compute_max_children(ir_type, kwargs):
-    from hypothesis.internal.conjecture.data import DRAW_STRING_DEFAULT_MAX_SIZE
+def _count_distinct_strings(*, alphabet_size, min_size, max_size):
+    # Count the strings over an alphabet of `alphabet_size` symbols whose
+    # length lies in [min_size, max_size]. Returns
+    # MAX_CHILDREN_EFFECTIVELY_INFINITE instead of the exact count when the
+    # estimate below says the count exceeds that limit.
+    #
+    # We want to estimate if we're going to have more children than
+    # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
+    # extremely expensive pow. We'll check if the number of strings in
+    # the largest string size alone is enough to put us over this limit.
+    # We'll also employ a trick of estimating against log, which is cheaper
+    # than computing a pow.
+    #
+    # x = max_size
+    # y = alphabet_size
+    # n = MAX_CHILDREN_EFFECTIVELY_INFINITE
+    #
+    #     y**x > n
+    # <=> log(y**x)  > log(n)
+    # <=> x * log(y) > log(n)
+    #
+    # Note: math.log(0) raises, and math.log(1) == 0 means this check can
+    # never fire for a one-symbol alphabet, so callers must handle
+    # alphabet_size of 0 or 1 themselves where that matters.
+    definitely_too_large = max_size * math.log(alphabet_size) > math.log(
+        MAX_CHILDREN_EFFECTIVELY_INFINITE
+    )
+    if definitely_too_large:
+        return MAX_CHILDREN_EFFECTIVELY_INFINITE
+
+    # number of strings of length k, for each k in [min_size, max_size].
+    return sum(alphabet_size**k for k in range(min_size, max_size + 1))
+
+
+def compute_max_children(ir_type, kwargs):
     if ir_type == "integer":
         min_value = kwargs["min_value"]
         max_value = kwargs["max_value"]
@@ -178,50 +200,27 @@ def compute_max_children(ir_type, kwargs):
             return 1
         return 2
     elif ir_type == "bytes":
-        return 2 ** (8 * kwargs["size"])
+        return _count_distinct_strings(
+            alphabet_size=2**8, min_size=kwargs["min_size"], max_size=kwargs["max_size"]
+        )
     elif ir_type == "string":
         min_size = kwargs["min_size"]
         max_size = kwargs["max_size"]
         intervals = kwargs["intervals"]
 
-        if max_size is None:
-            max_size = DRAW_STRING_DEFAULT_MAX_SIZE
-
         if len(intervals) == 0:
             # Special-case the empty alphabet to avoid an error in math.log(0).
             # Only possibility is the empty string.
             return 1
 
-        # We want to estimate if we're going to have more children than
-        # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
-        # extremely expensive pow. We'll check if the number of strings in
-        # the largest string size alone is enough to put us over this limit.
-        # We'll also employ a trick of estimating against log, which is cheaper
-        # than computing a pow.
-        #
-        # x = max_size
-        # y = len(intervals)
-        # n = MAX_CHILDREN_EFFECTIVELY_INFINITE
-        #
-        #     x**y > n
-        # <=> log(x**y)  > log(n)
-        # <=> y * log(x) > log(n)
-
-        # avoid math.log(1) == 0 and incorrectly failing the below estimate,
-        # even when we definitely are too large.
-        if len(intervals) == 1:
-            definitely_too_large = max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE
-        else:
-            definitely_too_large = max_size * math.log(len(intervals)) > math.log(
-                MAX_CHILDREN_EFFECTIVELY_INFINITE
-            )
-
-        if definitely_too_large:
+        # avoid math.log(1) == 0 and incorrectly failing our effectively_infinite
+        # estimate, even when we definitely are too large.
+        if len(intervals) == 1 and max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE:
             return MAX_CHILDREN_EFFECTIVELY_INFINITE
 
-        # number of strings of length k, for each k in [min_size, max_size].
-        return sum(len(intervals) ** k for k in range(min_size, max_size + 1))
-
+        return _count_distinct_strings(
+            alphabet_size=len(intervals), min_size=min_size, max_size=max_size
+        )
     elif ir_type == "float":
         min_value = kwargs["min_value"]
         max_value = kwargs["max_value"]
@@ -306,8 +305,8 @@ def all_children(ir_type, kwargs):
         else:
             yield from [False, True]
     if ir_type == "bytes":
-        size = kwargs["size"]
-        yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size)))
+        for size in range(kwargs["min_size"], kwargs["max_size"] + 1):
+            yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size)))
     if ir_type == "string":
         min_size = kwargs["min_size"]
         max_size = kwargs["max_size"]

+ 2 - 3
contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py

@@ -1075,10 +1075,9 @@ class Shrinker:
                 return False  # pragma: no cover
 
             if node.ir_type in {"string", "bytes"}:
-                size_kwarg = "min_size" if node.ir_type == "string" else "size"
                 # if the size *increased*, we would have to guess what to pad with
                 # in order to try fixing up this attempt. Just give up.
-                if node.kwargs[size_kwarg] <= attempt_kwargs[size_kwarg]:
+                if node.kwargs["min_size"] <= attempt_kwargs["min_size"]:
                     return False
                 # the size decreased in our attempt. Try again, but replace with
                 # the min_size that we would have gotten, and truncate the value
@@ -1089,7 +1088,7 @@ class Shrinker:
                         initial_attempt[node.index].copy(
                             with_kwargs=attempt_kwargs,
                             with_value=initial_attempt[node.index].value[
-                                : attempt_kwargs[size_kwarg]
+                                : attempt_kwargs["min_size"]
                             ],
                         )
                     ]

Some files were not shown because too many files changed in this diff