Skip to content

Commit

Permalink
Mark failures for crosshair to fix?
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Jan 26, 2025
1 parent 8de04da commit 4ff8bc3
Show file tree
Hide file tree
Showing 21 changed files with 45 additions and 4 deletions.
3 changes: 2 additions & 1 deletion hypothesis-python/tests/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ class Why(enum.Enum):

# nested_given: https://github.com/pschanely/hypothesis-crosshair/issues/11
nested_given = "nested @given decorators don't work with crosshair"
undiscovered = "crosshair may not find the failing input"
other = "reasons not elsewhere categorized"


Expand All @@ -276,7 +277,7 @@ def xfail_on_crosshair(why: Why, /, *, strict=True, as_marks=False):

current_backend = settings.get_profile(settings._current_profile).backend
kw = {
"strict": strict,
"strict": strict and why != Why.undiscovered,
"reason": f"Expected failure due to: {why.value}",
"condition": current_backend == "crosshair",
}
Expand Down
2 changes: 2 additions & 0 deletions hypothesis-python/tests/cover/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from hypothesis.strategies import dates, datetimes, timedeltas, times

from tests.common.debug import assert_simple_property, find_any, minimal
from tests.common.utils import Why, xfail_on_crosshair


def test_can_find_positive_delta():
Expand Down Expand Up @@ -104,6 +105,7 @@ def test_single_date(val):
assert find_any(dates(val, val)) is val


@xfail_on_crosshair(Why.undiscovered)
def test_can_find_midnight():
find_any(times(), lambda x: x.hour == x.minute == x.second == 0)

Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/cover/test_filter_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def test_rewrite_unsatisfiable_filter(s, pred):
assert s.filter(pred).is_empty


@xfail_on_crosshair(Why.undiscovered)
@pytest.mark.parametrize(
"pred",
[
Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/cover/test_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,7 @@ def test_supportsop_types_support_protocol(protocol, data):
assert issubclass(type(value), protocol)


@xfail_on_crosshair(Why.undiscovered)
@pytest.mark.parametrize("restrict_custom_strategy", [True, False])
def test_generic_aliases_can_be_conditionally_resolved_by_registered_function(
restrict_custom_strategy,
Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/cover/test_reproduce_failure.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ def test(data):
assert "@reproduce_failure" not in o.getvalue()


@xfail_on_crosshair(Why.undiscovered)
def test_does_not_print_reproduction_for_large_data_examples_by_default():
@settings(phases=no_shrink, print_blob=False)
@given(st.data())
Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/cover/test_sampled_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def stupid_sampled_sets(draw):
return result


@xfail_on_crosshair(Why.undiscovered)
@given(stupid_sampled_sets())
def test_efficient_sets_of_samples_with_chained_transformations_slow_path(x):
# This deliberately exercises the standard filtering logic without going
Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/cover/test_stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,7 @@ def fail_fast(self, a1, a2, a3, b1, b2, b3):
)


@xfail_on_crosshair(Why.undiscovered)
def test_multiple_common_targets():
class Machine(RuleBasedStateMachine):
a = Bundle("a")
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/cover/test_targeting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from hypothesis.control import current_build_context
from hypothesis.errors import InvalidArgument

from tests.common.utils import Why, xfail_on_crosshair


@example(0.0, "this covers the branch where context.data is None")
@given(
Expand Down Expand Up @@ -100,6 +102,7 @@ def test_cannot_target_same_label_twice(_):
target(1.0, label="label")


@xfail_on_crosshair(Why.undiscovered)
@given(st.none())
def test_cannot_target_default_label_twice(_):
target(0.0)
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/cover/test_testdecorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def test_can_be_given_keyword_args(x, name):
assert len(name) < x


@xfail_on_crosshair(Why.undiscovered)
@fails
@given(one_of(floats(), booleans()), one_of(floats(), booleans()))
def test_one_of_produces_different_values(x, y):
Expand Down Expand Up @@ -196,6 +197,7 @@ def test_removing_an_element_from_a_unique_list(xs, y):
assert y not in xs


@xfail_on_crosshair(Why.undiscovered)
@fails
@given(lists(integers(), min_size=2), data())
def test_removing_an_element_from_a_non_unique_list(xs, data):
Expand All @@ -219,6 +221,7 @@ def test_can_mix_sampling_with_generating(x, y):
assert type(x) == type(y)


@xfail_on_crosshair(Why.undiscovered)
@fails
@given(frozensets(integers()))
def test_can_find_large_sum_frozenset(xs):
Expand Down
2 changes: 2 additions & 0 deletions hypothesis-python/tests/datetime/test_dateutil_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def test_dateutil_exists_our_not_exists_are_inverse(value):
assert datetime_does_not_exist(value) == (not tz.datetime_exists(value))


@xfail_on_crosshair(Why.undiscovered)
def test_datetimes_can_exclude_imaginary():
find_any(
datetimes(**DAY_WITH_IMAGINARY_HOUR_KWARGS, allow_imaginary=True),
Expand All @@ -120,6 +121,7 @@ def test_datetimes_can_exclude_imaginary():
)


@xfail_on_crosshair(Why.undiscovered)
@fails_with(FailedHealthCheck)
@given(
datetimes(
Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/datetime/test_pytz_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def test_time_bounds_must_be_naive(name, val):
times(**{name: val}).validate()


@xfail_on_crosshair(Why.undiscovered)
@pytest.mark.parametrize(
"bound",
[
Expand Down
2 changes: 2 additions & 0 deletions hypothesis-python/tests/datetime/test_zoneinfo_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
from hypothesis.errors import InvalidArgument

from tests.common.debug import assert_no_examples, find_any, minimal
from tests.common.utils import Why, xfail_on_crosshair


def test_utc_is_minimal():
assert minimal(st.timezones()) is zoneinfo.ZoneInfo("UTC")


@xfail_on_crosshair(Why.undiscovered)
def test_can_generate_non_utc():
find_any(
st.datetimes(timezones=st.timezones()).filter(lambda d: d.tzinfo.key != "UTC")
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/nocover/test_characters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

from hypothesis import given, settings, strategies as st

from tests.common.utils import Why, xfail_on_crosshair

IDENTIFIER_CHARS = string.ascii_letters + string.digits + "_"


Expand All @@ -23,6 +25,7 @@ def test_large_blacklist(c):
assert c not in IDENTIFIER_CHARS


@xfail_on_crosshair(Why.symbolic_outside_context) # seems like a crosshair bug here
@given(st.data())
def test_arbitrary_blacklist(data):
blacklist = data.draw(st.text(st.characters(max_codepoint=1000), min_size=1))
Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/nocover/test_database_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def has_a_non_zero_byte(x):
return any(bytes(x))


@xfail_on_crosshair(Why.undiscovered)
def test_saves_incremental_steps_in_database():
key = b"a database key"
database = InMemoryExampleDatabase()
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/nocover/test_duplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from hypothesis import given, settings
from hypothesis.strategies._internal import SearchStrategy

from tests.common.utils import Why, xfail_on_crosshair


class Blocks(SearchStrategy):
def __init__(self, n):
Expand All @@ -37,6 +39,7 @@ def test(b):
assert set(counts.values()) == {1}


@xfail_on_crosshair(Why.other, strict=False) # CrosshairInternal for n>0
@pytest.mark.parametrize("n", range(1, 5))
def test_mostly_does_not_duplicate_blocks_even_when_failing(n):
counts = Counter()
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/nocover/test_flatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)

from tests.common.debug import find_any, minimal
from tests.common.utils import Why, xfail_on_crosshair

ConstantLists = integers().flatmap(lambda i: lists(just(i)))

Expand Down Expand Up @@ -97,6 +98,7 @@ def criterion(ls):
assert set(result) == {False, ""}


@xfail_on_crosshair(Why.undiscovered) # for n >= 8 at least
@pytest.mark.parametrize("n", range(1, 10))
def test_can_shrink_through_a_binding(n):
bool_lists = integers(0, 100).flatmap(
Expand All @@ -105,6 +107,7 @@ def test_can_shrink_through_a_binding(n):
assert minimal(bool_lists, lambda x: x.count(True) >= n) == [True] * n


@xfail_on_crosshair(Why.undiscovered) # for n >= 8 at least
@pytest.mark.parametrize("n", range(1, 10))
def test_can_delete_in_middle_of_a_binding(n):
bool_lists = integers(1, 100).flatmap(
Expand Down
4 changes: 3 additions & 1 deletion hypothesis-python/tests/nocover/test_floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from hypothesis.strategies import data, floats, lists

from tests.common.debug import find_any
from tests.common.utils import fails
from tests.common.utils import Why, fails, xfail_on_crosshair

TRY_HARDER = settings(
max_examples=1000, suppress_health_check=[HealthCheck.filter_too_much]
Expand Down Expand Up @@ -93,6 +93,7 @@ def test_is_not_int(x):
assert x != int(x)


@xfail_on_crosshair(Why.undiscovered)
@fails
@given(floats())
@TRY_HARDER
Expand Down Expand Up @@ -128,6 +129,7 @@ def test_floats_are_in_range(x, y, data):
assert x <= t <= y


@xfail_on_crosshair(Why.undiscovered)
@pytest.mark.parametrize("neg", [False, True])
@pytest.mark.parametrize("snan", [False, True])
def test_can_find_negative_and_signaling_nans(neg, snan):
Expand Down
3 changes: 2 additions & 1 deletion hypothesis-python/tests/nocover/test_recursive.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from hypothesis import HealthCheck, given, settings, strategies as st

from tests.common.debug import find_any, minimal
from tests.common.utils import flaky
from tests.common.utils import Why, flaky, xfail_on_crosshair


def test_can_generate_with_large_branching():
Expand Down Expand Up @@ -79,6 +79,7 @@ def test_drawing_many_near_boundary():
assert len(ls) == size


@xfail_on_crosshair(Why.undiscovered)
def test_can_use_recursive_data_in_sets():
nested_sets = st.recursive(st.booleans(), st.frozensets, max_leaves=3)
find_any(nested_sets, settings=settings(deadline=None))
Expand Down
5 changes: 5 additions & 0 deletions hypothesis-python/tests/nocover/test_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
from hypothesis._settings import note_deprecation
from hypothesis.errors import HypothesisDeprecationWarning

from tests.common.utils import Why, xfail_on_crosshair


@xfail_on_crosshair(Why.other)
def test_note_deprecation_blames_right_code_issue_652():
msg = "this is an arbitrary deprecation warning message"

Expand Down Expand Up @@ -58,6 +61,8 @@ def test_unique_floats_with_nan_is_not_flaky_3926(ls):

# this will take a while to find the regression, but will eventually trigger it.
# min_value=0 is critical to trigger the probing behavior which exhausts our buffer.
# See https://github.com/pschanely/CrossHair/issues/285 for an upstream fix.
@xfail_on_crosshair(Why.other, strict=False)
@given(st.integers(min_value=0, max_value=1 << 25_000))
def test_overrun_during_datatree_simulation_3874(n):
pass
3 changes: 2 additions & 1 deletion hypothesis-python/tests/nocover/test_sampled_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from hypothesis.strategies._internal.strategies import SampledFromStrategy

from tests.common.debug import find_any, minimal
from tests.common.utils import fails_with
from tests.common.utils import Why, fails_with, xfail_on_crosshair


@pytest.mark.parametrize("size", [100, 10**5, 10**6, 2**25])
Expand Down Expand Up @@ -101,6 +101,7 @@ def test_flag_enum_repr_uses_class_not_a_list():
assert lazy_repr == "sampled_from(tests.nocover.test_sampled_from.AFlag)"


@xfail_on_crosshair(Why.undiscovered)
def test_exhaustive_flags():
# Generate powerset of flag combinations. There are only 2^3 of them, so
# we can reasonably expect that they are all found.
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/nocover/test_simple_numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from hypothesis.strategies import floats, integers, lists

from tests.common.debug import minimal
from tests.common.utils import Why, xfail_on_crosshair


def test_minimize_negative_int():
Expand Down Expand Up @@ -116,6 +117,7 @@ def test_can_minimal_infinite_negative_float():
assert minimal(floats(), lambda x: x < -sys.float_info.max)


@xfail_on_crosshair(Why.undiscovered) # sometimes
def test_can_minimal_float_on_boundary_of_representable():
minimal(floats(), lambda x: x + 1 == x and not math.isinf(x))

Expand Down Expand Up @@ -153,6 +155,7 @@ def test_minimal_fractional_float():
assert minimal(floats(), lambda x: x >= 1.5) == 2


@xfail_on_crosshair(Why.undiscovered)
def test_minimizes_lists_of_negative_ints_up_to_boundary():
result = minimal(
lists(integers(), min_size=10),
Expand Down

0 comments on commit 4ff8bc3

Please sign in to comment.