diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index d2da8005..7796fad3 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -21,6 +21,7 @@ jobs: strategy: matrix: os: [ubuntu-24.04] + fail-fast: false python-version: [ # "3.8", (our current version of mypy doesn't support 3.8) "3.9", diff --git a/AGENTS.md b/AGENTS.md index 6cbc4438..46f218ae 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,6 +19,7 @@ CrossHair is a Python analysis tool that uses **symbolic execution** and an **SM - **Formatting**: Black (88 chars), isort, flake8 - **Tests**: pytest; run with `PYTHONHASHSEED=0` for reproducibility - **Pre-commit** runs black, isort, flake8, mypy, and pytest +- **Commenting**: Use a high bar for comments - genuinely surpising or confusing behaviors only. ## Must-Know Technical Background @@ -46,6 +47,7 @@ CrossHair is a Python analysis tool that uses **symbolic execution** and an **SM – If you use a `space` fixture parameter, you can `with ResumedTracing():` to enable tracing. - Otherwise, enter a statespace context to begin tracing. - Most pytest nicities (value printing!) are disabled because operating on symbolics under failure modes tends to obscure problems. + - The test suite is large; use `pytest -n auto --dist=worksteal` when running several tests (`pytest-xdist` is already in dev dependencies). - Consider **debugging individual tests with `pytest -v`** - CrossHair's logging is verbose, but will display some important events: - When and where SMT checks occur, including the SMT expression. - When and where a value is realized. diff --git a/crosshair/libimpl/binascii_ch_test.py b/crosshair/libimpl/binascii_ch_test.py index 0f010def..b9fd8b3d 100644 --- a/crosshair/libimpl/binascii_ch_test.py +++ b/crosshair/libimpl/binascii_ch_test.py @@ -27,4 +27,4 @@ def test_builtin(fn_name: str) -> None: fn = getattr(this_module, fn_name) messages = run_checkables(analyze_function(fn)) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/collectionslib_ch_test.py b/crosshair/libimpl/collectionslib_ch_test.py index 4e6f2147..7ee029fe 100644 --- a/crosshair/libimpl/collectionslib_ch_test.py +++ b/crosshair/libimpl/collectionslib_ch_test.py @@ -249,4 +249,4 @@ def test_builtin(fn_name: str) -> None: fn = getattr(this_module, fn_name) messages = run_checkables(analyze_function(fn, opts)) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/datetimelib_ch_test.py b/crosshair/libimpl/datetimelib_ch_test.py index ce89c6d4..888a7c6f 100644 --- a/crosshair/libimpl/datetimelib_ch_test.py +++ b/crosshair/libimpl/datetimelib_ch_test.py @@ -361,4 +361,4 @@ def test_builtin(fn_name: str) -> None: this_module = sys.modules[__name__] messages = run_checkables(analyze_function(getattr(this_module, fn_name))) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/decimallib_ch_test.py b/crosshair/libimpl/decimallib_ch_test.py index 87436cb5..bcbe6b7b 100644 --- a/crosshair/libimpl/decimallib_ch_test.py +++ b/crosshair/libimpl/decimallib_ch_test.py @@ -75,4 +75,4 @@ def test_builtin(fn_name: str) -> None: this_module = sys.modules[__name__] messages = run_checkables(analyze_function(getattr(this_module, fn_name))) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/encodings_ch_test.py b/crosshair/libimpl/encodings_ch_test.py index b71df23d..8e974d33 100644 --- a/crosshair/libimpl/encodings_ch_test.py +++ b/crosshair/libimpl/encodings_ch_test.py @@ -80,4 +80,4 @@ def test_builtin(fn_name: str) -> None: this_module = sys.modules[__name__] messages = run_checkables(analyze_function(getattr(this_module, fn_name))) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/iolib.py b/crosshair/libimpl/iolib.py index 0acae562..bb86f2ea 100644 --- a/crosshair/libimpl/iolib.py +++ b/crosshair/libimpl/iolib.py @@ -53,8 +53,6 @@ def __ch_pytype__(self): return StringIO def __ch_realize__(self): - if self.closed: - raise ValueError contents = realize(self._contents) newline_mode = realize(self._newline_mode) pos = realize(self._pos) @@ -62,6 +60,8 @@ def __ch_realize__(self): # Avoid constructor newline translation; set exact underlying state instead. sio = StringIO("", newline_mode) sio.__setstate__((contents, newline_mode, pos, None)) + if self.closed: + sio.close() return sio @property diff --git a/crosshair/libimpl/iolib_ch_test.py b/crosshair/libimpl/iolib_ch_test.py index de3c2d7e..a1fb764b 100644 --- a/crosshair/libimpl/iolib_ch_test.py +++ b/crosshair/libimpl/iolib_ch_test.py @@ -135,4 +135,4 @@ def test_builtin(fn_name: str) -> None: fn = getattr(sys.modules[__name__], fn_name) messages = run_checkables(analyze_function(fn)) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/jsonlib_ch_test.py b/crosshair/libimpl/jsonlib_ch_test.py index a18b8568..a80b19bf 100644 --- a/crosshair/libimpl/jsonlib_ch_test.py +++ b/crosshair/libimpl/jsonlib_ch_test.py @@ -39,4 +39,4 @@ def test_builtin(fn_name: str) -> None: fn = getattr(sys.modules[__name__], fn_name) messages = run_checkables(analyze_function(fn)) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/mathlib_ch_test.py b/crosshair/libimpl/mathlib_ch_test.py index c492a72b..04f1217a 100644 --- a/crosshair/libimpl/mathlib_ch_test.py +++ b/crosshair/libimpl/mathlib_ch_test.py @@ -41,4 +41,4 @@ def test_builtin(fn_name: str) -> None: ) ) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/relib_ch_test.py b/crosshair/libimpl/relib_ch_test.py index 7091df71..409af11f 100644 --- a/crosshair/libimpl/relib_ch_test.py +++ b/crosshair/libimpl/relib_ch_test.py @@ -190,4 +190,4 @@ def test_builtin(fn_name: str) -> None: fn = getattr(this_module, fn_name) messages = run_checkables(analyze_function(fn)) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/libimpl/structlib_ch_test.py b/crosshair/libimpl/structlib_ch_test.py index b7b94291..f8127aa7 100644 --- a/crosshair/libimpl/structlib_ch_test.py +++ b/crosshair/libimpl/structlib_ch_test.py @@ -55,4 +55,4 @@ def test_builtin(fn_name: str) -> None: fn = getattr(this_module, fn_name) messages = run_checkables(analyze_function(fn)) errors = [m for m in messages if m.state > MessageType.PRE_UNSAT] - assert errors == [] + assert errors == [], [(m.state, m.message) for m in errors] diff --git a/crosshair/test_util.py b/crosshair/test_util.py index 734a477c..0d7efdb1 100644 --- a/crosshair/test_util.py +++ b/crosshair/test_util.py @@ -8,6 +8,7 @@ from typing import ( Callable, Collection, + Dict, Iterable, List, Mapping, @@ -145,9 +146,53 @@ def check_messages(checkables: Iterable[Checkable], **kw) -> ComparableLists: _NAN_ABLE = (Decimal, Real) +class _Unrealizable: + """ + Sentinel type returned by `safe_deep_realize` when realization fails. + + Treated as equal to anything by `flexible_equal`, so unrealizable values + do not poison comparisons (e.g. an arg that became a closed I/O stream). + """ + + def __repr__(self) -> str: + return "" + + +UNREALIZABLE = _Unrealizable() + + +def safe_deep_realize( + value: object, label: str = "", memo: Optional[Dict] = None +) -> object: + """ + Best-effort `deep_realize`. + + On any exception, debug-logs the failure and returns `UNREALIZABLE`, + which `flexible_equal` treats as equal to anything. This is useful for + diagnostic / comparison contexts (like post-state capture) where we + don't want a non-realizable concrete object to mask the actual result. + + Pass a shared `memo` to preserve identity across multiple calls when + realizing several values that may share substructure. + """ + try: + return deep_realize(value, memo) + except Exception as exc: + debug( + "Could not realize", + label or type(value).__name__, + ":", + type(exc).__name__, + exc, + ) + return UNREALIZABLE + + def flexible_equal(a: object, b: object) -> bool: if a is b: return True + if a is UNREALIZABLE or b is UNREALIZABLE: + return True if type(a) is type(b) and type(a).__eq__ is object.__eq__: # If types match and it uses identity-equals, we can't do much. Assume equal. return True @@ -194,8 +239,8 @@ def __eq__(self, other: object) -> bool: return ( flexible_equal(self.ret, other.ret) and type(self.exc) == type(other.exc) - and self.post_args == other.post_args - and self.post_kwargs == other.post_kwargs + and flexible_equal(self.post_args, other.post_args) + and flexible_equal(self.post_kwargs, other.post_kwargs) ) def describe(self, include_postexec=False) -> str: @@ -246,8 +291,6 @@ def summarize_execution( ret = f"C-based callable {type(_ret).__name__}" else: ret = _ret - args = deep_realize(args) - kwargs = deep_realize(kwargs) except Exception as e: exc = e if detach_path: @@ -258,6 +301,19 @@ def summarize_execution( tbstr = ch_stack(currently_handling=exc) if in_debug(): debug("hit exception:", type(exc), exc, tbstr) + # Per-element best-effort realization: an unrealizable arg becomes + # UNREALIZABLE (compares equal to anything in flexible_equal) without + # poisoning sibling args. A shared memo preserves identity across + # arguments that alias the same object. + memo: Dict = {} + args = tuple( + safe_deep_realize(a, label=f"argument {idx + 1}", memo=memo) + for idx, a in enumerate(args) + ) + kwargs = { + k: safe_deep_realize(v, label=f"keyword argument {k!r}", memo=memo) + for k, v in kwargs.items() + } return ExecutionResult(ret, exc, tbstr, args, kwargs) diff --git a/crosshair/test_util_test.py b/crosshair/test_util_test.py index 89734ff2..07159672 100644 --- a/crosshair/test_util_test.py +++ b/crosshair/test_util_test.py @@ -1,4 +1,14 @@ -from crosshair.test_util import flexible_equal +from crosshair.core import proxy_for_type +from crosshair.core_and_libs import standalone_statespace +from crosshair.libimpl.iolib import BackedStringIO +from crosshair.test_util import ( + UNREALIZABLE, + compare_returns, + flexible_equal, + safe_deep_realize, + summarize_execution, +) +from crosshair.tracers import ResumedTracing def test_flexible_equal(): @@ -24,3 +34,60 @@ def gen(): ordered_dict_2 = {3: 4, 1: 2} assert list(ordered_dict_1.items()) != list(ordered_dict_2.items()) assert flexible_equal(ordered_dict_1, ordered_dict_2) + + +def test_summarize_execution_realizes_post_args_on_exception(): + def raiser(x): + raise ValueError + + with standalone_statespace: + symbolic_int = proxy_for_type(int, "i") + with ResumedTracing(): + result = summarize_execution(raiser, (symbolic_int,)) + assert type(result.exc) is ValueError + assert type(result.post_args[0]) is int + + +def test_compare_returns_handles_closed_stream_post_args(): + """End-to-end smoke test: a function that closes its IO stream and then + raises during a follow-up operation should still produce a clean + comparison via `compare_returns` (which discards post_args).""" + + def close_and_truncate(s: BackedStringIO): + s.close() + return s.truncate() + + with standalone_statespace: + stream = BackedStringIO("abc") + with ResumedTracing(): + result = compare_returns(close_and_truncate, stream) + assert bool(result) + + +def test_summarize_execution_uses_sentinel_for_unrealizable_post_args(): + class Stubborn: + def __reduce__(self): + raise RuntimeError("intentionally unrealizable") + + def consume(s): + return None + + with standalone_statespace: + with ResumedTracing(): + result = summarize_execution(consume, (Stubborn(),)) + assert result.post_args[0] is UNREALIZABLE + + +def test_safe_deep_realize_returns_sentinel_on_failure(): + class Stubborn: + def __reduce__(self): + raise RuntimeError("nope") + + assert safe_deep_realize(Stubborn()) is UNREALIZABLE + + +def test_flexible_equal_treats_unrealizable_as_equal(): + assert flexible_equal(UNREALIZABLE, 42) + assert flexible_equal("anything", UNREALIZABLE) + assert flexible_equal((1, UNREALIZABLE, 3), (1, "different", 3)) + assert flexible_equal({"k": UNREALIZABLE}, {"k": object()}) diff --git a/crosshair/type_repo.py b/crosshair/type_repo.py index 9a258dd7..e3a27ad3 100644 --- a/crosshair/type_repo.py +++ b/crosshair/type_repo.py @@ -17,6 +17,8 @@ "z3", # These are disabled for performance or type search effectiveness: "hypothesis", + # pdb._ScriptTarget is assumption-breaking: it's a str subclass that can compare unequal after copy: + "pdb", "pkg_resources", "pytest", "py", # (part of pytest)