pschanely · pschanely · May 22, 2026 · May 21, 2026 · May 22, 2026 · May 22, 2026
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
@@ -21,6 +21,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-24.04]
+        fail-fast: false
         python-version: [
           # "3.8",  (our current version of mypy doesn't support 3.8)
           "3.9",

diff --git a/AGENTS.md b/AGENTS.md
@@ -19,6 +19,7 @@ CrossHair is a Python analysis tool that uses **symbolic execution** and an **SM
 - **Formatting**: Black (88 chars), isort, flake8
 - **Tests**: pytest; run with `PYTHONHASHSEED=0` for reproducibility
 - **Pre-commit** runs black, isort, flake8, mypy, and pytest
+- **Commenting**: Use a high bar for comments - genuinely surpising or confusing behaviors only.
 
 ## Must-Know Technical Background
 
@@ -46,6 +47,7 @@ CrossHair is a Python analysis tool that uses **symbolic execution** and an **SM
     – If you use a `space` fixture parameter, you can `with ResumedTracing():` to enable tracing.
     - Otherwise, enter a statespace context to begin tracing.
   - Most pytest nicities (value printing!) are disabled because operating on symbolics under failure modes tends to obscure problems.
+  - The test suite is large; use `pytest -n auto --dist=worksteal` when running several tests (`pytest-xdist` is already in dev dependencies).
   - Consider **debugging individual tests with `pytest -v`** - CrossHair's logging is verbose, but will display some important events:
     - When and where SMT checks occur, including the SMT expression.
     - When and where a value is realized.

diff --git a/crosshair/libimpl/binascii_ch_test.py b/crosshair/libimpl/binascii_ch_test.py
@@ -27,4 +27,4 @@ def test_builtin(fn_name: str) -> None:
     fn = getattr(this_module, fn_name)
     messages = run_checkables(analyze_function(fn))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/collectionslib_ch_test.py b/crosshair/libimpl/collectionslib_ch_test.py
@@ -249,4 +249,4 @@ def test_builtin(fn_name: str) -> None:
     fn = getattr(this_module, fn_name)
     messages = run_checkables(analyze_function(fn, opts))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/datetimelib_ch_test.py b/crosshair/libimpl/datetimelib_ch_test.py
@@ -361,4 +361,4 @@ def test_builtin(fn_name: str) -> None:
     this_module = sys.modules[__name__]
     messages = run_checkables(analyze_function(getattr(this_module, fn_name)))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/decimallib_ch_test.py b/crosshair/libimpl/decimallib_ch_test.py
@@ -75,4 +75,4 @@ def test_builtin(fn_name: str) -> None:
     this_module = sys.modules[__name__]
     messages = run_checkables(analyze_function(getattr(this_module, fn_name)))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/encodings_ch_test.py b/crosshair/libimpl/encodings_ch_test.py
@@ -80,4 +80,4 @@ def test_builtin(fn_name: str) -> None:
     this_module = sys.modules[__name__]
     messages = run_checkables(analyze_function(getattr(this_module, fn_name)))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/iolib.py b/crosshair/libimpl/iolib.py
@@ -53,15 +53,15 @@ def __ch_pytype__(self):
         return StringIO
 
     def __ch_realize__(self):
-        if self.closed:
-            raise ValueError
         contents = realize(self._contents)
         newline_mode = realize(self._newline_mode)
         pos = realize(self._pos)
         with NoTracing():
             # Avoid constructor newline translation; set exact underlying state instead.
             sio = StringIO("", newline_mode)
             sio.__setstate__((contents, newline_mode, pos, None))
+        if self.closed:
+            sio.close()
         return sio
 
     @property

diff --git a/crosshair/libimpl/iolib_ch_test.py b/crosshair/libimpl/iolib_ch_test.py
@@ -135,4 +135,4 @@ def test_builtin(fn_name: str) -> None:
     fn = getattr(sys.modules[__name__], fn_name)
     messages = run_checkables(analyze_function(fn))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/jsonlib_ch_test.py b/crosshair/libimpl/jsonlib_ch_test.py
@@ -39,4 +39,4 @@ def test_builtin(fn_name: str) -> None:
     fn = getattr(sys.modules[__name__], fn_name)
     messages = run_checkables(analyze_function(fn))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/mathlib_ch_test.py b/crosshair/libimpl/mathlib_ch_test.py
@@ -41,4 +41,4 @@ def test_builtin(fn_name: str) -> None:
         )
     )
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/relib_ch_test.py b/crosshair/libimpl/relib_ch_test.py
@@ -190,4 +190,4 @@ def test_builtin(fn_name: str) -> None:
     fn = getattr(this_module, fn_name)
     messages = run_checkables(analyze_function(fn))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/libimpl/structlib_ch_test.py b/crosshair/libimpl/structlib_ch_test.py
@@ -55,4 +55,4 @@ def test_builtin(fn_name: str) -> None:
     fn = getattr(this_module, fn_name)
     messages = run_checkables(analyze_function(fn))
     errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
-    assert errors == []
+    assert errors == [], [(m.state, m.message) for m in errors]
diff --git a/crosshair/test_util.py b/crosshair/test_util.py
@@ -8,6 +8,7 @@
 from typing import (
     Callable,
     Collection,
+    Dict,
     Iterable,
     List,
     Mapping,
@@ -145,9 +146,53 @@ def check_messages(checkables: Iterable[Checkable], **kw) -> ComparableLists:
 _NAN_ABLE = (Decimal, Real)
 
 
+class _Unrealizable:
+    """
+    Sentinel type returned by `safe_deep_realize` when realization fails.
+
+    Treated as equal to anything by `flexible_equal`, so unrealizable values
+    do not poison comparisons (e.g. an arg that became a closed I/O stream).
+    """
+
+    def __repr__(self) -> str:
+        return "<unrealizable>"
+
+
+UNREALIZABLE = _Unrealizable()
+
+
+def safe_deep_realize(
+    value: object, label: str = "", memo: Optional[Dict] = None
+) -> object:
+    """
+    Best-effort `deep_realize`.
+
+    On any exception, debug-logs the failure and returns `UNREALIZABLE`,
+    which `flexible_equal` treats as equal to anything. This is useful for
+    diagnostic / comparison contexts (like post-state capture) where we
+    don't want a non-realizable concrete object to mask the actual result.
+
+    Pass a shared `memo` to preserve identity across multiple calls when
+    realizing several values that may share substructure.
+    """
+    try:
+        return deep_realize(value, memo)
+    except Exception as exc:
+        debug(
+            "Could not realize",
+            label or type(value).__name__,
+            ":",
+            type(exc).__name__,
+            exc,
+        )
+        return UNREALIZABLE
+
+
 def flexible_equal(a: object, b: object) -> bool:
     if a is b:
         return True
+    if a is UNREALIZABLE or b is UNREALIZABLE:
+        return True
     if type(a) is type(b) and type(a).__eq__ is object.__eq__:
         # If types match and it uses identity-equals, we can't do much. Assume equal.
         return True
@@ -194,8 +239,8 @@ def __eq__(self, other: object) -> bool:
         return (
             flexible_equal(self.ret, other.ret)
             and type(self.exc) == type(other.exc)
-            and self.post_args == other.post_args
-            and self.post_kwargs == other.post_kwargs
+            and flexible_equal(self.post_args, other.post_args)
+            and flexible_equal(self.post_kwargs, other.post_kwargs)
         )
 
     def describe(self, include_postexec=False) -> str:
@@ -246,8 +291,6 @@ def summarize_execution(
             ret = f"C-based callable {type(_ret).__name__}"
         else:
             ret = _ret
-        args = deep_realize(args)
-        kwargs = deep_realize(kwargs)
     except Exception as e:
         exc = e
         if detach_path:
@@ -258,6 +301,19 @@ def summarize_execution(
         tbstr = ch_stack(currently_handling=exc)
         if in_debug():
             debug("hit exception:", type(exc), exc, tbstr)
+    # Per-element best-effort realization: an unrealizable arg becomes
+    # UNREALIZABLE (compares equal to anything in flexible_equal) without
+    # poisoning sibling args. A shared memo preserves identity across
+    # arguments that alias the same object.
+    memo: Dict = {}
+    args = tuple(
+        safe_deep_realize(a, label=f"argument {idx + 1}", memo=memo)
+        for idx, a in enumerate(args)
+    )
+    kwargs = {
+        k: safe_deep_realize(v, label=f"keyword argument {k!r}", memo=memo)
+        for k, v in kwargs.items()
+    }
     return ExecutionResult(ret, exc, tbstr, args, kwargs)
 
 

diff --git a/crosshair/test_util_test.py b/crosshair/test_util_test.py
@@ -1,4 +1,14 @@
-from crosshair.test_util import flexible_equal
+from crosshair.core import proxy_for_type
+from crosshair.core_and_libs import standalone_statespace
+from crosshair.libimpl.iolib import BackedStringIO
+from crosshair.test_util import (
+    UNREALIZABLE,
+    compare_returns,
+    flexible_equal,
+    safe_deep_realize,
+    summarize_execution,
+)
+from crosshair.tracers import ResumedTracing
 
 
 def test_flexible_equal():
@@ -24,3 +34,60 @@ def gen():
     ordered_dict_2 = {3: 4, 1: 2}
     assert list(ordered_dict_1.items()) != list(ordered_dict_2.items())
     assert flexible_equal(ordered_dict_1, ordered_dict_2)
+
+
+def test_summarize_execution_realizes_post_args_on_exception():
+    def raiser(x):
+        raise ValueError
+
+    with standalone_statespace:
+        symbolic_int = proxy_for_type(int, "i")
+        with ResumedTracing():
+            result = summarize_execution(raiser, (symbolic_int,))
+    assert type(result.exc) is ValueError
+    assert type(result.post_args[0]) is int
+
+
+def test_compare_returns_handles_closed_stream_post_args():
+    """End-to-end smoke test: a function that closes its IO stream and then
+    raises during a follow-up operation should still produce a clean
+    comparison via `compare_returns` (which discards post_args)."""
+
+    def close_and_truncate(s: BackedStringIO):
+        s.close()
+        return s.truncate()
+
+    with standalone_statespace:
+        stream = BackedStringIO("abc")
+        with ResumedTracing():
+            result = compare_returns(close_and_truncate, stream)
+    assert bool(result)
+
+
+def test_summarize_execution_uses_sentinel_for_unrealizable_post_args():
+    class Stubborn:
+        def __reduce__(self):
+            raise RuntimeError("intentionally unrealizable")
+
+    def consume(s):
+        return None
+
+    with standalone_statespace:
+        with ResumedTracing():
+            result = summarize_execution(consume, (Stubborn(),))
+    assert result.post_args[0] is UNREALIZABLE
+
+
+def test_safe_deep_realize_returns_sentinel_on_failure():
+    class Stubborn:
+        def __reduce__(self):
+            raise RuntimeError("nope")
+
+    assert safe_deep_realize(Stubborn()) is UNREALIZABLE
+
+
+def test_flexible_equal_treats_unrealizable_as_equal():
+    assert flexible_equal(UNREALIZABLE, 42)
+    assert flexible_equal("anything", UNREALIZABLE)
+    assert flexible_equal((1, UNREALIZABLE, 3), (1, "different", 3))
+    assert flexible_equal({"k": UNREALIZABLE}, {"k": object()})
diff --git a/crosshair/type_repo.py b/crosshair/type_repo.py
@@ -17,6 +17,8 @@
     "z3",
     # These are disabled for performance or type search effectiveness:
     "hypothesis",
+    # pdb._ScriptTarget is assumption-breaking: it's a str subclass that can compare unequal after copy:
+    "pdb",
     "pkg_resources",
     "pytest",
     "py",  # (part of pytest)