Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-24.04]
fail-fast: false
python-version: [
# "3.8", (our current version of mypy doesn't support 3.8)
"3.9",
Expand Down
2 changes: 2 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ CrossHair is a Python analysis tool that uses **symbolic execution** and an **SM
- **Formatting**: Black (88 chars), isort, flake8
- **Tests**: pytest; run with `PYTHONHASHSEED=0` for reproducibility
- **Pre-commit** runs black, isort, flake8, mypy, and pytest
- **Commenting**: Use a high bar for comments - genuinely surpising or confusing behaviors only.

## Must-Know Technical Background

Expand Down Expand Up @@ -46,6 +47,7 @@ CrossHair is a Python analysis tool that uses **symbolic execution** and an **SM
– If you use a `space` fixture parameter, you can `with ResumedTracing():` to enable tracing.
- Otherwise, enter a statespace context to begin tracing.
- Most pytest nicities (value printing!) are disabled because operating on symbolics under failure modes tends to obscure problems.
- The test suite is large; use `pytest -n auto --dist=worksteal` when running several tests (`pytest-xdist` is already in dev dependencies).
- Consider **debugging individual tests with `pytest -v`** - CrossHair's logging is verbose, but will display some important events:
- When and where SMT checks occur, including the SMT expression.
- When and where a value is realized.
Expand Down
2 changes: 1 addition & 1 deletion crosshair/libimpl/binascii_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ def test_builtin(fn_name: str) -> None:
fn = getattr(this_module, fn_name)
messages = run_checkables(analyze_function(fn))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/collectionslib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,4 @@ def test_builtin(fn_name: str) -> None:
fn = getattr(this_module, fn_name)
messages = run_checkables(analyze_function(fn, opts))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/datetimelib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,4 +361,4 @@ def test_builtin(fn_name: str) -> None:
this_module = sys.modules[__name__]
messages = run_checkables(analyze_function(getattr(this_module, fn_name)))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/decimallib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ def test_builtin(fn_name: str) -> None:
this_module = sys.modules[__name__]
messages = run_checkables(analyze_function(getattr(this_module, fn_name)))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/encodings_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,4 @@ def test_builtin(fn_name: str) -> None:
this_module = sys.modules[__name__]
messages = run_checkables(analyze_function(getattr(this_module, fn_name)))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
4 changes: 2 additions & 2 deletions crosshair/libimpl/iolib.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ def __ch_pytype__(self):
return StringIO

def __ch_realize__(self):
if self.closed:
raise ValueError
contents = realize(self._contents)
newline_mode = realize(self._newline_mode)
pos = realize(self._pos)
with NoTracing():
# Avoid constructor newline translation; set exact underlying state instead.
sio = StringIO("", newline_mode)
sio.__setstate__((contents, newline_mode, pos, None))
if self.closed:
sio.close()
return sio

@property
Expand Down
2 changes: 1 addition & 1 deletion crosshair/libimpl/iolib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,4 @@ def test_builtin(fn_name: str) -> None:
fn = getattr(sys.modules[__name__], fn_name)
messages = run_checkables(analyze_function(fn))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/jsonlib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ def test_builtin(fn_name: str) -> None:
fn = getattr(sys.modules[__name__], fn_name)
messages = run_checkables(analyze_function(fn))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/mathlib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ def test_builtin(fn_name: str) -> None:
)
)
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/relib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,4 @@ def test_builtin(fn_name: str) -> None:
fn = getattr(this_module, fn_name)
messages = run_checkables(analyze_function(fn))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
2 changes: 1 addition & 1 deletion crosshair/libimpl/structlib_ch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ def test_builtin(fn_name: str) -> None:
fn = getattr(this_module, fn_name)
messages = run_checkables(analyze_function(fn))
errors = [m for m in messages if m.state > MessageType.PRE_UNSAT]
assert errors == []
assert errors == [], [(m.state, m.message) for m in errors]
64 changes: 60 additions & 4 deletions crosshair/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import (
Callable,
Collection,
Dict,
Iterable,
List,
Mapping,
Expand Down Expand Up @@ -145,9 +146,53 @@ def check_messages(checkables: Iterable[Checkable], **kw) -> ComparableLists:
_NAN_ABLE = (Decimal, Real)


class _Unrealizable:
"""
Sentinel type returned by `safe_deep_realize` when realization fails.

Treated as equal to anything by `flexible_equal`, so unrealizable values
do not poison comparisons (e.g. an arg that became a closed I/O stream).
"""

def __repr__(self) -> str:
return "<unrealizable>"


UNREALIZABLE = _Unrealizable()


def safe_deep_realize(
value: object, label: str = "", memo: Optional[Dict] = None
) -> object:
"""
Best-effort `deep_realize`.

On any exception, debug-logs the failure and returns `UNREALIZABLE`,
which `flexible_equal` treats as equal to anything. This is useful for
diagnostic / comparison contexts (like post-state capture) where we
don't want a non-realizable concrete object to mask the actual result.

Pass a shared `memo` to preserve identity across multiple calls when
realizing several values that may share substructure.
"""
try:
return deep_realize(value, memo)
except Exception as exc:
debug(
"Could not realize",
label or type(value).__name__,
":",
type(exc).__name__,
exc,
)
return UNREALIZABLE


def flexible_equal(a: object, b: object) -> bool:
if a is b:
return True
if a is UNREALIZABLE or b is UNREALIZABLE:
return True
if type(a) is type(b) and type(a).__eq__ is object.__eq__:
# If types match and it uses identity-equals, we can't do much. Assume equal.
return True
Expand Down Expand Up @@ -194,8 +239,8 @@ def __eq__(self, other: object) -> bool:
return (
flexible_equal(self.ret, other.ret)
and type(self.exc) == type(other.exc)
and self.post_args == other.post_args
and self.post_kwargs == other.post_kwargs
and flexible_equal(self.post_args, other.post_args)
and flexible_equal(self.post_kwargs, other.post_kwargs)
)

def describe(self, include_postexec=False) -> str:
Expand Down Expand Up @@ -246,8 +291,6 @@ def summarize_execution(
ret = f"C-based callable {type(_ret).__name__}"
else:
ret = _ret
args = deep_realize(args)
kwargs = deep_realize(kwargs)
except Exception as e:
exc = e
if detach_path:
Expand All @@ -258,6 +301,19 @@ def summarize_execution(
tbstr = ch_stack(currently_handling=exc)
if in_debug():
debug("hit exception:", type(exc), exc, tbstr)
# Per-element best-effort realization: an unrealizable arg becomes
# UNREALIZABLE (compares equal to anything in flexible_equal) without
# poisoning sibling args. A shared memo preserves identity across
# arguments that alias the same object.
memo: Dict = {}
args = tuple(
safe_deep_realize(a, label=f"argument {idx + 1}", memo=memo)
for idx, a in enumerate(args)
)
kwargs = {
k: safe_deep_realize(v, label=f"keyword argument {k!r}", memo=memo)
for k, v in kwargs.items()
}
return ExecutionResult(ret, exc, tbstr, args, kwargs)


Expand Down
69 changes: 68 additions & 1 deletion crosshair/test_util_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
from crosshair.test_util import flexible_equal
from crosshair.core import proxy_for_type
from crosshair.core_and_libs import standalone_statespace
from crosshair.libimpl.iolib import BackedStringIO
from crosshair.test_util import (
UNREALIZABLE,
compare_returns,
flexible_equal,
safe_deep_realize,
summarize_execution,
)
from crosshair.tracers import ResumedTracing


def test_flexible_equal():
Expand All @@ -24,3 +34,60 @@ def gen():
ordered_dict_2 = {3: 4, 1: 2}
assert list(ordered_dict_1.items()) != list(ordered_dict_2.items())
assert flexible_equal(ordered_dict_1, ordered_dict_2)


def test_summarize_execution_realizes_post_args_on_exception():
def raiser(x):
raise ValueError

with standalone_statespace:
symbolic_int = proxy_for_type(int, "i")
with ResumedTracing():
result = summarize_execution(raiser, (symbolic_int,))
assert type(result.exc) is ValueError
assert type(result.post_args[0]) is int


def test_compare_returns_handles_closed_stream_post_args():
"""End-to-end smoke test: a function that closes its IO stream and then
raises during a follow-up operation should still produce a clean
comparison via `compare_returns` (which discards post_args)."""

def close_and_truncate(s: BackedStringIO):
s.close()
return s.truncate()

with standalone_statespace:
stream = BackedStringIO("abc")
with ResumedTracing():
result = compare_returns(close_and_truncate, stream)
assert bool(result)


def test_summarize_execution_uses_sentinel_for_unrealizable_post_args():
class Stubborn:
def __reduce__(self):
raise RuntimeError("intentionally unrealizable")

def consume(s):
return None

with standalone_statespace:
with ResumedTracing():
result = summarize_execution(consume, (Stubborn(),))
assert result.post_args[0] is UNREALIZABLE


def test_safe_deep_realize_returns_sentinel_on_failure():
class Stubborn:
def __reduce__(self):
raise RuntimeError("nope")

assert safe_deep_realize(Stubborn()) is UNREALIZABLE


def test_flexible_equal_treats_unrealizable_as_equal():
assert flexible_equal(UNREALIZABLE, 42)
assert flexible_equal("anything", UNREALIZABLE)
assert flexible_equal((1, UNREALIZABLE, 3), (1, "different", 3))
assert flexible_equal({"k": UNREALIZABLE}, {"k": object()})
2 changes: 2 additions & 0 deletions crosshair/type_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
"z3",
# These are disabled for performance or type search effectiveness:
"hypothesis",
# pdb._ScriptTarget is assumption-breaking: it's a str subclass that can compare unequal after copy:
"pdb",
"pkg_resources",
"pytest",
"py", # (part of pytest)
Expand Down
Loading