diff --git a/pyproject.toml b/pyproject.toml
index a563e89..baef899 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "jsonlt-python"
-version = "0.1.0a3"
+version = "0.1.0a4"
 description = "Reference implementation of the JSONLT (JSON Lines Table) specification for Python."
 readme = "README.md"
 license = "MIT"
diff --git a/tests/properties/strategies.py b/tests/properties/strategies.py
new file mode 100644
index 0000000..08fd78a
--- /dev/null
+++ b/tests/properties/strategies.py
@@ -0,0 +1,64 @@
+"""Hypothesis strategies for JSONLT property-based testing."""
+
+from hypothesis import strategies as st
+
+from jsonlt._constants import MAX_INTEGER_KEY, MAX_TUPLE_ELEMENTS, MIN_INTEGER_KEY
+
+# Key-related strategies (migrated from test_key_comparison.py)
+key_element_strategy = st.one_of(
+    st.text(),
+    st.integers(min_value=MIN_INTEGER_KEY, max_value=MAX_INTEGER_KEY),
+)
+
+key_strategy = st.one_of(
+    st.text(),
+    st.integers(min_value=MIN_INTEGER_KEY, max_value=MAX_INTEGER_KEY),
+    st.tuples(*[key_element_strategy] * 1),
+    st.tuples(*[key_element_strategy] * 2),
+    st.lists(key_element_strategy, min_size=1, max_size=MAX_TUPLE_ELEMENTS).map(tuple),
+)
+
+# JSON primitive strategy
+json_primitive_strategy = st.one_of(
+    st.none(),
+    st.booleans(),
+    st.integers(),
+    st.floats(allow_nan=False, allow_infinity=False),
+    st.text(),
+)
+
+# JSON value strategy (recursive, bounded size)
+# st.recursive nests lists/dicts; max_leaves caps the number of primitive leaves
+json_value_strategy = st.recursive(
+    json_primitive_strategy,
+    lambda children: st.one_of(
+        st.lists(children, max_size=5),
+        st.dictionaries(st.text(max_size=20), children, max_size=5),
+    ),
+    max_leaves=50,
+)
+
+# JSON object strategy (for records)
+json_object_strategy = st.dictionaries(
+    st.text(max_size=20).filter(
+        lambda s: not s.startswith("$")
+    ),  # No $-prefixed fields
+    json_value_strategy,
+    max_size=10,
+)
+
+# Field name strategy (no $-prefix for valid records)
+field_name_strategy = st.text(min_size=1, max_size=20).filter(
+    lambda s: not s.startswith("$")
+)
+
+# Key specifier strategy
+scalar_key_specifier_strategy = field_name_strategy
+# unique=True keeps field names distinct without rejection-sampling via .filter()
+tuple_key_specifier_strategy = st.lists(
+    field_name_strategy, min_size=2, max_size=5, unique=True
+).map(tuple)
+key_specifier_strategy = st.one_of(
+    scalar_key_specifier_strategy,
+    tuple_key_specifier_strategy,
+)
diff --git a/tests/properties/test_json_properties.py b/tests/properties/test_json_properties.py
new file mode 100644
index 0000000..f2e7098
--- /dev/null
+++ b/tests/properties/test_json_properties.py
@@ -0,0 +1,55 @@
+"""Property-based tests for JSON serialization and parsing."""
+
+import json
+import re
+from typing import TYPE_CHECKING
+
+from hypothesis import given
+
+from jsonlt._json import parse_json_line, serialize_json
+
+from .strategies import json_object_strategy
+
+if TYPE_CHECKING:
+    from jsonlt._json import JSONObject
+
+
+class TestSerializationRoundtrip:
+    """Serialize then parse produces equivalent data."""
+
+    @given(json_object_strategy)
+    def test_roundtrip_preserves_data(self, obj: "JSONObject") -> None:
+        """parse(serialize(obj)) == obj for any valid JSON object."""
+        serialized = serialize_json(obj)
+        parsed = parse_json_line(serialized)
+        assert parsed == obj
+
+    @given(json_object_strategy)
+    def test_serialize_is_deterministic(self, obj: "JSONObject") -> None:
+        """serialize(obj) == serialize(obj) always."""
+        result1 = serialize_json(obj)
+        result2 = serialize_json(obj)
+        assert result1 == result2
+
+
+class TestSerializationProperties:
+    """Serialization output format invariants."""
+
+    @given(json_object_strategy)
+    def test_no_extraneous_whitespace(self, obj: "JSONObject") -> None:
+        """Output contains no space/newline/tab outside strings."""
+        serialized = serialize_json(obj)
+        # Strip every JSON string literal (escape-aware); what remains is
+        # structural syntax, numbers, and literals, which must be whitespace-free.
+        structural = re.sub(r'"(?:[^"\\]|\\.)*"', "", serialized)
+        assert not any(ch.isspace() for ch in structural)
+        # Re-serializing the parsed output must reproduce it exactly.
+        reserialized = serialize_json(parse_json_line(serialized))
+        assert serialized == reserialized
+
+    @given(json_object_strategy)
+    def test_valid_json_output(self, obj: "JSONObject") -> None:
+        """Output is parseable by standard json.loads."""
+        serialized = serialize_json(obj)
+        # Should not raise
+        json.loads(serialized)
diff --git a/tests/properties/test_key_comparison.py b/tests/properties/test_key_properties.py
similarity index 87%
rename from tests/properties/test_key_comparison.py
rename to tests/properties/test_key_properties.py
index e008ea0..cd0d880 100644
--- a/tests/properties/test_key_comparison.py
+++ b/tests/properties/test_key_properties.py
@@ -1,20 +1,11 @@
+"""Property-based tests for key comparison operations."""
+
 from hypothesis import given, strategies as st
 
 from jsonlt._constants import MAX_INTEGER_KEY, MAX_TUPLE_ELEMENTS, MIN_INTEGER_KEY
 from jsonlt._keys import compare_keys
 
-key_element_strategy = st.one_of(
-    st.text(),
-    st.integers(min_value=MIN_INTEGER_KEY, max_value=MAX_INTEGER_KEY),
-)
-
-key_strategy = st.one_of(
-    st.text(),
-    st.integers(min_value=MIN_INTEGER_KEY, max_value=MAX_INTEGER_KEY),
-    st.tuples(*[key_element_strategy] * 1),
-    st.tuples(*[key_element_strategy] * 2),
-    st.lists(key_element_strategy, min_size=1, max_size=MAX_TUPLE_ELEMENTS).map(tuple),
-)
+from .strategies import key_element_strategy, key_strategy
 
 
 class TestTotalOrderProperties:
diff --git a/tests/properties/test_record_properties.py b/tests/properties/test_record_properties.py
new file mode 100644
index 0000000..36ce8e6
--- /dev/null
+++ b/tests/properties/test_record_properties.py
@@ -0,0 +1,111 @@
+"""Property-based tests for record validation."""
+
+from typing import TYPE_CHECKING
+
+from hypothesis import given, strategies as st
+
+from jsonlt._records import build_tombstone, extract_key, is_tombstone, validate_record
+
+from .strategies import (
+    field_name_strategy,
+    json_value_strategy,
+    key_element_strategy,
+    key_specifier_strategy,
+    scalar_key_specifier_strategy,
+    tuple_key_specifier_strategy,
+)
+
+if TYPE_CHECKING:
+    from jsonlt._json import JSONObject
+
+
+class TestValidRecordProperties:
+    """Valid records pass validation without exception."""
+
+    @given(
+        scalar_key_specifier_strategy,
+        key_element_strategy,
+        st.dictionaries(field_name_strategy, json_value_strategy, max_size=5),
+    )
+    def test_valid_scalar_key_record(
+        self, key_field: str, key_value: str | int, extra_fields: "JSONObject"
+    ) -> None:
+        """Records with valid scalar keys pass validation."""
+        # Build record with key field and extra data
+        record: JSONObject = {
+            key_field: key_value,
+            **{k: v for k, v in extra_fields.items() if k != key_field},
+        }
+        validate_record(record, key_field)  # Should not raise
+
+    @given(tuple_key_specifier_strategy, st.data())
+    def test_valid_compound_key_record(
+        self, key_specifier: tuple[str, ...], data: st.DataObject
+    ) -> None:
+        """Records with valid compound keys pass validation."""
+        # Generate a key value for each field in the specifier
+        record: JSONObject = {}
+        for field in key_specifier:
+            record[field] = data.draw(key_element_strategy)
+        validate_record(record, key_specifier)  # Should not raise
+
+
+class TestExtractKeyProperties:
+    """Key extraction invariants."""
+
+    @given(scalar_key_specifier_strategy, key_element_strategy)
+    def test_extracted_scalar_key_matches_field(
+        self, key_field: str, key_value: str | int
+    ) -> None:
+        """Extracted key equals the key field value."""
+        record: JSONObject = {key_field: key_value}
+        extracted = extract_key(record, key_field)
+        assert extracted == key_value
+
+    @given(tuple_key_specifier_strategy, st.data())
+    def test_extracted_compound_key_matches_fields(
+        self, key_specifier: tuple[str, ...], data: st.DataObject
+    ) -> None:
+        """Extracted compound key is tuple of field values."""
+        record: JSONObject = {}
+        expected_elements: list[str | int] = []
+        for field in key_specifier:
+            value: str | int = data.draw(key_element_strategy)
+            record[field] = value
+            expected_elements.append(value)
+
+        extracted = extract_key(record, key_specifier)
+        assert extracted == tuple(expected_elements)
+
+
+class TestTombstoneProperties:
+    """Tombstone detection and construction."""
+
+    @given(key_specifier_strategy, st.data())
+    def test_tombstone_detected(
+        self, key_specifier: str | tuple[str, ...], data: st.DataObject
+    ) -> None:
+        """is_tombstone returns True for tombstones."""
+        # Build a valid key
+        if isinstance(key_specifier, str):
+            key: str | int | tuple[str | int, ...] = data.draw(key_element_strategy)
+        else:
+            key = tuple(data.draw(key_element_strategy) for _ in key_specifier)
+
+        tombstone = build_tombstone(key, key_specifier)
+        assert is_tombstone(tombstone) is True
+
+    @given(key_specifier_strategy, st.data())
+    def test_build_tombstone_roundtrip(
+        self, key_specifier: str | tuple[str, ...], data: st.DataObject
+    ) -> None:
+        """extract_key(build_tombstone(key, specifier), specifier) == key."""
+        # Build a valid key
+        if isinstance(key_specifier, str):
+            key: str | int | tuple[str | int, ...] = data.draw(key_element_strategy)
+        else:
+            key = tuple(data.draw(key_element_strategy) for _ in key_specifier)
+
+        tombstone = build_tombstone(key, key_specifier)
+        extracted = extract_key(tombstone, key_specifier)
+        assert extracted == key
diff --git a/tests/properties/test_state_properties.py b/tests/properties/test_state_properties.py
new file mode 100644
index 0000000..6b55fb0
--- /dev/null
+++ b/tests/properties/test_state_properties.py
@@ -0,0 +1,154 @@
+"""Property-based tests for state computation."""
+
+from typing import TYPE_CHECKING
+
+from hypothesis import given, strategies as st
+
+from jsonlt._records import extract_key, is_tombstone
+from jsonlt._state import compute_logical_state
+
+from .strategies import field_name_strategy, json_value_strategy, key_element_strategy
+
+if TYPE_CHECKING:
+    from jsonlt._json import JSONObject
+
+# NOTE: record literals below list the fixed payload fields ("data", "version")
+# *before* the key field. field_name_strategy can generate those very names;
+# with the key field last, the generated key value always wins the collision.
+
+
+class TestStateComputationBasics:
+    """Fundamental state computation properties."""
+
+    @given(field_name_strategy)
+    def test_empty_sequence_yields_empty_state(self, key_field: str) -> None:
+        """Empty operation sequence produces empty state."""
+        state = compute_logical_state([], key_field)
+        assert state == {}
+
+    @given(
+        field_name_strategy,
+        key_element_strategy,
+        st.dictionaries(field_name_strategy, json_value_strategy, max_size=3),
+    )
+    def test_single_upsert_yields_single_entry(
+        self, key_field: str, key_value: str | int, extra: "JSONObject"
+    ) -> None:
+        """Single record produces state with one entry."""
+        record: JSONObject = {
+            key_field: key_value,
+            **{k: v for k, v in extra.items() if k != key_field},
+        }
+        state = compute_logical_state([record], key_field)
+        assert len(state) == 1
+        assert state[key_value] == record
+
+
+class TestUpsertProperties:
+    """Upsert operation semantics."""
+
+    @given(field_name_strategy, key_element_strategy)
+    def test_upsert_idempotent(self, key_field: str, key_value: str | int) -> None:
+        """Applying same record twice leaves state unchanged."""
+        record: JSONObject = {"data": "value", key_field: key_value}
+        state = compute_logical_state([record, record], key_field)
+        assert len(state) == 1
+        assert state[key_value] == record
+
+    @given(field_name_strategy, key_element_strategy)
+    def test_last_upsert_wins(self, key_field: str, key_value: str | int) -> None:
+        """Last record for a key determines final state."""
+        record1: JSONObject = {"version": 1, key_field: key_value}
+        record2: JSONObject = {"version": 2, key_field: key_value}
+        state = compute_logical_state([record1, record2], key_field)
+        assert state[key_value] == record2
+
+
+class TestDeleteProperties:
+    """Delete operation semantics."""
+
+    @given(field_name_strategy, key_element_strategy)
+    def test_delete_removes_existing(
+        self, key_field: str, key_value: str | int
+    ) -> None:
+        """Tombstone removes key from state."""
+        record: JSONObject = {"data": "value", key_field: key_value}
+        tombstone: JSONObject = {"$deleted": True, key_field: key_value}
+        state = compute_logical_state([record, tombstone], key_field)
+        assert key_value not in state
+
+    @given(field_name_strategy, key_element_strategy)
+    def test_delete_nonexistent_is_noop(
+        self, key_field: str, key_value: str | int
+    ) -> None:
+        """Deleting nonexistent key has no effect."""
+        tombstone: JSONObject = {"$deleted": True, key_field: key_value}
+        state = compute_logical_state([tombstone], key_field)
+        assert state == {}
+
+    @given(field_name_strategy, key_element_strategy)
+    def test_reinsert_after_delete(self, key_field: str, key_value: str | int) -> None:
+        """Record after tombstone re-adds key."""
+        record1: JSONObject = {"version": 1, key_field: key_value}
+        tombstone: JSONObject = {"$deleted": True, key_field: key_value}
+        record2: JSONObject = {"version": 2, key_field: key_value}
+        state = compute_logical_state([record1, tombstone, record2], key_field)
+        assert state[key_value] == record2
+
+
+class TestStateInvariants:
+    """Invariants that hold for all operation sequences."""
+
+    @given(
+        field_name_strategy,
+        st.lists(
+            st.tuples(
+                key_element_strategy,
+                st.booleans(),  # is_delete
+            ),
+            min_size=0,
+            max_size=20,
+        ),
+    )
+    def test_state_values_are_not_tombstones(
+        self, key_field: str, operations: list[tuple[str | int, bool]]
+    ) -> None:
+        """All values in state are records, not tombstones."""
+        ops: list[JSONObject] = []
+        for key_value, is_delete in operations:
+            if is_delete:
+                ops.append({"$deleted": True, key_field: key_value})
+            else:
+                ops.append({"data": "value", key_field: key_value})
+
+        state = compute_logical_state(ops, key_field)
+
+        for value in state.values():
+            assert not is_tombstone(value)
+
+    @given(
+        field_name_strategy,
+        st.lists(
+            st.tuples(
+                key_element_strategy,
+                st.booleans(),  # is_delete
+            ),
+            min_size=0,
+            max_size=20,
+        ),
+    )
+    def test_state_keys_match_record_keys(
+        self, key_field: str, operations: list[tuple[str | int, bool]]
+    ) -> None:
+        """For all (k, v) in state: extract_key(v, ks) == k."""
+        ops: list[JSONObject] = []
+        for key_value, is_delete in operations:
+            if is_delete:
+                ops.append({"$deleted": True, key_field: key_value})
+            else:
+                ops.append({"data": "value", key_field: key_value})
+
+        state = compute_logical_state(ops, key_field)
+
+        for key, record in state.items():
+            assert extract_key(record, key_field) == key
diff --git a/uv.lock b/uv.lock
index d52b255..8aaef54 100644
--- a/uv.lock
+++ b/uv.lock
@@ -852,7 +852,7 @@ wheels = [
 
 [[package]]
 name = "jsonlt-python"
-version = "0.1.0a3"
+version = "0.1.0a4"
 source = { editable = "." }
 dependencies = [
     { name = "typing-extensions" },