Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions tests/test_extraction_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,36 @@ def test_file_path_extraction(self):
result = rule_based_extract("Edit the file src/main.py to fix the bug")
entity_names = [e.name for e in result.entities]
assert any("main.py" in n for n in entity_names)

def test_rule_based_extract_integration(self):
    """Run one mixed sentence through ``rule_based_extract`` and check each output.

    The input combines a stated preference, a date and an email address, so
    a single call exercises the entities, the relations and the temporal
    context of the returned result.
    """
    extraction = rule_based_extract(
        "I prefer Python. The meeting is on 2025-10-12. Email me at test@example.com."
    )

    # Entities: the preferred technology and the email address must both appear.
    names = [entity.name for entity in extraction.entities]
    for expected in ("Python", "test@example.com"):
        assert expected in names

    # Relations: at least one "prefers" relation, the first linking user -> Python.
    preferences = [
        rel for rel in extraction.relations if rel.predicate == "prefers"
    ]
    assert preferences
    first = preferences[0]
    assert first.subject == "user"
    assert first.object == "Python"

    # Temporal context: the date from the sentence must be contained in it.
    assert "2025-10-12" in extraction.temporal_context
Comment on lines +80 to +92
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The assertions in this integration test can be made more robust and precise. Verifying the entity_type ensures that the rule-based logic categorizes entities correctly (e.g., 'tech' vs. 'person'), which is critical for downstream memory processing. Additionally, using exact equality for the relation count and the temporal context, and asserting the summary, provides better protection against regressions or unexpected side effects in the extraction pipeline, in keeping with the SOTA+ 'Production-Grade' philosophy.

Suggested change
# Assert entities
entity_names = [e.name for e in result.entities]
assert "Python" in entity_names
assert "test@example.com" in entity_names
# Assert preference relations
prefers_rels = [r for r in result.relations if r.predicate == "prefers"]
assert len(prefers_rels) > 0
assert prefers_rels[0].subject == "user"
assert prefers_rels[0].object == "Python"
# Assert temporal_context
assert "2025-10-12" in result.temporal_context
# Assert entities and their types
entities = {e.name: e.entity_type for e in result.entities}
assert entities.get("Python") == "tech"
assert entities.get("test@example.com") == "person"
# Assert preference relations
prefers_rels = [r for r in result.relations if r.predicate == "prefers"]
assert len(prefers_rels) == 1
assert prefers_rels[0].subject == "user"
assert prefers_rels[0].object == "Python"
# Assert temporal_context and summary
assert result.temporal_context == "2025-10-12"
assert result.summary == "I prefer Python."
References
  1. Production-Grade Only: NEVER use placeholders, stubs, or 'samples' for core logic. All code must be production-ready, typed, and robust. (link)


def test_rule_based_extract_dependency_relations(self):
    """Check that a "depends on" sentence yields a ``depends_on`` relation.

    Both names in the sentence must appear as entities, and the first
    ``depends_on`` relation must point from Django to Python.
    """
    extraction = rule_based_extract("Django depends on Python.")

    # Entities: both sides of the dependency must be extracted.
    names = [entity.name for entity in extraction.entities]
    for expected in ("Django", "Python"):
        assert expected in names

    # Relations: at least one "depends_on" relation, the first being Django -> Python.
    dependencies = [
        rel for rel in extraction.relations if rel.predicate == "depends_on"
    ]
    assert dependencies
    head = dependencies[0]
    assert head.subject == "Django"
    assert head.object == "Python"
Comment on lines +98 to +107
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Similar to the previous test, verifying the entity_type and checking for an exact relation count improves the quality of the test suite. It ensures that the dependency extraction logic correctly identifies both the subject and object as 'tech' entities and doesn't produce duplicate or extraneous relations.

Suggested change
# Assert entities
entity_names = [e.name for e in result.entities]
assert "Django" in entity_names
assert "Python" in entity_names
# Assert dependency relations
depends_rels = [r for r in result.relations if r.predicate == "depends_on"]
assert len(depends_rels) > 0
assert depends_rels[0].subject == "Django"
assert depends_rels[0].object == "Python"
# Assert entities and their types
entities = {e.name: e.entity_type for e in result.entities}
assert entities.get("Django") == "tech"
assert entities.get("Python") == "tech"
# Assert dependency relations
depends_rels = [r for r in result.relations if r.predicate == "depends_on"]
assert len(depends_rels) == 1
assert depends_rels[0].subject == "Django"
assert depends_rels[0].object == "Python"
assert result.summary == "Django depends on Python."
References
  1. Production-Grade Only: NEVER use placeholders, stubs, or 'samples' for core logic. All code must be production-ready, typed, and robust. (link)

Loading