diff --git a/assayer/scorer.py b/assayer/scorer.py
index db77d74..cc8d73a 100644
--- a/assayer/scorer.py
+++ b/assayer/scorer.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
+import re
+
 from assayer.models import ModelResult
 
 _model = None
 
@@ -40,11 +42,19 @@ def compute_similarity(results: list[ModelResult]) -> dict[tuple[str, str], floa
 
 
 def readability_stats(text: str) -> dict[str, float]:
-    sentences = [
-        s
-        for s in text.replace("!", ".").replace("?", ".").split(".")
-        if s.strip()
-    ]
+    # Sentences end at a run of ./!/? followed by whitespace or end of text,
+    # so decimals ("3.5") and domains ("example.com") are not split. The
+    # lookbehinds stop common title abbreviations ("Dr. Smith") from ending a
+    # sentence; a sentence that really ends in one is merged with the next.
+    sentences = [
+        s
+        for s in re.split(
+            r"(?<!\bDr)(?<!\bMr)(?<!\bMrs)(?<!\bMs)(?<!\bProf)"
+            r"[.!?]+(?:\s|$)",
+            text,
+        )
+        if s.strip()
+    ]
     words = text.split()
     word_count = len(words)
     sentence_count = len(sentences) or 1
diff --git a/tests/test_scorer.py b/tests/test_scorer.py
index 007e7fa..2ff6aec 100644
--- a/tests/test_scorer.py
+++ b/tests/test_scorer.py
@@ -78,3 +78,21 @@ def test_readability_stats_empty():
     assert stats["word_count"] == 0
     assert stats["sentence_count"] == 1
     assert stats["avg_sentence_length"] == 0.0
+
+
+def test_readability_stats_abbreviations():
+    stats = readability_stats("Dr. Smith scored 3.5. Well done.")
+    assert stats["word_count"] == 6
+    assert stats["sentence_count"] == 2
+
+
+def test_readability_stats_urls():
+    stats = readability_stats("Visit example.com for details.")
+    assert stats["word_count"] == 4
+    assert stats["sentence_count"] == 1
+
+
+def test_readability_stats_exclamations():
+    stats = readability_stats("The price is $3.99. Cheap!")
+    assert stats["word_count"] == 5
+    assert stats["sentence_count"] == 2