Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions packages/shared/src/five08/resume_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,22 @@ def extract(
limit=5,
)
parsed_role_rationale = _normalize_scalar(parsed.get("role_rationale"))
parsed_primary_roles_raw = parsed.get("primary_roles")
if not parsed_primary_roles_raw:
parsed_primary_roles_raw = parsed.get("primary_role")
parsed_primary_roles = _normalize_role_collection(parsed_primary_roles_raw)
Comment on lines +2333 to +2336
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change adds support for honoring the legacy primary_role field (via the new fallback logic), but there isn’t a unit test exercising the primary_role path (only primary_roles). Adding a regression test where the LLM returns primary_role (string) and verifying heuristic role inference does not expand it would help prevent future regressions.

Copilot uses AI. Check for mistakes.
llm_provided_role_suggestion = bool(parsed_primary_roles)
resolved_primary_roles = parsed_primary_roles
if not llm_provided_role_suggestion:
resolved_primary_roles = (
resolved_primary_roles
or self._infer_roles_from_signals(
current_title=parsed_current_title,
recent_titles=parsed_recent_titles,
role_rationale=parsed_role_rationale,
)
or self._infer_roles_from_resume(resume_text)
)
(
parsed_city,
parsed_state,
Expand All @@ -2354,17 +2370,7 @@ def extract(
email=parsed_email,
additional_emails=parsed_emails,
description=_normalize_description(parsed.get("description")),
primary_roles=(
_normalize_role_collection(
parsed.get("primary_roles") or parsed.get("primary_role")
)
or self._infer_roles_from_signals(
current_title=parsed_current_title,
recent_titles=parsed_recent_titles,
role_rationale=parsed_role_rationale,
)
or self._infer_roles_from_resume(resume_text)
),
primary_roles=resolved_primary_roles,
github_username=github_username,
linkedin_url=linkedin_url,
timezone=parsed_timezone,
Expand Down
129 changes: 129 additions & 0 deletions tests/unit/test_resume_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1713,6 +1713,135 @@ def create(**_: object) -> object:
assert result.current_location_evidence is not None


def test_extract_does_not_backfill_heuristic_roles_when_llm_suggests_roles() -> None:
    """LLM-suggested role fields should not be expanded by heuristic role inference."""

    # Raw JSON payload the fake LLM returns; primary_roles is already populated,
    # so the extractor must keep it as-is instead of layering heuristic roles on top.
    llm_payload = (
        '{"name": "Jane Doe", '
        '"email": "jane@example.com", '
        '"primary_roles": ["platform specialist"], '
        '"current_title": "Software Engineer", '
        '"recent_titles": ["Software Engineer"], '
        '"role_rationale": "Engineering title indicates a developer profile.", '
        '"current_location_raw": null, '
        '"current_location_source": null, '
        '"current_location_evidence": null, '
        '"address_city": null, '
        '"address_state": null, '
        '"address_country": null, '
        '"timezone": null, '
        '"website_url_candidates": [], '
        '"website_links": [], '
        '"social_links": [], '
        '"phone": null, '
        '"skills": [], '
        '"skill_attrs": null, '
        '"confidence": 0.88}'
    )

    # Minimal stand-ins mirroring the OpenAI response object graph:
    # response.choices[0].message.content
    class _FakeMessage:
        content = llm_payload

    class _FakeChoice:
        message = _FakeMessage()

    class _FakeResponse:
        choices = [_FakeChoice()]

    class _FakeChatCompletions:
        @staticmethod
        def create(**_: object) -> object:
            return _FakeResponse()

    class _FakeChat:
        completions = _FakeChatCompletions()

    class _FakeClient:
        chat = _FakeChat()

    extractor = ResumeProfileExtractor(api_key="test-key")
    extractor.client = _FakeClient()
    extractor.model = "fake-model"

    result = extractor.extract("Jane Doe\nSoftware Engineer")

    # LLM-suggested roles should be preserved, and heuristic roles like
    # "developer" should not be added on top.
    assert result.primary_roles == ["platform specialist"]
    assert "developer" not in result.primary_roles


def test_extract_does_not_backfill_heuristic_roles_for_legacy_primary_role() -> None:
    """Legacy primary_role should also suppress heuristic role expansion."""

    # Fake LLM output: primary_roles is null but the legacy scalar primary_role
    # is set, so the fallback path must honor it and skip heuristic inference.
    llm_payload = (
        '{"name": "Jane Doe", '
        '"email": "jane@example.com", '
        '"primary_roles": null, '
        '"primary_role": "platform specialist", '
        '"current_title": "Software Engineer", '
        '"recent_titles": ["Software Engineer"], '
        '"role_rationale": "Engineering title indicates a developer profile.", '
        '"current_location_raw": null, '
        '"current_location_source": null, '
        '"current_location_evidence": null, '
        '"address_city": null, '
        '"address_state": null, '
        '"address_country": null, '
        '"timezone": null, '
        '"website_url_candidates": [], '
        '"website_links": [], '
        '"social_links": [], '
        '"phone": null, '
        '"skills": [], '
        '"skill_attrs": null, '
        '"confidence": 0.88}'
    )

    # Minimal stand-ins mirroring the OpenAI response object graph:
    # response.choices[0].message.content
    class _FakeMessage:
        content = llm_payload

    class _FakeChoice:
        message = _FakeMessage()

    class _FakeResponse:
        choices = [_FakeChoice()]

    class _FakeChatCompletions:
        @staticmethod
        def create(**_: object) -> object:
            return _FakeResponse()

    class _FakeChat:
        completions = _FakeChatCompletions()

    class _FakeClient:
        chat = _FakeChat()

    extractor = ResumeProfileExtractor(api_key="test-key")
    extractor.client = _FakeClient()
    extractor.model = "fake-model"

    result = extractor.extract("Jane Doe\nSoftware Engineer")

    # The legacy scalar must be normalized into the roles list unchanged,
    # with no heuristic roles (e.g. "developer") appended.
    assert result.primary_roles == ["platform specialist"]
    assert "developer" not in result.primary_roles


def test_extract_discards_invalid_country_and_repairs_current_location_region() -> None:
"""Invalid LLM location fields should be replaced by deterministic parsing."""

Expand Down