diff --git a/packages/shared/src/five08/resume_extractor.py b/packages/shared/src/five08/resume_extractor.py
index e4aa766..738d083 100644
--- a/packages/shared/src/five08/resume_extractor.py
+++ b/packages/shared/src/five08/resume_extractor.py
@@ -2330,6 +2330,25 @@ def extract(
             limit=5,
         )
         parsed_role_rationale = _normalize_scalar(parsed.get("role_rationale"))
+        # Prefer the plural ``primary_roles`` key; fall back to the legacy
+        # singular ``primary_role`` key when the plural one is missing or empty.
+        parsed_primary_roles_raw = parsed.get("primary_roles")
+        if not parsed_primary_roles_raw:
+            parsed_primary_roles_raw = parsed.get("primary_role")
+        parsed_primary_roles = _normalize_role_collection(parsed_primary_roles_raw)
+        llm_provided_role_suggestion = bool(parsed_primary_roles)
+        resolved_primary_roles = parsed_primary_roles
+        if not llm_provided_role_suggestion:
+            # Heuristics are a fallback only: they run when the LLM supplied no
+            # usable role, and never extend roles the LLM did supply.
+            resolved_primary_roles = (
+                self._infer_roles_from_signals(
+                    current_title=parsed_current_title,
+                    recent_titles=parsed_recent_titles,
+                    role_rationale=parsed_role_rationale,
+                )
+                or self._infer_roles_from_resume(resume_text)
+            )
         (
             parsed_city,
             parsed_state,
@@ -2354,17 +2373,7 @@ def extract(
             email=parsed_email,
             additional_emails=parsed_emails,
             description=_normalize_description(parsed.get("description")),
-            primary_roles=(
-                _normalize_role_collection(
-                    parsed.get("primary_roles") or parsed.get("primary_role")
-                )
-                or self._infer_roles_from_signals(
-                    current_title=parsed_current_title,
-                    recent_titles=parsed_recent_titles,
-                    role_rationale=parsed_role_rationale,
-                )
-                or self._infer_roles_from_resume(resume_text)
-            ),
+            primary_roles=resolved_primary_roles,
             github_username=github_username,
             linkedin_url=linkedin_url,
             timezone=parsed_timezone,
diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py
index 3134856..8b0b244 100644
--- a/tests/unit/test_resume_extractor.py
+++ b/tests/unit/test_resume_extractor.py
@@ -1713,6 +1713,135 @@ def create(**_: object) -> object:
     assert result.current_location_evidence is not None
 
 
+def test_extract_does_not_backfill_heuristic_roles_when_llm_suggests_roles() -> None:
+    """LLM-suggested role fields should not be expanded by heuristic role inference."""
+
+    class _FakeChatCompletions:
+        @staticmethod
+        def create(**_: object) -> object:
+            return type(
+                "Response",
+                (),
+                {
+                    "choices": [
+                        type(
+                            "Choice",
+                            (),
+                            {
+                                "message": type(
+                                    "Message",
+                                    (),
+                                    {
+                                        "content": (
+                                            '{"name": "Jane Doe", '
+                                            '"email": "jane@example.com", '
+                                            '"primary_roles": ["platform specialist"], '
+                                            '"current_title": "Software Engineer", '
+                                            '"recent_titles": ["Software Engineer"], '
+                                            '"role_rationale": "Engineering title indicates a developer profile.", '
+                                            '"current_location_raw": null, '
+                                            '"current_location_source": null, '
+                                            '"current_location_evidence": null, '
+                                            '"address_city": null, '
+                                            '"address_state": null, '
+                                            '"address_country": null, '
+                                            '"timezone": null, '
+                                            '"website_url_candidates": [], '
+                                            '"website_links": [], '
+                                            '"social_links": [], '
+                                            '"phone": null, '
+                                            '"skills": [], '
+                                            '"skill_attrs": null, '
+                                            '"confidence": 0.88}'
+                                        )
+                                    },
+                                )()
+                            },
+                        )()
+                    ]
+                },
+            )()
+
+    extractor = ResumeProfileExtractor(api_key="test-key")
+    extractor.client = type(
+        "Client",
+        (),
+        {"chat": type("Chat", (), {"completions": _FakeChatCompletions()})()},
+    )()
+    extractor.model = "fake-model"
+
+    result = extractor.extract("Jane Doe\nSoftware Engineer")
+
+    # LLM-suggested roles should be preserved, and heuristic roles like
+    # "developer" should not be added on top.
+    assert result.primary_roles == ["platform specialist"]
+    assert "developer" not in result.primary_roles
+
+
+def test_extract_does_not_backfill_heuristic_roles_for_legacy_primary_role() -> None:
+    """Legacy primary_role should also suppress heuristic role expansion."""
+
+    class _FakeChatCompletions:
+        @staticmethod
+        def create(**_: object) -> object:
+            return type(
+                "Response",
+                (),
+                {
+                    "choices": [
+                        type(
+                            "Choice",
+                            (),
+                            {
+                                "message": type(
+                                    "Message",
+                                    (),
+                                    {
+                                        "content": (
+                                            '{"name": "Jane Doe", '
+                                            '"email": "jane@example.com", '
+                                            '"primary_roles": null, '
+                                            '"primary_role": "platform specialist", '
+                                            '"current_title": "Software Engineer", '
+                                            '"recent_titles": ["Software Engineer"], '
+                                            '"role_rationale": "Engineering title indicates a developer profile.", '
+                                            '"current_location_raw": null, '
+                                            '"current_location_source": null, '
+                                            '"current_location_evidence": null, '
+                                            '"address_city": null, '
+                                            '"address_state": null, '
+                                            '"address_country": null, '
+                                            '"timezone": null, '
+                                            '"website_url_candidates": [], '
+                                            '"website_links": [], '
+                                            '"social_links": [], '
+                                            '"phone": null, '
+                                            '"skills": [], '
+                                            '"skill_attrs": null, '
+                                            '"confidence": 0.88}'
+                                        )
+                                    },
+                                )()
+                            },
+                        )()
+                    ]
+                },
+            )()
+
+    extractor = ResumeProfileExtractor(api_key="test-key")
+    extractor.client = type(
+        "Client",
+        (),
+        {"chat": type("Chat", (), {"completions": _FakeChatCompletions()})()},
+    )()
+    extractor.model = "fake-model"
+
+    result = extractor.extract("Jane Doe\nSoftware Engineer")
+
+    assert result.primary_roles == ["platform specialist"]
+    assert "developer" not in result.primary_roles
+
+
 def test_extract_discards_invalid_country_and_repairs_current_location_region() -> None:
     """Invalid LLM location fields should be replaced by deterministic parsing."""
 