Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions packages/shared/src/five08/resume_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,22 @@ def extract(
limit=5,
)
parsed_role_rationale = _normalize_scalar(parsed.get("role_rationale"))
parsed_primary_roles_raw = parsed.get("primary_roles")
if not parsed_primary_roles_raw:
parsed_primary_roles_raw = parsed.get("primary_role")
parsed_primary_roles = _normalize_role_collection(parsed_primary_roles_raw)
Comment on lines +2333 to +2336
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change adds support for honoring the legacy primary_role field (via the new fallback logic), but there isn’t a unit test exercising the primary_role path (only primary_roles). Adding a regression test where the LLM returns primary_role (string) and verifying heuristic role inference does not expand it would help prevent future regressions.

Copilot uses AI. Check for mistakes.
llm_provided_role_suggestion = bool(parsed_primary_roles)
resolved_primary_roles = parsed_primary_roles
if not llm_provided_role_suggestion:
resolved_primary_roles = (
resolved_primary_roles
or self._infer_roles_from_signals(
current_title=parsed_current_title,
recent_titles=parsed_recent_titles,
role_rationale=parsed_role_rationale,
)
or self._infer_roles_from_resume(resume_text)
)
(
parsed_city,
parsed_state,
Expand All @@ -2354,17 +2370,7 @@ def extract(
email=parsed_email,
additional_emails=parsed_emails,
description=_normalize_description(parsed.get("description")),
primary_roles=(
_normalize_role_collection(
parsed.get("primary_roles") or parsed.get("primary_role")
)
or self._infer_roles_from_signals(
current_title=parsed_current_title,
recent_titles=parsed_recent_titles,
role_rationale=parsed_role_rationale,
)
or self._infer_roles_from_resume(resume_text)
),
primary_roles=resolved_primary_roles,
github_username=github_username,
linkedin_url=linkedin_url,
timezone=parsed_timezone,
Expand Down
129 changes: 129 additions & 0 deletions tests/unit/test_resume_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1713,6 +1713,135 @@ def create(**_: object) -> object:
assert result.current_location_evidence is not None


def test_extract_does_not_backfill_heuristic_roles_when_llm_suggests_roles() -> None:
    """LLM-suggested role fields should not be expanded by heuristic role inference."""

    # Raw JSON payload the fake LLM returns; primary_roles is already populated,
    # so the extractor must keep it as-is instead of layering heuristic roles on top.
    llm_payload = (
        '{"name": "Jane Doe", '
        '"email": "jane@example.com", '
        '"primary_roles": ["platform specialist"], '
        '"current_title": "Software Engineer", '
        '"recent_titles": ["Software Engineer"], '
        '"role_rationale": "Engineering title indicates a developer profile.", '
        '"current_location_raw": null, '
        '"current_location_source": null, '
        '"current_location_evidence": null, '
        '"address_city": null, '
        '"address_state": null, '
        '"address_country": null, '
        '"timezone": null, '
        '"website_url_candidates": [], '
        '"website_links": [], '
        '"social_links": [], '
        '"phone": null, '
        '"skills": [], '
        '"skill_attrs": null, '
        '"confidence": 0.88}'
    )

    # Minimal stand-ins mirroring the OpenAI response object graph:
    # response.choices[0].message.content
    class _FakeMessage:
        content = llm_payload

    class _FakeChoice:
        message = _FakeMessage()

    class _FakeResponse:
        choices = [_FakeChoice()]

    class _FakeChatCompletions:
        @staticmethod
        def create(**_: object) -> object:
            return _FakeResponse()

    class _FakeChat:
        completions = _FakeChatCompletions()

    class _FakeClient:
        chat = _FakeChat()

    extractor = ResumeProfileExtractor(api_key="test-key")
    extractor.client = _FakeClient()
    extractor.model = "fake-model"

    result = extractor.extract("Jane Doe\nSoftware Engineer")

    # LLM-suggested roles should be preserved, and heuristic roles like
    # "developer" should not be added on top.
    assert result.primary_roles == ["platform specialist"]
    assert "developer" not in result.primary_roles


def test_extract_does_not_backfill_heuristic_roles_for_legacy_primary_role() -> None:
    """Legacy primary_role should also suppress heuristic role expansion."""

    # Fake LLM output: primary_roles is null but the legacy scalar primary_role
    # is set, so the fallback path must honor it and skip heuristic inference.
    llm_payload = (
        '{"name": "Jane Doe", '
        '"email": "jane@example.com", '
        '"primary_roles": null, '
        '"primary_role": "platform specialist", '
        '"current_title": "Software Engineer", '
        '"recent_titles": ["Software Engineer"], '
        '"role_rationale": "Engineering title indicates a developer profile.", '
        '"current_location_raw": null, '
        '"current_location_source": null, '
        '"current_location_evidence": null, '
        '"address_city": null, '
        '"address_state": null, '
        '"address_country": null, '
        '"timezone": null, '
        '"website_url_candidates": [], '
        '"website_links": [], '
        '"social_links": [], '
        '"phone": null, '
        '"skills": [], '
        '"skill_attrs": null, '
        '"confidence": 0.88}'
    )

    # Minimal stand-ins mirroring the OpenAI response object graph:
    # response.choices[0].message.content
    class _FakeMessage:
        content = llm_payload

    class _FakeChoice:
        message = _FakeMessage()

    class _FakeResponse:
        choices = [_FakeChoice()]

    class _FakeChatCompletions:
        @staticmethod
        def create(**_: object) -> object:
            return _FakeResponse()

    class _FakeChat:
        completions = _FakeChatCompletions()

    class _FakeClient:
        chat = _FakeChat()

    extractor = ResumeProfileExtractor(api_key="test-key")
    extractor.client = _FakeClient()
    extractor.model = "fake-model"

    result = extractor.extract("Jane Doe\nSoftware Engineer")

    # The legacy scalar must be normalized into the roles list unchanged,
    # with no heuristic roles (e.g. "developer") appended.
    assert result.primary_roles == ["platform specialist"]
    assert "developer" not in result.primary_roles


def test_extract_discards_invalid_country_and_repairs_current_location_region() -> None:
"""Invalid LLM location fields should be replaced by deterministic parsing."""

Expand Down