From ad09db91deb45e9ad29a84e0a8de2d1cf9519ed2 Mon Sep 17 00:00:00 2001
From: Michael Wu <michaelmwu@gmail.com>
Date: Mon, 9 Mar 2026 21:30:58 +0100
Subject: [PATCH 1/4] fix: skip heuristic role backfill for llm roles

---
 .../shared/src/five08/resume_extractor.py     | 35 +++++++----
 tests/unit/test_resume_extractor.py           | 62 +++++++++++++++++++
 2 files changed, 86 insertions(+), 11 deletions(-)

diff --git a/packages/shared/src/five08/resume_extractor.py b/packages/shared/src/five08/resume_extractor.py
index e4aa766..0f2ebc0 100644
--- a/packages/shared/src/five08/resume_extractor.py
+++ b/packages/shared/src/five08/resume_extractor.py
@@ -2330,6 +2330,29 @@ def extract(
                 limit=5,
             )
             parsed_role_rationale = _normalize_scalar(parsed.get("role_rationale"))
+            parsed_primary_roles_raw = parsed.get("primary_roles")
+            if not parsed_primary_roles_raw:
+                parsed_primary_roles_raw = parsed.get("primary_role")
+            parsed_primary_roles = _normalize_role_collection(parsed_primary_roles_raw)
+            llm_provided_role_suggestion = False
+            if isinstance(parsed_primary_roles_raw, str):
+                llm_provided_role_suggestion = bool(parsed_primary_roles_raw.strip())
+            elif isinstance(parsed_primary_roles_raw, (list, tuple)):
+                llm_provided_role_suggestion = any(
+                    isinstance(item, str) and bool(item.strip())
+                    for item in parsed_primary_roles_raw
+                )
+            resolved_primary_roles = parsed_primary_roles
+            if not llm_provided_role_suggestion:
+                resolved_primary_roles = (
+                    resolved_primary_roles
+                    or self._infer_roles_from_signals(
+                        current_title=parsed_current_title,
+                        recent_titles=parsed_recent_titles,
+                        role_rationale=parsed_role_rationale,
+                    )
+                    or self._infer_roles_from_resume(resume_text)
+                )
             (
                 parsed_city,
                 parsed_state,
@@ -2354,17 +2377,7 @@ def extract(
                 email=parsed_email,
                 additional_emails=parsed_emails,
                 description=_normalize_description(parsed.get("description")),
-                primary_roles=(
-                    _normalize_role_collection(
-                        parsed.get("primary_roles") or parsed.get("primary_role")
-                    )
-                    or self._infer_roles_from_signals(
-                        current_title=parsed_current_title,
-                        recent_titles=parsed_recent_titles,
-                        role_rationale=parsed_role_rationale,
-                    )
-                    or self._infer_roles_from_resume(resume_text)
-                ),
+                primary_roles=resolved_primary_roles,
                 github_username=github_username,
                 linkedin_url=linkedin_url,
                 timezone=parsed_timezone,
diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py
index 3134856..5fbb916 100644
--- a/tests/unit/test_resume_extractor.py
+++ b/tests/unit/test_resume_extractor.py
@@ -1713,6 +1713,68 @@ def create(**_: object) -> object:
     assert result.current_location_evidence is not None
 
 
+def test_extract_does_not_backfill_heuristic_roles_when_llm_suggests_roles() -> None:
+    """LLM-suggested role fields should not be expanded by heuristic role inference."""
+
+    class _FakeChatCompletions:
+        @staticmethod
+        def create(**_: object) -> object:
+            return type(
+                "Response",
+                (),
+                {
+                    "choices": [
+                        type(
+                            "Choice",
+                            (),
+                            {
+                                "message": type(
+                                    "Message",
+                                    (),
+                                    {
+                                        "content": (
+                                            '{"name": "Jane Doe", '
+                                            '"email": "jane@example.com", '
+                                            '"primary_roles": ["platform specialist"], '
+                                            '"current_title": "Software Engineer", '
+                                            '"recent_titles": ["Software Engineer"], '
+                                            '"role_rationale": "Engineering title indicates a developer profile.", '
+                                            '"current_location_raw": null, '
+                                            '"current_location_source": null, '
+                                            '"current_location_evidence": null, '
+                                            '"address_city": null, '
+                                            '"address_state": null, '
+                                            '"address_country": null, '
+                                            '"timezone": null, '
+                                            '"website_url_candidates": [], '
+                                            '"website_links": [], '
+                                            '"social_links": [], '
+                                            '"phone": null, '
+                                            '"skills": [], '
+                                            '"skill_attrs": null, '
+                                            '"confidence": 0.88}'
+                                        )
+                                    },
+                                )()
+                            },
+                        )()
+                    ]
+                },
+            )()
+
+    extractor = ResumeProfileExtractor(api_key="test-key")
+    extractor.client = type(
+        "Client",
+        (),
+        {"chat": type("Chat", (), {"completions": _FakeChatCompletions()})()},
+    )()
+    extractor.model = "fake-model"
+
+    result = extractor.extract("Jane Doe\nSoftware Engineer")
+
+    assert result.primary_roles == []
+
+
 def test_extract_discards_invalid_country_and_repairs_current_location_region() -> None:
     """Invalid LLM location fields should be replaced by deterministic parsing."""
 

From b1ac43224cf86a4ce5eed77bad0d240c340d25a3 Mon Sep 17 00:00:00 2001
From: Michael Wu <michaelmwu@gmail.com>
Date: Mon, 9 Mar 2026 22:02:29 +0100
Subject: [PATCH 2/4] test: fix llm role suggestion expectation

---
 tests/unit/test_resume_extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py
index 5fbb916..da3109e 100644
--- a/tests/unit/test_resume_extractor.py
+++ b/tests/unit/test_resume_extractor.py
@@ -1772,7 +1772,7 @@ def create(**_: object) -> object:
 
     result = extractor.extract("Jane Doe\nSoftware Engineer")
 
-    assert result.primary_roles == []
+    assert result.primary_roles == ["platform specialist"]
 
 
 def test_extract_discards_invalid_country_and_repairs_current_location_region() -> None:

From 5ab00d8fc3e92f53b9705253a2c6075251c5ce63 Mon Sep 17 00:00:00 2001
From: Michael Wu <michaelmwu@gmail.com>
Date: Mon, 9 Mar 2026 22:04:05 +0100
Subject: [PATCH 3/4] test: assert no heuristic developer role add

---
 tests/unit/test_resume_extractor.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py
index da3109e..11c8ae2 100644
--- a/tests/unit/test_resume_extractor.py
+++ b/tests/unit/test_resume_extractor.py
@@ -1772,7 +1772,10 @@ def create(**_: object) -> object:
 
     result = extractor.extract("Jane Doe\nSoftware Engineer")
 
+    # LLM-suggested roles should be preserved, and heuristic roles like
+    # "developer" should not be added on top.
     assert result.primary_roles == ["platform specialist"]
+    assert "developer" not in result.primary_roles
 
 
 def test_extract_discards_invalid_country_and_repairs_current_location_region() -> None:

From dd02aabe9a6fdc521a4b1e2b1ae55cf2a9219eec Mon Sep 17 00:00:00 2001
From: Michael Wu <michaelmwu@gmail.com>
Date: Mon, 9 Mar 2026 22:11:29 +0100
Subject: [PATCH 4/4] fix: gate role heuristics on normalized llm roles

---
 .../shared/src/five08/resume_extractor.py     |  9 +--
 tests/unit/test_resume_extractor.py           | 64 +++++++++++++++++++
 2 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/packages/shared/src/five08/resume_extractor.py b/packages/shared/src/five08/resume_extractor.py
index 0f2ebc0..738d083 100644
--- a/packages/shared/src/five08/resume_extractor.py
+++ b/packages/shared/src/five08/resume_extractor.py
@@ -2334,14 +2334,7 @@ def extract(
             if not parsed_primary_roles_raw:
                 parsed_primary_roles_raw = parsed.get("primary_role")
             parsed_primary_roles = _normalize_role_collection(parsed_primary_roles_raw)
-            llm_provided_role_suggestion = False
-            if isinstance(parsed_primary_roles_raw, str):
-                llm_provided_role_suggestion = bool(parsed_primary_roles_raw.strip())
-            elif isinstance(parsed_primary_roles_raw, (list, tuple)):
-                llm_provided_role_suggestion = any(
-                    isinstance(item, str) and bool(item.strip())
-                    for item in parsed_primary_roles_raw
-                )
+            llm_provided_role_suggestion = bool(parsed_primary_roles)
             resolved_primary_roles = parsed_primary_roles
             if not llm_provided_role_suggestion:
                 resolved_primary_roles = (
diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py
index 11c8ae2..8b0b244 100644
--- a/tests/unit/test_resume_extractor.py
+++ b/tests/unit/test_resume_extractor.py
@@ -1778,6 +1778,70 @@ def create(**_: object) -> object:
     assert "developer" not in result.primary_roles
 
 
+def test_extract_does_not_backfill_heuristic_roles_for_legacy_primary_role() -> None:
+    """A legacy `primary_role` value should also suppress heuristic role backfill."""
+
+    class _FakeChatCompletions:
+        @staticmethod
+        def create(**_: object) -> object:
+            return type(
+                "Response",
+                (),
+                {
+                    "choices": [
+                        type(
+                            "Choice",
+                            (),
+                            {
+                                "message": type(
+                                    "Message",
+                                    (),
+                                    {
+                                        "content": (
+                                            '{"name": "Jane Doe", '
+                                            '"email": "jane@example.com", '
+                                            '"primary_roles": null, '
+                                            '"primary_role": "platform specialist", '
+                                            '"current_title": "Software Engineer", '
+                                            '"recent_titles": ["Software Engineer"], '
+                                            '"role_rationale": "Engineering title indicates a developer profile.", '
+                                            '"current_location_raw": null, '
+                                            '"current_location_source": null, '
+                                            '"current_location_evidence": null, '
+                                            '"address_city": null, '
+                                            '"address_state": null, '
+                                            '"address_country": null, '
+                                            '"timezone": null, '
+                                            '"website_url_candidates": [], '
+                                            '"website_links": [], '
+                                            '"social_links": [], '
+                                            '"phone": null, '
+                                            '"skills": [], '
+                                            '"skill_attrs": null, '
+                                            '"confidence": 0.88}'
+                                        )
+                                    },
+                                )()
+                            },
+                        )()
+                    ]
+                },
+            )()
+
+    extractor = ResumeProfileExtractor(api_key="test-key")
+    extractor.client = type(
+        "Client",
+        (),
+        {"chat": type("Chat", (), {"completions": _FakeChatCompletions()})()},
+    )()
+    extractor.model = "fake-model"
+
+    result = extractor.extract("Jane Doe\nSoftware Engineer")
+
+    assert result.primary_roles == ["platform specialist"]
+    assert "developer" not in result.primary_roles
+
+
 def test_extract_discards_invalid_country_and_repairs_current_location_region() -> None:
     """Invalid LLM location fields should be replaced by deterministic parsing."""