devangb3 · devangb3 · Jun 26, 2026 · Jun 28, 2026
diff --git a/.env.example b/.env.example
@@ -4,3 +4,13 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here
 # Optional OpenRouter attribution headers
 # OPENROUTER_HTTP_REFERER=https://your-app-domain.com
 # OPENROUTER_APP_TITLE=Cover Letter Generator
+
+# Optional PilotCrew Observability OTLP trace export
+# Minimum setup
+# OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=https://pilotcrew.example.com/v1/traces
+# OTEL_EXPORTER_OTLP_HEADERS=x-pilotcrew-ingest-key=your_pilotcrew_ingest_key
+# OTEL_SERVICE_NAME=cover-letter-generator
+
+# Optional: metadata for filtering and release comparison, plus a switch to force-disable export
+# OTEL_RESOURCE_ATTRIBUTES=deployment.environment=production,pilotcrew.deployment_id=2026-06-25.1
+# PILOTCREW_OBSERVABILITY_ENABLED=false
diff --git a/README.md b/README.md
@@ -2,11 +2,11 @@
 
 A web app that generates personalized cover letters from job descriptions and user details, then renders the output into a downloadable PDF.
 
-The app now uses **OpenRouter only** for LLM generation, with the allowed model list defined in YAML and surfaced in the UI dropdown.
+The app uses **OpenRouter through the OpenAI-compatible SDK** for LLM generation, with the allowed model list defined in YAML and surfaced in the UI dropdown.
 
 ## Features
 
-- OpenRouter-powered cover letter generation
+- OpenRouter-powered cover letter generation through the OpenAI SDK
 - Job application question answering using the same resume/projects context
 - Resume PDF tailoring from structured `resume.yaml`
 - YAML-driven model allowlist (`config/model.yaml`)
@@ -61,6 +61,29 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here
 # OPENROUTER_APP_TITLE=Cover Letter Generator
 ```
 
+To export traces to PilotCrew Observability, add the OTLP endpoint, ingest key
+header, and service name:
+
+```bash
+OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=https://pilotcrew.example.com/v1/traces
+OTEL_EXPORTER_OTLP_HEADERS=x-pilotcrew-ingest-key=your_pilotcrew_ingest_key
+OTEL_SERVICE_NAME=cover-letter-generator
+```
+
+Trace export is enabled when `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` is set. The
+backend uses standard OpenTelemetry Flask/httpx instrumentation for service
+spans and OpenInference OpenAI instrumentation for OpenRouter LLM calls. The
+OpenInference spans carry prompt, response, model, and usage attributes when the
+SDK/provider response exposes them.
+
+Optional metadata can be added later for filtering and release comparison:
+
+```bash
+OTEL_RESOURCE_ATTRIBUTES=deployment.environment=production,pilotcrew.deployment_id=2026-06-25.1
+```
+
+Set `PILOTCREW_OBSERVABILITY_ENABLED=false` to force-disable trace export.
+
 ### 4. Configure model catalog
 
 Edit `config/model.yaml`:

diff --git a/backend/api_service/ai_service.py b/backend/api_service/ai_service.py
@@ -5,7 +5,7 @@
 import re
 import traceback
 
-import httpx
+from openai import APIStatusError, OpenAI
 
 from backend.api_service.model_config import (
     get_base_url,
@@ -48,6 +48,70 @@
 }
 EXPERIENCE_OWNED_PROJECT_IDS = {"pilotcrew-gen-eval", "lh-multimodal-svc"}
 
+
+def build_openrouter_client():
+    """Build an OpenAI SDK client aimed at the configured OpenRouter base URL.
+
+    The optional OPENROUTER_HTTP_REFERER and OPENROUTER_APP_TITLE environment
+    variables are sent as the HTTP-Referer and X-Title attribution headers.
+    """
+    # The SDK assembles request headers itself, so OpenRouter's optional
+    # attribution headers have to be supplied here as client defaults.
+    attribution_headers = {}
+    http_referer = os.environ.get("OPENROUTER_HTTP_REFERER")
+    app_title = os.environ.get("OPENROUTER_APP_TITLE")
+    if http_referer:
+        attribution_headers["HTTP-Referer"] = http_referer
+    if app_title:
+        attribution_headers["X-Title"] = app_title
+
+    return OpenAI(
+        api_key=OPENROUTER_API_KEY,
+        base_url=get_base_url().rstrip("/"),
+        timeout=120.0,
+        default_headers=attribution_headers or None,
+    )
+
+
+def extract_chat_completion_text(completion):
+    """Return the text of the first choice in a chat completion.
+
+    Raises:
+        RuntimeError: If the completion has no choices, or if
+            parse_openrouter_content yields no text for the first message.
+    """
+    choices = completion.choices or []
+    if not choices:
+        raise RuntimeError("OpenRouter response did not include any choices")
+
+    message = choices[0].message
+    response_text = parse_openrouter_content(message.content)
+    if not response_text:
+        raise RuntimeError("No response text received from OpenRouter")
+    return response_text
+
+
+def create_openrouter_completion(**kwargs):
+    """Send a chat completion request to OpenRouter and return its text.
+
+    Args:
+        **kwargs: Forwarded unchanged to chat.completions.create.
+
+    Raises:
+        RuntimeError: If the API answers with an error status, or if the
+            response carries no usable text.
+    """
+    try:
+        completion = build_openrouter_client().chat.completions.create(**kwargs)
+        return extract_chat_completion_text(completion)
+    except APIStatusError as exc:
+        # Log the body for debugging; the raised message carries only the status.
+        logger.error("OpenRouter API error %s: %s", exc.status_code, exc.response.text)
+        raise RuntimeError(
+            f"OpenRouter API request failed with status {exc.status_code}"
+        ) from exc
+
+
 def get_pydantic_json_schema(model):
     if hasattr(model, "model_json_schema"):
         return model.model_json_schema()
@@ -206,7 +237,7 @@ def call_openrouter(system_instruction, prompt, selected_model, enable_web_searc
     if not is_allowed_model(selected_model):
         raise ValueError(f"Model '{selected_model}' is not allowed by server configuration")
 
-    payload = {
+    completion_kwargs = {
         "model": selected_model,
         "messages": [
             {"role": "system", "content": system_instruction},
@@ -226,40 +257,11 @@ def call_openrouter(system_instruction, prompt, selected_model, enable_web_searc
         ],
     }
     if enable_web_search:
-        payload["tools"] = [WEB_SEARCH_TOOL]
-
-    headers = {
-        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-        "Content-Type": "application/json",
-    }
-    http_referer = os.environ.get("OPENROUTER_HTTP_REFERER")
-    app_title = os.environ.get("OPENROUTER_APP_TITLE")
-    if http_referer:
-        headers["HTTP-Referer"] = http_referer
-    if app_title:
-        headers["X-Title"] = app_title
+        completion_kwargs["extra_body"] = {"tools": [WEB_SEARCH_TOOL]}
 
     endpoint = f"{get_base_url().rstrip('/')}/chat/completions"
     logger.info(f"Calling OpenRouter chat completions at: {endpoint}")
-    response = httpx.post(endpoint, headers=headers, json=payload, timeout=120.0)
-
-    if response.status_code >= 400:
-        logger.error(f"OpenRouter API error {response.status_code}: {response.text}")
-        raise RuntimeError(f"OpenRouter API request failed with status {response.status_code}")
-
-    response_data = response.json()
-    choices = response_data.get("choices") or []
-    if not choices:
-        logger.error("OpenRouter response did not include any choices")
-        raise RuntimeError("OpenRouter response did not include any choices")
-
-    message = choices[0].get("message", {})
-    response_text = parse_openrouter_content(message.get("content"))
-    if not response_text:
-        logger.error("No response text received from OpenRouter")
-        raise RuntimeError("No response text received from OpenRouter")
-
-    return response_text
+    return create_openrouter_completion(**completion_kwargs)
 
 
 def call_openrouter_json(
@@ -273,7 +275,7 @@ def call_openrouter_json(
     if not OPENROUTER_API_KEY:
         raise RuntimeError("OPENROUTER_API_KEY not configured")
 
-    payload = {
+    completion_kwargs = {
         "model": selected_model,
         "messages": [
             {"role": "system", "content": system_instruction},
@@ -283,27 +285,11 @@ def call_openrouter_json(
         "temperature": temperature,
     }
     if enable_web_search:
-        payload["tools"] = [WEB_SEARCH_TOOL]
+        completion_kwargs["extra_body"] = {"tools": [WEB_SEARCH_TOOL]}
 
-    headers = {
-        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-        "Content-Type": "application/json",
-    }
     endpoint = f"{get_base_url().rstrip('/')}/chat/completions"
-    response = httpx.post(endpoint, headers=headers, json=payload, timeout=120.0)
-    if response.status_code >= 400:
-        raise RuntimeError(f"OpenRouter API request failed with status {response.status_code}")
-
-    response_data = response.json()
-    choices = response_data.get("choices") or []
-    if not choices:
-        raise RuntimeError("OpenRouter response did not include any choices")
-
-    message = choices[0].get("message", {})
-    response_text = parse_openrouter_content(message.get("content"))
-    if not response_text:
-        raise RuntimeError("No response text received from OpenRouter")
-    return response_text
+    logger.info(f"Calling OpenRouter chat completions at: {endpoint}")
+    return create_openrouter_completion(**completion_kwargs)
 
 
 def parse_questions(questions):

diff --git a/backend/api_service/prompts/cover_letter_sys.txt b/backend/api_service/prompts/cover_letter_sys.txt
@@ -8,27 +8,39 @@ Core behavior:
 - If the resume and project list disagree on employment or education facts, prefer the resume.
 - Never invent employers, dates, work authorization, locations, metrics, tools, products, or company facts.
 
+Default positioning:
+- Unless the job description clearly points elsewhere, position Devang as an AI infrastructure and backend systems engineer.
+- The strongest recurring market signals to surface when supported are backend/API engineering, scalability/reliability, performance/latency, product or user-facing delivery, LLM evaluation, full-stack engineering, RAG/retrieval, observability/monitoring/tracing, AI agents, Python, Docker/containerized execution, deterministic verifiers/grading, cloud platforms, model training/serving/inference, security/auth/guardrails, PostgreSQL/pgvector, and distributed systems.
+- Do not make the letter read like a generic LLM enthusiasm pitch. Balance AI/ML language with concrete backend, platform, evaluation, and shipped-system evidence.
+
+Role-angle guidance:
+- For AI infrastructure, agent platform, eval, or ML systems roles, lead with PilotCrew AI, LLM evaluation infrastructure, agent runners, tool-use traces, deterministic grading, prompt optimization, observability, and CausalFlow when relevant.
+- For backend, platform, full-stack, cloud, or product engineering roles, lead with Hexaview backend/API work, Gitartha-Engine, Loan-Portal, ResShare, REST APIs, PostgreSQL/pgvector, Go/FastAPI/Flask, Docker, reliability, and user-facing workflow delivery.
+- For ML research or model-quality roles, lead with PRM, CausalFlow, LearnHaus, LLM-as-Judge, LoRA fine-tuning, benchmark design, experimentation, and model-evaluation evidence.
+- For security, cyber, or guardrail roles, lead with Cyber-Bench, deterministic verification, ResShare authorization/access isolation, tool-use monitoring, and backend reliability. Do not overstate cybersecurity experience beyond the provided evidence.
 
 Internal process:
 1. Extract the 3-5 most important requirements from the job description.
-2. Scan the full project evidence bank and resume.
-3. Internally rank the strongest 2-3 matching projects or experiences.
-4. Use only the strongest evidence in the final letter. Do not mention weakly related projects.
-5. If web search is available, use it only for company, product, team, or recent company-context facts. Do not use web search for Devang's personal facts.
-6. Use at most one concrete company fact from search, and only if it is directly relevant to the role. If search results are thin or ambiguous, write a grounded role-fit sentence based on the job description instead.
+2. Choose the best role angle from the guidance above.
+3. Scan the full project evidence bank and resume.
+4. Internally rank the strongest 2-3 matching projects or experiences for that role angle.
+5. Use only the strongest evidence in the final letter. Do not mention weakly related projects.
+6. If web search is available, use it only for company, product, team, or recent company-context facts. Do not use web search for Devang's personal facts.
+7. Use at most one concrete company fact from search, and only if it is directly relevant to the role. If search results are thin or ambiguous, write a grounded role-fit sentence based on the job description instead.
 
 Evidence standards:
 - Every skill claim should be backed by a concrete project, work experience, technology, metric, or shipped system.
 - Prefer quantified evidence: latency, throughput, accuracy, F1, users, requests, retrieval time, cloud deployment, model size, or reliability outcome.
 - Mirror job-description keywords only when they accurately describe Devang's experience.
+- Prefer the terms employers repeatedly screen for, but only where Devang's evidence supports them: backend APIs, scalable systems, reliability, performance, LLM evaluation, observability, RAG/vector search, deterministic verifiers, Dockerized execution, cloud deployment, distributed systems, PostgreSQL/pgvector, security/auth, and model-quality workflows.
 - Avoid inflated or generic language such as "pioneering," "democratize," "mission-driven," "fast learner," "passionate," and "perfect fit" unless the job description itself uses that language and the claim is supported.
 
 Output format:
 - Return only the main body of the cover letter.
 - Output is pasted directly into a finished application document. Write as finalized prose only: no meta-commentary, no assistant-style sign-offs, and no invitations to elaborate (for example never write lines like "If you want I can explain more about the projects above," "feel free to ask," "let me know if you would like detail on," or similar).
 - Do not include header, address, date, greeting, salutation, signature, citations, raw URLs, markdown headings, or bullet characters.
 - Write 3-4 short paragraphs.
-- Paragraph 1: name the role if it is clear and connect the role's main need to Devang's strongest matching evidence.
+- Paragraph 1: name the role if it is clear and connect the role's main need to Devang's strongest matching evidence. Do not start with "I am writing to express my interest" unless the custom instructions explicitly ask for a traditional opening.
 - Paragraph 2: map 2-3 job requirements to specific resume/project evidence in prose.
 - Paragraph 3: include one company/product/team-specific sentence if web search or the job description provides a reliable fact; otherwise focus on role fit.
-- Paragraph 4: close confidently with interview-oriented momentum.
+- Paragraph 4: close confidently with interview-oriented momentum.
diff --git a/backend/api_service/prompts/full_resume_sys.txt b/backend/api_service/prompts/full_resume_sys.txt
@@ -8,6 +8,7 @@ Your job:
 - Choose the strongest projects from the provided project catalog for the target role.
 - Use the available one-page space. Do not produce a sparse or overly cautious resume.
 - Keep the resume within one page, but make the first draft full and substantive.
+- Unless the target role clearly points elsewhere, make the resume read as AI infrastructure plus backend systems engineering rather than a generic LLM resume.
 
 Evidence rules:
 - Use only the provided resume.yaml data, project catalog, job description, personal details, custom instructions, and reliable company research when available.
@@ -18,9 +19,24 @@ Evidence rules:
 - Every experience entry must use an id from the mandatory experience list.
 - Do not duplicate mandatory work experience as a project. If a body of work is already represented by an experience entry, keep it in Experience and choose different projects.
 
+Market-aligned emphasis:
+- High-frequency application themes to surface when supported: backend/API engineering, REST APIs, scalability/reliability, performance/latency, product or user-facing delivery, LLM evaluation, full-stack engineering, RAG/retrieval, vector search, observability/monitoring/tracing, AI agents, Python, Docker/containerized execution, deterministic verifiers/grading, cloud platforms, model training/serving/inference, security/auth/guardrails, testing/code quality, PostgreSQL/pgvector, and distributed systems.
+- Replace vague terms like "Harness Engineering" with clearer ATS terms such as LLM evaluation, agentic workflows, deterministic graders, tool-calling, observability, and backend APIs.
+- Keep coding-agent tool names secondary unless the job description explicitly asks for them.
+
+Role-angle project selection:
+- First choose the role angle from the job description, then choose projects that support it.
+- AI infrastructure, agent platform, or eval roles: prioritize causalflow, cyber-bench, data-science-bench, gemini-hardwarebench, and PilotCrew experience/supporting projects when available.
+- Backend, platform, full-stack, or product roles: prioritize gitartha-engine, loan-portal, resshare, and Hexaview experience.
+- ML research or model-quality roles: prioritize prm-on-device, causalflow, and PilotCrew/LearnHaus experience.
+- Security, guardrail, or cyber-adjacent roles: prioritize cyber-bench, resshare, and backend/security evidence from experience.
+- Developer tooling roles: prioritize llm-linter and cover-letter-generator when they fit, plus PilotCrew tooling/eval experience.
+- If a listed project id is not present in the provided project_catalog, do not invent it. Choose the strongest available catalog-backed substitute.
+
 One-page constraints:
 - Use exactly 5 skill groups unless a retry asks you to shorten.
 - Keep each skill group ATS-rich and comma-separated.
+- Prefer skill groups similar to: Languages; Backend/Infra; AI/ML Systems; ML/Frameworks; Cloud/APIs. Adjust names to fit the target role, but include supported high-frequency terms such as REST APIs, PostgreSQL/pgvector, observability, distributed systems, LLM evaluation, RAG/vector search, deterministic graders, Docker, AWS/GCP, and CI/CD when relevant.
 - Include exactly the mandatory experience entries.
 - Use 3 bullets for the current or strongest role.
 - Use 2 bullets for each other experience entry.