diff --git a/.env.example b/.env.example
index cae00ff..57f6f9b 100644
--- a/.env.example
+++ b/.env.example
@@ -1,11 +1,16 @@
 # LLM Provider Configuration
 # Options: "ollama" or "gemini"
-LLM_PROVIDER=ollama
+LLM_PROVIDER=gemini
 
 # Default model to use
 # For Ollama: "gemma3:4b", "qwen3:4b", "mistral:7b", etc.
 # For Gemini: "gemini-2.5-pro", "gemini-2.5-flash", etc.
-DEFAULT_MODEL=gemma3:4b
+DEFAULT_MODEL=gemini-2.5-flash
 
 # Google Gemini API Key (required if using Gemini provider)
-GEMINI_API_KEY=your_gemini_api_key_here
+# Example: GEMINI_API_KEY=your_gemini_api_key_here
+GEMINI_API_KEY=
+
+# Optional: Personal access token to increase GitHub API rate limits
+# Example: GITHUB_TOKEN=ghp_xxx
+GITHUB_TOKEN=
diff --git a/.gitignore b/.gitignore
index a2e75f9..010cae0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,9 @@ test_*.py
 cache/
 resume_evaluations.csv
 greenhouse_resumes/*
+*.pdf
+.venv_win/
+python-*.exe
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/README.md b/README.md
index 0396ef4..5fb21cd 100644
--- a/README.md
+++ b/README.md
@@ -215,6 +215,12 @@ What happens:
 2. If a GitHub profile is found in the resume, repositories are fetched and cached to `cache/githubcache_<basename>.json`.
 3. The evaluator prints a report and, in development mode, appends a CSV row to `resume_evaluations.csv`.
 
+### Flags
+
+- `--force`: bypass caches and fully re-extract from the PDF.
+- `--no-github`: skip GitHub enrichment (useful when rate-limited or offline).
+- `--max-workers N`: maximum number of resume sections extracted in parallel (default: 3). Lower it if the LLM provider returns HTTP 429 (rate-limit) errors.
+
 ---
 
 ## Directory layout
diff --git a/evaluator.py b/evaluator.py
index 1f9e91f..abad532 100644
--- a/evaluator.py
+++ b/evaluator.py
@@ -1,7 +1,12 @@
 from typing import Dict, List, Optional, Tuple, Any
+import hashlib
 from pydantic import BaseModel, Field, field_validator
 from models import JSONResume, EvaluationData
-from llm_utils import initialize_llm_provider, extract_json_from_response
+from llm_utils import (
+    initialize_llm_provider,
+    extract_json_from_response,
+    ensure_valid_json,
+)
 import logging
 import json
 import re
@@ -78,12 +83,39 @@ def evaluate_resume(self, resume_text: str) -> EvaluationData:
             response = self.provider.chat(**chat_params, **kwargs)
 
             response_text = response["message"]["content"]
-            response_text = extract_json_from_response(response_text)
-            logger.error(f"🔤 Prompt response: {response_text}")
+            cleaned_text = extract_json_from_response(response_text)
+            repaired_json_str = ensure_valid_json(
+                cleaned_text,
+                provider=self.provider,
+                model=self.model_name,
+                original_prompt=full_prompt,
+            )
+            logger.error(f"🔤 Prompt response: {repaired_json_str}")
+
+            try:
+                evaluation_dict = json.loads(repaired_json_str)
+            except Exception as e:
+                logger.error(f"Failed to parse evaluation JSON after repair attempts: {e}")
+                raise
 
-            evaluation_dict = json.loads(response_text)
             evaluation_data = EvaluationData(**evaluation_dict)
 
+            # Attach prompt/version metadata
+            template_sources = self.template_manager.get_all_template_sources()
+            template_hashes = {
+                name: hashlib.sha256(src.encode("utf-8")).hexdigest()
+                for name, src in template_sources.items()
+            }
+            evaluation_data.meta = {
+                "model": self.model_name,
+                "provider": MODEL_PROVIDER_MAPPING.get(self.model_name, None).value
+                if MODEL_PROVIDER_MAPPING.get(self.model_name, None)
+                else None,
+                "template_hashes": template_hashes,
+                "temperature": self.model_params.get("temperature"),
+                "top_p": self.model_params.get("top_p"),
+            }
+
             return evaluation_data
 
         except Exception as e:
diff --git a/github.py b/github.py
index 1c52bc6..210e966 100644
--- a/github.py
+++ b/github.py
@@ -57,23 +57,13 @@ def _fetch_github_api(api_url, params=None):
         # Log rate limit information and handle proactively
         if remaining < 10 and rate_limit_reset:
             reset_timestamp = int(rate_limit_reset)
-            current_timestamp = int(time.time())
-            wait_seconds = max(0, reset_timestamp - current_timestamp) + 5  # Add 5 second buffer
             reset_time = datetime.datetime.fromtimestamp(reset_timestamp)
-            
-            # Cap maximum wait time at 1 hour
-            max_wait = 3600
-            if wait_seconds > max_wait:
-                print(f"⚠️  Rate limit reset time is too far in the future ({wait_seconds}s). Capping wait to {max_wait}s")
-                wait_seconds = max_wait
-            
-            logger.error(f"⚠️  GitHub API rate limit low: {remaining}/{limit} requests remaining. Resets at {reset_time}")
-            print(f"💡 Tip: Set GITHUB_TOKEN environment variable to increase rate limits (60/hour → 5000/hour)")
-            
-            if wait_seconds > 0:
-                logger.info(f"⏳ Proactively sleeping for {wait_seconds} seconds until rate limit resets...")
-                time.sleep(wait_seconds)
-                print(f"✅ Rate limit should be reset now. Continuing...")
+            logger.error(
+                f"⚠️  GitHub API rate limit low: {remaining}/{limit} requests remaining. Resets at {reset_time}"
+            )
+            print(
+                "💡 Tip: Set GITHUB_TOKEN environment variable to increase rate limits (60/hour → 5000/hour). Continuing without delay."
+            )
         elif remaining < 100:
             logger.info(f"ℹ️  GitHub API rate limit: {remaining}/{limit} requests remaining")
     
@@ -210,24 +200,66 @@ def fetch_all_github_repos(github_url: str, max_repos: int = 100) -> List[Dict]:
         if status_code == 200:
             projects = []
             for repo in repos_data:
-                if repo.get("fork") and repo.get("forks_count", 0) < 5:
-                    continue
-
                 repo_name = repo.get("name")
+                if not repo_name:
+                    continue
 
+                # Fetch contributors for user's fork (or original if not fork)
                 contributors_data = fetch_repo_contributors(username, repo_name)
                 contributor_count = len(contributors_data)
-
                 user_contributions, total_contributions = fetch_contributions_count(
                     username, contributors_data
                 )
 
+                # Determine project type from this repo's own contributor count (upstream is not consulted)
                 project_type = (
                     "open_source" if contributor_count > 1 else "self_project"
                 )
 
+                github_details = {
+                    "stars": repo.get("stargazers_count", 0),
+                    "forks": repo.get("forks_count", 0),
+                    "language": repo.get("language"),
+                    "description": repo.get("description"),
+                    "created_at": repo.get("created_at"),
+                    "updated_at": repo.get("updated_at"),
+                    "topics": repo.get("topics", []),
+                    "open_issues": repo.get("open_issues_count", 0),
+                    "size": repo.get("size", 0),
+                    "fork": repo.get("fork", False),
+                    "archived": repo.get("archived", False),
+                    "default_branch": repo.get("default_branch"),
+                    "contributors": contributor_count,
+                }
+
+                upstream_details = None
+                if repo.get("fork"):
+                    # The list payload carries no "parent", so re-fetch the fork itself to read upstream stats (#155); low-fork forks are no longer skipped (#162)
+                    upstream_api = f"https://api.github.com/repos/{username}/{repo_name}"
+                    status_code, upstream_data = _fetch_github_api(upstream_api)
+                    if status_code == 200 and isinstance(upstream_data, dict):
+                        parent = upstream_data.get("parent")
+                        if parent:
+                            upstream_details = {
+                                "name": parent.get("name"),
+                                "owner": parent.get("owner", {}).get("login"),
+                                "html_url": parent.get("html_url"),
+                                "stars": parent.get("stargazers_count", 0),
+                                "forks": parent.get("forks_count", 0),
+                                "language": parent.get("language"),
+                                "topics": parent.get("topics", []),
+                                "description": parent.get("description"),
+                            }
+                            # Prefer upstream popularity metrics for evaluation
+                            github_details["stars"] = upstream_details["stars"]
+                            github_details["forks"] = upstream_details["forks"]
+                            github_details["topics"] = upstream_details["topics"]
+                            github_details["upstream_owner"] = upstream_details["owner"]
+                            github_details["upstream_name"] = upstream_details["name"]
+                            github_details["upstream_html_url"] = upstream_details["html_url"]
+
                 project = {
-                    "name": repo.get("name"),
+                    "name": repo_name,
                     "description": repo.get("description"),
                     "github_url": repo.get("html_url"),
                     "live_url": repo.get("homepage") if repo.get("homepage") else None,
@@ -238,21 +270,8 @@ def fetch_all_github_repos(github_url: str, max_repos: int = 100) -> List[Dict]:
                     "contributor_count": contributor_count,
                     "author_commit_count": user_contributions,
                     "total_commit_count": total_contributions,
-                    "github_details": {
-                        "stars": repo.get("stargazers_count", 0),
-                        "forks": repo.get("forks_count", 0),
-                        "language": repo.get("language"),
-                        "description": repo.get("description"),
-                        "created_at": repo.get("created_at"),
-                        "updated_at": repo.get("updated_at"),
-                        "topics": repo.get("topics", []),
-                        "open_issues": repo.get("open_issues_count", 0),
-                        "size": repo.get("size", 0),
-                        "fork": repo.get("fork", False),
-                        "archived": repo.get("archived", False),
-                        "default_branch": repo.get("default_branch"),
-                        "contributors": contributor_count,
-                    },
+                    "github_details": github_details,
+                    "upstream_details": upstream_details,
                 }
                 projects.append(project)
 
diff --git a/llm_utils.py b/llm_utils.py
index 7e1d96d..a143b99 100644
--- a/llm_utils.py
+++ b/llm_utils.py
@@ -3,6 +3,8 @@
 """
 
 import logging
+import json
+import hashlib
 from typing import Any, Dict, Optional
 from models import ModelProvider, OllamaProvider, GeminiProvider
 from prompt import MODEL_PROVIDER_MAPPING, GEMINI_API_KEY
@@ -37,6 +39,86 @@ def extract_json_from_response(response_text: str) -> str:
     return response_text
 
 
+def _try_parse_json(text: str) -> Optional[str]:
+    """Return ``text`` re-serialized as canonical JSON, or None if it does not parse."""
+    try:
+        canonical = json.dumps(json.loads(text), ensure_ascii=False)
+    except Exception:
+        return None
+    return canonical
+
+
+def ensure_valid_json(
+    response_text: str,
+    provider: Any = None,
+    model: Optional[str] = None,
+    original_prompt: Optional[str] = None,
+    max_repair_attempts: int = 2,
+) -> str:
+    """Return ``response_text`` as canonical JSON, repairing it when possible.
+
+    1. Parse the stripped text (fences / think tags are handled by the caller).
+    2. Otherwise parse the span from the first "{" to the last "}".
+    3. Otherwise, if ``provider`` and ``model`` are both given, ask the LLM up
+       to ``max_repair_attempts`` times for valid JSON; each reply gets the
+       same parse / brace-slice salvage.
+
+    When nothing parses, the stripped input is returned unchanged, so callers
+    must still handle ``json.loads`` failing on the result.
+    """
+    def _salvage(text: str) -> Optional[str]:
+        """Parse ``text`` as-is, then fall back to its outermost {...} span."""
+        parsed = _try_parse_json(text)
+        if parsed is not None:
+            return parsed
+        start, end = text.find("{"), text.rfind("}")
+        if start != -1 and end > start:
+            return _try_parse_json(text[start : end + 1])
+        return None
+
+    cleaned = response_text.strip()
+    parsed = _salvage(cleaned)
+    if parsed is not None:
+        return parsed
+
+    if provider and model:
+        # The repair request is identical for every attempt, so build it once.
+        repair_messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You previously returned malformed JSON. Return ONLY valid JSON for the same task. "
+                    "No explanations, code fences, or commentary. If fields are missing, infer minimal plausible empty values."
+                ),
+            },
+            {
+                "role": "user",
+                "content": (
+                    f"Original prompt:\n{original_prompt or '<none>'}"
+                    f"\nMalformed JSON response:\n{cleaned}"
+                    "\nReturn ONLY repaired JSON now."
+                ),
+            },
+        ]
+        for attempt in range(max_repair_attempts):
+            try:
+                repair_resp = provider.chat(
+                    model=model,
+                    messages=repair_messages,
+                    # Low creativity: a repair should be mechanical.
+                    options={"temperature": 0.0, "top_p": 0.9},
+                )
+                candidate = extract_json_from_response(repair_resp["message"]["content"])
+                parsed = _salvage(candidate)
+                if parsed is not None:
+                    return parsed
+            except Exception as e:
+                logger.warning(f"JSON repair attempt {attempt+1} failed: {e}")
+
+    # Nothing parsed: hand back the stripped text so the caller can log and skip.
+    return cleaned
+
+
 def initialize_llm_provider(model_name: str) -> Any:
     """
     Initialize the appropriate LLM provider based on the model name.
diff --git a/models.py b/models.py
index e83779e..c9e9e9e 100644
--- a/models.py
+++ b/models.py
@@ -158,6 +158,8 @@ class Project(BaseModel):
     description: Optional[str] = None
     highlights: Optional[List[str]] = None
     url: Optional[str] = None
+    repo_url: Optional[str] = None
+    live_url: Optional[str] = None
     technologies: Optional[List[str]] = None
     skills: Optional[List[str]] = None
 
@@ -198,6 +200,12 @@ class AwardsSection(BaseModel):
     awards: Optional[List[Award]] = None
 
 
+class LanguagesSection(BaseModel):
+    """Wrapper for the resume "languages" section: an optional list of Language entries."""
+
+    languages: Optional[List[Language]] = None
+
+
 class JSONResume(BaseModel):
     """Complete JSON Resume format model."""
 
@@ -247,6 +255,7 @@ class EvaluationData(BaseModel):
     deductions: Deductions
     key_strengths: List[str] = Field(min_items=1, max_items=5)
     areas_for_improvement: List[str] = Field(min_items=1, max_items=5)
+    meta: Optional[Dict[str, Any]] = None  # run metadata (the evaluator currently sets model, provider, template_hashes, temperature, top_p)
 
 
 class GitHubProfile(BaseModel):
diff --git a/pdf.py b/pdf.py
index 296db47..666d3dc 100644
--- a/pdf.py
+++ b/pdf.py
@@ -3,6 +3,7 @@
 import json
 import time
 import logging
+import re
 import pymupdf
 
 from models import (
@@ -13,14 +14,20 @@
     Skill,
     Project,
     Award,
+    Language,
     BasicsSection,
     WorkSection,
     EducationSection,
     SkillsSection,
     ProjectsSection,
     AwardsSection,
+    LanguagesSection,
+)
+from llm_utils import (
+    initialize_llm_provider,
+    extract_json_from_response,
+    ensure_valid_json,
 )
-from llm_utils import initialize_llm_provider, extract_json_from_response
 from pymupdf_rag import to_markdown
 from typing import List, Optional, Dict, Any
 from prompt import (
@@ -37,9 +44,14 @@
 
 class PDFHandler:
 
-    def __init__(self):
+    def __init__(self, max_workers: int = 3):
         self.template_manager = TemplateManager()
         self._initialize_llm_provider()
+        # Limit the concurrency for section extraction to reduce rate limits
+        try:
+            self.max_workers = int(max_workers) if max_workers and max_workers > 0 else 3
+        except Exception:
+            self.max_workers = 3
 
     def _initialize_llm_provider(self):
         """Initialize the appropriate LLM provider based on the model."""
@@ -103,33 +115,70 @@ def _call_llm_for_section(
             if return_model:
                 kwargs["format"] = return_model.model_json_schema()
 
-            # Use the appropriate provider to make the API call
-            response = self.provider.chat(**chat_params, **kwargs)
+            # Retry logic for rate limits / transient failures
+            max_attempts = 3
+            attempt = 0
+            response_text = None
+            while attempt < max_attempts and response_text is None:
+                attempt += 1
+                try:
+                    response = self.provider.chat(**chat_params, **kwargs)
+                    response_text = response["message"]["content"]
+                except Exception as e:
+                    err_msg = str(e)
+                    if "429" in err_msg or "quota" in err_msg.lower():
+                        # Parse suggested retry delay if present
+                        retry_delay = 8
+                        m = re.search(r"retry in ([0-9]+(?:\.[0-9]+)?)s", err_msg)
+                        if m:
+                            try:
+                                retry_delay = min(float(m.group(1)) + 1, 30)
+                            except Exception:
+                                pass
+                        logger.warning(
+                            f"⚠️ Rate limit for {section_name} (attempt {attempt}/{max_attempts}). Sleeping {retry_delay:.1f}s before retry."
+                        )
+                        time.sleep(retry_delay)
+                        continue
+                    else:
+                        logger.error(
+                            f"❌ Non-retryable error extracting {section_name}: {e}"
+                        )
+                        return None
+
+            if response_text is None:
+                logger.error(
+                    f"❌ Exhausted retries for {section_name} due to rate limits."
+                )
+                return None
 
-            response_text = response["message"]["content"]
+            cleaned = extract_json_from_response(response_text)
+            repaired = ensure_valid_json(
+                cleaned,
+                provider=self.provider,
+                model=DEFAULT_MODEL,
+                original_prompt=prompt,
+            )
 
             try:
-                response_text = extract_json_from_response(response_text)
-                json_start = response_text.find("{")
-                json_end = response_text.rfind("}")
-                if json_start != -1 and json_end != -1:
-                    response_text = response_text[json_start : json_end + 1]
-                parsed_data = json.loads(response_text)
+                parsed_data = json.loads(repaired)
                 logger.debug(f"✅ Successfully extracted {section_name} section")
-
-                transformed_data = transform_parsed_data(parsed_data)
-                end_time = time.time()
-                total_time = end_time - start_time
-                logger.debug(
-                    f"⏱️ Total time for separate section extraction: {total_time:.2f} seconds"
-                )
-
-                return transformed_data
             except json.JSONDecodeError as e:
-                logger.error(f"❌ Error parsing JSON for {section_name} section: {e}")
-                logger.error(f"Raw response: {response_text}")
+                logger.error(
+                    f"❌ Error parsing JSON for {section_name} section after repair attempts: {e}"
+                )
+                logger.error(f"Raw repaired text: {repaired}")
                 return None
 
+            transformed_data = transform_parsed_data(parsed_data)
+            end_time = time.time()
+            total_time = end_time - start_time
+            logger.debug(
+                f"⏱️ Total time for separate section extraction: {total_time:.2f} seconds"
+            )
+
+            return transformed_data
+
         except Exception as e:
             logger.error(f"❌ Error calling LLM for {section_name} section: {e}")
             return None
@@ -190,6 +239,15 @@ def extract_awards_section(self, resume_text: str) -> Optional[Dict]:
             return None
         return self._call_llm_for_section("awards", resume_text, prompt, AwardsSection)
 
+    def extract_languages_section(self, resume_text: str) -> Optional[Dict]:
+        """Render the "languages" prompt for ``resume_text`` and run the LLM section call."""
+        rendered = self.template_manager.render_template("languages", text_content=resume_text)
+        # A falsy prompt means the template could not be rendered; skip the LLM call.
+        if rendered:
+            return self._call_llm_for_section("languages", resume_text, rendered, LanguagesSection)
+        logger.error("❌ Failed to render languages template")
+        return None
+
     def extract_json_from_text(self, resume_text: str) -> Optional[JSONResume]:
         try:
             return self._extract_all_sections_separately(resume_text)
@@ -227,6 +285,7 @@ def _extract_section_data(
             "skills": self.extract_skills_section,
             "projects": self.extract_projects_section,
             "awards": self.extract_awards_section,
+            "languages": self.extract_languages_section,
         }
 
         if section_name not in section_extractors:
@@ -269,7 +328,7 @@ def _extract_all_sections_separately(
     ) -> Optional[JSONResume]:
         start_time = time.time()
 
-        sections = ["basics", "work", "education", "skills", "projects", "awards"]
+        sections = ["basics", "work", "education", "skills", "projects", "awards", "languages"]
 
         complete_resume = {
             "basics": None,
@@ -287,14 +346,156 @@ def _extract_all_sections_separately(
             "meta": None,
         }
 
-        for section_name in sections:
-            section_data = self._extract_section_data(text_content, section_name)
+        # Parallel extraction using threads (I/O bound network calls)
+        from concurrent.futures import ThreadPoolExecutor, as_completed
 
-            if section_data:
-                complete_resume.update(section_data)
-                logger.debug(f"✅ Successfully extracted {section_name} section")
-            else:
-                logger.error(f"⚠️ Failed to extract {section_name} section")
+        results = {}
+        # Constrain parallelism to reduce rate limit pressure
+        parallel_workers = self.max_workers
+        with ThreadPoolExecutor(max_workers=min(len(sections), parallel_workers)) as executor:
+            future_map = {
+                executor.submit(self._extract_section_data, text_content, section_name): section_name
+                for section_name in sections
+            }
+            for future in as_completed(future_map):
+                sec = future_map[future]
+                try:
+                    section_data = future.result()
+                    if section_data:
+                        results[sec] = section_data
+                        complete_resume.update(section_data)
+                        logger.debug(f"✅ Successfully extracted {sec} section (parallel)")
+                    else:
+                        logger.error(f"⚠️ Failed to extract {sec} section")
+                except Exception as e:
+                    logger.error(f"❌ Exception extracting {sec} section: {e}")
+
+        # Fallback: if all sections failed in parallel, retry sequentially with small delay
+        if not results:
+            logger.warning("⚠️ Parallel extraction returned no sections. Retrying sequentially to mitigate rate limits.")
+            for sec in sections:
+                try:
+                    section_data = self._extract_section_data(text_content, sec, None)
+                    if section_data:
+                        complete_resume.update(section_data)
+                        results[sec] = section_data
+                        logger.debug(f"✅ Sequentially extracted {sec} section")
+                    else:
+                        logger.error(f"⚠️ Sequential retry failed for {sec} section")
+                    time.sleep(2)  # gentle pacing to avoid hitting per-minute limits
+                except Exception as e:
+                    logger.error(f"❌ Exception in sequential retry for {sec}: {e}")
+        else:
+            # Targeted retries for only the missing sections (avoid flooding API)
+            missing = [s for s in sections if complete_resume.get(s) is None]
+            if missing:
+                logger.warning(f"⚠️ Missing sections after parallel run: {missing}. Retrying them sequentially with pacing.")
+                for sec in missing:
+                    try:
+                        section_data = self._extract_section_data(text_content, sec, None)
+                        if section_data:
+                            complete_resume.update(section_data)
+                            results[sec] = section_data
+                            logger.debug(f"✅ Filled missing {sec} section via sequential retry")
+                        else:
+                            logger.error(f"⚠️ Sequential retry could not extract {sec} section")
+                        time.sleep(2)
+                    except Exception as e:
+                        logger.error(f"❌ Exception retrying missing {sec} section: {e}")
+
+        # Fallback heuristics for skills & projects if still None
+        def _simple_skill_extraction(text: str):
+            """Heuristic skills fallback: list the known technologies named in ``text``.
+
+            Keywords must match as whole tokens, so "javascript" does not also count
+            as "java", "github" as "git", or "laws" as "aws". Returns a one-entry
+            skills payload, or None when no keyword is found.
+            """
+            tech_keywords = [
+                "python", "java", "javascript", "typescript",
+                "react", "node", "django", "flask",
+                "aws", "docker", "kubernetes",
+                "postgres", "mysql", "mongodb",
+                "git", "linux", "tensorflow", "pytorch", "llm",
+            ]
+            # Common spellings that should still count toward the canonical keyword.
+            aliases = {
+                "postgres": r"postgres(?:ql)?",
+                "node": r"node(?:js)?",
+                "react": r"react(?:js)?",
+            }
+            lower = text.lower()
+            # Alphanumeric look-arounds act as token boundaries ("node.js" still hits "node").
+            found = sorted(
+                kw
+                for kw in tech_keywords
+                if re.search(rf"(?<![a-z0-9]){aliases.get(kw, re.escape(kw))}(?![a-z0-9])", lower)
+            )
+            if not found:
+                return None
+            return {
+                "skills": [
+                    {"name": "Technologies", "level": None, "keywords": found}
+                ]
+            }
+
+        def _simple_projects_extraction(text: str):
+            """Derive stand-in projects from work highlights that describe building something.
+
+            ``text`` is unused; entries come from ``complete_resume["work"]``. Returns
+            ``{"projects": [...]}``, or None when no highlight qualifies.
+            """
+            build_verbs = ("developed", "built", "engineered", "implemented", "created")
+            work = complete_resume.get("work")
+            projects = []
+            for job in work if isinstance(work, list) else []:
+                highlights = job.get("highlights") if isinstance(job, dict) else None
+                for h in highlights or []:
+                    # Skip null / non-text items that an LLM may emit inside the list.
+                    if isinstance(h, str) and any(verb in h.lower() for verb in build_verbs):
+                        name = h[:60] + ("..." if len(h) > 60 else "")
+                        projects.append({"name": name, "description": h, "highlights": [h]})
+            return {"projects": projects} if projects else None
+
+        if complete_resume.get("skills") is None:
+            fallback_skills = _simple_skill_extraction(text_content)
+            if fallback_skills:
+                complete_resume.update(fallback_skills)
+                logger.warning("⚠️ Applied heuristic fallback for skills section.")
+        if complete_resume.get("projects") is None:
+            fallback_projects = _simple_projects_extraction(text_content)
+            if fallback_projects:
+                complete_resume.update(fallback_projects)
+                logger.warning("⚠️ Applied heuristic fallback for projects section from work highlights.")
+
+        # Heuristic fallback for spoken languages if missing
+        def _simple_languages_extraction(text: str):
+            """Parse the first "Language(s): ..." line into entries; None if absent or on error."""
+            try:
+                for raw in text.splitlines():
+                    line = raw.strip()
+                    if not line.lower().startswith(("languages:", "language:")):
+                        continue
+                    entries = []
+                    for chunk in re.split(r",|;", line.split(":", 1)[1].strip()):
+                        item = chunk.strip()
+                        if not item:
+                            continue
+                        # "English (Professional)" carries a fluency; bare "English" does not.
+                        hit = re.match(r"^(.*?)\s*\((.*?)\)$", item)
+                        name, fluency = (hit.group(1).strip(), hit.group(2).strip()) if hit else (item, None)
+                        entries.append({"language": name, "fluency": fluency})
+                    # Only the first matching line is considered, even when it lists nothing.
+                    return {"languages": entries} if entries else None
+            except Exception:
+                return None
+            return None
+
+        if complete_resume.get("languages") in (None, []):
+            fallback_langs = _simple_languages_extraction(text_content)
+            if fallback_langs:
+                complete_resume.update(fallback_langs)
+                logger.warning("⚠️ Applied heuristic fallback for languages section.")
 
         try:
             if complete_resume.get("basics") and isinstance(
@@ -308,6 +509,14 @@ def _extract_all_sections_separately(
 
             json_resume = JSONResume(**complete_resume)
 
+            # If still completely empty, log explicit warning
+            all_empty = all(
+                getattr(json_resume, s, None) in (None, [], {})
+                for s in ["basics", "work", "education", "skills", "projects", "awards"]
+            )
+            if all_empty:
+                logger.warning("⚠️ Extraction produced an empty resume (all key sections None).")
+
             end_time = time.time()
             total_time = end_time - start_time
             logger.info(
diff --git a/prompts/template_manager.py b/prompts/template_manager.py
index b68f680..3b7b8d0 100644
--- a/prompts/template_manager.py
+++ b/prompts/template_manager.py
@@ -41,6 +41,7 @@ def _load_templates(self):
             "skills": "skills.jinja",
             "projects": "projects.jinja",
             "awards": "awards.jinja",
+            "languages": "languages.jinja",
             "system_message": "system_message.jinja",
             "github_project_selection": "github_project_selection.jinja",
             "resume_evaluation_criteria": "resume_evaluation_criteria.jinja",
@@ -57,6 +58,29 @@ def _load_templates(self):
             except Exception as e:
                 print(f"❌ Error loading template {filename}: {e}")
 
+    def get_template_source(self, section_name: str) -> Optional[str]:
+        """Fetch the unrendered text of a loaded template, or None when unavailable."""
+        if section_name in self._templates:
+            try:
+                template_loader = self.env.loader
+                if hasattr(template_loader, "get_source"):
+                    # get_source yields (source, filename, uptodate); keep the text only
+                    template_name = self._templates[section_name].name
+                    return template_loader.get_source(self.env, template_name)[0]
+            except Exception:
+                # Best-effort lookup: any loader failure is reported as "no source"
+                pass
+        return None
+
+    def get_all_template_sources(self) -> dict:
+        """Collect raw source text keyed by section name, omitting unavailable ones."""
+        # Resolve each loaded section lazily, then keep only those with a source.
+        resolved = ((key, self.get_template_source(key)) for key in self._templates)
+        return {
+            key: text for key, text in resolved
+            if text is not None
+        }
+
     def get_available_sections(self) -> list:
         """
         Get list of available section names.
diff --git a/prompts/templates/basics.jinja b/prompts/templates/basics.jinja
index d02320c..4adda27 100644
--- a/prompts/templates/basics.jinja
+++ b/prompts/templates/basics.jinja
@@ -6,7 +6,7 @@ Extract ONLY the basic information (name, email, phone, location, profiles) from
 
 --- The input resume markdown ends here ---
 
-Return ONLY a JSON object with this structure:
+Return ONLY a JSON object with this structure (fill fields ONLY if present in the resume; otherwise use null):
 {
   "basics": {
     "name": "Full name",
@@ -15,8 +15,11 @@ Return ONLY a JSON object with this structure:
     "url": null,
     "summary": null,
     "location": {
+      "address": null,
+      "postalCode": null,
       "city": "City",
-      "countryCode": "Country code"
+      "countryCode": "Country code",
+      "region": null
     },
     "profiles": [
       {
diff --git a/prompts/templates/languages.jinja b/prompts/templates/languages.jinja
new file mode 100644
index 0000000..342bab0
--- /dev/null
+++ b/prompts/templates/languages.jinja
@@ -0,0 +1,23 @@
+Extract ONLY the spoken languages from this resume.
+
+--- The input resume markdown starts here ---
+
+{{ text_content }}
+
+--- The input resume markdown ends here ---
+
+Return ONLY a JSON object with this structure:
+{
+  "languages": [
+    {
+      "language": "Language name",
+      "fluency": "Fluency level (e.g., Native, Professional, Intermediate)"
+    }
+  ]
+}
+
+Rules:
+- Only include languages explicitly present in the resume text.
+- If fluency/level is not stated, set "fluency" to null.
+- Do not invent or infer languages.
+- Return valid JSON only; no extra commentary.
diff --git a/prompts/templates/projects.jinja b/prompts/templates/projects.jinja
index feea98a..bf1be56 100644
--- a/prompts/templates/projects.jinja
+++ b/prompts/templates/projects.jinja
@@ -11,11 +11,16 @@ Return ONLY a JSON object with this structure:
   "projects": [
     {
       "name": "Project name",
-      "description": "Project description",
-      "url": "Project URL",
+      "description": "Short description in one sentence",
+      "repo_url": "GitHub/GitLab repository URL if present, else null",
+      "live_url": "Public live demo/hosted URL if present, else null",
       "technologies": ["Tech 1", "Tech 2"]
     }
   ]
 }
 
+Guidelines:
+- Prefer putting source code link in "repo_url" and hosted demo link (e.g. vercel.app, netlify.app, render.com, fly.dev, onrender.com, herokuapp.com) in "live_url".
+- If both links exist, fill both; if only one exists, set the other to null.
+
 **IMPORTANT**: Return ONLY valid JSON. Do not include any explanatory text. 
\ No newline at end of file
diff --git a/score.py b/score.py
index b0944dd..7a25b52 100644
--- a/score.py
+++ b/score.py
@@ -3,6 +3,7 @@
 import json
 import logging
 import csv
+import argparse
 from pdf import PDFHandler
 from github import fetch_and_display_github_info
 from models import JSONResume, EvaluationData
@@ -17,6 +18,8 @@
     convert_blog_data_to_text,
 )
 from config import DEVELOPMENT_MODE
+from prompts.template_manager import TemplateManager
+import hashlib
 
 logger = logging.getLogger(__name__)
 
@@ -197,7 +200,38 @@ def find_profile(profiles, network):
     )
 
 
-def main(pdf_path):
+def _is_empty_resume(resume_data: JSONResume) -> bool:
+    """Return True when none of the key resume sections holds real content.
+
+    Sections checked: basics, work, education, skills, projects, awards.
+    A model-typed section (e.g. ``basics``) is a truthy object even when all
+    of its fields are null, so it only counts as content when at least one
+    declared field is populated; nested models are inspected the same way.
+    Non-empty lists, dicts and other truthy values count as content.
+    """
+
+    def _has_content(value) -> bool:
+        # None, "", [], {} and other falsy values never count as content.
+        if not value:
+            return False
+        # Classes exposing ``model_fields`` are treated as pydantic models.
+        fields = getattr(type(value), "model_fields", None)
+        if fields:
+            # An all-null model is empty, not content; look inside its fields.
+            return any(_has_content(getattr(value, f, None)) for f in fields)
+        return True
+
+    # A missing resume object (None) is empty by definition.
+    if not resume_data:
+        return True
+    key_sections = ("basics", "work", "education", "skills", "projects", "awards")
+    return not any(
+        _has_content(getattr(resume_data, section, None))
+        for section in key_sections
+    )
+
+
+def main(pdf_path, force: bool = False, no_github: bool = False, max_workers: int = 3):
     # Create cache filename based on PDF path
     cache_filename = (
         f"cache/resumecache_{os.path.basename(pdf_path).replace('.pdf', '')}.json"
@@ -207,16 +241,40 @@ def main(pdf_path):
     )
 
     # Check if cache exists and we're in development mode
-    if DEVELOPMENT_MODE and os.path.exists(cache_filename):
+    if not force and DEVELOPMENT_MODE and os.path.exists(cache_filename):
         print(f"Loading cached data from {cache_filename}")
-        cached_data = json.loads(Path(cache_filename).read_text())
-        resume_data = JSONResume(**cached_data)
+        cached_raw = json.loads(Path(cache_filename).read_text())
+
+        # Validate cache metadata if present
+        use_cache = True
+        cache_meta = cached_raw.get("_cache_meta")
+        if cache_meta:
+            # Verify file hash
+            try:
+                with open(pdf_path, "rb") as f:
+                    data = f.read()
+                file_hash = hashlib.md5(data).hexdigest()
+                if cache_meta.get("file_hash") != file_hash:
+                    print("⚠️ Resume file changed since cache was written. Ignoring cached resume.")
+                    use_cache = False
+                # Verify model (stored template hashes are not compared here)
+                if cache_meta.get("model") != DEFAULT_MODEL:
+                    print("⚠️ Model changed since cache was written. Ignoring cached resume.")
+                    use_cache = False
+            except Exception:
+                use_cache = False
+
+        if use_cache:
+            cached_data = cached_raw.get("data", cached_raw)
+            resume_data = JSONResume(**cached_data)
+        else:
+            resume_data = None
     else:
         logger.debug(
             f"Extracting data from PDF"
             + (" and caching to " + cache_filename if DEVELOPMENT_MODE else "")
         )
-        pdf_handler = PDFHandler()
+        pdf_handler = PDFHandler(max_workers=max_workers)
         resume_data = pdf_handler.extract_json_from_pdf(pdf_path)
 
         if resume_data == None:
@@ -224,17 +282,55 @@ def main(pdf_path):
 
         if DEVELOPMENT_MODE:
             os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
-            Path(cache_filename).write_text(
-                json.dumps(resume_data.model_dump(), indent=2, ensure_ascii=False),
-                encoding='utf-8'
-            )
+            # Write cache with metadata to allow validation later
+            tm = TemplateManager()
+            template_sources = tm.get_all_template_sources()
+            template_hashes = {name: hashlib.sha256(src.encode("utf-8")).hexdigest() for name, src in template_sources.items()}
+            with open(cache_filename, "w", encoding="utf-8") as fh:
+                wrapper = {
+                    "_cache_meta": {
+                        "file_hash": hashlib.md5(open(pdf_path, "rb").read()).hexdigest(),
+                        "model": DEFAULT_MODEL,
+                        "template_hashes": template_hashes,
+                    },
+                    "data": resume_data.model_dump(),
+                }
+                fh.write(json.dumps(wrapper, indent=2, ensure_ascii=False))
 
     # Check if cache exists and we're in development mode
     github_data = {}
-    if DEVELOPMENT_MODE and os.path.exists(github_cache_filename):
+    gh_cache_exists = os.path.exists(github_cache_filename)
+    use_gh_cache = (not force) and DEVELOPMENT_MODE and gh_cache_exists
+    if no_github:
+        print("Skipping GitHub fetch due to --no-github flag")
+        github_data = {}
+    elif use_gh_cache:
         print(f"Loading cached data from {github_cache_filename}")
-        github_data = json.loads(Path(github_cache_filename).read_text())
-    else:
+        try:
+            cached_raw = json.loads(Path(github_cache_filename).read_text())
+        except Exception as e:
+            print(f"⚠️ Failed to read GitHub cache: {e}. Will refetch.")
+            cached_raw = None
+
+        cache_valid = False
+        if cached_raw:
+            cache_meta = cached_raw.get("_cache_meta")
+            if cache_meta and cache_meta.get("model") != DEFAULT_MODEL:
+                print("⚠️ GitHub cache model mismatch. Ignoring cached GitHub data.")
+            else:
+                candidate = cached_raw.get("data", cached_raw)
+                # Consider cache invalid if empty or missing profile/projects
+                if candidate and isinstance(candidate, dict):
+                    total_projects = candidate.get("total_projects")
+                    profile = candidate.get("profile")
+                    has_username = bool(profile and profile.get("username"))
+                    has_projects = isinstance(candidate.get("projects"), list) and len(candidate.get("projects")) > 0
+                    if has_username and (has_projects or (isinstance(total_projects, int) and total_projects > 0)):
+                        github_data = candidate
+                        cache_valid = True
+        if not cache_valid:
+            print("⚠️ GitHub cache is empty or invalid. Fetching fresh data...")
+    if (not no_github) and (not use_gh_cache or (use_gh_cache and not cache_valid)):
         print(
             f"Fetching GitHub data"
             + (" and caching to " + github_cache_filename if DEVELOPMENT_MODE else "")
@@ -250,10 +346,43 @@ def main(pdf_path):
             github_data = fetch_and_display_github_info(github_profile.url)
         if DEVELOPMENT_MODE:
             os.makedirs(os.path.dirname(github_cache_filename), exist_ok=True)
-            Path(github_cache_filename).write_text(
-                json.dumps(github_data, indent=2, ensure_ascii=False),
-                encoding='utf-8'
-            )
+            with open(github_cache_filename, "w", encoding="utf-8") as fh:
+                wrapper = {
+                    "_cache_meta": {
+                        "model": DEFAULT_MODEL,
+                    },
+                    "data": github_data,
+                }
+                fh.write(json.dumps(wrapper, indent=2, ensure_ascii=False))
+
+    # Re-extract when --force is set or the loaded resume is empty/missing
+    if (force or _is_empty_resume(resume_data)):
+        if _is_empty_resume(resume_data):
+            print("⚠️ Cached resume appears empty. Attempting re-extraction...")
+        pdf_handler = PDFHandler(max_workers=max_workers)
+        fresh_resume = pdf_handler.extract_json_from_pdf(pdf_path)
+        if fresh_resume and not _is_empty_resume(fresh_resume):
+            resume_data = fresh_resume
+            if DEVELOPMENT_MODE:
+                try:
+                    tm = TemplateManager()
+                    template_sources = tm.get_all_template_sources()
+                    template_hashes = {name: hashlib.sha256(src.encode("utf-8")).hexdigest() for name, src in template_sources.items()}
+                    with open(cache_filename, "w", encoding="utf-8") as fh:
+                        wrapper = {
+                            "_cache_meta": {
+                                "file_hash": hashlib.md5(open(pdf_path, "rb").read()).hexdigest(),
+                                "model": DEFAULT_MODEL,
+                                "template_hashes": template_hashes,
+                            },
+                            "data": resume_data.model_dump(),
+                        }
+                        fh.write(json.dumps(wrapper, indent=2, ensure_ascii=False))
+                    print("✅ Re-extracted resume and updated cache.")
+                except Exception as e:
+                    print(f"⚠️ Failed to update cache after re-extraction: {e}")
+        else:
+            print("❌ Re-extraction failed or still empty; proceeding with existing data.")
 
     score = _evaluate_resume(resume_data, github_data)
 
@@ -297,13 +426,15 @@ def main(pdf_path):
 
 
 if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        print("Usage: python score.py <pdf_path>")
-        exit(1)
-    pdf_path = sys.argv[1]
-
-    if not os.path.exists(pdf_path):
-        print(f"Error: File '{pdf_path}' does not exist.")
+    parser = argparse.ArgumentParser(description="Evaluate a resume PDF and output scores.")
+    parser.add_argument("pdf_path", help="Path to the resume PDF file")
+    parser.add_argument("--force", action="store_true", help="Bypass caches and re-extract")
+    parser.add_argument("--no-github", action="store_true", help="Skip GitHub fetch and enrichment")
+    parser.add_argument("--max-workers", type=int, default=3, help="Max parallel section extractions (default: 3)")
+    args = parser.parse_args()
+
+    if not os.path.exists(args.pdf_path):
+        print(f"Error: File '{args.pdf_path}' does not exist.")
         exit(1)
 
-    main(pdf_path)
+    main(args.pdf_path, force=args.force, no_github=args.no_github, max_workers=args.max_workers)
diff --git a/transform.py b/transform.py
index 25eab1d..b49af68 100644
--- a/transform.py
+++ b/transform.py
@@ -330,6 +330,8 @@ def transform_projects(projects_list: List) -> List[Dict]:
             if not skills and technologies:
                 skills = technologies
 
+            repo_url = item.get("repo_url") or item.get("url")
+            live_url = item.get("live_url")
             transformed.append(
                 {
                     "name": item.get("name", ""),
@@ -337,7 +339,9 @@ def transform_projects(projects_list: List) -> List[Dict]:
                     "endDate": None,
                     "description": item.get("description", ""),
                     "highlights": [item.get("type", "")] if item.get("type") else [],
-                    "url": item.get("url", None),
+                    "url": repo_url or live_url,
+                    "repo_url": repo_url,
+                    "live_url": live_url,
                     "technologies": technologies,
                     "skills": skills,
                 }
@@ -393,6 +397,8 @@ def transform_projects_comprehensive(parsed_data: Dict) -> List[Dict]:
                         skills = [skill.strip() for skill in skills_part.split(",")]
                         item["name"] = name_parts[0].strip()
 
+                repo_url = item.get("repo_url") or item.get("url")
+                live_url = item.get("live_url")
                 projects.append(
                     {
                         "name": item.get("name", ""),
@@ -400,7 +406,9 @@ def transform_projects_comprehensive(parsed_data: Dict) -> List[Dict]:
                         "endDate": None,
                         "description": item.get("summary", ""),
                         "highlights": [],
-                        "url": item.get("url", None),
+                        "url": repo_url or live_url,
+                        "repo_url": repo_url,
+                        "live_url": live_url,
                         "technologies": item.get("technologies", []),
                         "skills": skills,
                     }
@@ -823,8 +831,17 @@ def convert_json_resume_to_text(resume_data: JSONResume) -> str:
                 text_parts.append(f"   Period: {project.startDate} - {project.endDate}")
             if project.description:
                 text_parts.append(f"   Description: {project.description}")
-            if project.url:
-                text_parts.append(f"   URL: {project.url}")
+            # Prefer separating Repo vs Live Demo for clarity in evaluation
+            repo_url = getattr(project, "repo_url", None) or (
+                project.url if (project.url and "github.com" in project.url) else None
+            )
+            live_url = getattr(project, "live_url", None)
+            if not live_url and project.url and not ("github.com" in project.url):
+                live_url = project.url
+            if repo_url:
+                text_parts.append(f"   Repo URL: {repo_url}")
+            if live_url:
+                text_parts.append(f"   Live Demo: {live_url}")
             if project.highlights:
                 text_parts.append("   Highlights:")
                 for highlight in project.highlights:
@@ -910,7 +927,11 @@ def convert_github_data_to_text(github_data: dict) -> str:
         for i, project in enumerate(projects[:10], 1):
             github_text += f"{i}. {project.get('name', 'N/A')}\n"
             github_text += f"   Description: {project.get('description', 'N/A')}\n"
-            github_text += f"   URL: {project.get('github_url', 'N/A')}\n"
+            github_url = project.get('github_url') or 'N/A'
+            github_text += f"   Repo URL: {github_url}\n"
+            live_url = project.get('live_url')
+            if live_url:
+                github_text += f"   Live Demo: {live_url}\n"
             if "github_details" in project:
                 details = project["github_details"]
                 github_text += f"   Stars: {details.get('stars', 'N/A')}\n"