Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# LLM provider — set one of: openai, anthropic, google
CONCLAVE_LLM_PROVIDER=openai
CONCLAVE_OPENAI_API_KEY=
CONCLAVE_OPENAI_MODEL=gpt-4o
CONCLAVE_ANTHROPIC_API_KEY=
CONCLAVE_GOOGLE_API_KEY=
# NearAI API — all models served via NearAI confidential compute
CONCLAVE_NEARAI_API_KEY=
CONCLAVE_DEFAULT_MODEL=deepseek-ai/DeepSeek-V3.1

# Supabase auth — Project Settings → API in your Supabase dashboard
# JWT validation uses JWKS (ES256/ECC P-256) — no shared secret needed
Expand All @@ -14,3 +11,6 @@ CONCLAVE_SUPABASE_ANON_KEY=
LANGCHAIN_TRACING_V2=true
LANGCHAIN_API_KEY=
LANGCHAIN_PROJECT=conclave-eval

# Per-skill model config lives in skills/<skill-name>/.env
# See skills/hackathon_novelty/.env.example for an example
5 changes: 4 additions & 1 deletion api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,10 @@ def get_results(submission_id: str, request: Request):
if role == "user":
if submission_id not in token_info["submission_ids"]:
raise HTTPException(status_code=403, detail="Access denied: submission not owned by this token")
return instance_results[submission_id]
# Participant view: filtered to skill-declared user_output_keys
card = _skill_router.get_card(_instances[instance_id]["skill_name"])
result = instance_results[submission_id]
return {k: result[k] for k in card.user_output_keys if k in result}

# admin: unrestricted access within the instance
return instance_results[submission_id]
Expand Down
14 changes: 5 additions & 9 deletions client/apps/web/lib/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ const MOCK_SKILLS: SkillCard[] = [
output_keys: [
"submission_id",
"novelty_score",
"percentile",
"cluster",
"aligned",
"criteria_scores",
"status",
"analysis_depth",
Expand Down Expand Up @@ -78,8 +77,7 @@ const MOCK_RESULTS: NoveltyResult[] = [
{
submission_id: "sub_001",
novelty_score: 0.84,
percentile: 82,
cluster: "AI/ML Infrastructure",
aligned: true,
criteria_scores: { originality: 8.5, feasibility: 7.2, impact: 9.0 },
status: "analyzed",
analysis_depth: "full",
Expand All @@ -90,18 +88,16 @@ const MOCK_RESULTS: NoveltyResult[] = [
{
submission_id: "sub_002",
novelty_score: 0.61,
percentile: 55,
cluster: "Developer Tools",
aligned: true,
criteria_scores: { originality: 6.0, feasibility: 8.5, impact: 5.5 },
status: "analyzed",
analysis_depth: "quick",
analysis_depth: "full",
duplicate_of: null,
},
{
submission_id: "sub_003",
novelty_score: 0.12,
percentile: 8,
cluster: "AI/ML Infrastructure",
aligned: true,
criteria_scores: { originality: 2.0, feasibility: 6.0, impact: 3.0 },
status: "duplicate",
analysis_depth: "flagged",
Expand Down
7 changes: 3 additions & 4 deletions client/apps/web/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,10 @@ export interface SubmitResponse {
export interface NoveltyResult {
submission_id: string
novelty_score: number
percentile: number
cluster: string
aligned?: boolean
criteria_scores: Record<string, number>
status: "analyzed" | "duplicate" | "quick_scored"
analysis_depth: "full" | "quick" | "flagged"
status: "analyzed" | "duplicate"
analysis_depth: "full" | "flagged"
duplicate_of: string | null
enclave_signature?: string
attestation_quote?: string
Expand Down
45 changes: 18 additions & 27 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from __future__ import annotations
from pydantic_settings import BaseSettings
from typing import Literal


class Settings(BaseSettings):
llm_provider: Literal["openai", "anthropic", "google", "nearai"] = "openai"
openai_api_key: str = ""
openai_model: str = "gpt-4o"
anthropic_api_key: str = ""
google_api_key: str = ""
# NearAI API — all models served via NearAI confidential compute
nearai_api_key: str = ""
nearai_model: str = "deepseek-ai/DeepSeek-V3.1"
nearai_base_url: str = "https://cloud-api.near.ai/v1"
default_model: str = "deepseek-ai/DeepSeek-V3.1"

# Embedding (unchanged)
embedding_model: str = "all-MiniLM-L6-v2"

# Supabase auth (optional — if unset, /auth/* endpoints return 503 and /register is the fallback)
Expand All @@ -22,23 +21,15 @@ class Settings(BaseSettings):
settings = Settings()


def get_llm():
"""Return the configured LangChain chat model."""
if settings.llm_provider == "openai":
from langchain_openai import ChatOpenAI
return ChatOpenAI(model=settings.openai_model, api_key=settings.openai_api_key)
elif settings.llm_provider == "anthropic":
from langchain_anthropic import ChatAnthropic
return ChatAnthropic(model="claude-sonnet-4-6", api_key=settings.anthropic_api_key)
elif settings.llm_provider == "google":
from langchain_google_genai import ChatGoogleGenerativeAI
return ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=settings.google_api_key)
elif settings.llm_provider == "nearai":
from langchain_openai import ChatOpenAI
return ChatOpenAI(
model=settings.nearai_model,
api_key=settings.nearai_api_key,
base_url="https://cloud-api.near.ai/v1",
)
else:
raise ValueError(f"Unsupported LLM provider: {settings.llm_provider}")
def get_llm(model: str | None = None):
    """Construct the LangChain chat model, always served through NearAI.

    Args:
        model: Explicit model ID to use for this call. When None (or empty),
            falls back to ``settings.default_model``. Skills declare their
            own per-node models in their own config.py.

    Returns:
        A ``ChatOpenAI`` instance pointed at the NearAI OpenAI-compatible
        endpoint (``settings.nearai_base_url``).
    """
    # Imported lazily so merely importing this module does not require
    # langchain_openai to be installed.
    from langchain_openai import ChatOpenAI

    chosen_model = model or settings.default_model
    return ChatOpenAI(
        model=chosen_model,
        api_key=settings.nearai_api_key,
        base_url=settings.nearai_base_url,
    )
2 changes: 2 additions & 0 deletions core/skill_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class SkillCard:
run: Callable # the run_skill() entry point
input_model: Type[BaseModel] # Pydantic model for this skill's inputs
output_keys: set # allowed output keys (mirrors ALLOWED_OUTPUT_KEYS)
user_output_keys: set = field(default_factory=set) # keys visible to user role (subset of output_keys)
config: dict = field(default_factory=dict) # skill-specific config params
trigger_modes: list = field(default_factory=list) # supported trigger declarations
roles: dict = field(default_factory=dict) # admin + user role declarations
Expand All @@ -44,6 +45,7 @@ def metadata(self) -> dict:
"version": self.version,
"input_schema": self.input_model.model_json_schema(),
"output_keys": sorted(self.output_keys),
"user_output_keys": sorted(self.user_output_keys),
"config": self.config,
"trigger_modes": self.trigger_modes,
"roles": self.roles,
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ cryptography>=42.0.0
scipy
pandas
langgraph-cli[inmem]
pdfplumber
python-docx
8 changes: 8 additions & 0 deletions skills/hackathon_novelty/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Per-node model overrides for hackathon_novelty skill.
# Copy to skills/hackathon_novelty/.env and fill in values.
# Empty value = fallback to CONCLAVE_DEFAULT_MODEL in root .env

CONCLAVE_INIT_MODEL=
CONCLAVE_INGEST_MODEL=deepseek-ai/DeepSeek-V3.1
CONCLAVE_TRIAGE_MODEL=deepseek-ai/DeepSeek-V3.1
CONCLAVE_SCORE_MODEL=deepseek-ai/DeepSeek-V3.1
58 changes: 40 additions & 18 deletions skills/hackathon_novelty/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""
Entry point for the hackathon_novelty skill.

3-layer pipeline:
4-layer pipeline:
0. ingest.py — agentic text extraction + normalization (LLM)
1. deterministic.py — embeddings, similarity, novelty scores, clustering (no LLM)
2. agent.py — multi-node LangGraph graph (triage → router → flag/quick/analyze → finalize)
2. agent.py — multi-node LangGraph graph (triage → router → flag/score → finalize)
3. guardrails.py — key whitelist, score clamping, leakage detection

What to edit here:
Expand All @@ -19,42 +20,62 @@
from core.skill_card import SkillCard
from skills.hackathon_novelty.models import HackathonSubmission, NoveltyResult
from skills.hackathon_novelty.deterministic import run_deterministic
from skills.hackathon_novelty.ingest import run_ingest
from skills.hackathon_novelty.tools import set_context
from skills.hackathon_novelty.agent import run_agent
from skills.hackathon_novelty.guardrails import HackathonNoveltyFilter
from skills.hackathon_novelty.config import ALLOWED_OUTPUT_KEYS, MIN_SUBMISSIONS
from skills.hackathon_novelty.config import ALLOWED_OUTPUT_KEYS, USER_OUTPUT_KEYS, MIN_SUBMISSIONS, SIMILARITY_DUPLICATE_THRESHOLD
from skills.hackathon_novelty.init import hackathon_init_handler


def run_skill(inputs: list[HackathonSubmission], params: OperatorConfig) -> SkillResponse:
"""Full 3-layer pipeline: deterministic → agent (multi-node graph) → guardrails → response."""
"""Full 4-layer pipeline: ingest → deterministic → agent (multi-node graph) → guardrails → response."""

if len(inputs) < MIN_SUBMISSIONS:
return SkillResponse(
skill="hackathon_novelty",
results=[{"submission_id": s.submission_id, "status": "insufficient_submissions"} for s in inputs],
)

# Layer 1: Deterministic
det = run_deterministic(inputs)
# Layer 0: Ingestion — normalize/extract text from any format
normalized = run_ingest(inputs)
for sub in inputs:
if sub.submission_id in normalized:
sub.idea_text = normalized[sub.submission_id]

# Layer 1: Deterministic (now uses normalized text for embeddings)
det = run_deterministic(inputs, guidelines=params.guidelines, criteria=params.criteria)

# Build submissions map and set tool context
submissions_map = {s.submission_id: s for s in inputs}
set_context(det, submissions_map)

# Build triage_context — rich signals the triage LLM uses to classify each submission
# Add more signals here as new tools or deterministic outputs become available
# Build triage_context — rich signals the triage LLM uses to classify + judge relevance
clusters = det["clusters"]
sim_matrix = det["sim_matrix"]
submission_ids = det["submission_ids"]

# Pre-compute high-similarity pairs so triage LLM knows which to confirm as duplicates
near_duplicate_pairs = []
n = len(submission_ids)
for i in range(n):
for j in range(i + 1, n):
sim = float(sim_matrix[i, j])
if sim >= SIMILARITY_DUPLICATE_THRESHOLD:
near_duplicate_pairs.append((submission_ids[i], submission_ids[j], sim))

triage_context = {}
for i, sid in enumerate(det["submission_ids"]):
sub = submissions_map[sid]
for i, sid in enumerate(submission_ids):
triage_context[sid] = {
"novelty_score": float(det["novelty_scores"][i]),
"percentile": float(det["percentiles"][i]),
"cluster": clusters[i],
"cluster_size": clusters.count(clusters[i]),
"has_repo": sub.repo_summary is not None,
"has_deck": sub.deck_text is not None,
"idea_text": submissions_map[sid].idea_text,
"near_duplicates": [
{"other_id": a if b == sid else b, "similarity": round(sim, 3)}
for a, b, sim in near_duplicate_pairs if sid in (a, b)
],
}

# Layer 2: Agent (multi-node graph)
Expand All @@ -73,8 +94,7 @@ def run_skill(inputs: list[HackathonSubmission], params: OperatorConfig) -> Skil
result = NoveltyResult(
submission_id=sid,
novelty_score=float(det["novelty_scores"][i]),
percentile=float(det["percentiles"][i]),
cluster=det["clusters"][i],
aligned=ar.get("aligned"),
criteria_scores=ar.get("criteria_scores", {}),
status=ar.get("status", "analyzed") if ar else "error",
analysis_depth=ar.get("analysis_depth", "full"),
Expand All @@ -93,14 +113,15 @@ def run_skill(inputs: list[HackathonSubmission], params: OperatorConfig) -> Skil
skill_card = SkillCard(
name="hackathon_novelty",
description=(
"Scores hackathon submissions for novelty using embedding similarity, "
"KMeans clustering, and a multi-node LangGraph agent (triage → analysis → guardrails). "
"Scores hackathon submissions for novelty using agentic ingestion, embedding similarity, "
"KMeans clustering, and a multi-node LangGraph agent (ingest → triage → score → guardrails). "
"Raw submission content is accessible to the LLM inside the TEE; "
"only derived outputs leave the pipeline."
),
run=run_skill,
input_model=HackathonSubmission,
output_keys=ALLOWED_OUTPUT_KEYS,
user_output_keys=USER_OUTPUT_KEYS,
config={"min_submissions": MIN_SUBMISSIONS},
trigger_modes=[
{
Expand Down Expand Up @@ -153,8 +174,9 @@ def run_skill(inputs: list[HackathonSubmission], params: OperatorConfig) -> Skil
"- idea_text (required): A description of their hackathon idea.\n"
"- repo_summary (optional): Technical details or a summary of their implementation.\n"
"- deck_text (optional): Pitch deck or business case content.\n\n"
"Each user receives: novelty_score (0-1), percentile rank, cluster assignment, "
"per-criteria scores (0-10), and analysis status. They never see other teams' data."
"Each user receives: novelty_score (0-1, how unique your idea is compared to others) "
"and an alignment flag (whether your idea fits the hackathon theme). "
"They never see other teams' submissions or scores."
),
init_handler=hackathon_init_handler,
)
Loading
Loading