martinapugliese · bernomone · Jan 31, 2026 · Jan 25, 2026 · Jan 25, 2026 · Jan 25, 2026
diff --git a/.env-template b/.env-template
@@ -1,4 +1,4 @@
-LLM_FAMILY="gemini" or "claude" or "claude-aws-bedrock" or "nova-lite-aws-bedrock"
+LLM_FAMILY="gemini"  # or "claude" or "claude-aws-bedrock"
 GOOGLE_API_KEY="your key if using gemini"
 ANTHROPIC_API_KEY="your key if using Claude"
 AWS_ACCESS_KEY_ID="your aws access key id if using Claude via aws bedrock"

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -60,13 +60,12 @@ pre-commit run --all-files      # Run pre-commit hooks
 - **Gemini 2.0 Flash** (default, preferred for cost and context window)
 - **Claude 4.5 Haiku** (experimental, rate limited)
 - **Claude via AWS Bedrock** (experimental)
-- **Nova 2 Lite via AWS Bedrock** (experimental)
 
 ## Configuration
 
 ### Environment Variables
 Required in `.env` file (copy from `.env-template`):
-- `LLM_FAMILY`: "gemini", "claude", "claude-aws-bedrock", or "nova-lite-aws-bedrock"
+- `LLM_FAMILY`: "gemini", "claude", or "claude-aws-bedrock"
 - `GOOGLE_API_KEY`: For Gemini models
 - `ANTHROPIC_API_KEY`: For Claude models
 - `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`: For AWS Bedrock

diff --git a/README.md b/README.md
@@ -25,7 +25,6 @@ As its underlying LLM, you can choose to run it either with:
 * Gemini (it will use 2.0 Flash) [preferred and default option]
 * Claude (it will use Haiku 4.5) [experimental]
 * Claude via AWS Bedrock (it will use Haiku 4.5) [experimental]
-* Nova 2 Lite via AWS Bedrock [experimental]
 
 Gemini is preferred because:
 * it has a free tier - we privilege cost-effectiveness over speed, which means for short conversations you should be within the quotas of the free tier
@@ -111,7 +110,7 @@ python evals.py -m claude -e allower orchestrator
 python evals.py --list
 ```
 
-Available models: `gemini`, `claude`, `claude-aws-bedrock`, `nova-lite-aws-bedrock`
+Available models: `gemini`, `claude`, `claude-aws-bedrock`
 
 Available evals: `allower`, `orchestrator`, `summary`, `question`, `article`, `general`
 

diff --git a/evals/evals.py b/evals/evals.py
@@ -3,6 +3,7 @@
 import os
 
 import boto3
+import logfire
 from botocore.exceptions import ClientError, NoCredentialsError
 from dotenv import load_dotenv
 from evals_allower import run_evals as run_evals_allower
@@ -13,9 +14,16 @@
 from evals_summary import run_evals as run_evals_summary
 from rich.console import Console
 
+# Load environment and configure logfire early
+load_dotenv()
+logfire_token = os.getenv("LOGFIRE_TOKEN", None)
+if logfire_token:
+    logfire.configure(token=logfire_token, console=False)
+    logfire.instrument_pydantic_ai()
+
 console = Console()
 
-ALL_MODELS = ["gemini", "claude", "claude-aws-bedrock", "nova-lite-aws-bedrock"]
+ALL_MODELS = ["gemini", "claude", "claude-aws-bedrock"]
 ALL_EVALS = ["allower", "orchestrator", "summary", "question", "article", "general"]
 
 EVAL_RUNNERS = {
@@ -85,13 +93,13 @@ def check_model_credentials(model_family: str) -> bool:
         )
         return False
 
-    if model_family in ("claude-aws-bedrock", "nova-lite-aws-bedrock"):
+    if model_family == "claude-aws-bedrock":
         try:
             _ = boto3.client("sts").get_caller_identity()
         except (ClientError, NoCredentialsError):
             console.print(
-                f"[bold red]AWS credentials are not set or invalid. "
-                f"Skipping {model_family} evals.[/bold red]"
+                "[bold red]AWS credentials are not set or invalid. "
+                "Skipping claude-aws-bedrock evals.[/bold red]"
             )
             return False
 

diff --git a/evals/evals_allower.py b/evals/evals_allower.py
@@ -2,14 +2,24 @@
 Checks that the is_scientific flag is set correctly.
 """
 
+import os
 import time
 
+import logfire
+from dotenv import load_dotenv
 from rich.console import Console
 
 from askademic.allower import allower_agent_base
 from askademic.prompts.general import USER_PROMPT_ALLOWER_TEMPLATE
 from askademic.utils import choose_model
 
+# Load environment and configure logfire
+load_dotenv()
+logfire_token = os.getenv("LOGFIRE_TOKEN", None)
+if logfire_token:
+    logfire.configure(token=logfire_token, console=False)
+    logfire.instrument_pydantic_ai()
+
 
 class AllowerTestCase:
     def __init__(self, question: str, is_scientic_gt: bool):

diff --git a/evals/evals_article.py b/evals/evals_article.py
@@ -3,14 +3,24 @@
 searching first by title and then by link.
 """
 
+import os
 import re
 import time
 
+import logfire
+from dotenv import load_dotenv
 from rich.console import Console
 
 from askademic.article import ArticleAgent
 from askademic.utils import choose_model
 
+# Load environment and configure logfire
+load_dotenv()
+logfire_token = os.getenv("LOGFIRE_TOKEN", None)
+if logfire_token:
+    logfire.configure(token=logfire_token, console=False)
+    logfire.instrument_pydantic_ai()
+
 
 class ArticleResponseTestCase:
     def __init__(

diff --git a/evals/evals_general.py b/evals/evals_general.py
@@ -3,14 +3,24 @@
 Tests flexibility, adaptability, and handling of diverse academic requests.
 """
 
+import os
 import time
 from typing import List
 
+import logfire
+from dotenv import load_dotenv
 from rich.console import Console
 
 from askademic.general import GeneralAgent
 from askademic.utils import choose_model
 
+# Load environment and configure logfire
+load_dotenv()
+logfire_token = os.getenv("LOGFIRE_TOKEN", None)
+if logfire_token:
+    logfire.configure(token=logfire_token, console=False)
+    logfire.instrument_pydantic_ai()
+
 
 class GeneralTestCase:
     def __init__(

diff --git a/evals/evals_orchestrator.py b/evals/evals_orchestrator.py
@@ -2,8 +2,11 @@
 Checks delegation to right agent via type of response.
 """
 
+import os
 import time
 
+import logfire
+from dotenv import load_dotenv
 from pydantic_ai.usage import UsageLimits
 from rich.console import Console
 
@@ -14,6 +17,13 @@
 from askademic.summary import SummaryResponse
 from askademic.utils import choose_model
 
+# Load environment and configure logfire
+load_dotenv()
+logfire_token = os.getenv("LOGFIRE_TOKEN", None)
+if logfire_token:
+    logfire.configure(token=logfire_token, console=False)
+    logfire.instrument_pydantic_ai()
+
 
 class OrchestratorTestCase:
     def __init__(

diff --git a/evals/evals_question.py b/evals/evals_question.py
@@ -2,13 +2,23 @@
 Checks response contains substring.
 """
 
+import os
 import time
 
+import logfire
+from dotenv import load_dotenv
 from rich.console import Console
 
 from askademic.question import QuestionAgent
 from askademic.utils import choose_model
 
+# Load environment and configure logfire
+load_dotenv()
+logfire_token = os.getenv("LOGFIRE_TOKEN", None)
+if logfire_token:
+    logfire.configure(token=logfire_token, console=False)
+    logfire.instrument_pydantic_ai()
+
 
 class QuestionAnswerTestCaseSingle:
     def __init__(self, request: str, answer: str):
@@ -55,9 +65,6 @@ async def run_evals(model_family: str):
     question_agent = QuestionAgent(
         model=model,
         model_settings=model_settings,
-        query_list_limit=2,
-        relevance_score_threshold=0.8,
-        article_list_limit=2,
     )
 
     # single-answer ones

diff --git a/evals/evals_summary.py b/evals/evals_summary.py
@@ -2,14 +2,24 @@
 Checks category fetched is correct.
 """
 
+import os
 import time
 from typing import List
 
+import logfire
+from dotenv import load_dotenv
 from rich.console import Console
 
 from askademic.summary import SummaryAgent
 from askademic.utils import choose_model
 
+# Load environment and configure logfire
+load_dotenv()
+logfire_token = os.getenv("LOGFIRE_TOKEN", None)
+if logfire_token:
+    logfire.configure(token=logfire_token, console=False)
+    logfire.instrument_pydantic_ai()
+
 
 class SummaryTestCase:
     def __init__(self, request: str, category_list: List[str]):

diff --git a/src/askademic/constants.py b/src/askademic/constants.py
@@ -13,7 +13,6 @@
 CLAUDE_HAIKU_4_5_BEDROCK_MODEL_ID = (
     "bedrock:{region}.anthropic.claude-haiku-4-5-20251001-v1:0"
 )
-NOVA_LITE_BEDROCK_MODEL_ID = "bedrock:{region}.amazon.nova-2-lite-v1:0"
 MISTRAL_LARGE_MODEL_ID = "mistral:mistral-large-latest"
 
 # ARXIV URLS

diff --git a/src/askademic/main.py b/src/askademic/main.py
@@ -59,7 +59,7 @@ async def check_environment_variables(user_model: str):
                 "[bold red]The ANTHROPIC_API_KEY environment variable is not set.[/bold red]"
             )
             sys.exit()
-    elif user_model in ("claude-aws-bedrock", "nova-lite-aws-bedrock"):
+    elif user_model == "claude-aws-bedrock":
         try:
             _ = boto3.client("sts").get_caller_identity()
         except boto3.exceptions.ClientError:
@@ -74,8 +74,7 @@ async def check_environment_variables(user_model: str):
     else:
         console.print(
             "[bold red]Invalid model family selected. "
-            + "Please choose 'gemini', 'claude', 'claude-aws-bedrock'"
-            + " or 'nova-lite-aws-bedrock'.[/bold red]"
+            + "Please choose 'gemini', 'claude', or 'claude-aws-bedrock'.[/bold red]"
         )
         sys.exit()
 
@@ -125,12 +124,10 @@ async def ask_me():
         "gemini",
         "claude",
         "claude-aws-bedrock",
-        "nova-lite-aws-bedrock",
     ):
         console.print(
             """[bold red]Please configure the LLM family
-        to be either "gemini" or "claude", "claude-aws-bedrock"
-         or "nova-lite-aws-bedrock"):[/bold red]"""
+        to be either "gemini", "claude", or "claude-aws-bedrock":[/bold red]"""
         )
         return
 

diff --git a/src/askademic/orchestrator.py b/src/askademic/orchestrator.py
@@ -73,9 +73,6 @@ async def answer_question(ctx: RunContext[Context], question: str) -> list[str]:
     question_agent = QuestionAgent(
         orchestrator_agent_base.model,
         orchestrator_agent_base.model_settings,
-        query_list_limit=2,
-        relevance_score_threshold=0.8,
-        article_list_limit=2,
     )
     r = await question_agent(question=question)
     return r
@@ -99,7 +96,7 @@ async def answer_article(ctx: RunContext[Context], question: str) -> list[str]:
     article_agent = ArticleAgent(
         orchestrator_agent_base.model,
         orchestrator_agent_base.model_settings,
-        use_cache=True  # Enable caching by default
+        use_cache=True,  # Enable caching by default
     )
     r = await article_agent.run(request=question)
     return r

diff --git a/src/askademic/prompts/general.py b/src/askademic/prompts/general.py
@@ -252,6 +252,36 @@
     """
 )
 
+SYSTEM_PROMPT_QUESTION_AGENT = cleandoc(
+    """
+    You are an expert in answering research questions using scientific literature from arXiv.
+
+    You have two tools available:
+    1. search_articles: Search arXiv for articles by querying their abstracts
+    2. fetch_article: Fetch the full content of an article given its link or arXiv ID
+
+    When you receive a question:
+    <instructions>
+        - First use search_articles with relevant search terms to find papers related to the question.
+        - Review the search results and identify the most relevant articles.
+        - Use fetch_article to retrieve the full content of the most promising articles.
+        - Read and analyze the articles to formulate your answer.
+        - You may need to iterate: search with different queries or fetch additional articles
+          if the initial results don't fully answer the question.
+        - Quote relevant parts of the articles in your response.
+        - If no relevant articles are found, inform the user that no papers were found on arXiv
+          addressing their question.
+    </instructions>
+
+    <output_format>
+        - Provide a clear, well-structured response to the question.
+        - Cite the articles you used with their arXiv links.
+        - The article_list MUST contain all article URLs you used, in PDF format:
+          https://arxiv.org/pdf/XXXX.XXXXX.pdf
+    </output_format>
+    """
+)
+
 #######################################
 
 # ############## Article ##############