Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env-template
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
LLM_FAMILY="gemini" or "claude" or "claude-aws-bedrock" or "nova-lite-aws-bedrock"
LLM_FAMILY="gemini" or "claude" or "claude-aws-bedrock"
GOOGLE_API_KEY="your key if using gemini"
ANTHROPIC_API_KEY="your key if using Claude"
AWS_ACCESS_KEY_ID="your aws access key id if using Claude via aws bedrock"
Expand Down
3 changes: 1 addition & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,12 @@ pre-commit run --all-files # Run pre-commit hooks
- **Gemini 2.0 Flash** (default, preferred for cost and context window)
- **Claude 4.5 Haiku** (experimental, rate limited)
- **Claude via AWS Bedrock** (experimental)
- **Nova 2 Lite via AWS Bedrock** (experimental)

## Configuration

### Environment Variables
Required in `.env` file (copy from `.env-template`):
- `LLM_FAMILY`: "gemini", "claude", "claude-aws-bedrock", or "nova-lite-aws-bedrock"
- `LLM_FAMILY`: "gemini", "claude", or "claude-aws-bedrock"
- `GOOGLE_API_KEY`: For Gemini models
- `ANTHROPIC_API_KEY`: For Claude models
- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`: For AWS Bedrock
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ As its underlying LLM, you can choose to run it either with:
* Gemini (it will use 2.0 Flash) [preferred and default option]
* Claude (it will use Haiku 4.5) [experimental]
* Claude via AWS Bedrock (it will use Haiku 4.5) [experimental]
* Nova 2 Lite via AWS Bedrock [experimental]

Gemini is preferred because:
* it has a free tier - we prioritize cost-effectiveness over speed, so short conversations should stay within the free-tier quotas
Expand Down Expand Up @@ -111,7 +110,7 @@ python evals.py -m claude -e allower orchestrator
python evals.py --list
```

Available models: `gemini`, `claude`, `claude-aws-bedrock`, `nova-lite-aws-bedrock`
Available models: `gemini`, `claude`, `claude-aws-bedrock`

Available evals: `allower`, `orchestrator`, `summary`, `question`, `article`, `general`

Expand Down
16 changes: 12 additions & 4 deletions evals/evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os

import boto3
import logfire
from botocore.exceptions import ClientError, NoCredentialsError
from dotenv import load_dotenv
from evals_allower import run_evals as run_evals_allower
Expand All @@ -13,9 +14,16 @@
from evals_summary import run_evals as run_evals_summary
from rich.console import Console

# Load environment and configure logfire early
load_dotenv()
logfire_token = os.getenv("LOGFIRE_TOKEN", None)
if logfire_token:
logfire.configure(token=logfire_token, console=False)
logfire.instrument_pydantic_ai()

console = Console()

ALL_MODELS = ["gemini", "claude", "claude-aws-bedrock", "nova-lite-aws-bedrock"]
ALL_MODELS = ["gemini", "claude", "claude-aws-bedrock"]
ALL_EVALS = ["allower", "orchestrator", "summary", "question", "article", "general"]

EVAL_RUNNERS = {
Expand Down Expand Up @@ -85,13 +93,13 @@ def check_model_credentials(model_family: str) -> bool:
)
return False

if model_family in ("claude-aws-bedrock", "nova-lite-aws-bedrock"):
if model_family == "claude-aws-bedrock":
try:
_ = boto3.client("sts").get_caller_identity()
except (ClientError, NoCredentialsError):
console.print(
f"[bold red]AWS credentials are not set or invalid. "
f"Skipping {model_family} evals.[/bold red]"
"[bold red]AWS credentials are not set or invalid. "
"Skipping claude-aws-bedrock evals.[/bold red]"
)
return False

Expand Down
10 changes: 10 additions & 0 deletions evals/evals_allower.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,24 @@
Checks that the is_scientific flag is set correctly.
"""

import os
import time

import logfire
from dotenv import load_dotenv
from rich.console import Console

from askademic.allower import allower_agent_base
from askademic.prompts.general import USER_PROMPT_ALLOWER_TEMPLATE
from askademic.utils import choose_model

# Load environment and configure logfire
load_dotenv()
logfire_token = os.getenv("LOGFIRE_TOKEN", None)
if logfire_token:
logfire.configure(token=logfire_token, console=False)
logfire.instrument_pydantic_ai()


class AllowerTestCase:
def __init__(self, question: str, is_scientic_gt: bool):
Expand Down
10 changes: 10 additions & 0 deletions evals/evals_article.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,24 @@
searching first by title and then by link.
"""

import os
import re
import time

import logfire
from dotenv import load_dotenv
from rich.console import Console

from askademic.article import ArticleAgent
from askademic.utils import choose_model

# Load environment and configure logfire
load_dotenv()
logfire_token = os.getenv("LOGFIRE_TOKEN", None)
if logfire_token:
logfire.configure(token=logfire_token, console=False)
logfire.instrument_pydantic_ai()


class ArticleResponseTestCase:
def __init__(
Expand Down
10 changes: 10 additions & 0 deletions evals/evals_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,24 @@
Tests flexibility, adaptability, and handling of diverse academic requests.
"""

import os
import time
from typing import List

import logfire
from dotenv import load_dotenv
from rich.console import Console

from askademic.general import GeneralAgent
from askademic.utils import choose_model

# Load environment and configure logfire
load_dotenv()
logfire_token = os.getenv("LOGFIRE_TOKEN", None)
if logfire_token:
logfire.configure(token=logfire_token, console=False)
logfire.instrument_pydantic_ai()


class GeneralTestCase:
def __init__(
Expand Down
10 changes: 10 additions & 0 deletions evals/evals_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
Checks delegation to the right agent via the type of the response.
"""

import os
import time

import logfire
from dotenv import load_dotenv
from pydantic_ai.usage import UsageLimits
from rich.console import Console

Expand All @@ -14,6 +17,13 @@
from askademic.summary import SummaryResponse
from askademic.utils import choose_model

# Load environment and configure logfire
load_dotenv()
logfire_token = os.getenv("LOGFIRE_TOKEN", None)
if logfire_token:
logfire.configure(token=logfire_token, console=False)
logfire.instrument_pydantic_ai()


class OrchestratorTestCase:
def __init__(
Expand Down
13 changes: 10 additions & 3 deletions evals/evals_question.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,23 @@
Checks response contains substring.
"""

import os
import time

import logfire
from dotenv import load_dotenv
from rich.console import Console

from askademic.question import QuestionAgent
from askademic.utils import choose_model

# Load environment and configure logfire
load_dotenv()
logfire_token = os.getenv("LOGFIRE_TOKEN", None)
if logfire_token:
logfire.configure(token=logfire_token, console=False)
logfire.instrument_pydantic_ai()


class QuestionAnswerTestCaseSingle:
def __init__(self, request: str, answer: str):
Expand Down Expand Up @@ -55,9 +65,6 @@ async def run_evals(model_family: str):
question_agent = QuestionAgent(
model=model,
model_settings=model_settings,
query_list_limit=2,
relevance_score_threshold=0.8,
article_list_limit=2,
)

# single-answer ones
Expand Down
10 changes: 10 additions & 0 deletions evals/evals_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,24 @@
Checks category fetched is correct.
"""

import os
import time
from typing import List

import logfire
from dotenv import load_dotenv
from rich.console import Console

from askademic.summary import SummaryAgent
from askademic.utils import choose_model

# Load environment and configure logfire
load_dotenv()
logfire_token = os.getenv("LOGFIRE_TOKEN", None)
if logfire_token:
logfire.configure(token=logfire_token, console=False)
logfire.instrument_pydantic_ai()


class SummaryTestCase:
def __init__(self, request: str, category_list: List[str]):
Expand Down
1 change: 0 additions & 1 deletion src/askademic/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
CLAUDE_HAIKU_4_5_BEDROCK_MODEL_ID = (
"bedrock:{region}.anthropic.claude-haiku-4-5-20251001-v1:0"
)
NOVA_LITE_BEDROCK_MODEL_ID = "bedrock:{region}.amazon.nova-2-lite-v1:0"
MISTRAL_LARGE_MODEL_ID = "mistral:mistral-large-latest"

# ARXIV URLS
Expand Down
9 changes: 3 additions & 6 deletions src/askademic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ async def check_environment_variables(user_model: str):
"[bold red]The ANTHROPIC_API_KEY environment variable is not set.[/bold red]"
)
sys.exit()
elif user_model in ("claude-aws-bedrock", "nova-lite-aws-bedrock"):
elif user_model == "claude-aws-bedrock":
try:
_ = boto3.client("sts").get_caller_identity()
except boto3.exceptions.ClientError:
Expand All @@ -74,8 +74,7 @@ async def check_environment_variables(user_model: str):
else:
console.print(
"[bold red]Invalid model family selected. "
+ "Please choose 'gemini', 'claude', 'claude-aws-bedrock'"
+ " or 'nova-lite-aws-bedrock'.[/bold red]"
+ "Please choose 'gemini', 'claude', or 'claude-aws-bedrock'.[/bold red]"
)
sys.exit()

Expand Down Expand Up @@ -125,12 +124,10 @@ async def ask_me():
"gemini",
"claude",
"claude-aws-bedrock",
"nova-lite-aws-bedrock",
):
console.print(
"""[bold red]Please configure the LLM family
to be either "gemini" or "claude", "claude-aws-bedrock"
or "nova-lite-aws-bedrock"):[/bold red]"""
to be either "gemini", "claude", or "claude-aws-bedrock":[/bold red]"""
)
return

Expand Down
5 changes: 1 addition & 4 deletions src/askademic/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,6 @@ async def answer_question(ctx: RunContext[Context], question: str) -> list[str]:
question_agent = QuestionAgent(
orchestrator_agent_base.model,
orchestrator_agent_base.model_settings,
query_list_limit=2,
relevance_score_threshold=0.8,
article_list_limit=2,
)
r = await question_agent(question=question)
return r
Expand All @@ -99,7 +96,7 @@ async def answer_article(ctx: RunContext[Context], question: str) -> list[str]:
article_agent = ArticleAgent(
orchestrator_agent_base.model,
orchestrator_agent_base.model_settings,
use_cache=True # Enable caching by default
use_cache=True, # Enable caching by default
)
r = await article_agent.run(request=question)
return r
Expand Down
30 changes: 30 additions & 0 deletions src/askademic/prompts/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,36 @@
"""
)

# System prompt for the question agent.
# It instructs the model to answer research questions from arXiv literature
# using two tools named in the prompt (search_articles and fetch_article),
# to iterate with new searches/fetches when the first results are insufficient,
# and to list every article URL it used in PDF form in article_list.
# The literal is passed through cleandoc before being stored
# (presumably inspect.cleandoc; the import is not visible in this view).
SYSTEM_PROMPT_QUESTION_AGENT = cleandoc(
"""
You are an expert in answering research questions using scientific literature from arXiv.

You have two tools available:
1. search_articles: Search arXiv for articles by querying their abstracts
2. fetch_article: Fetch the full content of an article given its link or arXiv ID

When you receive a question:
<instructions>
- First use search_articles with relevant search terms to find papers related to the question.
- Review the search results and identify the most relevant articles.
- Use fetch_article to retrieve the full content of the most promising articles.
- Read and analyze the articles to formulate your answer.
- You may need to iterate: search with different queries or fetch additional articles
if the initial results don't fully answer the question.
- Quote relevant parts of the articles in your response.
- If no relevant articles are found, inform the user that no papers were found on arXiv
addressing their question.
</instructions>

<output_format>
- Provide a clear, well-structured response to the question.
- Cite the articles you used with their arXiv links.
- The article_list MUST contain all article URLs you used, in PDF format:
https://arxiv.org/pdf/XXXX.XXXXX.pdf
</output_format>
"""
)

#######################################

# ############## Article ##############
Expand Down
Loading