Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 2 additions & 73 deletions src/fhda/data_analysis_env.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
import hashlib
import json
import logging
import shutil
from typing import Any, cast
Expand All @@ -10,11 +9,10 @@
Message,
Messages,
Tool,
eval_answer,
)

from .notebook_env import NBEnvironment
from .utils import NBLanguage, MultipleChoiceQuestion, nb_to_html
from .utils import NBLanguage, nb_to_html
from . import prompts
from . import config as cfg

Expand All @@ -35,14 +33,12 @@ def __init__(
correct_reward: float = 1.0,
eval_mode: EvalAnswerMode,
metadata: dict[str, Any] | None = None, # used for NBEvalExpt
mcqs: list[MultipleChoiceQuestion] | None = None,
**kwargs,
):
super().__init__(**kwargs)

self.problem_id = problem_id
self.problem = problem
self.mcqs = mcqs
self.answer = answer
self.eval_mode = eval_mode
self.correct_reward = correct_reward
Expand Down Expand Up @@ -74,80 +70,13 @@ async def submit_answer(self, answer: str | float | dict[str, Any] | None) -> st
Args:
answer: The answer to the problem
"""
# TODO: support various eval modes
self.state.answer = answer
self.state.done = True
logger.info("Submitting answer and closing environment")
await self.close()
correct = False
logger.info("Answer: %s", answer)

if self.eval_mode is None:
return CORRECT_MSG

if isinstance(self.answer, int):
try:
answer = int(answer) # type: ignore[arg-type]
except ValueError:
pass
else:
correct = answer == self.answer

elif isinstance(self.answer, float):
try:
answer = float(answer) # type: ignore[arg-type]
except ValueError:
pass
else:
correct = abs(answer - self.answer) < 1e-4 * self.answer

elif isinstance(self.answer, str):
correct = bool(
await eval_answer(
proposed=str(answer),
correct=str(self.answer),
question=self.problem,
eval_mode=self.eval_mode,
)
)
elif isinstance(self.answer, dict): # This is for mcqs and open questions
# Check if answer is a json string
if isinstance(answer, str): # type: ignore[unreachable]
# Process json into dictionary
try:
processed_answer = json.loads(answer)
except json.JSONDecodeError:
return INCORRECT_MSG
else:
processed_answer = answer if isinstance(answer, dict) else {}

# Loop through each question and answer
for question_id, agent_answer in processed_answer.items():
try:
ideal_answer = self.answer[question_id]
question = next(
q
for q in self.mcqs
if q.question_id.lower() == question_id.lower()
)
correct = bool(
await eval_answer(
proposed=str(agent_answer),
correct=str(ideal_answer),
question=question,
eval_mode=self.eval_mode,
)
)
self.question_rewards[question_id] = correct
except KeyError:
self.question_rewards[question_id] = 0
average_reward = sum(self.question_rewards.values()) / len(self.mcqs)
correct = round(average_reward) == 1.0

if correct:
self.state.total_reward += self.correct_reward
return CORRECT_MSG
return INCORRECT_MSG
return f"Submitted answer: {answer}"

@classmethod
def from_task(
Expand Down
2 changes: 1 addition & 1 deletion src/fhda/notebook_env.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -113,7 +113,7 @@ async def close(self):

class NBEnvironment(Environment[NBEnvironmentState]):
NOTEBOOK_NAME: ClassVar[str] = "notebook.ipynb"
EXEC_TIMEOUT: ClassVar[float | None] = 300.0
EXEC_TIMEOUT: ClassVar[float | None] = 1200.0

state: NBEnvironmentState

Expand Down
50 changes: 22 additions & 28 deletions src/fhda/prompts.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,20 +7,20 @@

CAPSULE_SYSTEM_PROMPT_MCQ = """
You are an expert bioinformatician and seasoned biological data scientist.
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a series of Multiple Choice Questions (MCQs).
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions, structured in a way that another model could use to derive the answers.
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a Multiple Choice Question (MCQ).
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer this question, structured in a way that another model could use to derive the answer.
"""

CAPSULE_SYSTEM_PROMPT_OPEN = """
You are an expert bioinformatician and seasoned biological data scientist.
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a series of open-ended questions.
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions, structured in a way that another model could use to derive the answers.
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer an open-ended question.
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer this question, structured in a way that another model could use to derive the answer.
"""

CAPSULE_SYSTEM_PROMPT_QUERY = """
You are an expert bioinformatician and seasoned biological data scientist.
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a user query.
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions.
The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer this question.
Take your time to think through the question and the data before writing any code, explore the data rigorously and defend your conclusions rigorously.
"""

Expand Down Expand Up @@ -168,35 +168,29 @@
"""
SUBMIT_ANSWER_SINGLE = """
[Use the submit_answer tool to submit your final answer as a single string]
IMPORTANT: Wrap your answer in XML tags <answer> </answer>
Example output:
```
submit_answer("CD94") or submit_answer("-1.23")
submit_answer("<answer>CD94</answer>") or submit_answer("<answer>-1.23</answer>")
```
Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided.
"""
SUBMIT_ANSWER_OPEN = """
[Use the submit_answer tool to submit your final answer as a jsondictionary with keys as the question number and values as a short answer]
[Use the submit_answer tool to submit your final answer as a single string with your short answer]
IMPORTANT: Wrap your answer in XML tags <answer> </answer>
Example output:
```
submit_answer({{
"q1": "Short answer to question 1",
"q2": "Short answer to question 2",
"q3": "Short answer to question 3",
"q4": "Short answer to question 4"
}})
submit_answer("<answer>Your concise answer to the question</answer>")
```
Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided.
"""
SUBMIT_ANSWER_MCQ = """
[Use the submit_answer tool to submit your final answer as a json dictionary with keys as the question number and values as the answer]
[Use the submit_answer tool to submit your final answer as a single string with the letter choice]
IMPORTANT: Wrap your answer in XML tags <answer> </answer>
Example output:
```
submit_answer({{
"q1": "A",
"q2": "B",
"q3": "C",
"q4": "D"
}})
submit_answer("<answer>A</answer>") or submit_answer("<answer>B</answer>") or submit_answer("<answer>C</answer>") or submit_answer("<answer>D</answer>")
```
Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided.
"""

Expand All @@ -215,10 +209,10 @@
"""
# MCQ
MCQ_PROMPT_TEMPLATE = f"""
Here are the questions you need to address:
<questions>
{{questions}}
</questions>
Here is the question you need to address:
<question>
{{question}}
</question>

{CHAIN_OF_THOUGHT_AGNOSTIC}
{SUBMIT_ANSWER_MCQ}
Expand All @@ -227,11 +221,11 @@
"""
# Open answer
OPEN_PROMPT_TEMPLATE = f"""
Here are the questions you need to address:
Here is the question you need to address:

<questions>
{{questions}}
</questions>
<question>
{{question}}
</question>

{CHAIN_OF_THOUGHT_AGNOSTIC}
{SUBMIT_ANSWER_OPEN}
Expand Down