diff --git a/pyproject.toml b/pyproject.toml index 6f215a3..0675846 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,21 +8,16 @@ authors = [ ] dependencies = [ "aiodocker==0.24.0", - "fhaviary[server]==0.18.1", - "fh-llm-client==0.0.11", - "ldp==0.23.0", + "fhaviary[server]==0.19.0", + "ldp==0.26.0", "pandas==2.2.3", "numpy==2.2.3", "matplotlib==3.10.0", - "scipy==1.15.2", - "seaborn==0.13.2", - "scikit-learn==1.6.1", - "statsmodels==0.14.4", "aiofiles==24.1.0", "google-auth==2.38.0", "google-cloud-storage==3.0.0", "google-cloud-secret-manager==2.23.0", - "crow-client==0.3.6", + "futurehouse-client==0.3.18", "jupyter==1.1.1", "nbconvert==7.16.6", "notebook==7.3.2", @@ -52,4 +47,4 @@ run_expt = 'scripts.configurable:_run_expt' package-dir = {"" = "src"} [tool.setuptools.packages.find] -where = ["src"] \ No newline at end of file +where = ["src"] diff --git a/src/fhda/Dockerfile.pinned b/src/fhda/Dockerfile.pinned index 73cd5f9..1add015 100644 --- a/src/fhda/Dockerfile.pinned +++ b/src/fhda/Dockerfile.pinned @@ -75,14 +75,19 @@ RUN mamba install -c conda-forge -c bioconda -y \ bioconductor-apeglm=1.24.0 -COPY requirements.txt . -RUN mamba install -c conda-forge --file requirements.txt -y +COPY kernel_requirements.txt . + +# Install conda packages first +RUN mamba install -c conda-forge --file kernel_requirements.txt -y + +# Install pip packages +RUN pip install aiodocker ldp==0.26.0 fhaviary[server]==0.19.0 futurehouse-client==0.3.18 # Certain tools are not easily installable via conda. 
A common practice for # bioinformaticians is to use udocker to run certain heavy duty omics processing # tools in an isolated environment -RUN udocker --allow-root install && \ - udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1 +# RUN udocker --allow-root install && \ +# udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1 WORKDIR /workspace diff --git a/src/fhda/kernel_requirements.txt b/src/fhda/kernel_requirements.txt index 8d02401..2443198 100644 --- a/src/fhda/kernel_requirements.txt +++ b/src/fhda/kernel_requirements.txt @@ -1,18 +1,16 @@ -aiodocker anndata==0.11.1 biopython==1.84 -datasets ete3==3.1.3 +fcsparser==0.2.8 +cython==3.0.12 gseapy==1.1.4 -fhaviary[server] >= 0.18.0 keras==3.7.0 -ldp jupyter==1.0.0 matplotlib==3.10.0 matplotlib-venn==1.1.1 mygene==3.2.2 nbconvert==7.16.4 -numpy==2.0.2 +numpy==1.26.4 # Pinned lower for fcsparser <2 optuna==4.1.0 openpyxl==3.1.5 pandas==2.2.3 @@ -24,13 +22,3 @@ seaborn==0.13.2 scikit-learn==1.6.0 statsmodels==0.14.4 umap-learn==0.5.7 -aiofiles -google-auth -google-cloud-storage -google-cloud-secret-manager -google-crc32c -httpx -pydantic -requests -tqdm -crow-client diff --git a/src/fhda/prompts.py b/src/fhda/prompts.py index 20ddc41..786ce04 100644 --- a/src/fhda/prompts.py +++ b/src/fhda/prompts.py @@ -18,14 +18,14 @@ """ CAPSULE_SYSTEM_PROMPT_QUERY = """ -You are an expert data scientist. -Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query -The notebook should contain all necessary artifacts (plots, tables, print outputs, code commentary) to fully answer the query. +You are an expert bioinformatician and seasoned biological data scientist. +Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a user query. +The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions. 
+Take your time to think through the question and the data before writing any code, explore the data rigorously and defend your conclusions rigorously. """ # Guidelines for R code output optimization -R_OUTPUT_RECOMMENDATION_PROMPT = """ -R-Specific Guidelines: +R_SPECIFIC_GUIDELINES = """Guidelines for using the R programming language: 1. Load packages using this format to minimize verbose output: ```r if (!requireNamespace("package_name", quietly = TRUE)) {{ @@ -33,15 +33,27 @@ }} suppressPackageStartupMessages(library(package_name)) ``` +2. You must use the tidyverse wherever possible: dplyr, tidyr, ggplot2, readr, stringr, forcats, purrr, tibble, and lubridate. -2. For data operations, suppress messages about column name repairs: - ```r - variable_name <- read_excel(".csv", col_names = FALSE, .name_repair = "minimal") - ``` +3. All plots must be made using ggplot2. Here is an example of how to make a plot: + + # Create a density scatter plot of FSC-A vs SSC-A +plot_data <- as.data.frame(dmso_data[, c("FSC-A", "SSC-A")]) +scatter_plot <- ggplot2::ggplot(plot_data, ggplot2::aes(x = `FSC-A`, y = `SSC-A`)) + + ggplot2::geom_hex(bins = 100) + + ggplot2::scale_fill_viridis_c(trans = "log10") + + ggplot2::labs( + title = "FSC-A vs SSC-A Density Plot (DMSO Control)", + x = "FSC-A", + y = "SSC-A" + ) + + ggplot2::theme_minimal() + +4. Use explicit namespace qualification for functions. For example, use dplyr::select() instead of select(). -3. When printing dataframes, always wrap them in print() statements: +5. For data operations, suppress messages about column name repairs: ```r - print(head(dataframe)) + variable_name <- read_excel(".csv", col_names = FALSE, .name_repair = "minimal") ``` """ @@ -54,13 +66,13 @@ - Check dataframe shapes before printing. Use head() for large dataframes. - Ensure each cell executes successfully before moving to the next. - Assume you already have the packages you need installed and only install new ones if you receive errors. 
-- If you need to install packages, use mamba or conda. -IMPORTANT: R vs Python vs bash -- You can use either Python, R or bash cells to complete the analysis. -- All cells are by default Python cells. However, you can use both bash and R cells by adding %%bash or %%R to the first line of the cell. -- The first cell has already been loaded with %load_ext rpy2.ipython so you can use %%R cells from the second cell onwards +- If you need to install packages, use pip or mamba. +- All cells are by default {language} cells. Use {language} or bash tools for all analysis. +- You can use bash cells by adding %%bash to the first line of the cell or running a subprocess. +- You can only create code cells, no markdown cells. """ + AVOID_IMAGES = """ AVOID USING PLOTS/IMAGES. USE TABLES AND PRINT OUTPUTS INSTEAD AS MUCH AS POSSIBLE. """ @@ -101,19 +113,10 @@ CHAIN_OF_THOUGHT_AGNOSTIC = """ Follow these steps to create your notebook, using chain-of-thought reasoning at each stage: -1. List Directory Contents: - -- Consider how to use the list_workdir tool to recursively list the directory contents. -- Think about how to organize and present this information clearly in the notebook. -- List potential challenges in interpreting the directory structure. -- Consider how the directory structure might inform your approach to the analysis. - -Place the output of the list_workdir tool inside tags. - -2. Load Data and Perform Descriptive Statistics: +1. Load Data and Perform Descriptive Statistics: -- Identify which data files are most relevant to resolving the task. List these files. -- Plan how to load these files efficiently in R or Python. +- Identify which data files are most relevant to resolving the task. +- Plan how to load these files efficiently in {language}. - List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()). - Consider potential issues like missing data or unexpected formats. How will you handle each? 
- Plan how to present this information clearly in the notebook. @@ -122,7 +125,7 @@ Execute your plan to load data and perform descriptive statistics. -3. Develop Analysis Plan: +2. Develop Analysis Plan: - Break down each task into testable components. List these components. - For each component, list appropriate statistical tests or visualizations. @@ -135,9 +138,9 @@ Write out your analysis plan as comments in the notebook. -4. Execute Analysis Plan: +3. Execute Analysis Plan: -- For each step in your analysis plan, list the R, Python or bash functions and libraries you'll use. +- For each step in your analysis plan, list the {language} or bash functions and libraries you'll use. - Think about how to structure your code for readability and efficiency. - Plan how to document your code with clear comments. - Consider how to present results clearly, using tables or visualizations where appropriate. @@ -147,7 +150,7 @@ Execute your analysis plan, creating new cells as needed. -5. Conclude and Submit Answer: +4. Conclude and Submit Answer: - Reflect on how your results relate to the original task. - Consider any limitations or uncertainties in your analysis. @@ -163,6 +166,14 @@ [Use the submit_answer tool to submit your final answer as a single string either "True" or "False"] Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided. """ +SUBMIT_ANSWER_SINGLE = """ +[Use the submit_answer tool to submit your final answer as a single string] +Example output: +``` +submit_answer("CD94") or submit_answer("-1.23") +``` +Remember, the final notebook should contain all necessary artifacts (plots, tables, print outputs) to solve the task provided. 
+""" SUBMIT_ANSWER_OPEN = """ [Use the submit_answer tool to submit your final answer as a jsondictionary with keys as the question number and values as a short answer] Example output: @@ -200,7 +211,7 @@ {CHAIN_OF_THOUGHT_AGNOSTIC} {SUBMIT_ANSWER_HYPOTHESIS} {GENERAL_NOTEBOOK_GUIDELINES} -{R_OUTPUT_RECOMMENDATION_PROMPT} +{R_SPECIFIC_GUIDELINES} """ # MCQ MCQ_PROMPT_TEMPLATE = f""" @@ -212,7 +223,7 @@ {CHAIN_OF_THOUGHT_AGNOSTIC} {SUBMIT_ANSWER_MCQ} {GENERAL_NOTEBOOK_GUIDELINES} -{R_OUTPUT_RECOMMENDATION_PROMPT} +{R_SPECIFIC_GUIDELINES} """ # Open answer OPEN_PROMPT_TEMPLATE = f""" @@ -225,5 +236,30 @@ {CHAIN_OF_THOUGHT_AGNOSTIC} {SUBMIT_ANSWER_OPEN} {GENERAL_NOTEBOOK_GUIDELINES} -{R_OUTPUT_RECOMMENDATION_PROMPT} +{R_SPECIFIC_GUIDELINES} +""" + +CONTINUATION_PROMPT_TEMPLATE = f""" +{GENERAL_NOTEBOOK_GUIDELINES} + +You have been provided with a notebook previously generated by an agent based on a user's research question. + +This was the user's research question: + +{{previous_research_question}} + + +This was the final answer generated by the previous agent: + +{{previous_final_answer}} + + +The user has now tasked you with addressing a new query: + +{{query}} + + +Please make any edits required to the notebook and the answer to address the new query. Be extremely diligent and ensure that the notebook is fully updated to address the new query. +Note you may have to run all cells one by one again if the user query involved updating one of the intermediate cells and subsequent cells depend on it. +Once you have updated the notebook, use the submit_answer tool to submit your final answer once the user's query is addressed. 
""" diff --git a/src/fhda/tortoise.py b/src/fhda/tortoise.py new file mode 100644 index 0000000..e591507 --- /dev/null +++ b/src/fhda/tortoise.py @@ -0,0 +1,340 @@ +import os +import uuid +import asyncio +import copy +from typing import Any, Callable, Optional +from os import PathLike +import time +import json +from pydantic import BaseModel, Field +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) +from . import prompts + +from futurehouse_client import FutureHouseClient +from futurehouse_client.models import TaskRequest, RuntimeConfig +from futurehouse_client.models.app import AuthType +from futurehouse_client.clients.rest_client import TaskFetchError + + +class StepConfig(BaseModel): + """Agent runtime configuration.""" + + language: str = Field( + default="PYTHON", description="Language for execution environment" + ) + max_steps: int = Field( + default=30, description="Maximum number of steps for the agent" + ) + timeout: int = Field(default=15 * 60, description="Timeout for the step in seconds") + eval: bool = Field( + default=True, + description="For Finch, this indicates whether this is an API call or UI call. Setting it to True removes the automatic CoT additions.", + ) + + +class Step(BaseModel): + """A step in the agent execution pipeline.""" + + name: str = Field( + description="Name of the job to run (e.g. 
'job-futurehouse-data-analysis-crow-high')" + ) + prompt_template: str = Field(description="Prompt template to use for the step") + cot_prompt: bool = Field( + default=False, description="Whether to augment the query with COT prompting" + ) + prompt_args: dict[str, Any] = Field( + default_factory=dict, + description="Keyword arguments to format the prompt template.", + ) + input_files: dict[str, str] = Field( + default_factory=dict, description="Files to upload {'source_path': 'dest_name'}" + ) + output_files: dict[str, str] = Field( + default_factory=dict, + description="Files to download {'source_name': 'dest_path'}", + ) + step_id: str = Field( + default_factory=lambda: str(uuid.uuid4())[:8], + description="Small UID for the step", + ) + upload_id: Optional[str] = Field(default=None, description="Upload ID for GCS") + parallel: int = Field(default=1, description="Number of parallel tasks to run") + config: StepConfig = Field( + default_factory=StepConfig, description="Configuration for the step" + ) + post_process: Optional[Callable[[dict[str, Any], str], None]] = Field( + default=None, description="Function to run after step completion" + ) + prompt_generator: Optional[Callable[[], list[tuple[str, dict[str, Any]]]]] = Field( + default=None, + description="Function to generate prompts and args for parallel tasks based on previous results", + ) + + def cot_prompting(self, query: str, language: str) -> str: + """Apply chain-of-thought prompting to the query.""" + guidelines = prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=language) + if language == "R": + guidelines = prompts.R_SPECIFIC_GUIDELINES.format(language=language) + return ( + f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=language)}\n" + f"{guidelines}" + f"Here is the research question to address:\n" + f"\n" + f"{query}\n" + f"\n" + ) + + def format_prompt(self) -> str: + """Format the prompt template with the provided arguments.""" + final_prompt = 
self.prompt_template.format(**self.prompt_args) + if self.cot_prompt: + final_prompt = self.cot_prompting(final_prompt, self.config.language) + return final_prompt + + +class Tortoise: + """Runner for multi-step agent pipelines.""" + + def __init__(self, api_key: str): + """Initialize the tortoise framework with FutureHouse API key.""" + self.client = FutureHouseClient( + auth_type=AuthType.API_KEY, api_key=api_key, verbose_logging=True + ) + self.steps: list[Step] = [] + self.results: dict[str, Any] = {} + + def add_step(self, step: Step) -> None: + """Add a step to the pipeline.""" + self.steps.append(step) + + def save_results(self, output_dir: str | PathLike = "output") -> None: + """Save the results to a JSON file.""" + results_path = f"{output_dir}/results_{time.strftime('%Y%m%d_%H%M%S')}.json" + print(f"Saving all results to {results_path}") + try: + os.makedirs(output_dir, exist_ok=True) + serializable_results = {} + for step_id, step_result in self.results.items(): + serializable_results[step_id] = dict(step_result) + + with open(results_path, "w") as f: + json.dump(serializable_results, f, indent=2) + print(f"Results successfully saved to {results_path}") + except Exception as e: + print(f"Error saving results to {results_path}: {e}") + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=10), + retry=retry_if_exception_type(Exception), + ) + def _upload_file_with_retry( + self, job_name: str, file_path: str, upload_id: str + ) -> None: + """Upload a file with retry logic.""" + self.client.upload_file(job_name, file_path=file_path, upload_id=upload_id) + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=10), + retry=retry_if_exception_type(Exception), + ) + def _download_file_with_retry( + self, job_name: str, trajectory_id: str, file_path: str, destination_path: str + ) -> None: + """Download a file with retry logic.""" + self.client.download_file( + job_name, + 
trajectory_id=trajectory_id, + file_path=file_path, + destination_path=destination_path, + ) + + def _create_task_requests( + self, step: Step, runtime_config: RuntimeConfig + ) -> list[TaskRequest]: + """Create task requests with either identical or dynamic prompts. + + Args: + step: The step configuration + runtime_config: The runtime configuration for the task + + Returns: + List of task requests to be executed + """ + task_requests = [] + task_count = max(step.parallel, 1) + + if step.prompt_generator and task_count > 1: + # Generate dynamic prompts based on previous results + prompt_pairs = step.prompt_generator() + # Create a task request for each generated prompt + for prompt_text, prompt_args in prompt_pairs[ + :task_count + ]: # Limit to requested parallel count + step_copy = copy.deepcopy(step) + step_copy.prompt_template = prompt_text + step_copy.prompt_args = prompt_args + query = step_copy.format_prompt() + task_requests.append( + TaskRequest( + name=step.name, + query=query, + runtime_config=runtime_config, + ) + ) + else: + # Default behavior: use the same prompt for all tasks + query = step.format_prompt() + task_requests = [ + TaskRequest( + name=step.name, + query=query, + runtime_config=runtime_config, + ) + ] * task_count + + return task_requests + + @retry( + stop=stop_after_attempt(5), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((Exception, TaskFetchError)), + ) + async def _run_tasks_with_retry( + self, task_requests, progress_bar, verbose, timeout + ): + """Run tasks with retry logic.""" + return await self.client.arun_tasks_until_done( + task_requests, + progress_bar=progress_bar, + verbose=verbose, + timeout=timeout, + concurrency=1, # Reduce concurrency to avoid overwhelming the server + ) + + async def run_pipeline( + self, output_dir: str | PathLike = "output" + ) -> dict[str, Any]: + """Run the entire pipeline of steps.""" + os.makedirs(output_dir, exist_ok=True) + + for i, step in 
enumerate(self.steps): + print(f"Running step {i + 1}/{len(self.steps)}: {step.name}") + if not step.upload_id: + step.upload_id = f"{step.name}_{step.step_id}" + + for source_path, dest_name in step.input_files.items(): + print(f"Uploading file {source_path} as {dest_name}") + try: + self._upload_file_with_retry( + step.name, file_path=source_path, upload_id=step.upload_id + ) + except Exception as e: + print( + f"Failed to upload file {source_path} after multiple retries: {e}" + ) + raise + + if step.config: + runtime_config = RuntimeConfig( + max_steps=step.config.max_steps, + upload_id=step.upload_id, + environment_config={ + "eval": step.config.eval, + "language": step.config.language, + }, + ) + else: + runtime_config = None + + task_requests = self._create_task_requests(step, runtime_config) + + print( + f"Running {len(task_requests)} task{'s' if len(task_requests) > 1 else ''}" + ) + try: + task_responses = await self._run_tasks_with_retry( + task_requests, + progress_bar=True, + verbose=False, + timeout=step.config.timeout, + ) + except Exception as e: + print( + f"Failed to run tasks for step {step.step_id} after multiple retries: {e}" + ) + # Create an error result entry and continue to the next step + self.results[step.step_id] = { + "task_ids": [], + "task_responses": [], + "success_rate": 0, + "error": str(e), + } + continue + + task_ids = [str(task.task_id) for task in task_responses] + success_rate = sum( + [task.status == "success" for task in task_responses] + ) / len(task_responses) + print(f"Task success rate: {success_rate * 100}%") + + self.results[step.step_id] = { + "task_ids": task_ids, + "task_responses": task_responses, + "success_rate": success_rate, + } + + os.makedirs(f"{output_dir}/{step.step_id}", exist_ok=True) + + for idx, task_id in enumerate(task_ids): + for source_name, dest_path in step.output_files.items(): + try: + # Add index suffix only when there are multiple tasks + path_suffix = f"_{idx}" if len(task_ids) > 1 else "" + 
if "." in dest_path: + base, ext = os.path.splitext(dest_path) + dest_path_with_idx = f"{base}{path_suffix}{ext}" + else: + dest_path_with_idx = f"{dest_path}{path_suffix}" + + path = f"{output_dir}/{step.step_id}/{dest_path_with_idx}" + os.makedirs( + os.path.dirname(os.path.abspath(path)), exist_ok=True + ) + print(f"Downloading file {source_name} to {path}") + try: + self._download_file_with_retry( + step.name, + trajectory_id=task_id, + file_path=source_name, + destination_path=path, + ) + except Exception as e: + print( + f"Failed to download {source_name} from task {task_id} after multiple retries: {e}" + ) + except Exception as e: + print( + f"Error downloading {source_name} from task {task_id}: {e}" + ) + + if step.post_process: + print(f"Running post-processing for step {step.step_id}") + step.post_process( + self.results[step.step_id], f"{output_dir}/{step.step_id}" + ) + + print(f"Completed step {i + 1}/{len(self.steps)}") + + self.save_results(output_dir) + return self.results + + def run(self, output_dir: str | PathLike = "output") -> dict[str, Any]: + """Synchronous version of run_pipeline.""" + return asyncio.run(self.run_pipeline(output_dir)) diff --git a/src/scripts/deploy.py b/src/scripts/deploy.py deleted file mode 100644 index cf8c3cd..0000000 --- a/src/scripts/deploy.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -from pathlib import Path - -from crow_client import CrowClient -from crow_client.models import ( - CrowDeploymentConfig, - DockerContainerConfiguration, - Stage, - FramePath, - AuthType, -) - -ENV_VARS = { - "OPENAI_API_KEY": os.environ["OPENAI_API_KEY"], - "ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"], - "USE_R": "false", - "USE_DOCKER": "false", - "STAGE": "DEV", -} - -CONTAINER_CONFIG = DockerContainerConfiguration(cpu="2", memory="4Gi") - -frame_paths = [ - FramePath(path="state.answer", type="text"), - FramePath(path="state.nb_state_html", type="notebook"), -] - -CROWS_TO_DEPLOY = [ - CrowDeploymentConfig( - 
 requirements_path=Path("pyproject.toml"), path=Path("src"), name="data-analysis-crow", environment="src.fhda.data_analysis_env.DataAnalysisEnv", environment_variables=ENV_VARS, agent="ldp.agent.ReActAgent", container_config=CONTAINER_CONFIG, force=True, frame_paths=frame_paths, ), ] if __name__ == "__main__": client = CrowClient( stage=Stage.from_string(os.environ.get("CROW_ENV", "DEV")), organization="FutureHouse", auth_type=AuthType.API_KEY, api_key=os.environ["CROW_API_KEY"], ) for crow in CROWS_TO_DEPLOY: try: client.create_crow(crow) print(f"Deploying {crow.name}: {client.get_build_status()}") except Exception as e: print(f"Error deploying {crow.name}: {e}") diff --git a/tutorial/consensus.ipynb b/tutorial/consensus.ipynb new file mode 100644 index 0000000..cea2619 --- /dev/null +++ b/tutorial/consensus.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multi-agent consensus tutorial\n", + "\n", + "In this tutorial, we will be using two different agents, Finch and Crow to do differential expression analysis on some RNASeq data from [here](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52778). Additionally, we do consensus sampling with Finch to improve reliability of the results.\n", + "\n", + "The process follows four steps:\n", + "1. Differential expression analysis: run 5 DEAs in parallel with Finch\n", + "2. Consensus sampling: Aggregate the results of the DEAs with Finch\n", + "3. Literature search: Use Crow to search the literature for the top differentially expressed genes\n", + "4. Visualization: Use Finch to create a final interactive volcano plot containing all differentially expressed genes, their evidence and the evidence score.\n", + "\n", + "Let's get started!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import pandas as pd\n", + "import uuid\n", + "\n", + "from futurehouse_client import FutureHouseClient, JobNames\n", + "from futurehouse_client.models import TaskRequest, RuntimeConfig\n", + "from futurehouse_client.models.app import AuthType\n", + "import fhda.prompts as prompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Here are the prompts we'll be using\n", + "TREATMENT = \"dexamethasone\"\n", + "MECHANISM = \"airway smooth muscle cells\"\n", + "CONTEXT = \"asthma\"\n", + "N_TOP_GENES = 10\n", + "DEA_PROMPT = \"\"\"\n", + "Determine the effect of {treatment} on {mechanism} in {context}. \n", + "\n", + "Perform differential expression analysis and pathway analysis on relevant comparison groups. Map all gene IDs to gene symbols using annotation package such as ‘org.Hs.eg.db’.\n", + "\n", + "Generate volcano plots and heatmap of differentially expressed genes, and dot plots for enriched pathways, use gene symbols for labels where relevant.\n", + "\n", + "Output a single csv file named \"dea_results.csv\" with the results for all tested genes of the most relevant contrast, report both gene ID and gene symbol.\n", + "\n", + "If there is an error, keep trying, do not give up until you reach the end of the analysis. When mapping gene ID to gene symbol, consider all possible forms of gene IDs, keep trying until the gene symbols are obtained.\n", + "\"\"\"\n", + "\n", + "CONSENSUS_PROMPT = f\"\"\"\n", + "Combine these differential expression analysis results by calculating the mode of log2FC and adjusted p values. Output the results in a file named ‘consensus_results.csv’, include the columns gene_symbol, log2FC and adjusted P values. 
In a separate file named ‘top_genes.csv’, output the top {N_TOP_GENES} gene symbols of the consensus most significant genes with the column name “gene_symbol”. \n", + "\n", + "Create a stacked bar plot showing gene regulation consistency across all analyses. Plot regulation direction (up vs down) on x-axis and percentage of genes in each category on y-axis. Color-code by significance category: all analyses, >50% of analyses and <50% of analyses. Include percentages within each segment and a clear legend. Exclude genes that are non-significant across all analyses.\n", + "\"\"\"\n", + "\n", + "PQA_PROMPT = \"\"\"\n", + " What are the possible mechanisms for {gene} in the effect of {treatment} on {mechanism} in {context}?\n", + " From 1 to 5, with 1 being no evidence of association at all and 5 being strong association with supporting evidence, how strong is the evidence supporting this mechanism?\n", + " Give a concise summary for the evidence in up to 10 words, and a short summary of mechanisms in up to 20 words. Do not include references or links.\n", + " Please share this information in json format in the form of: `\"gene_symbol\": , \"association_evidence_score\":[1...5], \"evidence_summary\": , \"mechanism_summary\": `.\n", + " Share nothing else but the JSON output.\n", + " \"\"\"\n", + "\n", + "VOLCANO_PROMPT = \"\"\"\n", + "Make an interactive volcano plot. Colour-code by significance categories: top up-regulated genes, up-regulated genes, top down-regulated genes, down-regulated genes, and non-significant genes. Genes considered as top differentially expressed genes have extra annotation available in 'pqa_results.csv’.\n", + "\n", + "Include hover information according to the categories, for the top genes, on hover, show gene symbol, log2FC, adjusted p value, mechanism, evidence and evidence score. For up and down regulated genes that are not in top differentially expressed genes, show gene symbol, log2FC and adjusted p value. 
For non-significant genes, do not include hover information.\n", + "\n", + "For the annotations, remove all text in the brackets in the summary columns, and remove the fullstop at the end. For annotations with 6 words or more in a line, use text-wrap. Don’t include text on the plot itself. Include a legend explaining the color-codes.\n", + "\n", + "PLEASE USE TEXT WRAP FOR THE HOVER INFORMATION!\n", + "\"\"\"\n", + "\n", + "\n", + "def augment_query(query, language):\n", + " guidelines = prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=language)\n", + " if language == \"R\":\n", + " guidelines = prompts.R_SPECIFIC_GUIDELINES.format(language=language)\n", + " return (\n", + " f\"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=language)}\\n\"\n", + " f\"{guidelines}\"\n", + " f\"Here is the research question to address:\\n\"\n", + " f\"\\n\"\n", + " f\"{query}\\n\"\n", + " f\"\\n\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Here we instantiate the FutureHouse client and define the job names\n", + "FH_API_KEY = \"\" # Add your API key here\n", + "# We will be creating three folders in GCS to store the results of the three steps\n", + "DEA_UPLOAD_ID = f\"consensus_tutorial_dea_{str(uuid.uuid4())[:8]}\"\n", + "CONSENSUS_UPLOAD_ID = f\"consensus_tutorial_consensus_{str(uuid.uuid4())[:8]}\"\n", + "PQA_UPLOAD_ID = f\"consensus_tutorial_pqa_{str(uuid.uuid4())[:8]}\"\n", + "INITIAL_RNASEQ_FILE = \"datasets/GSE52778_All_Sample_FPKM_Matrix.txt.gz\"\n", + "client = FutureHouseClient(\n", + " auth_type=AuthType.API_KEY,\n", + " api_key=FH_API_KEY,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# First let's upload the dataset to GCS and check the files were uploaded correctly\n", + "client.upload_file(\n", + " JobNames.FINCH, file_path=INITIAL_RNASEQ_FILE, upload_id=DEA_UPLOAD_ID\n", + ")\n", + "# Check what 
 files were uploaded to your gcs folder\n", + "client.list_files(JobNames.FINCH, upload_id=DEA_UPLOAD_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Now let's run 5 Finch DEA tasks in parallel\n", + "NUM_DEA_TASKS = 5\n", + "TIMEOUT = 15 * 60\n", + "runtime_config = RuntimeConfig(\n", + " max_steps=30,\n", + " upload_id=DEA_UPLOAD_ID,\n", + " environment_config={\n", + " \"eval\": True, # DO NOT CHANGE THIS\n", + " \"language\": \"R\",\n", + " },\n", + ")\n", + "task_request = TaskRequest(\n", + " name=JobNames.FINCH,\n", + " query=augment_query(\n", + " DEA_PROMPT.format(treatment=TREATMENT, mechanism=MECHANISM, context=CONTEXT),\n", + " \"R\",\n", + " ),\n", + " runtime_config=runtime_config,\n", + ")\n", + "dea_completed_tasks = await client.arun_tasks_until_done(\n", + " [task_request for i in range(NUM_DEA_TASKS)], progress_bar=True, timeout=TIMEOUT\n", + ")\n", + "dea_task_ids = [str(task.task_id) for task in dea_completed_tasks]\n", + "success = sum([task.status == \"success\" for task in dea_completed_tasks])\n", + "print(f\"Task success rate: {success / NUM_DEA_TASKS * 100}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The Finch runs should take anywhere between 3-10 minutes to complete.\n", + "# Once the runs have completed, let's download the results, upload them to a new folder in GCS and run a consensus step\n", + "for c, task_id in enumerate(dea_task_ids):\n", + " try:\n", + " client.download_file(\n", + " JobNames.FINCH,\n", + " trajectory_id=task_id,\n", + " file_path=\"dea_results.csv\",\n", + " destination_path=f\"output/dea_results/dea_results_{c}.csv\",\n", + " )\n", + " except Exception as e:\n", + " print(f\"Error downloading task results for task {task_id}: {e}\")\n", + "\n", + "# Now let's upload the whole directory of DEA results to GCS\n", + "client.upload_file(\n", + " JobNames.FINCH, 
 file_path=\"output/dea_results\", upload_id=CONSENSUS_UPLOAD_ID\n", + ")\n", + "\n", + "print(\"These files have been uploaded to GCS:\")\n", + "print(client.list_files(JobNames.FINCH, upload_id=CONSENSUS_UPLOAD_ID))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Now let's run a single consensus step\n", + "runtime_config = RuntimeConfig(\n", + " max_steps=30,\n", + " upload_id=CONSENSUS_UPLOAD_ID,\n", + " environment_config={\n", + " \"eval\": True, # DO NOT CHANGE THIS\n", + " \"language\": \"R\",\n", + " },\n", + ")\n", + "consensus_task_request = TaskRequest(\n", + " name=JobNames.FINCH,\n", + " query=augment_query(CONSENSUS_PROMPT, \"R\"),\n", + " runtime_config=runtime_config,\n", + ")\n", + "consensus_task_response = client.run_tasks_until_done(\n", + " [consensus_task_request], progress_bar=True, timeout=TIMEOUT\n", + ")\n", + "consensus_task_id = consensus_task_response[0].task_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Once the consensus step is done, let's download the results\n", + "client.download_file(\n", + " JobNames.FINCH,\n", + " trajectory_id=consensus_task_id,\n", + " file_path=\"consensus_results.csv\",\n", + " destination_path=\"output/consensus_results.csv\",\n", + ")\n", + "client.download_file(\n", + " JobNames.FINCH,\n", + " trajectory_id=consensus_task_id,\n", + " file_path=\"top_genes.csv\",\n", + " destination_path=\"output/top_genes.csv\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's use PaperQA to give us a summary of each gene\n", + "top_genes_df = pd.read_csv(\"output/top_genes.csv\")\n", + "display(top_genes_df.head())\n", + "gene_symbols = top_genes_df[\"gene_symbol\"].tolist()\n", + "pqa_tasks = [\n", + " {\n", + " \"name\": JobNames.CROW,\n", + " \"query\": PQA_PROMPT.format(\n", + " gene=gene, 
treatment=TREATMENT, mechanism=MECHANISM, context=CONTEXT\n", + " ),\n", + " }\n", + " for gene in gene_symbols\n", + "]\n", + "pqa_task_list = await client.arun_tasks_until_done(\n", + " pqa_tasks, progress_bar=True, timeout=TIMEOUT, verbose=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# when PQAs are done, parse answers to csv\n", + "\n", + "answer_list = []\n", + "for task_response in pqa_task_list:\n", + " try:\n", + " answer = json.loads(\n", + " task_response.environment_frame[\"state\"][\"state\"][\"response\"][\"answer\"][\n", + " \"answer\"\n", + " ]\n", + " )\n", + " if isinstance(answer, list):\n", + " answer = answer[0]\n", + " answer_list.append(answer)\n", + " except Exception as e:\n", + " print(f\"Error parsing answer for task {task_response.task_id}: {e}\")\n", + "\n", + "pqa_df = pd.DataFrame(answer_list)\n", + "pqa_df.to_csv(\"output/pqa_results.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Finally let's create a beutiful interactive plotly plot that brings all the results together\n", + "# Now lets's run a single consensus step\n", + "client.upload_file(\n", + " JobNames.FINCH, file_path=\"output/pqa_results.csv\", upload_id=PQA_UPLOAD_ID\n", + ")\n", + "client.upload_file(\n", + " JobNames.FINCH, file_path=\"output/consensus_results.csv\", upload_id=PQA_UPLOAD_ID\n", + ")\n", + "runtime_config = RuntimeConfig(\n", + " max_steps=30,\n", + " upload_id=PQA_UPLOAD_ID,\n", + " environment_config={\n", + " \"eval\": True, # DO NOT CHANGE THIS\n", + " \"language\": \"PYTHON\",\n", + " },\n", + ")\n", + "volcano_task_request = TaskRequest(\n", + " name=JobNames.FINCH,\n", + " query=augment_query(VOLCANO_PROMPT, \"PYTHON\"),\n", + " runtime_config=runtime_config,\n", + ")\n", + "volcano_task_id = client.create_task(volcano_task_request)\n", + "\n", + "print(\n", + " f\"Task 
running on platform, you can view progress live for our final results at:https://platform.futurehouse.org/trajectories/{volcano_task_id}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The final trajectory will have the reliable results of our DEA analysis in an interactive volcano plotly plot containing the top differentially expressed genes, their evidence and the evidence score! All in about 20 minutes!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorial/datasets/GSE52778_All_Sample_FPKM_Matrix.txt.gz b/tutorial/datasets/GSE52778_All_Sample_FPKM_Matrix.txt.gz new file mode 100644 index 0000000..98db61d Binary files /dev/null and b/tutorial/datasets/GSE52778_All_Sample_FPKM_Matrix.txt.gz differ diff --git a/tutorial/dataset/brain_size_data.csv b/tutorial/datasets/brain_size_data.csv similarity index 100% rename from tutorial/dataset/brain_size_data.csv rename to tutorial/datasets/brain_size_data.csv diff --git a/tutorial/example.ipynb b/tutorial/example.ipynb index 89f9692..b3d0bc3 100644 --- a/tutorial/example.ipynb +++ b/tutorial/example.ipynb @@ -2,15 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", + "\n", "import hashlib\n", "import shutil\n", "from pathlib import Path\n", "import time\n", + "import logging\n", + "\n", "\n", "from ldp.agent import AgentConfig\n", "from ldp.alg.rollout import RolloutManager\n", @@ -19,54 +22,49 @@ "from fhda.data_analysis_env import DataAnalysisEnv\n", "from fhda.notebook_env import NBEnvironment\n", "from fhda.utils 
import NBLanguage\n", - "from fhda import prompts" + "from fhda import prompts\n", + "import fhda.config as cfg" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# ENVIRONMENT CONFIGURATION\n", - "\n", - "# Set your API keys\n", - "os.environ[\"ANTHROPIC_API_KEY\"] = \"YOUR_ANTHROPIC_API_KEY\"\n", - "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY\"\n", - "# If using docker, be sure to pull the image from docker hub first\n", - "# docker pull futurehouse/bixbench:aviary-notebook-env\n", - "# This image includes many bioinformatics and data science packages\n", - "os.environ[\"use_docker\"] = \"False\"\n", - "\n", - "\n", - "def setup_data_analysis_env(query: str, dataset_folder: Path):\n", + "def setup_data_analysis_env(\n", + " query: str, dataset: Path, language: NBLanguage = NBLanguage.PYTHON\n", + "):\n", " # Hash the task to get a unique identifier\n", " task_hash = hashlib.sha256(query.encode()).hexdigest()\n", - " trajectory_path = Path(\"tmp_results_dir\") / f\"{task_hash}-{time.time()}\"\n", + " trajectory_path = (\n", + " Path(os.path.abspath(\"tmp_results_dir\")) / f\"{task_hash}-{time.time()}\"\n", + " )\n", " trajectory_path.mkdir(parents=True, exist_ok=True)\n", " nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME\n", " # Copy task data to trajectory path\n", - " for item in dataset_folder.iterdir():\n", - " if item.is_file():\n", - " shutil.copy2(item, trajectory_path)\n", - " elif item.is_dir():\n", - " shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)\n", - "\n", + " if dataset.is_dir():\n", + " for item in dataset.iterdir():\n", + " if item.is_file():\n", + " shutil.copy2(item, trajectory_path)\n", + " elif item.is_dir():\n", + " shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)\n", + " else:\n", + " shutil.copy2(dataset, trajectory_path)\n", " # Augment incoming task with CoT instructions\n", " augmented_task = 
f\"\"\"\\\n", " Here is the user query to address:\n", "\n", + "\n", " \n", " {query}\n", " \n", "\n", - " {prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n", - " {prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n", + " {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=language.name)}\n", + " {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=language.name)}\"\"\"\n", "\n", - " # This can be R or PYTHON in Docker or with a local kernel if you have R installed\n", - " language = NBLanguage.PYTHON\n", " if language == NBLanguage.R:\n", - " augmented_task += f\"\\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}\"\n", + " augmented_task += f\"\\n{prompts.R_SPECIFIC_GUIDELINES}\"\n", "\n", " dae = DataAnalysisEnv(\n", " problem_id=f\"data-analysis-task-{task_hash}\",\n", @@ -83,69 +81,51 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Encountered exception during tool call for tool edit_cell: ValueError(\"Error executing cell 0:\\nName: ModuleNotFoundError\\nValue: No module named 'rpy2'\\n\")\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Trajectory completed! Final notebook available at: \n", - " tmp_results_dir/bf222a115d3970be6e12430b1cd57eb67d2f36b1950ed5765a247a1f071e7569-1741036231.876748/notebook.ipynb\n", - "Final agent answer:\n", - "Based on our comprehensive analysis of the brain_size_data.csv dataset with a focus on crows (Corvidae family), we can make the following key observations:\n", - "\n", - "1. DATA REPRESENTATION:\n", - " - Corvid species are well-represented in the dataset with 73 entries (7.6% of all bird entries)\n", - " - The dataset contains 68 unique corvid species including various crows, jays, magpies, and ravens\n", - " - Data completeness for corvids is excellent for brain size (97.3%) and good for body mass (69.9%), but poor for metabolic rate (only 2.7%)\n", - "\n", - "2. 
BRAIN SIZE CHARACTERISTICS:\n", - " - Corvids have significantly larger brains than other birds, with an average brain size of 5.68 grams compared to 3.76 grams for other birds\n", - " - Corvids have approximately 1.51 times larger brains than the average bird\n", - " - The largest-brained corvid species in the dataset is Corvus tristis at 10.3 grams\n", - "\n", - "3. BRAIN-TO-BODY MASS RATIO:\n", - " - The average brain-to-body ratio for corvids (0.022826) is slightly lower than other birds (0.023965)\n", - " - Corvids rank 21st out of 49 bird families in brain-to-body ratio (minimum 3 samples)\n", - " - Within Corvidae, there is substantial variation in brain-to-body ratios, with the highest being Cyanopica cyanus (0.038571)\n", - " - The top 5 corvid species by brain-to-body ratio are: Cyanopica cyanus, Aphelocoma coerulescens, Nucifraga caryocatactes, Cyanolyca viridicyanea, and Podoces hendersoni\n", - "\n", - "4. OVERALL FINDINGS:\n", - " - While corvids don't have the highest brain-to-body ratio among birds, they do have larger absolute brain sizes\n", - " - The log-log relationship between brain and body size shows corvids generally following the allometric scaling pattern seen in other birds\n", - " - There is considerable variation among corvid species in both absolute brain size and relative brain size (brain-to-body ratio)\n", - " - These findings align with scientific observations of complex behaviors, problem-solving abilities, and tool use in corvids\n", - " - The data supports corvids' reputation for intelligence among birds, particularly in species like the New Caledonian crow (Corvus moneduloides)\n", - "\n", - "The analysis demonstrates that corvids possess notable neuroanatomical characteristics that likely support their documented cognitive abilities, though brain size alone is not a perfect predictor of intelligence.\n" - ] - } - ], + "outputs": [], + "source": [ + "# ENVIRONMENT CONFIGURATION\n", + "\n", + "# Set your API keys\n", + 
"os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n", + "# os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "# If using docker, be sure to pull the image from docker hub first\n", + "# docker pull futurehouse/bixbench:aviary-notebook-env\n", + "# This image includes many bioinformatics and data science packages\n", + "cfg.USE_DOCKER = False\n", + "# This can be R or PYTHON in Docker or with a local kernel if you have R installed\n", + "LANGUAGE = NBLanguage.PYTHON\n", + "MAX_STEPS = 3\n", + "MODEL_NAME = \"claude-3-7-sonnet-latest\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# AVIARY ROLLOUT\n", "# This folder only contains a single csv file on animal brain size and body mass from here:\n", "# https://animaltraits.org/\n", "# However, it could contain many files including nested folders\n", - "dataset_folder = Path(\"dataset\")\n", + "\n", + "logger = logging.getLogger(__name__)\n", + "logger.info(\"Setting up data analysis environment\")\n", + "\n", + "dataset = Path(\"datasets/brain_size_data.csv\")\n", "query = \"Analyze the dataset and give me an in depth analysis using pretty plots. 
I am particularly interested in crows.\"\n", - "environment = setup_data_analysis_env(query, dataset_folder)\n", + "environment = setup_data_analysis_env(query, dataset, LANGUAGE)\n", "\n", "agent = AgentConfig(\n", " agent_type=\"ReActAgent\",\n", " agent_kwargs={\n", " \"llm_model\": {\n", - " \"model\": \"claude-3-7-sonnet-20250219\", # any litellm supported model will work\n", " \"parallel_tool_calls\": False,\n", - " \"num_retries\": 5,\n", + " \"num_retries\": 3,\n", " \"temperature\": 1.0,\n", + " \"name\": MODEL_NAME,\n", " },\n", " \"hide_old_env_states\": True,\n", " },\n", @@ -155,826 +135,19 @@ "rollout = RolloutManager(agent=agent)\n", "\n", "# You can see the notebook updating live in the tmp_results_dir folder\n", - "result = await rollout.sample_trajectories(environments=[environment], max_steps=10)\n", + "result = await rollout.sample_trajectories(\n", + " environments=[environment], max_steps=MAX_STEPS\n", + ")\n", "\n", - "print(f\"Trajectory completed! Final notebook available at: \\n\", environment.nb_path)\n", + "print(\"Trajectory completed! Final notebook available at: \\n\", environment.nb_path)\n", "print(f\"Final agent answer:\\n{environment.state.answer}\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Timestep 0\n", - "Done: False\n", - "Agent Reasoning:\n", - "Action: Tool request message '' for tool calls: list_workdir() [id=toolu_01Jp9WASaSKkeFaXzk74on7V]\n", - "---\n", - "Timestep 1\n", - "Done: False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. 
I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [{\"type\": \"text\", \"text\": \"Markdown representation of notebook contents (/workspace/notebook.ipynb):\\n\\n\"}] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(contents='%load_ext rpy2.ipython\n", - "\n", - "# First let's load our libraries\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "# Set a nice visualization style\n", - "plt.style.use('ggplot')\n", - "sns.set(style=\"whitegrid\")\n", - "sns.set_context(\"notebook\", font_scale=1.2)\n", - "\n", - "# For reproducibility\n", - "np.random.seed(42)\n", - "\n", - "print(\"Libraries loaded successfully!\")') [id=toolu_01Ny1FNdcBy7Y3uDbJJ94jos]\n", - "---\n", - "Timestep 2\n", - "Done: False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. 
I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [{\"type\": \"text\", \"text\": \"Markdown representation of notebook contents (/workspace/notebook.ipynb):\\n\\n\"}] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... 
[Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(idx='0', contents='# Import necessary libraries for data analysis and visualization\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "# Set a nice visualization style\n", - "plt.style.use('ggplot')\n", - "sns.set(style=\"whitegrid\")\n", - "sns.set_context(\"notebook\", font_scale=1.2)\n", - "\n", - "# For reproducibility\n", - "np.random.seed(42)\n", - "\n", - "print(\"Libraries loaded successfully!\")') [id=toolu_01Y9Vqg16hvex4o7qBURDux7]\n", - "---\n", - "Timestep 3\n", - "Done: False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... 
[Truncated]\n", - "Message: Observation: Encountered exception during tool call: Error executing cell 0:\n", - "Name: ModuleNotFoundError\n", - "Value: No module named 'rpy2'\n", - " [Truncated]\n", - "Message: [{\"type\": \"text\", \"text\": \"Markdown representation of notebook contents (/workspace/notebook.ipynb):\\n\\n### Cell 0:\\n```python\\n%load_ext rpy2.ipython\\n\\n# First let's load our libraries\\nimport panda [Truncated]\n", - "Message: Thought: I see that we have an error loading the rpy2 module, which means we can't use R in this environment. Let's proceed with Python only. I'll fix the first cell and remove the rpy2 import, then p [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(contents='# Load the dataset\n", - "df = pd.read_csv('brain_size_data.csv')\n", - "\n", - "# Display the first few rows to understand the data structure\n", - "print(\"First 5 rows of the dataset:\")\n", - "display(df.head())\n", - "\n", - "# Check the shape of the dataset\n", - "print(f\"\\nDataset dimensions: {df.shape[0]} rows and {df.shape[1]} columns\")\n", - "\n", - "# Check for missing values\n", - "print(\"\\nMissing values in each column:\")\n", - "print(df.isnull().sum())\n", - "\n", - "# Get basic statistics of the dataset\n", - "print(\"\\nBasic statistics:\")\n", - "display(df.describe())') [id=toolu_01YCZGyVzTRmKXm3BQypJUMQ]\n", - "---\n", - "Timestep 4\n", - "Done: False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. 
I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Encountered exception during tool call: Error executing cell 0:\n", - "Name: ModuleNotFoundError\n", - "Value: No module named 'rpy2'\n", - " [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I see that we have an error loading the rpy2 module, which means we can't use R in this environment. Let's proceed with Python only. I'll fix the first cell and remove the rpy2 import, then p [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Edited cell #0. 
[Truncated]\n", - "Message: [{\"type\": \"text\", \"text\": \"Markdown representation of notebook contents (/workspace/notebook.ipynb):\\n\\n### Cell 0:\\n```python\\n# Import necessary libraries for data analysis and visualization\\nimport [Truncated]\n", - "Message: Thought: Now that I have the basic libraries loaded, I need to load the dataset and perform some exploratory data analysis. I'll focus on understanding the data structure first, then perform specific [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(contents='# Let's explore the taxonomic information in our dataset\n", - "print(\"Unique phyla in the dataset:\")\n", - "print(df['phylum'].unique())\n", - "\n", - "print(\"\\nUnique classes in the dataset:\")\n", - "print(df['class'].unique())\n", - "\n", - "print(\"\\nUnique orders in the dataset:\")\n", - "print(df['order'].unique())\n", - "\n", - "# Since we are interested in crows, which belong to the Corvidae family\n", - "# Let's check if Corvidae is in our dataset\n", - "print(\"\\nIs Corvidae family present in the dataset?\", 'Corvidae' in df['family'].unique())\n", - "\n", - "# Let's see how many crow-related entries we have\n", - "crow_data = df[\n", - " (df['family'] == 'Corvidae') | \n", - " (df['genus'].str.contains('Corv', case=False, na=False)) |\n", - " (df['species'].str.contains('crow', case=False, na=False))\n", - "]\n", - "\n", - "print(f\"\\nNumber of crow-related entries: {len(crow_data)}\")\n", - "\n", - "# Display the first few rows of crow data\n", - "if len(crow_data) > 0:\n", - " print(\"\\nSample of crow-related data:\")\n", - " display(crow_data.head())\n", - " \n", - " # Check which species of crows we have\n", - " print(\"\\nCrow species in the dataset:\")\n", - " print(crow_data['species'].unique())\n", - "else:\n", - " print(\"\\nNo crow-related entries found in the dataset.\")') [id=toolu_018DsZQZ2TFzXasWKsxzL9fU]\n", - "---\n", - "Timestep 5\n", - "Done: 
False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Encountered exception during tool call: Error executing cell 0:\n", - "Name: ModuleNotFoundError\n", - "Value: No module named 'rpy2'\n", - " [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I see that we have an error loading the rpy2 module, which means we can't use R in this environment. Let's proceed with Python only. I'll fix the first cell and remove the rpy2 import, then p [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Edited cell #0. 
[Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that I have the basic libraries loaded, I need to load the dataset and perform some exploratory data analysis. I'll focus on understanding the data structure first, then perform specific [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#1). [Truncated]\n", - "Message: [{\"type\": \"text\", \"text\": \"Markdown representation of notebook contents (/workspace/notebook.ipynb):\\n\\n### Cell 0:\\n```python\\n# Import necessary libraries for data analysis and visualization\\nimport [Truncated]\n", - "Message: Thought: The output shows that we have a large dataset with 3580 rows and 43 columns. There's a significant amount of missing data in various columns. Let me continue with a more focused analysis on t [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(contents='# Let's analyze the key variables for crows\n", - "\n", - "# First, let's check which columns we have data for in the crow dataset\n", - "print(\"Data availability for crows:\")\n", - "for column in ['brain size', 'body mass', 'metabolic rate']:\n", - " available = crow_data[column].notna().sum()\n", - " total = len(crow_data)\n", - " print(f\"{column}: {available}/{total} entries available ({available/total*100:.1f}%)\")\n", - "\n", - "# Get basic statistics for crows\n", - "print(\"\\nBasic statistics for crow-related data:\")\n", - "columns_of_interest = ['brain size', 'body mass', 'metabolic rate', \n", - " 'mass-specific metabolic rate']\n", - "display(crow_data[columns_of_interest].describe())\n", - "\n", - "# Check the units to ensure consistent comparisons\n", - "print(\"\\nUnits used for measurements:\")\n", - "for column in columns_of_interest:\n", - " if column + \" - units\" in crow_data.columns:\n", - " units = crow_data[column + \" - units\"].unique()\n", - " 
print(f\"{column}: {units}\")\n", - "\n", - "# Compare crows with other birds\n", - "bird_data = df[df['class'] == 'Aves']\n", - "print(f\"\\nTotal bird entries in dataset: {len(bird_data)}\")\n", - "print(f\"Percentage of bird data that is crow-related: {len(crow_data)/len(bird_data)*100:.1f}%\")') [id=toolu_01JZdbhxDWCNXQsDXRXaoWxU]\n", - "---\n", - "Timestep 6\n", - "Done: False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... 
[Truncated]\n", - "Message: Observation: Encountered exception during tool call: Error executing cell 0:\n", - "Name: ModuleNotFoundError\n", - "Value: No module named 'rpy2'\n", - " [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I see that we have an error loading the rpy2 module, which means we can't use R in this environment. Let's proceed with Python only. I'll fix the first cell and remove the rpy2 import, then p [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Edited cell #0. [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that I have the basic libraries loaded, I need to load the dataset and perform some exploratory data analysis. I'll focus on understanding the data structure first, then perform specific [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#1). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: The output shows that we have a large dataset with 3580 rows and 43 columns. There's a significant amount of missing data in various columns. Let me continue with a more focused analysis on t [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#2). [Truncated]\n", - "Message: [{\"type\": \"text\", \"text\": \"Markdown representation of notebook contents (/workspace/notebook.ipynb):\\n\\n### Cell 0:\\n```python\\n# Import necessary libraries for data analysis and visualization\\nimport [Truncated]\n", - "Message: Thought: Now that we have confirmed the presence of crow-related data (Corvidae family) in our dataset, let's perform a more focused analysis. We found multiple crow species. Let's explore their brain [Truncated]\n", - "Message: Continue... 
[Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(contents='# Create visualizations focusing on crow brain size compared to other birds\n", - "\n", - "# Let's first ensure we're using consistent units for comparison\n", - "# Check the units used for brain size and body mass\n", - "print(\"Units for brain size:\")\n", - "print(df['brain size - units'].unique())\n", - "print(\"\\nUnits for body mass:\")\n", - "print(df['body mass - units'].unique())\n", - "\n", - "# Create a new dataframe for birds with complete brain size data\n", - "bird_brain_data = bird_data[bird_data['brain size'].notna()].copy()\n", - "\n", - "# Add a column to indicate if the bird is a crow/corvid\n", - "bird_brain_data['is_corvid'] = bird_brain_data['family'] == 'Corvidae'\n", - "\n", - "# Create a more readable species name column combining genus and species\n", - "bird_brain_data['full_species_name'] = bird_brain_data['genus'] + ' ' + bird_brain_data['species']\n", - "\n", - "# Let's visualize brain size distribution among birds, highlighting crows\n", - "plt.figure(figsize=(12, 6))\n", - "sns.histplot(data=bird_brain_data, x='brain size', hue='is_corvid', multiple='stack',\n", - " palette=['lightgray', 'darkblue'], bins=30)\n", - "plt.title('Distribution of Brain Sizes in Birds', fontsize=15)\n", - "plt.xlabel('Brain Size (kg)', fontsize=12)\n", - "plt.ylabel('Count', fontsize=12)\n", - "plt.legend(['Other Birds', 'Corvids (Crows & Ravens)'])\n", - "plt.tight_layout()\n", - "plt.show()\n", - "\n", - "# Calculate the average brain size for corvids vs other birds\n", - "corvid_avg_brain = bird_brain_data[bird_brain_data['is_corvid']]['brain size'].mean()\n", - "non_corvid_avg_brain = bird_brain_data[~bird_brain_data['is_corvid']]['brain size'].mean()\n", - "\n", - "print(f\"Average brain size of corvids: {corvid_avg_brain:.8f} kg\")\n", - "print(f\"Average brain size of other birds: {non_corvid_avg_brain:.8f} kg\")\n", - "print(f\"Corvids have 
{corvid_avg_brain/non_corvid_avg_brain:.2f}x larger brains than average birds\")\n", - "\n", - "# Visualize brain size vs body mass, highlighting corvids\n", - "plt.figure(figsize=(12, 8))\n", - "brain_body_data = bird_brain_data[bird_brain_data['body mass'].notna()]\n", - "sns.scatterplot(data=brain_body_data, x='body mass', y='brain size', \n", - " hue='is_corvid', size='is_corvid', sizes=(30, 120),\n", - " palette=['gray', 'darkblue'], alpha=0.7)\n", - "\n", - "# Add a line for the average ratio for context\n", - "plt.title('Brain Size vs Body Mass in Birds', fontsize=15)\n", - "plt.xlabel('Body Mass (kg)', fontsize=12)\n", - "plt.ylabel('Brain Size (kg)', fontsize=12)\n", - "plt.legend(['Other Birds', 'Corvids (Crows & Ravens)'])\n", - "\n", - "# Add annotations for some corvid species\n", - "for idx, row in brain_body_data[brain_body_data['is_corvid']].sample(min(5, len(crow_data))).iterrows():\n", - " plt.annotate(row['species'], \n", - " xy=(row['body mass'], row['brain size']),\n", - " xytext=(5, 5), textcoords='offset points',\n", - " fontsize=8)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()\n", - "\n", - "# Create a log-log plot to better visualize the relationship\n", - "plt.figure(figsize=(12, 8))\n", - "log_brain_body = brain_body_data.copy()\n", - "log_brain_body['log_brain'] = np.log10(log_brain_body['brain size'])\n", - "log_brain_body['log_body'] = np.log10(log_brain_body['body mass'])\n", - "\n", - "sns.scatterplot(data=log_brain_body, x='log_body', y='log_brain', \n", - " hue='is_corvid', size='is_corvid', sizes=(30, 120),\n", - " palette=['gray', 'darkblue'], alpha=0.7)\n", - "\n", - "# Add a regression line\n", - "sns.regplot(data=log_brain_body, x='log_body', y='log_brain', \n", - " scatter=False, color='red', line_kws={'linestyle':'--'})\n", - "\n", - "plt.title('Brain Size vs Body Mass in Birds (Log-Log Scale)', fontsize=15)\n", - "plt.xlabel('Log10 Body Mass (kg)', fontsize=12)\n", - "plt.ylabel('Log10 Brain Size (kg)', 
fontsize=12)\n", - "plt.legend(['Other Birds', 'Corvids (Crows & Ravens)', 'Trend Line'])\n", - "plt.tight_layout()\n", - "plt.show()') [id=toolu_01JLFSsMCEeRQBAUz85jpEYH]\n", - "---\n", - "Timestep 7\n", - "Done: False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Encountered exception during tool call: Error executing cell 0:\n", - "Name: ModuleNotFoundError\n", - "Value: No module named 'rpy2'\n", - " [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I see that we have an error loading the rpy2 module, which means we can't use R in this environment. Let's proceed with Python only. 
I'll fix the first cell and remove the rpy2 import, then p [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Edited cell #0. [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that I have the basic libraries loaded, I need to load the dataset and perform some exploratory data analysis. I'll focus on understanding the data structure first, then perform specific [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#1). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: The output shows that we have a large dataset with 3580 rows and 43 columns. There's a significant amount of missing data in various columns. Let me continue with a more focused analysis on t [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#2). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that we have confirmed the presence of crow-related data (Corvidae family) in our dataset, let's perform a more focused analysis. We found multiple crow species. Let's explore their brain [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#3). [Truncated]\n", - "Message: [{\"type\": \"text\", \"text\": \"Markdown representation of notebook contents (/workspace/notebook.ipynb):\\n\\n### Cell 0:\\n```python\\n# Import necessary libraries for data analysis and visualization\\nimport [Truncated]\n", - "Message: Thought: Based on the data analysis so far, I can see that we have good data on brain size for crows (97.3% available), decent data on body mass (69.9% available), but very limited data on metabolic r [Truncated]\n", - "Message: Continue... 
[Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(contents='# Let's analyze the brain-to-body mass ratio, which is an important indicator of cognitive ability\n", - "\n", - "# Create a new column for brain-to-body mass ratio\n", - "brain_body_data = brain_body_data.copy()\n", - "brain_body_data['brain_body_ratio'] = brain_body_data['brain size'] / brain_body_data['body mass']\n", - "\n", - "# Create a boxplot comparing corvids to other bird families\n", - "plt.figure(figsize=(12, 6))\n", - "sns.boxplot(x='is_corvid', y='brain_body_ratio', data=brain_body_data, \n", - " palette=['lightgray', 'darkblue'])\n", - "plt.title('Brain-to-Body Mass Ratio: Corvids vs Other Birds', fontsize=15)\n", - "plt.xlabel('', fontsize=12)\n", - "plt.ylabel('Brain-to-Body Mass Ratio', fontsize=12)\n", - "plt.xticks([0, 1], ['Other Birds', 'Corvids (Crows & Ravens)'])\n", - "plt.tight_layout()\n", - "plt.show()\n", - "\n", - "# Calculate average brain-to-body ratio\n", - "corvid_ratio = brain_body_data[brain_body_data['is_corvid']]['brain_body_ratio'].mean()\n", - "other_ratio = brain_body_data[~brain_body_data['is_corvid']]['brain_body_ratio'].mean()\n", - "print(f\"Average brain-to-body ratio for corvids: {corvid_ratio:.6f}\")\n", - "print(f\"Average brain-to-body ratio for other birds: {other_ratio:.6f}\")\n", - "print(f\"Corvids have {corvid_ratio/other_ratio:.2f}x higher brain-to-body ratio than other birds\")\n", - "\n", - "# Let's examine the top 10 bird families by brain-to-body ratio\n", - "top_families = brain_body_data.groupby('family').agg(\n", - " avg_ratio=('brain_body_ratio', 'mean'),\n", - " count=('brain_body_ratio', 'count')\n", - ").sort_values('avg_ratio', ascending=False)\n", - "\n", - "# Only include families with at least 3 samples for reliability\n", - "top_families = top_families[top_families['count'] >= 3].head(10)\n", - "\n", - "print(\"\\nTop 10 bird families by brain-to-body mass ratio (minimum 3 samples):\")\n", - 
"display(top_families)\n", - "\n", - "# Create a bar chart of the top 10 families\n", - "plt.figure(figsize=(14, 7))\n", - "sns.barplot(x=top_families.index, y='avg_ratio', data=top_families, palette='viridis')\n", - "plt.title('Top 10 Bird Families by Brain-to-Body Mass Ratio', fontsize=15)\n", - "plt.xlabel('Bird Family', fontsize=12)\n", - "plt.ylabel('Average Brain-to-Body Mass Ratio', fontsize=12)\n", - "plt.xticks(rotation=45, ha='right')\n", - "plt.tight_layout()\n", - "plt.show()\n", - "\n", - "# Let's perform a comparison between corvid species \n", - "if len(brain_body_data[brain_body_data['is_corvid']]) >= 5:\n", - " corvid_species = brain_body_data[brain_body_data['is_corvid']].copy()\n", - " \n", - " # Plot brain size of corvid species\n", - " plt.figure(figsize=(14, 7))\n", - " sns.barplot(x='species', y='brain size', data=corvid_species, palette='Blues_d')\n", - " plt.title('Brain Size Comparison Among Corvid Species', fontsize=15)\n", - " plt.xlabel('Species', fontsize=12)\n", - " plt.ylabel('Brain Size (kg)', fontsize=12)\n", - " plt.xticks(rotation=90)\n", - " plt.tight_layout()\n", - " plt.show()\n", - " \n", - " # Plot brain-to-body ratio of corvid species\n", - " plt.figure(figsize=(14, 7))\n", - " sns.barplot(x='species', y='brain_body_ratio', data=corvid_species, palette='Blues_d')\n", - " plt.title('Brain-to-Body Mass Ratio Comparison Among Corvid Species', fontsize=15)\n", - " plt.xlabel('Species', fontsize=12)\n", - " plt.ylabel('Brain-to-Body Mass Ratio', fontsize=12)\n", - " plt.xticks(rotation=90)\n", - " plt.tight_layout()\n", - " plt.show()\n", - " \n", - " # Get the top 5 corvid species by brain-to-body ratio\n", - " top_corvids = corvid_species.sort_values('brain_body_ratio', ascending=False).head(5)\n", - " print(\"\\nTop 5 corvid species by brain-to-body mass ratio:\")\n", - " for _, row in top_corvids.iterrows():\n", - " print(f\"{row['genus']} {row['species']}: {row['brain_body_ratio']:.6f}\")\n", - "else:\n", - " print(\"Not 
enough corvid species with complete data for detailed comparison\")') [id=toolu_01N4C6RHfBzCx3MCLGVDrQiR]\n", - "---\n", - "Timestep 8\n", - "Done: False\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Encountered exception during tool call: Error executing cell 0:\n", - "Name: ModuleNotFoundError\n", - "Value: No module named 'rpy2'\n", - " [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I see that we have an error loading the rpy2 module, which means we can't use R in this environment. Let's proceed with Python only. 
I'll fix the first cell and remove the rpy2 import, then p [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Edited cell #0. [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that I have the basic libraries loaded, I need to load the dataset and perform some exploratory data analysis. I'll focus on understanding the data structure first, then perform specific [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#1). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: The output shows that we have a large dataset with 3580 rows and 43 columns. There's a significant amount of missing data in various columns. Let me continue with a more focused analysis on t [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#2). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that we have confirmed the presence of crow-related data (Corvidae family) in our dataset, let's perform a more focused analysis. We found multiple crow species. Let's explore their brain [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#3). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Based on the data analysis so far, I can see that we have good data on brain size for crows (97.3% available), decent data on body mass (69.9% available), but very limited data on metabolic r [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#4). 
[Truncated]\n", - "Message: [{\"type\": \"image_url\", \"image_url\": {\"url\": \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABJgAAAJACAYAAADB6dzIAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvl [Truncated]\n", - "Message: Thought: I can see the visualizations show some interesting patterns. Let me analyze these results more deeply and create additional visualizations to complete the analysis of crows' brain characteris [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Action: Tool request message '' for tool calls: edit_cell(contents='# Comprehensive Analysis of Corvidae (Crow) Brain Characteristics\n", - "\n", - "## Summary of Findings\n", - "\n", - "# Let's create a summary visualization of our key findings\n", - "plt.figure(figsize=(15, 10))\n", - "\n", - "# Create a 2x2 subplot layout\n", - "plt.subplot(2, 2, 1)\n", - "# Plot 1: Brain Size Distribution - Simplified\n", - "sns.histplot(data=bird_brain_data, x='brain size', hue='is_corvid', \n", - " multiple='stack', bins=15, \n", - " palette=['lightgray', 'darkblue'], alpha=0.7)\n", - "plt.title('Brain Size Distribution', fontsize=14)\n", - "plt.xlabel('Brain Size (kg)', fontsize=12)\n", - "plt.ylabel('Count', fontsize=12)\n", - "\n", - "# Plot 2: Brain-to-Body Ratio Comparison\n", - "plt.subplot(2, 2, 2)\n", - "corvid_ratio_data = pd.DataFrame({\n", - " 'Bird Group': ['Corvids', 'Other Birds'],\n", - " 'Brain-to-Body Ratio': [corvid_ratio, other_ratio]\n", - "})\n", - "sns.barplot(x='Bird Group', y='Brain-to-Body Ratio', data=corvid_ratio_data, palette=['darkblue', 'lightgray'])\n", - "plt.title('Brain-to-Body Mass Ratio', fontsize=14)\n", - "plt.xlabel('')\n", - "plt.ylabel('Ratio', fontsize=12)\n", - "\n", - "# Plot 3: Rank of Corvidae among bird families\n", - "plt.subplot(2, 2, 3)\n", - "# Find rank of Corvidae in all families\n", - "all_families = brain_body_data.groupby('family').agg(\n", - " avg_ratio=('brain_body_ratio', 'mean'),\n", - " 
count=('brain_body_ratio', 'count')\n", - ").sort_values('avg_ratio', ascending=False)\n", - "all_families = all_families[all_families['count'] >= 3] # At least 3 samples\n", - "corvid_rank = all_families.index.get_loc('Corvidae') + 1 # +1 because ranks start at 1\n", - "\n", - "top_n = 10\n", - "families_to_plot = all_families.head(top_n).copy()\n", - "# Add a color column\n", - "families_to_plot['color'] = ['lightgray'] * len(families_to_plot)\n", - "if 'Corvidae' in families_to_plot.index:\n", - " families_to_plot.loc['Corvidae', 'color'] = 'darkblue'\n", - "\n", - "# Create the bar plot\n", - "bars = plt.bar(range(len(families_to_plot)), families_to_plot['avg_ratio'], \n", - " color=families_to_plot['color'])\n", - "plt.xticks(range(len(families_to_plot)), families_to_plot.index, rotation=45, ha='right')\n", - "plt.title(f'Top {top_n} Bird Families by Brain-to-Body Ratio\\n(Corvidae Rank: {corvid_rank} of {len(all_families)})', fontsize=14)\n", - "plt.xlabel('Bird Family', fontsize=12)\n", - "plt.ylabel('Brain-to-Body Ratio', fontsize=12)\n", - "\n", - "# Plot 4: Top Corvid Species by Brain-to-Body Ratio\n", - "plt.subplot(2, 2, 4)\n", - "top_species = brain_body_data[brain_body_data['is_corvid']].nlargest(5, 'brain_body_ratio')\n", - "sns.barplot(x='species', y='brain_body_ratio', data=top_species, palette='Blues_d')\n", - "plt.title('Top 5 Corvid Species by Brain-to-Body Ratio', fontsize=14)\n", - "plt.xlabel('Species', fontsize=12)\n", - "plt.xticks(rotation=45, ha='right')\n", - "plt.ylabel('Brain-to-Body Ratio', fontsize=12)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()\n", - "\n", - "# Create a comprehensive summary of our findings\n", - "print(\"=\" * 80)\n", - "print(\"COMPREHENSIVE ANALYSIS OF CROW (CORVIDAE) BRAIN CHARACTERISTICS\")\n", - "print(\"=\" * 80)\n", - "\n", - "print(\"\\n1. 
DATA AVAILABILITY\")\n", - "print(f\" - Total corvid species in dataset: {len(crow_data['species'].unique())}\")\n", - "print(f\" - Corvids represent {len(crow_data)/len(bird_data)*100:.1f}% of all bird entries\")\n", - "print(\" - Data completeness for corvids:\")\n", - "for column in ['brain size', 'body mass', 'metabolic rate']:\n", - " available = crow_data[column].notna().sum()\n", - " total = len(crow_data)\n", - " print(f\" * {column}: {available}/{total} entries ({available/total*100:.1f}%)\")\n", - "\n", - "print(\"\\n2. BRAIN SIZE\")\n", - "print(f\" - Average brain size of corvids: {corvid_avg_brain*1000:.2f} grams\")\n", - "print(f\" - Average brain size of other birds: {non_corvid_avg_brain*1000:.2f} grams\")\n", - "print(f\" - Corvids have {corvid_avg_brain/non_corvid_avg_brain:.2f}x larger brains than average birds\")\n", - "print(\" - Top 3 corvid species by absolute brain size:\")\n", - "top_brain_corvids = brain_body_data[brain_body_data['is_corvid']].nlargest(3, 'brain size')\n", - "for _, row in top_brain_corvids.iterrows():\n", - " print(f\" * {row['genus']} {row['species']}: {row['brain size']*1000:.2f} grams\")\n", - "\n", - "print(\"\\n3. BRAIN-TO-BODY MASS RATIO\")\n", - "print(f\" - Average brain-to-body ratio for corvids: {corvid_ratio:.6f}\")\n", - "print(f\" - Average brain-to-body ratio for other birds: {other_ratio:.6f}\")\n", - "print(f\" - Corvids have {corvid_ratio/other_ratio:.2f}x the brain-to-body ratio of other birds\")\n", - "print(f\" - Corvidae ranks #{corvid_rank} out of {len(all_families)} bird families (min 3 samples)\")\n", - "print(\" - Top 5 corvid species by brain-to-body ratio:\")\n", - "for _, row in top_species.iterrows():\n", - " print(f\" * {row['genus']} {row['species']}: {row['brain_body_ratio']:.6f}\")\n", - "\n", - "print(\"\\n4. 
KEY OBSERVATIONS\")\n", - "print(\" - Corvids have significantly larger brains than other birds of similar size\")\n", - "print(\" - The brain-to-body ratio varies considerably among different corvid species\")\n", - "print(\" - Several corvid species show exceptionally high brain-to-body ratios\")\n", - "print(\" - Their large brain size correlates with the family's known complex behaviors\")\n", - "print(\" - While they don't have the highest brain-to-body ratio among birds, they still\")\n", - "print(\" rank relatively high, supporting their reputation for intelligence\")\n", - "print(\" - The data supports scientific observations of advanced problem-solving and tool use\")\n", - "print(\" among corvids, particularly in species like Corvus moneduloides (New Caledonian crow)\")\n", - "\n", - "print(\"\\n5. LIMITATIONS\")\n", - "print(\" - Limited metabolic rate data for corvids (only 2.7% complete)\")\n", - "print(\" - Potentially uneven sampling across different corvid species\")\n", - "print(\" - Measurements taken by different researchers might vary in methodology\")\n", - "print(\" - Brain size alone is not a perfect predictor of cognitive abilities\")') [id=toolu_01Vpx9igDpMa77cYG5UNpYia]\n", - "---\n", - "Timestep 9\n", - "Done: True\n", - "Agent Reasoning:\n", - "Message: Here is the user query to address:\n", - "\n", - " \n", - " Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\n", - " \n", - "\n", - " \n", - "Follow these [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: \n", - "You are an expert data scientist.\n", - "Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query\n", - "The notebook should contain all n [Truncated]\n", - "Message: Thought: I'll analyze the dataset with a focus on crows as requested. 
Let's start by exploring the directory to understand what data we have available.\n", - "\n", - "Thought: First, I need to list the working dire [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: {\n", - " \"files\": [\n", - " \"notebook.md\",\n", - " \"brain_size_data.csv\"\n", - " ]\n", - "} [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see there's a file called \"brain_size_data.csv\" which is likely our dataset for analysis. Let me create a comprehensive notebook to analyze this data with a focus on crows.. Based on th [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Encountered exception during tool call: Error executing cell 0:\n", - "Name: ModuleNotFoundError\n", - "Value: No module named 'rpy2'\n", - " [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I see that we have an error loading the rpy2 module, which means we can't use R in this environment. Let's proceed with Python only. I'll fix the first cell and remove the rpy2 import, then p [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Edited cell #0. [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that I have the basic libraries loaded, I need to load the dataset and perform some exploratory data analysis. I'll focus on understanding the data structure first, then perform specific [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#1). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: The output shows that we have a large dataset with 3580 rows and 43 columns. There's a significant amount of missing data in various columns. Let me continue with a more focused analysis on t [Truncated]\n", - "Message: Continue... 
[Truncated]\n", - "Message: Observation: Appended new cell (#2). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Now that we have confirmed the presence of crow-related data (Corvidae family) in our dataset, let's perform a more focused analysis. We found multiple crow species. Let's explore their brain [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#3). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: Based on the data analysis so far, I can see that we have good data on brain size for crows (97.3% available), decent data on body mass (69.9% available), but very limited data on metabolic r [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#4). [Truncated]\n", - "Message: [Previous environment state - hidden] [Truncated]\n", - "Message: Thought: I can see the visualizations show some interesting patterns. Let me analyze these results more deeply and create additional visualizations to complete the analysis of crows' brain characteris [Truncated]\n", - "Message: Continue... [Truncated]\n", - "Message: Observation: Appended new cell (#5). [Truncated]\n", - "Message: [{\"type\": \"image_url\", \"image_url\": {\"url\": \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABJgAAAJACAYAAADB6dzIAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvl [Truncated]\n", - "Message: Thought: Let me now finalize our analysis by creating a comprehensive conclusion cell that summarizes our findings about crows. Based on this reasoning, let's select the appropriate tool!. Based on th [Truncated]\n", - "Message: Continue... 
[Truncated]\n", - "Action: Tool request message '' for tool calls: submit_answer(answer='Based on our comprehensive analysis of the brain_size_data.csv dataset with a focus on crows (Corvidae family), we can make the following key observations:\n", - "\n", - "1. DATA REPRESENTATION:\n", - " - Corvid species are well-represented in the dataset with 73 entries (7.6% of all bird entries)\n", - " - The dataset contains 68 unique corvid species including various crows, jays, magpies, and ravens\n", - " - Data completeness for corvids is excellent for brain size (97.3%) and good for body mass (69.9%), but poor for metabolic rate (only 2.7%)\n", - "\n", - "2. BRAIN SIZE CHARACTERISTICS:\n", - " - Corvids have significantly larger brains than other birds, with an average brain size of 5.68 grams compared to 3.76 grams for other birds\n", - " - Corvids have approximately 1.51 times larger brains than the average bird\n", - " - The largest-brained corvid species in the dataset is Corvus tristis at 10.3 grams\n", - "\n", - "3. BRAIN-TO-BODY MASS RATIO:\n", - " - The average brain-to-body ratio for corvids (0.022826) is slightly lower than other birds (0.023965)\n", - " - Corvids rank 21st out of 49 bird families in brain-to-body ratio (minimum 3 samples)\n", - " - Within Corvidae, there is substantial variation in brain-to-body ratios, with the highest being Cyanopica cyanus (0.038571)\n", - " - The top 5 corvid species by brain-to-body ratio are: Cyanopica cyanus, Aphelocoma coerulescens, Nucifraga caryocatactes, Cyanolyca viridicyanea, and Podoces hendersoni\n", - "\n", - "4. 
OVERALL FINDINGS:\n", - " - While corvids don't have the highest brain-to-body ratio among birds, they do have larger absolute brain sizes\n", - " - The log-log relationship between brain and body size shows corvids generally following the allometric scaling pattern seen in other birds\n", - " - There is considerable variation among corvid species in both absolute brain size and relative brain size (brain-to-body ratio)\n", - " - These findings align with scientific observations of complex behaviors, problem-solving abilities, and tool use in corvids\n", - " - The data supports corvids' reputation for intelligence among birds, particularly in species like the New Caledonian crow (Corvus moneduloides)\n", - "\n", - "The analysis demonstrates that corvids possess notable neuroanatomical characteristics that likely support their documented cognitive abilities, though brain size alone is not a perfect predictor of intelligence.') [id=toolu_01K8uxfKUsGXFkcYtnSbMw18]\n", - "---\n" - ] - } - ], + "outputs": [], "source": [ "# INSPECT THE RESULT\n", "trajectory = result[0]\n", @@ -994,20 +167,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Encountered exception during tool call for tool edit_cell: ValueError(\"Error executing cell 0:\\nName: OSError\\nValue: 'seaborn-whitegrid' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)\\n\")\n" - ] - } - ], + "outputs": [], "source": [ "# VANILLA ROLLOUT - this is a simple version of the what the rollout Manager does\n", - "dataset_folder = Path(\"dataset\")\n", + "dataset_folder = Path(\"datasets/brain_size_data.csv\")\n", "query = \"Analyze the dataset and give me an in depth analysis using pretty plots. 
I am particularly interested in crows.\"\n", "environment = setup_data_analysis_env(query, dataset_folder)\n", "\n", @@ -1056,8 +221,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.7" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/tutorial/multi_agent_orchestration.ipynb b/tutorial/multi_agent_orchestration.ipynb new file mode 100644 index 0000000..54059a9 --- /dev/null +++ b/tutorial/multi_agent_orchestration.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this advanced tutorial, we show how to orchestrate a multi-agent, multi-step workflow. The workflow does the following:\n", + "1. Runs RNAseq DEA across 10 parallel Finch runs\n", + "2. Run a single meta-analysis (consensus) Finch run using all the outputs of step 1.\n", + "3. Run 10 parallel Crow runs on the top 10 differentially expressed genes from step 2.\n", + "4. Use Finch to create a volcano plot incorporating results from step 2 and step 3." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fhda.tortoise import Tortoise, Step, StepConfig\n", + "from futurehouse_client import JobNames\n", + "import pandas as pd\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define our parameters\n", + "TREATMENT = \"dexamethasone\"\n", + "MECHANISM = \"airway smooth muscle cells\"\n", + "CONTEXT = \"asthma\"\n", + "N_TOP_GENES = 10\n", + "PARALLEL_DEA = 5\n", + "FH_API_KEY = \"\" # Add your API key here\n", + "\n", + "# Define the prompts\n", + "DEA_PROMPT = \"\"\"\n", + "Determine the effect of {treatment} on {mechanism} in {context}. \n", + "\n", + "Perform differential expression analysis and pathway analysis on relevant comparison groups. 
Map all gene IDs to gene symbols using annotation package such as 'org.Hs.eg.db'.\n", + "\n", + "Generate volcano plots and heatmap of differentially expressed genes, and dot plots for enriched pathways, use gene symbols for labels where relevant.\n", + "\n", + "Output a single csv file named \"dea_results.csv\" with the results for all tested genes of the most relevant contrast, report both gene ID and gene symbol.\n", + "\n", + "If there is an error, keep trying, do not give up until you reach the end of the analysis. When mapping gene ID to gene symbol, consider all possible forms of gene IDs, keep trying until the gene symbols are obtained.\n", + "\"\"\"\n", + "\n", + "CONSENSUS_PROMPT = f\"\"\"\n", + "Combine these differential expression analysis results by calculating the mode of log2FC and adjusted p values. Output the results in a file named 'consensus_results.csv', include the columns gene_symbol, log2FC and adjusted P values. In a separate file named 'top{N_TOP_GENES}_genes.csv', output the gene symbols of the consensus most significant genes with the column name \"gene_symbol\". \n", + "\n", + "Create a stacked bar plot showing gene regulation consistency across all analyses. Plot regulation direction (up vs down) on x-axis and percentage of genes in each category on y-axis. Color-code by significance category: all analyses, >50% of analyses and <50% of analyses. Include percentages within each segment and a clear legend. Exclude genes that are non-significant across all analyses.\n", + "\"\"\"\n", + "\n", + "PQA_PROMPT = \"\"\"\n", + "What are the possible mechanisms for {gene} in the effect of {treatment} on {mechanism} in {context}?\n", + "From 1 to 5, with 1 being no evidence of association at all and 5 being strong association with supporting evidence, how strong is the evidence supporting this mechanism?\n", + "Give a concise summary for the evidence in up to 10 words, and a short summary of mechanisms in up to 20 words. 
Do not include references or links.\n", + "Please share this information in json format in the form of: `\"gene_symbol\": , \"association_evidence_score\":[1...5], \"evidence_summary\": , \"mechanism_summary\": `.\n", + "Share nothing else but the JSON output.\n", + "\"\"\"\n", + "\n", + "VOLCANO_PROMPT = f\"\"\"\n", + "Make an interactive volcano plot. Colour-code by significance categories: top up-regulated genes, up-regulated genes, top down-regulated genes, down-regulated genes, and non-significant genes. Genes considered as top have extra annotation available in 'pqa_results.csv'.\n", + "\n", + "Include hover information according to the categories, for the top genes, on hover, show gene symbol, log2FC, adjusted p value, mechanism, evidence and evidence score. For up and down regulated genes that are not in top {N_TOP_GENES}, show gene symbol, log2FC and adjusted p value. For non-significant genes, do not include hover information.\n", + "\n", + "For the annotations, remove all text in the brackets in the summary columns, and remove the fullstop at the end. For annotations with 6 words or more in a line, use text-wrap. Don't include text on the plot itself. 
Include a legend explaining the color-codes.\n", + "\n", + "PLEASE USE TEXT WRAP FOR THE HOVER INFORMATION!\n", + "\"\"\"\n", + "\n", + "# Initialize Tortoise\n", + "tortoise = Tortoise(api_key=FH_API_KEY)\n", + "\n", + "OUTPUT_DIR = \"output\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Differential Expression Analysis (DEA)\n", + "dea_step = Step(\n", + " name=JobNames.FINCH,\n", + " prompt_template=DEA_PROMPT,\n", + " cot_prompt=True,\n", + " prompt_args={\"treatment\": TREATMENT, \"mechanism\": MECHANISM, \"context\": CONTEXT},\n", + " input_files={\n", + " \"datasets/GSE52778_All_Sample_FPKM_Matrix.txt.gz\": \"GSE52778_series_matrix.txt.gz\"\n", + " },\n", + " output_files={\"dea_results.csv\": \"dea_results/dea_results.csv\"},\n", + " parallel=PARALLEL_DEA,\n", + " config=StepConfig(language=\"R\", max_steps=30, timeout=15 * 60),\n", + ")\n", + "tortoise.add_step(dea_step)\n", + "\n", + "# Step 2: Consensus Analysis\n", + "consensus_step = Step(\n", + " name=JobNames.FINCH,\n", + " prompt_template=CONSENSUS_PROMPT,\n", + " cot_prompt=True,\n", + " input_files={f\"{OUTPUT_DIR}/{dea_step.step_id}/dea_results\": \"dea_results/\"},\n", + " output_files={\n", + " \"consensus_results.csv\": \"consensus_results.csv\",\n", + " f\"top{N_TOP_GENES}_genes.csv\": f\"top{N_TOP_GENES}_genes.csv\",\n", + " },\n", + " config=StepConfig(language=\"R\", max_steps=30, timeout=15 * 60),\n", + ")\n", + "tortoise.add_step(consensus_step)\n", + "\n", + "\n", + "# Step 3: Literature Search with PaperQA\n", + "def pqa_post_process(results, output_dir):\n", + " \"\"\"Process the results from multiple PQA tasks\"\"\"\n", + "\n", + " answer_list = []\n", + " for task_response in results.get(\"task_responses\", []):\n", + " try:\n", + " answer = json.loads(task_response.answer)\n", + " if isinstance(answer, list):\n", + " answer = answer[0]\n", + " answer_list.append(answer)\n", + " except Exception as 
e:\n", + " print(f\"Error parsing answer for task {task_response.task_id}: {e}\")\n", + "\n", + " # Create DataFrame and save\n", + " pqa_df = pd.DataFrame(answer_list)\n", + " pqa_df.to_csv(f\"{output_dir}/pqa_results.csv\", index=False)\n", + " return pqa_df\n", + "\n", + "\n", + "# Define a function to create multiple PQA prompts for genes\n", + "def pqa_prompt_generator():\n", + " \"\"\"Generate PQA prompts for each top gene\"\"\"\n", + " top_genes_df = pd.read_csv(\n", + " f\"{OUTPUT_DIR}/{consensus_step.step_id}/top{N_TOP_GENES}_genes.csv\"\n", + " )\n", + " gene_symbols = top_genes_df[\"gene_symbol\"].tolist()\n", + " prompt_pairs = []\n", + " for gene in gene_symbols:\n", + " prompt_pairs.append(\n", + " (\n", + " PQA_PROMPT,\n", + " {\n", + " \"gene\": gene,\n", + " \"treatment\": TREATMENT,\n", + " \"mechanism\": MECHANISM,\n", + " \"context\": CONTEXT,\n", + " },\n", + " )\n", + " )\n", + " return prompt_pairs\n", + "\n", + "\n", + "# Read top genes and create PQA steps\n", + "pqa_step = Step(\n", + " name=JobNames.CROW,\n", + " prompt_template=PQA_PROMPT,\n", + " prompt_generator=pqa_prompt_generator,\n", + " parallel=N_TOP_GENES, # Will process all top genes in parallel\n", + " post_process=pqa_post_process,\n", + ")\n", + "tortoise.add_step(pqa_step)\n", + "\n", + "# Step 4: Visualization with Volcano Plot\n", + "volcano_step = Step(\n", + " name=JobNames.FINCH,\n", + " prompt_template=VOLCANO_PROMPT,\n", + " cot_prompt=True,\n", + " input_files={\n", + " f\"{OUTPUT_DIR}/{consensus_step.step_id}/consensus_results.csv\": \"consensus_results.csv\",\n", + " f\"{OUTPUT_DIR}/{pqa_step.step_id}/pqa_results.csv\": \"pqa_results.csv\",\n", + " },\n", + " config=StepConfig(language=\"PYTHON\", max_steps=30, timeout=15 * 60),\n", + ")\n", + "tortoise.add_step(volcano_step)\n", + "\n", + "# Run the pipeline\n", + "results = await tortoise.run_pipeline(OUTPUT_DIR)\n", + "print(\"Pipeline execution completed\")\n", + "print(\n", + " f\"View the final volcano plot 
at: https://platform.futurehouse.org/trajectories/{tortoise.results[volcano_step.step_id]['task_ids'][0]}\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorial/platform_api.ipynb b/tutorial/platform_api.ipynb new file mode 100644 index 0000000..c293091 --- /dev/null +++ b/tutorial/platform_api.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Installation\n", + "\n", + "From the root of the repository, run:\n", + "\n", + "```bash\n", + "pip install -e .\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "\n", + "from futurehouse_client import FutureHouseClient, JobNames\n", + "from futurehouse_client.models import TaskRequest, RuntimeConfig\n", + "from futurehouse_client.models.app import AuthType\n", + "import fhda.prompts as prompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the FutureHouse client with your API key\n", + "FH_API_KEY = \"\" # Add your API key here\n", + "UPLOAD_ID = (\n", + " \"finch_tutorial\" # This is the folder name of the dataset you uploaded to GCS\n", + ")\n", + "\n", + "client = FutureHouseClient(\n", + " auth_type=AuthType.API_KEY,\n", + " api_key=FH_API_KEY,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load your dataset – note you only have to do this once\n", + "# File path can be an absolute path or a relative path to either a 
directory or a file containing the dataset\n", + "client.upload_file(\n", + " JobNames.FINCH, file_path=\"datasets/brain_size_data.csv\", upload_id=UPLOAD_ID\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check what files were uploaded to your gcs folder\n", + "client.list_files(JobNames.FINCH, upload_id=UPLOAD_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define your task\n", + "# Here is where you can update the prompt. As shown below, by default we use CoT prompting,\n", + "# but it is not necessary and we encourage users to experiment with different prompting strategies.\n", + "LANGUAGE = \"PYTHON\" # Choose between \"R\" and \"PYTHON\"\n", + "MAX_STEPS = 30 # You can change this to impose a limit on the number of steps the agent can take\n", + "query = \"Make a short notebook with visualizations exploring the dataset.\"\n", + "\n", + "task = (\n", + " f\"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)}\\n\"\n", + " f\"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)}\"\n", + " f\"Here is the research question to address:\\n\"\n", + " f\"\\n\"\n", + " f\"{query}\\n\"\n", + " f\"\\n\"\n", + ")\n", + "\n", + "# This is extra R prompting to avoid long R output blocks – also feel free to discard this\n", + "if LANGUAGE == \"R\":\n", + " task += f\"\\n{prompts.R_SPECIFIC_GUIDELINES}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is how to create a task – you shouldn't need to change anything here\n", + "task_data = TaskRequest(\n", + " name=JobNames.FINCH,\n", + " query=task,\n", + " runtime_config=RuntimeConfig(\n", + " max_steps=MAX_STEPS,\n", + " upload_id=UPLOAD_ID,\n", + " environment_config={\n", + " \"eval\": True, # DO NOT CHANGE THIS\n", + " \"language\": LANGUAGE,\n", + " },\n", + " ),\n", + ")\n", + 
"trajectory_id = client.create_task(task_data)\n", + "print(\n", + " f\"Task running on platform, you can view progress live at:https://platform.futurehouse.org/trajectories/{trajectory_id}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Jobs take on average 3-10 minutes to complete\n", + "status = \"in progress\"\n", + "while status in [\"in progress\", \"queued\"]:\n", + " time.sleep(15)\n", + " status = client.get_task(trajectory_id).status\n", + "\n", + "if status == \"failed\":\n", + " raise Exception(\"Task failed\")\n", + "\n", + "job_result = client.get_task(trajectory_id, verbose=True)\n", + "answer = job_result.environment_frame[\"state\"][\"state\"][\"answer\"]\n", + "print(f\"The agent's answer to your research question is: \\n{answer}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# In addition to viewing the notebook and reasoning trace via the platform,\n", + "# you can also list the files in the trajectory directory and download any files you need\n", + "print(client.list_files(JobNames.FINCH, trajectory_id=trajectory_id))\n", + "\n", + "destination_path = \"output/notebook.ipynb\"\n", + "file_path = \"notebook.ipynb\"\n", + "client.download_file(\n", + " JobNames.FINCH,\n", + " trajectory_id=trajectory_id,\n", + " file_path=file_path,\n", + " destination_path=destination_path,\n", + ")\n", + "print(f\"Notebook saved to {os.path.abspath(destination_path)}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}