Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/code_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
poetry env use '3.10'
source $(poetry env info --path)/bin/activate
poetry install --with test --all-extras
pre-commit run --all-files
SKIP=pytest pre-commit run --all-files

- name: Security audit
run: |
Expand Down
142 changes: 1 addition & 141 deletions tests/src/test_capability_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,10 @@
import json
import os
import shutil
from unittest.mock import MagicMock, patch

import pytest

from src.capability import Capability, CapabilitySeedDataset, _import_from_path
from src.utils import constants
from src.utils.capability_utils import extract_and_parse_response, run_inspect_evals
from src.utils.capability_utils import extract_and_parse_response


# Define a capability seed dataset configuration and create an object
Expand Down Expand Up @@ -481,140 +478,3 @@ def test__create_inspect_file_w_judge():
print(f"Permission error: {e}")
else:
raise e


def test_run_inspect_evals_success():
    """
    Exercise `run_inspect_evals` against a stubbed, successful evaluation log.

    `inspect_eval` is patched to return a single log whose status is
    "success", and `traceable` is patched with a pass-through decorator
    factory. Afterwards the test asserts that the model's `get_model_name`
    was called with `with_provider=True` and with `with_provider=False`.
    """

    def _model_name(with_provider):
        return "local-model" if with_provider else "model"

    def _passthrough_traceable(*args, **kwargs):
        # Replaces the `traceable` decorator factory: the wrapped function
        # is handed back untouched.
        return lambda f: f

    capability_path = "capabilities/math/algebra"
    output_dir = "logs"

    fake_model = MagicMock(
        model_provider="local",
        model_url="http://localhost:8000",
    )
    fake_model.get_model_name.side_effect = _model_name

    # NOTE(review): the usage key is "openai-model" while the stubbed model
    # reports "local-model" -- confirm whether this mismatch is intentional.
    token_usage = MagicMock(
        input_tokens=100,
        output_tokens=50,
        total_tokens=150,
        reasoning_tokens=30,
    )
    eval_log = MagicMock(status="success", samples=[{}])
    eval_log.stats.model_usage = {"openai-model": token_usage}

    eval_patch = patch(
        "src.utils.capability_utils.inspect_eval", return_value=[eval_log]
    )
    traceable_patch = patch(
        "src.utils.capability_utils.traceable",
        side_effect=_passthrough_traceable,
    )
    # NOTE(review): unlike the local-model test, os.environ is not patched
    # here even though the provider is "local" -- verify the function under
    # test does not leak environment changes.
    with eval_patch, traceable_patch:
        run_inspect_evals(capability_path, fake_model, output_dir)

    for flag in (True, False):
        fake_model.get_model_name.assert_any_call(with_provider=flag)


def test_run_inspect_evals_error():
    """
    Expect `run_inspect_evals` to raise when the evaluation log reports an error.

    `inspect_eval` is patched to return a single log with status "error";
    the call must raise a ValueError whose message matches
    "Error running inspect evals".
    """

    def _model_name(with_provider):
        return "openai-model" if with_provider else "model"

    def _passthrough_traceable(*args, **kwargs):
        # Replaces the `traceable` decorator factory: the wrapped function
        # is handed back untouched.
        return lambda f: f

    capability_path = "capabilities/math/algebra"
    output_dir = "logs"

    fake_model = MagicMock(model_provider="openai")
    fake_model.get_model_name.side_effect = _model_name

    failed_log = MagicMock(status="error", error="Evaluation failed")

    eval_patch = patch(
        "src.utils.capability_utils.inspect_eval", return_value=[failed_log]
    )
    traceable_patch = patch(
        "src.utils.capability_utils.traceable",
        side_effect=_passthrough_traceable,
    )
    with eval_patch, traceable_patch:
        with pytest.raises(ValueError, match="Error running inspect evals"):
            run_inspect_evals(capability_path, fake_model, output_dir)


def test_run_inspect_evals_local_model():
    """
    Check the `OPENAI_BASE_URL` value left behind after a local-model run.

    The environment is patched so that `ORIGINAL_OPENAI_BASE_URL` is
    "https://api.openai.com"; after `run_inspect_evals` returns, the test
    asserts that `OPENAI_BASE_URL` equals that same value.
    """

    def _model_name(with_provider):
        return "local-model" if with_provider else "model"

    def _passthrough_traceable(*args, **kwargs):
        # Replaces the `traceable` decorator factory: the wrapped function
        # is handed back untouched.
        return lambda f: f

    capability_path = "capabilities/math/algebra"
    output_dir = "logs"

    fake_model = MagicMock(
        model_provider="local",
        model_url="http://localhost:8000",
    )
    fake_model.get_model_name.side_effect = _model_name

    eval_log = MagicMock(status="success", samples=[{}])
    eval_log.stats.model_usage = {}

    eval_patch = patch(
        "src.utils.capability_utils.inspect_eval", return_value=[eval_log]
    )
    traceable_patch = patch(
        "src.utils.capability_utils.traceable",
        side_effect=_passthrough_traceable,
    )
    env_patch = patch.dict(
        "os.environ", {"ORIGINAL_OPENAI_BASE_URL": "https://api.openai.com"}
    )
    with eval_patch, traceable_patch, env_patch:
        run_inspect_evals(capability_path, fake_model, output_dir)
        # Checked while the environment patch is still active: patch.dict
        # restores os.environ to its prior contents on exit.
        restored_url = os.environ["OPENAI_BASE_URL"]
        assert restored_url == "https://api.openai.com"


def test_run_inspect_evals_with_kwargs():
    """
    Call `run_inspect_evals` with extra keyword arguments.

    `temperature` and `max_tokens` are forwarded as keyword arguments while
    `inspect_eval` is stubbed to return one successful log. The test passes
    as long as the call does not raise.
    """

    def _model_name(with_provider):
        return "openai-model" if with_provider else "model"

    def _passthrough_traceable(*args, **kwargs):
        # Replaces the `traceable` decorator factory: the wrapped function
        # is handed back untouched.
        return lambda f: f

    capability_path = "capabilities/math/algebra"
    output_dir = "logs"
    extra_options = {"temperature": 0.7, "max_tokens": 100}

    fake_model = MagicMock(model_provider="openai")
    fake_model.get_model_name.side_effect = _model_name

    eval_log = MagicMock(status="success", samples=[{}])
    eval_log.stats.model_usage = {}

    eval_patch = patch(
        "src.utils.capability_utils.inspect_eval", return_value=[eval_log]
    )
    traceable_patch = patch(
        "src.utils.capability_utils.traceable",
        side_effect=_passthrough_traceable,
    )
    # NOTE(review): nothing is asserted about how the extra options are
    # used -- consider checking the patched collaborators' call arguments
    # once the expected contract is confirmed.
    with eval_patch, traceable_patch:
        run_inspect_evals(capability_path, fake_model, output_dir, **extra_options)
Loading