Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 206 additions & 0 deletions src/bedrock_agentcore/evaluation/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from botocore.config import Config
from pydantic import BaseModel

from bedrock_agentcore._utils.config import WaitConfig
from bedrock_agentcore._utils.polling import wait_until, wait_until_deleted
from bedrock_agentcore._utils.snake_case import accept_snake_case_kwargs, convert_kwargs
from bedrock_agentcore._utils.user_agent import build_user_agent_suffix
from bedrock_agentcore.evaluation.agent_span_collector import CloudWatchAgentSpanCollector

Expand All @@ -17,6 +20,9 @@
QUERY_TIMEOUT_SECONDS = 60
POLL_INTERVAL_SECONDS = 2

# Terminal failure statuses for evaluator polling; wait_until raises
# RuntimeError when get_evaluator reports one of these.
_EVALUATOR_FAILED_STATUSES = {"CREATE_FAILED", "UPDATE_FAILED"}
# Terminal failure statuses for online-evaluation-config polling; same
# values today, kept separate so the two APIs can diverge independently.
_ONLINE_EVAL_FAILED_STATUSES = {"CREATE_FAILED", "UPDATE_FAILED"}


class ReferenceInputs(BaseModel):
"""Ground truth inputs for evaluation.
Expand Down Expand Up @@ -92,6 +98,46 @@ def __init__(

logger.info("Initialized EvaluationClient in region %s", self.region_name)

# Pass-through
# -------------------------------------------------------------------------
# Data-plane (bedrock-agentcore) method names that __getattr__ may forward.
_ALLOWED_DP_METHODS = {
    "evaluate",
}

# Control-plane (bedrock-agentcore-control) method names that __getattr__
# may forward. Anything not listed here raises AttributeError instead of
# silently reaching the boto3 client.
_ALLOWED_CP_METHODS = {
    # Evaluator CRUD
    "create_evaluator",
    "get_evaluator",
    "list_evaluators",
    "update_evaluator",
    "delete_evaluator",
    # Online evaluation config CRUD
    "create_online_evaluation_config",
    "get_online_evaluation_config",
    "list_online_evaluation_configs",
    "update_online_evaluation_config",
    "delete_online_evaluation_config",
}

def __getattr__(self, name: str):
    """Forward allowlisted SDK calls to the underlying boto3 clients.

    Data-plane methods are checked first, then control-plane ones. The
    forwarded callable is wrapped so snake_case keyword arguments are
    converted to the camelCase names the boto3 APIs expect.

    Note: the client attributes are looked up lazily, inside the matching
    branch only, so an unrelated attribute probe before ``__init__``
    finishes cannot recurse through this method.

    Raises:
        AttributeError: If *name* is not allowlisted or is missing from
            both clients.
    """
    if name in self._ALLOWED_DP_METHODS:
        dp = self._dp_client
        if hasattr(dp, name):
            logger.debug("Forwarding method '%s' to _dp_client", name)
            return accept_snake_case_kwargs(getattr(dp, name))

    if name in self._ALLOWED_CP_METHODS:
        cp = self._cp_client
        if hasattr(cp, name):
            logger.debug("Forwarding method '%s' to _cp_client", name)
            return accept_snake_case_kwargs(getattr(cp, name))

    raise AttributeError(
        f"'{self.__class__.__name__}' object has no attribute '{name}'. "
        f"Method not found on data plane or control plane client. "
        f"Available methods can be found in the boto3 documentation for "
        f"'bedrock-agentcore' and 'bedrock-agentcore-control' services."
    )

def run(
self,
evaluator_ids: List[str],
Expand Down Expand Up @@ -349,3 +395,163 @@ def _build_reference_inputs(
)

return result

# *_and_wait methods
# -------------------------------------------------------------------------
def create_evaluator_and_wait(
    self,
    wait_config: Optional[WaitConfig] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Create an evaluator, then block until its status is ACTIVE.

    Args:
        wait_config: Optional polling configuration (interval / max wait).
        **kwargs: Passed through to the create_evaluator API; snake_case
            keys are converted to the camelCase names boto3 expects.

    Returns:
        The get_evaluator response once the status is ACTIVE.

    Raises:
        RuntimeError: If the evaluator enters a failed status.
        TimeoutError: If the evaluator is not ACTIVE within max_wait.
    """
    created = self._cp_client.create_evaluator(**convert_kwargs(kwargs))
    evaluator_id = created["evaluatorId"]

    def _poll() -> Dict[str, Any]:
        return self._cp_client.get_evaluator(evaluatorId=evaluator_id)

    # NOTE(review): unlike the online-eval-config waiters, no
    # error_field="failureReason" is passed here — confirm the evaluator
    # API exposes no failure-reason field, or add it for parity.
    return wait_until(_poll, "ACTIVE", _EVALUATOR_FAILED_STATUSES, wait_config)

def update_evaluator_and_wait(
    self,
    wait_config: Optional[WaitConfig] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Update an evaluator, then block until its status is ACTIVE.

    Args:
        wait_config: Optional polling configuration (interval / max wait).
        **kwargs: Passed through to the update_evaluator API; snake_case
            keys are converted to the camelCase names boto3 expects.

    Returns:
        The get_evaluator response once the status is ACTIVE.

    Raises:
        RuntimeError: If the evaluator enters a failed status.
        TimeoutError: If the evaluator is not ACTIVE within max_wait.
    """
    updated = self._cp_client.update_evaluator(**convert_kwargs(kwargs))
    evaluator_id = updated["evaluatorId"]

    def _poll() -> Dict[str, Any]:
        return self._cp_client.get_evaluator(evaluatorId=evaluator_id)

    # NOTE(review): no error_field="failureReason" here, unlike the
    # online-eval-config waiters — confirm intentional.
    return wait_until(_poll, "ACTIVE", _EVALUATOR_FAILED_STATUSES, wait_config)

def create_online_evaluation_config_and_wait(
    self,
    wait_config: Optional[WaitConfig] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Create an online evaluation config, then block until it is ACTIVE.

    Args:
        wait_config: Optional polling configuration (interval / max wait).
        **kwargs: Passed through to the create_online_evaluation_config
            API; snake_case keys are converted for the boto3 call.

    Returns:
        The get_online_evaluation_config response once status is ACTIVE.

    Raises:
        RuntimeError: If the config enters a failed status.
        TimeoutError: If the config is not ACTIVE within max_wait.
    """
    created = self._cp_client.create_online_evaluation_config(**convert_kwargs(kwargs))
    config_id = created["onlineEvaluationConfigId"]

    def _poll() -> Dict[str, Any]:
        return self._cp_client.get_online_evaluation_config(
            onlineEvaluationConfigId=config_id,
        )

    # Surface the service-reported failure reason in the raised error.
    return wait_until(
        _poll,
        "ACTIVE",
        _ONLINE_EVAL_FAILED_STATUSES,
        wait_config,
        error_field="failureReason",
    )

def update_online_evaluation_config_and_wait(
    self,
    wait_config: Optional[WaitConfig] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Update an online evaluation config, then block until it is ACTIVE.

    Args:
        wait_config: Optional polling configuration (interval / max wait).
        **kwargs: Passed through to the update_online_evaluation_config
            API; snake_case keys are converted for the boto3 call.

    Returns:
        The get_online_evaluation_config response once status is ACTIVE.

    Raises:
        RuntimeError: If the config enters a failed status.
        TimeoutError: If the config is not ACTIVE within max_wait.
    """
    updated = self._cp_client.update_online_evaluation_config(**convert_kwargs(kwargs))
    config_id = updated["onlineEvaluationConfigId"]

    def _poll() -> Dict[str, Any]:
        return self._cp_client.get_online_evaluation_config(
            onlineEvaluationConfigId=config_id,
        )

    # Surface the service-reported failure reason in the raised error.
    return wait_until(
        _poll,
        "ACTIVE",
        _ONLINE_EVAL_FAILED_STATUSES,
        wait_config,
        error_field="failureReason",
    )

def delete_evaluator_and_wait(
    self,
    wait_config: Optional[WaitConfig] = None,
    **kwargs,
) -> None:
    """Delete an evaluator and block until the resource no longer exists.

    Args:
        wait_config: Optional polling configuration (interval / max wait).
        **kwargs: Passed through to the delete_evaluator API; snake_case
            keys are converted for the boto3 call.

    Raises:
        TimeoutError: If the evaluator is not deleted within max_wait.
    """
    deleted = self._cp_client.delete_evaluator(**convert_kwargs(kwargs))
    evaluator_id = deleted["evaluatorId"]

    def _poll():
        return self._cp_client.get_evaluator(evaluatorId=evaluator_id)

    # Polls until the describe call reports the resource as gone.
    wait_until_deleted(_poll, wait_config=wait_config)

def delete_online_evaluation_config_and_wait(
    self,
    wait_config: Optional[WaitConfig] = None,
    **kwargs,
) -> None:
    """Delete an online evaluation config and block until it is gone.

    Args:
        wait_config: Optional polling configuration (interval / max wait).
        **kwargs: Passed through to the delete_online_evaluation_config
            API; snake_case keys are converted for the boto3 call.

    Raises:
        TimeoutError: If the config is not deleted within max_wait.
    """
    deleted = self._cp_client.delete_online_evaluation_config(**convert_kwargs(kwargs))
    config_id = deleted["onlineEvaluationConfigId"]

    def _poll():
        return self._cp_client.get_online_evaluation_config(
            onlineEvaluationConfigId=config_id,
        )

    # Polls until the describe call reports the resource as gone.
    wait_until_deleted(_poll, wait_config=wait_config)
155 changes: 155 additions & 0 deletions tests/bedrock_agentcore/evaluation/test_client_passthrough.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""Tests for EvaluationClient passthrough and *_and_wait methods."""

from unittest.mock import Mock, patch

import pytest
from botocore.exceptions import ClientError

from bedrock_agentcore.evaluation.client import EvaluationClient


class TestEvaluationClientPassthrough:
    """Tests for the __getattr__ allowlisted passthrough."""

    def _make_client(self):
        # Build a client whose boto3 clients are replaced with mocks, so no
        # network or credentials are needed.
        with patch("boto3.client"):
            instance = EvaluationClient(region_name="us-west-2")
        instance._cp_client = Mock()
        instance._dp_client = Mock()
        return instance

    def test_cp_method_forwarded(self):
        # Control-plane calls reach _cp_client with kwargs intact.
        client = self._make_client()
        client._cp_client.get_evaluator.return_value = {"evaluatorId": "e-123"}

        response = client.get_evaluator(evaluatorId="e-123")

        client._cp_client.get_evaluator.assert_called_once_with(evaluatorId="e-123")
        assert response["evaluatorId"] == "e-123"

    def test_dp_method_forwarded(self):
        # Data-plane calls reach _dp_client with kwargs intact.
        client = self._make_client()
        client._dp_client.evaluate.return_value = {"evaluationResults": []}

        response = client.evaluate(evaluatorId="e-123")

        client._dp_client.evaluate.assert_called_once_with(evaluatorId="e-123")
        assert response["evaluationResults"] == []

    def test_snake_case_kwargs_converted(self):
        # snake_case kwargs are translated to the camelCase API names.
        client = self._make_client()
        client._cp_client.get_evaluator.return_value = {"evaluatorId": "e-123"}

        client.get_evaluator(evaluator_id="e-123")

        client._cp_client.get_evaluator.assert_called_once_with(evaluatorId="e-123")

    def test_non_allowlisted_method_raises(self):
        # Anything outside the allowlists raises AttributeError.
        client = self._make_client()
        with pytest.raises(AttributeError, match="has no attribute"):
            client.not_a_real_method()

    def test_all_cp_methods_in_allowlist(self):
        # Pin the exact control-plane allowlist so accidental additions or
        # removals show up as a test failure.
        expected = {
            "create_evaluator",
            "get_evaluator",
            "list_evaluators",
            "update_evaluator",
            "delete_evaluator",
            "create_online_evaluation_config",
            "get_online_evaluation_config",
            "list_online_evaluation_configs",
            "update_online_evaluation_config",
            "delete_online_evaluation_config",
        }
        assert expected == EvaluationClient._ALLOWED_CP_METHODS

    def test_all_dp_methods_in_allowlist(self):
        # Pin the exact data-plane allowlist.
        assert {"evaluate"} == EvaluationClient._ALLOWED_DP_METHODS


class TestEvaluationAndWait:
    """Tests for the *_and_wait convenience methods."""

    def _make_client(self):
        # Build a client whose boto3 clients are replaced with mocks.
        with patch("boto3.client"):
            instance = EvaluationClient(region_name="us-west-2")
        instance._cp_client = Mock()
        instance._dp_client = Mock()
        return instance

    def test_create_evaluator_and_wait(self):
        # An immediately-ACTIVE evaluator is returned without error.
        client = self._make_client()
        client._cp_client.create_evaluator.return_value = {"evaluatorId": "e-123"}
        client._cp_client.get_evaluator.return_value = {
            "status": "ACTIVE",
            "evaluatorId": "e-123",
        }

        outcome = client.create_evaluator_and_wait(evaluatorName="test")

        assert outcome["status"] == "ACTIVE"

    def test_create_evaluator_and_wait_failed(self):
        # A terminal failure status surfaces as RuntimeError.
        client = self._make_client()
        client._cp_client.create_evaluator.return_value = {"evaluatorId": "e-123"}
        client._cp_client.get_evaluator.return_value = {"status": "CREATE_FAILED"}

        with pytest.raises(RuntimeError, match="CREATE_FAILED"):
            client.create_evaluator_and_wait(evaluatorName="test")

    def test_update_evaluator_and_wait(self):
        client = self._make_client()
        client._cp_client.update_evaluator.return_value = {"evaluatorId": "e-123"}
        client._cp_client.get_evaluator.return_value = {
            "status": "ACTIVE",
            "evaluatorId": "e-123",
        }

        outcome = client.update_evaluator_and_wait(evaluatorId="e-123")

        assert outcome["status"] == "ACTIVE"

    def test_create_online_eval_config_and_wait(self):
        client = self._make_client()
        client._cp_client.create_online_evaluation_config.return_value = {
            "onlineEvaluationConfigId": "c-123",
        }
        client._cp_client.get_online_evaluation_config.return_value = {
            "status": "ACTIVE",
            "onlineEvaluationConfigId": "c-123",
        }

        outcome = client.create_online_evaluation_config_and_wait(
            onlineEvaluationConfigName="test",
        )

        assert outcome["status"] == "ACTIVE"

    def test_update_online_eval_config_and_wait(self):
        client = self._make_client()
        client._cp_client.update_online_evaluation_config.return_value = {
            "onlineEvaluationConfigId": "c-123",
        }
        client._cp_client.get_online_evaluation_config.return_value = {
            "status": "ACTIVE",
            "onlineEvaluationConfigId": "c-123",
        }

        outcome = client.update_online_evaluation_config_and_wait(
            onlineEvaluationConfigId="c-123",
        )

        assert outcome["status"] == "ACTIVE"

    @patch("bedrock_agentcore._utils.polling.time.sleep")
    def test_delete_evaluator_and_wait(self, _mock_sleep):
        # A ResourceNotFoundException from the describe call means the
        # resource is gone, so the waiter returns cleanly.
        client = self._make_client()
        client._cp_client.delete_evaluator.return_value = {"evaluatorId": "e-123"}
        client._cp_client.get_evaluator.side_effect = ClientError(
            {"Error": {"Code": "ResourceNotFoundException", "Message": "gone"}},
            "GetEvaluator",
        )

        client.delete_evaluator_and_wait(evaluatorId="e-123")

        client._cp_client.delete_evaluator.assert_called_once()

    @patch("bedrock_agentcore._utils.polling.time.sleep")
    def test_delete_online_eval_config_and_wait(self, _mock_sleep):
        client = self._make_client()
        client._cp_client.delete_online_evaluation_config.return_value = {
            "onlineEvaluationConfigId": "c-123",
        }
        client._cp_client.get_online_evaluation_config.side_effect = ClientError(
            {"Error": {"Code": "ResourceNotFoundException", "Message": "gone"}},
            "GetOnlineEvaluationConfig",
        )

        client.delete_online_evaluation_config_and_wait(
            onlineEvaluationConfigId="c-123",
        )

        client._cp_client.delete_online_evaluation_config.assert_called_once()
Loading
Loading