Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions eval_protocol/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,19 @@
__all__.extend(["WeaveAdapter"])
except ImportError:
pass

# Optional pandas DataFrame adapter (useful for Lilac and other pandas-based tools).
# The helpers are only exported when the import succeeds; an ImportError
# (e.g. pandas is not installed) is ignored on purpose.
try:
    from .dataframe import dataframe_to_evaluation_rows, evaluation_rows_to_dataframe

    __all__.extend(["evaluation_rows_to_dataframe", "dataframe_to_evaluation_rows"])
except ImportError:
    pass
66 changes: 66 additions & 0 deletions eval_protocol/adapters/dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
Pandas DataFrame adapter for Eval Protocol.

This module provides utilities for converting between EvaluationRow format
and pandas DataFrame format, enabling integration with data curation tools
such as Lilac, Great Expectations, or any pandas-based workflow.

Example usage:
>>> from eval_protocol.adapters.dataframe import (
... evaluation_rows_to_dataframe,
... dataframe_to_evaluation_rows,
... )
>>>
>>> # Convert EvaluationRows to DataFrame
>>> df = evaluation_rows_to_dataframe(rows)
>>>
>>> # Convert back to EvaluationRows
>>> rows = dataframe_to_evaluation_rows(df)
"""

from __future__ import annotations

import logging

import pandas as pd

from ..models import EvaluationRow

# Module-level logger, used to warn about DataFrame rows that fail to convert.
logger = logging.getLogger(__name__)


def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
    """Build a pandas DataFrame holding one record per EvaluationRow.

    Every row is serialized by calling its ``to_dict()`` method; the keys of
    the resulting dictionaries become the DataFrame columns.

    Args:
        rows: EvaluationRow objects to serialize.

    Returns:
        DataFrame built from the ``to_dict()`` output of each row, in the
        same order as ``rows``.
    """
    return pd.DataFrame([evaluation_row.to_dict() for evaluation_row in rows])


def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
    """Convert a pandas DataFrame back to EvaluationRows.

    Every DataFrame row is turned into a plain dictionary and passed to
    ``EvaluationRow.from_dict()``. Conversion is best-effort: a row that
    cannot be converted is logged (together with its index label) and
    skipped, so the returned list may be shorter than ``df``.

    Args:
        df: DataFrame with a 'data_json' column containing serialized
            EvaluationRows.

    Returns:
        List of the EvaluationRow objects that converted successfully, in
        DataFrame order.
    """
    rows: list[EvaluationRow] = []
    # Build plain dict records in one pass rather than using ``iterrows()``,
    # which constructs a pandas Series for every row only to convert it
    # straight back into a dict.
    records = df.to_dict(orient="records")
    for index, record in zip(df.index, records):
        try:
            rows.append(EvaluationRow.from_dict(record))
        except Exception as exc:
            # Deliberately broad: one bad row must not abort the whole
            # conversion. The index label identifies the offending row, and
            # %-style arguments defer string formatting to the logger.
            logger.warning("Failed to convert row %r: %s", index, exc)
    return rows
43 changes: 43 additions & 0 deletions eval_protocol/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,6 +953,49 @@ def get_termination_reason(self) -> str:
return str(reason)
return "unknown"

def to_dict(self) -> Dict[str, Any]:
    """Flatten this EvaluationRow into a dictionary.

    The complete row is stored as a JSON string under 'data_json' (produced
    by ``model_dump_json()``). A handful of scalar summary fields are added
    next to it so the row can be filtered or grouped without parsing the JSON.

    Returns:
        Dictionary with the keys 'data_json', 'row_id', 'score',
        'message_count', 'has_tools' and 'created_at'. 'row_id', 'score'
        and 'created_at' are None when the attribute they are derived from
        is falsy.
    """
    payload: Dict[str, Any] = {"data_json": self.model_dump_json()}

    metadata = self.input_metadata
    payload["row_id"] = metadata.row_id if metadata else None

    result = self.evaluation_result
    payload["score"] = result.score if result else None

    payload["message_count"] = len(self.messages)
    payload["has_tools"] = bool(self.tools)

    timestamp = self.created_at
    payload["created_at"] = timestamp.isoformat() if timestamp else None
    return payload

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "EvaluationRow":
    """Rebuild an EvaluationRow from a dictionary holding its JSON form.

    Only the 'data_json' entry is read; any other keys in ``data`` are
    ignored.

    Args:
        data: Dictionary whose 'data_json' entry holds the serialized
            EvaluationRow.

    Returns:
        The EvaluationRow validated from the JSON payload.

    Raises:
        ValueError: If 'data_json' is absent or falsy, or if the payload
            fails pydantic validation.
    """
    from pydantic import ValidationError

    serialized = data.get("data_json")
    if serialized:
        try:
            return cls.model_validate_json(serialized)
        except ValidationError as exc:
            # Surface validation failures as ValueError, keeping the
            # original pydantic error attached as the cause.
            raise ValueError(f"Failed to deserialize EvaluationRow: {exc}") from exc
    raise ValueError("Missing 'data_json' field in dictionary")

def __hash__(self) -> int:
# Use a stable hash that works across Python processes
return self._stable_hash()
Expand Down