Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions backend/app/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ class SearchRequest(BaseModel):
)
embedding_model: str = Field(
default="text-embedding-3-small",
description="Embedding model: text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large",
description="Embedding model: text-embedding-3-small",
)
top_n: int = Field(default=10, ge=1, le=100, description="Number of results")
temperature: float = Field(default=0.1, ge=0, le=2, description="LLM temperature")
Expand All @@ -141,9 +141,7 @@ def validate_algorithm(cls, v: str) -> str:
def validate_model(cls, v: str) -> str:
"""Validate embedding model."""
allowed = [
"text-embedding-ada-002",
"text-embedding-3-small",
"text-embedding-3-large",
]
if v not in allowed:
raise ValueError(f"Model must be one of: {', '.join(allowed)}")
Expand Down
2 changes: 0 additions & 2 deletions backend/app/domain/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,7 @@ class EventDetail:
count: int
json_data: str
summary: str | None = None
summary_embedding_ada_002: list[float] | None = None
summary_embedding_t3_small: list[float] | None = None
summary_embedding_t3_large: list[float] | None = None
summary_embedding_e5: list[float] | None = None

@property
Expand Down
2 changes: 0 additions & 2 deletions backend/app/repositories/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,7 @@ def search_by_embedding(
"""
# Map embedding model to column name
embedding_column_map = {
EmbeddingModel.ADA_002: "summary_embedding_ada_002",
EmbeddingModel.T3_SMALL: "summary_embedding_t3_small",
EmbeddingModel.T3_LARGE: "summary_embedding_t3_large",
}

# Map search algorithm to pgvector operator
Expand Down
1 change: 0 additions & 1 deletion backend/app/repositories/sqlserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,6 @@ def search_by_embedding(
Note: SQL Server uses VECTOR_DISTANCE function for similarity search.
"""
embedding_column_map = {
EmbeddingModel.ADA_002: "embedding_ada_002",
EmbeddingModel.T3_SMALL: "embedding_3_small",
}

Expand Down
33 changes: 12 additions & 21 deletions backend/app/services/ingestion_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,7 @@ def get_embeddings_status(self, source: str) -> dict[str, Any]:
cur.execute(
"""
SELECT COUNT(*),
SUM(CASE WHEN summary_embedding_ada_002 IS NOT NULL THEN 1 ELSE 0 END),
SUM(CASE WHEN summary_embedding_t3_small IS NOT NULL THEN 1 ELSE 0 END),
SUM(CASE WHEN summary_embedding_t3_large IS NOT NULL THEN 1 ELSE 0 END),
SUM(CASE WHEN embedding_status = 'done' THEN 1 ELSE 0 END),
SUM(CASE WHEN embedding_status = 'error' THEN 1 ELSE 0 END),
SUM(CASE WHEN embedding_status = 'pending' OR embedding_status IS NULL THEN 1 ELSE 0 END)
Expand All @@ -413,21 +411,18 @@ def get_embeddings_status(self, source: str) -> dict[str, Any]:
"table": "events_details__quarter_minute",
"total_rows": int(row[0] or 0),
"coverage": {
"text-embedding-ada-002": int(row[1] or 0),
"text-embedding-3-small": int(row[2] or 0),
"text-embedding-3-large": int(row[3] or 0),
"text-embedding-3-small": int(row[1] or 0),
},
"status": {
"done": int(row[4] or 0),
"error": int(row[5] or 0),
"pending": int(row[6] or 0),
"done": int(row[2] or 0),
"error": int(row[3] or 0),
"pending": int(row[4] or 0),
},
}

cur.execute(
"""
SELECT COUNT(*),
SUM(CASE WHEN embedding_ada_002 IS NOT NULL THEN 1 ELSE 0 END),
SUM(CASE WHEN embedding_3_small IS NOT NULL THEN 1 ELSE 0 END),
SUM(CASE WHEN embedding_status = 'done' THEN 1 ELSE 0 END),
SUM(CASE WHEN embedding_status = 'error' THEN 1 ELSE 0 END),
Expand All @@ -441,13 +436,12 @@ def get_embeddings_status(self, source: str) -> dict[str, Any]:
"table": "events_details__15secs_agg",
"total_rows": int(row[0] or 0),
"coverage": {
"text-embedding-ada-002": int(row[1] or 0),
"text-embedding-3-small": int(row[2] or 0),
"text-embedding-3-small": int(row[1] or 0),
},
"status": {
"done": int(row[3] or 0),
"error": int(row[4] or 0),
"pending": int(row[5] or 0),
"done": int(row[2] or 0),
"error": int(row[3] or 0),
"pending": int(row[4] or 0),
},
}

Expand Down Expand Up @@ -1002,10 +996,10 @@ def _build_aggregations(self, conn, source: str, match_ids: list[int]) -> int:
cur.execute(
f"""
INSERT INTO events_details__15secs_agg (
match_id, period, minute, _15secs, count, json_, summary, embedding_3_small, embedding_ada_002
match_id, period, minute, _15secs, count, json_, summary, embedding_3_small
)
SELECT match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1,
COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL, NULL
COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL
FROM events_details
WHERE match_id IN ({placeholders})
GROUP BY match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1
Expand All @@ -1017,10 +1011,10 @@ def _build_aggregations(self, conn, source: str, match_ids: list[int]) -> int:
cur.execute(
"""
INSERT INTO events_details__15secs_agg (
match_id, period, minute, _15secs, count, json_, summary, embedding_3_small, embedding_ada_002
match_id, period, minute, _15secs, count, json_, summary, embedding_3_small
)
SELECT match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1,
COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL, NULL
COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL
FROM events_details
GROUP BY match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1
"""
Expand All @@ -1041,9 +1035,7 @@ def _update_embeddings_for_row(
try:
if source == "postgres":
model_cols = {
"text-embedding-ada-002": "summary_embedding_ada_002",
"text-embedding-3-small": "summary_embedding_t3_small",
"text-embedding-3-large": "summary_embedding_t3_large",
}
for model in models:
col = model_cols.get(model)
Expand All @@ -1062,7 +1054,6 @@ def _update_embeddings_for_row(
return

model_cols = {
"text-embedding-ada-002": "embedding_ada_002",
"text-embedding-3-small": "embedding_3_small",
}
for model in models:
Expand Down
4 changes: 2 additions & 2 deletions backend/tests/api/test_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,12 @@ def test_postgres_rejects_deprecated_t3_large(self, client):
payload = {**VALID_PAYLOAD, "embedding_model": "text-embedding-3-large",
"search_algorithm": "cosine"}
response = client.post("/api/v1/chat/search?source=postgres", json=payload)
assert response.status_code == 400
assert response.status_code == 422

def test_sqlserver_rejects_deprecated_t3_large(self, client):
payload = {**VALID_PAYLOAD, "embedding_model": "text-embedding-3-large"}
response = client.post("/api/v1/chat/search?source=sqlserver", json=payload)
assert response.status_code == 400
assert response.status_code == 422

def test_postgres_supports_l1_manhattan(self, client):
payload = {**VALID_PAYLOAD, "search_algorithm": "l1_manhattan"}
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/api/test_explorer_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def test_get_tables_info_valid_request_returns_200(self, client, mock_explorer_s

def test_table_info_fields_present(self, client, mock_explorer_svc):
mock_explorer_svc.get_tables_info.return_value = [
{"table": "matches", "row_count": 10, "embedding_columns": ["summary_embedding_ada_002"]},
{"table": "matches", "row_count": 10, "embedding_columns": ["summary_embedding_t3_small"]},
]
data = client.get("/api/v1/tables-info").json()
assert "table" in data[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_get_tables_info_returns_multiple_tables(self):
mock_repo = MagicMock(spec=MatchRepository)
mock_repo.get_tables_info.return_value = [
{"table": "matches", "row_count": 42, "embedding_columns": []},
{"table": "events_details__quarter_minute", "row_count": 1000, "embedding_columns": ["summary_embedding_ada_002"]},
{"table": "events_details__quarter_minute", "row_count": 1000, "embedding_columns": ["summary_embedding_t3_small"]},
]

svc = DataExplorerService(match_repo=mock_repo)
Expand Down
3 changes: 1 addition & 2 deletions backend/tests/unit/test_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ def test_time_description_first_quarter(self):
def test_optional_embeddings_default_none(self):
e = self._make_event()
assert e.summary is None
assert e.summary_embedding_ada_002 is None
assert e.summary_embedding_t3_small is None


Expand Down Expand Up @@ -231,7 +230,7 @@ def test_all_valid_algorithms(self):
assert r.search_algorithm == algo

def test_all_valid_models(self):
for model in ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]:
for model in ["text-embedding-3-small"]:
r = ApiSearchRequest(match_id=1, query="q", embedding_model=model)
assert r.embedding_model == model

Expand Down
33 changes: 7 additions & 26 deletions backend/tests/unit/test_postgres_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def test_search_by_embedding_returns_results(self, mock_connect):
req = SearchRequest(
match_id=3943043,
query="who scored?",
embedding_model=EmbeddingModel.ADA_002,
embedding_model=EmbeddingModel.T3_SMALL,
search_algorithm=SearchAlgorithm.COSINE,
top_n=3,
)
Expand Down Expand Up @@ -488,8 +488,7 @@ def test_search_by_embedding_empty_rows(self, mock_connect):

assert results == []

@patch("app.repositories.postgres.psycopg2.connect")
def test_search_by_embedding_t3_large(self, mock_connect):
def test_search_by_embedding_deprecated_t3_large_raises(self):
from app.domain.entities import EmbeddingModel, SearchAlgorithm, SearchRequest

req = SearchRequest(
Expand All @@ -499,27 +498,9 @@ def test_search_by_embedding_t3_large(self, mock_connect):
search_algorithm=SearchAlgorithm.INNER_PRODUCT,
top_n=2,
)
row1 = _event_row()
row1["similarity_score"] = 0.05
row2 = _event_row()
row2["id"] = 1002
row2["similarity_score"] = 0.08

repo = PostgresEventRepository()
with patch.object(repo, "get_connection") as mock_gc:
conn = MagicMock()
cursor = MagicMock()
cursor.fetchall.return_value = [row1, row2]
conn.cursor.return_value.__enter__ = MagicMock(return_value=cursor)
conn.cursor.return_value.__exit__ = MagicMock(return_value=False)
mock_gc.return_value.__enter__ = MagicMock(return_value=conn)
mock_gc.return_value.__exit__ = MagicMock(return_value=False)

results = repo.search_by_embedding(req, query_embedding=[0.1] * 1536)

assert len(results) == 2
assert results[0].rank == 1
assert results[1].rank == 2
with pytest.raises(ValueError, match="Unsupported embedding model"):
repo.search_by_embedding(req, query_embedding=[0.1] * 1536)

@patch("app.repositories.postgres.psycopg2.connect")
def test_search_by_embedding_l1_manhattan(self, mock_connect):
Expand All @@ -528,7 +509,7 @@ def test_search_by_embedding_l1_manhattan(self, mock_connect):
req = SearchRequest(
match_id=1,
query="test",
embedding_model=EmbeddingModel.ADA_002,
embedding_model=EmbeddingModel.T3_SMALL,
search_algorithm=SearchAlgorithm.L1_MANHATTAN,
top_n=1,
)
Expand Down Expand Up @@ -571,7 +552,7 @@ def test_search_by_embedding_invalid_algorithm_raises(self):
from app.domain.entities import EmbeddingModel, SearchRequest

req_mock = MagicMock()
req_mock.embedding_model = EmbeddingModel.ADA_002
req_mock.embedding_model = EmbeddingModel.T3_SMALL
req_mock.search_algorithm = "bad_algo"
req_mock.match_id = 1
req_mock.top_n = 5
Expand Down Expand Up @@ -629,7 +610,7 @@ def test_search_by_embedding_propagates_exception(self):
req = SearchRequest(
match_id=1,
query="test",
embedding_model=EmbeddingModel.ADA_002,
embedding_model=EmbeddingModel.T3_SMALL,
search_algorithm=SearchAlgorithm.COSINE,
top_n=5,
)
Expand Down
8 changes: 4 additions & 4 deletions backend/tests/unit/test_sqlserver_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def test_search_by_embedding_returns_results(self, mock_connect):
req = SearchRequest(
match_id=3943043,
query="who scored?",
embedding_model=EmbeddingModel.ADA_002,
embedding_model=EmbeddingModel.T3_SMALL,
search_algorithm=SearchAlgorithm.COSINE,
top_n=3,
)
Expand Down Expand Up @@ -448,7 +448,7 @@ def test_search_by_embedding_inner_product(self, mock_connect):
req = SearchRequest(
match_id=1,
query="test",
embedding_model=EmbeddingModel.ADA_002,
embedding_model=EmbeddingModel.T3_SMALL,
search_algorithm=SearchAlgorithm.INNER_PRODUCT,
top_n=2,
)
Expand Down Expand Up @@ -491,7 +491,7 @@ def test_search_by_embedding_unsupported_algorithm_raises(self):
from app.domain.entities import EmbeddingModel, SearchRequest

req_mock = MagicMock()
req_mock.embedding_model = EmbeddingModel.ADA_002
req_mock.embedding_model = EmbeddingModel.T3_SMALL
req_mock.search_algorithm = "bad_algo"
req_mock.match_id = 1
req_mock.top_n = 5
Expand Down Expand Up @@ -564,7 +564,7 @@ def test_search_by_embedding_propagates_exception(self):
req = SearchRequest(
match_id=1,
query="test",
embedding_model=EmbeddingModel.ADA_002,
embedding_model=EmbeddingModel.T3_SMALL,
search_algorithm=SearchAlgorithm.COSINE,
top_n=5,
)
Expand Down
35 changes: 7 additions & 28 deletions frontend/webapp/src/pages/ExplorerPage.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { useEffect, useState } from 'react'
import { useEffect, useMemo, useState } from 'react'
import { useQuery } from '@tanstack/react-query'

import { api } from '../lib/api/client'
Expand Down Expand Up @@ -54,6 +54,11 @@ export function ExplorerPage() {
enabled: activeTab === 'tables',
})

// Reset match selection when source changes
useEffect(() => {
setSelectedMatchId(null)
}, [source])

useEffect(() => {
if (!matchesQuery.data || matchesQuery.data.length === 0) {
setSelectedMatchId(null)
Expand Down Expand Up @@ -207,33 +212,7 @@ export function ExplorerPage() {
) : null}

{activeTab === 'events' ? (
<>
{selectedMatchId === null ? <p className="text-mute">Selecciona un partido para ver eventos.</p> : null}
{eventsQuery.isLoading ? <p className="text-mute">Cargando eventos...</p> : null}
{eventsQuery.isError ? <p className="text-rose-300">Error cargando eventos.</p> : null}
<div className="overflow-x-auto">
<table className="min-w-full divide-y divide-white/10 text-sm">
<thead>
<tr className="text-left text-mute">
<th className="px-2 py-2">ID</th>
<th className="px-2 py-2">Time</th>
<th className="px-2 py-2">Count</th>
<th className="px-2 py-2">Summary</th>
</tr>
</thead>
<tbody className="divide-y divide-white/5">
{(eventsQuery.data ?? []).map((event) => (
<tr key={event.id}>
<td className="px-2 py-2 text-mute">{event.id}</td>
<td className="px-2 py-2 text-mute">{event.time_description}</td>
<td className="px-2 py-2 text-ink">{event.count}</td>
<td className="px-2 py-2 text-ink">{event.summary ?? '-'}</td>
</tr>
))}
</tbody>
</table>
</div>
</>
<EventsTab events={eventsQuery.data ?? []} isLoading={eventsQuery.isLoading} isError={eventsQuery.isError} noMatch={selectedMatchId === null} />
) : null}

{activeTab === 'tables' ? (
Expand Down
24 changes: 24 additions & 0 deletions frontend/webapp/tests/e2e/cross-cutting.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,30 @@ test.describe('Cross-cutting', () => {
await page.screenshot({ path: `${SCREENSHOTS}/source-sqlserver.png`, fullPage: true })
})

// US-23b: after the data source is switched, the Tables tab must list the
// table names of the newly selected source instead of the previous one.
test('US-23b: source switching updates Tables Info tab', async ({ page }) => {
  // Both visibility checks allow the same grace period for data to arrive.
  const visibleWithin = { timeout: 10_000 }

  await page.goto('/explorer')
  await page.waitForLoadState('networkidle')

  // Open the Tables tab while the source is still postgres.
  const tablesButton = page.getByRole('button', { name: 'Tables' })
  await tablesButton.click()
  await page.waitForLoadState('networkidle')

  // Postgres exposes events_details__quarter_minute.
  const postgresTable = page.getByText('events_details__quarter_minute')
  await expect(postgresTable).toBeVisible(visibleWithin)

  // Switch to sqlserver through the first <select> whose text matches PostgreSQL.
  await page
    .locator('select')
    .filter({ hasText: /PostgreSQL/i })
    .first()
    .selectOption('sqlserver')
  await page.waitForLoadState('networkidle')

  // Click the Tables tab again: the active tab may reset after a source switch.
  await tablesButton.click()
  await page.waitForLoadState('networkidle')

  // SQL Server uses a different table name: events_details__15secs_agg.
  const sqlServerTable = page.getByText('events_details__15secs_agg')
  await expect(sqlServerTable).toBeVisible(visibleWithin)
})

test('US-24: seed data is available out of the box', async ({ page }) => {
// Explorer has data
await page.goto('/explorer')
Expand Down
Loading