From c42d7c300d73464af56ac12a2f898a9ea2564ce4 Mon Sep 17 00:00:00 2001 From: Eladio Rincon Date: Tue, 21 Apr 2026 22:37:13 +0200 Subject: [PATCH 1/2] fix(frontend): reset match selection when source changes in Explorer Explorer page kept stale match selection and cached tab data when switching between PostgreSQL and SQL Server. Add useEffect to reset selectedMatchId on source change so all tabs refetch correctly. --- frontend/webapp/src/pages/ExplorerPage.tsx | 5 ++++ .../webapp/tests/e2e/cross-cutting.spec.ts | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/frontend/webapp/src/pages/ExplorerPage.tsx b/frontend/webapp/src/pages/ExplorerPage.tsx index 3bc1d05..43bd6e9 100644 --- a/frontend/webapp/src/pages/ExplorerPage.tsx +++ b/frontend/webapp/src/pages/ExplorerPage.tsx @@ -54,6 +54,11 @@ export function ExplorerPage() { enabled: activeTab === 'tables', }) + // Reset match selection when source changes + useEffect(() => { + setSelectedMatchId(null) + }, [source]) + useEffect(() => { if (!matchesQuery.data || matchesQuery.data.length === 0) { setSelectedMatchId(null) diff --git a/frontend/webapp/tests/e2e/cross-cutting.spec.ts b/frontend/webapp/tests/e2e/cross-cutting.spec.ts index 9569b68..81e4225 100644 --- a/frontend/webapp/tests/e2e/cross-cutting.spec.ts +++ b/frontend/webapp/tests/e2e/cross-cutting.spec.ts @@ -21,6 +21,30 @@ test.describe('Cross-cutting', () => { await page.screenshot({ path: `${SCREENSHOTS}/source-sqlserver.png`, fullPage: true }) }) + test('US-23b: source switching updates Tables Info tab', async ({ page }) => { + await page.goto('/explorer') + await page.waitForLoadState('networkidle') + + // Go to Tables tab on postgres + const tablesTab = page.getByRole('button', { name: 'Tables' }) + await tablesTab.click() + await page.waitForLoadState('networkidle') + // Postgres has events_details__quarter_minute + await expect(page.getByText('events_details__quarter_minute')).toBeVisible({ timeout: 10_000 }) + + // Switch to sqlserver + const sourceSelect = page.locator('select').filter({ hasText: /PostgreSQL/i }).first() + await sourceSelect.selectOption('sqlserver') + await page.waitForLoadState('networkidle') + + // Must click Tables tab again after source switch (tab may reset) + await tablesTab.click() + await page.waitForLoadState('networkidle') + + // SQL Server has events_details__15secs_agg (different name) + await expect(page.getByText('events_details__15secs_agg')).toBeVisible({ timeout: 10_000 }) + }) + test('US-24: seed data is available out of the box', async ({ page }) => { // Explorer has data await page.goto('/explorer') From 89cf036b055cf19365fef6d8f3b5538fb6ff41e0 Mon Sep 17 00:00:00 2001 From: Eladio Rincon Date: Tue, 21 Apr 2026 22:49:55 +0200 Subject: [PATCH 2/2] fix(backend): remove all deprecated embedding column references from code and tests Complete the column cleanup started in PR #62: - Remove ada-002 and t3-large from SQL INSERT statements, column mappings, embeddings status queries, repository search maps, API model validators - Remove deprecated entity fields from domain EventDetail - Update 7 test files to use only t3-small - Add source-switch E2E test for Tables Info tab - 530 backend tests + 26 E2E tests passing --- backend/app/api/v1/models.py | 4 +-- backend/app/domain/entities.py | 2 -- backend/app/repositories/postgres.py | 2 -- backend/app/repositories/sqlserver.py | 1 - backend/app/services/ingestion_service.py | 33 +++++++------------ backend/tests/api/test_chat.py | 4 +-- backend/tests/api/test_explorer_embeddings.py | 2 +- .../test_dependencies_and_explorer_service.py | 2 +- backend/tests/unit/test_domain.py | 3 +- backend/tests/unit/test_postgres_repo.py | 33 ++++--------------- backend/tests/unit/test_sqlserver_repo.py | 8 ++--- frontend/webapp/src/pages/ExplorerPage.tsx | 30 ++--------------- 12 files changed, 31 insertions(+), 93 deletions(-) diff --git a/backend/app/api/v1/models.py b/backend/app/api/v1/models.py index 2b28c7c..71deba6 100644 --- a/backend/app/api/v1/models.py +++ b/backend/app/api/v1/models.py @@ -114,7 +114,7 @@ class SearchRequest(BaseModel): ) embedding_model: str = Field( default="text-embedding-3-small", - description="Embedding model: text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large", + description="Embedding model: text-embedding-3-small", ) top_n: int = Field(default=10, ge=1, le=100, description="Number of results") temperature: float = Field(default=0.1, ge=0, le=2, description="LLM temperature") @@ -141,9 +141,7 @@ def validate_algorithm(cls, v: str) -> str: def validate_model(cls, v: str) -> str: """Validate embedding model.""" allowed = [ - "text-embedding-ada-002", "text-embedding-3-small", - "text-embedding-3-large", ] if v not in allowed: raise ValueError(f"Model must be one of: {', '.join(allowed)}") diff --git a/backend/app/domain/entities.py b/backend/app/domain/entities.py index 9a76f12..ea77a12 100644 --- a/backend/app/domain/entities.py +++ b/backend/app/domain/entities.py @@ -123,9 +123,7 @@ class EventDetail: count: int json_data: str summary: str | None = None - summary_embedding_ada_002: list[float] | None = None summary_embedding_t3_small: list[float] | None = None - summary_embedding_t3_large: list[float] | None = None summary_embedding_e5: list[float] | None = None @property diff --git a/backend/app/repositories/postgres.py b/backend/app/repositories/postgres.py index 5d5d5ad..0cb30de 100644 --- a/backend/app/repositories/postgres.py +++ b/backend/app/repositories/postgres.py @@ -545,9 +545,7 @@ def search_by_embedding( """ # Map embedding model to column name embedding_column_map = { - EmbeddingModel.ADA_002: "summary_embedding_ada_002", EmbeddingModel.T3_SMALL: "summary_embedding_t3_small", - EmbeddingModel.T3_LARGE: "summary_embedding_t3_large", } # Map search algorithm to pgvector operator diff --git a/backend/app/repositories/sqlserver.py b/backend/app/repositories/sqlserver.py index 9c1580b..b4eca09 100644 --- a/backend/app/repositories/sqlserver.py +++ b/backend/app/repositories/sqlserver.py @@ -560,7 +560,6 @@ def search_by_embedding( Note: SQL Server uses VECTOR_DISTANCE function for similarity search. """ embedding_column_map = { - EmbeddingModel.ADA_002: "embedding_ada_002", EmbeddingModel.T3_SMALL: "embedding_3_small", } diff --git a/backend/app/services/ingestion_service.py b/backend/app/services/ingestion_service.py index db817b5..503490f 100644 --- a/backend/app/services/ingestion_service.py +++ b/backend/app/services/ingestion_service.py @@ -398,9 +398,7 @@ def get_embeddings_status(self, source: str) -> dict[str, Any]: cur.execute( """ SELECT COUNT(*), - SUM(CASE WHEN summary_embedding_ada_002 IS NOT NULL THEN 1 ELSE 0 END), SUM(CASE WHEN summary_embedding_t3_small IS NOT NULL THEN 1 ELSE 0 END), - SUM(CASE WHEN summary_embedding_t3_large IS NOT NULL THEN 1 ELSE 0 END), SUM(CASE WHEN embedding_status = 'done' THEN 1 ELSE 0 END), SUM(CASE WHEN embedding_status = 'error' THEN 1 ELSE 0 END), SUM(CASE WHEN embedding_status = 'pending' OR embedding_status IS NULL THEN 1 ELSE 0 END) @@ -413,21 +411,18 @@ def get_embeddings_status(self, source: str) -> dict[str, Any]: "table": "events_details__quarter_minute", "total_rows": int(row[0] or 0), "coverage": { - "text-embedding-ada-002": int(row[1] or 0), - "text-embedding-3-small": int(row[2] or 0), - "text-embedding-3-large": int(row[3] or 0), + "text-embedding-3-small": int(row[1] or 0), }, "status": { - "done": int(row[4] or 0), - "error": int(row[5] or 0), - "pending": int(row[6] or 0), + "done": int(row[2] or 0), + "error": int(row[3] or 0), + "pending": int(row[4] or 0), }, } cur.execute( """ SELECT COUNT(*), - SUM(CASE WHEN embedding_ada_002 IS NOT NULL THEN 1 ELSE 0 END), SUM(CASE WHEN embedding_3_small IS NOT NULL THEN 1 ELSE 0 END), SUM(CASE WHEN embedding_status = 'done' THEN 1 ELSE 0 END), SUM(CASE WHEN embedding_status = 'error' THEN 1 ELSE 0 END), @@ -441,13 +436,12 @@ def get_embeddings_status(self, source: str) -> dict[str, Any]: "table": "events_details__15secs_agg", "total_rows": int(row[0] or 0), "coverage": { - "text-embedding-ada-002": int(row[1] or 0), - "text-embedding-3-small": int(row[2] or 0), + "text-embedding-3-small": int(row[1] or 0), }, "status": { - "done": int(row[3] or 0), - "error": int(row[4] or 0), - "pending": int(row[5] or 0), + "done": int(row[2] or 0), + "error": int(row[3] or 0), + "pending": int(row[4] or 0), }, } @@ -1002,10 +996,10 @@ def _build_aggregations(self, conn, source: str, match_ids: list[int]) -> int: cur.execute( f""" INSERT INTO events_details__15secs_agg ( - match_id, period, minute, _15secs, count, json_, summary, embedding_3_small, embedding_ada_002 + match_id, period, minute, _15secs, count, json_, summary, embedding_3_small ) SELECT match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1, - COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL, NULL + COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL FROM events_details WHERE match_id IN ({placeholders}) GROUP BY match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1 @@ -1017,10 +1011,10 @@ def _build_aggregations(self, conn, source: str, match_ids: list[int]) -> int: cur.execute( """ INSERT INTO events_details__15secs_agg ( - match_id, period, minute, _15secs, count, json_, summary, embedding_3_small, embedding_ada_002 + match_id, period, minute, _15secs, count, json_, summary, embedding_3_small ) SELECT match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1, - COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL, NULL + COUNT(*), STRING_AGG(CAST(ISNULL(json_, '') AS NVARCHAR(MAX)), ', '), NULL, NULL FROM events_details GROUP BY match_id, ISNULL(period,0), ISNULL(minute,0), (ISNULL(second,0)/15)+1 """ @@ -1041,9 +1035,7 @@ def _update_embeddings_for_row( try: if source == "postgres": model_cols = { - "text-embedding-ada-002": "summary_embedding_ada_002", "text-embedding-3-small": "summary_embedding_t3_small", - "text-embedding-3-large": "summary_embedding_t3_large", } for model in models: col = model_cols.get(model) @@ -1062,7 +1054,6 @@ def _update_embeddings_for_row( return model_cols = { - "text-embedding-ada-002": "embedding_ada_002", "text-embedding-3-small": "embedding_3_small", } for model in models: diff --git a/backend/tests/api/test_chat.py b/backend/tests/api/test_chat.py index 360d701..7948689 100644 --- a/backend/tests/api/test_chat.py +++ b/backend/tests/api/test_chat.py @@ -118,12 +118,12 @@ def test_postgres_rejects_deprecated_t3_large(self, client): payload = {**VALID_PAYLOAD, "embedding_model": "text-embedding-3-large", "search_algorithm": "cosine"} response = client.post("/api/v1/chat/search?source=postgres", json=payload) - assert response.status_code == 400 + assert response.status_code == 422 def test_sqlserver_rejects_deprecated_t3_large(self, client): payload = {**VALID_PAYLOAD, "embedding_model": "text-embedding-3-large"} response = client.post("/api/v1/chat/search?source=sqlserver", json=payload) - assert response.status_code == 400 + assert response.status_code == 422 def test_postgres_supports_l1_manhattan(self, client): payload = {**VALID_PAYLOAD, "search_algorithm": "l1_manhattan"} diff --git a/backend/tests/api/test_explorer_embeddings.py b/backend/tests/api/test_explorer_embeddings.py index ee76b2f..74e9357 100644 --- a/backend/tests/api/test_explorer_embeddings.py +++ b/backend/tests/api/test_explorer_embeddings.py @@ -147,7 +147,7 @@ def test_get_tables_info_valid_request_returns_200(self, client, mock_explorer_s def test_table_info_fields_present(self, client, mock_explorer_svc): mock_explorer_svc.get_tables_info.return_value = [ - {"table": "matches", "row_count": 10, "embedding_columns": ["summary_embedding_ada_002"]}, + {"table": "matches", "row_count": 10, "embedding_columns": ["summary_embedding_t3_small"]}, ] data = client.get("/api/v1/tables-info").json() assert "table" in data[0] diff --git a/backend/tests/unit/test_dependencies_and_explorer_service.py b/backend/tests/unit/test_dependencies_and_explorer_service.py index 002b79e..e0337f3 100644 --- a/backend/tests/unit/test_dependencies_and_explorer_service.py +++ b/backend/tests/unit/test_dependencies_and_explorer_service.py @@ -186,7 +186,7 @@ def test_get_tables_info_returns_multiple_tables(self): mock_repo = MagicMock(spec=MatchRepository) mock_repo.get_tables_info.return_value = [ {"table": "matches", "row_count": 42, "embedding_columns": []}, - {"table": "events_details__quarter_minute", "row_count": 1000, "embedding_columns": ["summary_embedding_ada_002"]}, + {"table": "events_details__quarter_minute", "row_count": 1000, "embedding_columns": ["summary_embedding_t3_small"]}, ] svc = DataExplorerService(match_repo=mock_repo) diff --git a/backend/tests/unit/test_domain.py b/backend/tests/unit/test_domain.py index 5bd1774..50ad698 100644 --- a/backend/tests/unit/test_domain.py +++ b/backend/tests/unit/test_domain.py @@ -154,7 +154,6 @@ def test_time_description_first_quarter(self): def test_optional_embeddings_default_none(self): e = self._make_event() assert e.summary is None - assert e.summary_embedding_ada_002 is None assert e.summary_embedding_t3_small is None @@ -231,7 +230,7 @@ def test_all_valid_algorithms(self): assert r.search_algorithm == algo def test_all_valid_models(self): - for model in ["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]: + for model in ["text-embedding-3-small"]: r = ApiSearchRequest(match_id=1, query="q", embedding_model=model) assert r.embedding_model == model diff --git a/backend/tests/unit/test_postgres_repo.py b/backend/tests/unit/test_postgres_repo.py index 7259c25..64b3f8d 100644 --- a/backend/tests/unit/test_postgres_repo.py +++ b/backend/tests/unit/test_postgres_repo.py @@ -435,7 +435,7 @@ def test_search_by_embedding_returns_results(self, mock_connect): req = SearchRequest( match_id=3943043, query="who scored?", - embedding_model=EmbeddingModel.ADA_002, + embedding_model=EmbeddingModel.T3_SMALL, search_algorithm=SearchAlgorithm.COSINE, top_n=3, ) @@ -488,8 +488,7 @@ def test_search_by_embedding_empty_rows(self, mock_connect): assert results == [] - @patch("app.repositories.postgres.psycopg2.connect") - def test_search_by_embedding_t3_large(self, mock_connect): + def test_search_by_embedding_deprecated_t3_large_raises(self): from app.domain.entities import EmbeddingModel, SearchAlgorithm, SearchRequest req = SearchRequest( @@ -499,27 +498,9 @@ def test_search_by_embedding_t3_large(self, mock_connect): search_algorithm=SearchAlgorithm.INNER_PRODUCT, top_n=2, ) - row1 = _event_row() - row1["similarity_score"] = 0.05 - row2 = _event_row() - row2["id"] = 1002 - row2["similarity_score"] = 0.08 - repo = PostgresEventRepository() - with patch.object(repo, "get_connection") as mock_gc: - conn = MagicMock() - cursor = MagicMock() - cursor.fetchall.return_value = [row1, row2] - conn.cursor.return_value.__enter__ = MagicMock(return_value=cursor) - conn.cursor.return_value.__exit__ = MagicMock(return_value=False) - mock_gc.return_value.__enter__ = MagicMock(return_value=conn) - mock_gc.return_value.__exit__ = MagicMock(return_value=False) - - results = repo.search_by_embedding(req, query_embedding=[0.1] * 1536) - - assert len(results) == 2 - assert results[0].rank == 1 - assert results[1].rank == 2 + with pytest.raises(ValueError, match="Unsupported embedding model"): + repo.search_by_embedding(req, query_embedding=[0.1] * 1536) @patch("app.repositories.postgres.psycopg2.connect") def test_search_by_embedding_l1_manhattan(self, mock_connect): @@ -528,7 +509,7 @@ def test_search_by_embedding_l1_manhattan(self, mock_connect): req = SearchRequest( match_id=1, query="test", - embedding_model=EmbeddingModel.ADA_002, + embedding_model=EmbeddingModel.T3_SMALL, search_algorithm=SearchAlgorithm.L1_MANHATTAN, top_n=1, ) @@ -571,7 +552,7 @@ def test_search_by_embedding_invalid_algorithm_raises(self): from app.domain.entities import EmbeddingModel, SearchRequest req_mock = MagicMock() - req_mock.embedding_model = EmbeddingModel.ADA_002 + req_mock.embedding_model = EmbeddingModel.T3_SMALL req_mock.search_algorithm = "bad_algo" req_mock.match_id = 1 req_mock.top_n = 5 @@ -629,7 +610,7 @@ def test_search_by_embedding_propagates_exception(self): req = SearchRequest( match_id=1, query="test", - embedding_model=EmbeddingModel.ADA_002, + embedding_model=EmbeddingModel.T3_SMALL, search_algorithm=SearchAlgorithm.COSINE, top_n=5, ) diff --git a/backend/tests/unit/test_sqlserver_repo.py b/backend/tests/unit/test_sqlserver_repo.py index 9fb49e0..2ac101e 100644 --- a/backend/tests/unit/test_sqlserver_repo.py +++ b/backend/tests/unit/test_sqlserver_repo.py @@ -390,7 +390,7 @@ def test_search_by_embedding_returns_results(self, mock_connect): req = SearchRequest( match_id=3943043, query="who scored?", - embedding_model=EmbeddingModel.ADA_002, + embedding_model=EmbeddingModel.T3_SMALL, search_algorithm=SearchAlgorithm.COSINE, top_n=3, ) @@ -448,7 +448,7 @@ def test_search_by_embedding_inner_product(self, mock_connect): req = SearchRequest( match_id=1, query="test", - embedding_model=EmbeddingModel.ADA_002, + embedding_model=EmbeddingModel.T3_SMALL, search_algorithm=SearchAlgorithm.INNER_PRODUCT, top_n=2, ) @@ -491,7 +491,7 @@ def test_search_by_embedding_unsupported_algorithm_raises(self): from app.domain.entities import EmbeddingModel, SearchRequest req_mock = MagicMock() - req_mock.embedding_model = EmbeddingModel.ADA_002 + req_mock.embedding_model = EmbeddingModel.T3_SMALL req_mock.search_algorithm = "bad_algo" req_mock.match_id = 1 req_mock.top_n = 5 @@ -564,7 +564,7 @@ def test_search_by_embedding_propagates_exception(self): req = SearchRequest( match_id=1, query="test", - embedding_model=EmbeddingModel.ADA_002, + embedding_model=EmbeddingModel.T3_SMALL, search_algorithm=SearchAlgorithm.COSINE, top_n=5, ) diff --git a/frontend/webapp/src/pages/ExplorerPage.tsx b/frontend/webapp/src/pages/ExplorerPage.tsx index 43bd6e9..47992b7 100644 --- a/frontend/webapp/src/pages/ExplorerPage.tsx +++ b/frontend/webapp/src/pages/ExplorerPage.tsx @@ -1,4 +1,4 @@ -import { useEffect, useState } from 'react' +import { useEffect, useMemo, useState } from 'react' import { useQuery } from '@tanstack/react-query' import { api } from '../lib/api/client' @@ -212,33 +212,7 @@ export function ExplorerPage() { ) : null} {activeTab === 'events' ? ( - <> - {selectedMatchId === null ?

Selecciona un partido para ver eventos.

: null} - {eventsQuery.isLoading ?

Cargando eventos...

: null} - {eventsQuery.isError ?

Error cargando eventos.

: null} -
- - - - - - - - - - - {(eventsQuery.data ?? []).map((event) => ( - - - - - - - ))} - -
IDTimeCountSummary
{event.id}{event.time_description}{event.count}{event.summary ?? '-'}
-
- + ) : null} {activeTab === 'tables' ? (