From 9aabf6425ea69ae68a94b15e112feb3182a73659 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 15:58:33 +0530 Subject: [PATCH 01/11] fix: move profile_router registration before frontend catch-all route to fix shadowed endpoints (#563) --- backend/app/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/main.py b/backend/app/main.py index 13fdaed7..41096d3e 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -192,6 +192,7 @@ async def unhandled_exception_handler(request: Request, exc: Exception): app.include_router(admin_router, prefix="/api/v1") app.include_router(workspaces_router, prefix="/api/v1") app.include_router(health_router, prefix="/api/v1") +app.include_router(profile_router) setup_prometheus_metrics(app) @@ -294,4 +295,3 @@ def root(): "docs": "/docs", "health": "/api/health", } -app.include_router(profile_router) From a4520cd0894ffbd31d7ef8043c81638c38ade1b1 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 16:26:02 +0530 Subject: [PATCH 02/11] chore: add missing pymupdf4llm dependency to requirements.txt --- backend/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/requirements.txt b/backend/requirements.txt index f46463bf..a4c8d700 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -29,6 +29,7 @@ httpx # Document Processing PyMuPDF +pymupdf4llm pdfplumber python-docx unstructured[pdf] From a9317b50d7e4ff169345b737dc3a48b725d58a5d Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 16:30:27 +0530 Subject: [PATCH 03/11] chore: add missing google-generativeai dependency to requirements.txt --- backend/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/requirements.txt b/backend/requirements.txt index a4c8d700..74d26a84 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -55,6 +55,7 @@ spacy>=3.7 neo4j>=5.0 # LLM Inference +google-generativeai huggingface-hub # Production From fe9d238a7482f552a435f668db1d0e6de8e9c285 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 16:39:06 +0530 Subject: [PATCH 04/11] ci: add explicit pip install for pymupdf4llm and google-generativeai --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74838149..47a0bef2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,8 @@ jobs: pip install flake8 flake8-bugbear # Install project deps (skip heavy ML libs with stub extras) pip install -r backend/requirements.txt --quiet || true + # Install document-processing dependencies added after CI was broken + pip install pymupdf4llm google-generativeai - name: Flake8 lint (errors only, no style noise) run: | From 7d973475f87943b447d1dbc7fb5a0f186893d73e Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 16:44:12 +0530 Subject: [PATCH 05/11] ci: debug google namespace package import --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47a0bef2..71b921d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,6 +62,7 @@ jobs: CHROMA_PERSIST_DIR: /tmp/chroma run: | python -c "import sys; sys.path.insert(0, 'backend'); from app.config import get_settings; get_settings(); print('Config imports OK')" + python -c "import google; print('google' in dir()); print(dir(google)); print(google.__file__ if hasattr(google, '__file__') else 'no __file__'); print(google.__path__ if hasattr(google, '__path__') else 'no __path__')" - name: Install pytest-cov run: pip install pytest-cov From e924d496808e7796c157937329c18f030f53a234 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 16:48:11 +0530 Subject: [PATCH 06/11] ci: add more google genai debug --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71b921d9..cbffbaec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,7 +62,9 @@ jobs: CHROMA_PERSIST_DIR: /tmp/chroma run: | python -c "import sys; sys.path.insert(0, 'backend'); from app.config import get_settings; get_settings(); print('Config imports OK')" - python -c "import google; print('google' in dir()); print(dir(google)); print(google.__file__ if hasattr(google, '__file__') else 'no __file__'); print(google.__path__ if hasattr(google, '__path__') else 'no __path__')" + python -c "import google; print('google.__file__:', google.__file__); print('google.__path__:', list(google.__path__))" + python -c "import os; google_path = '/opt/hostedtoolcache/Python/3.11.15/x64/lib/python3.11/site-packages/google'; print('google dir contents:', os.listdir(google_path) if os.path.isdir(google_path) else 'NOT A DIR')" + python -c "from google import genai; print('genai imported successfully:', genai)" - name: Install pytest-cov run: pip install pytest-cov From ab8ded073d8aa9a9de504303acfd1f0391171255 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 16:51:56 +0530 Subject: [PATCH 07/11] ci: force-reinstall pymupdf4llm and google-generativeai to fix stale cache --- .github/workflows/ci.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cbffbaec..ffbd04e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,8 +40,8 @@ jobs: pip install flake8 flake8-bugbear # Install project deps (skip heavy ML libs with stub extras) pip install -r backend/requirements.txt --quiet || true - # Install document-processing dependencies added after CI was broken - pip install pymupdf4llm google-generativeai + # Install document-processing dependencies (force reinstall to fix cached stale files) + pip install --force-reinstall pymupdf4llm google-generativeai - name: Flake8 lint (errors only, no style noise) run: | @@ -62,9 +62,6 @@ jobs: CHROMA_PERSIST_DIR: /tmp/chroma run: | python -c "import sys; sys.path.insert(0, 'backend'); from app.config import get_settings; get_settings(); print('Config imports OK')" - python -c "import google; print('google.__file__:', google.__file__); print('google.__path__:', list(google.__path__))" - python -c "import os; google_path = '/opt/hostedtoolcache/Python/3.11.15/x64/lib/python3.11/site-packages/google'; print('google dir contents:', os.listdir(google_path) if os.path.isdir(google_path) else 'NOT A DIR')" - python -c "from google import genai; print('genai imported successfully:', genai)" - name: Install pytest-cov run: pip install pytest-cov From 46220ced0edbcb8a88c8ca6be2ac4e1307d707c2 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 16:57:04 +0530 Subject: [PATCH 08/11] fix: use google-genai (not deprecated google-generativeai) for genai import --- .github/workflows/ci.yml | 2 +- backend/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ffbd04e4..09d82ea8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,7 @@ jobs: # Install project deps (skip heavy ML libs with stub extras) pip install -r backend/requirements.txt --quiet || true # Install document-processing dependencies (force reinstall to fix cached stale files) - pip install --force-reinstall pymupdf4llm google-generativeai + pip install --force-reinstall pymupdf4llm google-genai - name: Flake8 lint (errors only, no style noise) run: | diff --git a/backend/requirements.txt b/backend/requirements.txt index 74d26a84..8c3db07f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -55,7 +55,7 @@ spacy>=3.7 neo4j>=5.0 # LLM Inference -google-generativeai +google-genai huggingface-hub # Production From 5487bee88d1ee39c095c9c691e74b8e194ac55ea Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 17:01:43 +0530 Subject: [PATCH 09/11] ci: add GOOGLE_API_KEY env var for genai.Client() at import time --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09d82ea8..06101910 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,6 +58,7 @@ jobs: DATABASE_URL: sqlite:///./ci_test.db DEBUG: "false" HF_TOKEN: ci-dummy-token + GOOGLE_API_KEY: ci-dummy-key UPLOAD_DIR: /tmp/uploads CHROMA_PERSIST_DIR: /tmp/chroma run: | @@ -72,6 +73,7 @@ jobs: DATABASE_URL: sqlite:///./ci_test.db DEBUG: "false" HF_TOKEN: ci-dummy-token + GOOGLE_API_KEY: ci-dummy-key UPLOAD_DIR: /tmp/uploads CHROMA_PERSIST_DIR: /tmp/chroma run: | From 063ea3d8befe53e9ec13246feed031f20f372035 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 17:07:24 +0530 Subject: [PATCH 10/11] fix: update test to mock AdvancedPDFParser instead of removed ingest_document --- backend/tests/test_celery_ingestion.py | 31 ++++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/backend/tests/test_celery_ingestion.py b/backend/tests/test_celery_ingestion.py index 2e359e63..de997965 100644 --- a/backend/tests/test_celery_ingestion.py +++ b/backend/tests/test_celery_ingestion.py @@ -5,10 +5,11 @@ from app.models import Document from app.tasks import process_document + def test_process_document_ingestion_pipeline(db_session): """ - Test that the Celery task updates document status from pending to ready - by executing the ingestion engine inside the active test database session. + Test that the Celery task updates document status from pending to completed + by executing the layout-aware parser pipeline inside the active test database session. """ # 1. SETUP: Create a mock document that starts as 'pending' @@ -17,7 +18,7 @@ def test_process_document_ingestion_pipeline(db_session): filename="sample.pdf", original_name="sample.pdf", status="pending", - user_id="user-456" + user_id="user-456", ) db_session.add(test_doc) db_session.commit() @@ -27,20 +28,16 @@ def test_process_document_ingestion_pipeline(db_session): mock_session_factory.return_value.__enter__.return_value = db_session mock_session_factory.return_value = db_session - # Patch the factory globally, and patch ingest_document right where app.tasks calls it + # Patch the factory globally, and mock AdvancedPDFParser so no real PDF is parsed with patch("app.database.SessionLocal", mock_session_factory, create=True), \ patch("app.services.document_ingestion.SessionLocal", mock_session_factory, create=True), \ - patch("app.tasks.ingest_document") as mock_ingest: - - # Simulate what the underlying service does upon a successful processing run - def simulate_successful_ingestion(*args, **kwargs): - doc = db_session.query(Document).filter_by(id="test-doc-123").first() - if doc: - doc.status = "ready" - db_session.commit() - return {"status": "success"} - - mock_ingest.side_effect = simulate_successful_ingestion + patch("app.services.layout_parser.AdvancedPDFParser") as mock_parser_cls: + + mock_parser = MagicMock() + mock_parser_cls.return_value = mock_parser + mock_parser.ingest_document.return_value = [ + {"text": "mock chunk 1", "page_number": 1, "type": "text_layout"}, + ] task_result = process_document.apply( kwargs={ @@ -53,8 +50,8 @@ def simulate_successful_ingestion(*args, **kwargs): # 3. ASSERT: Verify the task metrics and status changes inside the session context assert task_result.status == "SUCCESS" - + # Query the database to verify the state update updated_doc = db_session.query(Document).filter_by(id="test-doc-123").first() assert updated_doc is not None - assert updated_doc.status == "ready" \ No newline at end of file + assert updated_doc.status == "completed" \ No newline at end of file From 1727af3a8124528ef6af364a4ec7bb89eb986b19 Mon Sep 17 00:00:00 2001 From: ionfwsrijan Date: Sat, 13 Jun 2026 17:13:42 +0530 Subject: [PATCH 11/11] Fix test: patch AdvancedPDFParser at app.tasks namespace (not layout_parser) to account for module-level import --- backend/tests/test_celery_ingestion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/tests/test_celery_ingestion.py b/backend/tests/test_celery_ingestion.py index de997965..bb4d3001 100644 --- a/backend/tests/test_celery_ingestion.py +++ b/backend/tests/test_celery_ingestion.py @@ -31,7 +31,7 @@ def test_process_document_ingestion_pipeline(db_session): # Patch the factory globally, and mock AdvancedPDFParser so no real PDF is parsed with patch("app.database.SessionLocal", mock_session_factory, create=True), \ patch("app.services.document_ingestion.SessionLocal", mock_session_factory, create=True), \ - patch("app.services.layout_parser.AdvancedPDFParser") as mock_parser_cls: + patch("app.tasks.AdvancedPDFParser") as mock_parser_cls: mock_parser = MagicMock() mock_parser_cls.return_value = mock_parser