From 8ece71d836515b97d9918a2c4217024ef01a8dfc Mon Sep 17 00:00:00 2001
From: nikkybel <sulymanaishat0@gmail.com>
Date: Fri, 29 May 2026 17:53:04 +0100
Subject: [PATCH] feat: implement Data-processing: Soroban Event Indexer
 (Testnet) -> Normalized Event Store

---
 ...000001-AddLedgerSequenceToSorobanEvents.ts |   19 +
 apps/data-processing/alert_notifier.py        |  Bin 0 -> 2909 bytes
 apps/data-processing/alertbot.py              |  Bin 0 -> 12737 bytes
 apps/data-processing/analytics/__init__.py    |  Bin 0 -> 569 bytes
 .../analytics/correlation_engine.py           |  Bin 0 -> 12910 bytes
 .../analytics/entity_linker.py                |  212 +++
 apps/data-processing/analytics/forecaster.py  |  Bin 0 -> 20062 bytes
 apps/data-processing/analytics/keywords.py    |  Bin 0 -> 7843 bytes
 .../analytics/market_analyzer.py              |  Bin 0 -> 6820 bytes
 apps/data-processing/analytics/ner_service.py |  Bin 0 -> 5604 bytes
 apps/data-processing/analytics/sentiment.py   |  Bin 0 -> 12829 bytes
 .../analytics/sentiment_indicators.py         |  Bin 0 -> 7732 bytes
 apps/data-processing/anomaly_detector.py      |  Bin 0 -> 32052 bytes
 .../api/ingestion_quality_routes.py           |  Bin 0 -> 1874 bytes
 apps/data-processing/api/server.py            |  Bin 0 -> 23236 bytes
 apps/data-processing/cache_manager.py         |  Bin 0 -> 4728 bytes
 apps/data-processing/config/anomaly_config.py |  Bin 0 -> 4461 bytes
 apps/data-processing/database.py              |  Bin 0 -> 8492 bytes
 apps/data-processing/db/__init__.py           |   18 +
 apps/data-processing/db/models.py             |  304 ++++
 apps/data-processing/db/postgres_service.py   | 1245 +++++++++++++++++
 apps/data-processing/fetchers.py              |  Bin 0 -> 4341 bytes
 apps/data-processing/ingestion/__init__.py    |  Bin 0 -> 892 bytes
 .../ingestion/news_deduplicator.py            |  Bin 0 -> 7335 bytes
 .../data-processing/ingestion/news_fetcher.py |  Bin 0 -> 12180 bytes
 .../ingestion/price_fetcher.py                |  Bin 0 -> 8131 bytes
 .../ingestion/run_ingestion_quality_checks.py |  Bin 0 -> 721 bytes
 .../ingestion/social_fetcher.py               |  Bin 0 -> 25228 bytes
 .../ingestion/soroban_event_indexer.py        |  267 ++++
 .../ingestion/stellar_fetcher.py              |  Bin 0 -> 20177 bytes
 .../ingestion/stellar_ingestion_checks.py     |  Bin 0 -> 17144 bytes
 apps/data-processing/main.py                  |  Bin 0 -> 14703 bytes
 apps/data-processing/ml/__init__.py           |  Bin 0 -> 623 bytes
 apps/data-processing/ml/feature_store.py      |  Bin 0 -> 3732 bytes
 apps/data-processing/ml/model_registry.py     |  Bin 0 -> 7036 bytes
 apps/data-processing/ml/price_predictor.py    |  Bin 0 -> 3119 bytes
 .../data-processing/ml/retraining_pipeline.py |  Bin 0 -> 10353 bytes
 apps/data-processing/qa_exporter.py           |  Bin 0 -> 9736 bytes
 apps/data-processing/scheduler.py             |  Bin 0 -> 11497 bytes
 apps/data-processing/security.py              |  Bin 0 -> 7044 bytes
 apps/data-processing/sentiment.py             |  Bin 0 -> 9799 bytes
 .../src/analytics/entity_linker.py            |  212 +++
 .../src/ingestion/soroban_event_indexer.py    |  267 ++++
 apps/data-processing/standalone_test.py       |  278 ++++
 apps/data-processing/test_entity_linker.py    |   54 +
 apps/data-processing/trends.py                |  Bin 0 -> 5268 bytes
 apps/data-processing/utils/http_client.py     |  Bin 0 -> 5456 bytes
 apps/data-processing/utils/logger.py          |  Bin 0 -> 1460 bytes
 apps/data-processing/utils/metrics.py         |  Bin 0 -> 1286 bytes
 apps/data-processing/utils/translator.py      |  Bin 0 -> 2789 bytes
 apps/data-processing/validators.py            |  Bin 0 -> 1638 bytes
 ...000001-AddLedgerSequenceToSorobanEvents.ts |   19 +
 temp_backup/backfill_contract_events.py       |  323 +++++
 .../dto/ingest-soroban-event.dto.ts           |   29 +
 .../entities/soroban-event.entity.ts          |   57 +
 .../soroban-events.controller.ts              |   42 +
 .../soroban-events/soroban-events.module.ts   |   20 +
 .../soroban-events.processor.ts               |   78 ++
 .../soroban-events/soroban-events.service.ts  |   32 +
 temp_backup/src/alert_notifier.py             |   85 ++
 temp_backup/src/alertbot.py                   |  353 +++++
 temp_backup/src/analytics/__init__.py         |   21 +
 .../__pycache__/__init__.cpython-314.pyc      |  Bin 0 -> 698 bytes
 .../__pycache__/forecaster.cpython-314.pyc    |  Bin 0 -> 26172 bytes
 .../market_analyzer.cpython-314.pyc           |  Bin 0 -> 8295 bytes
 .../src/analytics/correlation_engine.py       |  358 +++++
 temp_backup/src/analytics/entity_linker.py    |  212 +++
 temp_backup/src/analytics/forecaster.py       |  507 +++++++
 temp_backup/src/analytics/keywords.py         |  309 ++++
 temp_backup/src/analytics/market_analyzer.py  |  201 +++
 temp_backup/src/analytics/ner_service.py      |  171 +++
 temp_backup/src/analytics/sentiment.py        |  388 +++++
 .../src/analytics/sentiment_indicators.py     |  236 ++++
 temp_backup/src/anomaly_detector.py           |  818 +++++++++++
 .../src/api/ingestion_quality_routes.py       |   59 +
 temp_backup/src/api/server.py                 |  661 +++++++++
 temp_backup/src/cache_manager.py              |  146 ++
 temp_backup/src/config/anomaly_config.py      |  114 ++
 temp_backup/src/database.py                   |  241 ++++
 temp_backup/src/db/__init__.py                |   18 +
 .../db/__pycache__/__init__.cpython-314.pyc   |  Bin 0 -> 510 bytes
 .../src/db/__pycache__/models.cpython-314.pyc |  Bin 0 -> 13195 bytes
 temp_backup/src/db/models.py                  |  304 ++++
 temp_backup/src/db/postgres_service.py        | 1245 +++++++++++++++++
 temp_backup/src/fetchers.py                   |  116 ++
 temp_backup/src/ingestion/__init__.py         |   42 +
 .../src/ingestion/news_deduplicator.py        |  198 +++
 temp_backup/src/ingestion/news_fetcher.py     |  333 +++++
 temp_backup/src/ingestion/price_fetcher.py    |  226 +++
 .../ingestion/run_ingestion_quality_checks.py |   28 +
 temp_backup/src/ingestion/social_fetcher.py   |  741 ++++++++++
 .../src/ingestion/soroban_event_indexer.py    |  267 ++++
 temp_backup/src/ingestion/stellar_fetcher.py  |  565 ++++++++
 .../src/ingestion/stellar_ingestion_checks.py |  463 ++++++
 temp_backup/src/main.py                       |  382 +++++
 temp_backup/src/ml/__init__.py                |   28 +
 temp_backup/src/ml/feature_store.py           |   83 ++
 temp_backup/src/ml/model_registry.py          |  223 +++
 temp_backup/src/ml/price_predictor.py         |   93 ++
 temp_backup/src/ml/retraining_pipeline.py     |  274 ++++
 temp_backup/src/qa_exporter.py                |  256 ++++
 temp_backup/src/scheduler.py                  |  285 ++++
 temp_backup/src/security.py                   |  215 +++
 temp_backup/src/sentiment.py                  |  283 ++++
 temp_backup/src/trends.py                     |  153 ++
 .../utils/__pycache__/logger.cpython-314.pyc  |  Bin 0 -> 2989 bytes
 temp_backup/src/utils/http_client.py          |  138 ++
 temp_backup/src/utils/logger.py               |   52 +
 temp_backup/src/utils/metrics.py              |   42 +
 temp_backup/src/utils/translator.py           |   88 ++
 temp_backup/src/validators.py                 |   58 +
 111 files changed, 15555 insertions(+)
 create mode 100644 apps/backend/src/database/migrations/1774000000001-AddLedgerSequenceToSorobanEvents.ts
 create mode 100644 apps/data-processing/alert_notifier.py
 create mode 100644 apps/data-processing/alertbot.py
 create mode 100644 apps/data-processing/analytics/__init__.py
 create mode 100644 apps/data-processing/analytics/correlation_engine.py
 create mode 100644 apps/data-processing/analytics/entity_linker.py
 create mode 100644 apps/data-processing/analytics/forecaster.py
 create mode 100644 apps/data-processing/analytics/keywords.py
 create mode 100644 apps/data-processing/analytics/market_analyzer.py
 create mode 100644 apps/data-processing/analytics/ner_service.py
 create mode 100644 apps/data-processing/analytics/sentiment.py
 create mode 100644 apps/data-processing/analytics/sentiment_indicators.py
 create mode 100644 apps/data-processing/anomaly_detector.py
 create mode 100644 apps/data-processing/api/ingestion_quality_routes.py
 create mode 100644 apps/data-processing/api/server.py
 create mode 100644 apps/data-processing/cache_manager.py
 create mode 100644 apps/data-processing/config/anomaly_config.py
 create mode 100644 apps/data-processing/database.py
 create mode 100644 apps/data-processing/db/__init__.py
 create mode 100644 apps/data-processing/db/models.py
 create mode 100644 apps/data-processing/db/postgres_service.py
 create mode 100644 apps/data-processing/fetchers.py
 create mode 100644 apps/data-processing/ingestion/__init__.py
 create mode 100644 apps/data-processing/ingestion/news_deduplicator.py
 create mode 100644 apps/data-processing/ingestion/news_fetcher.py
 create mode 100644 apps/data-processing/ingestion/price_fetcher.py
 create mode 100644 apps/data-processing/ingestion/run_ingestion_quality_checks.py
 create mode 100644 apps/data-processing/ingestion/social_fetcher.py
 create mode 100644 apps/data-processing/ingestion/soroban_event_indexer.py
 create mode 100644 apps/data-processing/ingestion/stellar_fetcher.py
 create mode 100644 apps/data-processing/ingestion/stellar_ingestion_checks.py
 create mode 100644 apps/data-processing/main.py
 create mode 100644 apps/data-processing/ml/__init__.py
 create mode 100644 apps/data-processing/ml/feature_store.py
 create mode 100644 apps/data-processing/ml/model_registry.py
 create mode 100644 apps/data-processing/ml/price_predictor.py
 create mode 100644 apps/data-processing/ml/retraining_pipeline.py
 create mode 100644 apps/data-processing/qa_exporter.py
 create mode 100644 apps/data-processing/scheduler.py
 create mode 100644 apps/data-processing/security.py
 create mode 100644 apps/data-processing/sentiment.py
 create mode 100644 apps/data-processing/src/analytics/entity_linker.py
 create mode 100644 apps/data-processing/src/ingestion/soroban_event_indexer.py
 create mode 100644 apps/data-processing/standalone_test.py
 create mode 100644 apps/data-processing/test_entity_linker.py
 create mode 100644 apps/data-processing/trends.py
 create mode 100644 apps/data-processing/utils/http_client.py
 create mode 100644 apps/data-processing/utils/logger.py
 create mode 100644 apps/data-processing/utils/metrics.py
 create mode 100644 apps/data-processing/utils/translator.py
 create mode 100644 apps/data-processing/validators.py
 create mode 100644 temp_backup/1774000000001-AddLedgerSequenceToSorobanEvents.ts
 create mode 100644 temp_backup/backfill_contract_events.py
 create mode 100644 temp_backup/soroban-events/dto/ingest-soroban-event.dto.ts
 create mode 100644 temp_backup/soroban-events/entities/soroban-event.entity.ts
 create mode 100644 temp_backup/soroban-events/soroban-events.controller.ts
 create mode 100644 temp_backup/soroban-events/soroban-events.module.ts
 create mode 100644 temp_backup/soroban-events/soroban-events.processor.ts
 create mode 100644 temp_backup/soroban-events/soroban-events.service.ts
 create mode 100644 temp_backup/src/alert_notifier.py
 create mode 100644 temp_backup/src/alertbot.py
 create mode 100644 temp_backup/src/analytics/__init__.py
 create mode 100644 temp_backup/src/analytics/__pycache__/__init__.cpython-314.pyc
 create mode 100644 temp_backup/src/analytics/__pycache__/forecaster.cpython-314.pyc
 create mode 100644 temp_backup/src/analytics/__pycache__/market_analyzer.cpython-314.pyc
 create mode 100644 temp_backup/src/analytics/correlation_engine.py
 create mode 100644 temp_backup/src/analytics/entity_linker.py
 create mode 100644 temp_backup/src/analytics/forecaster.py
 create mode 100644 temp_backup/src/analytics/keywords.py
 create mode 100644 temp_backup/src/analytics/market_analyzer.py
 create mode 100644 temp_backup/src/analytics/ner_service.py
 create mode 100644 temp_backup/src/analytics/sentiment.py
 create mode 100644 temp_backup/src/analytics/sentiment_indicators.py
 create mode 100644 temp_backup/src/anomaly_detector.py
 create mode 100644 temp_backup/src/api/ingestion_quality_routes.py
 create mode 100644 temp_backup/src/api/server.py
 create mode 100644 temp_backup/src/cache_manager.py
 create mode 100644 temp_backup/src/config/anomaly_config.py
 create mode 100644 temp_backup/src/database.py
 create mode 100644 temp_backup/src/db/__init__.py
 create mode 100644 temp_backup/src/db/__pycache__/__init__.cpython-314.pyc
 create mode 100644 temp_backup/src/db/__pycache__/models.cpython-314.pyc
 create mode 100644 temp_backup/src/db/models.py
 create mode 100644 temp_backup/src/db/postgres_service.py
 create mode 100644 temp_backup/src/fetchers.py
 create mode 100644 temp_backup/src/ingestion/__init__.py
 create mode 100644 temp_backup/src/ingestion/news_deduplicator.py
 create mode 100644 temp_backup/src/ingestion/news_fetcher.py
 create mode 100644 temp_backup/src/ingestion/price_fetcher.py
 create mode 100644 temp_backup/src/ingestion/run_ingestion_quality_checks.py
 create mode 100644 temp_backup/src/ingestion/social_fetcher.py
 create mode 100644 temp_backup/src/ingestion/soroban_event_indexer.py
 create mode 100644 temp_backup/src/ingestion/stellar_fetcher.py
 create mode 100644 temp_backup/src/ingestion/stellar_ingestion_checks.py
 create mode 100644 temp_backup/src/main.py
 create mode 100644 temp_backup/src/ml/__init__.py
 create mode 100644 temp_backup/src/ml/feature_store.py
 create mode 100644 temp_backup/src/ml/model_registry.py
 create mode 100644 temp_backup/src/ml/price_predictor.py
 create mode 100644 temp_backup/src/ml/retraining_pipeline.py
 create mode 100644 temp_backup/src/qa_exporter.py
 create mode 100644 temp_backup/src/scheduler.py
 create mode 100644 temp_backup/src/security.py
 create mode 100644 temp_backup/src/sentiment.py
 create mode 100644 temp_backup/src/trends.py
 create mode 100644 temp_backup/src/utils/__pycache__/logger.cpython-314.pyc
 create mode 100644 temp_backup/src/utils/http_client.py
 create mode 100644 temp_backup/src/utils/logger.py
 create mode 100644 temp_backup/src/utils/metrics.py
 create mode 100644 temp_backup/src/utils/translator.py
 create mode 100644 temp_backup/src/validators.py

diff --git a/apps/backend/src/database/migrations/1774000000001-AddLedgerSequenceToSorobanEvents.ts b/apps/backend/src/database/migrations/1774000000001-AddLedgerSequenceToSorobanEvents.ts
new file mode 100644
index 00000000..6b41e5d6
--- /dev/null
+++ b/apps/backend/src/database/migrations/1774000000001-AddLedgerSequenceToSorobanEvents.ts
@@ -0,0 +1,19 @@
+import { MigrationInterface, QueryRunner } from 'typeorm';
+
+export class AddLedgerSequenceToSorobanEvents1774000000001 implements MigrationInterface {
+  // up() adds ledger_sequence (0 for existing rows) and indexes it, idempotently; down() drops the index, then the column.
+  async up(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+      ALTER TABLE soroban_events
+      ADD COLUMN IF NOT EXISTS ledger_sequence INTEGER NOT NULL DEFAULT 0;
+      CREATE INDEX IF NOT EXISTS idx_soroban_events_ledger_sequence ON soroban_events (ledger_sequence);
+    `);
+  }
+
+  async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+      DROP INDEX IF EXISTS idx_soroban_events_ledger_sequence;
+      ALTER TABLE soroban_events DROP COLUMN IF EXISTS ledger_sequence;
+    `);
+  }
+}
diff --git a/apps/data-processing/alert_notifier.py b/apps/data-processing/alert_notifier.py
new file mode 100644
index 0000000000000000000000000000000000000000..8db5f012937b4364e5e880e3d09f1ec37a49c9f1
GIT binary patch
literal 2909
jcmeIuF#!Mo0K%a4Pi+ZLh(KY$fB^#r3>YwAz`$i-3ta#K

literal 0
HcmV?d00001

diff --git a/apps/data-processing/alertbot.py b/apps/data-processing/alertbot.py
new file mode 100644
index 0000000000000000000000000000000000000000..a11c3999cff5ae11c46a4bdb1c6106e20ec34448
GIT binary patch
literal 12737
zcmeIuF#!Mo0K%a4Pi+c6h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
a7%*VKfB^#r3>YwAz<>b*1`HT5a2*&i!2kgO

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/__init__.py b/apps/data-processing/analytics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec4e89abaed3fb4046191e75f29537739322ae1f
GIT binary patch
literal 569
QcmZQz7zLvtKt>1v00KDx0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/correlation_engine.py b/apps/data-processing/analytics/correlation_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d883143e97ad642cf9768cca753a155824361ebe
GIT binary patch
literal 12910
zcmeIuF#!Mo0K%a4Pwiz3h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
b7%*VKfB^#r3>YwAz<>b*1`HT5VBiA-GHw6?

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/entity_linker.py b/apps/data-processing/analytics/entity_linker.py
new file mode 100644
index 00000000..21b388d8
--- /dev/null
+++ b/apps/data-processing/analytics/entity_linker.py
@@ -0,0 +1,212 @@
+"""
+On-chain Entity Linker for news articles.
+Links news content to on-chain projects and assets, producing stable IDs
+and storing links in the database.
+"""
+
+import logging
+import re
+from typing import Dict, List, Optional, Tuple
+from dataclasses import dataclass
+
+from .keywords import CRYPTO_PROJECT_MAP, KNOWN_TICKERS, TICKER_TO_PROJECT
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class LinkedEntity:
+    stable_id: str  # "<entity_type>:<identifier>", identifier stripped and lowercased by EntityLinker
+    entity_type: str  # "project" or "asset"
+    name: str  # canonical project name, or the ticker itself for assets
+    ticker: Optional[str] = None  # EntityLinker sets this only on asset entities
+    confidence: float = 1.0  # EntityLinker assigns 0.95 (name match), 0.9 (ticker), 0.85 (project implied by ticker)
+
+
+class EntityLinker:
+    """
+    Links text content to known on-chain entities (projects and assets)
+    with stable, deterministic IDs.  Matching is purely lexical: project
+    names by case-insensitive whole-word regex, tickers by uppercase token.
+    """
+
+    # Candidate tickers: standalone runs of 2-6 uppercase ASCII letters.
+    _TICKER_PATTERN = re.compile(r"\b([A-Z]{2,6})\b")
+
+    def __init__(self) -> None:
+        self._project_patterns = self._compile_project_patterns()
+        # Filter out SDF from asset tickers since it's a project
+        self._asset_tickers = {t for t in KNOWN_TICKERS if t != "SDF"}
+
+    def _compile_project_patterns(self) -> List[Tuple[str, re.Pattern]]:
+        """Compile case-insensitive whole-word patterns, one per known project name."""
+        # Longest names first so longer matches are preferred over shorter ones
+        names = sorted(CRYPTO_PROJECT_MAP, key=len, reverse=True)
+        return [
+            (name, re.compile(r"\b" + re.escape(name) + r"\b", re.IGNORECASE))
+            for name in names
+        ]
+
+    def _generate_stable_id(self, entity_type: str, identifier: str) -> str:
+        """Generate a stable ID: "<entity_type>:<identifier stripped and lowercased>"."""
+        normalized = identifier.strip().lower()
+        return f"{entity_type}:{normalized}"
+
+    @staticmethod
+    def _canonical_project_name(map_key: str, fallback: str) -> str:
+        """Return the last alias stored under map_key, or fallback if the key is missing or empty."""
+        aliases = CRYPTO_PROJECT_MAP.get(map_key)
+        return aliases[-1] if aliases else fallback
+
+    def link_text(
+        self,
+        text: str,
+        title: Optional[str] = None
+    ) -> List[LinkedEntity]:
+        """
+        Link the given text to known on-chain entities.
+
+        Args:
+            text: Main text content to analyze
+            title: Optional article title; it is scanned together with the text
+
+        Returns:
+            List of LinkedEntity objects, one per distinct stable ID
+        """
+        entities: Dict[str, LinkedEntity] = {}
+
+        # Combine title and text for analysis, title first
+        full_text = f"{title or ''}\n{text or ''}"
+
+        # Match project names
+        for project_name, pattern in self._project_patterns:
+            if pattern.search(full_text):
+                # Canonical project name is the last alias in the map entry
+                canonical_name = self._canonical_project_name(project_name, project_name)
+                canonical_stable_id = self._generate_stable_id("project", canonical_name)
+
+                if canonical_stable_id not in entities:
+                    entities[canonical_stable_id] = LinkedEntity(
+                        stable_id=canonical_stable_id,
+                        entity_type="project",
+                        name=canonical_name,
+                        confidence=0.95
+                    )
+
+        # Match tickers (the pattern only yields uppercase text, so no case folding is needed)
+        for ticker in self._TICKER_PATTERN.findall(full_text):
+            if ticker in self._asset_tickers:
+                stable_id = self._generate_stable_id("asset", ticker)
+                if stable_id not in entities:
+                    entities[stable_id] = LinkedEntity(
+                        stable_id=stable_id,
+                        entity_type="asset",
+                        name=ticker,
+                        ticker=ticker,
+                        confidence=0.9
+                    )
+                # Also link the associated project if available, using canonical ID
+                if ticker in TICKER_TO_PROJECT:
+                    for project_name in TICKER_TO_PROJECT[ticker]:
+                        # An empty alias list falls back to the project name instead of raising IndexError
+                        canonical_name = self._canonical_project_name(project_name.lower(), project_name)
+                        canonical_stable_id = self._generate_stable_id("project", canonical_name)
+                        if canonical_stable_id not in entities:
+                            entities[canonical_stable_id] = LinkedEntity(
+                                stable_id=canonical_stable_id,
+                                entity_type="project",
+                                name=canonical_name,
+                                confidence=0.85
+                            )
+
+        return list(entities.values())
+
+    def link_article(
+        self,
+        title: Optional[str],
+        summary: Optional[str],
+        content: Optional[str]
+    ) -> List[LinkedEntity]:
+        """Link an article's title, summary and content to on-chain entities."""
+        # link_text prepends the title itself, so only summary and content are joined here
+        body = "\n".join([summary or "", content or ""])
+        return self.link_text(body, title)
+
+
+# Small labeled test set for precision measurement (iterated by measure_precision)
+LABELED_TEST_SET = [
+    {
+        "text": "Stellar Development Foundation (SDF) announces new Soroban upgrade. XLM price surges.",
+        "expected_entities": [
+            {"stable_id": "project:stellar", "type": "project"},
+            {"stable_id": "project:soroban", "type": "project"},
+            {"stable_id": "asset:xlm", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Bitcoin (BTC) reaches new all-time high. Ethereum (ETH) follows closely.",
+        "expected_entities": [
+            {"stable_id": "asset:btc", "type": "asset"},
+            {"stable_id": "asset:eth", "type": "asset"}
+        ]
+    },
+    {
+        "text": "DeFi protocol Uniswap launches new liquidity pool on Solana.",
+        "expected_entities": [
+            {"stable_id": "project:uniswap", "type": "project"},
+            {"stable_id": "asset:sol", "type": "asset"}  # NOTE(review): the text has no standalone uppercase "SOL" token, which link_text needs to emit an asset - confirm this label
+        ]
+    },
+    {
+        "text": "Cardano (ADA) releases new roadmap for governance.",
+        "expected_entities": [
+            {"stable_id": "asset:ada", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Tech stocks rally on positive earnings. Apple and Microsoft lead gains.",
+        "expected_entities": []  # No crypto entities
+    }
+]
+
+
+def measure_precision(entity_linker: EntityLinker) -> Dict[str, float]:
+    """
+    Score the entity linker against the labeled test set.
+
+    Returns:
+        Dictionary with precision, recall and F1 scores plus the raw
+        counts they were derived from
+    """
+    hits = 0
+    spurious = 0
+    expected_total = 0
+
+    for case in LABELED_TEST_SET:
+        wanted_ids = {label["stable_id"] for label in case["expected_entities"]}
+        expected_total += len(case["expected_entities"])
+
+        # Every linked entity is either a labeled hit or a false positive
+        linked = entity_linker.link_text(case["text"])
+        matched = sum(1 for entity in linked if entity.stable_id in wanted_ids)
+        hits += matched
+        spurious += len(linked) - matched
+
+    predicted_total = hits + spurious
+    # An empty denominator counts as a perfect score for precision and recall
+    precision = hits / predicted_total if predicted_total > 0 else 1.0
+    recall = hits / expected_total if expected_total > 0 else 1.0
+    if (precision + recall) > 0:
+        f1 = 2 * (precision * recall) / (precision + recall)
+    else:
+        f1 = 0.0
+
+    return {
+        "precision": precision,
+        "recall": recall,
+        "f1": f1,
+        "true_positives": hits,
+        "false_positives": spurious,
+        "total_expected": expected_total,
+        "test_cases": len(LABELED_TEST_SET)
+    }
diff --git a/apps/data-processing/analytics/forecaster.py b/apps/data-processing/analytics/forecaster.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2a18497440fd4cd699ab5b129d3763b078f30f1
GIT binary patch
literal 20062
zcmeIu0Sy2E0K%a6Pi+o2h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
z7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*
G{{sU~UH}0A

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/keywords.py b/apps/data-processing/analytics/keywords.py
new file mode 100644
index 0000000000000000000000000000000000000000..7207aa7d087bbb6f7246e726f05e6de291427481
GIT binary patch
literal 7843
zcmeIuF#!Mo0K%a4Pi+e?h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0Rsju
F0|OqT00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/market_analyzer.py b/apps/data-processing/analytics/market_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3385afeaaebca2ea5020b722ee689fc12d8a6e1
GIT binary patch
literal 6820
zcmeIuF#!Mo0K%a4Pi+e?h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFks+2FdC!)
A0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/ner_service.py b/apps/data-processing/analytics/ner_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d716edbb9d1f605f7327b29393d28e2f88180bd
GIT binary patch
literal 5604
vcmeIu0Sy2E0K%a6Pi+o2h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@1Fr)E732T`

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/sentiment.py b/apps/data-processing/analytics/sentiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..82e30d8e7c3e13c6ac28e7a89bbb772139664cef
GIT binary patch
literal 12829
zcmeIu0Sy2E0K%a6Pi+o2h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
a7%*VKfB^#r3>YwAz<>b*1`HT5a62$E9RL9U

literal 0
HcmV?d00001

diff --git a/apps/data-processing/analytics/sentiment_indicators.py b/apps/data-processing/analytics/sentiment_indicators.py
new file mode 100644
index 0000000000000000000000000000000000000000..7782f4621f2f9503321b99d7b35339cb834de41d
GIT binary patch
literal 7732
zcmeIufdBvi0K=g9Qy=7oP+`D;0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@1Fr)E
D9y9;}

literal 0
HcmV?d00001

diff --git a/apps/data-processing/anomaly_detector.py b/apps/data-processing/anomaly_detector.py
new file mode 100644
index 0000000000000000000000000000000000000000..139eb0ecbac7b0e6d063fc77dd6f3ce1b388c3e4
GIT binary patch
literal 32052
zcmeIuF#!Mo0K%a4Pi+Tph(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
z7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*
z1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd
Gfq!6qGynkr

literal 0
HcmV?d00001

diff --git a/apps/data-processing/api/ingestion_quality_routes.py b/apps/data-processing/api/ingestion_quality_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..153f02bb615f7351060adcfc4d6f2b590121b585
GIT binary patch
literal 1874
ccmZQz7zLvtFd71*Aut*OqaiRF0+fdU00&Y40RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/api/server.py b/apps/data-processing/api/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..2aeebed3a8b1addf54d8f1ff4b57a9bee3231e96
GIT binary patch
literal 23236
zcmeIuF#!Mo0K%a4Pwj07h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
z7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*
U1`HT5V8DO@0|pEjFks*Z23o`b0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/cache_manager.py b/apps/data-processing/cache_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb6ae110db7851b43d67f32d47a16a972e327c7b
GIT binary patch
literal 4728
rcmeIuF#!Mo0K%a4Pi+ZLh(KY$fB^#r3>YwAz<>b*1`HT5VBkM65_kXs

literal 0
HcmV?d00001

diff --git a/apps/data-processing/config/anomaly_config.py b/apps/data-processing/config/anomaly_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..82c4106c1e5811bc38f909a8074426bc0d225b76
GIT binary patch
literal 4461
qcmeIuF#!Mo0K%a4Pi+ZLh(KY$fB^#r3>YwAz<>b*1`HT5a2psAZ2$oP

literal 0
HcmV?d00001

diff --git a/apps/data-processing/database.py b/apps/data-processing/database.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0210d02c03aa8b3833fdea678a63f28c821c77f
GIT binary patch
literal 8492
zcmeIufdBvi0K=g9Qy=7oP+`D;0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEj
G_!$@>EC2xj

literal 0
HcmV?d00001

diff --git a/apps/data-processing/db/__init__.py b/apps/data-processing/db/__init__.py
new file mode 100644
index 00000000..dcb6992a
--- /dev/null
+++ b/apps/data-processing/db/__init__.py
@@ -0,0 +1,18 @@
+"""
+Database package for analytics data persistence
+"""
+
+from .models import Base, Article, SocialPost, AnalyticsRecord, NewsInsight, AssetTrend, OnChainEntity, ArticleEntityLink
+from .postgres_service import PostgresService
+
+__all__ = [
+    "Base",
+    "Article",
+    "SocialPost",
+    "AnalyticsRecord",
+    "NewsInsight",
+    "AssetTrend",
+    "OnChainEntity",
+    "ArticleEntityLink",
+    "PostgresService",
+]
diff --git a/apps/data-processing/db/models.py b/apps/data-processing/db/models.py
new file mode 100644
index 00000000..5cc7fa5f
--- /dev/null
+++ b/apps/data-processing/db/models.py
@@ -0,0 +1,304 @@
+"""
+Database models for analytics data persistence
+"""
+
+from datetime import datetime
+from typing import Optional
+from sqlalchemy import Column, Integer, String, Float, DateTime, JSON, Text, Index, BigInteger
+from sqlalchemy.orm import declarative_base
+from sqlalchemy.sql import func
+
+Base = declarative_base()
+
+
+class OnChainEntity(Base):
+    """
+    On-chain entity (project or asset), one row per unique stable_id
+    """
+    __tablename__ = "on_chain_entities"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # Surrogate primary key
+    stable_id = Column(String(255), unique=True, nullable=False, index=True)  # Stable unique ID (e.g., "asset:XLM", "project:stellar")
+    entity_type = Column(String(50), nullable=False, index=True)  # "project" or "asset"
+    name = Column(String(255), nullable=False)  # Human-readable name
+    ticker = Column(String(20), nullable=True, index=True)  # Asset ticker (if applicable)
+    contract_ids = Column(JSON, nullable=True)  # Array of associated contract IDs
+    extra_data = Column(JSON, nullable=True)  # Additional metadata
+    created_at = Column(  # Filled in by the database (server_default) on insert
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    updated_at = Column(  # Same default, plus refreshed via onupdate when the row is updated
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+
+    __table_args__ = (
+        Index("idx_entities_type_ticker", "entity_type", "ticker"),  # composite lookup by type + ticker
+    )
+
+
+class ArticleEntityLink(Base):
+    """
+    Association rows linking articles to on-chain entities (many-to-many)
+    """
+    __tablename__ = "article_entity_links"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    article_id = Column(String(255), nullable=False, index=True)  # Holds articles.article_id; no ForeignKey constraint is declared
+    entity_stable_id = Column(String(255), nullable=False, index=True)  # Holds on_chain_entities.stable_id; no ForeignKey constraint is declared
+    confidence = Column(Float, nullable=True)  # Confidence score for the link (0-1)
+    context = Column(Text, nullable=True)  # Context snippet where the entity was found
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    __table_args__ = (
+        Index("idx_article_entity_link", "article_id", "entity_stable_id", unique=True),  # at most one link per (article, entity)
+        Index("idx_entity_article_link", "entity_stable_id", "article_id"),  # reverse lookup: entity -> articles
+    )
+
+
+class Article(Base):
+    """
+    Stores news articles with full content, sentiment scores and metadata
+    """
+    __tablename__ = "articles"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    article_id = Column(String(255), unique=True, nullable=False, index=True)  # External article identifier
+    title = Column(Text, nullable=False)
+    content = Column(Text, nullable=True)
+    summary = Column(Text, nullable=True)
+    source = Column(String(100), nullable=True, index=True)
+    url = Column(Text, nullable=True)
+    
+    # Asset information
+    asset_codes = Column(JSON, nullable=True)  # Array of asset codes mentioned in article
+    primary_asset = Column(String(20), nullable=True, index=True)  # Primary asset being discussed
+    categories = Column(JSON, nullable=True)  # Article categories
+    
+    # Sentiment scores
+    sentiment_score = Column(Float, nullable=True)  # compound score -1 to 1
+    positive_score = Column(Float, nullable=True)
+    negative_score = Column(Float, nullable=True)
+    neutral_score = Column(Float, nullable=True)
+    sentiment_label = Column(String(20), nullable=True, index=True)  # positive/negative/neutral
+    
+    # Keywords and metadata
+    keywords = Column(JSON, nullable=True)  # Array of keywords
+    detected_entities = Column(JSON, nullable=True)  # NER entities detected in article text
+    linked_entities = Column(JSON, nullable=True)  # Structured linked entities (array of {stable_id, type, name})
+    language = Column(String(10), nullable=True)
+    
+    # Timestamps
+    published_at = Column(DateTime(timezone=True), nullable=True, index=True)
+    fetched_at = Column(DateTime(timezone=True), nullable=True)
+    analyzed_at = Column(DateTime(timezone=True), nullable=True)
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    updated_at = Column(
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+
+    # NOTE: the first four indexes repeat columns already declared index=True (two indexes each)
+    __table_args__ = (
+        Index("idx_articles_published_at", "published_at"),
+        Index("idx_articles_sentiment_label", "sentiment_label"),
+        Index("idx_articles_source", "source"),
+        Index("idx_articles_primary_asset", "primary_asset"),
+        Index("idx_articles_asset_sentiment", "primary_asset", "sentiment_label"),
+        Index("idx_articles_created_at", "created_at"),
+    )
+
+    def __repr__(self):
+        # title may still be None on an unsaved instance; slicing None would raise TypeError
+        return f"<Article(id={self.article_id}, title={(self.title or '')[:50]}, asset={self.primary_asset}, sentiment={self.sentiment_label})>"
+
+
+class SocialPost(Base):
+    """
+    Stores social media posts (Twitter, Reddit, etc.) with engagement and sentiment
+    """
+
+    __tablename__ = "social_posts"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    post_id = Column(String(255), unique=True, nullable=False, index=True)  # External post identifier
+    platform = Column(String(50), nullable=False, index=True)  # twitter, reddit, etc.
+    content = Column(Text, nullable=False)
+    author = Column(String(255), nullable=True)
+    url = Column(Text, nullable=True)
+    
+    # Engagement metrics (Python-side default of 0 on insert)
+    likes = Column(Integer, default=0)
+    comments = Column(Integer, default=0)
+    shares = Column(Integer, default=0)
+    
+    # Asset information
+    asset_codes = Column(JSON, nullable=True)  # Array of asset codes mentioned
+    primary_asset = Column(String(20), nullable=True, index=True)
+    hashtags = Column(JSON, nullable=True)  # Array of hashtags
+    subreddit = Column(String(100), nullable=True)  # For Reddit posts
+    
+    # Sentiment scores
+    sentiment_score = Column(Float, nullable=True)  # compound score -1 to 1
+    positive_score = Column(Float, nullable=True)
+    negative_score = Column(Float, nullable=True)
+    neutral_score = Column(Float, nullable=True)
+    sentiment_label = Column(String(20), nullable=True, index=True)
+    
+    # Timestamps
+    posted_at = Column(DateTime(timezone=True), nullable=False, index=True)
+    fetched_at = Column(DateTime(timezone=True), nullable=True)
+    analyzed_at = Column(DateTime(timezone=True), nullable=True)
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    updated_at = Column(
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+
+    # NOTE: the first four indexes repeat columns already declared index=True (two indexes each)
+    __table_args__ = (
+        Index("idx_social_posts_platform", "platform"),
+        Index("idx_social_posts_posted_at", "posted_at"),
+        Index("idx_social_posts_sentiment_label", "sentiment_label"),
+        Index("idx_social_posts_primary_asset", "primary_asset"),
+        Index("idx_social_posts_platform_asset", "platform", "primary_asset"),
+        Index("idx_social_posts_created_at", "created_at"),
+    )
+
+    def __repr__(self):
+        return f"<SocialPost(id={self.post_id}, platform={self.platform}, asset={self.primary_asset}, sentiment={self.sentiment_label})>"
+
+
+class AnalyticsRecord(Base):
+    """
+    Stores computed analytics: one metric value per record type, asset and window
+    """
+
+    __tablename__ = "analytics_records"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    record_type = Column(String(50), nullable=False, index=True)  # sentiment_summary, trend, etc.
+    asset = Column(String(50), nullable=True, index=True)  # Asset symbol (e.g., 'XLM', 'BTC')
+    metric_name = Column(String(100), nullable=False)  # e.g., 'sentiment_score', 'volume'
+    window = Column(String(20), nullable=True)  # e.g., '1h', '24h', '7d'
+    
+    # Metric values (only `value` is required)
+    value = Column(Float, nullable=False)
+    previous_value = Column(Float, nullable=True)
+    change_percentage = Column(Float, nullable=True)
+    trend_direction = Column(String(20), nullable=True)  # up/down/stable
+    
+    # Additional data
+    extra_data = Column(JSON, nullable=True)  # Additional metadata
+    
+    # Timestamps (`timestamp` has no default and must be supplied by the writer)
+    timestamp = Column(DateTime(timezone=True), nullable=False, index=True)
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # NOTE: the first three indexes repeat columns already declared index=True (two indexes each)
+    __table_args__ = (
+        Index("idx_analytics_records_type", "record_type"),
+        Index("idx_analytics_records_asset", "asset"),
+        Index("idx_analytics_records_timestamp", "timestamp"),
+        Index("idx_analytics_records_type_asset", "record_type", "asset"),
+        Index("idx_analytics_records_asset_metric", "asset", "metric_name"),
+    )
+
+    def __repr__(self):
+        return f"<AnalyticsRecord(type={self.record_type}, asset={self.asset}, metric={self.metric_name}, value={self.value})>"
+
+
+class NewsInsight(Base):
+    """
+    Stores sentiment analysis results for news articles (legacy table, kept for backward compatibility)
+    """
+
+    __tablename__ = "news_insights"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    article_id = Column(String(255), nullable=True, index=True)  # Optional and not unique in this table
+    article_title = Column(Text, nullable=True)
+    article_url = Column(Text, nullable=True)
+    source = Column(String(100), nullable=True)
+    
+    # Asset information
+    asset_codes = Column(JSON, nullable=True)  # Array of asset codes mentioned in article
+    primary_asset = Column(String(20), nullable=True, index=True)  # Primary asset being discussed
+    
+    # Sentiment scores (all required here, unlike the nullable scores on Article)
+    sentiment_score = Column(Float, nullable=False)  # compound score -1 to 1
+    positive_score = Column(Float, nullable=False)
+    negative_score = Column(Float, nullable=False)
+    neutral_score = Column(Float, nullable=False)
+    sentiment_label = Column(String(20), nullable=False)  # positive/negative/neutral
+    
+    # Keywords and metadata
+    keywords = Column(JSON, nullable=True)  # Array of keywords
+    language = Column(String(10), nullable=True)
+    
+    # Timestamps
+    article_published_at = Column(DateTime(timezone=True), nullable=True)
+    analyzed_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # NOTE: idx_news_insights_primary_asset repeats the index=True already set on primary_asset
+    __table_args__ = (
+        Index("idx_news_insights_analyzed_at", "analyzed_at"),
+        Index("idx_news_insights_sentiment_label", "sentiment_label"),
+        Index("idx_news_insights_source", "source"),
+        Index("idx_news_insights_primary_asset", "primary_asset"),
+        Index("idx_news_insights_asset_sentiment", "primary_asset", "sentiment_label"),
+    )
+
+    def __repr__(self):
+        return f"<NewsInsight(id={self.id}, asset={self.primary_asset}, sentiment={self.sentiment_label}, score={self.sentiment_score})>"
+
+
+class AssetTrend(Base):
+    """
+    Stores calculated trends for assets and metrics (legacy table, kept for backward compatibility)
+    """
+
+    __tablename__ = "asset_trends"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    asset = Column(String(50), nullable=False, index=True)  # e.g., 'XLM', 'BTC'
+    metric_name = Column(String(100), nullable=False)  # e.g., 'sentiment_score', 'volume'
+    window = Column(String(20), nullable=False)  # e.g., '1h', '24h', '7d'
+    
+    # Trend data (every field is required)
+    trend_direction = Column(String(20), nullable=False)  # up/down/stable
+    score = Column(Float, nullable=False)  # trend score/strength
+    current_value = Column(Float, nullable=False)
+    previous_value = Column(Float, nullable=False)
+    change_percentage = Column(Float, nullable=False)
+    
+    # Additional data (renamed from metadata to avoid SQLAlchemy conflict)
+    extra_data = Column(JSON, nullable=True)  # Additional trend metadata
+    
+    # Timestamps (both default to the database's now() on insert)
+    timestamp = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
+    )
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # NOTE: idx_asset_trends_timestamp repeats the index=True already set on timestamp
+    __table_args__ = (
+        Index("idx_asset_trends_asset_metric", "asset", "metric_name"),
+        Index("idx_asset_trends_timestamp", "timestamp"),
+        Index("idx_asset_trends_window", "window"),
+    )
+
+    def __repr__(self):
+        return f"<AssetTrend(asset={self.asset}, metric={self.metric_name}, trend={self.trend_direction})>"
diff --git a/apps/data-processing/db/postgres_service.py b/apps/data-processing/db/postgres_service.py
new file mode 100644
index 00000000..3903c428
--- /dev/null
+++ b/apps/data-processing/db/postgres_service.py
@@ -0,0 +1,1245 @@
+"""
+PostgreSQL service for persisting analytics data
+"""
+
+import logging
+import os
+import time
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from contextlib import contextmanager
+
+from sqlalchemy import create_engine, select, and_, desc
+from sqlalchemy.orm import sessionmaker, Session
+from sqlalchemy.exc import SQLAlchemyError, OperationalError
+
+from .models import Base, Article, SocialPost, AnalyticsRecord, NewsInsight, AssetTrend, OnChainEntity, ArticleEntityLink
+from src.analytics.ner_service import NERService
+from src.analytics.entity_linker import EntityLinker, measure_precision
+
+logger = logging.getLogger(__name__)
+
+
+class PostgresService:
+    """
+    Service for persisting and retrieving analytics data from PostgreSQL
+    """
+
+    def __init__(self, database_url: Optional[str] = None):
+        """
+        Build the engine, the session factory and the NER / entity-linking helpers
+
+        Args:
+            database_url: PostgreSQL connection URL. If falsy, DATABASE_URL or a localhost default is used
+        """
+        env_url = os.getenv(
+            "DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/lumenpulse"
+        )
+        self.database_url = database_url or env_url
+
+        try:
+            self.engine = create_engine(
+                self.database_url,
+                echo=False,  # flip to True to log every SQL statement
+                pool_size=5,
+                max_overflow=10,
+                pool_pre_ping=True,  # test each pooled connection before handing it out
+            )
+            session_options = dict(
+                autocommit=False, autoflush=False, expire_on_commit=False
+            )
+            self.SessionLocal = sessionmaker(bind=self.engine, **session_options)
+            # Text-analysis helpers used by the article save paths
+            self.ner_service = NERService()
+            self.entity_linker = EntityLinker()
+            logger.info("PostgreSQL service initialized successfully")
+        except Exception as exc:
+            logger.error(f"Failed to initialize PostgreSQL service: {exc}")
+            raise
+
+    def _ensure_detected_entities(self, article_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Return a copy of article_data whose detected_entities is filled in by the
+        NER service, unless a non-empty list is already stored under that key."""
+
+        enriched = dict(article_data)
+        current = enriched.get("detected_entities")
+        if not (isinstance(current, list) and current):
+            enriched["detected_entities"] = self.ner_service.extract_entities_from_article(
+                title=enriched.get("title"),
+                summary=enriched.get("summary"),
+                content=enriched.get("content"),
+            )
+        return enriched
+
+    @contextmanager
+    def get_session(self):
+        """
+        Yield a new session; commit when the with-body finishes, roll back on error
+
+        Yields:
+            Session: SQLAlchemy session, closed on exit whether or not it committed
+        """
+        session = self.SessionLocal()
+        try:
+            yield session
+            session.commit()  # inside the try, so a failed commit is rolled back too
+        except Exception as e:
+            session.rollback()
+            logger.error(f"Session error: {e}")
+            raise
+        finally:
+            session.close()
+
+    def _retry_operation(self, operation, max_retries=3, retry_delay=1.0):
+        """
+        Run a database operation, retrying OperationalError with exponential backoff
+
+        Args:
+            operation: Zero-argument callable to execute
+            max_retries: Total number of attempts, must be at least 1
+            retry_delay: Seconds to wait before the first retry (doubles each retry)
+
+        Returns:
+            Result of the operation
+
+        Raises:
+            ValueError: If max_retries is less than 1
+            SQLAlchemyError: Last OperationalError, or any other error immediately
+        """
+        if max_retries < 1:
+            # Without this guard the loop never runs and no real error can be raised
+            raise ValueError("max_retries must be at least 1")
+        for attempt in range(max_retries):
+            try:
+                return operation()
+            except OperationalError as e:
+                if attempt >= max_retries - 1:
+                    logger.error(f"Database operation failed after {max_retries} attempts: {e}")
+                    raise
+                wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
+                logger.warning(
+                    f"Database operation failed (attempt {attempt + 1}/{max_retries}): {e}. "
+                    f"Retrying in {wait_time:.1f}s..."
+                )
+                time.sleep(wait_time)
+            except SQLAlchemyError as e:
+                # Non-retryable errors
+                logger.error(f"Database operation failed with non-retryable error: {e}")
+                raise
+
+    def create_tables(self):
+        """
+        Create the tables registered on Base.metadata using this service's engine
+        """
+        try:
+            Base.metadata.create_all(self.engine)
+            logger.info("Database tables created successfully")
+        except Exception as exc:
+            logger.error(f"Failed to create tables: {exc}")
+            raise
+
+    def drop_tables(self):
+        """
+        Drop every table registered on Base.metadata (destructive, use with caution!)
+        """
+        try:
+            Base.metadata.drop_all(self.engine)
+            logger.warning("All database tables dropped")
+        except Exception as exc:
+            logger.error(f"Failed to drop tables: {exc}")
+            raise
+
+    # Article Methods
+
+    def save_article(
+        self,
+        article_data: Dict[str, Any],
+        sentiment_result: Optional[Dict[str, Any]] = None,
+    ) -> Optional[Article]:
+        """
+        Insert or update one article, then record its entity links
+
+        Args:
+            article_data: Article fields; its "id" is matched against Article.article_id
+            sentiment_result: Optional scores (compound_score, positive, negative, neutral, sentiment_label)
+
+        Returns:
+            The saved Article, or None when a SQLAlchemyError was raised
+        """
+        article_data = self._ensure_detected_entities(article_data)
+        
+        # Link entities from the article text (runs before any database work)
+        linked_entities = self.entity_linker.link_article(
+            title=article_data.get("title"),
+            summary=article_data.get("summary"),
+            content=article_data.get("content")
+        )
+        
+        # Prepare structured linked entities for the article (stored in the JSON column)
+        structured_linked_entities = [
+            {
+                "stable_id": e.stable_id,
+                "type": e.entity_type,
+                "name": e.name,
+                "ticker": getattr(e, 'ticker', None),
+                "confidence": getattr(e, 'confidence', None)
+            }
+            for e in linked_entities
+        ]
+        article_data["linked_entities"] = structured_linked_entities
+
+        def _save():
+            with self.get_session() as session:
+                # Check if article already exists
+                existing = session.execute(
+                    select(Article).where(Article.article_id == article_data.get("id"))
+                ).scalar_one_or_none()
+
+                if existing:
+                    # Update existing article; keys absent from article_data keep the stored value
+                    existing.title = article_data.get("title", existing.title)
+                    existing.content = article_data.get("content", existing.content)
+                    existing.summary = article_data.get("summary", existing.summary)
+                    existing.source = article_data.get("source", existing.source)
+                    existing.url = article_data.get("url", existing.url)
+                    existing.asset_codes = article_data.get("asset_codes", existing.asset_codes)
+                    existing.primary_asset = article_data.get("primary_asset", existing.primary_asset)
+                    existing.categories = article_data.get("categories", existing.categories)
+                    existing.keywords = article_data.get("keywords", existing.keywords)
+                    existing.detected_entities = article_data.get("detected_entities", existing.detected_entities)
+                    existing.linked_entities = article_data.get("linked_entities", existing.linked_entities)
+                    existing.language = article_data.get("language", existing.language)
+                    existing.published_at = article_data.get("published_at", existing.published_at)
+                    existing.fetched_at = article_data.get("fetched_at", existing.fetched_at)
+
+                    if sentiment_result:
+                        existing.sentiment_score = sentiment_result.get("compound_score")
+                        existing.positive_score = sentiment_result.get("positive")
+                        existing.negative_score = sentiment_result.get("negative")
+                        existing.neutral_score = sentiment_result.get("neutral")
+                        existing.sentiment_label = sentiment_result.get("sentiment_label")
+                        existing.analyzed_at = datetime.utcnow()
+
+                    session.flush()
+                    logger.debug(f"Updated article: {existing.article_id}")
+                    return existing
+                else:
+                    # Create new article
+                    article = Article(
+                        article_id=article_data.get("id"),
+                        title=article_data.get("title", ""),
+                        content=article_data.get("content"),
+                        summary=article_data.get("summary"),
+                        source=article_data.get("source"),
+                        url=article_data.get("url"),
+                        asset_codes=article_data.get("asset_codes"),
+                        primary_asset=article_data.get("primary_asset"),
+                        categories=article_data.get("categories"),
+                        keywords=article_data.get("keywords"),
+                        detected_entities=article_data.get("detected_entities"),
+                        linked_entities=article_data.get("linked_entities"),
+                        language=article_data.get("language"),
+                        published_at=article_data.get("published_at"),
+                        fetched_at=article_data.get("fetched_at"),
+                    )
+
+                    if sentiment_result:
+                        article.sentiment_score = sentiment_result.get("compound_score")
+                        article.positive_score = sentiment_result.get("positive")
+                        article.negative_score = sentiment_result.get("negative")
+                        article.neutral_score = sentiment_result.get("neutral")
+                        article.sentiment_label = sentiment_result.get("sentiment_label")
+                        article.analyzed_at = datetime.utcnow()
+
+                    session.add(article)
+                    session.flush()
+                    logger.debug(f"Saved article: {article.article_id}")
+                    return article
+
+        try:
+            article = self._retry_operation(_save)
+            if article:
+                # Link entities in the database (called after _save's session has exited)
+                self.link_article_to_entities(article.article_id, linked_entities)
+            return article
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save article: {e}")
+            return None
+
+    def save_articles_batch(
+        self,
+        articles_data: List[Dict[str, Any]],
+        sentiment_results: Optional[List[Dict[str, Any]]] = None,
+    ) -> int:
+        """
+        Save multiple articles in one transaction (no entity linking is done here)
+
+        Args:
+            articles_data: List of article data dictionaries
+            sentiment_results: Optional sentiment results, matched by list index
+
+        Returns:
+            Number of articles committed; 0 when the transaction was rolled back
+        """
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for i, article_data in enumerate(articles_data):
+                    article_data = self._ensure_detected_entities(article_data)
+                    sentiment_result = sentiment_results[i] if sentiment_results and i < len(sentiment_results) else None
+
+                    # Check if article already exists
+                    existing = session.execute(
+                        select(Article).where(Article.article_id == article_data.get("id"))
+                    ).scalar_one_or_none()
+
+                    if existing:
+                        # Update existing article; keys absent from article_data keep the stored value
+                        existing.title = article_data.get("title", existing.title)
+                        existing.content = article_data.get("content", existing.content)
+                        existing.summary = article_data.get("summary", existing.summary)
+                        existing.source = article_data.get("source", existing.source)
+                        existing.url = article_data.get("url", existing.url)
+                        existing.asset_codes = article_data.get("asset_codes", existing.asset_codes)
+                        existing.primary_asset = article_data.get("primary_asset", existing.primary_asset)
+                        existing.categories = article_data.get("categories", existing.categories)
+                        existing.keywords = article_data.get("keywords", existing.keywords)
+                        existing.detected_entities = article_data.get("detected_entities", existing.detected_entities)
+                        existing.language = article_data.get("language", existing.language)
+                        existing.published_at = article_data.get("published_at", existing.published_at)
+                        existing.fetched_at = article_data.get("fetched_at", existing.fetched_at)
+
+                        if sentiment_result:
+                            existing.sentiment_score = sentiment_result.get("compound_score")
+                            existing.positive_score = sentiment_result.get("positive")
+                            existing.negative_score = sentiment_result.get("negative")
+                            existing.neutral_score = sentiment_result.get("neutral")
+                            existing.sentiment_label = sentiment_result.get("sentiment_label")
+                            existing.analyzed_at = datetime.utcnow()
+                    else:
+                        # Create new article
+                        article = Article(
+                            article_id=article_data.get("id"),
+                            title=article_data.get("title", ""),
+                            content=article_data.get("content"),
+                            summary=article_data.get("summary"),
+                            source=article_data.get("source"),
+                            url=article_data.get("url"),
+                            asset_codes=article_data.get("asset_codes"),
+                            primary_asset=article_data.get("primary_asset"),
+                            categories=article_data.get("categories"),
+                            keywords=article_data.get("keywords"),
+                            detected_entities=article_data.get("detected_entities"),
+                            language=article_data.get("language"),
+                            published_at=article_data.get("published_at"),
+                            fetched_at=article_data.get("fetched_at"),
+                        )
+
+                        if sentiment_result:
+                            article.sentiment_score = sentiment_result.get("compound_score")
+                            article.positive_score = sentiment_result.get("positive")
+                            article.negative_score = sentiment_result.get("negative")
+                            article.neutral_score = sentiment_result.get("neutral")
+                            article.sentiment_label = sentiment_result.get("sentiment_label")
+                            article.analyzed_at = datetime.utcnow()
+
+                        session.add(article)
+                    saved_count += 1
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save articles batch: {e}")
+            # get_session rolled the whole transaction back, so nothing was persisted
+            return 0
+
+        logger.info(f"Saved {saved_count} articles")
+        return saved_count
+
+    def get_recent_articles(
+        self,
+        limit: int = 100,
+        hours: int = 24,
+        asset: Optional[str] = None,
+        entity: Optional[str] = None,
+    ) -> List[Article]:
+        """
+        Get articles published within the last `hours`, newest first
+
+        Args:
+            limit: Maximum number of results
+            hours: Time window in hours
+            asset: Optional filter on Article.primary_asset
+            entity: Optional filter on detected_entities (case-insensitive exact match)
+
+        Returns:
+            List of Article objects, or [] when a SQLAlchemyError was raised
+        """
+        from datetime import timezone  # local import keeps this fix self-contained
+
+        try:
+            with self.get_session() as session:
+                # published_at is timezone-aware, so compare against an aware UTC instant
+                cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
+                stmt = select(Article).where(Article.published_at >= cutoff_time)
+                if asset:
+                    stmt = stmt.where(Article.primary_asset == asset)
+                # The entity filter runs in Python, so over-fetch to leave room for misses
+                fetch_limit = limit * 5 if entity else limit
+                stmt = stmt.order_by(desc(Article.published_at)).limit(fetch_limit)
+                results = session.execute(stmt).scalars().all()
+                if entity:
+                    # NOTE(review): assumes detected_entities holds plain strings - confirm
+                    target = entity.strip().lower()
+                    results = [
+                        article
+                        for article in results
+                        if any(
+                            str(value).strip().lower() == target
+                            for value in (article.detected_entities or [])
+                        )
+                    ][:limit]
+                logger.debug(f"Retrieved {len(results)} articles")
+                return results
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to retrieve articles: {e}")
+            return []
+
+    # Social Post Methods
+
+    def save_social_post(
+        self,
+        post_data: Dict[str, Any],
+        sentiment_result: Optional[Dict[str, Any]] = None,
+    ) -> Optional[SocialPost]:
+        """
+        Insert or update one social media post, keyed by its post id.
+
+        An existing row with the same ``post_id`` is refreshed from
+        ``post_data`` (keys that are absent keep their stored value);
+        otherwise a new row is created. Sentiment columns are written
+        only when ``sentiment_result`` is truthy. The work is run through
+        ``self._retry_operation``.
+
+        Args:
+            post_data: Social post data dictionary; ``"id"`` is used as
+                the post id.
+            sentiment_result: Optional sentiment analysis result
+
+        Returns:
+            SocialPost object if successful, None if a SQLAlchemyError
+            is raised
+        """
+        # Columns refreshed from post_data when the post already exists.
+        refreshable = (
+            "content", "author", "url", "likes", "comments", "shares",
+            "asset_codes", "primary_asset", "hashtags", "subreddit",
+            "posted_at", "fetched_at",
+        )
+
+        def _apply_sentiment(row):
+            # Copy the sentiment scores onto the row and stamp analysis time.
+            if not sentiment_result:
+                return
+            row.sentiment_score = sentiment_result.get("compound_score")
+            row.positive_score = sentiment_result.get("positive")
+            row.negative_score = sentiment_result.get("negative")
+            row.neutral_score = sentiment_result.get("neutral")
+            row.sentiment_label = sentiment_result.get("sentiment_label")
+            row.analyzed_at = datetime.utcnow()
+
+        def _save():
+            with self.get_session() as session:
+                lookup = select(SocialPost).where(
+                    SocialPost.post_id == post_data.get("id")
+                )
+                row = session.execute(lookup).scalar_one_or_none()
+
+                if row:
+                    for column in refreshable:
+                        current = getattr(row, column)
+                        setattr(row, column, post_data.get(column, current))
+                    _apply_sentiment(row)
+                    session.flush()
+                    logger.debug(f"Updated social post: {row.post_id}")
+                    return row
+
+                # No stored row yet: build a fresh one.
+                row = SocialPost(
+                    post_id=post_data.get("id"),
+                    platform=post_data.get("platform", "unknown"),
+                    content=post_data.get("content", ""),
+                    author=post_data.get("author"),
+                    url=post_data.get("url"),
+                    likes=post_data.get("likes", 0),
+                    comments=post_data.get("comments", 0),
+                    shares=post_data.get("shares", 0),
+                    asset_codes=post_data.get("asset_codes"),
+                    primary_asset=post_data.get("primary_asset"),
+                    hashtags=post_data.get("hashtags"),
+                    subreddit=post_data.get("subreddit"),
+                    posted_at=post_data.get("posted_at"),
+                    fetched_at=post_data.get("fetched_at"),
+                )
+                _apply_sentiment(row)
+                session.add(row)
+                session.flush()
+                logger.debug(f"Saved social post: {row.post_id}")
+                return row
+
+        try:
+            return self._retry_operation(_save)
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save social post: {e}")
+            return None
+
+    def save_social_posts_batch(
+        self,
+        posts_data: List[Dict[str, Any]],
+        sentiment_results: Optional[List[Dict[str, Any]]] = None,
+    ) -> int:
+        """
+        Insert or update several social posts inside one session.
+
+        Each entry of ``posts_data`` is matched to an existing row by its
+        ``"id"``; a match is refreshed from the entry (absent keys keep the
+        stored value), otherwise a new row is added. ``sentiment_results``
+        is paired with ``posts_data`` by index and may be shorter.
+
+        Args:
+            posts_data: List of social post data dictionaries
+            sentiment_results: Optional list of sentiment analysis results
+
+        Returns:
+            Number of posts saved, or 0 when a SQLAlchemyError aborts the
+            batch
+        """
+        # Columns refreshed from post_data when the post already exists.
+        refreshable = (
+            "content", "author", "url", "likes", "comments", "shares",
+            "asset_codes", "primary_asset", "hashtags", "subreddit",
+            "posted_at", "fetched_at",
+        )
+        scores = sentiment_results or []
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for i, post_data in enumerate(posts_data):
+                    # Pair by index; posts past the end stay unscored.
+                    sentiment_result = scores[i] if i < len(scores) else None
+
+                    existing = session.execute(
+                        select(SocialPost).where(SocialPost.post_id == post_data.get("id"))
+                    ).scalar_one_or_none()
+
+                    if existing:
+                        # Refresh the stored row in place.
+                        post = existing
+                        for column in refreshable:
+                            current = getattr(post, column)
+                            setattr(post, column, post_data.get(column, current))
+                    else:
+                        post = SocialPost(
+                            post_id=post_data.get("id"),
+                            platform=post_data.get("platform", "unknown"),
+                            content=post_data.get("content", ""),
+                            author=post_data.get("author"),
+                            url=post_data.get("url"),
+                            likes=post_data.get("likes", 0),
+                            comments=post_data.get("comments", 0),
+                            shares=post_data.get("shares", 0),
+                            asset_codes=post_data.get("asset_codes"),
+                            primary_asset=post_data.get("primary_asset"),
+                            hashtags=post_data.get("hashtags"),
+                            subreddit=post_data.get("subreddit"),
+                            posted_at=post_data.get("posted_at"),
+                            fetched_at=post_data.get("fetched_at"),
+                        )
+
+                    if sentiment_result:
+                        post.sentiment_score = sentiment_result.get("compound_score")
+                        post.positive_score = sentiment_result.get("positive")
+                        post.negative_score = sentiment_result.get("negative")
+                        post.neutral_score = sentiment_result.get("neutral")
+                        post.sentiment_label = sentiment_result.get("sentiment_label")
+                        post.analyzed_at = datetime.utcnow()
+
+                    # Only rows that are not stored yet need session.add.
+                    if not existing:
+                        session.add(post)
+                    saved_count += 1
+
+                logger.info(f"Saved {saved_count} social posts")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save social posts batch: {e}")
+            # An aborted batch reports nothing as saved (assumes
+            # get_session() does not commit after an error -- TODO confirm).
+            saved_count = 0
+
+        return saved_count
+
+    def get_recent_social_posts(
+        self,
+        limit: int = 100,
+        hours: int = 24,
+        platform: Optional[str] = None,
+        asset: Optional[str] = None,
+    ) -> List[SocialPost]:
+        """
+        Fetch the newest social posts published within a time window.
+
+        Args:
+            limit: Maximum number of results
+            hours: Look-back window in hours from the current UTC time
+            platform: When truthy, keep only posts from this platform
+            asset: When truthy, keep only posts with this primary asset
+
+        Returns:
+            List of SocialPost objects ordered by ``posted_at``, newest
+            first; an empty list if a SQLAlchemyError is raised
+        """
+        try:
+            with self.get_session() as session:
+                window_start = datetime.utcnow() - timedelta(hours=hours)
+                criteria = [SocialPost.posted_at >= window_start]
+                if platform:
+                    criteria.append(SocialPost.platform == platform)
+                if asset:
+                    criteria.append(SocialPost.primary_asset == asset)
+                query = (
+                    select(SocialPost)
+                    .where(*criteria)
+                    .order_by(desc(SocialPost.posted_at))
+                    .limit(limit)
+                )
+                posts = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(posts)} social posts")
+                return posts
+        except SQLAlchemyError as err:
+            logger.error(f"Failed to retrieve social posts: {err}")
+            return []
+
+    # Analytics Record Methods
+
+    def save_analytics_record(
+        self,
+        record_type: str,
+        metric_name: str,
+        value: float,
+        asset: Optional[str] = None,
+        window: Optional[str] = None,
+        previous_value: Optional[float] = None,
+        change_percentage: Optional[float] = None,
+        trend_direction: Optional[str] = None,
+        extra_data: Optional[Dict[str, Any]] = None,
+        timestamp: Optional[datetime] = None,
+    ) -> Optional[AnalyticsRecord]:
+        """
+        Persist a single analytics measurement via ``self._retry_operation``.
+
+        Args:
+            record_type: Type of record (e.g., 'sentiment_summary', 'trend')
+            metric_name: Metric name (e.g., 'sentiment_score', 'volume')
+            value: Metric value
+            asset: Optional asset symbol
+            window: Optional time window
+            previous_value: Optional previous value
+            change_percentage: Optional change percentage
+            trend_direction: Optional trend direction
+            extra_data: Optional additional metadata
+            timestamp: Optional timestamp; a falsy value means "now" (UTC)
+
+        Returns:
+            AnalyticsRecord object if successful, None otherwise
+        """
+        def _insert():
+            with self.get_session() as db:
+                entry = AnalyticsRecord(
+                    record_type=record_type,
+                    metric_name=metric_name,
+                    value=value,
+                    asset=asset,
+                    window=window,
+                    previous_value=previous_value,
+                    change_percentage=change_percentage,
+                    trend_direction=trend_direction,
+                    extra_data=extra_data,
+                    timestamp=timestamp or datetime.utcnow(),
+                )
+                db.add(entry)
+                db.flush()
+                logger.debug(f"Saved analytics record: {record_type}/{metric_name}")
+                return entry
+
+        try:
+            return self._retry_operation(_insert)
+        except SQLAlchemyError as err:
+            logger.error(f"Failed to save analytics record: {err}")
+            return None
+
+    def save_analytics_records_batch(
+        self,
+        records_data: List[Dict[str, Any]],
+    ) -> int:
+        """
+        Save multiple analytics records in one session.
+
+        Args:
+            records_data: List of analytics record data dictionaries. A
+                missing or falsy ``"timestamp"`` falls back to the current
+                UTC time, matching ``save_analytics_record``.
+
+        Returns:
+            Number of records saved, or 0 when a SQLAlchemyError aborts
+            the batch
+        """
+        # Keys copied verbatim from each dictionary onto the new record.
+        copied = (
+            "record_type", "metric_name", "value", "asset", "window",
+            "previous_value", "change_percentage", "trend_direction",
+            "extra_data",
+        )
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for record_data in records_data:
+                    fields = {key: record_data.get(key) for key in copied}
+                    fields["timestamp"] = record_data.get("timestamp") or datetime.utcnow()
+                    session.add(AnalyticsRecord(**fields))
+                    saved_count += 1
+                logger.info(f"Saved {saved_count} analytics records")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save analytics records batch: {e}")
+            # Aborted batch: report nothing as saved (assumes rollback -- TODO confirm).
+            saved_count = 0
+
+        return saved_count
+
+    def get_analytics_records(
+        self,
+        record_type: Optional[str] = None,
+        asset: Optional[str] = None,
+        metric_name: Optional[str] = None,
+        hours: int = 24,
+        limit: int = 100,
+    ) -> List[AnalyticsRecord]:
+        """
+        Fetch the newest analytics records inside a look-back window.
+
+        Args:
+            record_type: When truthy, keep only records of this type
+            asset: When truthy, keep only records for this asset
+            metric_name: When truthy, keep only records of this metric
+            hours: Look-back window in hours from the current UTC time
+            limit: Maximum number of results
+
+        Returns:
+            List of AnalyticsRecord objects ordered by ``timestamp``,
+            newest first; an empty list if a SQLAlchemyError is raised
+        """
+        try:
+            with self.get_session() as session:
+                window_start = datetime.utcnow() - timedelta(hours=hours)
+                criteria = [AnalyticsRecord.timestamp >= window_start]
+                if record_type:
+                    criteria.append(AnalyticsRecord.record_type == record_type)
+                if asset:
+                    criteria.append(AnalyticsRecord.asset == asset)
+                if metric_name:
+                    criteria.append(AnalyticsRecord.metric_name == metric_name)
+                query = (
+                    select(AnalyticsRecord)
+                    .where(*criteria)
+                    .order_by(desc(AnalyticsRecord.timestamp))
+                    .limit(limit)
+                )
+                records = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(records)} analytics records")
+                return records
+        except SQLAlchemyError as err:
+            logger.error(f"Failed to retrieve analytics records: {err}")
+            return []
+
+    # Legacy News Insights Methods (kept for backward compatibility)
+
+    def save_news_insight(
+        self,
+        sentiment_result: Dict[str, Any],
+        article_data: Optional[Dict[str, Any]] = None,
+    ) -> Optional[NewsInsight]:
+        """
+        Store the sentiment analysis outcome for one news article.
+
+        Args:
+            sentiment_result: Sentiment analysis result dictionary
+            article_data: Optional article metadata
+
+        Returns:
+            NewsInsight object if successful, None otherwise
+        """
+        # A falsy article_data acts as an empty mapping: every lookup is None.
+        meta = article_data or {}
+        try:
+            with self.get_session() as db:
+                record = NewsInsight(
+                    article_id=meta.get("id"),
+                    article_title=meta.get("title"),
+                    article_url=meta.get("url"),
+                    source=meta.get("source"),
+                    sentiment_score=sentiment_result["compound_score"],
+                    positive_score=sentiment_result["positive"],
+                    negative_score=sentiment_result["negative"],
+                    neutral_score=sentiment_result["neutral"],
+                    sentiment_label=sentiment_result["sentiment_label"],
+                    keywords=meta.get("keywords"),
+                    language=meta.get("language"),
+                    article_published_at=meta.get("published_at"),
+                )
+                db.add(record)
+                db.flush()
+                logger.debug(f"Saved news insight: {record.id}")
+                return record
+        except SQLAlchemyError as err:
+            logger.error(f"Failed to save news insight: {err}")
+            return None
+
+    def save_news_insights_batch(
+        self,
+        sentiment_results: List[Dict[str, Any]],
+        articles_data: Optional[List[Dict[str, Any]]] = None,
+    ) -> int:
+        """
+        Save multiple news insights in one session.
+
+        Args:
+            sentiment_results: List of sentiment analysis results
+            articles_data: Optional article metadata, paired by index
+
+        Returns:
+            Number of insights saved (0 if a SQLAlchemyError aborts the batch)
+        """
+        articles = articles_data or []
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for i, result in enumerate(sentiment_results):
+                    meta = (articles[i] if i < len(articles) else None) or {}
+                    session.add(NewsInsight(
+                        article_id=meta.get("id"),
+                        article_title=meta.get("title"),
+                        article_url=meta.get("url"),
+                        source=meta.get("source"),
+                        sentiment_score=result["compound_score"],
+                        positive_score=result["positive"],
+                        negative_score=result["negative"],
+                        neutral_score=result["neutral"],
+                        sentiment_label=result["sentiment_label"],
+                        keywords=meta.get("keywords"),
+                        language=meta.get("language"),
+                        article_published_at=meta.get("published_at"),
+                    ))
+                    saved_count += 1
+                logger.info(f"Saved {saved_count} news insights")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save news insights batch: {e}")
+            # Aborted batch: report nothing as saved (assumes rollback -- TODO confirm).
+            saved_count = 0
+
+        return saved_count
+
+    def get_recent_news_insights(
+        self, limit: int = 100, hours: int = 24
+    ) -> List[NewsInsight]:
+        """
+        Fetch the most recently analyzed news insights.
+
+        Args:
+            limit: Maximum number of results
+            hours: Look-back window in hours from the current UTC time
+
+        Returns:
+            List of NewsInsight objects, newest ``analyzed_at`` first;
+            an empty list if a SQLAlchemyError is raised
+        """
+        try:
+            with self.get_session() as session:
+                window_start = datetime.utcnow() - timedelta(hours=hours)
+                newest_first = desc(NewsInsight.analyzed_at)
+                # Filter to the window, then order and cap the result set.
+                query = select(NewsInsight).where(NewsInsight.analyzed_at >= window_start)
+                query = query.order_by(newest_first).limit(limit)
+
+                insights = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(insights)} news insights")
+                return insights
+        except SQLAlchemyError as err:
+            logger.error(f"Failed to retrieve news insights: {err}")
+            return []
+
+    # Legacy Asset Trends Methods (kept for backward compatibility)
+
+    def save_asset_trend(
+        self,
+        asset: str,
+        metric_name: str,
+        window: str,
+        trend_data: Dict[str, Any],
+    ) -> Optional[AssetTrend]:
+        """
+        Store one computed trend for an asset metric.
+
+        Args:
+            asset: Asset symbol (e.g., 'XLM')
+            metric_name: Metric name (e.g., 'sentiment_score')
+            window: Time window (e.g., '24h')
+            trend_data: Trend data dictionary
+
+        Returns:
+            AssetTrend object if successful, None otherwise
+        """
+        try:
+            with self.get_session() as db:
+                record = AssetTrend(
+                    asset=asset,
+                    metric_name=metric_name,
+                    window=window,
+                    trend_direction=trend_data["trend_direction"],
+                    score=trend_data.get("score", 0.0),
+                    current_value=trend_data["current_value"],
+                    previous_value=trend_data["previous_value"],
+                    change_percentage=trend_data["change_percentage"],
+                    extra_data=trend_data.get("extra_data") or trend_data.get("metadata"),
+                )
+                db.add(record)
+                db.flush()
+                logger.debug(f"Saved asset trend: {asset}/{metric_name}")
+                return record
+        except SQLAlchemyError as err:
+            logger.error(f"Failed to save asset trend: {err}")
+            return None
+
+    def save_asset_trends_batch(
+        self, asset: str, window: str, trends: List[Dict[str, Any]]
+    ) -> int:
+        """
+        Save multiple asset trends in one session.
+
+        Args:
+            asset: Asset symbol
+            window: Time window
+            trends: List of trend dictionaries
+
+        Returns:
+            Number of trends saved (0 if a SQLAlchemyError aborts the batch)
+        """
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for trend_data in trends:
+                    session.add(AssetTrend(
+                        asset=asset,
+                        metric_name=trend_data["metric_name"],
+                        window=window,
+                        trend_direction=trend_data["trend_direction"],
+                        score=trend_data.get("score", 0.0),
+                        current_value=trend_data["current_value"],
+                        previous_value=trend_data["previous_value"],
+                        change_percentage=trend_data["change_percentage"],
+                        extra_data=trend_data.get("extra_data") or trend_data.get("metadata"),
+                    ))
+                    saved_count += 1
+                logger.info(f"Saved {saved_count} asset trends for {asset}")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save asset trends batch: {e}")
+            # Aborted batch: report nothing as saved (assumes rollback -- TODO confirm).
+            saved_count = 0
+
+        return saved_count
+
+    def get_recent_asset_trends(
+        self, asset: str, metric_name: Optional[str] = None, limit: int = 100
+    ) -> List[AssetTrend]:
+        """
+        Fetch the newest stored trends for one asset.
+
+        Args:
+            asset: Asset symbol
+            metric_name: When truthy, keep only trends for this metric
+            limit: Maximum number of results
+
+        Returns:
+            List of AssetTrend objects, newest ``timestamp`` first; an
+            empty list if a SQLAlchemyError is raised
+        """
+        try:
+            with self.get_session() as session:
+                criteria = [AssetTrend.asset == asset]
+                if metric_name:
+                    criteria.append(AssetTrend.metric_name == metric_name)
+
+                query = select(AssetTrend).where(*criteria)
+                query = query.order_by(desc(AssetTrend.timestamp)).limit(limit)
+                trends = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(trends)} asset trends for {asset}")
+                return trends
+        except SQLAlchemyError as err:
+            logger.error(f"Failed to retrieve asset trends: {err}")
+            return []
+
+    def upsert_on_chain_entity(self, stable_id: str, entity_type: str, name: str, ticker: Optional[str] = None, extra_data: Optional[Dict] = None) -> OnChainEntity:
+        """
+        Create the on-chain entity with this stable id, or update it.
+
+        An existing row gets the new name; ticker and extra_data are only
+        replaced by truthy values and entity_type is left as stored. Errors
+        raised by ``self._retry_operation`` are not caught here.
+
+        Args:
+            stable_id: Stable unique ID for the entity
+            entity_type: "project" or "asset"
+            name: Human-readable name
+            ticker: Optional asset ticker
+            extra_data: Optional additional metadata
+
+        Returns:
+            The OnChainEntity object
+        """
+        def _write():
+            with self.get_session() as session:
+                lookup = select(OnChainEntity).where(OnChainEntity.stable_id == stable_id)
+                entity = session.execute(lookup).scalar_one_or_none()
+                if not entity:
+                    entity = OnChainEntity(
+                        stable_id=stable_id,
+                        entity_type=entity_type,
+                        name=name,
+                        ticker=ticker,
+                        extra_data=extra_data
+                    )
+                    session.add(entity)
+                else:
+                    entity.name = name
+                    entity.ticker = ticker or entity.ticker
+                    entity.extra_data = extra_data or entity.extra_data
+                session.flush()
+                return entity
+        return self._retry_operation(_write)
+
+    def link_article_to_entities(self, article_id: str, linked_entities: List) -> None:
+        """
+        Link an article to on-chain entities.
+
+        All entities are upserted before the link session is opened, so
+        that session is not open while ``upsert_on_chain_entity`` calls
+        ``self.get_session()`` on its own. Errors are not caught here.
+
+        Args:
+            article_id: The article's unique ID
+            linked_entities: List of LinkedEntity objects
+        """
+        entities = list(linked_entities)
+
+        def _link():
+            for entity in entities:
+                self.upsert_on_chain_entity(
+                    stable_id=entity.stable_id,
+                    entity_type=entity.entity_type,
+                    name=entity.name,
+                    ticker=getattr(entity, 'ticker', None)
+                )
+            with self.get_session() as session:
+                for entity in entities:
+                    matches = and_(
+                        ArticleEntityLink.article_id == article_id,
+                        ArticleEntityLink.entity_stable_id == entity.stable_id,
+                    )
+                    lookup = select(ArticleEntityLink).where(matches)
+                    if session.execute(lookup).scalar_one_or_none():
+                        continue  # this article/entity pair is already linked
+                    session.add(ArticleEntityLink(
+                        article_id=article_id,
+                        entity_stable_id=entity.stable_id,
+                        confidence=getattr(entity, 'confidence', None),
+                    ))
+        self._retry_operation(_link)
+
+    def get_article_linked_entities(self, article_id: str) -> List[Dict]:
+        """
+        Get all entities linked to an article.
+
+        Links and entities come from a single joined query; links whose
+        entity row does not exist are left out.
+
+        Args:
+            article_id: The article's unique ID
+
+        Returns:
+            List of entity data dictionaries; an empty list if a
+            SQLAlchemyError is raised
+        """
+        try:
+            with self.get_session() as session:
+                on_clause = OnChainEntity.stable_id == ArticleEntityLink.entity_stable_id
+                stmt = select(ArticleEntityLink, OnChainEntity).join_from(
+                    ArticleEntityLink, OnChainEntity, on_clause
+                ).where(ArticleEntityLink.article_id == article_id)
+                return [
+                    {
+                        "stable_id": entity.stable_id,
+                        "type": entity.entity_type,
+                        "name": entity.name,
+                        "ticker": entity.ticker,
+                        "confidence": link.confidence
+                    }
+                    for link, entity in session.execute(stmt).all()
+                ]
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to get linked entities for article {article_id}: {e}")
+            return []
+
+    def get_articles_for_entity(self, stable_id: str, limit: int = 100) -> List[Article]:
+        """
+        Get the most recently published articles linked to an entity.
+
+        The limit is applied after ordering by publication time, so the
+        newest ``limit`` linked articles are returned.
+
+        Args:
+            stable_id: The entity's stable ID
+            limit: Maximum number of articles to return
+
+        Returns:
+            List of Article objects ([] if a SQLAlchemyError is raised)
+        """
+        try:
+            with self.get_session() as session:
+                linked_ids = select(ArticleEntityLink.article_id).where(
+                    ArticleEntityLink.entity_stable_id == stable_id
+                )
+                stmt = select(Article).where(Article.article_id.in_(linked_ids))
+                stmt = stmt.order_by(desc(Article.published_at)).limit(limit)
+                return session.execute(stmt).scalars().all()
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to get articles for entity {stable_id}: {e}")
+            return []
+
+    def measure_entity_linker_precision(self) -> Dict[str, float]:
+        """
+        Run ``measure_precision`` on ``self.entity_linker`` and log the result.
+
+        Returns:
+            The metrics dictionary returned by ``measure_precision``
+        """
+        metrics = measure_precision(self.entity_linker)
+        logger.info("Entity Linker Precision Metrics:")
+        ratios = (("Precision", "precision"), ("Recall", "recall"), ("F1 Score", "f1"))
+        for label, key in ratios:
+            logger.info(f"  {label}: {metrics[key]:.4f}")
+        logger.info(f"  Test Cases: {metrics['test_cases']}")
+        return metrics
+
+    def get_sentiment_summary(self, hours: int = 24) -> Dict[str, Any]:
+        """
+        Get sentiment summary statistics for recently analyzed news.
+
+        The same keys are returned whether or not any insight falls in
+        the window, so callers can read the percentage fields without
+        checking ``total_articles`` first.
+
+        Args:
+            hours: Time window in hours
+
+        Returns:
+            Summary statistics dictionary; an empty dict if a
+            SQLAlchemyError is raised
+        """
+        try:
+            with self.get_session() as session:
+                cutoff_time = datetime.utcnow() - timedelta(hours=hours)
+                insights = session.execute(
+                    select(NewsInsight).where(NewsInsight.analyzed_at >= cutoff_time)
+                ).scalars().all()
+
+                total = len(insights)
+                labels = ("positive", "negative", "neutral")
+                # One pass over the rows; other label values are not counted.
+                counts = {label: 0 for label in labels}
+                for insight in insights:
+                    if insight.sentiment_label in counts:
+                        counts[insight.sentiment_label] += 1
+
+                summary = {"total_articles": total, "average_sentiment": 0.0}
+                # Only average when there is something to average.
+                if total:
+                    avg_sentiment = sum(i.sentiment_score for i in insights) / total
+                    summary["average_sentiment"] = round(avg_sentiment, 4)
+                for label in labels:
+                    summary[f"{label}_count"] = counts[label]
+                for label in labels:
+                    # Guard the division: an empty window reports 0.0.
+                    share = counts[label] / total * 100 if total else 0.0
+                    summary[f"{label}_percentage"] = round(share, 2)
+                return summary
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to get sentiment summary: {e}")
+            return {}
+
+    def cleanup_old_data(self, days: int = 30) -> Dict[str, int]:
+        """
+        Delete analytics rows older than the retention window.
+
+        Rows whose ``created_at`` is earlier than ``utcnow() - days`` are
+        removed from the Article, SocialPost, AnalyticsRecord, NewsInsight and
+        AssetTrend tables, in that order, within a single session.
+
+        Args:
+            days: Number of days to keep
+
+        Returns:
+            Dictionary with counts of deleted records, keyed by "articles",
+            "social_posts", "analytics_records", "news_insights" and
+            "asset_trends". Every count is 0 when the cleanup raises
+            SQLAlchemyError; the error is logged rather than re-raised.
+
+        Notes:
+            - The cutoff is a naive UTC timestamp; presumably the
+              ``created_at`` columns are stored the same way (TODO confirm).
+            - No commit is issued here; this assumes the ``get_session()``
+              context manager commits on success (TODO confirm).
+            - ``Query.delete()`` is a bulk operation, so in-Python
+              relationship cascades are not applied to the deleted rows;
+              dependent rows rely on database-level rules.
+        """
+        try:
+            # Rows created strictly before this instant are treated as stale.
+            cutoff_date = datetime.utcnow() - timedelta(days=days)
+
+            with self.get_session() as session:
+
+                def purge(model) -> int:
+                    """Delete the model's rows created before the cutoff."""
+                    stale_rows = session.query(model).filter(
+                        model.created_at < cutoff_date
+                    )
+                    # Query.delete() returns the number of rows matched.
+                    return stale_rows.delete()
+
+                # Dict entries are evaluated top to bottom, so the deletes
+                # run in the order the keys are listed.
+                deleted_counts = {
+                    "articles": purge(Article),
+                    "social_posts": purge(SocialPost),
+                    "analytics_records": purge(AnalyticsRecord),
+                    "news_insights": purge(NewsInsight),  # legacy
+                    "asset_trends": purge(AssetTrend),  # legacy
+                }
+
+                logger.info(f"Cleaned up old data: {deleted_counts}")
+                return deleted_counts
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to cleanup old data: {e}")
+            # Report "nothing deleted" for every table on failure.
+            return {
+                "articles": 0,
+                "social_posts": 0,
+                "analytics_records": 0,
+                "news_insights": 0,
+                "asset_trends": 0,
+            }
diff --git a/apps/data-processing/fetchers.py b/apps/data-processing/fetchers.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb12f97139416e5c5807cc43fc4efa5a77767ce7
GIT binary patch
literal 4341
pcmeIu0Sy2E0K%a6Pi+qe5hx58Fkrxd0RsjM7%*VKfB^%C0|OBC00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/__init__.py b/apps/data-processing/ingestion/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ef27ea5828c7b7d0b19fc95cd7b88949f079978
GIT binary patch
literal 892
ScmZQz7zLvtFd6~_5dr`Md;kFe

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/news_deduplicator.py b/apps/data-processing/ingestion/news_deduplicator.py
new file mode 100644
index 0000000000000000000000000000000000000000..d326408e27aeb0bf33ca9c8a504dcd1d93d24e95
GIT binary patch
literal 7335
zcmeIuF#!Mo0K%a4Pi+e?h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxdf$P8k
D9H#&Q

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/news_fetcher.py b/apps/data-processing/ingestion/news_fetcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..6445058bc92ae07b6078e52cce83ac3a7cc49186
GIT binary patch
literal 12180
zcmeIuF#!Mo0K%a4Pi+Tph(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
Y7%*VKfB^#r3>YwAz<>b*1`Ip{122>S0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/price_fetcher.py b/apps/data-processing/ingestion/price_fetcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..868e1f69c07a0edbde5da3884697dffd23cf4ec9
GIT binary patch
literal 8131
zcmeIuF#!Mo0K%a4Pi+kkh(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
G91IK}!vFyQ

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/run_ingestion_quality_checks.py b/apps/data-processing/ingestion/run_ingestion_quality_checks.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f21cd7b2c09f6518b6c33781c2163bd95e5573f
GIT binary patch
literal 721
QcmZQz7zLvtFpNR~00Pkf0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/social_fetcher.py b/apps/data-processing/ingestion/social_fetcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..40477603e848695f2ba84d0559f7a6fe89f247b1
GIT binary patch
literal 25228
zcmeIu0Sy2E0K%a6Pi+qe5hx58Fkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@
z0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VK
cfB^#r3>YwAz<>b*1`HT5V8DO@0|s6O24ai=0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/soroban_event_indexer.py b/apps/data-processing/ingestion/soroban_event_indexer.py
new file mode 100644
index 00000000..41bbf0f6
--- /dev/null
+++ b/apps/data-processing/ingestion/soroban_event_indexer.py
@@ -0,0 +1,267 @@
+"""
+Soroban Event Indexer for incremental sync
+Polls Soroban RPC for new events and sends them to backend for processing
+"""
+
+import os
+import time
+import json
+import logging
+from pathlib import Path
+from datetime import datetime, timezone
+import requests
+from typing import List, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+class SorobanEventIndexer:
+    """Incrementally forward Soroban contract events to the backend.
+
+    Each run pages through ``getEvents`` from the ledger after the saved
+    checkpoint, POSTs every event to ``/soroban-events/ingest`` and stores the
+    new checkpoint in a JSON state file. Delivery is at-least-once: the
+    checkpoint never moves past an event the backend did not accept.
+    """
+
+    PAGE_LIMIT = 100  # events requested per getEvents page
+    REQUEST_TIMEOUT = 30  # seconds, applied to every HTTP call
+
+    def __init__(
+        self,
+        rpc_url: str,
+        backend_url: str,
+        ingest_secret: str,
+        contract_ids: Optional[List[str]] = None,
+        state_file: str = "./data/soroban_indexer_state.json",
+        poll_interval: int = 30
+    ):
+        """Store the configuration and load the checkpoint from ``state_file``."""
+        self.rpc_url = rpc_url
+        self.backend_url = backend_url
+        self.ingest_secret = ingest_secret
+        self.contract_ids = contract_ids or []
+        self.state_file = Path(state_file)
+        self.poll_interval = poll_interval
+        self.last_ledger: int = self._load_last_ledger()
+
+    def _load_last_ledger(self) -> int:
+        """Load last processed ledger from state file (0 when none is usable)"""
+        if not self.state_file.exists():
+            return 0
+        try:
+            with open(self.state_file, 'r') as f:
+                return max(int(json.load(f).get("last_ledger", 0)), 0)
+        except (OSError, ValueError, TypeError, AttributeError):
+            # Unreadable file, invalid JSON or a non-integer value: start over.
+            logger.warning("Failed to load state file, starting from ledger 0")
+            return 0
+
+    def _save_last_ledger(self, ledger: int):
+        """Save last processed ledger to state file and to ``self.last_ledger``"""
+        self.state_file.parent.mkdir(parents=True, exist_ok=True)
+        state = {"last_ledger": ledger, "timestamp": datetime.now(timezone.utc).isoformat()}
+        # Write a temp file, then swap it in, so a crash cannot truncate the state.
+        tmp_file = self.state_file.with_name(self.state_file.name + ".tmp")
+        with open(tmp_file, 'w') as f:
+            json.dump(state, f)
+        os.replace(tmp_file, self.state_file)
+        self.last_ledger = ledger
+
+    def _rpc_call(self, method: str, params: Optional[Dict] = None) -> Dict:
+        """POST a JSON-RPC request and return its ``result`` object.
+
+        Raises RuntimeError on a JSON-RPC error; ``requests`` errors propagate.
+        """
+        payload = {"jsonrpc": "2.0", "id": 1, "method": method}
+        if params is not None:
+            payload["params"] = params
+        response = requests.post(self.rpc_url, json=payload, timeout=self.REQUEST_TIMEOUT)
+        response.raise_for_status()
+        data = response.json()
+        if "error" in data:
+            logger.error(f"RPC Error: {data['error']}")
+            raise RuntimeError(f"RPC Error: {data['error']}")
+        return data.get("result") or {}
+
+    def fetch_latest_ledger(self) -> int:
+        """Get the latest ledger sequence from Soroban RPC"""
+        try:
+            return int(self._rpc_call("getLatestLedger").get("sequence", 0))
+        except Exception as e:
+            logger.error(f"Failed to fetch latest ledger: {e}")
+            raise
+
+    def fetch_events_since(self, start_ledger: int) -> List[Dict]:
+        """Fetch events from Soroban RPC starting at the given ledger
+
+        Raises RuntimeError on a JSON-RPC error and re-raises request failures.
+        """
+        filters = []
+        if self.contract_ids:
+            filters.append({"type": "contract", "contractIds": self.contract_ids})
+
+        all_events: List[Dict] = []
+        cursor = None
+        while True:
+            pagination: Dict = {"limit": self.PAGE_LIMIT}
+            params: Dict = {"filters": filters, "pagination": pagination}
+            if cursor:
+                # getEvents rejects requests carrying both a cursor and a
+                # startLedger, so follow-up pages send the cursor only.
+                pagination["cursor"] = cursor
+            else:
+                params["startLedger"] = start_ledger
+
+            try:
+                result = self._rpc_call("getEvents", params)
+            except (requests.RequestException, ValueError) as e:
+                logger.error(f"RPC Request failed: {e}")
+                raise
+
+            events = result.get("events", [])
+            all_events.extend(events)
+            if len(events) < self.PAGE_LIMIT:
+                break  # a short page is treated as the last one
+
+            # Prefer the result-level cursor, else the last event's token or id.
+            last = events[-1]
+            next_cursor = result.get("cursor") or last.get("pagingToken") or last.get("id")
+            if not next_cursor or next_cursor == cursor:
+                break  # cannot advance; stop instead of refetching this page
+            cursor = next_cursor
+            time.sleep(0.5)  # Rate limiting
+
+        return all_events
+
+    @staticmethod
+    def _stable_event_index(event: Dict, fallback: int) -> int:
+        """Return the numeric suffix of the event ``id``, else ``fallback``."""
+        suffix = str(event.get("id", "")).rpartition("-")[2]
+        return int(suffix) if suffix.isdecimal() else fallback
+
+    def send_event_to_backend(self, event: Dict, event_index: int) -> bool:
+        """Send a single event to the backend ingest endpoint; True on success"""
+        # getEvents reports the hash as "txHash"; "transactionHash" is a fallback.
+        tx_hash = event.get("txHash") or event.get("transactionHash", "")
+        ingest_payload = {
+            "txHash": tx_hash,
+            "eventIndex": event_index,
+            "ledgerSequence": int(event.get("ledger", 0)),
+            "contractId": event.get("contractId"),
+            "eventType": event.get("type"),
+            "rawPayload": event
+        }
+        headers = {"Content-Type": "application/json", "x-ingest-secret": self.ingest_secret}
+        try:
+            response = requests.post(
+                f"{self.backend_url}/soroban-events/ingest",
+                json=ingest_payload, headers=headers, timeout=self.REQUEST_TIMEOUT
+            )
+            response.raise_for_status()
+            logger.debug(f"Successfully sent event {tx_hash}:{event_index} to backend")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to send event {tx_hash}:{event_index} to backend: {e}")
+            return False
+
+    def run_once(self) -> Dict:
+        """Run one iteration of the indexer; errors are returned, never raised"""
+        logger.info("=" * 60)
+        logger.info("SOROBAN EVENT INDEXER - INCREMENTAL SYNC")
+        logger.info("=" * 60)
+
+        try:
+            latest_ledger = self.fetch_latest_ledger()
+            logger.info(f"Latest ledger: {latest_ledger}")
+            logger.info(f"Last processed ledger: {self.last_ledger}")
+
+            if latest_ledger <= self.last_ledger:
+                logger.info("No new ledgers to process")
+                return {"status": "no_new_ledgers", "events_processed": 0}
+
+            # Without a checkpoint, begin at the current tip: the RPC node
+            # rejects a startLedger older than the history it retains.
+            start_ledger = self.last_ledger + 1 if self.last_ledger > 0 else latest_ledger
+            logger.info(f"Fetching events from ledger {start_ledger} to {latest_ledger}")
+
+            events = self.fetch_events_since(start_ledger)
+            logger.info(f"Found {len(events)} new events")
+
+            sent_count = 0
+            highest_ledger = latest_ledger
+            first_failed_ledger = None
+            for idx, event in enumerate(events):
+                event_ledger = int(event.get("ledger", 0))
+                highest_ledger = max(highest_ledger, event_ledger)
+                if self.send_event_to_backend(event, self._stable_event_index(event, idx)):
+                    sent_count += 1
+                elif first_failed_ledger is None or event_ledger < first_failed_ledger:
+                    first_failed_ledger = event_ledger
+            failed_count = len(events) - sent_count
+
+            # Everything up to latest_ledger is assumed scanned; on failure,
+            # checkpoint just before the earliest failed ledger to retry it.
+            if first_failed_ledger is not None:
+                highest_ledger = max(self.last_ledger, first_failed_ledger - 1)
+            self._save_last_ledger(highest_ledger)
+
+            logger.info(f"Sent {sent_count} events to backend, {failed_count} failed")
+            logger.info(f"Updated last processed ledger to {highest_ledger}")
+            logger.info("=" * 60)
+
+            return {
+                "status": "success",
+                "events_found": len(events),
+                "events_sent": sent_count,
+                "events_failed": failed_count,
+                "last_ledger": highest_ledger
+            }
+        except Exception as e:
+            logger.error(f"Error in indexer run: {e}", exc_info=True)
+            return {"status": "error", "error": str(e)}
+
+    def run_forever(self):
+        """Run the indexer continuously, polling for new events"""
+        logger.info("Starting Soroban event indexer (continuous mode)")
+        logger.info(f"Poll interval: {self.poll_interval} seconds")
+        while True:
+            self.run_once()
+            time.sleep(self.poll_interval)
+
+def main():
+    """Parse CLI options (with environment-variable defaults) and run the indexer."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Soroban Event Indexer")
+    parser.add_argument("--rpc-url", type=str, default=os.getenv("SOROBAN_RPC_URL", "https://soroban-testnet.stellar.org"), help="Soroban RPC URL")
+    parser.add_argument("--backend-url", type=str, default=os.getenv("BACKEND_URL", "http://localhost:3000"), help="Backend API URL")
+    parser.add_argument("--ingest-secret", type=str, default=os.getenv("SOROBAN_INGEST_SECRET", ""), help="Secret for backend ingest endpoint")
+    parser.add_argument("--contract-ids", nargs="*", default=os.getenv("SOROBAN_CONTRACT_IDS", "").split(","), help="List of contract IDs to index (space- or comma-separated)")
+    parser.add_argument("--state-file", type=str, default="./data/soroban_indexer_state.json", help="Path to state file")
+    parser.add_argument("--poll-interval", type=int, default=30, help="Poll interval in seconds")
+    parser.add_argument("--once", action="store_true", help="Run once and exit")
+    args = parser.parse_args()
+
+    # Accept "--contract-ids A B" as well as comma-separated values; drop blanks.
+    contract_ids = [cid.strip() for raw in args.contract_ids for cid in raw.split(",") if cid.strip()]
+
+    indexer = SorobanEventIndexer(
+        rpc_url=args.rpc_url,
+        backend_url=args.backend_url,
+        ingest_secret=args.ingest_secret,
+        contract_ids=contract_ids,
+        state_file=args.state_file,
+        poll_interval=args.poll_interval
+    )
+
+    if args.once:
+        indexer.run_once()
+    else:
+        indexer.run_forever()
+
+if __name__ == "__main__":
+    # Logging is configured only when this file is executed as a script;
+    # importing the module leaves the caller's logging setup untouched.
+    log_format = "%(asctime)s - %(levelname)s - %(message)s"
+    logging.basicConfig(level=logging.INFO, format=log_format)
+    main()
diff --git a/apps/data-processing/ingestion/stellar_fetcher.py b/apps/data-processing/ingestion/stellar_fetcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..625578ab8609c0e23c322302c00d00a38aaa1b8b
GIT binary patch
literal 20177
zcmeIuF#!Mo0K%a4Pi+Tph(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
z7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*
H29ALNPSF4X

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ingestion/stellar_ingestion_checks.py b/apps/data-processing/ingestion/stellar_ingestion_checks.py
new file mode 100644
index 0000000000000000000000000000000000000000..920b6a996b378a0f24194b84c8c23b33e2c89b22
GIT binary patch
literal 17144
zcmeIuF#!Mo0K%a4Pi+e?h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
t7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VqKQKc000961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/main.py b/apps/data-processing/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..d46b5e7a101880b45bf10a74da8ea71fdd71cdc3
GIT binary patch
literal 14703
zcmeIufdBvi0K=g9Qy=7oP+`D;0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEj
hFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HUu9vC@q00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ml/__init__.py b/apps/data-processing/ml/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9be79a5b6e4d0e40fb1bcab3c0e3f261e26712a6
GIT binary patch
literal 623
QcmZQz7zLvtK(`P800M6S0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ml/feature_store.py b/apps/data-processing/ml/feature_store.py
new file mode 100644
index 0000000000000000000000000000000000000000..51b0c3d05e46a277dca1853917d9b55753e0c668
GIT binary patch
literal 3732
ncmeIuF#!Mo0K%a4Pi+hzh(KY$fB^#r3>YwAz<>b*20jA=4wL`^

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ml/model_registry.py b/apps/data-processing/ml/model_registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..465a1a02c285ec2423c03def3082615b7953ff4a
GIT binary patch
literal 7036
zcmeIuF#!Mo0K%a4Pi+ZLh(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkryIV_+M6
B00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ml/price_predictor.py b/apps/data-processing/ml/price_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..845c4d2797d2184e5b9e6a1672dbd20097bebcdb
GIT binary patch
literal 3119
kcmeIufdBvi0K=g9Q(xc+g-~I@fB^#r3>YwAz<_}}FbppM0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/ml/retraining_pipeline.py b/apps/data-processing/ml/retraining_pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..803aa2cd90bdc9dc638ceb77f72437c25c9cde17
GIT binary patch
literal 10353
zcmeIufdBvi0K=g9Qy<|1g-~I@fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
Q7%*VKfB^#r3>Y{A11NC-0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/qa_exporter.py b/apps/data-processing/qa_exporter.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1d14d3c813f8c8f8ae45e2fa786e518ed09068b
GIT binary patch
literal 9736
zcmeIu0Sy2E0K%a6Pi+o2h(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
N7%*VKfB^%~0|O=q00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/scheduler.py b/apps/data-processing/scheduler.py
new file mode 100644
index 0000000000000000000000000000000000000000..2aeda0c2231d880775238162ba531ebb0b9bd233
GIT binary patch
literal 11497
zcmeIuF#!Mo0K%a4Pi+kkh(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM
V7%*VKfB^#r3>YwAz<`0XfdMS(00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/security.py b/apps/data-processing/security.py
new file mode 100644
index 0000000000000000000000000000000000000000..468ad34a64842b77d7e019eb1ad0270687530640
GIT binary patch
literal 7044
zcmeIuF#!Mo0K%a4Pi+ZLh(KY$fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkryIb6^{U
B00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/sentiment.py b/apps/data-processing/sentiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3bbbe997f1785a6ae6fd86e01408f41ec46b78c
GIT binary patch
literal 9799
zcmeIufdBvi0K=g9Qy=7oP+`D;0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEj
MFkrxd0RzVa113iR0RR91

literal 0
HcmV?d00001

diff --git a/apps/data-processing/src/analytics/entity_linker.py b/apps/data-processing/src/analytics/entity_linker.py
new file mode 100644
index 00000000..21b388d8
--- /dev/null
+++ b/apps/data-processing/src/analytics/entity_linker.py
@@ -0,0 +1,212 @@
+"""
+On-chain Entity Linker for news articles.
+Links news content to on-chain projects and assets, producing stable IDs
+and storing links in the database.
+"""
+
+import logging
+import re
+from typing import Dict, List, Optional, Tuple
+from dataclasses import dataclass
+
+from .keywords import CRYPTO_PROJECT_MAP, KNOWN_TICKERS, TICKER_TO_PROJECT
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class LinkedEntity:
+    stable_id: str  # "<entity_type>:<lowercased identifier>", e.g. "asset:xlm"
+    entity_type: str  # "project" or "asset"
+    name: str  # canonical project name, or the ticker symbol for assets
+    ticker: Optional[str] = None  # link_text sets this for assets only
+    confidence: float = 1.0  # link_text assigns 0.95 / 0.9 / 0.85 by match kind
+
+
+class EntityLinker:
+    """
+    Links text content to known on-chain entities (projects and assets)
+    with stable, deterministic IDs.
+    """
+
+    # Standalone runs of 2-6 uppercase letters; compiled once, not per call.
+    _TICKER_PATTERN = re.compile(r"\b([A-Z]{2,6})\b")
+
+    def __init__(self) -> None:
+        self._project_patterns = self._compile_project_patterns()
+        # Filter out SDF from asset tickers since it's a project
+        self._asset_tickers = {t for t in KNOWN_TICKERS if t != "SDF"}
+
+    def _compile_project_patterns(self) -> List[Tuple[str, re.Pattern]]:
+        """Compile regex patterns for project name matching, sorted by length descending."""
+        # Whole-word, case-insensitive patterns; longest project names first.
+        sorted_projects = sorted(CRYPTO_PROJECT_MAP, key=len, reverse=True)
+        return [
+            (name, re.compile(r"\b" + re.escape(name) + r"\b", re.IGNORECASE))
+            for name in sorted_projects
+        ]
+
+    def _generate_stable_id(self, entity_type: str, identifier: str) -> str:
+        """Generate a stable, deterministic ID for an entity."""
+        normalized = identifier.strip().lower()
+        return f"{entity_type}:{normalized}"
+
+    @staticmethod
+    def _canonical_project_name(project_key: str, default: str) -> str:
+        """
+        Resolve a CRYPTO_PROJECT_MAP key to its canonical project name.
+
+        The canonical name is the last entry of the key's list; ``default`` is
+        returned when the key is unknown or its list is empty.
+        """
+        names = CRYPTO_PROJECT_MAP.get(project_key)
+        return names[-1] if names else default
+
+    def link_text(
+        self,
+        text: str,
+        title: Optional[str] = None
+    ) -> List[LinkedEntity]:
+        """
+        Link the given text to known on-chain entities.
+
+        Args:
+            text: Main text content to analyze
+            title: Optional article title; it is scanned together with ``text``
+                (matches found in the title receive no extra weight)
+
+        Returns:
+            List of LinkedEntity objects with stable IDs, one per distinct ID.
+            Projects matched by name get confidence 0.95, assets matched by
+            ticker 0.9, and projects implied only by a ticker 0.85.
+        """
+        entities: Dict[str, LinkedEntity] = {}
+
+        def add(entity: LinkedEntity) -> None:
+            # First match wins: an ID that is already recorded is left untouched.
+            entities.setdefault(entity.stable_id, entity)
+
+        full_text = f"{title or ''}\n{text or ''}"
+
+        # Match project names
+        for project_name, pattern in self._project_patterns:
+            if pattern.search(full_text):
+                canonical_name = self._canonical_project_name(project_name, project_name)
+                add(LinkedEntity(
+                    stable_id=self._generate_stable_id("project", canonical_name),
+                    entity_type="project",
+                    name=canonical_name,
+                    confidence=0.95
+                ))
+
+        # Match tickers
+        for ticker in self._TICKER_PATTERN.findall(full_text):
+            if ticker not in self._asset_tickers:
+                continue
+            add(LinkedEntity(
+                stable_id=self._generate_stable_id("asset", ticker),
+                entity_type="asset",
+                name=ticker,
+                ticker=ticker,
+                confidence=0.9
+            ))
+            # Also link the associated project(s), using the canonical ID
+            for project_name in TICKER_TO_PROJECT.get(ticker, ()):
+                canonical_name = self._canonical_project_name(project_name.lower(), project_name)
+                add(LinkedEntity(
+                    stable_id=self._generate_stable_id("project", canonical_name),
+                    entity_type="project",
+                    name=canonical_name,
+                    confidence=0.85
+                ))
+
+        return list(entities.values())
+
+    def link_article(
+        self,
+        title: Optional[str],
+        summary: Optional[str],
+        content: Optional[str]
+    ) -> List[LinkedEntity]:
+        """Link an article's content to on-chain entities."""
+        combined_text = "\n".join([title or "", summary or "", content or ""])
+        return self.link_text(combined_text, title)
+
+
+# Small labeled test set for measure_precision (only "stable_id" is compared)
+LABELED_TEST_SET = [
+    {
+        "text": "Stellar Development Foundation (SDF) announces new Soroban upgrade. XLM price surges.",
+        "expected_entities": [
+            {"stable_id": "project:stellar", "type": "project"},
+            {"stable_id": "project:soroban", "type": "project"},
+            {"stable_id": "asset:xlm", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Bitcoin (BTC) reaches new all-time high. Ethereum (ETH) follows closely.",
+        "expected_entities": [
+            {"stable_id": "asset:btc", "type": "asset"},
+            {"stable_id": "asset:eth", "type": "asset"}
+        ]
+    },
+    {
+        "text": "DeFi protocol Uniswap launches new liquidity pool on Solana.",
+        "expected_entities": [
+            {"stable_id": "project:uniswap", "type": "project"},
+            {"stable_id": "asset:sol", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Cardano (ADA) releases new roadmap for governance.",
+        "expected_entities": [
+            {"stable_id": "asset:ada", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Tech stocks rally on positive earnings. Apple and Microsoft lead gains.",
+        "expected_entities": []  # No crypto entities
+    }
+]
+
+
+def measure_precision(entity_linker: EntityLinker, test_set: Optional[List[Dict]] = None) -> Dict[str, float]:
+    """
+    Measure precision of the entity linker using a labeled test set.
+
+    Args:
+        entity_linker: Linker whose ``link_text`` output is scored.
+        test_set: Cases shaped like LABELED_TEST_SET, which is the default.
+
+    Returns:
+        Dictionary with precision, recall and F1 plus the raw counts.
+        Precision and recall are 1.0 when their denominator is zero.
+    """
+    cases = LABELED_TEST_SET if test_set is None else test_set
+    true_positives = false_positives = total_expected = 0
+
+    for test_case in cases:
+        expected = test_case["expected_entities"]
+        expected_stable_ids = {e["stable_id"] for e in expected}
+        total_expected += len(expected)
+
+        # A linked entity is a hit only when its stable ID was expected.
+        actual = entity_linker.link_text(test_case["text"])
+        hits = sum(1 for entity in actual if entity.stable_id in expected_stable_ids)
+        true_positives += hits
+        false_positives += len(actual) - hits
+
+    predicted = true_positives + false_positives
+    precision = true_positives / predicted if predicted > 0 else 1.0
+    recall = true_positives / total_expected if total_expected > 0 else 1.0
+    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
+
+    return {
+        "precision": precision,
+        "recall": recall,
+        "f1": f1,
+        "true_positives": true_positives,
+        "false_positives": false_positives,
+        "total_expected": total_expected,
+        "test_cases": len(cases)
+    }
diff --git a/apps/data-processing/src/ingestion/soroban_event_indexer.py b/apps/data-processing/src/ingestion/soroban_event_indexer.py
new file mode 100644
index 00000000..41bbf0f6
--- /dev/null
+++ b/apps/data-processing/src/ingestion/soroban_event_indexer.py
@@ -0,0 +1,267 @@
+"""
+Soroban Event Indexer for incremental sync
+Polls Soroban RPC for new events and sends them to backend for processing
+"""
+
+import os
+import time
+import json
+import logging
+from pathlib import Path
+from datetime import datetime, timezone
+import requests
+from typing import List, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+class SorobanEventIndexer:
+    """Poll Soroban RPC for events and forward them to the backend, checkpointing by ledger."""
+
+    def __init__(
+        self,
+        rpc_url: str,
+        backend_url: str,
+        ingest_secret: str,
+        contract_ids: Optional[List[str]] = None,
+        state_file: str = "./data/soroban_indexer_state.json",
+        poll_interval: int = 30
+    ):
+        self.rpc_url = rpc_url
+        self.backend_url = backend_url
+        self.ingest_secret = ingest_secret
+        self.contract_ids = contract_ids or []  # empty list: no contract filter is sent
+        self.state_file = Path(state_file)
+        self.poll_interval = poll_interval  # seconds slept between runs in run_forever
+        self.last_ledger: int = self._load_last_ledger()
+
+    def _load_last_ledger(self) -> int:
+        """Return the checkpointed ledger, or 0 when no usable state file exists."""
+        try:
+            with open(self.state_file, 'r') as f:
+                return int(json.load(f).get("last_ledger", 0))
+        except FileNotFoundError:
+            return 0
+        except (OSError, ValueError, TypeError, AttributeError):
+            # Unreadable file, malformed JSON, or JSON of an unexpected shape.
+            logger.warning("Failed to load state file, starting from ledger 0")
+            return 0
+
+    def _save_last_ledger(self, ledger: int):
+        """Persist ``ledger`` as the checkpoint and mirror it in ``self.last_ledger``."""
+        self.state_file.parent.mkdir(parents=True, exist_ok=True)
+        state = {"last_ledger": ledger, "timestamp": datetime.now(timezone.utc).isoformat()}
+        # Write a sibling temp file and swap it in so a crash cannot truncate the checkpoint.
+        tmp_file = self.state_file.with_name(self.state_file.name + ".tmp")
+        with open(tmp_file, 'w') as f:
+            json.dump(state, f)
+        os.replace(tmp_file, self.state_file)
+        self.last_ledger = ledger
+
+    def fetch_latest_ledger(self) -> int:
+        """Return the latest ledger sequence from RPC ``getLatestLedger``; failures are raised."""
+        payload = {"jsonrpc": "2.0", "id": 1, "method": "getLatestLedger"}
+        try:
+            response = requests.post(self.rpc_url, json=payload, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+            if "error" in data:
+                # A JSON-RPC error carries no result; do not mistake it for ledger 0.
+                raise RuntimeError(f"RPC Error: {data['error']}")
+            return int(data.get("result", {}).get("sequence", 0))
+        except Exception as e:
+            logger.error(f"Failed to fetch latest ledger: {e}")
+            raise
+
+    def fetch_events_since(self, start_ledger: int) -> List[Dict]:
+        """Return all events from ``start_ledger`` onwards, following pagination.
+
+        Raises:
+            RuntimeError: if the RPC replies with a JSON-RPC error object.
+            Exception: transport, HTTP and decoding errors are logged and re-raised.
+        """
+        filters = []
+        if self.contract_ids:
+            filters.append({"type": "contract", "contractIds": self.contract_ids})
+
+        all_events = []
+        cursor = None
+        while True:
+            pagination = {"limit": 100}
+            params = {"filters": filters, "pagination": pagination}
+            if cursor:
+                # Follow-up pages are addressed by cursor alone: getEvents does not
+                # accept startLedger together with a cursor.
+                pagination["cursor"] = cursor
+            else:
+                params["startLedger"] = start_ledger
+            payload = {"jsonrpc": "2.0", "id": 1, "method": "getEvents", "params": params}
+
+            try:
+                response = requests.post(self.rpc_url, json=payload, timeout=30)
+                response.raise_for_status()
+                data = response.json()
+            except Exception as e:
+                logger.error(f"RPC Request failed: {e}")
+                raise
+
+            if "error" in data:
+                logger.error(f"RPC Error: {data['error']}")
+                raise RuntimeError(f"RPC Error: {data['error']}")
+
+            result = data.get("result", {})
+            events = result.get("events", [])
+            all_events.extend(events)
+
+            # A short page means there is nothing further to fetch.
+            if len(events) < 100:
+                break
+
+            # NOTE(review): newer RPC releases appear to replace the per-event pagingToken
+            # with a result-level cursor / event id - confirm for the target RPC.
+            last_event = events[-1]
+            next_cursor = last_event.get("pagingToken") or result.get("cursor") or last_event.get("id")
+            if not next_cursor or next_cursor == cursor:
+                break  # no usable cursor, or no progress: stop rather than loop forever
+            cursor = next_cursor
+
+            time.sleep(0.5)  # Rate limiting
+
+        return all_events
+
+    def send_event_to_backend(self, event: Dict, event_index: int) -> bool:
+        """POST one event to the backend ingest endpoint.
+
+        Returns:
+            True when the request completed without an HTTP error status; False on
+            any request failure, which is logged instead of raised.
+        """
+        tx_hash = event.get("transactionHash", "")
+        ingest_payload = {
+            "txHash": tx_hash,
+            "eventIndex": event_index,
+            "ledgerSequence": int(event.get("ledger", 0)),
+            "contractId": event.get("contractId"),
+            "eventType": event.get("type"),
+            "rawPayload": event
+        }
+        headers = {"Content-Type": "application/json", "x-ingest-secret": self.ingest_secret}
+        try:
+            response = requests.post(
+                f"{self.backend_url}/soroban-events/ingest",
+                json=ingest_payload,
+                headers=headers,
+                timeout=30
+            )
+            response.raise_for_status()
+            logger.debug(f"Successfully sent event {tx_hash}:{event_index} to backend")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to send event {tx_hash}:{event_index} to backend: {e}")
+            return False
+
+    def run_once(self) -> Dict:
+        """Run one fetch-and-forward pass and advance the checkpoint.
+
+        Delivery is at-least-once: forwarding stops at the first event that fails to
+        send and the checkpoint is held before that event's ledger, so the ledger is
+        fetched again on the next run instead of being skipped.
+
+        Returns:
+            Summary dict whose "status" is "success", "no_new_ledgers" or "error".
+        """
+        logger.info("=" * 60)
+        logger.info("SOROBAN EVENT INDEXER - INCREMENTAL SYNC")
+        logger.info("=" * 60)
+        try:
+            latest_ledger = self.fetch_latest_ledger()
+            logger.info(f"Latest ledger: {latest_ledger}")
+            logger.info(f"Last processed ledger: {self.last_ledger}")
+
+            if latest_ledger <= self.last_ledger:
+                logger.info("No new ledgers to process")
+                return {"status": "no_new_ledgers", "events_processed": 0}
+
+            start_ledger = self.last_ledger + 1
+            logger.info(f"Fetching events from ledger {start_ledger} to {latest_ledger}")
+            events = self.fetch_events_since(start_ledger)
+            logger.info(f"Found {len(events)} new events")
+
+            sent_count = 0
+            failed_count = 0
+            highest_ledger = self.last_ledger
+            # eventIndex is the event's ordinal among the fetched events of its own
+            # transaction, so it is unchanged when a ledger is fetched again.
+            seen_per_tx: Dict[str, int] = {}
+            for event in events:
+                event_ledger = int(event.get("ledger", 0))
+                tx_hash = event.get("transactionHash", "")
+                event_index = seen_per_tx.get(tx_hash, 0)
+                seen_per_tx[tx_hash] = event_index + 1
+                if not self.send_event_to_backend(event, event_index):
+                    failed_count += 1
+                    # Hold the checkpoint just before the ledger that failed.
+                    highest_ledger = max(self.last_ledger, min(highest_ledger, event_ledger - 1))
+                    break
+                sent_count += 1
+                highest_ledger = max(highest_ledger, event_ledger)
+
+            self._save_last_ledger(highest_ledger)
+            logger.info(f"Sent {sent_count} events to backend, {failed_count} failed")
+            logger.info(f"Updated last processed ledger to {highest_ledger}")
+            logger.info("=" * 60)
+            return {
+                "status": "success",
+                "events_found": len(events),
+                "events_sent": sent_count,
+                "events_failed": failed_count,
+                "last_ledger": highest_ledger
+            }
+        except Exception as e:
+            logger.error(f"Error in indexer run: {e}", exc_info=True)
+            return {"status": "error", "error": str(e)}
+
+    def run_forever(self):
+        """Call run_once in an endless loop, sleeping poll_interval seconds between runs."""
+        logger.info("Starting Soroban event indexer (continuous mode)")
+        logger.info(f"Poll interval: {self.poll_interval} seconds")
+        while True:
+            self.run_once()
+            time.sleep(self.poll_interval)
+
+def main():
+    """Build a SorobanEventIndexer from CLI arguments (with environment defaults) and run it."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Soroban Event Indexer")
+    parser.add_argument("--rpc-url", type=str, default=os.getenv("SOROBAN_RPC_URL", "https://soroban-testnet.stellar.org"), help="Soroban RPC URL")
+    parser.add_argument("--backend-url", type=str, default=os.getenv("BACKEND_URL", "http://localhost:3000"), help="Backend API URL")
+    parser.add_argument("--ingest-secret", type=str, default=os.getenv("SOROBAN_INGEST_SECRET", ""), help="Secret for backend ingest endpoint")
+    parser.add_argument("--contract-ids", nargs="*", default=os.getenv("SOROBAN_CONTRACT_IDS", "").split(","), help="List of contract IDs to index (comma-separated)")
+    parser.add_argument("--state-file", type=str, default="./data/soroban_indexer_state.json", help="Path to state file")
+    parser.add_argument("--poll-interval", type=int, default=30, help="Poll interval in seconds")
+    parser.add_argument("--once", action="store_true", help="Run once and exit")
+    args = parser.parse_args()
+
+    # Accept "--contract-ids A B" as well as "A,B": split every value on commas, drop blanks.
+    contract_ids = [cid.strip() for raw in args.contract_ids for cid in raw.split(",") if cid.strip()]
+
+    indexer = SorobanEventIndexer(
+        rpc_url=args.rpc_url,
+        backend_url=args.backend_url,
+        ingest_secret=args.ingest_secret,
+        contract_ids=contract_ids,
+        state_file=args.state_file,
+        poll_interval=args.poll_interval
+    )
+
+    if args.once:
+        indexer.run_once()
+    else:
+        indexer.run_forever()
+
+if __name__ == "__main__":  # executed as a script: configure logging, then run the CLI
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(levelname)s - %(message)s"
+    )
+    main()
diff --git a/apps/data-processing/standalone_test.py b/apps/data-processing/standalone_test.py
new file mode 100644
index 00000000..30361134
--- /dev/null
+++ b/apps/data-processing/standalone_test.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+"""
+Standalone test for Entity Linker core logic
+"""
+
+import logging
+import re
+from typing import Dict, List, Optional, Tuple
+from dataclasses import dataclass
+
+# Configure root logging when this module is loaded: INFO level, timestamped records
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+# Copy of the relevant constants from keywords.py
+CRYPTO_PROJECT_MAP: dict[str, List[str]] = {  # lower-case alias -> name list; EntityLinker uses the last item as the canonical project name
+    "stellar": ["XLM", "Stellar"],
+    "xlm": ["XLM", "Stellar"],
+    "soroban": ["XLM", "Soroban"],
+    "stellar development foundation": ["SDF", "Stellar"],
+    "bitcoin": ["BTC", "Bitcoin"],
+    "btc": ["BTC", "Bitcoin"],
+    "ethereum": ["ETH", "Ethereum"],
+    "eth": ["ETH", "Ethereum"],
+    "solana": ["SOL", "Solana"],
+    "sol": ["SOL", "Solana"],
+    "usdc": ["USDC", "USDC"],
+    "usd coin": ["USDC", "USDC"],
+    "ripple": ["XRP", "Ripple"],
+    "xrp": ["XRP", "XRP"],
+    "cardano": ["ADA", "Cardano"],
+    "ada": ["ADA", "ADA"],
+    "polkadot": ["DOT", "Polkadot"],
+    "dot": ["DOT", "DOT"],
+    "dogecoin": ["DOGE", "Dogecoin"],
+    "doge": ["DOGE", "DOGE"],
+    "litecoin": ["LTC", "Litecoin"],
+    "ltc": ["LTC", "LTC"],
+    "chainlink": ["LINK", "Chainlink"],
+    "link": ["LINK", "LINK"],
+    "avalanche": ["AVAX", "Avalanche"],
+    "avax": ["AVAX", "AVAX"],
+    "polygon": ["MATIC", "Polygon"],
+    "matic": ["MATIC", "MATIC"],
+    "algorand": ["ALGO", "Algorand"],
+    "algo": ["ALGO", "ALGO"],
+    "cosmos": ["ATOM", "Cosmos"],
+    "atom": ["ATOM", "ATOM"],
+    "uniswap": ["UNI", "Uniswap"],
+    "defi": ["DeFi", "DeFi"],
+    "nft": ["NFT", "NFT"],
+    "nfts": ["NFT", "NFT"],
+}
+
+KNOWN_TICKERS = {  # upper-case symbols accepted by the ticker scan (EntityLinker leaves out "SDF")
+    "XLM", "BTC", "ETH", "SOL", "USDC", "XRP", "ADA", "DOT", "DOGE", "LTC",
+    "LINK", "AVAX", "MATIC", "ALGO", "ATOM", "UNI", "USDT", "BUSD", "BNB", "SDF"
+}
+
+TICKER_TO_PROJECT: dict[str, List[str]] = {  # ticker -> project names that are linked as well when the ticker is found
+    "XLM": ["Stellar"],
+    "BTC": ["Bitcoin"],
+    "ETH": ["Ethereum"],
+    "SOL": ["Solana"],
+    "XRP": ["Ripple"],
+    "ADA": ["Cardano"],
+    "DOT": ["Polkadot"],
+    "DOGE": ["Dogecoin"],
+    "LTC": ["Litecoin"],
+    "LINK": ["Chainlink"],
+    "AVAX": ["Avalanche"],
+    "MATIC": ["Polygon"],
+    "ALGO": ["Algorand"],
+    "ATOM": ["Cosmos"],
+    "UNI": ["Uniswap"],
+    "USDC": ["USDC"],
+    "USDT": ["Tether"],
+}
+
+
+@dataclass
+class LinkedEntity:  # one entity produced by EntityLinker.link_text
+    stable_id: str  # "<entity_type>:<lower-cased identifier>", see _generate_stable_id
+    entity_type: str  # "project" or "asset"
+    name: str
+    ticker: Optional[str] = None  # link_text sets this for assets only
+    confidence: float = 1.0
+
+
+class EntityLinker:
+    """Rule-based linker that maps project names and tickers in text to stable IDs.
+
+    Matching relies on the module-level CRYPTO_PROJECT_MAP, KNOWN_TICKERS and
+    TICKER_TO_PROJECT tables.
+    """
+
+    def __init__(self) -> None:
+        self._project_patterns = self._compile_project_patterns()
+        # "SDF" is a known ticker but is not linked as an asset.
+        self._asset_tickers = KNOWN_TICKERS - {"SDF"}
+
+    def _compile_project_patterns(self) -> List[Tuple[str, re.Pattern]]:
+        """Return (key, regex) pairs, longest key first; regexes are case-insensitive whole-word matches."""
+        keys_longest_first = sorted(CRYPTO_PROJECT_MAP, key=len, reverse=True)
+        return [
+            (key, re.compile(r"\b" + re.escape(key) + r"\b", re.IGNORECASE))
+            for key in keys_longest_first
+        ]
+
+    def _generate_stable_id(self, entity_type: str, identifier: str) -> str:
+        """Return "<entity_type>:<identifier>" with the identifier trimmed and lower-cased."""
+        return f"{entity_type}:{identifier.strip().lower()}"
+
+    def link_text(
+        self,
+        text: str,
+        title: Optional[str] = None
+    ) -> List[LinkedEntity]:
+        """Return the distinct entities mentioned in ``title`` plus ``text``.
+
+        Project-name matches are recorded first (confidence 0.95), then known
+        upper-case tickers (0.9) together with the projects they map to (0.85).
+        The first entity recorded under a stable ID is the one that is kept.
+        """
+        found: Dict[str, LinkedEntity] = {}
+        haystack = f"{title or ''}\n{text or ''}"
+
+        def remember_project(name: str, confidence: float) -> None:
+            # Record a project entity unless its stable ID is already present.
+            project_id = self._generate_stable_id("project", name.lower())
+            if project_id not in found:
+                found[project_id] = LinkedEntity(
+                    stable_id=project_id,
+                    entity_type="project",
+                    name=name,
+                    confidence=confidence
+                )
+
+        for key, pattern in self._project_patterns:
+            if pattern.search(haystack):
+                aliases = CRYPTO_PROJECT_MAP[key]
+                remember_project(aliases[-1] if aliases else key, 0.95)
+
+        for symbol in re.findall(r"\b([A-Z]{2,6})\b", haystack):
+            symbol = symbol.upper()
+            if symbol not in self._asset_tickers:
+                continue
+            asset_id = self._generate_stable_id("asset", symbol)
+            if asset_id not in found:
+                found[asset_id] = LinkedEntity(
+                    stable_id=asset_id,
+                    entity_type="asset",
+                    name=symbol,
+                    ticker=symbol,
+                    confidence=0.9
+                )
+            for project in TICKER_TO_PROJECT.get(symbol, []):
+                remember_project(CRYPTO_PROJECT_MAP.get(project.lower(), [project])[-1], 0.85)
+
+        return list(found.values())
+
+    def link_article(
+        self,
+        title: Optional[str],
+        summary: Optional[str],
+        content: Optional[str]
+    ) -> List[LinkedEntity]:
+        """Link entities across an article's title, summary and content."""
+        parts = [title or "", summary or "", content or ""]
+        # The title is also passed separately, so link_text scans it twice.
+        return self.link_text("\n".join(parts), title)
+
+
+LABELED_TEST_SET = [  # hand-labeled texts scored by measure_precision
+    {
+        "text": "Stellar Development Foundation (SDF) announces new Soroban upgrade. XLM price surges.",
+        "expected_entities": [
+            {"stable_id": "project:stellar", "type": "project"},
+            {"stable_id": "project:soroban", "type": "project"},
+            {"stable_id": "asset:xlm", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Bitcoin (BTC) reaches new all-time high. Ethereum (ETH) follows closely.",
+        "expected_entities": [  # NOTE(review): link_text also yields project:bitcoin / project:ethereum here; they score as false positives
+            {"stable_id": "asset:btc", "type": "asset"},
+            {"stable_id": "asset:eth", "type": "asset"}
+        ]
+    },
+    {
+        "text": "DeFi protocol Uniswap launches new liquidity pool on Solana.",
+        "expected_entities": [  # NOTE(review): the text has no upper-case "SOL", so the ticker scan cannot yield asset:sol
+            {"stable_id": "project:uniswap", "type": "project"},
+            {"stable_id": "asset:sol", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Cardano (ADA) releases new roadmap for governance.",
+        "expected_entities": [  # NOTE(review): link_text also yields project:cardano and project:ada for this text
+            {"stable_id": "asset:ada", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Tech stocks rally on positive earnings. Apple and Microsoft lead gains.",
+        "expected_entities": []  # No crypto entities
+    }
+]
+
+
+def measure_precision(entity_linker: EntityLinker) -> Dict[str, float]:
+    """Score the linker on LABELED_TEST_SET: precision, recall, F1 and the raw counts."""
+    true_positives = 0
+    false_positives = 0
+    total_expected = 0
+
+    for test_case in LABELED_TEST_SET:
+        expected = test_case["expected_entities"]
+        expected_stable_ids = {e["stable_id"] for e in expected}
+        total_expected += len(expected)
+
+        # One flag per linked entity: True when it was expected for this text.
+        actual = entity_linker.link_text(test_case["text"])
+        matches = [e.stable_id in expected_stable_ids for e in actual]
+        true_positives += sum(matches)
+        false_positives += len(matches) - sum(matches)
+
+    predicted = true_positives + false_positives
+    # An empty denominator (nothing predicted / nothing expected) scores as 1.0,
+    # while F1 falls back to 0.0 when precision and recall are both zero.
+    precision = true_positives / predicted if predicted > 0 else 1.0
+    recall = true_positives / total_expected if total_expected > 0 else 1.0
+    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
+
+    return {
+        "precision": precision,
+        "recall": recall,
+        "f1": f1,
+        "true_positives": true_positives,
+        "false_positives": false_positives,
+        "total_expected": total_expected,
+        "test_cases": len(LABELED_TEST_SET)
+    }
+
+
+def test_entity_linker():
+    """Log the entities linked in one sample sentence, then the labeled-set metrics."""
+    banner = "=" * 60
+    logger.info(banner)
+    logger.info("Testing Entity Linker")
+    logger.info(banner)
+
+    entity_linker = EntityLinker()
+    test_text = "Stellar Development Foundation (SDF) announces new Soroban upgrade. XLM price surges."
+    linked_entities = entity_linker.link_text(test_text)
+
+    logger.info(f"\nTest text: {test_text}")
+    logger.info("Linked entities:")
+    for found in linked_entities:
+        logger.info(f"  - {found.name} ({found.entity_type}), stable ID: {found.stable_id}")
+
+    logger.info("\n" + banner)
+    logger.info("Measuring Entity Linker Precision")
+    logger.info(banner)
+
+    metrics = measure_precision(entity_linker)
+    # The three ratio metrics share one format; the raw counts are logged as-is.
+    for label, key in (("Precision", "precision"), ("Recall", "recall"), ("F1 Score", "f1")):
+        logger.info(f"{label}: {metrics[key]:.4f}")
+    logger.info(f"True Positives: {metrics['true_positives']}")
+    logger.info(f"False Positives: {metrics['false_positives']}")
+    logger.info(f"Total Expected: {metrics['total_expected']}")
+
+if __name__ == "__main__":  # executed as a script: run the logging demo
+    test_entity_linker()
diff --git a/apps/data-processing/test_entity_linker.py b/apps/data-processing/test_entity_linker.py
new file mode 100644
index 00000000..69d4cfb5
--- /dev/null
+++ b/apps/data-processing/test_entity_linker.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Test script for the Entity Linker functionality
+"""
+
+import logging
+import sys
+
+# Configure root logging when this module is loaded: INFO level, timestamped records
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+# Add src to path. NOTE(review): 'src' is resolved against the current working directory, while the import below still uses the "src." prefix - confirm the intended layout
+sys.path.insert(0, 'src')
+
+from src.analytics.entity_linker import EntityLinker, measure_precision
+
+
+def test_entity_linker():
+    """Log the entities linked in one sample sentence, then the labeled-set metrics."""
+    banner = "=" * 60
+    logger.info(banner)
+    logger.info("Testing Entity Linker")
+    logger.info(banner)
+
+    # Part 1: link a single sample sentence and list what was found.
+    entity_linker = EntityLinker()
+    test_text = "Stellar Development Foundation (SDF) announces new Soroban upgrade. XLM price surges."
+    linked_entities = entity_linker.link_text(test_text)
+
+    logger.info(f"\nTest text: {test_text}")
+    logger.info("Linked entities:")
+    for found in linked_entities:
+        logger.info(f"  - {found.name} ({found.entity_type}), stable ID: {found.stable_id}")
+
+    # Part 2: score the same linker against the labeled test set.
+    logger.info("\n" + banner)
+    logger.info("Measuring Entity Linker Precision")
+    logger.info(banner)
+
+    metrics = measure_precision(entity_linker)
+    # The three ratio metrics share one format; the raw counts are logged as-is.
+    for label, key in (("Precision", "precision"), ("Recall", "recall"), ("F1 Score", "f1")):
+        logger.info(f"{label}: {metrics[key]:.4f}")
+
+    logger.info(f"True Positives: {metrics['true_positives']}")
+    logger.info(f"False Positives: {metrics['false_positives']}")
+    logger.info(f"Total Expected: {metrics['total_expected']}")
+
+if __name__ == "__main__":  # executed as a script: run the logging demo
+    test_entity_linker()
diff --git a/apps/data-processing/trends.py b/apps/data-processing/trends.py
new file mode 100644
index 0000000000000000000000000000000000000000..597a38e3fac81e44a843d9256716da143451d61c
GIT binary patch
literal 5268
ucmeIuF#!Mo0K%a4Pi+e?h(KY$fB^#r3>YwAz<>b*1`HT5V8DQZ-@pJAlmGz$

literal 0
HcmV?d00001

diff --git a/apps/data-processing/utils/http_client.py b/apps/data-processing/utils/http_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..50ffb9f3648ae8cbe234aba1690ceed063b47e32
GIT binary patch
literal 5456
vcmeIufdBvi0K=g9Q(xf#g-~I@fB^#r3>YwAz<>b*1`HT5V8DO@1JA$!6;J>H

literal 0
HcmV?d00001

diff --git a/apps/data-processing/utils/logger.py b/apps/data-processing/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..40c4ab4d3a8829e3957bbe4e2c2480f046b474ce
GIT binary patch
literal 1460
XcmZQz7zLvtFd71*Aut*OLnQ<N1+)MG

literal 0
HcmV?d00001

diff --git a/apps/data-processing/utils/metrics.py b/apps/data-processing/utils/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..c97e68b579d2b6d584ff0efb06cbd015ff94ad24
GIT binary patch
literal 1286
VcmZQz7zLvtFd71*Auy6d000FB00961

literal 0
HcmV?d00001

diff --git a/apps/data-processing/utils/translator.py b/apps/data-processing/utils/translator.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f9ed4e51a691bf2c7da9b1fdd8a40e280cfb9af
GIT binary patch
literal 2789
icmeIu0Sy2E0K%a6Pi+qe5hx58Fkrxd0RsjMoDB>L<p2Qy

literal 0
HcmV?d00001

diff --git a/apps/data-processing/validators.py b/apps/data-processing/validators.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c4644483fc643bf320dd87b9d295841809d8eb0
GIT binary patch
literal 1638
acmZQz7zLvtFd71*Aut*Oqai@65C8xMW&i;I

literal 0
HcmV?d00001

diff --git a/temp_backup/1774000000001-AddLedgerSequenceToSorobanEvents.ts b/temp_backup/1774000000001-AddLedgerSequenceToSorobanEvents.ts
new file mode 100644
index 00000000..6b41e5d6
--- /dev/null
+++ b/temp_backup/1774000000001-AddLedgerSequenceToSorobanEvents.ts
@@ -0,0 +1,19 @@
+import { MigrationInterface, QueryRunner } from 'typeorm';
+
+export class AddLedgerSequenceToSorobanEvents1774000000001 implements MigrationInterface { // NOTE(review): INTEGER is signed 32-bit in PostgreSQL (assumed dialect) - confirm it covers ledger sequences
+  async up(queryRunner: QueryRunner): Promise<void> { // adds soroban_events.ledger_sequence (default 0) and an index on it
+    await queryRunner.query(`
+      ALTER TABLE soroban_events 
+      ADD COLUMN ledger_sequence INTEGER NOT NULL DEFAULT 0;
+      
+      CREATE INDEX idx_soroban_events_ledger_sequence ON soroban_events (ledger_sequence);
+    `);
+  }
+  // down() reverts up(): the index is dropped first, then the column.
+  async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+      DROP INDEX IF EXISTS idx_soroban_events_ledger_sequence;
+      ALTER TABLE soroban_events DROP COLUMN IF EXISTS ledger_sequence;
+    `);
+  }
+}
diff --git a/temp_backup/backfill_contract_events.py b/temp_backup/backfill_contract_events.py
new file mode 100644
index 00000000..9be016a0
--- /dev/null
+++ b/temp_backup/backfill_contract_events.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+"""
+Soroban Contract Event Backfill Script
+
+Fetches events for specific Soroban contract IDs within a given ledger range.
+Saves results idempotently to allow safe re-runs.
+Sends events to backend ingest endpoint for processing.
+
+Usage:
+    python scripts/backfill_contract_events.py --contract-ids CABC... --start-ledger 1000 --end-ledger 2000
+"""
+
+import os
+import sys
+import json
+import time
+import argparse
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+import requests
+
+logging.basicConfig(  # runs when the module is loaded: INFO-level records go to stdout
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler(sys.stdout)],
+)
+logger = logging.getLogger(__name__)
+
+class BackfillContractEvents:
+    def __init__(
+        self,
+        contract_ids,
+        start_ledger,
+        end_ledger,
+        output_dir,
+        rpc_url,
+        backend_url,
+        ingest_secret,
+        batch_size,
+        dry_run=False
+    ):
+        """Store the backfill settings; create ``output_dir`` unless this is a dry run."""
+        self.rpc_url = rpc_url
+        self.backend_url = backend_url
+        self.ingest_secret = ingest_secret
+        self.contract_ids = contract_ids
+        self.start_ledger, self.end_ledger = start_ledger, end_ledger
+        self.batch_size = batch_size
+        self.dry_run = dry_run
+        self.output_dir = Path(output_dir)
+        if dry_run:
+            return
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+    def _get_output_filepath(self, contract_id, batch_start, batch_end):
+        return self.output_dir.joinpath(f"{contract_id}_{batch_start}_{batch_end}.json")  # one result file per contract and ledger batch
+
+    def _is_already_processed(self, filepath):
+        """Return True when ``filepath`` holds JSON whose "status" is "completed"."""
+        if not filepath.exists():
+            return False
+        try:
+            with open(filepath, 'r') as f:
+                return json.load(f).get("status") == "completed"
+        except json.JSONDecodeError:
+            # An unparsable result file counts as not processed.
+            return False
+
+    def fetch_events_batch(self, contract_id, batch_start, batch_end):
+        """Fetch one contract's events for ledgers batch_start..batch_end from Soroban RPC.
+
+        Pages are followed until one is short or runs past ``batch_end``.
+
+        Args:
+            contract_id: contract whose events are requested.
+            batch_start: first ledger of the batch; sent as ``startLedger``.
+            batch_end: last ledger of the batch; later events are discarded.
+
+        Returns:
+            List of raw event dicts whose "ledger" is <= batch_end.
+
+        Raises:
+            RuntimeError: if the RPC replies with a JSON-RPC error object.
+            Exception: transport, HTTP and decoding errors are logged and re-raised.
+        """
+        filters = [{"type": "contract", "contractIds": [contract_id]}]
+        all_events = []
+        cursor = None
+
+        while True:
+            pagination = {"limit": 100}
+            params = {"filters": filters, "pagination": pagination}
+            if cursor:
+                # Follow-up pages are addressed by cursor alone: getEvents does not
+                # accept startLedger together with a cursor.
+                pagination["cursor"] = cursor
+            else:
+                params["startLedger"] = batch_start
+            payload = {"jsonrpc": "2.0", "id": 1, "method": "getEvents", "params": params}
+
+            try:
+                response = requests.post(self.rpc_url, json=payload, timeout=30)
+                response.raise_for_status()
+                data = response.json()
+            except Exception as e:
+                logger.error(f"RPC Request failed: {e}")
+                raise
+
+            if "error" in data:
+                logger.error(f"RPC Error: {data['error']}")
+                raise RuntimeError(f"RPC Error: {data['error']}")
+
+            result = data.get("result", {})
+            events = result.get("events", [])
+
+            # The request sets no upper ledger bound, so drop events past batch_end.
+            all_events.extend(
+                event for event in events if int(event.get("ledger", 0)) <= batch_end
+            )
+
+            # Stop on a short page, or once the page has run past batch_end.
+            if len(events) < 100:
+                break
+            last_event = events[-1]
+            if int(last_event.get("ledger", 0)) > batch_end:
+                break
+
+            # NOTE(review): newer RPC releases appear to replace the per-event pagingToken
+            # with a result-level cursor / event id - confirm for the target RPC.
+            next_cursor = last_event.get("pagingToken") or result.get("cursor") or last_event.get("id")
+            if not next_cursor or next_cursor == cursor:
+                break  # no usable cursor, or no progress: stop rather than loop forever
+            cursor = next_cursor
+
+            time.sleep(0.5) # Rate limiting
+
+        return all_events
+
+    def send_event_to_backend(self, event, event_index):
+        """POST one event to the backend ingest endpoint.
+
+        Returns True when the request completed without an HTTP error status and
+        False on any request failure, which is logged instead of raised.
+        """
+        tx_hash = event.get("transactionHash", "")
+        endpoint = f"{self.backend_url}/soroban-events/ingest"
+
+        request_headers = {
+            "Content-Type": "application/json",
+            "x-ingest-secret": self.ingest_secret,
+        }
+
+        # The untouched RPC event travels along as rawPayload.
+        body = {
+            "txHash": tx_hash,
+            "eventIndex": event_index,
+            "ledgerSequence": int(event.get("ledger", 0)),
+            "contractId": event.get("contractId"),
+            "eventType": event.get("type"),
+            "rawPayload": event,
+        }
+
+        try:
+            response = requests.post(
+                endpoint, json=body, headers=request_headers, timeout=30
+            )
+            response.raise_for_status()
+        except Exception as e:
+            logger.error(f"Failed to send event {tx_hash}:{event_index} to backend: {e}")
+            return False
+
+        logger.debug(f"Successfully sent event {tx_hash}:{event_index} to backend")
+        return True
+
+    def run(self):
+        """Backfill every contract's events in ledger batches and return the run statistics.
+
+        Batches for which ``_is_already_processed`` is true are skipped (their saved
+        ``event_count`` still feeds the totals); dry-run batches are only counted;
+        otherwise events are fetched, sent one by one to the backend, and saved.
+        """
+        logger.info("=" * 60)
+        logger.info("SOROBAN CONTRACT EVENT BACKFILL")
+        logger.info("=" * 60)
+        logger.info(f"Target RPC: {self.rpc_url}")
+        logger.info(f"Backend URL: {self.backend_url}")
+        logger.info(f"Ledger Range: {self.start_ledger} to {self.end_ledger}")
+        logger.info(f"Contracts: {len(self.contract_ids)}")
+        logger.info(f"Batch Size: {self.batch_size}")
+
+        stats = {
+            "total_events": 0,
+            "sent_to_backend": 0,
+            "failed_to_send": 0,
+            "contracts": {},
+            "batches_processed": 0,
+            "batches_skipped": 0,
+            "batches_failed": 0
+        }
+
+        for contract_id in self.contract_ids:
+            stats["contracts"][contract_id] = {"events": 0, "failures": 0}
+            logger.info(f"\nProcessing contract: {contract_id}")
+            current_start = self.start_ledger
+            while current_start <= self.end_ledger:
+                current_end = min(current_start + self.batch_size - 1, self.end_ledger)
+                filepath = self._get_output_filepath(contract_id, current_start, current_end)
+
+                if self._is_already_processed(filepath) and not self.dry_run:
+                    logger.info(f"  [SKIPPED] Ledgers {current_start}-{current_end} already processed")
+                    stats["batches_skipped"] += 1
+                    # Best effort: fold the saved count into the totals; an unreadable file only costs stats.
+                    try:
+                        with open(filepath, 'r') as f:
+                            count = json.load(f).get("event_count", 0)
+                        stats["contracts"][contract_id]["events"] += count
+                        stats["total_events"] += count
+                    except (OSError, ValueError, AttributeError, TypeError) as e:
+                        logger.warning(f"    Could not read event count from {filepath}: {e}")
+                else:
+                    logger.info(f"  [FETCHING] Ledgers {current_start}-{current_end}")
+                    if self.dry_run:
+                        stats["batches_processed"] += 1
+                    else:
+                        try:
+                            events = self.fetch_events_batch(contract_id, current_start, current_end)
+                            # Send each event to backend; idx is its position in this batch's list.
+                            sent_before = stats["sent_to_backend"]
+                            for idx, event in enumerate(events):
+                                if self.send_event_to_backend(event, idx):
+                                    stats["sent_to_backend"] += 1
+                                else:
+                                    stats["failed_to_send"] += 1
+                            batch_sent = stats["sent_to_backend"] - sent_before
+                            batch_failed = len(events) - batch_sent
+
+                            # NOTE(review): the file is written as "completed" even when sends failed;
+                            # confirm a rerun that skips this batch is not expected to resend them.
+                            output_data = {
+                                "contract_id": contract_id,
+                                "start_ledger": current_start,
+                                "end_ledger": current_end,
+                                "event_count": len(events),
+                                "events": events,
+                                "status": "completed",
+                                "timestamp": datetime.now(timezone.utc).isoformat()
+                            }
+                            with open(filepath, 'w') as f:
+                                json.dump(output_data, f, indent=2)
+                            stats["contracts"][contract_id]["events"] += len(events)
+                            stats["total_events"] += len(events)
+                            stats["batches_processed"] += 1
+                            logger.info(f"    Found {len(events)} events, sent {batch_sent} to backend")
+                            if batch_failed:
+                                logger.warning(f"    {batch_failed} events failed to send to backend")
+                        except Exception as e:
+                            logger.error(f"    Failed to process batch: {e}")
+                            stats["batches_failed"] += 1
+                            stats["contracts"][contract_id]["failures"] += 1
+
+                current_start = current_end + 1
+
+        logger.info("\n" + "=" * 60)
+        logger.info("BACKFILL SUMMARY")
+        logger.info("=" * 60)
+        logger.info(f"Total Events Found: {stats['total_events']}")
+        logger.info(f"Events Sent to Backend: {stats['sent_to_backend']}")
+        logger.info(f"Events Failed to Send: {stats['failed_to_send']}")
+        logger.info(f"Batches Processed:  {stats['batches_processed']}")
+        logger.info(f"Batches Skipped:    {stats['batches_skipped']} (Idempotent)")
+        logger.info(f"Batches Failed:     {stats['batches_failed']}")
+        for cid, c_stats in stats["contracts"].items():
+            logger.info(f"Contract {cid[:8]}...: {c_stats['events']} events, {c_stats['failures']} failures")
+        return stats
+
+def parse_args():
+    """Parse the backfill CLI options; RPC URL, backend URL and ingest secret default from the environment."""
+    cli = argparse.ArgumentParser(description="Backfill Soroban contract events")
+    cli.add_argument("--contract-ids", nargs="+", required=True, help="List of contract IDs to backfill")
+    cli.add_argument("--start-ledger", type=int, required=True, help="Starting ledger sequence")
+    cli.add_argument("--end-ledger", type=int, required=True, help="Ending ledger sequence")
+    cli.add_argument("--output-dir", type=str, default="./data/contract_events", help="Directory to save output files")
+    cli.add_argument("--rpc-url", type=str, default=os.getenv("SOROBAN_RPC_URL", "https://soroban-testnet.stellar.org"), help="Soroban RPC URL")
+    cli.add_argument("--backend-url", type=str, default=os.getenv("BACKEND_URL", "http://localhost:3000"), help="Backend API URL")
+    cli.add_argument("--ingest-secret", type=str, default=os.getenv("SOROBAN_INGEST_SECRET", ""), help="Secret for backend ingest endpoint")
+    cli.add_argument("--batch-size", type=int, default=1000, help="Number of ledgers per batch")
+    cli.add_argument("--dry-run", action="store_true", help="Print operations without executing")
+    return cli.parse_args()
+
+def main():
+    """CLI entry point: validate arguments, run the backfill, and exit 0, 1, or 130."""
+    args = parse_args()
+    if args.start_ledger > args.end_ledger:
+        logger.error("start-ledger must be <= end-ledger")
+        sys.exit(1)
+    if args.batch_size < 1:  # otherwise run()'s ledger cursor would never advance
+        logger.error("batch-size must be >= 1")
+        sys.exit(1)
+    backfill = BackfillContractEvents(
+        contract_ids=args.contract_ids,
+        start_ledger=args.start_ledger,
+        end_ledger=args.end_ledger,
+        output_dir=args.output_dir,
+        rpc_url=args.rpc_url,
+        backend_url=args.backend_url,
+        ingest_secret=args.ingest_secret,
+        batch_size=args.batch_size,
+        dry_run=args.dry_run
+    )
+    try:
+        stats = backfill.run()
+        failed = stats["batches_failed"] > 0 or stats["failed_to_send"] > 0
+        sys.exit(1 if failed else 0)
+    except KeyboardInterrupt:
+        logger.info("Backfill interrupted by user")
+        sys.exit(130)
+    except Exception as e:
+        logger.exception(f"Unexpected error: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/temp_backup/soroban-events/dto/ingest-soroban-event.dto.ts b/temp_backup/soroban-events/dto/ingest-soroban-event.dto.ts
new file mode 100644
index 00000000..84885811
--- /dev/null
+++ b/temp_backup/soroban-events/dto/ingest-soroban-event.dto.ts
@@ -0,0 +1,29 @@
+import {
+  IsInt,
+  IsNotEmpty,
+  IsObject,
+  IsOptional,
+  IsString,
+  Min,
+} from 'class-validator';
+/** Request body accepted by the soroban-events ingest endpoint. */
+export class IngestSorobanEventDto {
+  @IsString()
+  @IsNotEmpty()
+  txHash: string; // transaction hash; must be a non-empty string
+  /** Integer >= 0; together with txHash it identifies the event. */
+  @IsInt()
+  @Min(0)
+  eventIndex: number;
+  /** Optional contract identifier (string). */
+  @IsString()
+  @IsOptional()
+  contractId?: string;
+  /** Optional event type (string). */
+  @IsString()
+  @IsOptional()
+  eventType?: string;
+  /** Event payload; validated only as being an object. */
+  @IsObject()
+  rawPayload: Record<string, unknown>;
+}
diff --git a/temp_backup/soroban-events/entities/soroban-event.entity.ts b/temp_backup/soroban-events/entities/soroban-event.entity.ts
new file mode 100644
index 00000000..2f26b297
--- /dev/null
+++ b/temp_backup/soroban-events/entities/soroban-event.entity.ts
@@ -0,0 +1,57 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  Index,
+  PrimaryGeneratedColumn,
+} from 'typeorm';
+/** Processing states stored in the `status` column of SorobanEvent. */
+export enum SorobanEventStatus {
+  PENDING = 'pending',
+  PROCESSED = 'processed',
+  FAILED = 'failed',
+}
+/** Persisted Soroban event; unique per (txHash, eventIndex) and indexed by status. */
+@Entity('soroban_events')
+@Index(['txHash', 'eventIndex'], { unique: true })
+@Index(['status'])
+export class SorobanEvent {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  /** Idempotency key: transaction hash */
+  @Column({ type: 'varchar', length: 128 })
+  txHash: string;
+
+  /** Idempotency key: position of the event within the transaction */
+  @Column({ type: 'integer' })
+  eventIndex: number;
+
+  /** Soroban contract address that emitted the event */
+  @Column({ type: 'varchar', length: 128, nullable: true })
+  contractId: string | null;
+
+  /** Event type / topic, e.g. "transfer", "mint" */
+  @Column({ type: 'varchar', length: 128, nullable: true })
+  eventType: string | null;
+
+  /** Full raw payload stored for audit/debug */
+  @Column({ type: 'jsonb' })
+  rawPayload: Record<string, unknown>;
+  /** Processing state; new rows default to PENDING */
+  @Column({
+    type: 'enum',
+    enum: SorobanEventStatus,
+    default: SorobanEventStatus.PENDING,
+  })
+  status: SorobanEventStatus;
+  /** Error message recorded when processing fails (nullable) */
+  @Column({ type: 'text', nullable: true })
+  errorMessage: string | null;
+  /** Creation timestamp, filled in by the CreateDateColumn decorator */
+  @CreateDateColumn({ type: 'timestamptz' })
+  createdAt: Date;
+  /** When the event was marked processed; null until then */
+  @Column({ type: 'timestamptz', nullable: true })
+  processedAt: Date | null;
+}
diff --git a/temp_backup/soroban-events/soroban-events.controller.ts b/temp_backup/soroban-events/soroban-events.controller.ts
new file mode 100644
index 00000000..60a2f1ce
--- /dev/null
+++ b/temp_backup/soroban-events/soroban-events.controller.ts
@@ -0,0 +1,42 @@
+import {
+  Body,
+  Controller,
+  HttpCode,
+  HttpStatus,
+  Post,
+  UnauthorizedException,
+  Headers,
+} from '@nestjs/common';
+import { ConfigService } from '@nestjs/config';
+import { ApiTags, ApiOperation, ApiResponse } from '@nestjs/swagger';
+import { IngestSorobanEventDto } from './dto/ingest-soroban-event.dto';
+import { SorobanEventsService } from './soroban-events.service';
+/** HTTP controller for /soroban-events; ingestion is guarded by a shared secret header. */
+@ApiTags('soroban-events')
+@Controller('soroban-events')
+export class SorobanEventsController {
+  private readonly ingestSecret: string;
+  // Read once from SOROBAN_INGEST_SECRET; when it is empty, ingest() rejects every request.
+  constructor(
+    private readonly service: SorobanEventsService,
+    private readonly config: ConfigService,
+  ) {
+    this.ingestSecret = this.config.get<string>('SOROBAN_INGEST_SECRET', '');
+  }
+  /** Validates the x-ingest-secret header, then hands the event to the service (HTTP 202). */
+  @Post('ingest')
+  @HttpCode(HttpStatus.ACCEPTED)
+  @ApiOperation({ summary: 'Ingest a Soroban event from the indexer/cron' })
+  @ApiResponse({ status: 202, description: 'Event accepted for processing' })
+  @ApiResponse({ status: 401, description: 'Missing or invalid ingest secret' })
+  async ingest(
+    @Headers('x-ingest-secret') secret: string,
+    @Body() dto: IngestSorobanEventDto,
+  ) {
+    if (!this.ingestSecret || secret !== this.ingestSecret) {
+      throw new UnauthorizedException('Invalid ingest secret');
+    }
+    // NOTE(review): `!==` is not a constant-time comparison; consider crypto.timingSafeEqual.
+    return this.service.ingest(dto);
+  }
+}
diff --git a/temp_backup/soroban-events/soroban-events.module.ts b/temp_backup/soroban-events/soroban-events.module.ts
new file mode 100644
index 00000000..e5c85734
--- /dev/null
+++ b/temp_backup/soroban-events/soroban-events.module.ts
@@ -0,0 +1,20 @@
+import { Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
+import { BullModule } from '@nestjs/bullmq';
+import { SorobanEvent } from './entities/soroban-event.entity';
+import {
+  SorobanEventsService,
+  SOROBAN_EVENTS_QUEUE,
+} from './soroban-events.service';
+import { SorobanEventsProcessor } from './soroban-events.processor';
+import { SorobanEventsController } from './soroban-events.controller';
+
+@Module({
+  imports: [
+    TypeOrmModule.forFeature([SorobanEvent]),
+    BullModule.registerQueue({ name: SOROBAN_EVENTS_QUEUE }),
+  ],
+  controllers: [SorobanEventsController],
+  providers: [SorobanEventsService, SorobanEventsProcessor],
+})
+export class SorobanEventsModule {}
diff --git a/temp_backup/soroban-events/soroban-events.processor.ts b/temp_backup/soroban-events/soroban-events.processor.ts
new file mode 100644
index 00000000..f41c87f4
--- /dev/null
+++ b/temp_backup/soroban-events/soroban-events.processor.ts
@@ -0,0 +1,78 @@
+import { Processor, WorkerHost } from '@nestjs/bullmq';
+import { Injectable, Logger } from '@nestjs/common';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
+import { Job } from 'bullmq';
+import {
+  SorobanEvent,
+  SorobanEventStatus,
+} from './entities/soroban-event.entity';
+import { IngestSorobanEventDto } from './dto/ingest-soroban-event.dto';
+import {
+  SOROBAN_EVENTS_QUEUE,
+  PROCESS_EVENT_JOB,
+} from './soroban-events.service';
+/** BullMQ worker that persists ingested Soroban events and records their processing status. */
+@Processor(SOROBAN_EVENTS_QUEUE)
+@Injectable()
+export class SorobanEventsProcessor extends WorkerHost {
+  private readonly logger = new Logger(SorobanEventsProcessor.name);
+
+  constructor(
+    @InjectRepository(SorobanEvent)
+    private readonly eventRepo: Repository<SorobanEvent>,
+  ) {
+    super();
+  }
+
+  /**
+   * Stores the event once per (txHash, eventIndex) and marks it processed. A row left PENDING
+   * or FAILED by an earlier attempt is processed again, so a BullMQ retry is not skipped.
+   */
+  async process(job: Job<IngestSorobanEventDto>): Promise<void> {
+    if (job.name !== PROCESS_EVENT_JOB) {
+      this.logger.warn(`Unknown job name: ${job.name}`);
+      return;
+    }
+
+    const { txHash, eventIndex, contractId, eventType, rawPayload } = job.data;
+    let event = await this.eventRepo.findOne({ where: { txHash, eventIndex } });
+    // Idempotency: only a row that already finished successfully short-circuits the job.
+    if (event?.status === SorobanEventStatus.PROCESSED) {
+      this.logger.debug(
+        `Soroban event ${txHash}:${eventIndex} already processed (${event.status}), skipping`,
+      );
+      return;
+    }
+    if (!event) {
+      event = this.eventRepo.create({
+        txHash,
+        eventIndex,
+        contractId: contractId ?? null,
+        eventType: eventType ?? null,
+        rawPayload,
+        status: SorobanEventStatus.PENDING,
+        processedAt: null,
+        errorMessage: null,
+      });
+      await this.eventRepo.save(event);
+    }
+
+    try {
+      // placeholder for downstream processing (e.g. trigger notifications, update state)
+      event.status = SorobanEventStatus.PROCESSED;
+      event.processedAt = new Date();
+      event.errorMessage = null; // clear the error left by a previously failed attempt
+    } catch (err) {
+      event.status = SorobanEventStatus.FAILED;
+      event.errorMessage = err instanceof Error ? err.message : String(err);
+      await this.eventRepo.save(event);
+      throw err; // let BullMQ retry
+    }
+
+    await this.eventRepo.save(event);
+    this.logger.log(
+      `Processed soroban event ${txHash}:${eventIndex} (${eventType})`,
+    );
+  }
+}
diff --git a/temp_backup/soroban-events/soroban-events.service.ts b/temp_backup/soroban-events/soroban-events.service.ts
new file mode 100644
index 00000000..b90fb323
--- /dev/null
+++ b/temp_backup/soroban-events/soroban-events.service.ts
@@ -0,0 +1,32 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { InjectQueue } from '@nestjs/bullmq';
+import { Queue } from 'bullmq';
+import { IngestSorobanEventDto } from './dto/ingest-soroban-event.dto';
+
+export const SOROBAN_EVENTS_QUEUE = 'soroban-events';
+export const PROCESS_EVENT_JOB = 'process-event';
+/** Enqueues ingested Soroban events on the BullMQ queue for asynchronous processing. */
+@Injectable()
+export class SorobanEventsService {
+  private readonly logger = new Logger(SorobanEventsService.name);
+
+  constructor(
+    @InjectQueue(SOROBAN_EVENTS_QUEUE) private readonly queue: Queue,
+  ) {}
+
+  /** Queues one event under a deterministic job id built from txHash and eventIndex. */
+  async ingest(dto: IngestSorobanEventDto): Promise<{ queued: boolean }> {
+    // BullMQ documents that custom job ids must not contain ':' (its key separator), so join with '-'.
+    const jobId = `${dto.txHash}-${dto.eventIndex}`;
+    // BullMQ deduplicates by jobId — duplicate submissions are silently dropped
+    await this.queue.add(PROCESS_EVENT_JOB, dto, {
+      jobId,
+      attempts: 3,
+      backoff: { type: 'exponential', delay: 1000 },
+      removeOnComplete: { count: 500 },
+      removeOnFail: { count: 200 },
+    });
+    this.logger.debug(`Queued soroban event ${jobId}`);
+    return { queued: true };
+  }
+}
diff --git a/temp_backup/src/alert_notifier.py b/temp_backup/src/alert_notifier.py
new file mode 100644
index 00000000..ac1d8ecc
--- /dev/null
+++ b/temp_backup/src/alert_notifier.py
@@ -0,0 +1,85 @@
+import os
+import time
+import requests
+from src.utils.http_client import RobustHTTPClient
+
+
+class AlertNotifier:
+    """Sends anomaly notifications to Telegram and to the configured webhook URLs."""
+
+    def __init__(self):
+        self.telegram_bot_token = os.getenv("TELEGRAM_BOT_TOKEN")
+        self.telegram_channel_id = os.getenv("TELEGRAM_CHANNEL_ID")
+        self.webhook_urls = self._load_webhook_urls()
+        self.max_retries = int(os.getenv("WEBHOOK_MAX_RETRIES", "3"))
+        self.base_backoff_seconds = float(os.getenv("WEBHOOK_BACKOFF_SECONDS", "1"))
+        self.session = RobustHTTPClient()
+
+    def _load_webhook_urls(self):
+        """Return ALERT_WEBHOOK_URL plus the comma-separated ALERT_WEBHOOK_URLS, de-duplicated in order."""
+        urls = []
+        single_url = os.getenv("ALERT_WEBHOOK_URL")
+        if single_url:
+            urls.append(single_url)
+        registry = os.getenv("ALERT_WEBHOOK_URLS", "")
+        urls.extend(url.strip() for url in registry.split(",") if url.strip())
+        return list(dict.fromkeys(urls))
+
+    def notify_anomaly(self, result):
+        """Notify Telegram and all webhooks when ``result.is_anomaly`` is truthy; otherwise do nothing."""
+        if not getattr(result, "is_anomaly", False):
+            return
+        payload = {
+            "event": "high_priority_insight",
+            "type": "anomaly",
+            "metric_name": result.metric_name,
+            "severity_score": result.severity_score,
+            "current_value": result.current_value,
+            "baseline_mean": result.baseline_mean,
+            "baseline_std": result.baseline_std,
+            "z_score": result.z_score,
+            "timestamp": result.timestamp.isoformat() if result.timestamp else None,
+        }
+        self._send_telegram(payload)
+        self._send_webhooks(payload)
+
+    def _send_telegram(self, payload):
+        """Post a short text summary to Telegram if a bot token and channel ID are set."""
+        import logging  # local import: only this method logs
+        if not self.telegram_bot_token or not self.telegram_channel_id:
+            return
+        text = (
+            "🚨 High-Priority Insight\n"
+            f"Metric: {payload['metric_name']}\n"
+            f"Severity: {payload['severity_score']}\n"
+            f"Current: {payload['current_value']}\n"
+            f"Z-Score: {payload['z_score']}"
+        )
+        try:
+            self.session.post(
+                f"https://api.telegram.org/bot{self.telegram_bot_token}/sendMessage",
+                json={"chat_id": self.telegram_channel_id, "text": text},
+                timeout=10,
+            )
+        except requests.RequestException as exc:
+            # Best effort: a Telegram failure must not stop webhook delivery. Log only the
+            # exception type because its message may contain the URL with the bot token.
+            logging.getLogger(__name__).warning("Telegram alert failed: %s", type(exc).__name__)
+
+    def _send_webhooks(self, payload):
+        """Deliver the payload to every configured webhook URL."""
+        for url in self.webhook_urls:
+            self._post_with_retry(url, payload)
+
+    def _post_with_retry(self, url, payload):
+        """POST up to max_retries times with exponential backoff; True on a status below 400."""
+        for attempt in range(self.max_retries):
+            try:
+                response = self.session.post(url, json=payload, timeout=10)
+                if response.status_code < 400:
+                    return True
+            except requests.RequestException:
+                pass  # treated like an error status: back off and try again
+            if attempt < self.max_retries - 1:
+                time.sleep(self.base_backoff_seconds * (2**attempt))
+        return False
diff --git a/temp_backup/src/alertbot.py b/temp_backup/src/alertbot.py
new file mode 100644
index 00000000..1d51233c
--- /dev/null
+++ b/temp_backup/src/alertbot.py
@@ -0,0 +1,353 @@
+"""
+Telegram Alert Bot module - Sends notifications when market sentiment exceeds threshold.
+
+This module provides the AlertBot class that integrates with Telegram's Bot API
+to send alerts when the MarketAnalyzer detects high sentiment scores (>0.8).
+"""
+
+import os
+import time
+import logging
+import threading
+from datetime import datetime, timezone
+from typing import Optional, Dict, Any
+
+import requests
+
+# Load environment variables from .env file if present
+try:
+    from dotenv import load_dotenv
+
+    # Try loading from multiple possible locations
+    load_dotenv()  # Current directory
+    load_dotenv(
+        dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env")
+    )  # data-processing root
+    load_dotenv(
+        dotenv_path=os.path.join(os.path.dirname(__file__), "..", "..", "..", ".env")
+    )  # project root
+except ImportError:
+    pass  # python-dotenv not installed, rely on system env vars
+
+logger = logging.getLogger(__name__)
+
+
+class AlertBot:
+    """
+    Telegram bot for sending market sentiment alerts.
+
+    Sends notifications via Telegram Bot API when sentiment score exceeds
+    the configured threshold (default: 0.8).
+
+    Features:
+    - Thread-safe send operations
+    - Exponential backoff for rate limiting (429 responses)
+    - Graceful error handling for auth and network failures
+    - Configurable dry-run mode for testing
+    - Message truncation for Telegram's 4096 char limit
+
+    Environment Variables:
+        TELEGRAM_BOT_TOKEN: Bot token from @BotFather
+        TELEGRAM_CHANNEL_ID: Target channel/chat ID (numeric or @channel_name)
+    """
+
+    # Telegram API configuration
+    API_BASE_URL = "https://api.telegram.org/bot{token}/sendMessage"
+    MAX_MESSAGE_LENGTH = 4096
+
+    # Retry configuration
+    MAX_RETRIES = 3
+    INITIAL_RETRY_DELAY = 1.0  # seconds
+    MAX_RETRY_DELAY = 10.0  # seconds
+    REQUEST_TIMEOUT = 10  # seconds
+
+    # Alert threshold
+    ALERT_THRESHOLD = 0.8
+
+    def __init__(
+        self,
+        telegram_bot_token: Optional[str] = None,
+        telegram_channel_id: Optional[str] = None,
+        dry_run: bool = False,
+    ):
+        """
+        Initialize the AlertBot.
+
+        Args:
+            telegram_bot_token: Telegram bot token (falls back to TELEGRAM_BOT_TOKEN env var)
+            telegram_channel_id: Target channel/chat ID (falls back to TELEGRAM_CHANNEL_ID env var)
+            dry_run: If True, log messages instead of sending them (useful for testing)
+        """
+        self.bot_token = telegram_bot_token or os.getenv("TELEGRAM_BOT_TOKEN")
+        self.channel_id = telegram_channel_id or os.getenv("TELEGRAM_CHANNEL_ID")
+        self.dry_run = dry_run
+        self._lock = threading.Lock()
+
+        # Validate configuration
+        self._configured = bool(self.bot_token and self.channel_id)
+
+        if not self._configured:
+            logger.warning(
+                "AlertBot not configured: missing TELEGRAM_BOT_TOKEN or TELEGRAM_CHANNEL_ID. "
+                "Alerts will be logged but not sent."
+            )
+        elif dry_run:
+            logger.info(
+                "AlertBot initialized in dry-run mode (messages will be logged, not sent)"
+            )
+        else:
+            logger.info(
+                f"AlertBot initialized for channel: {self._mask_channel_id(self.channel_id)}"
+            )
+
+    @staticmethod
+    def _mask_channel_id(channel_id: str) -> str:
+        """Return a log-safe form of the channel ID: at most its first 4 characters."""
+        if not channel_id:
+            return "<none>"
+        visible = channel_id[:4]
+        # IDs of 4 characters or fewer are returned whole, without an ellipsis.
+        return channel_id if visible == channel_id else f"{visible}..."
+
+    def _truncate_message(self, message: str) -> str:
+        """
+        Cap a message at MAX_MESSAGE_LENGTH characters.
+
+        Args:
+            message: Text to be sent
+
+        Returns:
+            The text unchanged when it fits, else its leading part plus a truncation marker
+        """
+        limit = self.MAX_MESSAGE_LENGTH
+        original_length = len(message)
+        if original_length <= limit:
+            return message
+
+        suffix = "\n\n... (message truncated)"
+        logger.warning(
+            f"Message truncated from {original_length} to {limit} characters"
+        )
+        # Reserve room for the marker so the result is exactly `limit` characters long.
+        return message[: limit - len(suffix)] + suffix
+
+    def _send_request(self, message: str) -> bool:
+        """
+        Send message to Telegram with retry logic.
+
+        Args:
+            message: Message text to send
+
+        Returns:
+            True if message was sent successfully, False otherwise
+        """
+        url = self.API_BASE_URL.format(token=self.bot_token)
+        payload = {"chat_id": self.channel_id, "text": message, "parse_mode": "HTML"}
+
+        retry_delay = self.INITIAL_RETRY_DELAY
+
+        for attempt in range(self.MAX_RETRIES + 1):
+            try:
+                response = requests.post(
+                    url, json=payload, timeout=self.REQUEST_TIMEOUT
+                )
+
+                if response.status_code == 200:
+                    logger.info("Alert sent successfully to Telegram")
+                    return True
+
+                elif response.status_code == 429:
+                    # Rate limited - extract retry_after if provided
+                    retry_after = (
+                        response.json()
+                        .get("parameters", {})
+                        .get("retry_after", retry_delay)
+                    )
+                    retry_delay = min(float(retry_after), self.MAX_RETRY_DELAY)
+
+                    if attempt < self.MAX_RETRIES:
+                        logger.warning(
+                            f"Rate limited by Telegram (429). Retrying in {retry_delay:.1f}s "
+                            f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
+                        )
+                        time.sleep(retry_delay)
+                        retry_delay = min(retry_delay * 2, self.MAX_RETRY_DELAY)
+                        continue
+                    else:
+                        logger.error("Rate limit exceeded, max retries reached")
+                        return False
+
+                elif response.status_code in (401, 403):
+                    logger.error(
+                        f"Telegram authentication failed ({response.status_code}). "
+                        "Check TELEGRAM_BOT_TOKEN and ensure bot has channel permissions."
+                    )
+                    return False
+
+                else:
+                    error_desc = response.json().get("description", "Unknown error")
+                    logger.error(
+                        f"Telegram API error ({response.status_code}): {error_desc}"
+                    )
+                    return False
+
+            except requests.exceptions.Timeout:
+                if attempt < self.MAX_RETRIES:
+                    logger.warning(
+                        f"Request timeout. Retrying in {retry_delay:.1f}s "
+                        f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
+                    )
+                    time.sleep(retry_delay)
+                    retry_delay = min(retry_delay * 2, self.MAX_RETRY_DELAY)
+                    continue
+                else:
+                    logger.error("Request timeout, max retries reached")
+                    return False
+
+            except requests.exceptions.ConnectionError as e:
+                logger.error(f"Connection error sending Telegram alert: {e}")
+                return False
+
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Request error sending Telegram alert: {e}")
+                return False
+
+            except Exception as e:
+                logger.error(
+                    f"Unexpected error sending Telegram alert: {e}", exc_info=True
+                )
+                return False
+
+        return False
+
+    def send_alert(self, message: str) -> bool:
+        """
+        Send an alert message to Telegram.
+
+        Thread-safe method that sends a message to the configured Telegram channel.
+        Handles rate limiting with exponential backoff and logs all operations.
+
+        Args:
+            message: The alert message to send (supports HTML formatting)
+
+        Returns:
+            True if message was sent successfully, False otherwise
+        """
+        with self._lock:
+            # Truncate if necessary
+            message = self._truncate_message(message)
+
+            # Handle unconfigured state
+            if not self._configured:
+                logger.info(f"[DRY-RUN/UNCONFIGURED] Alert message:\n{message}")
+                return False
+
+            # Handle dry-run mode
+            if self.dry_run:
+                logger.info(f"[DRY-RUN] Would send alert:\n{message}")
+                return True
+
+            return self._send_request(message)
+
+    def _format_alert_message(
+        self,
+        score: float,
+        sentiment_data: Dict[str, Any],
+        timestamp: Optional[datetime] = None,
+    ) -> str:
+        """
+        Format a sentiment alert message.
+
+        Args:
+            score: The sentiment score that triggered the alert
+            sentiment_data: Dictionary containing sentiment analysis details
+            timestamp: Alert timestamp (defaults to current UTC time)
+
+        Returns:
+            Formatted alert message with HTML markup (trend text is HTML-escaped)
+        """
+        import html  # local import: the module's import block lies outside this method
+
+        if timestamp is None:
+            timestamp = datetime.now(timezone.utc)
+
+        # Determine trend direction (non-string values are rendered with str())
+        trend_direction = sentiment_data.get("trend_direction", "Unknown")
+        if isinstance(trend_direction, str):
+            trend_label = trend_direction.capitalize()
+        else:
+            trend_label = str(trend_direction)
+
+        # Add trend emoji (chosen from the unescaped label)
+        trend_emoji = (
+            "📈"
+            if "bull" in trend_label.lower()
+            else ("📉" if "bear" in trend_label.lower() else "➡️")
+        )
+        # The message is sent with parse_mode=HTML, so "<", ">" and "&" must be entities.
+        trend_display = html.escape(trend_label, quote=False)
+
+        # Extract metrics
+        avg_sentiment = sentiment_data.get("average_compound_score", 0)
+        sentiment_dist = sentiment_data.get("sentiment_distribution", {})
+        positive_ratio = sentiment_dist.get("positive", 0)
+        negative_ratio = sentiment_dist.get("negative", 0)
+        news_count = sentiment_data.get("total_analyzed", 0)
+        # Calculate confidence (based on sample size and score strength)
+        confidence = min(100, max(0, int(abs(score) * 100 * min(news_count / 20, 1))))
+
+        time_str = timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
+        message = f"""🚨 <b>High Sentiment Alert</b>
+
+<b>Score:</b> {score:.2f}
+<b>Trend:</b> {trend_display} {trend_emoji}
+<b>Confidence:</b> {confidence}%
+<b>Timestamp:</b> {time_str}
+
+<b>Details:</b>
+• Average sentiment: {avg_sentiment:.2f}
+• Positive ratio: {positive_ratio:.1%}
+• Negative ratio: {negative_ratio:.1%}
+• News analyzed: {news_count}"""
+
+        # Add anomaly info if present
+        anomalies_count = sentiment_data.get("anomalies_detected", 0)
+        if anomalies_count > 0:
+            message += f"\n• ⚠️ Anomalies detected: {anomalies_count}"
+
+        return message
+
+    def check_and_alert(
+        self,
+        analyzer_score: float,
+        sentiment_data: Dict[str, Any],
+        timestamp: Optional[datetime] = None,
+    ) -> bool:
+        """
+        Check if sentiment score exceeds threshold and send alert if so.
+
+        Args:
+            analyzer_score: The sentiment/health score from MarketAnalyzer
+            sentiment_data: Dictionary containing sentiment analysis details
+            timestamp: Optional timestamp for the alert
+
+        Returns:
+            True if alert was triggered and sent successfully, False otherwise
+        """
+        if analyzer_score <= self.ALERT_THRESHOLD:
+            logger.debug(
+                f"Score {analyzer_score:.2f} below threshold {self.ALERT_THRESHOLD}, no alert"
+            )
+            return False
+
+        logger.info(
+            f"Score {analyzer_score:.2f} exceeds threshold {self.ALERT_THRESHOLD}, triggering alert"
+        )
+
+        message = self._format_alert_message(analyzer_score, sentiment_data, timestamp)
+        return self.send_alert(message)
+
+    @property
+    def is_configured(self) -> bool:
+        """Check if the bot is properly configured."""
+        return self._configured
diff --git a/temp_backup/src/analytics/__init__.py b/temp_backup/src/analytics/__init__.py
new file mode 100644
index 00000000..1a4f5060
--- /dev/null
+++ b/temp_backup/src/analytics/__init__.py
@@ -0,0 +1,21 @@
+"""
+Analytics module for market analysis and trend detection.
+"""
+
+from .market_analyzer import MarketAnalyzer, Trend, MarketData, get_explanation
+from .forecaster import SentimentForecaster, ForecastResult
+from .correlation_engine import CorrelationEngine, CorrelationResult, DataPoint
+from .ner_service import NERService
+
+__all__ = [
+    "MarketAnalyzer",
+    "Trend",
+    "MarketData",
+    "get_explanation",
+    "SentimentForecaster",
+    "ForecastResult",
+    "CorrelationEngine",
+    "CorrelationResult",
+    "DataPoint",
+    "NERService",
+]
diff --git a/temp_backup/src/analytics/__pycache__/__init__.cpython-314.pyc b/temp_backup/src/analytics/__pycache__/__init__.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..200545436019a2096abdf32eeb53ce4262f662e9
GIT binary patch
literal 698
zcmZuu&ui2`6rRbCO*Uz>YVqKyfr1dkr3XQ@B1m^-7S?V_>qUY?NXBJsHVHFt#r7oP
zKjYcIM7#}15B>vc4_=)~EJg7_zRY{`y&v<vJJDu<w0-z=>-7T*p>M{yH0^hG@k0OJ
zp%ejniBmko!~%=hU=s%%;)1(v;?`_HJn)DQe&^e>AqgNLA%tWDHoBNI+awW0+h~l^
z!2-i5b$3wWo$Uor>byDyUdVXWl<SJcuNo1ra&f{S&dr_VQXk3~1k+eDU<L4|9tVk|
zqGuhkt8vDJa*qv31$}suL#{?k29&*RD_zXwC6?McVm0uUhNq2SMJ_?7MwjQ5$#n&Z
zt*-AkLa?gKn$}BRv%f+;PYq3%!-m(8U=<up(<3I{@PZvAfiOA>+kj)hH83#X8So7Z
z4VX=cP(yM}d6bq-L8%(`BG66*{kZ>CQ-N0N&kL?9s~g4D<dk)z|I<x{H4{|!qjs%L
z<GiXU{fI8~BuE|aKg^zMhRi0@`8>XVZ+ADFFnIz^o6Xl+y>?wmmgQ|Lvr=ntw-rsn
zq~!H7lcLDFxy@oTN_owJ(s6sL{73zoAB&s13D%#yt>L?kG5%p$*!krne2BlG#W~tL
VM~lyB@fGd-bi&Yj?};r<{SD7p#*_d6

literal 0
HcmV?d00001

diff --git a/temp_backup/src/analytics/__pycache__/forecaster.cpython-314.pyc b/temp_backup/src/analytics/__pycache__/forecaster.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bbf0797c4ce572940189707e18504c9cc90979a9
GIT binary patch
literal 26172
zcmc(I3sjufedjkX=D`dz!0-|XJ^_*#1Oq~{g+T9@K@WsS2-yzSc*G1a@n~l9&0rbq
z^yoBcieh(-V&@pePK~zCTH50_xJ^&+K2DUzZJl;c&yZmX{z{JJRBf8G+b#;qD%<Wk
z-QWLykNH3&$#J@8ug#tNzW2M2|NCBTDR9_0xG(=v#ploVaNOTgL~(g?V5L^aaThrs
zm*mcJK3%sisbjzTq#nQeZbO$bY3wp3O*NEn=+5iPOXhW%lV%n-c3ZlvNh=GRx@}$g
z$^0&R(%$7rI=Tvy1zpahljZ5UUCBb$Zc(xrzj@v6u99R4%QJVEc9kW|YPhG%HgHdu
zZ^wAZB`cn`BChY<F2%L}2F_>kS-0z@G_^*h&z7w66(`r9H6*K_HkEKFmtV&D>}pB5
zkz|d}f%JkhF5*h^J}1I17T&<>6e3=v)JfL*bakA^J^BwAn5|a`1;c~M@N*$P5Di4e
zlHtKb2R{@OLW6-sl0P31o((1Wq!5Y*6MVyg(MTkm7-=~W3JCc1P(eO965^ww=aYQf
z_U7&F{77t6NZ3Xb;pi|w5>6yzLU=F`;S-@~GJGB$`U&#S#Ui8ULwqoh4Ddrj?0mL!
z@8=V-XvAx?ol-0Gha$1TaB_^_%?p8Kh>s0vS{ob*M2ACsJS4E5`G(-BmSB>~oR7tV
z9yD|yF!(Hnk2;YMzBCr)8v>)rSaUFxL_8Ga14&*OWux<S*m%B~?-gS45qx&TiBK}&
z;rX9je2tF_p`nm~+3MuO(L^#3iJ<0l;Q;^4Gx2a-NkcJK^_gc_Ly5ugv*BcO1k)Ae
zec>PmDujlGP$EIYpk@P+3xTl&A9yYhjs(s{LKs^_2nB*;Xc1+<Fd7y@G;0YDYp^Rc
ziiyV}@j)!m7;9oEjD_QeFrst#QvSk7C@M|tE`A#ykA+d$*5lEOd4U8iwP>YTAB+SN
z2_#w&3DLrc7V^hq(U3<c8hZoD5syhU?2nF##zR!gcpUwTmXmRs!9YYb^^L|ORMyl-
zn*#xRB9t7B`y;X8;gBG@_xJ2S{$yYG!BhUDr%v`9_xJAa>l!chwovaa>RywU`={Ty
zLhgrq2eqd{J`UW<0i*_SsGsJNIvi~M*~X-yl#AhbCyo8wHl0t8&2I4-*p@f36eFc1
z^L(bHxt~i~e0kXNX0hOiys>?u#AqZrzV(D=e`k07nqAG0usutA+dD|7Sw7L65n%)V
zRvv}7OLvj$=g#Wezszw#q*if{>qc}w-6`GYIQ-GCew)5&9g51X1?TX({%7==VxEA>
z6QUl2V4<iXkrV_oLbPre&iO!6bol*j)o2m@{sdL4y(sn{>=-zm2nmUSLx+za=XY#x
zZ5=oiN<5p4#Rrb#j78(4kwj=95RWGYXzH7>LkDRCMu!Iy!r*|q5(DUl9r}>qjgN_T
zzdsO-#*)|`e!p0`dg8p=%$*oWq6*$GFLHM|ovG?Jc~{(52Cr_L8u?~5i>@G_W0OY#
zax`)Hv#gXOaFH8OCKZ#Y06<KSuE(Pj=u>G|;sg&RsN5!@0={TY#{6tcm#!I}oX$pO
zXU#7;OD2Oay_m983N+N*%lI|q<X*-WT0*W<@6&BU89A(%!#boJG-0DAY|@1DG-0zG
zrZz2dSdXw(lWwCh_HDl4MnEhE2%@hI1^o;j``fmU+?%+)eMBr6j75jS!BBKC#0qKS
zlwYI*WB+q9Y!!=?<@YBBu@P9kwP{q>aXuCdMf?P6aO_1F?J2(mW&ERwP*AiFhoT{Z
z(?NeAiTzwkV>b;&V%Wk403(lHC_|D!%PwzXwweN+451QUBjx-yykGt{_nX{p3%73l
zoU!V*mDA~N+vG?-MeJP3x;bOTZ3jgP<cO0ZF0P__&ba<|Aw`PhNHIm+a-@VJrJTEL
z&RBZ8j3VV+8EdVABI~%~(m7+v&nppOJLWpq<FSeszdsr{Pbb3W_hTkTBNWD2`NC)*
zBIh{${-LmtNJhfZP&9^Yv)><#4f_28k8ctSHD_mNG>UYw1jiW%Ej)-BOA6t0qsdSL
z$tbBF_`y&l;`d+INz+DWV4pN=g$N4dv00;IfmyrAePZmqyAH>2na<+f8hpi<IQAW^
zZ0_yfipsfR*Z^`Q=hF|+X=JWNj{|CuHm8a08leOkSm0*-u!YCrrNw54wpO#uwATA5
zg8~FL@^m$~$$RsWW%_yh6pLlgDvB{jodaqZSN;M48f<oG?XLmwV*gZe*&+5CrD|Hg
zH%}Xr!gbUVL0;zCi*mGNQ*yJvxm(ioPOxW<%U#E#8?QeRi1Pt{Bov4wM|d`!d@_a<
zhF^J52cd`;IIQr%>Ep+{Pj&hGx_pOEb)7tZNT6;+vz#uPo(n`qLnwE6zi(AJ6p@ld
zbI;+^eZKw2u{@oqtA(jfY0rg+#v~{s97KVAROk@Ai`-4Kecs8>oB1WD`;{+V`r?AK
zcFtLwveYi#FJmd+^~&x`yBG4S=klvl#_H^%Dj=U035&|De99e&qLQN>D0Co<51~Ex
zpzYR;{6sZ8+HQ7TkM1_}-`=>)e&0PT#j~SBQRZk{V06ZnzeXT8qKfPKb=hTa$B2S}
z-PnjGsR<6Ginc^kzLJg{eUn;><C?M=+*yM{v)5@_8X&TOIiJC2T-B;(i~3CWYgK7e
z+qx%j^ap6t)+Yp_i3riyMAV-T3`fJs(IBI^14IE!=tsLbTYqx#)vbIM-|%enf$Hu7
z%`3A8qe%e->N%qO8~)_IR&Se!|H)Ur#z$fov=tkEqt*Ke==D=cAr>8ujPd7WX5*a1
zY*5{;-d3sZNO*W;ZRMR*dw3gj>AivzA2d9~+&}FdmS*gx68ML`9;0XooJ$D1kt3Rf
z*eH;h;e0p>=tW0Dv?5bNg$zk;;w6%!)SvxUKXEQn8R01EupM>=fs5Ro0<O?KdH7Y^
z#qLG3Yu>$a-n{Xa(>>WY_2j&#BeT9^Hkhe8kSRHEV}IItBxN~rE8q26F|xN$@0@Sh
zm#N!#<4~ryD^uB(F6{nrYdZf#%6LNBnrL5jba2S6JcFaOibDQemO}RFlP2GWELH3?
z5uF^-fjX8_^BDDPmQ&47TYR;ww1uTxrF4aBO<Gx=jpczE1;c4$DR!0uHWVrOEQQ!n
z3o)Zeu|I8LX~dRVd|*f;F11Am%PC?x#jJDzOL4Q55|-j*DWxo>jHS3R&N`M_&QdEV
zHKJ2nEo6D?SY9QoQS@{%ORHjO>nTkw?Uv`Fn&s6{9%iV-$0tj{Ka}|lpu_6LVwEXX
z2u@Hsn=$t`C7#H#rp@O9pobI|S_{_`V3jpY1s_0W5ND=uEY5i4(L`W4)X`xB%g3u+
z2^iene2)Gbh%@B*;1IGE8V)2p=&hh%;=jk_BIiROE(Q~;%i(N>qYc3!sTHuM;EPt5
zkk36UV>>DE0kEq)_{3%+a0F$TYR#NO^n$_0C`5Ob^30C4CpS4lYm=KA&oWh7cFG4T
zoDhP?D3K&KCQ0ze!OAka1U#@n5K<ug|3E5K%r(!ec%JiUB2fd6@jNEe=Gz>+tL@hh
zeQVD$1!wzMigaXEiU}16&*9IqGL8TU7~N+4k_!Th17j#S5jhGQbi$OyE&W_nZOsRw
zB}(lv$6^XVAbi6`RlMTUBc_r)K4K~iJ)(6)ktQ7RnAxm|1^z>akL*8vyia1<g_CF+
zq^?Bs6ThK^i50M@TtL1*oB+9iofHaUW@%zrRS9&;+!|=-Fz^Wo+o~LU1k)?!vZ-xB
z3m3VMx9@q|IQ`T--}*t%gz;tP;<{?>&-w<$9E%$^O&I^$Ql3TJ$`sOM(DbYjaQ@YH
z5;5z!C^fdfWRlmY?kwy$bz_P8cu=$+0#|fI0H^T4eJ1o`V9((%Q3sE0rER97+Z?>B
z<=5-KRlQ8X_uIa=>-}9U_1^77+!emjzVd(YwV!f30Xp}pn+Ww~7uWaed(|zYq;i}B
zBbALhjK#r5Rku^GHCt8*Vf*RrAZ^+V$|kPi2563$>&-zIZ&3TG<GftlFhKa5xqiLU
zMyJlN8`X~*{A$^_vES(84(pB-a9rHfZ|WsZXpK|GPx<OMV9u(zV;~1BxdBl7lIwLV
zq3rJluHU5eAJ6O0>m`0>jRV}_6tXIA?l<QwW7L$f^c#AKRa@hHCe0iXXSBv)ERbkv
zVV*KFU*5oarSN_FH~TEg7}Z+V0XnM8-RH}!XVouLYuPk?S#o}BevZEE53S|MQLEsg
zwVXcJ+V5(~Q^s0@y>SeC!pc=c1Wx~LCx`%7!}S}~-LU0T?C9cbo0?U6$WDG($$s^G
z^eTgqhR1xLy7PKHnq)cXbNfnm$gnPM)0{_q&06Ox9U%UQxqh4SB{-pFbWYohN(yGh
zR>$$&052Ej25Qw%_MFr!Y1!XxhCD8~k>)XO)r=(<Y$)urqEGcy*Kq5xHgW4-E7t0#
zV2D^atP69UhV@)n$8%daWpaiAx2m|Xp6b-F|Bm01N_oo1zhUbQ2#FBisLC`Y>2f1v
z>5P*m33qci8Vo%Tx=aPpAiNHeIwG1N=|9g?DTrgv$D6XWoJ8d}LC<j^0S*A#f+kir
z9!rEtdE$=`CMkCqQk-mzF+P-K8(+U>Ogu!ZAlJZkkosY~vJ=975&)CdBy>IwWrsRG
zu^>w{k+e-L)x;r`X96P#<wdJHa?zPB!Xyb|Y2Z1Ks>2}+{(PLemA(kmEjiJa{WQ_8
z{VdU<#6-LH14WAx6Z4}X^;1Q&9Fdz|-aB4*>e+BSPG5FD5CPeDo^&pXvWX`uvV-p#
zU%P6=-X$4Dfw>~KG92Wg2ob`eL>~!*gV3~$n~#IcMRVgtlFCC8L`o_pPZ^_WBpN0!
z^gO02A?C^I3G)T0WH2Ow<VV8O@I+%Q9*T-4n$WmtB;rvt5fz;f4a1=%)F*M|2gQ66
zF3U<2Nzmn`+LmLXvBQE86GU4-$zP<flXM@4LW7W*N(uRin2_`{COIMIgJ=#VLHA%s
zipFqsC?*;rp(q5tjDq?c4UJJ?W=Q5^0!E@A8W9ab;DTtPsTdRWi6CZzG&6?K^Mj&3
zIFumK!lq5Mk(ra&R=buR$W7jAOE8+`=YTDcmAJV2hS&P0>R)^6THm+**ZmW=o6d>_
zCqL)pA^Z7}vv$c|^h(dAp2eb;8QZlNQ=R=O*O@y8y`$hx9_KE7eb?7^y}Bo5-*B^`
zal*XhD4c9uaIBwqte-AgDyx_@ES6Th9=;rY_1Q_o$7Sn2a#t<u3k!R7OI7?^N564&
zI`PKI$-|2k)o)c?sYumroAIS9+9wYzR<3{R(JPOpYCC2Rq$~GM9$Iu)zTR`W=VsOB
zcZ<GV{U57mO|#Et+K;9^#}=!a-yL1#Yv0McnwQ$V_eOD=KfGelw^lFbaqB(r)?EvI
zJA6GnYrhdld%Mz&-HTO??;f71|8B#34YM6L66v;M>89iNN%Xw?$c+2D<?ofxdTyLb
zZ|zQRIl5eEtyp*0kY7>?*>gEUTtW5SR?bx>&r+)Lv3YyfQgMkoFPJ5V^OfUYJw8>I
zwr}{@QGV%jQ<Z7QMky4W_*~jiw`}ID`FCub)%i;PrTod^HwUL{-x!{HGPS9FcHOK!
z<vu)bIkM<1oUs1#P9;~d;qAtEgA3jRbKV1~vV(Ux9Zjfv9nDZ`W7mhpADU8iJqzxW
zY4^#0SvFAaKi}Pgfns6)Ik5x#=leT1bZ;^JN#((ouKK(`vzZY2u%7R3u>R0k-d$(?
zVU-yvKdduRN`ryITk^X*t*iJrq7MnzuIz*NAnH38E$Y(~1@1GzH?$D%E%9{1SJ0N2
zcL66Rc0m$08?z@^po1vM0UxMiAgcQh@RuN@%Bb!d)0W#DypQY!uN=R0eAz(Z+Kue@
z6Klb36XN&M-k`W8+IyCrlzuJ*N@EoZr)sjG79^Q6qdH0b4hmeRL(qomvMgU*uVDmL
zOnFZ$dI%3SbYd=Pw=YjyqW@FWILm2kG^tcOC~O1j8??Efs>g@cxPOnmW_5DZfI`pv
zj9M`&aRNTm>RBF8sgE<;wPnxdDHJ1imU;Alp=po+=i|`3z=+@ibm=gCP%$VUPVmgi
zfG}9YU%i&iXyO0rHD>NWW(K`7DZpf#WP3pM&H-*is0hploth%i^g=i)5eloyD(GJ-
zDhWN&E|j|oNgNx)sDKCn`T0ia2sBch5Q9QKB=}Zut7HNI39b;b1<>tK=mLaCNf=7-
zZCW(xwFyCdx^RxX06Yn0^Y8Ba>F@mGA5Y)?vza|V`&T%RN%#%qibkO02=ElFF+@ia
zs^N$~f$m`$B0PqC6r<xQe3i;VpG^Z5%}^FXL6}eiBMIU#8Ge#5(>fwZF!5+gHW10`
zB)zAUG)24y!PfoD|2|qy{8xAvx#e}7z2K`IZx&Da-YA>i`ex-v#ich3i(fChTsBqu
zYRyF6Qhve2b1!vH6~4R|S-=I3;#6^c#=ddE-Z*D(yw;YnduK{h?cJHJM;Er9oZEUb
z)%$p6t1n~sEgLwiYuUmTxF?Tg9Q=Y~<D6sT^tp_~bM44XXQpZILertSrb9O#&on)j
zaXj|1W8I~vU-B;&xPM{NJ4&S5cFqoI@5_78H;IMMWJ)$&)1{q_DNEzMatsxc65#r5
zkd)vk{)}r35V8t9Y3WFPcip6csHFQ~iagmE;X$ff8r>ZJ5}V;Muv$cNF?d(>Z`rTd
zmnnGNeSPq~ZP!MAA7%lF-V0RakF~EXp%oHn+yhiqxtI(N>ln~g1sb_T7(TU54TKV_
zq-M~e_)KRx66^pds{jL?%PtF)>AT1UUDPAAFJQ(sU|Q`%ZcC%%k?L_8nO+v~90|j|
z3fdM$9x6c#92>}3wFXo~`U4#J7@I&MSVf#mN*R~~B?iF=P~{6F;3zIYN-GQGnDGjM
zwF}T8du`oA5)1%zO2%S*BF=aw?FaxY5@FDxNho2+tO#I?42!@*p_EM(8p<X}7{&;I
z9T_tvA#WzA2U96BwE|rZBPMG3nP+5sfoGoKpAC(9ZR2~oqlwX>q3~cB{YV4C1OQCZ
zhqHm>Q#yIsj7*VJs#Z=%M6=wY;KSsC7m;lcM6+yx<;i0}ONb#`h?7T}8L<drO44kC
zvGB_!A@r2uiD#LimLk9;V75wtX=261id;~MWwG5)eE|Tu)&L-dN_O=!+87Xr-#Bq-
z-d?rnUO)B1wWG7TRB=bj-f=VExsYEwmtQ;GwvfN+NBNs5f9zW0cLrx`|7bW>3{rZ}
zO=q#BcAaiZJ2#~)o9=}fMvQ_ej1}l2KO@YbZ7fWoI*d3pT++E%Eb?%vCZUq7fV|#R
zahrp8)%VW8)q!OSz87RE_W}SjYGUN}m3|!Qi;&MTGfS>Z7YElrpdP9|^|%eFG8l&a
zA^lh-omAzBXjgVVW3F}}4KZcgS}heJ{m<%=^yzT?&ED1Y7wEsW4y7zUv(MV1_t{iw
zjiFD!A?LT8EpLC9%bjzLa(wwdyU($MRu?XA>@)TjOJS|wr%IJ-xF`2TRSFJab^d$>
zeOy$XIiC|Thth^dev@nVTv=(6&&i}k@_0B7G9b{SRglbR+Q<P^eJV}Mq(7M5th8sK
zN*RYLQ8H;-Qze#4Da=Y?p9!m1q?I;9rc@j^?KRQLNujJn$>SbBxIdXBIg4uR$qq1J
zj+a*;Y@aX+&3BYfj1H2)ToZpJ082VP27o492q!{bNFmI<lD+3>8%86;6DHcp+K3Wh
zOeg{nxi|x;5GEWMcl7}VgFurFHTj0|GRdk|F4+m865!|_GJQoCj8+4ipAdCp)cv3&
zxEhx1C$I0713x46c3}|Y+8F4n07MxOpCre`q6?wWvuo1IBnw(5IRQuv#{`Im0+End
z0I69>fT+TMAsmDb?VISoZ@e`J^iwg-g)mH4)_mi5g_595F`zim3ycBA*LgY_NGG(V
z;20SS_jp{aN8wxOR?P2))JjM)843gGqA3_UH##g4rf&a<?jAo3TDp6>dmi)mp6u@F
zJ0*OPDuNCrvn<gt6iy0#G=rzf>nD#P1nc4F2Sd!Lm0<~>4JLbGlq~Do7##A)McsMf
zw@@cR<cTC*k*t}17jbb@jydt(i)a{3tY^I9ve9U*__2L6czU~YLU?J%e2Hg1-$O{^
z*rj8OMYV5ln-+d=$Fv9bTTPWql`W}tt;DIf-7y#(1u%(oS1cP#SQ9skOD11P)jyId
zeKb?NW1)D@T=Aa8iu&nbs%iIZB2|766sLRd3a57$OdPtM#}$?@)q-68z0KErSDTmW
zHc>y<2Cwc}Y-n1nsk>t}6&6hFU$%3Of>#b-Iy_nb^0CG0nwL*XUKMcG%O_Bdp)Ps2
zsq&-q_G2Fvm;LfiC+DiX!|AORlm$*YRdHh8-Xj*5FPo6~FL!ovt_n3DLZj6BM`sc<
zp;YJTRAv9X{mhSx%m0NH{pUm@X7)Gp4i<6WHy^Mb+{yi*h(GwK;RlcC$lsZNsKM}o
zvEopj;e$F8!ns8cM3Vkn6U+=H3Rlqb!?B<fi=8Eh8%?&`9K5SV?^IodWgdd>)xVc`
z|KQB?f9z#B_hLhW*9;qOf`4%pE(BiGck8<haRa+{VCcrRhPV-sSB2wQ(iOTdt~UoP
z&z87=_J!fNMN?bVqR50c#48|sVR0Q*E#-V@H>$$-0absNQ!Avl2O`|;Gx;q2CZE-3
z15UHG7-8Z>7NN}bYGfKp8tJ|iyaNrx!^)b~vbKkowWwwD`wh5aVedC)wUOKOK1V;W
zwXeYERPgsUy;sFwar<Ye<LI~du2;S+`+Mdaid+JE7)Q<$Z)8hoba(dN4cS(dP_{Mo
z`wBF5969T#?W=X1XKqAQy5>wOs%n9JF74X;U~iM%7n+@*a81gv@arq;FYpyRIG?-U
z$$m@vfxp|-^=13RCGC#^k5aL};H**kdSAXK-I-0NedH+}-_u6~K14WRZpfNVx_W_o
z8X?j)B#75ye93AvoDp1A!3Br}A$Ak1f`Fx0u7z;HsU(cp0z&W<AxPl}<2}aPd!WjJ
zrSB+A(o__efc_{V_atMqd$<|l4SbsLCOn|2HnO<xVF9wO$YVHIfMpup8WAEWM578s
z;v)gkEVtM<x((?$kk4v_ESKhA<b8kght$pPaf5f;&?wP`JWE55yT^?@AKmRKfsKyD
z2YG3(0)k}xA<WV+T}qZRmBRa!T%slmp}2azK=@C{lMr;7|J-OeLU!v|79e}SJUt8r
zOUPK|(?oqVjt>=tz?kqJwLpqXW|ODE--O#Gp_5C4uW5{IM67F37Zoi-p#bD+A?R9?
z0Rd{91T$<`BQ({zCq%<o+je-{+r>QkGDO(#_IBYvQ<MJ%UII+NR<Dv(t_W`-N36}U
z3`7GEqbW}h!v<er`2X)L-XB|<e{5;LWp_^MUwUjpx7fUOp}Bjmx%<P~bn}ThOXGxP
z>V;d7Qa%6D?y0Tw`BjTG^%B2WwV80R%ul8&PtV)?RUUG-{YKx1?WxmGri!0R*`K-v
zd3M)VyC$DV+p8yZO9if1M!q^S`9iv&Zo;rS_DNyIRM*vLdeft+${n+tzyHXGiTQ)4
zQak%n>rST&`zP`~dRT#*mV#Gomu!<;=Pm0NHQ`%@rIXLS{^I2qr}wAJwxkQ2Abh(w
z>E=3~?0d*7-@wdzDr+W;^Oo{kXbL)yWxdW>wpg})YH%_#-FU5k=HRvCv&PxSXF=ZR
z9Q$>ki>$U~8|NsU99y)PPU#ly?$<(7N6=~MCT)1xYI50NqGh$+%QVZDzTC=dSkYgL
zZeYSTt)N^RR)ld6OWU?eaGqtywE{Y#4+KKMpHlC!izt#kDE~_`gYGc3-6l`fdms#b
z$ED=uLsv?^ek|kUuhw53{nnQ2$@dQb{tGE*N5<ND+k`^dxkFRT5i7|Em9Ij;dw(lw
zkP<je!KWFMj`EW0hrOD1Hgd`E6tVw<R06#!VWPCmalI<bn@v;OQc0zI^PpPM)Kt--
z{sdGYP|v8u)LGqtiXI_YP}89*F)8ROS4&Es*3@ZLTX~<MEh9{)O!&*gpLswP4P?nY
zW-Rqj;8iWz*3WYNma}@TqDLJYjD2$%MV*;Ct82<m87{=Z00lcsa#;ftd9{J!s$f{+
zGp|}>l~`2jfiNi0miRO^?yowE=cE4WUG-^td}xjP_xPZlwt6>#Xw1*ff4@Cz_@vz_
zaR>IAH4gLWTx-y8(au%80LE`QOw$IkXG|q?HE9K_*F@Rnni7s2X$3x;w(p!J@*i5l
zo~wk<p`8J*h9t~wpr#oqm<nx+x#ny@HM%?VCGGz0*I_@aVltRK%Gv~Nd&*ZTzaY_b
z8v1?a5q<gB9}x_wZcD~)XZrii+aUV#ILH6ac7TX4zTt@`evHT;`T^nN$x^F~+DbsX
z06iNY3Ji|$0`vuq;l?ILCQGCfN%l#YpHP%?k_i#nFHtgT*ajmpXe$wq@qJquaoqsg
zEAB;%ZqItWJb#9+CqrWZN!rjTk!Dme!3Sd`HnTA+9o8J~C*!KPdM%p9gZ}A;A>VM0
z444AK7?105=gF`L`d3Lj_A+1zBbJ9Hpwhl?_yzYLJod@>mzA3zUHc?`=kHL-AbCUN
zjgS|DM?xC7C-({e4nH1;q(vYalniZ#hQvHFJc<S&gi8vSDLD`JOyNKjw?_icGny1c
zJKaeaCXp+UDUHwt&*PMIY=2I5EK(gmjHnaIP@EB^QV0tpd`NYENZwyiVO>}>U!WU;
z0?d38qvwSGK&kp5Oo4EH6ZSxuKtelNqaO`%`T>>Ql9M>iO_O&Lh0w`l?{`W$hf5+)
zp|7wzCUq}Ax>Q!R=qj4*{JL|&RX^vdPrEiPxSHo&&1qN5f~#%L)wbZ;H|N@yb{$x7
zb<erF)2?G54rN?@FuEx(xYPy3X?f*D*RA5x$uGRQaoYHH;#&JmUwU&#hVMwP>r5B#
zny@TADD&o~h6&4J-NuQ0<`H@Mh1bg`_TMV4nA$M)_;lenPrrJ8;<3dd_v@Q4Z=TwG
zP4}(CsiMY-gG<h$$!#w^KehGcFD{mprz#pVC0iCsTIWhyXNoc<+h-D~&f}RKCl+@2
z=63i}r}{EGPG?F^PwH>F%ck-(?)n9{XU^@pR-17*&lqNoWm@+wv>u*oJ$xgOY3<6m
zyFM<ex!n9}%k&ZGc}urU>M><y6>n~v5~jDmu{%@RaM^IP6b4T><$-PZI>rRb^p%sB
zPTq23?5}n$;ioQD-I7_?ns#qxu~(0>pQ&hOU31#)%@K)Y)@@0<o34#!+}oEO=E9;o
z2CJiB+08l17wpw@_Ua{9$>cfu+|n%zrLA+Nt=}o0dHjzDZfw2rg+F}+R!f=EqgW$F
zEA^|@x=VImw|8(w>z4O$uJY+;Q_ig^%hoJ;s1OB4Y(qjt|JM2_cAfR%g1B+o)Fjwt
zWN!@#;^1AWeXHq8(=r9G?_nwT3g8N)f!kO9fE@|oV&#*Np|~u41tWT0$i9ZURaI3d
z@D>S-Xt-QvGm4x<x;cn4Bx+vce7g20Ks`fE4|?h$+Ug4!uW<Op(O;uC{i@zH)UYzF
zGwQ1%a&Nd`)tiH+URy3p`#|NErJfD_s(?qqz;O*ly{4>6U8`kHS&DanOc|I%oAQcj
z(p1Ch0hMObl*^{6<@)tm+M(Z=6*y^9P1)4ktJiNBPz9U4Rm%9XzfcL{LW1Oac_ozn
zRlcLQE}N!=6pqGc+#%C3!2NmS&&l=zYc8B=pLJ0U4;cQ>s{4n9z+h+ycQD6Pdm>pq
z4?IlP%S^TbbPSS#Y!e7QOU491#^1!agd5~V;CXBU4OyrsZzFk|$fF%EG>}I!KB1Ak
zE#wg)G>kvE<?nflgs&qY;o&nBBIS+fg070)M^)AM*%Vpc0%gBaMm1Ux7O4fo#|&}^
z2Pt%vyvN8RFeMtIFfIfai_mCtP_%{<F$fIt1cz2RBwCcG6NEpfqSoxQ333pae66~O
ztP|Lv#mZb5S<Ym@YCSeB%n0n9yL_RzZLYX&rY&9kC@^1H#n*o0<%5&@mmj;fbwbC?
z1u7TvD^vNE)5dokR~<7I^L)o*-R6b5_PM(D*`jpat~onD(G9tXqYxIJ_&){f+xmB!
zuQt!@oUh%rs_YTD>`iK*W3H}awk=)P#S&qtT2ek`gzahF^jNxjYuep5VO;`pT$6Om
zxqgXnnX#t%4)wQk>x?H|xob)OU98!>R8v3w<cuy|)0Pe4e%D*gSDF{sS1(m;c&q9g
zRd0{H6S*2mS8Q9T*gaRVJ6*B&PDwtAZ}M}AZwO#A9P?|deGK>7NlBIvT5D5+ho4gF
z@&=(F+TRTQ6p^(lypKQ$G+l05HXuw=m(k0ii5D~0s@o<c+>43i&$Fz2N5MpBaIFgg
zb^$l&Lg4-BH<xiOojY<ti1Ha8p}oGwWicAFi&~RQQFdL-d~e3tRv#YZ@fmi|QoxbX
zXimE-)kpy_Ud1h0D15e+Ns4eRVa}oA6c~9LwOm;*;pa8(Y?-!%0yTfdHd=DEu?;3M
zImcT5|8uOluY}!qtX%tRSbJ=OUv*w{_Y*grIZb^5=RIT}dGdNh-N+G-0bno$Ljzbb
z7>Dr2mFNsd2P1fAK)J6h8pmM6CNW?Zp%2T8`wny|M)(H`mz)ng8}dusBVEM^4AD(X
z;eR48%dIi)OAD}t?@--5sTvdm@@q%np<vcQXG^Qr|9dK1p9@B-C<TfRdL)b;?Mew)
zgGX|`viH*7AHjO-+`s?)%z^KAeYb0-Yq7X&p}29b7*?g}VsFasy@_I=`XTf$ELtdR
zm@90UDVnYOa5PoekS;tuk+)dvetqZVom2hk;`#}Tq69cFpI^OHTs~Dky(v@SS*Y;N
zRd}xnnF=(z?K`I-`9G5BJhsr;JJ;Epdi+$TvoHO~=~VIQl>IaryOJ8=*KdIo5lcm7
zQ<mw{OnJjXdGlO(^R;uC@~!EjHgGfG+uA2iGV`=eb558R+|Q)dg85~jnsE7Nle9~h
z10YLhR|VDpY)C$?Ypx6k{{!9JdtZRKdVxUC!x@Ng69nIZqA~=33eSfWnV!;VU^pU;
zEd<P;(yfK&Pf2eG4ay)9&2Sv`i@0T_^+Dhqd_4ovD6`fLJpg=X32p!^6-Z}+I)Jh+
zm#}>GZ2<sgnV5ta9Zbzy2ZA*#?pDFMCe6570IP!HY%NufzN*$LtGXPujH_=U<SelY
zO(-P*!c<hlao#+y%9_PZnh|hBL9BJT?iOe72Co(iEB&o%GshZ!mNp64ddxkN7;6Zl
z%nGlBG)0-<%F5uTWbH&yQ-X|4B=x5h{WJ2|0U)@!3Q)4c^5>NQ6Y~BNo(v#K!X()}
z{S=800gy5k&Q8w9C=O~?3m}8<#IDAEv}yM$=-8gJYgD4pR}Nnqo;>$E&q7@(sWNXt
zm=So@LNblP)VeL1(x$YlnPMZaI$7eBH&g0KyBc#unlh!E)2@bVk7Qh}kdzb@EOTau
za|PlO+l20pn?oyoulq0i7fPDuN}8@co-S#b(Pc_tV@vRL`}g*~zjvY2H`htRp)@R(
z`p;xKpGZIQWXkzu%JSsBG7lBV(IPzv$N`Aya)|`ABnZ0^Wne(hC@T=xQmFuX!oQ*`
zT*RPbn>E^%*7?%`x)0U>x_W|h^&i>Ymwm4fTpq~SYYD{FfAmNP`&|p^>hGmnX)FMA
z_7#$x1Wd5$f6zrKj>#NQ9;QYn0526}-lHQVUTuwA4Zf5^7{{fjnCZ-hY+D+TXu|W6
z^l+plds*nL`ev(3eP}>wOZmOu>q>FM1Nvc)Qfm5XRa*ND{TR<|mjJ;i9ar1o=fRH0
zW9;pw!BM<Z3affmP%oR&D-0{o%rryK$MJ{@?%d#QCia*p#Q%(kZRpzq-M~*SP4G%X
zTX?nV4_@d0>i7IFBzL!d@$3sD&0D`H)jgZU>orgUNUxF(1qSi#D?J$X96FG+26!M=
z9w0q^%HDhp;;OVTOtzo&W~oi>1A>GI8EZ%v7jU^X22npr7hs^1gmB>;K3IO|>A&w~
zhJVU7pv^)iF)NS4+oh|89r8u4k7KVx=uSa2*F<MhcF%Jc@d)rSm;5>a9z}V?5Y`DK
zf)QHaT{nn&Z>y+FlC3EoTTM{woA`^;y(Q}|dgt)@xUfgu`9L~R6+L)&;o=l%%{^YV
z>O4-w3OF|Imp{WmQ*Dobf9%GFnMY>#f4AcgJAN?szgK9Ff2p)W_y6uI1rr^3?WYM=
zqzE>684Bc+XNLzODTv`n9>X!xy)X&75N09m#ChQ(gn&=JLS<=h!b(}%kitJw(m#=B
zL3>%ETYk!r(A0fwY44Ha{D2zxGAe)`TQ1k$wb=h=)nakw#K~JAQ}wCJrc8PBLV4R<
zIRq$~@<-D}J0{GFF8729SBBp$c{efN_V@>*ANoEROSe6qZh1WI@nx!g8Mkl3`tSnI
z@|3eKWvROt^b@M5IbGr5JqYyE8?XZD>G5=3Dg8Y3=Bh|v?nY5YiJDDiw>fy12iZ?{
zBq+cf?I4WAzVbB$GzO%)E<IhSAk)VCNGnw>C=rYIEv3Fu2g6hKaOKj3<KC5F^|KHC
zq@Og5AL_;HwK(NW?WYJ8750oSF0y+^vg{GhC!;9f0lg*%7VLZs=6&I$7w_ac<fvq@
zCfX8s@<0lYH{>{>Jmu3v1Zq3M4&IXoV>V&Xdp_co-aJzJ6N)kIf*YP_J8=?E?fUzk
z>^&^qE+S(zTu>(Uf~J@mz>S`HPS6UdgcBaIMYm#kjsbaD4z2Dkwp2g09Q>rHYO3!o
z{}unWV7h4Ag!v;2Ua@^Wb~!eEVcycRB$-B~O83lOm<^}gUGtXiMQ7<mL3S6QHRja)
zNYL$m`_ZST-H6*hw7VF$Bpjy@0mFyy%Ud*ZlDe~fX)_hy=HR7D8rg4lNGQRaOdZ+f
zmps*5K00hlX}i(Ht#Jy%gaMAq)tuoK1;A#x^zcR|+#s_k=IF{_F8T_0MhqHpj>?N@
z@BWPFvUh@VSy1~_^#UCxOrL3dFFi`ZcC!rlAVQb-W>A&3V`DQUNUC$`)|;}`$o#Yb
zri%UuJte~O#y9(veJ?!|f{i;$*hji$mmG_;`-A-Eal_^gemvK`Sv<GMUdmvduI|ud
z5lXOlLKS(X@I+e}??Xf3j0c28D_*6OAFnV{N18ov5DkNo1gj?+V&^_DqY?S3L?iW>
zd#6**-_m}eCoV2>zsTdPJ3lF?nmYB?*(+z$1x+CLolu1?IO~4otXnFoTrAo-TlYQh
z``*;f?hlLqTVKj|`Y)gS@L0<A1hmS<pyQ?2CZy*PQtm_ZmcxtAqKO<Pezd0;V3sHZ
zB1U1P^koRdkh=EC?tAEOJsIIC2jnE0l4J@7iQdkm1kz?b{IJm58d-g$)x_WC;9VKH
zn*3&TnS$>ZuoUUo(CJki4W7aCml2@hKU5!?^RX}Ws60T3kbQUyTbs*ecb6@JY0-^-
z2}81-#LIvnbMWFnk!ypa$qu&l<g2WBvK~)=GAPeCgooJeCcGKPQe`9$I`mXv2v3m4
zc-9VViebbX3gQ&eT`D02BM{twjq=P`gvdt)X|QeWwE-6YHbx>D7f(}&vA6~aLbGKf
z;zZi5%EO)pyih2X<pRsdV9V8kI!q%|P_*Evo^w=Bji()r6Z%EEjah^LQ_h-2NcN6i
zIyx!5+=Jv-UcB_;)PXc|EH!K;!I_c3OBxx;H6{L>4T&JLNB1+5p?dT^jLPzq3$0XW
zD|v0?5pgSQCy&ml0A(=8z7>z}vj?Q5%ea!lY??BEm%O*g`+z)}2;uL^`zd)uaS7#g
z;`7-{({gqCCp3gj<k2Y+NbDuF!(;CR%fuSP4CK)fPAtLuCA~X+TkkMgmaDm<hKon<
z8u1X~&*?S9yG3}%?&tKF-EvgN<rUr4D=%)W7!X~i=-neaTnJ~E#P2rYf+)Ksdbd}H
zTQcmb%<>abO<14Gl@Pl7q7KS?roq2^0QI*}{VjJJNC-zk_zkQ-u&j)cm8&9pw+Unu
zBmnS=xV*x<dSENcEnKG1-6|ZyyL1xSab&B<u%Udz?uXy=)JY4Gz^9A)7%a?56wRhF
zlweQR3v^bQN>S1#n(4v7Ncf!e)M6fhDK;;=SkE5)m&|w>=T5A;Xo`-WkB<o?t${f@
zT|7uImR?*>6pH$Ql2=mZ;<bMK^Bv<ML3$0^MP){l;Yb26fzm(7Adm`0+D<XnA}~xS
z`kZizl6RB0kGyY_M=(sI8C8h@X%@(aPPFX8VZ-a^djvws_@7I>1`o#AcsyNi(;4xs
zzs~+Qob_YQ^D(#i$6Wcx+_nt2?QglFzu}7ij%)cb=lR46u}#`qb1@GW+KNgi&%GZ0
zS~$f&nriP&mp+~@^j$1~?M;d+Ty&SihxDktBE`9(6V>P6)jM^#LIn?NriXbrLceIl
X%i+H$qPLyz)<2V{<5xKH*{J_7ZoY%*

literal 0
HcmV?d00001

diff --git a/temp_backup/src/analytics/__pycache__/market_analyzer.cpython-314.pyc b/temp_backup/src/analytics/__pycache__/market_analyzer.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9d1bd6c0ceb8262cb5b8dcbf17a5e134fb1ca282
GIT binary patch
literal 8295
zcmai3TWniLdY(fbUc`$eUu4PF?MSXI)4Ex{%38;^EGw1+Nmh<zDs2=KN7RwTnB-yS
z99ofz1=MY@>i_|o0K14?q^PmT!v@=jEZT=GP;|5C%cf5x6hYjBF6y9*7JZ{uPQ8!)
z|Cu?|kxZ`#;GDT}X8xJ~|Nd`gx@)5$0^$9CI{NNs#|Zf^%s9a(5<5SH#5z%lMeY$*
zx-E@)EKfV8J-5AMvL%oCEFa5zZ~MmrR-m0UkPf2CjYRdm<2mo;{aC?w0<7E*<$?1a
zUV2^(V%4y^Ak@`3byi3XK|b6_()CtYjT|Sj=vp>3q0#%grOZ%0yP%Bed73h<R8kpE
zFBvptEoVX_MrJ;h)lDU<ub7IdXRTBQB1MDtMz%M(tfjKb10$W!=!#`1IchBAle!}M
z9n-b6wXB#)gX(>um}JYN*?cDEv3)c7Tw1q-cXC$B$ZBb*s#(x1Et%F#Gb!X?A4z0(
zPC{axyhAJrR?l-UY<X45l2wo8Q@xg7m92p4vx2H0Bnv?QDA8K8n1{vWW1XiKQ%Ma6
zwFC3{bUI}&LqgX;a7bkJyhXM2=VUTgV+Rw7td`Le2|JWXWI(PorXz{OU*@&6sHsaN
z7E{!;(y&HZ1F8dw#DbB;#ErY-<F}{BATc_uvc%-*-5GUw+-^uDObh0aOlX!xQ}cOC
zHxmgO1_5_aTKnyhp}D)JPR+TSqvPYsrSoUb%-z(@`<9WL8;6z7=JIJ%pVM+Va}HOn
z7nVJ#n`SDzG-uM}oCYhp3jLdN8MeO(R<NeizT7Gezz8hD@yx>z{gC`tKJm1+as5_F
zMDO3C4NkK?yCF1<><mJ+=H8D(miGuTK&Qo?cr1UMoRd^9Xj~BXMz(@+V%4Y~*dL!Q
z6z?WXeoYxMvKFY3PL)N2DtyWc%B7@0Qp=o7q7pcYAVV6>VCP+ku1M=7PVRXI-zQ`N
zN?XXA(z4V~rlq&Z_dKeEN+Z%9k(Myu0|x2@Xy^SCbu!lBr2iL%x9ci4kYH+UN4fSV
zz!b7ex?P*hQ_wam!Hey>9MvDBjJzodQ27_rhGtO|1af{5I-^;3G?8Emx{yj*pddI{
z=Q=%tV}j0U*{lINgBsY#l^WNs&s}i{nwrB9{qaZS2V^@)jx-nLW7{=ElD0$S@X?~&
zv>nDwgfz51zWb9qMfvo06bov}k+#SBPZCAB`ww+cz&9WDBNn0&_|PauwHVc5RF6?B
zMr{z;2VB!w%x6LUDe4uLS42wc>2xCTxkNjl#>CCM&V-{)kYo{?e>3D@{_Esh`E)r*
zy1LgVOAy_8j`R8(!e@qm*`t=I4;XengJ_Rhw(e6)2P*0SLTi|U`cOet0Mc}Q1wcch
zF2L#lNL_W|cOvY6HLMOm)m0Z2bqKRTHL|C!R*hP9U;_1OEof7{UCXy|7;P0yGye$I
zBqVaCV+G3`K=WwY%8H&^TDJ5BMSGxAZAn**#eIk@fS9YI*%u1E4!V_3YeOLg0v`}y
zo`Lc-g0k|Oa%#E)>i_nMa;EQ6OzBomO`=k!Q)~J{BF@k~F~am)VQtRELZR7Zo$9>f
zUZv8<ka9Cc^`zyR7&8;)RPR9F8FUfwiLvSQDlZ#kNU^l+@~J(B6~q6|8+q_M^SZU7
z>sh6DfVrW8P-q0*Mg`d`lVTEH1)0tcDFTkg$Xxg<N-wYD2rcUFSRmO-%G8Gxf!Lv?
zMHc@1+R`}|Hm)uG-D3=2U#pHyh#A$fNoDw>dJu*B5e&T)v52-nWc#UZ<!KhIq#d#`
z4^^;=S&R!0XeYK6zRl5}Jy3878ko$K*2!}c@EzaA=oh{+etvm^<(Mh4*+XL%(2C#}
z*+HXT=LCg}Nja&V#H9|v+4E?&5Mb7xwGTs;?Q;kK*E@x4>+)(&r}IV{=mhNM5|W0O
z?qFYNW;ta7d(a<1J5$l$1CI@iLg=%ZOWg+wvPg}LvS4IVS#X|U53o;eYJoZfcq#*!
zF4I1kCafQ>DoZmwJ*jX!ZAxSv^%kh?pm*0^qwITG!;8?bc@Uy?@?C_~w0&E5=%a~^
zy3Rtdb9Y_EY7sxQ!?oS%f#}s*p?aBacwv=grW<xm#piH=PD4Mm8=^m{AryxjhM$T0
z@<-H!L%xA+j6T0m#?P-C*yrmDtdLI}^~?!K6_`T&D<sy*oKsGwP@H`sE;;Iv^Tge4
z<$Vny+vD(INXI>{esQVx1$SGN$O$>`T#b`fJ#*--SWu<7*I~WR7tL&KBQ&C=lYoYn
zz7JkGLRAM@0MTK(WO7WY>;pi<UOxOXklTY{Lkh=I1uP>8ijEl0Z6*{_ndOUd1X2t1
z=Y8*@9^fd_ZvfKJ5|_Dr%}M+!=BKBike-3auAd&AoVh&#QDS!V_N}oQI~u<;es^Nj
z$$B%|d$t!W3RLYJ{tXSrK2R}_1&78&_*^*WF>y;zo>YW6H71hI!Y16!?34Sgsd~HX
z(3?38(K>n7L|VJIT1JX3BVSEyw5WyP(XyXJk9=CY)pV)YbZIkksZ`tkS>IOY)nez>
z&DyKqd8O#JvX|5}Jd2R}Lm!?0^w6W}Cp{ZS`#0*&6oO}7v__%SWCw?Rr8Od2BUYnL
z%ICap-snPaJDlM}8(Aww^Njg8u|Y%Olp7J<mXY1P+G=N^t=R<;v)WU>o^1lr=b<uw
zev$v>-CvJ>eZKJ8oy|*AtmZ}2Jpg}Zru*-JLdcP)95jLoC&Z;Gx4DaAml;b_Rg4$<
zdWD(hytB$w&>3a->o(<_JbQkRIs0a(rq}^`w>_o?e+<kXkhX=)MVy)`WTN{*&Ob2#
zJs>mP7Y5<qFZ6)ey)Qry^{z3H!R`w=KY)u*^}xDSSr^qC_o{OBHC26akLr)hYCwf!
zP_^|_fsIyMvwly1AQza`<AHPN5klG!Y^1{z9USB5+j-3da%f~-+zW9+I8jv1Jz(Zi
z*&)FV@l2r@^Y0>oOY&10I@${CG2G$(?L#bfo<~A@ip`~mJLVoGqr(X|X~vWlH(g|H
zhQzgl4p#!oDZ$VB#c8=;P%nonG7KD2RBc6ZqoSZ}ViY8wNp_(?SCtjGzW^aJIRlR7
zz(<h`njty_drFZS*@r4(B`VFhf4z*=^$v79UDyFmz)gmfwx8F>!VGHpAF*G00V5;`
z_5mSOVrEPoogTY0ev?B18Yfh8*6FH5rKETaiUn<NGHueUSb(#k=*Q^|j8FjE$5zk|
z2*FJ>HUwl0oa02rnLoi>w(NuAJ+*Q8xA{AW)=9aIv?^OIgT<D?FPpb6tHsOeM$0t7
zaH%b})pohqcKOSZtt(T-D^r_oQ-xr2skvvX|9Y|i`eyU>La+(Ix~Y4!@wJB$NJW~q
zj`bIh^?wuY|Bu5*KbiP=;)Bsr^ypU0K(S@uo9IBL@b|p-;V7K=qRo%SOOe(`f3?*<
zRBRtAMLG({FP0+BkKW#Dy;y98WP73GY$?)KP|lPh4UfW4rngR9E}ppj<=|J-Tch#f
zXuQxkTdoTp3Ox*z50m<aC$|c9-GyNHi>?6q0|NBU>kz%#6)<AvI0L*^koGx*UCPEa
zEG$%L*DJ`3*wbq;9<8{BSNnq>KznllBIXZTeSO;mqTjY1FMIKO_9FX!TT}Z%{$u^|
zOrfr0v!-*~2X$O%9Lfp8|3!BG1xmm(c&d2@Rk}wU@|5$wq>abbMvlw(I97vlw-CT~
z7naB6o(er9B<Gue8<(7Kt}?4QplaL;=qO(jxSE5@qbV0jbS5A7JKhPf(6{zKa6g%a
zBST8hCV|xH*0QmHrwpOp{KP;3GjInK)5rtu!een2M#q`Wkn%bnGLYQ8u3JgKEQT<y
zeH1OGEx6CnC<3Nv*aN4Q8~`UkFw9^X&ge>)xC`oHxY_001a&F*^;L5pfKJ0zqoSG4
z)VbLTkwSJr^kfH}P6XittdN<qaR92^$;7<2cL}hU-LAX=%NGmKTkzp+CoolM%m0d4
zW1@1L<Uqn<<gf!2c=$?)6||4P1WnwH{u=wkSu#*LcH$q}AGiPP=oiBqM=w6CEkVBj
zasSUwf1zz08+=&zG<@vQo8N@nO0C_St*?F6zS%PM!Nk*+_J<R{KcbXcx=Y8pO3g7y
zJgf0Fh8~8?wLtc^>Xc%g@}zmAuD1~EWtz?|Q74_l`LALJUVeR5foVDllcWd%c7)%{
zaz)~}{yi)Yvui9-jH^jijF2MG6BwO@$bG5BC^$yA$fgAmyVTnD)cS(uPSVs|kPnv*
zH?B_rOdf41$PJ|?jGy`rJoms6eaB1u2g{f$AB5C%4;<D+8Klami67bl6~wXwWlWW4
zBvuF}&kFy~;}}0X4(z$5K=#I9?+PTU-Jhz`9MtdtN2>})RK0V;J_+gH1v}fvoGTrJ
ze{k~a<uHfZWP44E#w2!=!B(h~U;A}pV~l<ZKNHsv52#(z4QZRjpTZFpbN>iuRQ&$)
zIbQxE)x}Xj9C@b`n9-h7YGvZkLWDkeFS+9quNskqHo-k@aKZKkXj5}X>i6aV1mPV=
z29Welxj@coKHK3wGC1Is1Dd$I#8XxJAAAc)YX?FpTyJw=1we(I9D%o@I8nV(cojM;
zALsQ*ad+!|d+PBY0H^qmvy4Kn{e9?`4%h8?5MIt_v}`ZcwFPZHts{h`!QzoaA(_J)
zmJEL)Vl-kebRwg2nB$AAOmMuLx}{S%U?MEyE6hIF8+8o`iUMpiG`V(M8P~NeW1eZe
z@=UKPIm1j@DZtcIf(fp9`_3+|d18IlT4(hokip&hNVVI~AqB1lW~MD_0LiV4P@#p$
z9`W&Ofpf`JHVNy*ZeqDVP|W<&l5WC%m3T=~G6p`5<uiS2GgHol#ro~`YUXn}gQB09
zUlq%?>X7s$YODZlO=aPn2-{;S`P_;|#nSbyUAw`btegf06}MvZ2K=ka6CJ!}E#Nxx
z3HO1xZ%jICvpb`tfv<5wOd(qi-llY#g*U&7ko=VlZ|O`k4-a~4cc<{tDZ}3kg`7s-
zN*l&~h3cs+ylf=7n4(iWQ&5djbVme`alu&0x~05%LyTmmmf&_=h`2jl=8{afFRb<m
zDHt(|Iy#Yh4~B;~CGoDOI32RH!Mp~lj`MT(Tsbb`EFYw-)xNdc6WjvKWdoi-VQchc
z%0vTUy%>v&rf#wO$chHxyecXv_7b)Vx|-w<j0*S}WGvwCs}AnW6E-FbyC7z&=vs1F
zft`kjJh<jT+@pAupp|n2Tp+{PM#lChmyJ|Xhg%4|N?^!rM_k5j(!YRmyB?2>33oYi
zF6RHa`>z8I;jX9mpq?T2&mqC?Yier$Hu^#I4@7PXJqwYS=E{xrhyCm0JAX!e2cI^!
ze)9c~zyD-z^Tgmr<7M!LPXm!3`#y<&9Q`KHS_;>HH1Of&(vg#ehOR=_8^5_wcys1A
z-G#feg+p%@B5#$V2S0lA!#{s%{nOgELbScme&GxK%aJeME`)~*@-TB7ld%ZJ3j$ll
zGbcs&VEe7r99Rfv<LKS&Fp@>)@O6{odhnHIOp5t+TmwclIOSkZbiph9h4?C+hgxt%
z#6*%+_Db-+=J87McOfDj`A^dMJJRvY?~|Gy-h4*jQ?4hT@N<t}>M0Y5z;}DXWz0T5
Tc*7$B&&T-r8#1~;Hr)RM9Qy#P

literal 0
HcmV?d00001

diff --git a/temp_backup/src/analytics/correlation_engine.py b/temp_backup/src/analytics/correlation_engine.py
new file mode 100644
index 00000000..a73b2820
--- /dev/null
+++ b/temp_backup/src/analytics/correlation_engine.py
@@ -0,0 +1,358 @@
+"""
+Correlation Analysis Engine
+Calculates statistical correlation between social sentiment and Stellar on-chain metrics.
+"""
+
+from dataclasses import dataclass, field
+from typing import List, Dict, Any, Optional, Tuple
+from datetime import datetime
+import pandas as pd
+import numpy as np
+
+
+@dataclass
+class DataPoint:
+    """One (sentiment, metric value) observation for scatter plot output."""
+
+    timestamp: datetime
+    sentiment: float
+    metric_value: float
+    metric_type: str  # 'price' or 'volume'
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the fields as a dict, with the timestamp as an ISO string."""
+        row: Dict[str, Any] = {"timestamp": self.timestamp.isoformat()}
+        # The remaining fields are copied through unchanged.
+        for name in ("sentiment", "metric_value", "metric_type"):
+            row[name] = getattr(self, name)
+        return row
+
+
+@dataclass
+class CorrelationResult:
+    """Outcome of correlating sentiment with one market metric."""
+
+    metric_type: str  # 'price' or 'volume'
+    correlation_score: float  # Pearson correlation coefficient (-1 to 1)
+    p_value: Optional[float]  # Statistical significance
+    sample_size: int
+    confidence_level: str  # 'high', 'medium', 'low', 'insufficient_data'
+    data_points: List[DataPoint] = field(default_factory=list)
+    lag_hours: int = 0  # Time lag between sentiment and metric
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a dict with rounded statistics, an interpretation and scatter points."""
+        return {
+            "metric_type": self.metric_type,
+            "correlation_score": round(self.correlation_score, 4),
+            "p_value": None if self.p_value is None else round(self.p_value, 6),
+            "sample_size": self.sample_size,
+            "confidence_level": self.confidence_level,
+            "lag_hours": self.lag_hours,
+            "interpretation": self._interpret_correlation(),
+            "scatter_data": [point.to_dict() for point in self.data_points],
+        }
+
+    def _interpret_correlation(self) -> str:
+        """Describe the correlation's strength and sign in plain English."""
+        score = abs(self.correlation_score)
+        direction = "positive" if self.correlation_score > 0 else "negative"
+        if self.sample_size < 10:
+            return "Insufficient data for reliable interpretation."
+        bands = (
+            (0.7, f"Strong {direction} correlation: sentiment is a strong leading indicator."),
+            (0.4, f"Moderate {direction} correlation: sentiment shows predictive value."),
+            (0.2, f"Weak {direction} correlation: limited predictive relationship."),
+        )
+        fallback = "No significant correlation: sentiment does not predict this metric."
+        return next((text for floor, text in bands if score >= floor), fallback)
+
+
+class CorrelationEngine:
+    """
+    Calculates statistical correlation between social sentiment and on-chain metrics.
+
+    Supports:
+    - Sentiment vs Price correlation
+    - Sentiment vs Volume correlation
+    - Time-lagged correlation analysis
+    - Scatter plot data generation
+    """
+
+    MIN_SAMPLES = 5
+    RECOMMENDED_SAMPLES = 30
+
+    @staticmethod
+    def _calculate_pearson(
+        x: pd.Series, y: pd.Series
+    ) -> Tuple[float, Optional[float]]:
+        """
+        Compute Pearson's r for two series plus an approximate two-tailed p-value.
+
+        Rows where either series is NaN are dropped first. The p-value treats the
+        t statistic as standard normal, so it is only an approximation.
+
+        Returns:
+            ``(r, p_value)``; ``(0.0, None)`` when fewer than two usable pairs
+            remain or pandas reports the correlation as NaN.
+        """
+        from math import erfc, sqrt
+
+        no_result = (0.0, None)
+        if len(x) < 2 or len(y) < 2:
+            return no_result
+
+        # Keep only the positions where both series hold a value.
+        keep = ~(x.isna() | y.isna())
+        xs, ys = x[keep], y[keep]
+        pair_count = len(xs)
+        if pair_count < 2:
+            return no_result
+
+        r = xs.corr(ys)
+        if pd.isna(r):
+            return no_result
+
+        if abs(r) >= 1.0:
+            # A perfect linear fit is reported with p = 0.
+            return float(r), 0.0
+
+        t_stat = r * np.sqrt((pair_count - 2) / (1 - r**2))
+        # erfc(|t| / sqrt(2)) is the two-sided tail mass of a standard normal.
+        p_value = erfc(abs(t_stat) / sqrt(2))
+        return float(r), float(p_value)
+
+    @staticmethod
+    def _determine_confidence(sample_size: int, p_value: Optional[float]) -> str:
+        """Map a sample size and p-value onto a confidence label."""
+        engine = CorrelationEngine
+        if sample_size < engine.MIN_SAMPLES:
+            return "insufficient_data"
+        if p_value is not None:
+            if sample_size >= engine.RECOMMENDED_SAMPLES and p_value < 0.05:
+                return "high"
+            if sample_size >= 15 and p_value < 0.10:
+                return "medium"
+        return "low"
+
+    @classmethod
+    def calculate_correlation(
+        cls,
+        sentiment_data: List[Dict[str, Any]],
+        metric_data: List[Dict[str, Any]],
+        metric_type: str = "volume",
+        lag_hours: int = 0,
+    ) -> CorrelationResult:
+        """
+        Calculate correlation between sentiment and a market metric.
+
+        Args:
+            sentiment_data: List of dicts with 'timestamp' and 'score' keys
+            metric_data: List of dicts with 'timestamp' and 'value' keys
+            metric_type: 'volume' sums values per hour; any other value averages
+            lag_hours: Hours added to sentiment timestamps before alignment
+                (positive = sentiment leads, negative = sentiment lags)
+
+        Returns:
+            CorrelationResult with score, confidence, and scatter data. When
+            fewer than MIN_SAMPLES hours overlap, the score is 0.0, p_value is
+            None and confidence_level is 'insufficient_data'.
+        """
+
+        def insufficient(sample_size: int) -> CorrelationResult:
+            # Shared result for both "not enough data" exits.
+            return CorrelationResult(
+                metric_type=metric_type,
+                correlation_score=0.0,
+                p_value=None,
+                sample_size=sample_size,
+                confidence_level="insufficient_data",
+                data_points=[],
+                lag_hours=lag_hours,
+            )
+
+        if not sentiment_data or not metric_data:
+            return insufficient(0)
+
+        # Convert to DataFrames
+        sentiment_df = pd.DataFrame(sentiment_data)
+        metric_df = pd.DataFrame(metric_data)
+
+        # Parse timestamps
+        sentiment_df["timestamp"] = pd.to_datetime(sentiment_df["timestamp"])
+        metric_df["timestamp"] = pd.to_datetime(metric_df["timestamp"])
+
+        # Shift for any non-zero lag; a `> 0` check would silently drop negative lags.
+        if lag_hours != 0:
+            sentiment_df["timestamp"] = sentiment_df["timestamp"] + pd.Timedelta(
+                hours=lag_hours
+            )
+
+        # Round to hourly for alignment
+        sentiment_df["hour"] = sentiment_df["timestamp"].dt.floor("h")
+        metric_df["hour"] = metric_df["timestamp"].dt.floor("h")
+
+        # Aggregate sentiment by hour (average)
+        sentiment_hourly = (
+            sentiment_df.groupby("hour")["score"].mean().reset_index()
+        )
+        sentiment_hourly.columns = ["hour", "sentiment"]
+
+        # Aggregate metric by hour (average for price, sum for volume)
+        if metric_type == "volume":
+            metric_hourly = metric_df.groupby("hour")["value"].sum().reset_index()
+        else:
+            metric_hourly = metric_df.groupby("hour")["value"].mean().reset_index()
+        metric_hourly.columns = ["hour", "metric_value"]
+
+        # Merge on hour; only hours present in both series survive the inner join
+        merged = pd.merge(sentiment_hourly, metric_hourly, on="hour", how="inner")
+        sample_size = len(merged)
+        if sample_size < cls.MIN_SAMPLES:
+            return insufficient(sample_size)
+
+        # Calculate correlation
+        correlation, p_value = cls._calculate_pearson(
+            merged["sentiment"], merged["metric_value"]
+        )
+
+        # Build scatter data points
+        data_points = [
+            DataPoint(
+                timestamp=hour.to_pydatetime(),
+                sentiment=float(sentiment),
+                metric_value=float(value),
+                metric_type=metric_type,
+            )
+            for hour, sentiment, value in zip(
+                merged["hour"], merged["sentiment"], merged["metric_value"]
+            )
+        ]
+
+        return CorrelationResult(
+            metric_type=metric_type,
+            correlation_score=correlation,
+            p_value=p_value,
+            sample_size=sample_size,
+            confidence_level=cls._determine_confidence(sample_size, p_value),
+            data_points=data_points,
+            lag_hours=lag_hours,
+        )
+
+    @classmethod
+    def analyze_with_lags(
+        cls,
+        sentiment_data: List[Dict[str, Any]],
+        metric_data: List[Dict[str, Any]],
+        metric_type: str = "volume",
+        max_lag_hours: int = 24,
+    ) -> Dict[str, Any]:
+        """
+        Scan hourly lags from 0 to ``max_lag_hours`` for the strongest correlation.
+
+        Args:
+            sentiment_data: Dicts carrying 'timestamp' and 'score' keys
+            metric_data: Dicts carrying 'timestamp' and 'value' keys
+            metric_type: Metric being compared ('price' or 'volume')
+            max_lag_hours: Largest lag, in hours, to evaluate (inclusive)
+
+        Returns:
+            Dict with the best lag, per-lag results and a recommendation
+        """
+        def _summarize(lag: int) -> Dict[str, Any]:
+            # One correlation run with sentiment shifted forward by `lag` hours.
+            outcome = cls.calculate_correlation(
+                sentiment_data, metric_data, metric_type, lag_hours=lag
+            )
+            return {
+                "lag_hours": lag,
+                "correlation": outcome.correlation_score,
+                "p_value": outcome.p_value,
+                "confidence": outcome.confidence_level,
+            }
+
+        lag_results = [_summarize(lag) for lag in range(max_lag_hours + 1)]
+
+        # Lags whose result was flagged insufficient_data cannot be "best".
+        usable = [
+            entry for entry in lag_results if entry["confidence"] != "insufficient_data"
+        ]
+        if not usable:
+            return {
+                "best_lag_hours": 0,
+                "best_correlation": 0.0,
+                "lag_analysis": lag_results,
+                "recommendation": "Insufficient data to determine optimal lag.",
+            }
+
+        # max() keeps the first maximum, so ties resolve to the smallest lag.
+        best = max(usable, key=lambda entry: abs(entry["correlation"]))
+        is_strong = abs(best["correlation"]) >= 0.4
+
+        if not is_strong:
+            recommendation = (
+                f"No strong leading relationship found. Best correlation of "
+                f"{best['correlation']:.3f} at {best['lag_hours']}h lag."
+            )
+        else:
+            recommendation = (
+                f"Sentiment appears to lead {metric_type} changes by approximately "
+                f"{best['lag_hours']} hours with {best['confidence']} confidence."
+            )
+
+        return {
+            "best_lag_hours": best["lag_hours"],
+            "best_correlation": best["correlation"],
+            "lag_analysis": lag_results,
+            "recommendation": recommendation,
+        }
+
+    @classmethod
+    def full_analysis(
+        cls,
+        sentiment_data: List[Dict[str, Any]],
+        price_data: List[Dict[str, Any]],
+        volume_data: List[Dict[str, Any]],
+        lag_hours: int = 0,
+    ) -> Dict[str, Any]:
+        """
+        Correlate sentiment with both price and volume at a single lag.
+
+        Args:
+            sentiment_data: Dicts carrying 'timestamp' and 'score' keys
+            price_data: Dicts carrying 'timestamp' and 'value' keys
+            volume_data: Dicts carrying 'timestamp' and 'value' keys
+            lag_hours: Lag, in hours, applied to the sentiment series
+
+        Returns:
+            Both serialized correlation results plus a summary section
+        """
+        price = cls.calculate_correlation(
+            sentiment_data, price_data, metric_type="price", lag_hours=lag_hours
+        )
+        volume = cls.calculate_correlation(
+            sentiment_data, volume_data, metric_type="volume", lag_hours=lag_hours
+        )
+        price_payload, volume_payload = price.to_dict(), volume.to_dict()
+
+        price_strength = abs(price.correlation_score)
+        volume_strength = abs(volume.correlation_score)
+        trusted = ("high", "medium")
+        # Leading indicator: at least one |r| >= 0.4 and at least one trusted
+        # confidence level (the two need not belong to the same metric).
+        is_leading = (price_strength >= 0.4 or volume_strength >= 0.4) and (
+            price.confidence_level in trusted or volume.confidence_level in trusted
+        )
+        # The comparison is strict, so a tie is reported as "volume".
+        strongest = "price" if price_strength > volume_strength else "volume"
+
+        return {
+            "price_correlation": price_payload,
+            "volume_correlation": volume_payload,
+            "summary": {
+                "sentiment_is_leading_indicator": is_leading,
+                "strongest_relationship": strongest,
+                "analysis_timestamp": datetime.utcnow().isoformat(),
+            },
+        }
diff --git a/temp_backup/src/analytics/entity_linker.py b/temp_backup/src/analytics/entity_linker.py
new file mode 100644
index 00000000..21b388d8
--- /dev/null
+++ b/temp_backup/src/analytics/entity_linker.py
@@ -0,0 +1,212 @@
+"""
+On-chain Entity Linker for news articles.
+Links news content to on-chain projects and assets, producing stable IDs
+and storing links in the database.
+"""
+
+import logging
+import re
+from typing import Dict, List, Optional, Tuple
+from dataclasses import dataclass
+
+from .keywords import CRYPTO_PROJECT_MAP, KNOWN_TICKERS, TICKER_TO_PROJECT
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class LinkedEntity:  # one on-chain entity resolved from text by EntityLinker
+    stable_id: str  # deterministic "<entity_type>:<lower-cased identifier>" key
+    entity_type: str  # "project" or "asset"
+    name: str  # canonical project name, or the ticker itself for assets
+    ticker: Optional[str] = None  # set for assets only; projects leave it None
+    confidence: float = 1.0  # EntityLinker assigns 0.95 / 0.9 / 0.85 by match rule
+
+
+class EntityLinker:
+    """
+    Links text content to known on-chain entities (projects and assets)
+    with stable, deterministic IDs.
+
+    Project names match case-insensitively on word boundaries; asset
+    tickers must appear as standalone upper-case tokens of 2-6 letters.
+    """
+
+    # Candidate ticker tokens; compiled once rather than on every call.
+    _TICKER_PATTERN = re.compile(r"\b([A-Z]{2,6})\b")
+
+    def __init__(self) -> None:
+        """Pre-compile the project patterns and build the asset ticker set."""
+        self._project_patterns = self._compile_project_patterns()
+        # Filter out SDF from asset tickers since it's a project
+        self._asset_tickers = {t for t in KNOWN_TICKERS if t != "SDF"}
+
+    def _compile_project_patterns(self) -> List[Tuple[str, re.Pattern]]:
+        """Compile one word-bounded pattern per project name, longest first."""
+        names = sorted(CRYPTO_PROJECT_MAP, key=len, reverse=True)
+        return [
+            (name, re.compile(r"\b" + re.escape(name) + r"\b", re.IGNORECASE))
+            for name in names
+        ]
+
+    def _generate_stable_id(self, entity_type: str, identifier: str) -> str:
+        """Return the deterministic ID ``<entity_type>:<lower-cased identifier>``."""
+        normalized = identifier.strip().lower()
+        return f"{entity_type}:{normalized}"
+
+    def _register(
+        self,
+        entities: Dict[str, LinkedEntity],
+        entity_type: str,
+        name: str,
+        confidence: float,
+        ticker: Optional[str] = None,
+    ) -> None:
+        """
+        Add an entity to *entities* unless its stable ID is already present
+        (first registration wins, so the earliest confidence is kept).
+        """
+        stable_id = self._generate_stable_id(entity_type, name)
+        if stable_id not in entities:
+            entities[stable_id] = LinkedEntity(
+                stable_id=stable_id,
+                entity_type=entity_type,
+                name=name,
+                ticker=ticker,
+                confidence=confidence,
+            )
+
+    def link_text(
+        self,
+        text: str,
+        title: Optional[str] = None
+    ) -> List[LinkedEntity]:
+        """
+        Link the given text to known on-chain entities.
+
+        Args:
+            text: Main text content to analyze
+            title: Optional article title; scanned together with *text*
+                (title matches currently receive no extra weight)
+
+        Returns:
+            List of LinkedEntity objects with stable IDs, one per entity,
+            in discovery order (name matches first, then ticker matches)
+        """
+        entities: Dict[str, LinkedEntity] = {}
+        # Scan title and body as one string; the title comes first.
+        full_text = f"{title or ''}\n{text or ''}"
+
+        # Projects mentioned by name; the last alias in the map is canonical.
+        for project_name, pattern in self._project_patterns:
+            if pattern.search(full_text):
+                aliases = CRYPTO_PROJECT_MAP[project_name]
+                canonical_name = aliases[-1] if aliases else project_name
+                self._register(entities, "project", canonical_name, 0.95)
+
+        # Upper-case ticker tokens, plus the project(s) each ticker maps to.
+        for ticker in self._TICKER_PATTERN.findall(full_text):
+            if ticker not in self._asset_tickers:
+                continue
+            self._register(entities, "asset", ticker, 0.9, ticker=ticker)
+            for project_name in TICKER_TO_PROJECT.get(ticker, ()):
+                # Unknown or alias-less projects fall back to their own
+                # name instead of raising IndexError on an empty list.
+                aliases = CRYPTO_PROJECT_MAP.get(project_name.lower())
+                canonical_name = aliases[-1] if aliases else project_name
+                self._register(entities, "project", canonical_name, 0.85)
+
+        return list(entities.values())
+
+    def link_article(
+        self,
+        title: Optional[str],
+        summary: Optional[str],
+        content: Optional[str]
+    ) -> List[LinkedEntity]:
+        """Link an article's content to on-chain entities."""
+        # link_text() prepends the title itself; join only the body here.
+        body_text = "\n".join([
+            summary or "",
+            content or "",
+        ])
+        return self.link_text(body_text, title)
+
+
+# Labeled cases for measure_precision(); it reads only "text" and each "stable_id"
+LABELED_TEST_SET = [
+    {
+        "text": "Stellar Development Foundation (SDF) announces new Soroban upgrade. XLM price surges.",
+        "expected_entities": [
+            {"stable_id": "project:stellar", "type": "project"},
+            {"stable_id": "project:soroban", "type": "project"},
+            {"stable_id": "asset:xlm", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Bitcoin (BTC) reaches new all-time high. Ethereum (ETH) follows closely.",
+        "expected_entities": [  # NOTE(review): name matches also yield project IDs — confirm omission
+            {"stable_id": "asset:btc", "type": "asset"},
+            {"stable_id": "asset:eth", "type": "asset"}
+        ]
+    },
+    {
+        "text": "DeFi protocol Uniswap launches new liquidity pool on Solana.",
+        "expected_entities": [
+            {"stable_id": "project:uniswap", "type": "project"},
+            {"stable_id": "asset:sol", "type": "asset"}  # NOTE(review): text has "Solana", not the "SOL" ticker — confirm
+        ]
+    },
+    {
+        "text": "Cardano (ADA) releases new roadmap for governance.",
+        "expected_entities": [
+            {"stable_id": "asset:ada", "type": "asset"}
+        ]
+    },
+    {
+        "text": "Tech stocks rally on positive earnings. Apple and Microsoft lead gains.",
+        "expected_entities": []  # No crypto entities
+    }
+]
+
+
+def measure_precision(entity_linker: EntityLinker) -> Dict[str, float]:
+    """
+    Measure precision, recall and F1 of the linker on LABELED_TEST_SET.
+
+    A linked entity counts as a true positive when its stable ID is expected
+    for the same test case, and as a false positive otherwise.
+
+    Returns:
+        Dictionary with precision, recall, f1 and the underlying counts
+    """
+    true_positives = 0
+    false_positives = 0
+    total_expected = 0
+
+    for test_case in LABELED_TEST_SET:
+        expected = test_case["expected_entities"]
+        expected_stable_ids = {e["stable_id"] for e in expected}
+        total_expected += len(expected)
+
+        actual = entity_linker.link_text(test_case["text"])
+        hits = sum(1 for e in actual if e.stable_id in expected_stable_ids)
+        true_positives += hits
+        false_positives += len(actual) - hits
+
+    predicted = true_positives + false_positives
+    # With nothing predicted (or nothing expected) the ratio is defined as 1.0.
+    precision = true_positives / predicted if predicted > 0 else 1.0
+    recall = true_positives / total_expected if total_expected > 0 else 1.0
+    score_sum = precision + recall
+    f1 = 2 * (precision * recall) / score_sum if score_sum > 0 else 0.0
+
+    return {
+        "precision": precision,
+        "recall": recall,
+        "f1": f1,
+        "true_positives": true_positives,
+        "false_positives": false_positives,
+        "total_expected": total_expected,
+        "test_cases": len(LABELED_TEST_SET)
+    }
diff --git a/temp_backup/src/analytics/forecaster.py b/temp_backup/src/analytics/forecaster.py
new file mode 100644
index 00000000..3fa2225e
--- /dev/null
+++ b/temp_backup/src/analytics/forecaster.py
@@ -0,0 +1,507 @@
+"""
+Predictive analytics: forecast market trends (Bullish/Bearish) for the next 24-48 hours
+using historical sentiment and volume data from analytics.jsonl.
+
+Sentiment Velocity = rate of sentiment change per hour (dS/dt of mood).
+
+Backend selection (auto-detected at runtime):
+  - Prophet (Meta)  — preferred; installed via ``pip install prophet``
+  - scikit-learn Ridge regression — always available (already in requirements)
+  - Heuristic decay — final fallback when data < 3 points
+"""
+
+import json
+import os
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+
+from src.analytics.market_analyzer import Trend
+from src.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+# ── Constants ──────────────────────────────────────────────────────────────
+
+_DEFAULT_JSONL = Path(os.getenv("ANALYTICS_JSONL_PATH", "./data/analytics.jsonl"))
+
+BULLISH_THRESHOLD = 0.2  # scores strictly above this classify as bullish
+BEARISH_THRESHOLD = -0.2  # scores strictly below this classify as bearish
+
+# Minimum rows required before the Prophet backend is attempted
+_MIN_TRAINING_POINTS = 3
+
+
+# ── Output type ───────────────────────────────────────────────────────────
+
+
+@dataclass
+class ForecastResult:
+    """Market trend forecast for the next 24 h and 48 h."""
+
+    predicted_trend_24h: str      # "bullish" | "bearish" | "neutral"
+    predicted_trend_48h: str      # same label set as predicted_trend_24h
+    confidence_24h: float         # 0.0 – 1.0
+    confidence_48h: float         # 0.0 – 1.0
+    sentiment_velocity: float     # Δsentiment per hour (positive → accelerating bullish)
+    forecast_score_24h: float     # predicted market health score at T+24 h
+    forecast_score_48h: float     # predicted market health score at T+48 h
+    model_backend: str            # "prophet" | "sklearn" | "heuristic"
+    data_points_used: int         # number of history rows given to predict()
+    generated_at: str             # ISO-8601 UTC timestamp of the prediction
+
+    def to_dict(self) -> Dict[str, Any]:
+        return asdict(self)  # field name -> value mapping of this result
+
+
+# ── Helpers ───────────────────────────────────────────────────────────────
+
+
+def _classify_trend(score: float) -> str:
+    """Return the Trend label value for a score; inside the band is neutral."""
+    if score > BULLISH_THRESHOLD:
+        trend = Trend.BULLISH
+    else:
+        trend = Trend.BEARISH if score < BEARISH_THRESHOLD else Trend.NEUTRAL
+    return trend.value
+
+
+def _confidence_from_score(score: float) -> float:
+    """
+    Map the magnitude of a predicted score to a confidence in [0.30, 0.95].
+
+    Within the neutral band the value rises linearly from 0.30 to 0.50;
+    beyond it, it grows by 0.75 per unit of score, capped at 0.95.
+    """
+    magnitude = abs(score)
+    if magnitude <= BULLISH_THRESHOLD:
+        raw = 0.30 + (magnitude / BULLISH_THRESHOLD) * 0.20
+    else:
+        raw = min(0.95, 0.50 + (magnitude - BULLISH_THRESHOLD) * 0.75)
+    return round(raw, 3)
+
+
+# ── Main class ────────────────────────────────────────────────────────────
+
+
+class SentimentForecaster:
+    """
+    Forecasts sentiment-based market health scores 24 h and 48 h ahead.
+
+    Typical usage::
+
+        forecaster = SentimentForecaster()
+        df = forecaster.load_history()
+        metrics = forecaster.train(df)
+        result = forecaster.predict(df)
+
+    Or as a one-liner::
+
+        result = SentimentForecaster().run()
+    """
+
+    MODEL_TYPE = "sentiment_forecaster"
+
+    def __init__(self, jsonl_path: Optional[Path] = None) -> None:
+        self.jsonl_path: Path = Path(jsonl_path) if jsonl_path else _DEFAULT_JSONL
+        self._model_24h = None   # fitted model / Prophet instance
+        self._model_48h = None   # separate ridge for 48 h (sklearn path)
+        self._backend: str = "heuristic"
+        self._is_trained: bool = False
+
+    # ── Data loading ──────────────────────────────────────────────────────
+
+    def load_history(self, path: Optional[Path] = None) -> pd.DataFrame:
+        """
+        Parse *analytics.jsonl* into a DataFrame sorted by timestamp.
+
+        Columns: timestamp, sentiment_score, news_count, positive_pct,
+        negative_pct, neutral_pct.  Malformed lines (bad JSON, missing or
+        null / non-numeric fields) are logged and skipped instead of raised.
+        """
+        jsonl_path = Path(path) if path else self.jsonl_path
+
+        if not jsonl_path.exists():
+            logger.warning(
+                f"analytics.jsonl not found at {jsonl_path}; returning empty DataFrame"
+            )
+            return pd.DataFrame()
+
+        records: List[Dict[str, Any]] = []
+        with open(jsonl_path, encoding="utf-8") as fh:
+            for raw in fh:
+                raw = raw.strip()
+                if not raw:
+                    continue
+                try:
+                    entry = json.loads(raw)
+                    sd = entry.get("sentiment_data", {})
+                    dist = sd.get("sentiment_distribution", {})
+                    records.append(
+                        {
+                            "timestamp": pd.to_datetime(entry["timestamp"]),
+                            "sentiment_score": float(
+                                sd.get("average_compound_score", 0.0)
+                            ),
+                            "news_count": int(entry.get("news_count", 0)),
+                            "positive_pct": float(dist.get("positive", 0.0)),
+                            "negative_pct": float(dist.get("negative", 0.0)),
+                            "neutral_pct": float(dist.get("neutral", 1.0)),
+                        }
+                    )
+                except (AttributeError, KeyError, TypeError, ValueError) as exc:
+                    logger.warning(f"Skipping malformed analytics line: {exc}")
+
+        if not records:
+            logger.warning("analytics.jsonl contained no valid entries")
+            return pd.DataFrame()
+
+        df = (
+            pd.DataFrame(records)
+            .sort_values("timestamp")
+            .reset_index(drop=True)
+        )
+        logger.info(f"Loaded {len(df)} data points from {jsonl_path}")
+        return df
+
+    # ── Sentiment velocity ────────────────────────────────────────────────
+
+    @staticmethod
+    def compute_sentiment_velocity(
+        df: pd.DataFrame, window: int = 5
+    ) -> float:
+        """
+        Compute how fast sentiment is changing (Δsentiment / Δhours).
+
+        Positive → mood is becoming more bullish.
+        Negative → mood is turning more bearish.
+
+        Uses the most recent *window* records; returns 0.0 when there
+        are fewer than 2 data points.
+        """
+        if df is None or len(df) < 2:
+            return 0.0
+
+        recent = df.tail(window)
+        if len(recent) < 2:
+            return 0.0
+
+        delta_s = (
+            recent["sentiment_score"].iloc[-1] - recent["sentiment_score"].iloc[0]
+        )
+        delta_h = (
+            (recent["timestamp"].iloc[-1] - recent["timestamp"].iloc[0])
+            .total_seconds()
+            / 3600.0
+        )
+
+        if delta_h < 1e-6:
+            return 0.0
+
+        return round(delta_s / delta_h, 6)
+
+    # ── Training ──────────────────────────────────────────────────────────
+
+    def train(self, df: pd.DataFrame) -> Dict[str, Any]:
+        """
+        Fit the forecasting model on historical data.
+
+        Returns a metrics dict; when data is too sparse the forecaster falls
+        back to the heuristic method, which the ``backend`` key reflects.
+        """
+        if df is None or len(df) < 2:
+            logger.warning(
+                "Insufficient data for model training; using heuristic fallback"
+            )
+            self._is_trained = False
+            self._backend = "heuristic"  # keep predict()'s reported backend honest
+            return {"backend": "heuristic", "n_points": 0}
+
+        if self._try_train_prophet(df):
+            return {"backend": "prophet", "n_points": len(df)}
+
+        return self._train_sklearn(df)
+
+    # ── Prophet backend ───────────────────────────────────────────────────
+
+    def _try_train_prophet(self, df: pd.DataFrame) -> bool:
+        """Attempt Prophet training. Returns True on success, False otherwise."""
+        try:
+            from prophet import Prophet  # type: ignore
+        except ImportError:
+            logger.debug("prophet not installed — skipping Prophet backend")
+            return False
+
+        if len(df) < _MIN_TRAINING_POINTS:
+            logger.info(
+                f"Too few points ({len(df)}) for Prophet; "
+                f"need >= {_MIN_TRAINING_POINTS}"
+            )
+            return False
+
+        try:
+            # Prophet's fit() requires the time column to be named "ds" and
+            # the target column "y".
+            df_p = df[["timestamp", "sentiment_score"]].rename(
+                columns={"timestamp": "ds", "sentiment_score": "y"}
+            )
+            m = Prophet(
+                daily_seasonality=len(df) >= 24,
+                weekly_seasonality=len(df) >= 168,
+                changepoint_prior_scale=0.05,
+                interval_width=0.80,
+            )
+            m.fit(df_p)
+            self._model_24h = m
+            self._model_48h = m  # single Prophet model, different horizons
+            self._backend = "prophet"
+            self._is_trained = True
+            logger.info("SentimentForecaster trained with Prophet backend")
+            return True
+        except Exception as exc:
+            logger.warning(f"Prophet training failed ({exc}); falling back to sklearn")
+            return False
+
+    # ── sklearn backend ───────────────────────────────────────────────────
+
+    def _train_sklearn(self, df: pd.DataFrame) -> Dict[str, Any]:
+        """Train separate Ridge pipelines for the 24 h and 48 h horizons."""
+        from sklearn.linear_model import Ridge
+        from sklearn.pipeline import Pipeline
+        from sklearn.preprocessing import StandardScaler
+
+        n = len(df)
+        features, targets_24h, targets_48h = self._build_training_samples(df)
+
+        if len(features) < 2:
+            self._is_trained = False
+            self._backend = "heuristic"
+            logger.warning("Not enough training samples for sklearn; using heuristic")
+            return {"backend": "heuristic", "n_points": n, "r2_24h": None, "r2_48h": None}
+
+        X = np.array(features)
+        y24 = np.array(targets_24h)
+        y48 = np.array(targets_48h)
+
+        pipe24 = Pipeline([("scaler", StandardScaler()), ("ridge", Ridge(alpha=1.0))])
+        pipe48 = Pipeline([("scaler", StandardScaler()), ("ridge", Ridge(alpha=1.0))])
+
+        pipe24.fit(X, y24)
+        pipe48.fit(X, y48)
+
+        r2_24h = float(pipe24.score(X, y24))
+        r2_48h = float(pipe48.score(X, y48))
+
+        self._model_24h = pipe24
+        self._model_48h = pipe48
+        self._backend = "sklearn"
+        self._is_trained = True
+
+        logger.info(
+            f"SentimentForecaster trained with sklearn | "
+            f"R²_24h={r2_24h:.3f}  R²_48h={r2_48h:.3f}  n={n}"
+        )
+        return {
+            "backend": "sklearn",
+            "n_points": n,
+            "r2_24h": round(r2_24h, 4),
+            "r2_48h": round(r2_48h, 4),
+        }
+
+    @staticmethod
+    def _build_training_samples(
+        df: pd.DataFrame,
+    ) -> Tuple[List[List[float]], List[float], List[float]]:
+        """
+        Build (X, y_24h, y_48h) training arrays.
+
+        For each row *i*, the target is the sentiment_score at the row
+        closest to T+24 h (resp. T+48 h).  When those future rows do not
+        exist the last available row is used (boundary clamping).
+        """
+        n = len(df)
+
+        # Estimate typical interval between records
+        if n >= 2:
+            median_h = float(
+                df["timestamp"].diff().dropna().dt.total_seconds().median() / 3600.0
+            )
+        else:
+            median_h = 1.0
+
+        step_24h = max(1, round(24.0 / max(median_h, 0.01)))
+        step_48h = max(1, round(48.0 / max(median_h, 0.01)))
+
+        features: List[List[float]] = []
+        targets_24h: List[float] = []
+        targets_48h: List[float] = []
+
+        for i in range(n):
+            # Rolling 3-row velocity; 0.0 when the window spans (almost) no time
+            w_start = max(0, i - 2)
+            sub = df.iloc[w_start : i + 1]
+            if len(sub) >= 2:
+                ds = sub["sentiment_score"].iloc[-1] - sub["sentiment_score"].iloc[0]
+                dh = (
+                    sub["timestamp"].iloc[-1] - sub["timestamp"].iloc[0]
+                ).total_seconds() / 3600.0
+                vel = ds / dh if dh >= 1e-6 else 0.0
+            else:
+                vel = 0.0
+
+            row = df.iloc[i]
+            features.append(
+                [
+                    float(i),                               # time index (captures trend)
+                    float(row["sentiment_score"]),
+                    float(vel),
+                    float(row["positive_pct"]),
+                    float(row["negative_pct"]),
+                    float(row["news_count"]) / 100.0,       # rough normalisation
+                ]
+            )
+            targets_24h.append(
+                float(df["sentiment_score"].iloc[min(i + step_24h, n - 1)])
+            )
+            targets_48h.append(
+                float(df["sentiment_score"].iloc[min(i + step_48h, n - 1)])
+            )
+
+        return features, targets_24h, targets_48h
+
+    # ── Prediction ────────────────────────────────────────────────────────
+
+    def predict(self, df: pd.DataFrame) -> ForecastResult:
+        """
+        Return 24 h and 48 h market trend forecasts.
+
+        Falls back gracefully when the model is not trained or data is sparse.
+        """
+        velocity = self.compute_sentiment_velocity(df)
+        n = len(df) if df is not None else 0
+
+        if self._is_trained and self._backend == "prophet":
+            score_24h, score_48h = self._predict_prophet(df, velocity)
+        elif self._is_trained and self._backend == "sklearn":
+            score_24h, score_48h = self._predict_sklearn(df, velocity)
+        else:
+            score_24h, score_48h = self._predict_heuristic(df, velocity)
+
+        # Keep scores within valid bounds
+        score_24h = max(-1.0, min(1.0, score_24h))
+        score_48h = max(-1.0, min(1.0, score_48h))
+
+        return ForecastResult(
+            predicted_trend_24h=_classify_trend(score_24h),
+            predicted_trend_48h=_classify_trend(score_48h),
+            confidence_24h=_confidence_from_score(score_24h),
+            confidence_48h=_confidence_from_score(score_48h),
+            sentiment_velocity=velocity,
+            forecast_score_24h=round(score_24h, 4),
+            forecast_score_48h=round(score_48h, 4),
+            model_backend=self._backend,
+            data_points_used=n,
+            generated_at=datetime.now(timezone.utc).isoformat(),
+        )
+
+    def _predict_prophet(
+        self, df: pd.DataFrame, velocity: float = 0.0
+    ) -> Tuple[float, float]:
+        """Read T+24 h / T+48 h off an hourly Prophet forecast; heuristic if unusable."""
+        if self._model_24h is None or df is None or df.empty:
+            return self._predict_heuristic(df, velocity)
+        m = self._model_24h
+        future = m.make_future_dataframe(periods=48, freq="h", include_history=False)
+        forecast = m.predict(future)
+
+        if len(forecast) >= 48:
+            return float(forecast["yhat"].iloc[23]), float(forecast["yhat"].iloc[47])
+        if len(forecast) >= 24:
+            return float(forecast["yhat"].iloc[23]), float(forecast["yhat"].iloc[-1])
+        if len(forecast) > 0:
+            val = float(forecast["yhat"].iloc[-1])
+            return val, val
+
+        return self._predict_heuristic(df, velocity)
+
+    def _predict_sklearn(
+        self, df: pd.DataFrame, velocity: float
+    ) -> Tuple[float, float]:
+        """Predict both horizons from the newest row; heuristic if unusable."""
+        models_missing = self._model_24h is None or self._model_48h is None
+        if models_missing or df is None or df.empty:
+            return self._predict_heuristic(df, velocity)
+        row = df.iloc[-1]
+        X = np.array(
+            [[
+                float(len(df) - 1),  # 0-based row index, as used in training
+                float(row["sentiment_score"]),
+                float(velocity),  # NOTE(review): 5-row window; training uses 3 rows
+                float(row["positive_pct"]),
+                float(row["negative_pct"]),
+                float(row["news_count"]) / 100.0,
+            ]]
+        )
+        return float(self._model_24h.predict(X)[0]), float(self._model_48h.predict(X)[0])
+
+    @staticmethod
+    def _predict_heuristic(
+        df: Optional[pd.DataFrame] = None, velocity: float = 0.0
+    ) -> Tuple[float, float]:
+        """
+        Extrapolate current sentiment using velocity with exponential decay.
+
+        score(T+h) ≈ current + velocity × Σ_{t=0}^{h-1} decay^t
+
+        The decay factor prevents the extrapolation from diverging when
+        velocity is large and history is short.
+        """
+        if df is None or len(df) == 0:
+            return 0.0, 0.0
+
+        current = float(df["sentiment_score"].iloc[-1])
+        decay = 0.85  # velocity impact halves roughly every ~4 h
+        score_24h = current + velocity * float(sum(decay ** t for t in range(24)))
+        score_48h = current + velocity * float(sum(decay ** t for t in range(48)))
+        return score_24h, score_48h
+
+    # ── Model persistence ─────────────────────────────────────────────────
+
+    def save(self) -> str:
+        """Persist the trained forecaster to the model registry and promote it."""
+        from src.ml.model_registry import promote_model, save_model
+
+        version = save_model(self.MODEL_TYPE, self)
+        promote_model(self.MODEL_TYPE, version)
+        logger.info(f"SentimentForecaster saved and promoted: {version}")
+        return version
+
+    @classmethod
+    def load(cls) -> "SentimentForecaster":
+        """Load the currently promoted forecaster from the model registry."""
+        from src.ml.model_registry import get_live_model
+
+        obj = get_live_model(cls.MODEL_TYPE)
+        if not isinstance(obj, cls):
+            raise TypeError(
+                f"Registry returned unexpected type for '{cls.MODEL_TYPE}': {type(obj)}"
+            )
+        logger.info("SentimentForecaster loaded from model registry")
+        return obj
+
+    # ── Convenience ───────────────────────────────────────────────────────
+
+    def run(self, jsonl_path: Optional[Path] = None) -> ForecastResult:
+        """
+        One-call shortcut: load history → train (if needed) → predict.
+
+        Safe to call repeatedly — reuses an existing trained model.
+        """
+        df = self.load_history(jsonl_path)
+        if not self._is_trained:
+            self.train(df)
+        return self.predict(df)
diff --git a/temp_backup/src/analytics/keywords.py b/temp_backup/src/analytics/keywords.py
new file mode 100644
index 00000000..c2b7411a
--- /dev/null
+++ b/temp_backup/src/analytics/keywords.py
@@ -0,0 +1,309 @@
+"""
+Keyword extraction module for analytics.
+
+Extracts key entities (coins, protocols, people) from news content
+to tag and filter analytics.
+"""
+
+import re
+from typing import List, Set
+
+# Static dictionary of known crypto projects and their tickers
+CRYPTO_PROJECT_MAP: dict[str, List[str]] = {
+    # Stellar ecosystem
+    "stellar": ["XLM", "Stellar"],
+    "xlm": ["XLM", "Stellar"],  # XLM ticker also maps to Stellar
+    "soroban": ["XLM", "Soroban"],
+    "stellar development foundation": ["SDF", "Stellar"],
+    # Bitcoin
+    "bitcoin": ["BTC", "Bitcoin"],
+    "btc": ["BTC", "Bitcoin"],
+    # Ethereum
+    "ethereum": ["ETH", "Ethereum"],
+    "eth": ["ETH", "Ethereum"],
+    # Solana
+    "solana": ["SOL", "Solana"],
+    "sol": ["SOL", "Solana"],
+    # USDC
+    "usdc": ["USDC", "USDC"],
+    "usd coin": ["USDC", "USDC"],
+    # Ripple
+    "ripple": ["XRP", "Ripple"],
+    "xrp": ["XRP", "XRP"],
+    # Cardano
+    "cardano": ["ADA", "Cardano"],
+    "ada": ["ADA", "ADA"],
+    # Polkadot
+    "polkadot": ["DOT", "Polkadot"],
+    "dot": ["DOT", "DOT"],
+    # Dogecoin
+    "dogecoin": ["DOGE", "Dogecoin"],
+    "doge": ["DOGE", "DOGE"],
+    # Litecoin
+    "litecoin": ["LTC", "Litecoin"],
+    "ltc": ["LTC", "LTC"],
+    # Chainlink
+    "chainlink": ["LINK", "Chainlink"],
+    "link": ["LINK", "LINK"],
+    # Avalanche
+    "avalanche": ["AVAX", "Avalanche"],
+    "avax": ["AVAX", "AVAX"],
+    # Polygon
+    "polygon": ["MATIC", "Polygon"],
+    "matic": ["MATIC", "MATIC"],
+    # Algorand
+    "algorand": ["ALGO", "Algorand"],
+    "algo": ["ALGO", "ALGO"],
+    # Cosmos
+    "cosmos": ["ATOM", "Cosmos"],
+    "atom": ["ATOM", "ATOM"],
+    # Uniswap
+    "univ3": ["UNI", "Uniswap"],
+    "uniswap": ["UNI", "Uniswap"],
+    # DeFi
+    "defi": ["DeFi", "DeFi"],
+    # NFTs
+    "nft": ["NFT", "NFT"],
+    "nfts": ["NFT", "NFT"],
+}
+
+# Set of all known tickers for regex matching
+KNOWN_TICKERS: Set[str] = {
+    "XLM",
+    "BTC",
+    "ETH",
+    "SOL",
+    "USDC",
+    "XRP",
+    "ADA",
+    "DOT",
+    "DOGE",
+    "LTC",
+    "LINK",
+    "AVAX",
+    "MATIC",
+    "ALGO",
+    "ATOM",
+    "UNI",
+    "USDT",
+    "Tether",
+    "BUSD",
+    "BNB",
+    "XLM",
+    "SDF",
+}
+
+# Regex pattern for matching crypto tickers (2-5 uppercase letters)
+TICKER_PATTERN = r"\b[A-Z]{2,5}\b"
+
+# Reverse mapping from ticker to project names (for when ticker appears without project name)
+TICKER_TO_PROJECT: dict[str, List[str]] = {
+    "XLM": ["Stellar"],
+    "BTC": ["Bitcoin"],
+    "ETH": ["Ethereum"],
+    "SOL": ["Solana"],
+    "XRP": ["Ripple"],
+    "ADA": ["Cardano"],
+    "DOT": ["Polkadot"],
+    "DOGE": ["Dogecoin"],
+    "LTC": ["Litecoin"],
+    "LINK": ["Chainlink"],
+    "AVAX": ["Avalanche"],
+    "MATIC": ["Polygon"],
+    "ALGO": ["Algorand"],
+    "ATOM": ["Cosmos"],
+    "UNI": ["Uniswap"],
+    "USDC": ["USDC"],
+    "USDT": ["Tether"],
+}
+
+# Words to exclude from ticker matching (common English words)
+TICKER_EXCLUSIONS: Set[str] = {
+    "THE",
+    "AND",
+    "FOR",
+    "ARE",
+    "BUT",
+    "NOT",
+    "YOU",
+    "ALL",
+    "CAN",
+    "HER",
+    "WAS",
+    "ONE",
+    "OUR",
+    "OUT",
+    "DAY",
+    "GET",
+    "HAS",
+    "HIM",
+    "HIS",
+    "HOW",
+    "ITS",
+    "LET",
+    "MAY",
+    "NEW",
+    "NOW",
+    "OLD",
+    "SEE",
+    "TWO",
+    "WAY",
+    "WHO",
+    "BOY",
+    "DID",
+    "SAY",
+    "SHE",
+    "TOO",
+    "USE",
+    "FROM",
+    "THIS",
+    "THAT",
+    "WITH",
+    "HAVE",
+    "WILL",
+    "YOUR",
+    "THEY",
+    "BEEN",
+    "HAVE",
+    "WHAT",
+    "WHEN",
+    "WEVE",
+    "MORE",
+    "VERY",
+    "JUST",
+    "ONLY",
+    "OVER",
+    "SUCH",
+    "THEN",
+    "THEM",
+    "THESE",
+    "SOME",
+    "INTO",
+    "YEAR",
+    "MADE",
+    "MAKE",
+    "ALSO",
+    "MOST",
+    "SOME",
+    "EVEN",
+    "BACK",
+    "JUST",
+    "LIKE",
+    "TIME",
+    "VERY",
+    "AFTER",
+    "USED",
+    "TWITTER",
+    "POST",
+    "DATA",
+    "COIN",
+    "COINS",
+    "NODE",
+    "NODES",
+}
+
+
+class KeywordExtractor:
+    """
+    Extract crypto tickers and project names from news text for tagging.
+
+    Matching is table-driven: ``CRYPTO_PROJECT_MAP`` supplies project names and
+    aliases, ``KNOWN_TICKERS`` / ``TICKER_TO_PROJECT`` supply ticker symbols.
+    Ticker matching is case-sensitive; project-name matching is
+    case-insensitive and whole-word.
+    """
+
+    def __init__(self):
+        """Initialize the keyword extractor with regex patterns."""
+        self.ticker_regex = re.compile(TICKER_PATTERN)
+        # Longest names first: regex alternation takes the first alternative
+        # that matches, so multi-word names must precede their sub-words.
+        self.project_names = sorted(CRYPTO_PROJECT_MAP.keys(), key=len, reverse=True)
+        # Case-insensitive, whole-word alternation over every project key.
+        self._project_pattern = re.compile(
+            r"\b(" + "|".join(re.escape(name) for name in self.project_names) + r")\b",
+            re.IGNORECASE,
+        )
+
+    def extract(self, text: str) -> List[str]:
+        """
+        Extract key entities from the given text.
+
+        Args:
+            text: The text to extract keywords from.
+
+        Returns:
+            A sorted list of unique keywords (tickers and project names);
+            empty when ``text`` is empty or not a string.
+        """
+        if not text or not isinstance(text, str):
+            return []
+
+        # A set removes duplicates contributed by overlapping matches.
+        keywords: Set[str] = set()
+
+        # Project names/aliases: add every ticker and name mapped to the match.
+        for match in self._project_pattern.findall(text):
+            keywords.update(CRYPTO_PROJECT_MAP.get(match.lower(), ()))
+
+        # Upper-case tickers: keep known symbols plus their project names.
+        for ticker in self.ticker_regex.findall(text):
+            if ticker in TICKER_EXCLUSIONS or ticker not in KNOWN_TICKERS:
+                continue
+            keywords.add(ticker)
+            keywords.update(TICKER_TO_PROJECT.get(ticker, ()))
+
+        return sorted(keywords)
+
+    def extract_tickers_only(self, text: str) -> List[str]:
+        """
+        Extract only crypto tickers from the given text.
+
+        Args:
+            text: The text to extract tickers from.
+
+        Returns:
+            A sorted list of unique tickers found in ``KNOWN_TICKERS``.
+        """
+        if not text or not isinstance(text, str):
+            return []
+
+        # The set comprehension de-duplicates; sorting keeps output stable.
+        return sorted(
+            {
+                ticker
+                for ticker in self.ticker_regex.findall(text)
+                if ticker not in TICKER_EXCLUSIONS and ticker in KNOWN_TICKERS
+            }
+        )
+
+    def extract_projects_only(self, text: str) -> List[str]:
+        """
+        Extract only project names from the given text.
+
+        Each match is resolved to its canonical name through the lookup
+        tables rather than ``str.capitalize()``, which mangled aliases
+        (``"btc"`` became ``"Btc"``, ``"defi"`` became ``"Defi"``).
+
+        Args:
+            text: The text to extract project names from.
+
+        Returns:
+            A sorted list of unique project names.
+        """
+        if not text or not isinstance(text, str):
+            return []
+
+        projects: Set[str] = set()
+
+        for match in self._project_pattern.findall(text):
+            entry = CRYPTO_PROJECT_MAP.get(match.lower())
+            if not entry:
+                continue
+            # Map entries are ``[ticker, name]``; the last element is the name.
+            name = entry[-1]
+            # Some entries repeat the ticker as the name; prefer the
+            # ticker-to-project table when it knows a fuller name.
+            projects.add(TICKER_TO_PROJECT.get(name, [name])[0])
+
+        return sorted(projects)
diff --git a/temp_backup/src/analytics/market_analyzer.py b/temp_backup/src/analytics/market_analyzer.py
new file mode 100644
index 00000000..cd8b5b68
--- /dev/null
+++ b/temp_backup/src/analytics/market_analyzer.py
@@ -0,0 +1,201 @@
+"""
+Market Trend Heuristic Algorithm
+Combines news sentiment and on-chain volume to produce Market Health score.
+"""
+
+from enum import Enum
+from typing import Tuple, Optional
+from dataclasses import dataclass
+
+
+class Trend(Enum):
+    """Market trend classification; each member's value is its lowercase name."""
+
+    BULLISH = "bullish"
+    BEARISH = "bearish"
+    NEUTRAL = "neutral"
+
+
+@dataclass
+class MarketData:
+    """Inputs for ``MarketAnalyzer.analyze_trend``: sentiment plus volume movement."""
+
+    sentiment_score: float  # Range: -1.0 to 1.0
+    volume_change: float  # Fractional change (e.g., 0.15 for a 15% increase)
+    current_volume: Optional[float] = None  # Raw volumes; not read by analyze_trend
+    previous_volume: Optional[float] = None
+
+
+class MarketAnalyzer:
+    """
+    Analyzes market health using weighted average of sentiment and volume changes.
+
+    Formula:
+        Market Health Score = (Sentiment × 0.7) + (Normalized_Volume_Change × 0.3)
+
+    Where:
+        - Sentiment: Direct sentiment score (-1.0 to 1.0)
+        - Normalized_Volume_Change: tanh(volume_change) to bound between -1 and 1
+
+    Classification:
+        - Score > 0.2: BULLISH
+        - Score < -0.2: BEARISH
+        - Otherwise: NEUTRAL
+    """
+
+    # Weights for the weighted average
+    SENTIMENT_WEIGHT = 0.7
+    VOLUME_WEIGHT = 0.3
+
+    # Thresholds for trend classification
+    BULLISH_THRESHOLD = 0.2
+    BEARISH_THRESHOLD = -0.2
+
+    @staticmethod
+    def _normalize_volume_change(volume_change: float) -> float:
+        """
+        Normalize volume change using hyperbolic tangent to bound between -1 and 1.
+        This prevents extreme volume spikes from dominating the score.
+        """
+        from math import tanh
+
+        return tanh(volume_change)
+
+    @staticmethod
+    def _calculate_health_score(sentiment: float, volume_change: float) -> float:
+        """
+        Calculate market health score using weighted average.
+
+        Args:
+            sentiment: News sentiment score (-1.0 to 1.0)
+            volume_change: Fractional volume change (0.15 means +15%)
+
+        Returns:
+            Market health score between -1.0 and 1.0
+        """
+        normalized_volume = MarketAnalyzer._normalize_volume_change(volume_change)
+
+        health_score = (
+            sentiment * MarketAnalyzer.SENTIMENT_WEIGHT
+            + normalized_volume * MarketAnalyzer.VOLUME_WEIGHT
+        )
+
+        # Ensure score stays within bounds
+        return max(-1.0, min(1.0, health_score))
+
+    @classmethod
+    def analyze_trend(cls, market_data: MarketData) -> Tuple[Trend, float, dict]:
+        """
+        Analyze market trend based on sentiment and volume data.
+
+        Args:
+            market_data: MarketData object containing sentiment and volume
+
+        Returns:
+            Tuple of (trend, score, metrics) where:
+            - trend: Trend enum (BULLISH/BEARISH/NEUTRAL)
+            - score: Raw (unclamped) health score
+            - metrics: Dictionary with component scores
+        """
+        # Calculate component scores
+        normalized_volume = cls._normalize_volume_change(market_data.volume_change)
+        sentiment_component = market_data.sentiment_score * cls.SENTIMENT_WEIGHT
+        volume_component = normalized_volume * cls.VOLUME_WEIGHT
+
+        # Calculate total score
+        health_score = sentiment_component + volume_component
+
+        # Classify trend
+        if health_score > cls.BULLISH_THRESHOLD:
+            trend = Trend.BULLISH
+        elif health_score < cls.BEARISH_THRESHOLD:
+            trend = Trend.BEARISH
+        else:
+            trend = Trend.NEUTRAL
+
+        # Prepare metrics
+        metrics = {
+            "health_score": health_score,
+            "sentiment_score": market_data.sentiment_score,
+            "sentiment_component": sentiment_component,
+            "volume_change": market_data.volume_change,
+            "normalized_volume": normalized_volume,
+            "volume_component": volume_component,
+            "weights": {"sentiment": cls.SENTIMENT_WEIGHT, "volume": cls.VOLUME_WEIGHT},
+        }
+
+        return trend, health_score, metrics
+
+    @classmethod
+    def analyze_from_sources(
+        cls, sentiment_score: float, volume_data: dict
+    ) -> Tuple[Trend, float, dict]:
+        """
+        Convenience method to analyze from raw data sources.
+
+        Args:
+            sentiment_score: From NewsFetcher
+            volume_data: From StellarDataFetcher, expected to have 'current' and 'previous' keys.
+                A missing key defaults to 0; a ``None`` value is tolerated.
+
+        Returns:
+            Same as analyze_trend method
+        """
+        current_volume = volume_data.get("current", 0)
+        previous_volume = volume_data.get("previous", 0)
+
+        # Relative change needs both readings and a positive baseline; otherwise
+        # report no change instead of dividing by zero or failing on ``None``.
+        if current_volume is None or previous_volume is None or not previous_volume > 0:
+            volume_change = 0.0
+        else:
+            volume_change = (current_volume - previous_volume) / previous_volume
+
+        market_data = MarketData(
+            sentiment_score=sentiment_score, volume_change=volume_change,
+            current_volume=current_volume, previous_volume=previous_volume,
+        )
+
+        return cls.analyze_trend(market_data)
+
+
+def get_explanation(score: float, trend: Trend) -> str:
+    """
+    Build a human-readable sentence describing the market trend.
+
+    Args:
+        score: Market health score
+        trend: Determined trend
+
+    Returns:
+        A randomly chosen canned sentence for ``trend`` plus a score suffix
+    """
+    import random
+
+    explanations = {
+        Trend.BULLISH: [
+            "Strong positive sentiment combined with increasing volume suggests bullish momentum.",
+            "Positive market sentiment supported by healthy volume growth indicates upward trend.",
+            "Bullish indicators from both news sentiment and trading volume.",
+        ],
+        Trend.BEARISH: [
+            "Negative sentiment coupled with volume patterns suggests bearish pressure.",
+            "Pessimistic market outlook reinforced by volume contraction indicates downward trend.",
+            "Bearish signals from sentiment analysis and on-chain volume metrics.",
+        ],
+        Trend.NEUTRAL: [
+            "Mixed or neutral signals with balanced sentiment and volume activity.",
+            "Market shows indecision with offsetting positive and negative indicators.",
+            "Neutral stance as sentiment and volume signals counterbalance each other.",
+        ],
+    }
+
+    base_explanation = random.choice(explanations[trend])
+
+    # A neutral trend with a non-zero score reports which way it leans.
+    if trend == Trend.NEUTRAL and score > 0:
+        return f"{base_explanation} Leaning slightly positive (score: {score:.2f})."
+    if trend == Trend.NEUTRAL and score < 0:
+        return f"{base_explanation} Leaning slightly negative (score: {score:.2f})."
+
+    return f"{base_explanation} Market Health Score: {score:.2f}"
diff --git a/temp_backup/src/analytics/ner_service.py b/temp_backup/src/analytics/ner_service.py
new file mode 100644
index 00000000..bb94d06c
--- /dev/null
+++ b/temp_backup/src/analytics/ner_service.py
@@ -0,0 +1,171 @@
+"""
+Named Entity Recognition service for news tagging.
+
+Uses spaCy for entity extraction and includes crypto-specific patterns so
+LumenPulse ecosystem entities are detected consistently.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from functools import lru_cache
+from typing import Dict, List, Optional
+
+import spacy
+from spacy.language import Language
+
+from .keywords import CRYPTO_PROJECT_MAP, KNOWN_TICKERS
+
+logger = logging.getLogger(__name__)
+
+
+class NERService:
+    """Extract entities from news text for downstream filtering and tagging."""
+
+    _MODEL_CANDIDATES = ("en_core_web_sm", "en_core_web_md")
+    _PERSON_PATTERN = re.compile(r"\b([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)+)\b")
+    _TICKER_PATTERN = re.compile(r"(?:\$)?\b([A-Z]{2,6})\b")
+    # Entity labels kept from the pipeline; PROJECT and ASSET come from the entity ruler.
+    _ENTITY_LABELS = frozenset(
+        {"PERSON", "ORG", "PRODUCT", "NORP", "GPE", "EVENT", "PROJECT", "ASSET"}
+    )
+
+    def __init__(self) -> None:
+        self._canonical_names = self._build_canonical_name_map()
+        self._known_tickers = {ticker.upper() for ticker in KNOWN_TICKERS}
+        self._nlp = self._initialize_pipeline()
+
+    def _build_canonical_name_map(self) -> Dict[str, str]:
+        """Map lowercase project keys, names and tickers to their canonical spelling."""
+        canonical_names: Dict[str, str] = {}
+
+        for key, values in CRYPTO_PROJECT_MAP.items():
+            if values:
+                name_candidate = values[-1]
+                canonical_names[key.lower()] = name_candidate
+                canonical_names[name_candidate.lower()] = name_candidate
+
+            for value in values:
+                canonical_names[value.lower()] = value
+
+        return canonical_names
+
+    def _initialize_pipeline(self) -> Language:
+        """
+        Load the first installed spaCy model (or a blank English pipeline) and
+        attach an entity ruler with PROJECT and ASSET patterns.
+        """
+        nlp: Optional[Language] = None
+
+        for model_name in self._MODEL_CANDIDATES:
+            try:
+                nlp = spacy.load(model_name, disable=["parser", "lemmatizer", "textcat"])
+                logger.info("Initialized spaCy model for NER: %s", model_name)
+                break
+            except OSError:
+                continue
+
+        if nlp is None:
+            nlp = spacy.blank("en")
+            logger.warning(
+                "spaCy pretrained model not found; using blank English pipeline with custom entity rules"
+            )
+
+        if "entity_ruler" in nlp.pipe_names:
+            nlp.remove_pipe("entity_ruler")
+
+        ruler_config = {"phrase_matcher_attr": "LOWER"}
+        if "ner" in nlp.pipe_names:
+            ruler = nlp.add_pipe("entity_ruler", before="ner", config=ruler_config)
+        else:
+            ruler = nlp.add_pipe("entity_ruler", config=ruler_config)
+
+        patterns = [{"label": "PROJECT", "pattern": name} for name in CRYPTO_PROJECT_MAP]
+        for ticker in self._known_tickers:
+            patterns.append({"label": "ASSET", "pattern": ticker})
+            patterns.append({"label": "ASSET", "pattern": f"${ticker}"})
+        ruler.add_patterns(patterns)
+
+        if "sentencizer" not in nlp.pipe_names:
+            nlp.add_pipe("sentencizer")
+
+        return nlp
+
+    def _normalize_entity(self, value: str) -> Optional[str]:
+        """Trim punctuation; map to a known ticker or canonical name (None if < 2 chars)."""
+        cleaned = value.strip(" \n\t.,:;()[]{}\"'`")
+        if len(cleaned) < 2:
+            return None
+
+        ticker_candidate = cleaned.lstrip("$")
+        if ticker_candidate.isupper() and ticker_candidate in self._known_tickers:
+            return ticker_candidate
+
+        normalized_lookup = cleaned.lower()
+        if normalized_lookup in self._canonical_names:
+            return self._canonical_names[normalized_lookup]
+
+        return cleaned
+
+    def extract_entities(self, text: str) -> List[str]:
+        """
+        Extract entities from text.
+
+        Returns a deduplicated list containing projects, assets, and people.
+        Results are memoised per text; every call returns a fresh list, so
+        callers may mutate it without corrupting the cache.
+        """
+        return list(self._extract_entities_cached(text))
+
+    @lru_cache(maxsize=4096)
+    def _extract_entities_cached(self, text: str) -> tuple[str, ...]:
+        """Cached worker for ``extract_entities``; returns an immutable tuple."""
+        # NOTE(review): lru_cache on a method keys on ``self`` and keeps instances
+        # alive for the cache's lifetime; fine for a long-lived service (TODO confirm).
+        if not text or not text.strip():
+            return ()
+
+        if len(text) > 20000:
+            text = text[:20000]
+
+        doc = self._nlp(text)
+        candidates: List[str] = [ent.text for ent in doc.ents if ent.label_ in self._ENTITY_LABELS]
+
+        # Capitalised multi-word heuristic for names; runs for every pipeline,
+        # pretrained or blank.
+        candidates.extend(self._PERSON_PATTERN.findall(text))
+
+        # Explicit ticker extraction catches tokens that may not be tagged as entities.
+        candidates.extend(
+            ticker
+            for ticker in self._TICKER_PATTERN.findall(text)
+            if ticker in self._known_tickers
+        )
+
+        deduped: List[str] = []
+        seen = set()
+
+        for candidate in candidates:
+            normalized = self._normalize_entity(candidate)
+            if not normalized:
+                continue
+
+            key = normalized.lower()
+            if key not in seen:
+                deduped.append(normalized)
+                seen.add(key)
+
+        return tuple(deduped)
+
+    def extract_entities_from_article(
+        self,
+        title: Optional[str] = None,
+        summary: Optional[str] = None,
+        content: Optional[str] = None,
+    ) -> List[str]:
+        """Extract entities from combined article fields."""
+        chunks = [value.strip() for value in [title or "", summary or "", content or ""] if value and value.strip()]
+        if not chunks:
+            return []
+        return self.extract_entities("\n".join(chunks))
diff --git a/temp_backup/src/analytics/sentiment.py b/temp_backup/src/analytics/sentiment.py
new file mode 100644
index 00000000..cd9e0a18
--- /dev/null
+++ b/temp_backup/src/analytics/sentiment.py
@@ -0,0 +1,388 @@
+import logging
+import os
+import re
+import unicodedata
+from typing import Any, Dict, Optional, Set, Tuple
+
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+try:
+    from langdetect import DetectorFactory, LangDetectException, detect
+
+    DetectorFactory.seed = 0
+    LANGDETECT_AVAILABLE = True
+except ImportError:
+    LANGDETECT_AVAILABLE = False
+
+    class LangDetectException(Exception):
+        """Fallback exception when langdetect is unavailable."""
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_FINBERT_MODEL = "ProsusAI/finbert"
+
+
+class SentimentScore(float):
+    """
+    A ``float`` score that also carries language-detection metadata.
+    """
+
+    language: str
+    language_supported: bool
+    language_unsupported: bool
+
+    def __new__(
+        cls,
+        value: float,
+        language: str,
+        language_supported: bool,
+        language_unsupported: bool,
+    ) -> "SentimentScore":
+        self = super().__new__(cls, value)
+        self.language, self.language_supported, self.language_unsupported = (
+            language, language_supported, language_unsupported,
+        )
+        return self
+
+    def to_dict(self) -> dict:
+        return dict(
+            score=float(self),
+            language=self.language,
+            language_supported=self.language_supported,
+            language_unsupported=self.language_unsupported,
+        )
+
+    @property
+    def score(self) -> float:
+        return float(self)
+
+    def __getitem__(self, key: str):
+        return self.to_dict()[key]
+
+    def get(self, key: str, default=None):
+        return self.to_dict().get(key, default)
+
+
+def _env_flag(name: str) -> bool:  # truthy spellings: 1 / true / yes / on (any case)
+    return os.getenv(name, "").strip().lower() in {"1", "true", "yes", "on"}
+
+
+class SentimentAnalyzer:
+    """
+    Analyze sentiment using a financial FinBERT model for English when available,
+    with VADER (and crypto keyword hints) as fallback if transformers fail or are disabled.
+    Spanish and Portuguese use lightweight keyword scoring.
+    """
+
+    def __init__(
+        self,
+        *,
+        enable_transformer: Optional[bool] = None,
+        transformer_model: Optional[str] = None,
+    ) -> None:
+        """
+        Configure the analyzer; the transformer itself is loaded lazily on first use.
+
+        Args:
+            enable_transformer: ``None`` or ``True`` enables FinBERT unless the
+                ``SENTIMENT_DISABLE_TRANSFORMER`` env flag is set; ``False`` disables it.
+            transformer_model: Model name; falls back to the
+                ``SENTIMENT_TRANSFORMER_MODEL`` env var, then ``_DEFAULT_FINBERT_MODEL``.
+        """
+        self.analyzer = SentimentIntensityAnalyzer()
+        self.supported_languages: Set[str] = {"en", "es", "pt"}
+
+        env_off = _env_flag("SENTIMENT_DISABLE_TRANSFORMER")
+        if enable_transformer is None:
+            self._transformer_enabled = not env_off
+        else:
+            self._transformer_enabled = bool(enable_transformer) and not env_off
+
+        self._transformer_model_name = (
+            transformer_model
+            or os.environ.get("SENTIMENT_TRANSFORMER_MODEL", _DEFAULT_FINBERT_MODEL).strip()
+            or _DEFAULT_FINBERT_MODEL
+        )
+
+        self._transformer_model: Any = None
+        self._transformer_tokenizer: Any = None
+        self._transformer_load_failed = False
+
+        self.negative_keywords_en = {
+            "crash", "crashing", "dump", "bear", "plunge", "collapse",
+        }
+        self.positive_keywords_en = {
+            "moon", "bull", "surge", "rally", "all time high", "ath",
+        }
+
+        # Lightweight keyword mapping for non-English sentiment support.
+        self.positive_keywords_es = {
+            "sube", "subida", "alza", "rally", "maximo historico", "alcista",
+        }
+        self.negative_keywords_es = {
+            "cae", "caida", "baja", "desplome", "colapso", "bajista",
+        }
+
+        self.positive_keywords_pt = {
+            "sobe", "alta", "rali", "maxima historica", "otimista", "altista",
+        }
+        self.negative_keywords_pt = {
+            "cai", "queda", "baixa", "despenca", "colapso", "baixista",
+        }
+
+    def _load_transformer(self) -> bool:
+        """Lazily load tokenizer and model once; return whether they are usable."""
+        if not self._transformer_enabled or self._transformer_load_failed:
+            return False
+        if self._transformer_model is not None:
+            return True
+        try:
+            from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+            model_name = self._transformer_model_name
+            self._transformer_tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self._transformer_model = AutoModelForSequenceClassification.from_pretrained(
+                model_name
+            )
+            self._transformer_model.eval()
+            logger.info("Loaded transformer sentiment model: %s", model_name)
+            return True
+        except Exception as e:
+            logger.warning(
+                "Transformer sentiment unavailable, using VADER fallback: %s", e
+            )
+            self._transformer_load_failed = True
+            return False
+
+    def _finbert_compound(self, text: str) -> Optional[float]:
+        """
+        Score ``text`` with the transformer as P(positive) - P(negative).
+
+        Returns ``None`` when the model is unavailable, lacks "positive"/"negative"
+        labels, or inference raises, so the caller can fall back to VADER.
+        """
+        if not self._load_transformer():
+            return None
+        try:
+            import torch
+
+            inputs = self._transformer_tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512,
+                padding=True,
+            )
+            with torch.no_grad():
+                logits = self._transformer_model(**inputs).logits
+            probs = torch.softmax(logits, dim=-1)[0]
+
+            id2label = self._transformer_model.config.id2label
+            pos_idx: Optional[int] = None
+            neg_idx: Optional[int] = None
+            for key, label in id2label.items():
+                idx = int(key) if not isinstance(key, int) else key
+                low = str(label).lower()
+                if low == "positive":
+                    pos_idx = idx
+                elif low == "negative":
+                    neg_idx = idx
+            if pos_idx is None or neg_idx is None:
+                return None
+
+            p_pos = float(probs[pos_idx].item())
+            p_neg = float(probs[neg_idx].item())
+            return max(-1.0, min(1.0, p_pos - p_neg))
+        except Exception as e:
+            logger.warning("FinBERT inference failed, falling back to VADER: %s", e)
+            return None
+
+    def _vader_english_compound(self, text: str) -> float:
+        """
+        Return VADER's compound score for lower-cased ``text``.
+
+        When VADER is exactly neutral, substring keyword hints yield -0.4 / 0.4.
+        """
+        cleaned = text.lower()
+        scores = self.analyzer.polarity_scores(cleaned)
+        compound = float(scores.get("compound", 0.0))
+
+        if compound == 0.0:
+            if any(word in cleaned for word in self.negative_keywords_en):
+                return -0.4
+            if any(word in cleaned for word in self.positive_keywords_en):
+                return 0.4
+
+        return compound
+
+    def analyze_text(
+        self, text: Optional[str], lang_hint: Optional[str] = None
+    ) -> SentimentScore:
+        """
+        Analyze the sentiment of the given text.
+
+        Args:
+            text (str): Input text (headline or article)
+            lang_hint (str, optional): Optional ISO language hint (e.g. "en", "es").
+
+        Returns:
+            SentimentScore: Float-like score with language metadata.
+        """
+        if not text or not isinstance(text, str):
+            return SentimentScore(0.0, "unknown", False, False)
+
+        cleaned = text.strip()
+        if not cleaned:
+            return SentimentScore(0.0, "unknown", False, False)
+
+        language = self._resolve_language(cleaned, lang_hint)
+        if language not in self.supported_languages:
+            return SentimentScore(0.0, language, False, True)
+
+        if language == "en":
+            score = self._analyze_english(cleaned)
+        elif language == "es":
+            score = self._keyword_sentiment_score(
+                cleaned, self.positive_keywords_es, self.negative_keywords_es
+            )
+        else:
+            score = self._keyword_sentiment_score(
+                cleaned, self.positive_keywords_pt, self.negative_keywords_pt
+            )
+
+        return SentimentScore(score, language, True, False)
+
+    def _analyze_english(self, text: str) -> float:
+        """Prefer the FinBERT score; fall back to VADER when it is ``None``."""
+        finbert_score = self._finbert_compound(text)
+        if finbert_score is not None:
+            return finbert_score
+        return self._vader_english_compound(text)
+
+    def _keyword_sentiment_score(
+        self, text: str, positive_keywords: Set[str], negative_keywords: Set[str]
+    ) -> float:
+        """
+        Score ``text`` as (positive - negative) / total keyword hits, in [-1, 1].
+
+        Each keyword counts once, matched as a substring of the normalized text.
+        """
+        normalized_text = self._normalize_text(text)
+        positive_hits = sum(1 for word in positive_keywords if word in normalized_text)
+        negative_hits = sum(1 for word in negative_keywords if word in normalized_text)
+
+        total_hits = positive_hits + negative_hits
+        if total_hits == 0:
+            return 0.0
+
+        score = (positive_hits - negative_hits) / total_hits
+        return max(-1.0, min(1.0, float(score)))
+
+    def _normalize_text(self, text: str) -> str:
+        """Strip accents/non-ASCII, collapse whitespace, and lower-case ``text``."""
+        normalized = unicodedata.normalize("NFKD", text).encode("ascii", "ignore")
+        ascii_text = normalized.decode("ascii")
+        return re.sub(r"\s+", " ", ascii_text).strip().lower()
+
+    def _resolve_language(self, text: str, lang_hint: Optional[str]) -> str:
+        """Pick a language: hint, then script check, then langdetect, then heuristic."""
+        if lang_hint:
+            return self._normalize_language_code(lang_hint)
+
+        script_language = self._detect_script_language(text)
+        if script_language:
+            return script_language
+
+        if LANGDETECT_AVAILABLE:
+            try:
+                detected = detect(text)
+                return self._normalize_language_code(detected)
+            except LangDetectException:
+                pass
+
+        return self._heuristic_language_detection(text)
+
+    def _normalize_language_code(self, language: str) -> str:
+        """Reduce a code such as ``pt_BR`` to its lowercase primary subtag (``pt``)."""
+        normalized = language.strip().lower().replace("_", "-")
+        if not normalized:
+            return "unknown"
+        return normalized.split("-")[0]
+
+    def _heuristic_language_detection(self, text: str) -> str:
+        """Count Spanish vs Portuguese marker words; default to ``en`` on a tie or no hits."""
+        normalized_text = self._normalize_text(text)
+        words = set(normalized_text.split())
+
+        spanish_markers = {"sube", "caida", "mercado", "hoy", "alcista", "bajista"}
+        portuguese_markers = {
+            "sobe", "queda", "alta", "baixa", "mercado", "hoje", "altista", "baixista",
+        }
+
+        spanish_hits = len(words & spanish_markers)
+        portuguese_hits = len(words & portuguese_markers)
+
+        if spanish_hits > portuguese_hits and spanish_hits > 0:
+            return "es"
+        if portuguese_hits > spanish_hits and portuguese_hits > 0:
+            return "pt"
+        return "en"
+
+    def _detect_script_language(self, text: str) -> Optional[str]:
+        """
+        Guess a language from the first matching Unicode script range.
+
+        Kana is tested before CJK ideographs: Japanese text mixes kana with
+        kanji, so checking ideographs first labelled Japanese as ``zh``.
+        """
+        if re.search(r"[\u3040-\u30ff]", text):
+            return "ja"
+        if re.search(r"[\u4e00-\u9fff]", text):
+            return "zh"
+        if re.search(r"[\uac00-\ud7af]", text):
+            return "ko"
+        if re.search(r"[\u0400-\u04ff]", text):
+            return "ru"
+        if re.search(r"[\u0600-\u06ff]", text):
+            return "ar"
+        return None
+
+
+def benchmark_vader_vs_transformer(
+    texts: Tuple[str, ...],
+) -> Tuple[Dict[str, Tuple[float, Optional[float]]], Dict[str, Any]]:
+    """
+    Score each non-blank headline with both the VADER-only and FinBERT paths.
+
+    Returns:
+        (per_text_scores, summary). ``per_text_scores`` maps the stripped text to
+        (vader_compound, transformer_compound); the transformer value is None if
+        the model could not be loaded or inference failed. ``summary`` reports the
+        sample count, successful transformer inferences, and how often both
+        scores agree on being non-negative.
+    """
+    vader_only = SentimentAnalyzer(enable_transformer=False)
+    with_transformer = SentimentAnalyzer(enable_transformer=True)
+
+    scores: Dict[str, Tuple[float, Optional[float]]] = {}
+    sample_count = transformer_ok = sign_matches = 0
+
+    for headline in (raw.strip() for raw in texts):
+        if not headline:
+            continue
+        vader_score = vader_only._vader_english_compound(headline)
+        transformer_score = with_transformer._finbert_compound(headline)
+        scores[headline] = (vader_score, transformer_score)
+        sample_count += 1
+        if transformer_score is None:
+            continue
+        transformer_ok += 1
+        sign_matches += (vader_score >= 0) == (transformer_score >= 0)
+
+    rate = (sign_matches / transformer_ok) if transformer_ok else 0.0
+    return scores, {
+        "samples": sample_count,
+        "transformer_inferences_ok": transformer_ok,
+        "sign_agreement_with_vader": sign_matches,
+        "sign_agreement_rate": rate,
+    }
diff --git a/temp_backup/src/analytics/sentiment_indicators.py b/temp_backup/src/analytics/sentiment_indicators.py
new file mode 100644
index 00000000..9abab57f
--- /dev/null
+++ b/temp_backup/src/analytics/sentiment_indicators.py
@@ -0,0 +1,236 @@
+"""
+sentiment_indicators.py
+
+Maps numeric sentiment scores (-1 to 1) to color-coded visual indicators for
+use in news feed and asset detail views.
+
+Color scheme
+------------
+* Bullish  (score >=  0.05) → Green  #00C853
+* Bearish  (score <= -0.05) → Red    #D50000
+* Neutral  (-0.05 < score < 0.05) → Gray   #9E9E9E
+
+Thresholds match the VADER compound-score cut-offs already used across the
+project (see src/sentiment.py).
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import List, Dict, Any
+
+
+# ---------------------------------------------------------------------------
+# Thresholds (matching VADER cut-offs used in src/sentiment.py)
+# ---------------------------------------------------------------------------
+BULLISH_THRESHOLD: float = 0.05  # scores at or above this are labelled Bullish
+BEARISH_THRESHOLD: float = -0.05  # scores at or below this are labelled Bearish
+
+
+# ---------------------------------------------------------------------------
+# Enums
+# ---------------------------------------------------------------------------
+
+class SentimentColor(str, Enum):
+    """Colour names for sentiment categories; members are also ``str`` instances."""
+
+    GREEN = "green"
+    RED = "red"
+    GRAY = "gray"
+
+
+class SentimentLabel(str, Enum):
+    """Trading labels for sentiment categories; members are also ``str`` instances."""
+
+    BULLISH = "Bullish"
+    BEARISH = "Bearish"
+    NEUTRAL = "Neutral"
+
+
+# ---------------------------------------------------------------------------
+# Dataclass
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class SentimentIndicator:
+    """
+    Immutable bundle of everything the UI needs to render one sentiment score.
+
+    Attributes
+    ----------
+    score:        The sentiment score the indicator was built from.
+    color:        Semantic colour member (green, red or gray).
+    hex_color:    CSS hex string paired with ``color``.
+    label:        Trading label member (Bullish, Bearish or Neutral).
+    display_text: Ready-made badge text, e.g. "0.85 Bullish".
+    """
+
+    score: float
+    color: SentimentColor
+    hex_color: str
+    label: SentimentLabel
+    display_text: str
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a plain dict with the enum fields reduced to their values."""
+        payload: Dict[str, Any] = {"score": self.score}
+        payload["color"] = self.color.value
+        payload["hex_color"] = self.hex_color
+        payload["label"] = self.label.value
+        payload["display_text"] = self.display_text
+        return payload
+
+
+# ---------------------------------------------------------------------------
+# Mapper
+# ---------------------------------------------------------------------------
+
+class SentimentIndicatorMapper:
+    """
+    Converts a raw sentiment score into a :class:`SentimentIndicator` ready
+    for serialisation to the API response.
+
+    Usage
+    -----
+    >>> mapper = SentimentIndicatorMapper()
+    >>> indicator = mapper.score_to_indicator(0.82)
+    >>> indicator.color
+    <SentimentColor.GREEN: 'green'>
+    >>> indicator.label
+    <SentimentLabel.BULLISH: 'Bullish'>
+    >>> indicator.display_text
+    '0.82 Bullish'
+    """
+
+    # Hex values chosen for accessibility contrast on both dark and light UIs
+    _HEX: Dict[SentimentColor, str] = {
+        SentimentColor.GREEN: "#00C853",
+        SentimentColor.RED: "#D50000",
+        SentimentColor.GRAY: "#9E9E9E",
+    }
+
+    # Colour shown for each label
+    _COLOR: Dict[SentimentLabel, SentimentColor] = {
+        SentimentLabel.BULLISH: SentimentColor.GREEN,
+        SentimentLabel.BEARISH: SentimentColor.RED,
+        SentimentLabel.NEUTRAL: SentimentColor.GRAY,
+    }
+
+    # Legend copy consumed by GET /sentiment/legend
+    _LEGEND: List[Dict[str, str]] = [
+        {
+            "color": SentimentColor.GREEN.value,
+            "hex_color": _HEX[SentimentColor.GREEN],
+            "label": SentimentLabel.BULLISH.value,
+            "description": (
+                f"Positive sentiment (score ≥ {BULLISH_THRESHOLD:+.2f}). "
+                "The market or news is generally optimistic about this asset."
+            ),
+            "score_range": f"≥ {BULLISH_THRESHOLD}",
+        },
+        {
+            "color": SentimentColor.RED.value,
+            "hex_color": _HEX[SentimentColor.RED],
+            "label": SentimentLabel.BEARISH.value,
+            "description": (
+                f"Negative sentiment (score ≤ {BEARISH_THRESHOLD:+.2f}). "
+                "The market or news is generally pessimistic about this asset."
+            ),
+            "score_range": f"≤ {BEARISH_THRESHOLD}",
+        },
+        {
+            "color": SentimentColor.GRAY.value,
+            "hex_color": _HEX[SentimentColor.GRAY],
+            "label": SentimentLabel.NEUTRAL.value,
+            "description": (
+                f"Neutral sentiment ({BEARISH_THRESHOLD:+.2f} < score < "
+                f"{BULLISH_THRESHOLD:+.2f}). Insufficient signal to determine"
+                " market direction."
+            ),
+            "score_range": f"{BEARISH_THRESHOLD} to {BULLISH_THRESHOLD}",
+        },
+    ]
+
+    def score_to_indicator(self, score: float) -> SentimentIndicator:
+        """
+        Map a compound sentiment score to a :class:`SentimentIndicator`.
+
+        Parameters
+        ----------
+        score:
+            Compound sentiment score, nominally in [-1, 1].  The value is passed
+            through ``float()`` but never clamped: anything at or beyond a
+            threshold simply takes that threshold's label.
+
+        Returns
+        -------
+        SentimentIndicator
+        """
+        score = float(score)
+        label = self._label_for(score)
+        color = self._COLOR[label]
+        return SentimentIndicator(
+            score=score,
+            color=color,
+            hex_color=self._HEX[color],
+            label=label,
+            display_text=self.format_display(score, label),
+        )
+
+    @staticmethod
+    def _label_for(score: float) -> SentimentLabel:
+        """Classify *score* against the module-level thresholds."""
+        if score >= BULLISH_THRESHOLD:
+            return SentimentLabel.BULLISH
+        if score <= BEARISH_THRESHOLD:
+            return SentimentLabel.BEARISH
+        return SentimentLabel.NEUTRAL
+
+    @staticmethod
+    def format_display(score: float, label: SentimentLabel | None = None) -> str:
+        """
+        Return a display string such as ``"0.85 Bullish"``; *label* is derived
+        from *score* when omitted, and a rounded ``-0.00`` is shown as ``0.00``.
+        """
+        if label is None:
+            label = SentimentIndicatorMapper._label_for(score)
+        number = f"{score:.2f}"
+        return f"{'0.00' if number == '-0.00' else number} {label.value}"
+
+    def get_legend(self) -> List[Dict[str, str]]:
+        """
+        Return the legend the frontend uses to render colour-key tooltips.
+        Entries are copied so callers cannot alter the shared class legend.
+
+        Returns
+        -------
+        list of dict with keys: color, hex_color, label, description, score_range
+        """
+        return [dict(entry) for entry in self._LEGEND]
+
+
+# ---------------------------------------------------------------------------
+# Module-level convenience
+# ---------------------------------------------------------------------------
+
+_default_mapper = SentimentIndicatorMapper()  # shared by the module-level wrappers
+
+
+def get_sentiment_indicator(score: float) -> SentimentIndicator:
+    """
+    Build the indicator for *score* with the shared module-level mapper.
+
+    >>> get_sentiment_indicator(0.72).label
+    <SentimentLabel.BULLISH: 'Bullish'>
+    >>> get_sentiment_indicator(-0.3).hex_color
+    '#D50000'
+    >>> get_sentiment_indicator(0.0).color
+    <SentimentColor.GRAY: 'gray'>
+    """
+    return _default_mapper.score_to_indicator(score)
+
+
+def get_legend() -> List[Dict[str, str]]:
+    """Return the colour legend produced by the shared module-level mapper."""
+    return _default_mapper.get_legend()
diff --git a/temp_backup/src/anomaly_detector.py b/temp_backup/src/anomaly_detector.py
new file mode 100644
index 00000000..c493ed36
--- /dev/null
+++ b/temp_backup/src/anomaly_detector.py
@@ -0,0 +1,818 @@
+"""
+Anomaly Detector module - Detects abnormal spikes in trade volume or social sentiment
+using statistical methods (Z-Score) and Machine Learning (Isolation Forest) to identify
+outliers and complex pump-and-dump patterns.
+"""
+
+from src.utils.logger import setup_logger
+from src.utils.metrics import ANOMALIES_DETECTED_TOTAL
+from typing import List, Dict, Any, Tuple, Optional, Union
+from datetime import datetime, timedelta
+from collections import deque
+import numpy as np
+from dataclasses import dataclass, field
+from sklearn.ensemble import IsolationForest
+import joblib
+import os
+import json
+
+logger = setup_logger(__name__)
+
+
+@dataclass
+class AnomalyResult:
+    """Outcome of a single-metric anomaly check."""
+
+    is_anomaly: bool
+    severity_score: float  # 0.0 - 1.0
+    metric_name: str
+    current_value: float
+    baseline_mean: float
+    baseline_std: float
+    z_score: float
+    timestamp: datetime
+    ml_anomaly_score: Optional[float] = None  # Isolation Forest anomaly score
+    ml_is_anomaly: Optional[bool] = None  # Isolation Forest prediction
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialise to a dict; ML keys appear only when an ML score is set."""
+        plain_fields = (
+            "is_anomaly", "severity_score", "metric_name",
+            "current_value", "baseline_mean", "baseline_std",
+            "z_score",
+        )
+        payload: Dict[str, Any] = {key: getattr(self, key) for key in plain_fields}
+        payload["timestamp"] = self.timestamp.isoformat()
+        # Both optional ML keys are emitted together, keyed off the score alone
+        if self.ml_anomaly_score is None:
+            return payload
+        payload["ml_anomaly_score"] = self.ml_anomaly_score
+        payload["ml_is_anomaly"] = self.ml_is_anomaly
+        return payload
+
+
+@dataclass
+class MultiDimensionalAnomalyResult:
+    """Outcome of an Isolation Forest check over several features at once."""
+
+    is_anomaly: bool
+    anomaly_score: float  # raw model score; lower means more anomalous
+    severity_score: float  # 0.0 - 1.0
+    features_used: List[str]
+    feature_values: Dict[str, float]
+    timestamp: datetime
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialise to a dict with the timestamp as an ISO-8601 string."""
+        payload: Dict[str, Any] = {}
+        for key in ("is_anomaly", "anomaly_score", "severity_score"):
+            payload[key] = getattr(self, key)
+        payload["features_used"] = self.features_used
+        payload["feature_values"] = self.feature_values
+        payload["timestamp"] = self.timestamp.isoformat()
+        return payload
+
+
+class IsolationForestDetector:
+    """
+    ML-based anomaly detector using Isolation Forest algorithm.
+    Detects multi-dimensional anomalies that might be missed by univariate methods.
+    """
+
+    DEFAULT_CONTAMINATION = 0.1  # Expected proportion of anomalies (10%)
+    DEFAULT_N_ESTIMATORS = 100
+    DEFAULT_MAX_SAMPLES = 'auto'
+    DEFAULT_FEATURES = ['volume', 'sentiment', 'volume_change_rate', 'sentiment_change_rate']
+    RETRAIN_MIN_POINTS = 200  # Buffer size required before automatic retraining
+    RETRAIN_INTERVAL = 50  # New points between automatic retrains
+
+    def __init__(
+        self,
+        contamination: Optional[float] = None,
+        n_estimators: Optional[int] = None,
+        max_samples: Union[str, int] = 'auto',
+        random_state: int = 42,
+        feature_columns: Optional[List[str]] = None
+    ):
+        """
+        Initialize Isolation Forest detector.
+
+        Args:
+            contamination: Expected proportion of anomalies (0.0 to 0.5)
+            n_estimators: Number of base estimators in the ensemble
+            max_samples: Number of samples to draw for training
+            random_state: Random seed for reproducibility
+            feature_columns: List of feature names to use
+        """
+        self.contamination = contamination or self.DEFAULT_CONTAMINATION
+        self.n_estimators = n_estimators or self.DEFAULT_N_ESTIMATORS
+        self.max_samples = max_samples
+        self.random_state = random_state
+        self.feature_columns = list(feature_columns or self.DEFAULT_FEATURES)  # private copy
+
+        self.model = IsolationForest(
+            contamination=self.contamination,
+            n_estimators=self.n_estimators,
+            max_samples=self.max_samples,
+            random_state=self.random_state,
+            verbose=0
+        )
+
+        self.is_trained = False
+        self.training_data = deque(maxlen=1000)  # Store recent data for retraining
+        self.min_training_samples = 50  # Minimum samples needed for training
+        self._points_since_retrain = 0  # Reset after every automatic retrain
+
+        logger.info(
+            f"IsolationForestDetector initialized with contamination={self.contamination}, "
+            f"n_estimators={self.n_estimators}, features={self.feature_columns}"
+        )
+
+    def _extract_features(
+        self,
+        volume: float,
+        sentiment: float,
+        volume_history: Optional[List[float]] = None,
+        sentiment_history: Optional[List[float]] = None
+    ) -> np.ndarray:
+        """
+        Extract feature vector for anomaly detection.
+
+        Args:
+            volume: Current volume value
+            sentiment: Current sentiment value
+            volume_history: Historical volume values for rate calculation
+            sentiment_history: Historical sentiment values for rate calculation
+
+        Returns:
+            Array of shape (1, len(self.feature_columns)); each column holds the
+            feature named at the same index, or 0.0 if that name is unknown.
+        """
+        features = {'volume': volume, 'sentiment': sentiment}
+
+        # Rate of change against the latest history entry (needs >= 2 entries)
+        if volume_history and len(volume_history) >= 2:
+            volume_change_rate = (volume - volume_history[-1]) / (volume_history[-1] + 1e-10)
+            features['volume_change_rate'] = np.clip(volume_change_rate, -10, 10)  # Cap extreme values
+        else:
+            features['volume_change_rate'] = 0.0
+
+        if sentiment_history and len(sentiment_history) >= 2:
+            sentiment_change_rate = (sentiment - sentiment_history[-1]) / (abs(sentiment_history[-1]) + 1e-10)
+            features['sentiment_change_rate'] = np.clip(sentiment_change_rate, -5, 5)
+        else:
+            features['sentiment_change_rate'] = 0.0
+
+        # Interaction feature (volume * sentiment) - captures pump-and-dump patterns
+        features['volume_sentiment_product'] = volume * (sentiment + 1)  # Shift sentiment to positive range
+
+        # Unknown names get 0.0 in their own slot so every column keeps its position
+        # (dropping them and padding at the end would shift the later features).
+        feature_vector = [features.get(name, 0.0) for name in self.feature_columns]
+
+        return np.array(feature_vector).reshape(1, -1)
+
+    def train(self, historical_data: List[Dict[str, float]]) -> bool:
+        """
+        Train the Isolation Forest model on historical data.
+
+        Args:
+            historical_data: List of dictionaries containing historical data points
+                           each with 'volume' and 'sentiment' keys at minimum
+
+        Returns:
+            bool: True if training successful, False otherwise
+        """
+        if len(historical_data) < self.min_training_samples:
+            logger.warning(
+                f"Insufficient data for training: {len(historical_data)}/{self.min_training_samples}"
+            )
+            return False
+
+        rows = []
+        for i, point in enumerate(historical_data):
+            # Up to five preceding points provide the rate-of-change context
+            recent = historical_data[max(0, i - 5):i]
+            feature_vec = self._extract_features(
+                point['volume'],
+                point['sentiment'],
+                [p['volume'] for p in recent],
+                [p['sentiment'] for p in recent]
+            )
+            rows.append(feature_vec.flatten())
+        X_train = np.array(rows)
+
+        try:
+            self.model.fit(X_train)
+            self.is_trained = True
+            logger.info(f"Isolation Forest trained successfully on {len(historical_data)} samples")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to train Isolation Forest: {e}")
+            return False
+
+    def detect_anomaly(
+        self,
+        volume: float,
+        sentiment: float,
+        volume_history: Optional[List[float]] = None,
+        sentiment_history: Optional[List[float]] = None
+    ) -> Optional[MultiDimensionalAnomalyResult]:
+        """
+        Detect anomaly in the current data point.
+
+        Args:
+            volume: Current volume
+            sentiment: Current sentiment
+            volume_history: Historical volumes for context
+            sentiment_history: Historical sentiments for context
+
+        Returns:
+            MultiDimensionalAnomalyResult if model is trained, None otherwise
+        """
+        if not self.is_trained:
+            logger.debug("Isolation Forest not trained yet, skipping detection")
+            return None
+
+        features = self._extract_features(volume, sentiment, volume_history, sentiment_history)
+
+        # Predict anomaly (-1 for anomaly, 1 for normal); cast the NumPy scalars to
+        # builtins so the result's to_dict() output stays JSON-serialisable.
+        is_anomaly = bool(self.model.predict(features)[0] == -1)
+        anomaly_score = float(self.model.score_samples(features)[0])  # Lower = more anomalous
+
+        # Severity is 0-1 (higher = more severe): the negated score is doubled and
+        # clipped, so any score at or below -0.5 reports the maximum severity.
+        severity_score = float(np.clip(-anomaly_score * 2, 0, 1)) if is_anomaly else 0.0
+
+        if is_anomaly:
+            ANOMALIES_DETECTED_TOTAL.labels(metric_name="ml_multi_dimensional").inc()
+            logger.info(f"ML anomaly detected! Score: {anomaly_score:.3f}, Severity: {severity_score:.3f}")
+
+        # Look the change rates up by column name, not by a fixed index, so a custom
+        # feature_columns order cannot report the wrong column; absent ones are 0.0.
+        by_name = dict(zip(self.feature_columns, features[0]))
+        feature_values = {
+            'volume': volume,
+            'sentiment': sentiment,
+            'volume_change_rate': float(by_name.get('volume_change_rate', 0.0)),
+            'sentiment_change_rate': float(by_name.get('sentiment_change_rate', 0.0))
+        }
+
+        return MultiDimensionalAnomalyResult(
+            is_anomaly=is_anomaly,
+            anomaly_score=anomaly_score,
+            severity_score=severity_score,
+            features_used=list(self.feature_columns),
+            feature_values=feature_values,
+            timestamp=datetime.utcnow()
+        )
+
+    def add_training_point(self, volume: float, sentiment: float):
+        """
+        Add a data point to the training buffer for future retraining.
+
+        Args:
+            volume: Volume value
+            sentiment: Sentiment value
+        """
+        self.training_data.append({
+            'volume': volume,
+            'sentiment': sentiment,
+            'timestamp': datetime.utcnow()
+        })
+        self._points_since_retrain += 1
+
+        # Retrain once the buffer is large enough, then every RETRAIN_INTERVAL points.
+        # Additions are counted because a full deque keeps a constant length.
+        due = self._points_since_retrain >= self.RETRAIN_INTERVAL
+        if due and len(self.training_data) >= self.RETRAIN_MIN_POINTS:
+            self._points_since_retrain = 0
+            self.train(list(self.training_data))
+
+    def save_model(self, filepath: str):
+        """Save the trained model and its configuration; no-op when untrained."""
+        if not self.is_trained:
+            return
+        joblib.dump(self.model, filepath)
+        config = {
+            'contamination': self.contamination,
+            'n_estimators': self.n_estimators,
+            'max_samples': self.max_samples,
+            'feature_columns': self.feature_columns
+        }
+        with open(f"{filepath}.config.json", 'w') as f:
+            json.dump(config, f)
+        logger.info(f"Model saved to {filepath}")
+
+    def load_model(self, filepath: str) -> bool:
+        """Load a trained model and, when present, the config saved beside it."""
+        if not os.path.exists(filepath):
+            return False
+        # NOTE: joblib.load unpickles arbitrary objects - only load trusted files.
+        self.model = joblib.load(filepath)
+        config_path = f"{filepath}.config.json"
+        if os.path.exists(config_path):
+            # Restore the feature layout the model was fitted with.
+            with open(config_path) as f:
+                self.feature_columns = list(json.load(f).get('feature_columns') or self.feature_columns)
+        self.is_trained = True
+        logger.info(f"Model loaded from {filepath}")
+        return True
+
+
+class AnomalyDetector:
+    """
+    Statistical anomaly detector using Z-Score methodology to identify outliers
+    in time-series data for trade volume and social sentiment metrics.
+    
+    Now enhanced with Isolation Forest for multi-dimensional anomaly detection.
+    """
+
+    # Default configuration
+    DEFAULT_WINDOW_SIZE_HOURS = 24
+    DEFAULT_Z_THRESHOLD = 2.5  # Standard deviations from mean
+    MIN_DATA_POINTS = 10  # Minimum data points required for reliable statistics
+    DEFAULT_USE_ML = True  # Enable ML-based detection by default
+    DEFAULT_ML_CONTAMINATION = 0.1  # 10% expected anomalies
+
+    def __init__(
+        self,
+        window_size_hours: int = None,
+        z_threshold: float = None,
+        use_ml: bool = None,
+        ml_contamination: float = None,
+        enable_comparison_mode: bool = False
+    ):
+        """
+        Set up the rolling windows and, optionally, the ML detector.
+
+        Args:
+            window_size_hours: Rolling window length in hours; falsy -> 24
+            z_threshold: Z-score cut-off for flagging a point; falsy -> 2.5
+            use_ml: Whether to create an IsolationForestDetector; None -> True
+            ml_contamination: Contamination passed to the ML detector; falsy -> 0.1
+            enable_comparison_mode: Run both Z-score and ML and compare results
+        """
+        self.enable_comparison_mode = enable_comparison_mode
+        self.use_ml = self.DEFAULT_USE_ML if use_ml is None else use_ml
+        self.z_threshold = z_threshold if z_threshold else self.DEFAULT_Z_THRESHOLD
+        self.window_size_hours = window_size_hours or self.DEFAULT_WINDOW_SIZE_HOURS
+
+        # Three parallel rolling buffers, each capped at four slots per window hour
+        capacity = self.window_size_hours * 4
+        self.volume_data = deque(maxlen=capacity)
+        self.sentiment_data = deque(maxlen=capacity)
+        self.timestamp_data = deque(maxlen=capacity)
+
+        # The ML detector exists only when ML detection is switched on
+        if self.use_ml:
+            contamination = ml_contamination or self.DEFAULT_ML_CONTAMINATION
+            self.ml_detector = IsolationForestDetector(contamination=contamination)
+        else:
+            self.ml_detector = None
+
+        self.historical_points = []  # feeds ML training
+
+        logger.info(
+            f"AnomalyDetector initialized with {self.window_size_hours}h window, "
+            f"Z-threshold: {self.z_threshold}, ML-enabled: {self.use_ml}, "
+            f"Comparison mode: {self.enable_comparison_mode}"
+        )
+    
+    def _calculate_statistics(self, data_points: List[float]) -> Tuple[float, float]:
+        """
+        Compute the mean and sample standard deviation of *data_points*.
+
+        A standard deviation of exactly zero is replaced by 1e-10 so callers
+        can divide by it.
+
+        Raises:
+            ValueError: If fewer than MIN_DATA_POINTS values are supplied.
+        """
+        if len(data_points) < self.MIN_DATA_POINTS:
+            raise ValueError(
+                f"Need at least {self.MIN_DATA_POINTS} data points for reliable statistics"
+            )
+
+        # ddof=1 gives the sample (n - 1) standard deviation
+        spread = float(np.std(data_points, ddof=1))
+        center = float(np.mean(data_points))
+        if spread == 0:
+            spread = 1e-10
+
+        return center, spread
+    
+    def _calculate_z_score(self, value: float, mean: float, std: float) -> float:
+        """Return how many *std* units *value* lies from *mean*; *std* must be non-zero."""
+        return (value - mean) / std
+    
+    def _calculate_severity_score(self, z_score: float) -> float:
+        """
+        Scale a Z-score to a severity between 0.0 and 1.0: 0.0 up to the
+        threshold, 1.0 beyond twice the threshold, linear in between.
+        """
+        threshold = self.z_threshold
+        magnitude = abs(z_score)
+
+        if magnitude <= threshold:
+            return 0.0
+        if magnitude <= threshold * 2:
+            return (magnitude - threshold) / threshold
+        return 1.0
+    
+    def _clean_old_data(self, current_timestamp: datetime):
+        """
+        Drop leading entries whose timestamp falls before the rolling window.
+        """
+        oldest_allowed = current_timestamp - timedelta(hours=self.window_size_hours)
+        stamps = self.timestamp_data
+
+        # Each expired timestamp also removes the oldest volume and sentiment
+        # value; a value buffer is only popped while it still holds something.
+        while stamps and stamps[0] < oldest_allowed:
+            stamps.popleft()
+            for values in (self.volume_data, self.sentiment_data):
+                if values:
+                    values.popleft()
+    
+    def add_data_point(
+        self, volume: float, sentiment_score: float, timestamp: datetime = None
+    ):
+        """
+        Record one observation in the rolling window and the ML buffers.
+        """
+        when = datetime.utcnow() if timestamp is None else timestamp
+
+        # Expire stale entries first, then append to the three rolling buffers
+        self._clean_old_data(when)
+        self.timestamp_data.append(when)
+        volume_value = float(volume)
+        self.volume_data.append(volume_value)
+        sentiment_value = float(sentiment_score)
+        self.sentiment_data.append(sentiment_value)
+
+        # Long-lived history used to train the ML model, capped at 1000 points
+        self.historical_points.append(
+            {'volume': volume_value, 'sentiment': sentiment_value, 'timestamp': when}
+        )
+        if len(self.historical_points) > 1000:
+            self.historical_points = self.historical_points[-1000:]
+
+        detector = self.ml_detector
+        if detector:
+            # First training happens as soon as enough history has accumulated
+            enough = len(self.historical_points) >= detector.min_training_samples
+            if not detector.is_trained and enough:
+                detector.train(self.historical_points)
+
+            # Every point also lands in the detector's own retraining buffer
+            detector.add_training_point(volume_value, sentiment_value)
+
+        logger.debug(f"Added data point: volume={volume}, sentiment={sentiment_score}")
+    
+    def detect_volume_anomaly(
+        self, current_volume: float, timestamp: datetime = None
+    ) -> AnomalyResult:
+        """
+        Check *current_volume* against the rolling volume window by Z-score.
+
+        Args:
+            current_volume: Volume value to evaluate
+            timestamp: Time of the observation; defaults to datetime.utcnow()
+
+        Returns:
+            AnomalyResult for the "volume" metric. It is non-anomalous with
+            zeroed statistics when the window holds fewer than MIN_DATA_POINTS
+            values or when any exception is raised during the check.
+        """
+        if timestamp is None:
+            timestamp = datetime.utcnow()
+
+        def build(is_anomaly=False, severity=0.0, mean=0.0, std=0.0, z_score=0.0):
+            # Every outcome shares the metric name, value and timestamp
+            return AnomalyResult(
+                is_anomaly=is_anomaly,
+                severity_score=severity,
+                metric_name="volume",
+                current_value=current_volume,
+                baseline_mean=mean,
+                baseline_std=std,
+                z_score=z_score,
+                timestamp=timestamp,
+            )
+
+        try:
+            window = list(self.volume_data)
+            # Too little history for a meaningful baseline
+            if len(window) < self.MIN_DATA_POINTS:
+                return build()
+
+            # Baseline statistics come from the stored window
+            mean, std = self._calculate_statistics(window)
+            z_score = self._calculate_z_score(current_volume, mean, std)
+            severity = self._calculate_severity_score(z_score)
+            flagged = abs(z_score) > self.z_threshold
+
+            # Record the hit on the shared anomaly counter
+            if flagged:
+                ANOMALIES_DETECTED_TOTAL.labels(metric_name="volume").inc()
+            return build(flagged, severity, mean, std, z_score)
+
+        except Exception as e:
+            # Any failure degrades to a non-anomalous result
+            logger.error(f"Error detecting volume anomaly: {e}")
+            return build()
+    
+    def detect_sentiment_anomaly(
+        self, current_sentiment: float, timestamp: datetime = None
+    ) -> AnomalyResult:
+        """
+        Check *current_sentiment* against the rolling sentiment window by Z-score.
+
+        Args:
+            current_sentiment: Sentiment value to evaluate
+            timestamp: Time of the observation; defaults to datetime.utcnow()
+
+        Returns:
+            AnomalyResult for the "sentiment" metric. It is non-anomalous with
+            zeroed statistics when the window holds fewer than MIN_DATA_POINTS
+            values or when any exception is raised during the check.
+        """
+        if timestamp is None:
+            timestamp = datetime.utcnow()
+
+        def build(is_anomaly=False, severity=0.0, mean=0.0, std=0.0, z_score=0.0):
+            # Every outcome shares the metric name, value and timestamp
+            return AnomalyResult(
+                is_anomaly=is_anomaly,
+                severity_score=severity,
+                metric_name="sentiment",
+                current_value=current_sentiment,
+                baseline_mean=mean,
+                baseline_std=std,
+                z_score=z_score,
+                timestamp=timestamp,
+            )
+
+        try:
+            window = list(self.sentiment_data)
+            # Too little history for a meaningful baseline
+            if len(window) < self.MIN_DATA_POINTS:
+                return build()
+
+            # Baseline statistics come from the stored window
+            mean, std = self._calculate_statistics(window)
+            z_score = self._calculate_z_score(current_sentiment, mean, std)
+            severity = self._calculate_severity_score(z_score)
+            flagged = abs(z_score) > self.z_threshold
+
+            # Record the hit on the shared anomaly counter
+            if flagged:
+                ANOMALIES_DETECTED_TOTAL.labels(metric_name="sentiment").inc()
+            return build(flagged, severity, mean, std, z_score)
+
+        except Exception as e:
+            # Any failure degrades to a non-anomalous result
+            logger.error(f"Error detecting sentiment anomaly: {e}")
+            return build()
+    
+    def detect_multi_dimensional_anomaly(
+        self, volume: float, sentiment: float, timestamp: datetime = None
+    ) -> Optional[MultiDimensionalAnomalyResult]:
+        """
+        Run the Isolation Forest detector on the current volume/sentiment pair.
+
+        The last ten values of each rolling window are passed as history;
+        *timestamp* is accepted but not used.
+
+        Returns:
+            MultiDimensionalAnomalyResult or None if ML not enabled/trained
+        """
+        detector = self.ml_detector
+        if not (detector and detector.is_trained):
+            return None
+        recent_volumes = list(self.volume_data)[-10:]
+        recent_sentiments = list(self.sentiment_data)[-10:]
+        return detector.detect_anomaly(volume, sentiment, recent_volumes, recent_sentiments)
+    
+    def detect_anomalies(
+        self, volume: float, sentiment_score: float, timestamp: datetime = None
+    ) -> Dict[str, Any]:
+        """
+        Detect anomalies for both volume and sentiment simultaneously.
+        
+        The data point is added to the rolling window first; the univariate checks
+        always run and the ML (multi-dimensional) check runs when ``self.use_ml`` is set.
+        Args:
+            volume: Current trade volume
+            sentiment_score: Current sentiment score
+            timestamp: Timestamp of current data point (defaults to ``datetime.utcnow()``)
+            
+        Returns:
+            Dict with 'volume_anomaly', 'sentiment_anomaly', 'timestamp', 'ml_anomaly' and, when set, 'combined_severity', 'is_anomaly_consensus', 'comparison'
+        """
+        if timestamp is None:
+            timestamp = datetime.utcnow()
+        
+        # Add data point first
+        self.add_data_point(volume, sentiment_score, timestamp)
+        
+        # Detect univariate anomalies
+        volume_result = self.detect_volume_anomaly(volume, timestamp)
+        sentiment_result = self.detect_sentiment_anomaly(sentiment_score, timestamp)
+        
+        results = {
+            'volume_anomaly': volume_result,
+            'sentiment_anomaly': sentiment_result,
+            'timestamp': timestamp,
+            'ml_anomaly': None
+        }
+        ml_result = None  # bound up front: the comparison block below reads it even when use_ml is off
+        # Detect multi-dimensional anomaly if ML is enabled
+        if self.use_ml:
+            ml_result = self.detect_multi_dimensional_anomaly(volume, sentiment_score, timestamp)
+            results['ml_anomaly'] = ml_result
+            
+            # Enhanced detection: Combine signals for better accuracy
+            if ml_result and ml_result.is_anomaly:
+                # Log when ML detects something Z-score might miss
+                if not (volume_result.is_anomaly or sentiment_result.is_anomaly):
+                    logger.warning(
+                        f"ML detected multi-dimensional anomaly missed by univariate methods! "
+                        f"Volume: {volume:.2f}, Sentiment: {sentiment_score:.3f}, "
+                        f"ML Score: {ml_result.anomaly_score:.3f}"
+                    )
+                
+                # Boost severity if multiple methods agree
+                if volume_result.is_anomaly or sentiment_result.is_anomaly:
+                    combined_severity = max(
+                        volume_result.severity_score,
+                        sentiment_result.severity_score,
+                        ml_result.severity_score
+                    )
+                    results['combined_severity'] = combined_severity
+                    results['is_anomaly_consensus'] = True
+        
+        # Comparison mode: Run both and generate comparison report
+        if self.enable_comparison_mode and self.ml_detector and self.ml_detector.is_trained:
+            results['comparison'] = self._compare_detection_methods(
+                volume_result, sentiment_result, ml_result
+            )
+        
+        return results
+    
+    def _compare_detection_methods(
+        self,
+        volume_result: AnomalyResult,
+        sentiment_result: AnomalyResult,
+        ml_result: Optional[MultiDimensionalAnomalyResult]
+    ) -> Dict[str, Any]:
+        """
+        Report whether the Z-score checks and the ML detector reached the same verdict.
+        """
+        univariate_hit = volume_result.is_anomaly or sentiment_result.is_anomaly
+        ml_hit = ml_result.is_anomaly if ml_result else False
+        
+        # Choose the human-readable verdict first, then assemble the report in one go.
+        if univariate_hit and ml_hit:
+            verdict = "Both methods agree - high confidence anomaly detected"
+        elif univariate_hit:
+            verdict = "Z-score detected anomaly but ML didn't - possible false positive from simple outlier"
+        elif ml_hit:
+            verdict = "ML detected complex multi-dimensional anomaly missed by univariate Z-score"
+        else:
+            verdict = "No anomaly detected by either method"
+        
+        # A missing ML result is reported as "not detected" with severity 0.0.
+        return {
+            'z_score_detected': univariate_hit,
+            'ml_detected': ml_hit,
+            'agreement': univariate_hit == ml_hit,
+            'z_score_severity': max(volume_result.severity_score, sentiment_result.severity_score),
+            'ml_severity': ml_result.severity_score if ml_result else 0.0,
+            'analysis': verdict,
+        }
+    
+    def get_window_stats(self) -> Dict[str, Any]:
+        """Get current window statistics for monitoring/debugging ("std" is 0.0 until two points exist)."""
+        volume_list = list(self.volume_data)
+        sentiment_list = list(self.sentiment_data)
+        
+        stats = {
+            "window_size_hours": self.window_size_hours,
+            "z_threshold": self.z_threshold,
+            "data_points_count": len(self.timestamp_data),
+            "use_ml": self.use_ml,
+            "volume_stats": {},
+            "sentiment_stats": {},
+        }
+        
+        if volume_list:
+            stats["volume_stats"] = {
+                "count": len(volume_list),
+                "mean": float(np.mean(volume_list)),
+                # ddof=1 divides by N-1: a single sample would give NaN plus a RuntimeWarning.
+                "std": float(np.std(volume_list, ddof=1)) if len(volume_list) > 1 else 0.0,
+                "min": float(np.min(volume_list)),
+                "max": float(np.max(volume_list)),
+            }
+        if sentiment_list:
+            stats["sentiment_stats"] = {
+                "count": len(sentiment_list),
+                "mean": float(np.mean(sentiment_list)),
+                # Same single-sample guard as for volume; NaN is also not valid strict JSON.
+                "std": float(np.std(sentiment_list, ddof=1)) if len(sentiment_list) > 1 else 0.0,
+                "min": float(np.min(sentiment_list)),
+                "max": float(np.max(sentiment_list)),
+            }
+        # Add ML stats if available
+        if self.ml_detector:
+            stats["ml"] = {
+                "is_trained": self.ml_detector.is_trained,
+                "contamination": self.ml_detector.contamination,
+                "training_samples": len(self.ml_detector.training_data),
+                "features": self.ml_detector.feature_columns
+            }
+        
+        return stats
+    
+    def reset(self):
+        """Reset the detector: clear all buffered data and swap in a fresh ML detector, if one exists."""
+        self.volume_data.clear()
+        self.sentiment_data.clear()
+        self.timestamp_data.clear()
+        self.historical_points.clear()
+        if self.ml_detector:
+            self.ml_detector = IsolationForestDetector(  # only the contamination setting is carried over
+                contamination=self.ml_detector.contamination
+            )
+        logger.info("AnomalyDetector reset completed")
+    
+    def save_ml_model(self, filepath: str):
+        """Save the ML model to ``filepath`` via the ML detector; a silent no-op when no ML detector is set."""
+        if self.ml_detector:
+            self.ml_detector.save_model(filepath)
+    
+    def load_ml_model(self, filepath: str) -> bool:
+        """Load a pre-trained ML model; returns the detector's own result, or False when no ML detector is set."""
+        if self.ml_detector:
+            return self.ml_detector.load_model(filepath)
+        return False
+
+
+# Convenience functions for easy usage
+def create_detector(
+    window_size_hours: int = 24,
+    z_threshold: float = 2.5,
+    use_ml: bool = True,
+    ml_contamination: float = 0.1,
+    enable_comparison_mode: bool = False
+) -> AnomalyDetector:
+    """
+    Factory function to create an AnomalyDetector instance.
+    
+    Args:
+        window_size_hours: Size of rolling window in hours
+        z_threshold: Z-score threshold for anomaly detection
+        use_ml: Enable ML-based multi-dimensional detection
+        ml_contamination: Expected proportion of anomalies (0.0-0.5)
+        enable_comparison_mode: Compare Z-score vs ML performance
+        
+    Returns:
+        Configured AnomalyDetector instance
+    """
+    return AnomalyDetector(
+        window_size_hours=window_size_hours,
+        z_threshold=z_threshold,
+        use_ml=use_ml,
+        ml_contamination=ml_contamination,
+        enable_comparison_mode=enable_comparison_mode
+    )
+
+
+def detect_spike(
+    current_value: float, baseline_values: List[float], z_threshold: float = 2.5
+) -> Tuple[bool, float]:
+    """
+    Simple spike detection for a single value against baseline.
+    
+    Args:
+        current_value: Value to test
+        baseline_values: Historical baseline values; fewer than 10 returns (False, 0.0)
+        z_threshold: Z-score threshold for anomaly detection
+        
+    Returns:
+        Tuple of (is_anomaly, severity_score)
+    """
+    if len(baseline_values) < 10:
+        return False, 0.0
+    
+    detector = AnomalyDetector(z_threshold=z_threshold, use_ml=False)
+    # Baseline values are loaded as "volume"; sentiment is a 0.0 placeholder and all points share one timestamp.
+    dummy_timestamp = datetime.utcnow()
+    for value in baseline_values:
+        detector.add_data_point(value, 0.0, dummy_timestamp)
+    
+    result = detector.detect_volume_anomaly(current_value, dummy_timestamp)
+    return result.is_anomaly, result.severity_score
\ No newline at end of file
diff --git a/temp_backup/src/api/ingestion_quality_routes.py b/temp_backup/src/api/ingestion_quality_routes.py
new file mode 100644
index 00000000..d67b6cac
--- /dev/null
+++ b/temp_backup/src/api/ingestion_quality_routes.py
@@ -0,0 +1,59 @@
+"""FastAPI routes for triggering ingestion quality checks."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, Optional
+from datetime import datetime
+
+from fastapi import APIRouter, Depends
+from pydantic import BaseModel
+
+from src.ingestion.stellar_ingestion_checks import run_all_checks
+
+
+router = APIRouter()
+
+
+class IngestionQualityRunRequest(BaseModel):
+    network: str = "testnet"  # "testnet" only in MVP
+    asset: str = "XLM"  # upper-cased by the route handler before it is passed on
+    ingestion_lag_seconds: int = 300  # forwarded to run_all_checks unchanged
+    duplicate_window_hours: int = 24  # forwarded to run_all_checks as dup_window_hours
+    drift_compare_window_hours: int = 24  # forwarded to run_all_checks unchanged
+    drift_ratio_threshold: float = 0.05  # forwarded to run_all_checks unchanged
+    drift_hours: Optional[str] = "24,48"  # comma-separated; parsed to ints by the handler, empty/None falls back to [24, 48]
+    manual_run_id: Optional[str] = None  # forwarded to run_all_checks unchanged
+
+
+class IngestionQualityRunResponse(BaseModel):
+    schema_version: int
+    generated_at: str
+    network: str
+    asset: str
+    manual_run_id: Optional[str] = None
+    thresholds: Dict[str, Any]
+    summary: Dict[str, Any]
+    findings: list[Dict[str, Any]]
+    exit_code: int
+
+
+@router.post("/ingestion/quality/run", response_model=IngestionQualityRunResponse)
+async def run_ingestion_quality(req: IngestionQualityRunRequest) -> IngestionQualityRunResponse:
+    """Run all ingestion quality checks; a non-integer ``drift_hours`` entry yields HTTP 422, not a 500."""
+    from fastapi import HTTPException  # local import: this module's top-level fastapi import omits it
+    try:
+        hours_list = [int(x.strip()) for x in (req.drift_hours or "").split(",") if x.strip()] or [24, 48]
+    except ValueError as exc:
+        raise HTTPException(status_code=422, detail="drift_hours must be comma-separated integers") from exc
+    result = run_all_checks(
+        network=req.network, asset=req.asset.upper(),
+        ingestion_lag_seconds=req.ingestion_lag_seconds,
+        dup_window_hours=req.duplicate_window_hours,
+        drift_compare_window_hours=req.drift_compare_window_hours,
+        drift_ratio_threshold=req.drift_ratio_threshold,
+        hours_list=hours_list,
+        report_dir="./data/ingestion_reports",
+        manual_run_id=req.manual_run_id,
+    )
+    return IngestionQualityRunResponse(**result)
+
diff --git a/temp_backup/src/api/server.py b/temp_backup/src/api/server.py
new file mode 100644
index 00000000..e2bcbb07
--- /dev/null
+++ b/temp_backup/src/api/server.py
@@ -0,0 +1,661 @@
+"""
+FastAPI server to expose sentiment analysis as an HTTP API
+for the Node.js backend to consume.
+"""
+
+from fastapi import FastAPI, HTTPException, Request, Response, Query
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, ConfigDict
+from typing import Dict, Any, Optional, List
+from datetime import datetime
+
+# Import your existing SentimentAnalyzer
+import sys
+import os
+
+# Add parent directory to path to import from src
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
+from sentiment import SentimentAnalyzer
+from src.utils.logger import setup_logger, correlation_id_ctx, generate_correlation_id
+from src.utils.metrics import API_FAILURES_TOTAL, generate_latest, CONTENT_TYPE_LATEST
+from src.security import (
+    security_config,
+    setup_security_middleware,
+    setup_rate_limiter,
+    get_rate_limit_decorator,
+)
+from src.ml.retraining_pipeline import run_retraining, get_last_run_status
+from src.ml.model_registry import get_registry_status
+from src.analytics.correlation_engine import CorrelationEngine
+from src.db import PostgresService
+from src.ingestion.stellar_ingestion_checks import run_all_checks
+
+from src.analytics.sentiment_indicators import SentimentIndicatorMapper, get_legend as sentiment_legend
+
+_indicator_mapper = SentimentIndicatorMapper()
+
+# Initialize structured logger
+logger = setup_logger(__name__)
+
+# Initialize FastAPI app
+app = FastAPI(
+    title="Sentiment Analysis API",
+    description="Exposes sentiment analysis for Node.js backend integration",
+    version="1.0.0",
+)
+
+# Setup security middleware (API key authentication)
+setup_security_middleware(app)
+
+# Setup rate limiting
+limiter = security_config.limiter
+if limiter:
+    setup_rate_limiter(app, limiter)
+    logger.info(f"Rate limiting enabled: {security_config.rate_limit_default}")
+else:
+    logger.warning("Rate limiting is disabled")
+
+# Add CORS middleware to allow requests from Node.js backend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[
+        "http://localhost:3000",
+        "http://localhost:3001",
+    ],  # Adjust for your NestJS ports
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.middleware("http")
+async def metrics_and_logging_middleware(request: Request, call_next):
+    corr_id = request.headers.get("X-Correlation-ID", generate_correlation_id())  # reuse the caller's ID when the header is present
+    correlation_id_ctx.set(corr_id)  # presumably read by the structured logger — verify in src.utils.logger
+    try:
+        response = await call_next(request)
+        if response.status_code >= 500:  # 5xx responses count as API failures
+            API_FAILURES_TOTAL.labels(method=request.method, endpoint=request.url.path).inc()
+        response.headers["X-Correlation-ID"] = corr_id  # echo the ID back to the client
+        return response
+    except Exception as e:
+        API_FAILURES_TOTAL.labels(method=request.method, endpoint=request.url.path).inc()  # unhandled exceptions count too
+        logger.error("Unhandled exception during request processing", exc_info=True)
+        raise
+
+# Initialize your existing SentimentAnalyzer
+sentiment_analyzer = SentimentAnalyzer()
+
+# Ingestion quality check routes
+from src.api.ingestion_quality_routes import router as ingestion_quality_router
+app.include_router(ingestion_quality_router)
+
+
+try:
+    postgres_service = PostgresService()
+except Exception as exc:
+    postgres_service = None
+    logger.warning("PostgreSQL service unavailable for /news endpoint: %s", exc)
+
+
+# ---------------------------------------------------------------------------
+# Request/Response models
+# ---------------------------------------------------------------------------
+
+class SentimentIndicatorResponse(BaseModel):
+    """Visual indicator fields attached to every sentiment-bearing response."""
+
+    score: float
+    color: str  # "green" | "red" | "gray"
+    hex_color: str  # CSS hex, e.g. "#00C853"
+    label: str  # "Bullish" | "Bearish" | "Neutral"
+    display_text: str  # e.g. "0.85 Bullish"
+
+
+class AnalyzeRequest(BaseModel):
+    text: str
+    asset: Optional[str] = None  # Optional asset filter
+
+
+class AnalyzeResponse(BaseModel):
+    sentiment: float  # compound_score from SentimentResult
+    asset_codes: List[str] = []  # Asset codes found in text
+    sentiment_label: str = ""  # positive/negative/neutral
+    indicator: Optional[SentimentIndicatorResponse] = None  # Visual colour indicator
+
+
+class AssetAnalysisResponse(BaseModel):
+    asset: str
+    sentiment: float
+    sentiment_label: str
+    analysis_count: int
+    asset_distribution: Dict[str, int] = {}
+    sentiment_distribution: Dict[str, float] = {}
+    indicator: Optional[SentimentIndicatorResponse] = None  # Visual colour indicator
+
+
+class HealthResponse(BaseModel):
+    status: str
+    timestamp: str
+    service: str
+
+
+class NewsArticleResponse(BaseModel):
+    article_id: str
+    title: str
+    content: Optional[str] = None
+    summary: Optional[str] = None
+    source: Optional[str] = None
+    url: Optional[str] = None
+    published_at: Optional[str] = None
+    primary_asset: Optional[str] = None
+    asset_codes: List[str] = []
+    categories: List[str] = []
+    keywords: List[str] = []
+    detected_entities: List[str] = []
+    sentiment_score: Optional[float] = None  # Raw compound score stored in DB
+    sentiment_label: Optional[str] = None  # positive / negative / neutral
+    indicator: Optional[SentimentIndicatorResponse] = None  # Visual colour indicator
+
+@app.get("/metrics")
+async def metrics():
+    """Expose Prometheus metrics"""
+    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)
+
+@app.get("/")
+@limiter.limit("20/minute") if limiter else lambda x: x
+async def root(request: Request) -> Dict[str, Any]:
+    """Root endpoint with API information. The listing is hand-maintained and omits /retrain, /model/status, /analytics/forecast and /correlation/*."""
+    return {
+        "service": "Sentiment Analysis API",
+        "version": "1.0.0",
+        "endpoints": {
+            "GET /health": "Health check (no auth required)",
+            "GET /metrics": "Prometheus metrics (no auth required)",
+            "GET /news": "Get recent news with optional ?entity=... filter (requires X-API-Key header)",
+            "POST /analyze": "Analyze text sentiment (requires X-API-Key header)",
+            "GET /analyze": "Get asset-specific sentiment analysis (requires X-API-Key header)",
+            "POST /analyze-batch": "Batch analyze multiple texts (requires X-API-Key header)",
+            "GET /sentiment/legend": "Get colour legend for sentiment indicators (no auth required)",
+        },
+        "note": "Returns sentiment score between -1 (negative) and 1 (positive)",
+        "security": "All endpoints except /health and /metrics require X-API-Key header",
+    }
+
+
+@app.get("/health", response_model=HealthResponse)
+@limiter.limit("30/minute") if limiter else lambda x: x
+async def health_check(request: Request) -> HealthResponse:
+
+    """Health check endpoint for monitoring: always reports "healthy" with a naive local timestamp; no dependency is probed."""
+    return HealthResponse(
+        status="healthy",
+        timestamp=datetime.now().isoformat(),
+        service="sentiment-analysis",
+    )
+
+
+@app.get("/news", response_model=List[NewsArticleResponse])
+@limiter.limit("30/minute") if limiter else lambda x: x
+async def get_news(
+    request: Request,
+    limit: int = Query(50, ge=1, le=500),
+    hours: int = Query(24, ge=1, le=168),
+    asset: Optional[str] = Query(None, description="Optional primary asset code filter"),
+    entity: Optional[str] = Query(
+        None,
+        description="Optional detected entity filter (example: Soroban)",
+    ),
+) -> List[NewsArticleResponse]:
+    """Return recent articles with optional asset and entity filters (503 when the DB service is unavailable, 500 on query failure)."""
+    if postgres_service is None:
+        raise HTTPException(status_code=503, detail="Database service unavailable")
+
+    try:
+        articles = postgres_service.get_recent_articles(
+            limit=limit,
+            hours=hours,
+            asset=asset,
+            entity=entity,
+        )
+
+        logger.info(
+            "Retrieved %d news articles | hours=%d | asset=%s | entity=%s | client_ip=%s",
+            len(articles),
+            hours,
+            asset,
+            entity,
+            # request.client can be None; without this guard the log call itself caused a 500.
+            request.client.host if request.client else "unknown",
+        )
+        def _build_indicator(
+            score: Optional[float],
+        ) -> Optional[SentimentIndicatorResponse]:
+            if score is None:
+                return None
+            ind = _indicator_mapper.score_to_indicator(score)
+            return SentimentIndicatorResponse(**ind.to_dict())
+
+        return [
+            NewsArticleResponse(
+                article_id=article.article_id,
+                title=article.title,
+                content=article.content,
+                summary=article.summary,
+                source=article.source,
+                url=article.url,
+                published_at=(
+                    article.published_at.isoformat() if article.published_at else None
+                ),
+                primary_asset=article.primary_asset,
+                asset_codes=article.asset_codes or [],
+                categories=article.categories or [],
+                keywords=article.keywords or [],
+                detected_entities=article.detected_entities or [],
+                sentiment_score=article.sentiment_score,
+                sentiment_label=article.sentiment_label,
+                indicator=_build_indicator(article.sentiment_score),
+            )
+            for article in articles
+        ]
+    except Exception as exc:
+        logger.error("Error retrieving news: %s", str(exc), exc_info=True)
+        raise HTTPException(status_code=500, detail="Failed to fetch news articles")
+
+
+@app.post("/analyze", response_model=AnalyzeResponse)
+@limiter.limit("50/minute") if limiter else lambda x: x
+async def analyze_text(body: AnalyzeRequest, request: Request) -> AnalyzeResponse:
+    """
+    Analyze the sentiment of provided text.
+
+    Delegates to the module-level SentimentAnalyzer and returns its
+    compound_score as the sentiment value.
+
+    Args:
+        body: Text to analyze plus an optional asset filter
+        request: Incoming request (its client IP is written to the log line)
+    Returns:
+        AnalyzeResponse with sentiment, asset_codes, sentiment_label and indicator
+    Raises:
+        HTTPException: 400 for empty/whitespace-only text, 500 when analysis fails
+    """
+    try:
+        # Validate input
+        if not body.text or not body.text.strip():
+            raise HTTPException(status_code=400, detail="Text cannot be empty")
+
+        # Use your existing SentimentAnalyzer with asset filter
+        result = sentiment_analyzer.analyze(body.text, body.asset)
+
+        logger.info(
+            f"Analyzed text: '{body.text[:50]}...' -> sentiment: {result.compound_score} | "
+            f"asset: {body.asset} | client_ip: {request.client.host if request.client else 'unknown'}"
+        )
+
+        # Build visual indicator
+        ind = _indicator_mapper.score_to_indicator(result.compound_score)
+
+        # Return enhanced response with asset information
+        return AnalyzeResponse(
+            sentiment=result.compound_score,
+            asset_codes=result.asset_codes,
+            sentiment_label=result.sentiment_label,
+            indicator=SentimentIndicatorResponse(**ind.to_dict()),
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error in sentiment analysis: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal server error") from e  # details stay in the log, not the response
+
+
+@app.get("/analyze", response_model=AssetAnalysisResponse)
+@limiter.limit("30/minute") if limiter else lambda x: x
+async def get_asset_analysis(
+    request: Request,
+    asset: str = Query(..., description="Asset code (e.g., XLM, USDC, BTC)")
+) -> AssetAnalysisResponse:
+    """
+    Get sentiment analysis for a specific asset.
+    
+    NOTE: currently returns a hard-coded neutral placeholder (score 0.0,
+    analysis_count 0); no news or social media data is queried yet.
+
+    Args:
+        asset: Asset code to analyze (e.g., XLM, USDC, BTC)
+
+    Returns:
+        AssetAnalysisResponse for the upper-cased, stripped asset code
+    """
+    try:
+        if not asset or not asset.strip():
+            raise HTTPException(status_code=400, detail="Asset code cannot be empty")
+        
+        asset = asset.upper().strip()
+        
+        # For now, return a mock response since we need to integrate with actual data sources
+        # In a real implementation, this would query the database for recent sentiment data
+        # related to the specific asset
+        
+        logger.info(f"Requested asset analysis for: {asset} | client_ip: {request.client.host if request.client else 'unknown'}")
+        
+        # Mock response - replace with actual database query
+        mock_score = 0.0
+        ind = _indicator_mapper.score_to_indicator(mock_score)
+        return AssetAnalysisResponse(
+            asset=asset,
+            sentiment=mock_score,
+            sentiment_label="neutral",
+            analysis_count=0,
+            asset_distribution={},
+            sentiment_distribution={"positive": 0.0, "negative": 0.0, "neutral": 1.0},
+            indicator=SentimentIndicatorResponse(**ind.to_dict()),
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error in asset analysis: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal server error") from e  # details stay in the log, not the response
+
+
+# Optional: Batch analysis endpoint if needed
+@app.post("/analyze-batch")
+@limiter.limit("10/minute") if limiter else lambda x: x
+async def analyze_batch(request: Request, texts: list[str], asset: Optional[str] = None) -> Dict[str, Any]:
+    """Batch analyze multiple texts with optional asset filter (400 on an empty list, 500 when analysis fails)."""
+    # Validate outside the try block: inside it, the 400 was caught below and re-raised as a 500.
+    if not texts:
+        raise HTTPException(status_code=400, detail="Texts list cannot be empty")
+    try:
+        results = sentiment_analyzer.analyze_batch(texts, asset)
+        summary = sentiment_analyzer.get_sentiment_summary(results)
+        return {
+            "results": [r.to_dict() for r in results],
+            "summary": summary,
+            "count": len(results),
+            "asset_filter": asset,
+        }
+    except Exception as e:
+        logger.error(f"Error in batch sentiment analysis: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal server error") from e
+
+
+@app.get("/sentiment/legend")
+async def get_sentiment_legend() -> Dict[str, Any]:
+    """
+    Return the colour legend that frontend clients use to render
+    sentiment badge tooltips.
+
+    No authentication required — purely informational.
+    NOTE(review): the threshold strings below are hard-coded; presumably they mirror SentimentIndicatorMapper — verify.
+    Returns a dict with "legend" (the value of sentiment_legend()) and "thresholds"; legend entries are documented with keys:
+    - color       : semantic name ("green" | "red" | "gray")
+    - hex_color   : CSS hex value
+    - label       : human-readable label ("Bullish" | "Bearish" | "Neutral")
+    - description : tooltip copy
+    - score_range : score boundary description
+    """
+    return {
+        "legend": sentiment_legend(),
+        "thresholds": {
+            "bullish": "score >= 0.05",
+            "bearish": "score <= -0.05",
+            "neutral": "-0.05 < score < 0.05",
+        },
+    }
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # Run the dev server. NOTE(review): this guard sits mid-file; routes defined below it are presumably still served because uvicorn imports "server:app" by name — confirm.
+    uvicorn.run(
+        "server:app",
+        host="0.0.0.0",  # Listen on all interfaces
+        port=8000,  # Default FastAPI port
+        reload=True,  # Auto-reload during development
+    )
+
+
+# ---------------------------------------------------------------------------
+# Model retraining endpoints (Issue #454)
+# ---------------------------------------------------------------------------
+
+class RetrainRequest(BaseModel):
+    force: bool = False  # Skip quality gates when True
+
+
+class RetrainResponse(BaseModel):
+    status: str
+    started_at: Optional[str] = None
+    finished_at: Optional[str] = None
+    duration_seconds: Optional[float] = None
+    models: Dict[str, Any] = {}
+    registry: Dict[str, Any] = {}
+    error: Optional[str] = None
+
+
+class ModelStatusResponse(BaseModel):
+    last_run: Dict[str, Any]
+    registry: Dict[str, Any]
+
+
+@app.post("/retrain", response_model=RetrainResponse)
+@limiter.limit("5/minute") if limiter else lambda x: x
+async def trigger_retraining(
+    body: RetrainRequest,
+    request: Request,
+) -> RetrainResponse:
+    """
+    Trigger an immediate model retraining run.
+
+    Runs run_retraining in the event loop's default executor, so the HTTP response
+    is returned only after retraining completes (or fails). For long-running production
+    retrains, consider making this async with a task queue.
+
+    Requires X-API-Key header. Only keys that RetrainResponse declares are copied from the result.
+    """
+    import asyncio
+
+    logger.info(
+        f"Retraining triggered via API | force={body.force} | "
+        f"client_ip={request.client.host if request.client else 'unknown'}"
+    )
+    # get_running_loop() is the preferred call inside a coroutine (get_event_loop() has legacy semantics).
+    loop = asyncio.get_running_loop()
+    result = await loop.run_in_executor(
+        None, lambda: run_retraining(force=body.force)
+    )
+
+    return RetrainResponse(**{k: result.get(k) for k in RetrainResponse.model_fields if k in result})
+
+
+@app.get("/model/status", response_model=ModelStatusResponse)
+@limiter.limit("30/minute") if limiter else lambda x: x
+async def model_status(request: Request) -> ModelStatusResponse:
+    """
+    Return the current model registry state and last retraining run metadata.
+
+    Requires X-API-Key header.
+    """
+    return ModelStatusResponse(
+        last_run=get_last_run_status(),
+        registry=get_registry_status(),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Predictive analytics endpoint (forecast market trends)
+# ---------------------------------------------------------------------------
+
+
+class ForecastResponse(BaseModel):
+    model_config = ConfigDict(protected_namespaces=())
+
+    predicted_trend_24h: str
+    predicted_trend_48h: str
+    confidence_24h: float
+    confidence_48h: float
+    sentiment_velocity: float
+    forecast_score_24h: float
+    forecast_score_48h: float
+    model_backend: str
+    data_points_used: int
+    generated_at: str
+
+
+@app.get("/analytics/forecast", response_model=ForecastResponse)
+@limiter.limit("20/minute") if limiter else lambda x: x
+async def get_forecast(request: Request) -> ForecastResponse:
+    """
+    Predict market trends (Bullish / Bearish / Neutral) for the next 24-48 hours.
+
+    Uses historical sentiment data from *analytics.jsonl* to train a
+    SentimentForecaster (Prophet when installed, sklearn Ridge otherwise)
+    and returns predicted health scores together with a Sentiment Velocity
+    value that measures how fast the market mood is changing.
+    The forecaster runs in the default executor; a failure there is logged and returned as HTTP 500.
+    Requires X-API-Key header.
+    """
+    import asyncio
+
+    logger.info(f"Forecast requested | client_ip={request.client.host if request.client else 'unknown'}")
+
+    def _run_forecast():
+        from src.analytics.forecaster import SentimentForecaster
+
+        forecaster = SentimentForecaster()
+        return forecaster.run()
+
+    loop = asyncio.get_running_loop()  # preferred over get_event_loop() inside a coroutine
+    try:
+        result = await loop.run_in_executor(None, _run_forecast)
+    except Exception as exc:
+        logger.error(f"Forecast failed: {exc}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Forecast error") from exc  # details stay in the log
+
+    return ForecastResponse(**result.to_dict())
+
+
+# ---------------------------------------------------------------------------
+# Correlation Analysis endpoints (Issue #452)
+# ---------------------------------------------------------------------------
+
+
+class CorrelationDataPoint(BaseModel):
+    timestamp: str
+    score: float
+
+
+class MetricDataPoint(BaseModel):
+    timestamp: str
+    value: float
+
+
+class CorrelationRequest(BaseModel):
+    sentiment_data: List[CorrelationDataPoint]
+    price_data: Optional[List[MetricDataPoint]] = None
+    volume_data: Optional[List[MetricDataPoint]] = None
+    lag_hours: int = 0
+
+
+class CorrelationResponse(BaseModel):
+    price_correlation: Optional[Dict[str, Any]] = None
+    volume_correlation: Optional[Dict[str, Any]] = None
+    summary: Dict[str, Any]
+
+
+class LagAnalysisRequest(BaseModel):
+    sentiment_data: List[CorrelationDataPoint]
+    metric_data: List[MetricDataPoint]
+    metric_type: str = "volume"
+    max_lag_hours: int = 24
+
+
+class LagAnalysisResponse(BaseModel):
+    best_lag_hours: int
+    best_correlation: float
+    lag_analysis: List[Dict[str, Any]]
+    recommendation: str
+
+
+@app.post("/correlation/analyze", response_model=CorrelationResponse)
+@limiter.limit("20/minute") if limiter else lambda x: x
+async def analyze_correlation(
+    body: CorrelationRequest,
+    request: Request,
+) -> CorrelationResponse:
+    """
+    Analyze correlation between sentiment and price/volume data.
+
+    Missing price or volume series are passed to the engine as empty lists.
+    Requires X-API-Key header.
+    """
+    sentiment_list = [{"timestamp": dp.timestamp, "score": dp.score} for dp in body.sentiment_data]
+    price_list = (
+        [{"timestamp": dp.timestamp, "value": dp.value} for dp in body.price_data]
+        if body.price_data
+        else []
+    )
+    volume_list = (
+        [{"timestamp": dp.timestamp, "value": dp.value} for dp in body.volume_data]
+        if body.volume_data
+        else []
+    )
+
+    # request.client can be None, so guard the attribute access used for logging.
+    logger.info(
+        f"Correlation analysis requested | sentiment_points={len(sentiment_list)} | "
+        f"price_points={len(price_list)} | volume_points={len(volume_list)} | "
+        f"lag_hours={body.lag_hours} | client_ip={request.client.host if request.client else 'unknown'}"
+    )
+    result = CorrelationEngine.full_analysis(
+        sentiment_data=sentiment_list,
+        price_data=price_list,
+        volume_data=volume_list,
+        lag_hours=body.lag_hours,
+    )
+
+    return CorrelationResponse(
+        price_correlation=result.get("price_correlation"),
+        volume_correlation=result.get("volume_correlation"),
+        summary=result.get("summary", {}),
+    )
+
+
+@app.post("/correlation/lag-analysis", response_model=LagAnalysisResponse)
+@limiter.limit("10/minute") if limiter else lambda x: x
+async def analyze_lag_correlation(
+    body: LagAnalysisRequest,
+    request: Request,
+) -> LagAnalysisResponse:
+    """
+    Analyze correlation across multiple time lags to find optimal lead time.
+
+    Returns the best lag hours and correlation strength for predicting market changes.
+    Requires X-API-Key header.
+    """
+    sentiment_list = [{"timestamp": dp.timestamp, "score": dp.score} for dp in body.sentiment_data]
+    metric_list = [{"timestamp": dp.timestamp, "value": dp.value} for dp in body.metric_data]
+    # request.client can be None, so guard the attribute access used for logging.
+    logger.info(
+        f"Lag correlation analysis | metric_type={body.metric_type} | "
+        f"max_lag={body.max_lag_hours}h | client_ip={request.client.host if request.client else 'unknown'}"
+    )
+
+    result = CorrelationEngine.analyze_with_lags(
+        sentiment_data=sentiment_list,
+        metric_data=metric_list,
+        metric_type=body.metric_type,
+        max_lag_hours=body.max_lag_hours,
+    )
+
+    return LagAnalysisResponse(
+        best_lag_hours=result["best_lag_hours"],
+        best_correlation=result["best_correlation"],
+        lag_analysis=result["lag_analysis"],
+        recommendation=result["recommendation"],
+    )
diff --git a/temp_backup/src/cache_manager.py b/temp_backup/src/cache_manager.py
new file mode 100644
index 00000000..9f79529d
--- /dev/null
+++ b/temp_backup/src/cache_manager.py
@@ -0,0 +1,146 @@
+"""
+Cache Manager module - Implements caching layer for expensive operations using Redis
+"""
+
+import hashlib
+import json
+import logging
+import os
+from typing import Any, Optional
+
+import redis
+
+logger = logging.getLogger(__name__)
+
+
+class CacheManager:
+    """
+    Manages caching using Redis for expensive operations like sentiment analysis.
+    Values are stored as JSON; the TTL defaults to 24 hours unless overridden.
+    """
+
+    DEFAULT_TTL_SECONDS = 24 * 60 * 60  # 24 hours
+
+    def __init__(
+        self,
+        host: Optional[str] = None,
+        port: Optional[int] = None,
+        db: Optional[int] = None,
+        ttl_seconds: Optional[int] = None,
+        namespace: str = "cache",
+    ):
+        self.host = host if host is not None else os.getenv("REDIS_HOST", "localhost")
+        self.port = port if port is not None else int(os.getenv("REDIS_PORT", "6379"))
+        self.db = db if db is not None else int(os.getenv("REDIS_DB", "0"))
+        self.ttl_seconds = (  # explicit argument > CACHE_TTL_SECONDS env var > class default
+            ttl_seconds
+            if ttl_seconds is not None
+            else int(os.getenv("CACHE_TTL_SECONDS", str(self.DEFAULT_TTL_SECONDS)))
+        )
+        self.namespace = namespace  # prefix of every key written by this instance
+
+        self.redis_client = redis.Redis(
+            host=self.host,
+            port=self.port,
+            db=self.db,
+            decode_responses=True,
+            socket_connect_timeout=5,
+            socket_timeout=5,
+        )
+        self.redis_client.ping()  # not wrapped in try: a failure here propagates to the caller
+        logger.info(
+            "Connected to Redis at %s:%s/%s (namespace=%s, ttl=%ss)",
+            self.host,
+            self.port,
+            self.db,
+            self.namespace,
+            self.ttl_seconds,
+        )
+
+    def _generate_key(self, raw_key: str) -> str:
+        """Return ``namespace:sha256(raw_key)`` (hex digest of the UTF-8 encoded key)."""
+        digest = hashlib.sha256(raw_key.encode("utf-8")).hexdigest()
+        return f"{self.namespace}:{digest}"
+
+    @staticmethod
+    def make_key(*parts: Any) -> str:
+        """Build a deterministic cache key by joining ``str(part)`` of each part with ``|``."""
+        return "|".join(str(p) for p in parts)
+
+    def get(self, raw_key: str) -> Optional[Any]:
+        """
+        Return deserialised value for raw_key, or None on miss or on any error.
+
+        Args:
+            raw_key: Key to retrieve the result from (hashed via _generate_key)
+
+        Returns:
+            Cached result if found, None otherwise (errors are logged, not raised)
+        """
+        try:
+            key = self._generate_key(raw_key)
+            cached = self.redis_client.get(key)
+            if cached is not None:
+                logger.info("CACHE HIT  [%s] %s", self.namespace, raw_key[:80])  # key truncated to 80 chars
+                return json.loads(cached)
+            logger.debug("CACHE MISS [%s] %s", self.namespace, raw_key[:80])
+            return None
+        except Exception as e:
+            logger.error("Cache get error: %s", e)
+            return None
+
+    def set(self, raw_key: str, value: Any) -> bool:
+        """
+        Store result in cache with TTL (self.ttl_seconds).
+
+        Args:
+            raw_key: Key to store the result under (hashed via _generate_key)
+            value: Result to store in cache; serialised with json.dumps
+
+        Returns:
+            True if successful, False otherwise (errors are logged, not raised)
+        """
+        try:
+            key = self._generate_key(raw_key)
+            serialised = json.dumps(value, default=str)  # non-JSON types are stored as str(obj)
+            ok = self.redis_client.setex(key, self.ttl_seconds, serialised)
+            if ok:
+                logger.debug(
+                    "CACHE SET  [%s] ttl=%ss", self.namespace, self.ttl_seconds
+                )
+            return bool(ok)
+        except Exception as e:
+            logger.error("Cache set error: %s", e)
+            return False
+
+    def delete(self, raw_key: str) -> bool:
+        """Remove a single entry; True if a key was deleted, False if absent or on error."""
+        try:
+            return self.redis_client.delete(self._generate_key(raw_key)) > 0
+        except Exception as e:
+            logger.error("Cache delete error: %s", e)
+            return False
+
+    def clear_namespace(self) -> int:
+        """Delete every key matching ``namespace:*``; return the count deleted (0 on error)."""
+        try:
+            keys = list(self.redis_client.scan_iter(match=f"{self.namespace}:*"))
+            count = self.redis_client.delete(*keys) if keys else 0  # skip the call when nothing matched
+            if count:
+                logger.info("Cleared %d entries from [%s]", count, self.namespace)
+            return count
+        except Exception as e:
+            logger.error("Cache clear error: %s", e)
+            return 0
+
+    def ping(self) -> bool:
+        """
+        Test Redis connection.
+
+        Returns:
+            Result of the client's ping(), or False if it raised
+        """
+        try:
+            return self.redis_client.ping()
+        except Exception:
+            return False
diff --git a/temp_backup/src/config/anomaly_config.py b/temp_backup/src/config/anomaly_config.py
new file mode 100644
index 00000000..82e4e419
--- /dev/null
+++ b/temp_backup/src/config/anomaly_config.py
@@ -0,0 +1,114 @@
+#! /usr/bin/env python3
+"""
+Configuration module for anomaly detection settings.
+Supports both Z-score and Isolation Forest configurations.
+"""
+
+from dataclasses import dataclass
+from typing import Optional
+import os
+import json
+
+
+@dataclass
+class ZScoreConfig:
+    """Settings for the Z-score based anomaly detector."""
+
+    window_size_hours: int = 24
+    z_threshold: float = 2.5
+    min_data_points: int = 10
+
+    @classmethod
+    def from_dict(cls, data: dict) -> 'ZScoreConfig':
+        """Build a config from *data*; keys that are absent take the field defaults."""
+        window = data.get('window_size_hours', 24)
+        threshold = data.get('z_threshold', 2.5)
+        min_points = data.get('min_data_points', 10)
+        return cls(window, threshold, min_points)
+
+
+@dataclass
+class IsolationForestConfig:
+    """Configuration for Isolation Forest based anomaly detection."""
+
+    enabled: bool = True
+    contamination: float = 0.1  # Expected proportion of anomalies (0.0 to 0.5)
+    n_estimators: int = 100
+    max_samples: str = 'auto'
+    random_state: int = 42
+    min_training_samples: int = 50
+    auto_retrain_interval: int = 200  # Retrain every N new samples
+    features: Optional[list] = None  # Features to use for detection; None selects the defaults
+
+    def __post_init__(self):
+        defaults = ['volume', 'sentiment', 'volume_change_rate', 'sentiment_change_rate']
+        self.features = defaults if self.features is None else list(self.features)  # private copy
+
+    @classmethod
+    def from_dict(cls, data: dict) -> 'IsolationForestConfig':
+        """Build a config from *data*; keys that are absent take the field defaults."""
+        return cls(
+            enabled=data.get('enabled', True),
+            contamination=data.get('contamination', 0.1),
+            n_estimators=data.get('n_estimators', 100), max_samples=data.get('max_samples', 'auto'),
+            random_state=data.get('random_state', 42),
+            min_training_samples=data.get('min_training_samples', 50),
+            auto_retrain_interval=data.get('auto_retrain_interval', 200),
+            features=data.get('features'),  # None -> defaults are filled in by __post_init__
+        )
+
+
+@dataclass
+class AnomalyDetectionConfig:
+    """Main configuration for anomaly detection system."""
+
+    zscore: ZScoreConfig
+    isolation_forest: IsolationForestConfig
+    enable_comparison_mode: bool = False
+    model_save_path: str = "models/anomaly_detector"
+
+    @classmethod
+    def from_dict(cls, data: dict) -> 'AnomalyDetectionConfig':
+        return cls(  # `or {}` lets a missing or null section fall back to its defaults
+            zscore=ZScoreConfig.from_dict(data.get('zscore') or {}),
+            isolation_forest=IsolationForestConfig.from_dict(data.get('isolation_forest') or {}),
+            enable_comparison_mode=data.get('enable_comparison_mode', False),
+            model_save_path=data.get('model_save_path', "models/anomaly_detector")
+        )
+
+    @classmethod
+    def from_env(cls) -> 'AnomalyDetectionConfig':
+        """Load configuration from ANOMALY_* environment variables; unset ones use defaults."""
+        config = {
+            'zscore': {
+                'window_size_hours': int(os.getenv('ANOMALY_WINDOW_HOURS', '24')),
+                'z_threshold': float(os.getenv('ANOMALY_Z_THRESHOLD', '2.5')),
+            },
+            'isolation_forest': {
+                'enabled': os.getenv('ANOMALY_ML_ENABLED', 'true').lower() == 'true',
+                'contamination': float(os.getenv('ANOMALY_ML_CONTAMINATION', '0.1')),
+                'n_estimators': int(os.getenv('ANOMALY_ML_ESTIMATORS', '100')),
+            },
+            'enable_comparison_mode': os.getenv('ANOMALY_COMPARISON_MODE', 'false').lower() == 'true',
+            'model_save_path': os.getenv('ANOMALY_MODEL_PATH', 'models/anomaly_detector')
+        }
+        return cls.from_dict(config)
+
+    def save_to_file(self, filepath: str) -> None:
+        """Save configuration to JSON file (UTF-8, indented by two spaces)."""
+        with open(filepath, 'w', encoding='utf-8') as f:
+            json.dump({
+                'zscore': self.zscore.__dict__,
+                'isolation_forest': self.isolation_forest.__dict__,
+                'enable_comparison_mode': self.enable_comparison_mode,
+                'model_save_path': self.model_save_path
+            }, f, indent=2)
+
+    @classmethod
+    def load_from_file(cls, filepath: str) -> 'AnomalyDetectionConfig':
+        """Load configuration from JSON file; fall back to from_env() if the file is missing."""
+        try:  # open directly instead of exists()+open(): no window for the file to vanish
+            with open(filepath, 'r', encoding='utf-8') as f:
+                return cls.from_dict(json.load(f))
+        except (FileNotFoundError, NotADirectoryError):
+            return cls.from_env()
\ No newline at end of file
diff --git a/temp_backup/src/database.py b/temp_backup/src/database.py
new file mode 100644
index 00000000..504aa2eb
--- /dev/null
+++ b/temp_backup/src/database.py
@@ -0,0 +1,241 @@
+"""
+Database service module - stores analytics data
+Supports both file-based storage (legacy) and PostgreSQL persistence
+"""
+
+import json
+import logging
+from typing import Dict, Any, List, Optional
+from datetime import datetime
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+class AnalyticsRecord:
+    """One analytics run: its timestamp, news count, sentiment data and trends."""
+
+    def __init__(
+        self,
+        timestamp: datetime,
+        news_count: int,
+        sentiment_data: Dict[str, Any],
+        trends: List[Dict[str, Any]],
+    ):
+        """Store the given fields on the instance unchanged."""
+        self.timestamp = timestamp
+        self.news_count = news_count
+        self.sentiment_data, self.trends = sentiment_data, trends
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the record as a dict; the timestamp is rendered with isoformat()."""
+        payload: Dict[str, Any] = {"timestamp": self.timestamp.isoformat()}
+        payload["news_count"] = self.news_count
+        payload["sentiment_data"] = self.sentiment_data
+        payload["trends"] = self.trends
+        return payload
+
+
+class DatabaseService:
+    """
+    Stores and retrieves analytics data
+    Supports both file-based storage and PostgreSQL
+    """
+
+    def __init__(
+        self,
+        storage_dir: str = "./data",
+        use_postgres: bool = True,
+        postgres_service: Optional[Any] = None,
+    ):
+        # File-based storage (legacy/fallback)
+        self.storage_dir = Path(storage_dir)
+        self.storage_dir.mkdir(parents=True, exist_ok=True)
+        self.analytics_file = self.storage_dir / "analytics.jsonl"
+        self.latest_file = self.storage_dir / "latest.json"
+
+        # PostgreSQL storage
+        self.use_postgres = use_postgres
+        self.postgres_service = postgres_service
+
+        if self.use_postgres and self.postgres_service:
+            logger.info("DatabaseService initialized with PostgreSQL support")
+        else:
+            logger.info("DatabaseService initialized with file-based storage only")
+
+    def save_analytics(self, record: AnalyticsRecord) -> bool:
+        """
+        Save analytics record to storage
+
+        Args:
+            record: AnalyticsRecord to save
+
+        Returns:
+            True if the file write succeeded, False otherwise (PostgreSQL errors are only logged)
+        """
+        success = True
+
+        # Save to file-based storage (always for backward compatibility)
+        try:
+            # Append to JSONL file for historical data
+            with open(self.analytics_file, "a") as f:
+                f.write(json.dumps(record.to_dict()) + "\n")
+
+            # Update latest.json for quick access
+            with open(self.latest_file, "w") as f:
+                json.dump(record.to_dict(), f, indent=2)
+
+            logger.info(f"Analytics saved to file: {record.news_count} news items analyzed")
+        except Exception as e:
+            logger.error(f"Error saving analytics to file: {e}")
+            success = False
+
+        # Save to PostgreSQL if enabled
+        if self.use_postgres and self.postgres_service:
+            try:
+                # Save sentiment data as news insights
+                if record.sentiment_data and "results" in record.sentiment_data:
+                    sentiment_results = record.sentiment_data["results"]
+                    if sentiment_results:
+                        saved_count = self.postgres_service.save_news_insights_batch(
+                            [r.to_dict() if hasattr(r, "to_dict") else r for r in sentiment_results]
+                        )
+                        logger.info(f"Saved {saved_count} news insights to PostgreSQL")
+
+                # Save trends as asset trends
+                if record.trends:
+                    for trend in record.trends:
+                        trend_data = trend.to_dict() if hasattr(trend, "to_dict") else trend
+                        self.postgres_service.save_asset_trend(
+                            asset="XLM",  # Default asset
+                            metric_name=trend_data.get("metric_name", "unknown"),
+                            window="24h",  # Default window
+                            trend_data=trend_data,
+                        )
+                    logger.info(f"Saved {len(record.trends)} trends to PostgreSQL")
+
+            except Exception as e:
+                logger.error(f"Error saving analytics to PostgreSQL: {e}")
+                # Don't fail if PostgreSQL save fails
+
+        return success
+
+    def get_latest_analytics(self) -> Dict[str, Any]:
+        """
+        Get the latest analytics record
+
+        Returns:
+            Latest analytics data or empty dict if not available
+        """
+        try:
+            if self.latest_file.exists():
+                with open(self.latest_file, "r") as f:
+                    return json.load(f)
+        except Exception as e:
+            logger.error(f"Error reading latest analytics: {e}")
+
+        return {}
+
+    def get_analytics_history(self, limit: int = 24) -> List[Dict[str, Any]]:
+        """
+        Get historical analytics data
+
+        Args:
+            limit: Maximum number of records to return; values <= 0 yield no records
+
+        Returns:
+            List of analytics records (most recent first)
+        """
+        records: List[Dict[str, Any]] = []
+        if limit <= 0:  # lines[-0:] would otherwise select the whole file
+            return records
+        try:
+            if self.analytics_file.exists():
+                with open(self.analytics_file, "r") as f:
+                    lines = f.readlines()
+                for line in reversed(lines[-limit:]):  # newest entries are at the end of the file
+                    records.append(json.loads(line))
+        except Exception as e:
+            logger.error(f"Error reading analytics history: {e}")
+        return records
+
+    def expose_metrics(self) -> Dict[str, Any]:
+        """
+        Expose all metrics for monitoring/API purposes
+
+        Returns:
+            Dictionary of all available metrics
+        """
+        latest = self.get_latest_analytics()
+        history = self.get_analytics_history(limit=24)
+
+        metrics = {
+            "latest": latest,
+            "history": history,
+            "history_count": len(history),
+            "last_updated": latest.get("timestamp") if latest else None,
+        }
+
+        # Add PostgreSQL metrics if available
+        if self.use_postgres and self.postgres_service:
+            try:
+                pg_summary = self.postgres_service.get_sentiment_summary(hours=24)
+                metrics["postgres_summary"] = pg_summary
+            except Exception as e:
+                logger.error(f"Error getting PostgreSQL metrics: {e}")
+
+        return metrics
+
+    def clear_old_data(self, days: int = 30) -> int:
+        """
+        Clear analytics data older than specified days
+
+        Lines that cannot be parsed are kept. Naive timestamps are treated as UTC.
+        PostgreSQL cleanup runs even when the analytics file does not exist.
+
+        Args:
+            days: Number of days to keep
+
+        Returns:
+            Number of records deleted from the file store
+        """
+        from datetime import timedelta, timezone
+        deleted_count = 0
+        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
+
+        # Clear file-based data
+        try:
+            if self.analytics_file.exists():
+                with open(self.analytics_file, "r") as f:
+                    lines = f.readlines()
+                # Filter out old records
+                new_lines = []
+                for line in lines:
+                    try:
+                        record = json.loads(line)
+                        record_date = datetime.fromisoformat(record.get("timestamp", ""))
+                        if record_date.tzinfo is None:
+                            record_date = record_date.replace(tzinfo=timezone.utc)
+                    except (ValueError, TypeError, AttributeError):
+                        new_lines.append(line)  # keep lines that cannot be dated
+                        continue
+                    if record_date > cutoff_date:
+                        new_lines.append(line)
+                    else:
+                        deleted_count += 1
+
+                with open(self.analytics_file, "w") as f:
+                    f.writelines(new_lines)
+                logger.info(f"Deleted {deleted_count} old analytics records from files")
+        except Exception as e:
+            logger.error(f"Error clearing old file data: {e}")
+
+        # Clear PostgreSQL data
+        if self.use_postgres and self.postgres_service:
+            try:
+                pg_deleted = self.postgres_service.cleanup_old_data(days=days)
+                logger.info(f"Deleted old PostgreSQL data: {pg_deleted}")
+            except Exception as e:
+                logger.error(f"Error clearing old PostgreSQL data: {e}")
+
+        return deleted_count
diff --git a/temp_backup/src/db/__init__.py b/temp_backup/src/db/__init__.py
new file mode 100644
index 00000000..dcb6992a
--- /dev/null
+++ b/temp_backup/src/db/__init__.py
@@ -0,0 +1,18 @@
+"""
+Database package for analytics data persistence
+"""
+
+from .models import Base, Article, SocialPost, AnalyticsRecord, NewsInsight, AssetTrend, OnChainEntity, ArticleEntityLink
+from .postgres_service import PostgresService
+
+__all__ = [
+    "Base",
+    "Article",
+    "SocialPost",
+    "AnalyticsRecord",
+    "NewsInsight",
+    "AssetTrend",
+    "OnChainEntity",
+    "ArticleEntityLink",
+    "PostgresService",
+]
diff --git a/temp_backup/src/db/__pycache__/__init__.cpython-314.pyc b/temp_backup/src/db/__pycache__/__init__.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3ac3bb070b00a86d1664c6ba8ba8150b33b90667
GIT binary patch
literal 510
zcmaKp&ui5{5XX0uyd=%TwpS0*fTtiW6$C+~@bn3J@T`(5dRV$_H{&xlO_rT`N<E4A
z&v^Ea5f(i852!tObywS~bNDdBeCIp+<JRW2g8BIL<@%dr&e-=*Tt#%5Tpal118W$u
zW8UxuuOo`;nBqF2q@K{EPARQ3%639OTIBTxZS1f;)+A3j6-|7TRg?F(i@73oq7X`>
z^r>1w`D(PK8dQHyoyM~D?b3k74haSgMU}ebf!DiiX1$^ZR~(JHQ~lEs$!*Q9CL7SB
zZQb+`-r-`vZuOd6F+&7;W?^V6?zY0j$^ssNecNd`tTG#JXXAh*U=ol9WC6KHHFcSk
z?Wm=6>DkyqkN=-?+vakQ#D9za`UDr%_fmdl7v7e+#}Ds`=U#zg{%E-@@7~$p7jwW<
z8jV=4&tNd?9ziH$P=r_6HFng1*bOUzR*UvTNZAb?Nx5gvU4GvmZr#Iy-S+PO7a890
g_z`o?e?}2ce&>w8<X_o~Z*2Ehl24<Lnceh?KY|~N6951J

literal 0
HcmV?d00001

diff --git a/temp_backup/src/db/__pycache__/models.cpython-314.pyc b/temp_backup/src/db/__pycache__/models.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8ea2d8428c61b9b92b869bec97c52a87f3cc449a
GIT binary patch
literal 13195
zcmd5@Yitx*cCK#squlSd+wBJrH(;=vhXLC>hJhI#wy_NuQy7??wW&1Sh27(JcT-gj
zW6x%%wHj>_B}$C=V}z1;eodoDStn7l&aXJhkDZ^5Tq}l>M$sx#qWoiovm4Ey<eYo!
zX*Ue8qa=-dtM0ky-n#X;=ljk%mwln2kAdIWzia)--NP{diWU9m)=pl891K%tI7Vh}
zGMr=BG31aP?Rf4Sb`I6ab>zHm*fr#q-9sMPGvt-ML#)gW)ywrmKG`?qm;FNlIWQEI
zgGARg92yGCVRG&sjtoWRXgf0*-of-~my{ctm>mq~X=XU@WJ6ya70Zo8%7V1sB8?HL
z52SvJw24RqAPriiaUu<YG;EPJ6KMpbQH!*NNE<-fXpy#Zv0Y5EY5tI}KP9Jbr6eIS
zTgV7GDRH|XCQ|uSZcfgor9=i=6GcIkvXU(1(}FMQP+1TPa&}h0^UFm!TLATBz3Mqv
z$dzXEs&_aq3sZuqdalZ1Hb14h2Xlp#tg>(^VI2CYu92&kM^)Fj@Ug7Ahw~ZXW7T&y
zJEir5VIvtKolA)+IeS;&adsec-7e+RY5le0(W_Nn{26(*lZ@<u*Xz6)lIu8!?BblV
zd(yLmne=jX@XvKje*r9!*6(0${&yer#2>V~;m>o-MeDN9(CIOBP{V7C;x{zRQQRDR
z%&CnKFxq3r2sjUOgdouffz0d`t|5yMSrk5{I<EehHsi^Lu|4!k*3$g=;C_6@Tw`xD
z!fRZ?Z#=K2$vDg}zy-OG{-O6e<Yq1`w{Q{o)S_zea{k<ODw`k3%UOAD{<QC^To45*
zQOF-iqdXyCLlz`mMX~Uakd`I*5HpFCBnh&VOx(-L(+LSa)|`+S?w5ROw2<dXPhRgi
z{@AJdi(+;*CC>3P!kijRmE=M;pB9B#&>4JOr+P~H?46RJviVXjhhwSkEPjBGonkXQ
zNU;Tq#}0D-7(Q?O%;yVv0R|39f_PUD`HXNoRm#a!rB2)q9aOfEFBNfF5UJXd&3tT3
zikIh#0uQTc29|`MOm&XPR1XzCW=4~(D$Dcv)U3ess*mSkEtYb49^m;qrBqI93G@8z
ztSHI3Y+lG0pxMjwnL-*)f;>;23{@2ID;Cl6sm^Rh^x#Q7eRMq3cVgXzs^TtGtjBsF
zUC7H~DlLPCr21fKiYXqKzsRCa2zBc6g?p+Go+jLBhKD8Xqj!zxq8~Lv+C2D_I3>Zn
zns^?r3-<{cU<_`RWC6rt2k5?lKSsT9?!?43c(T$&|G>qIiDP|-4o&n6(u`avPFyU(
zTPv1wk}#1f7NrTC%>no}(t;#`U73)?^hD;?1T|dU#W^toBa#AVEBz9RpEBP#oo@EI
zhiU36U;0&jc)`0gz7kXF-z%Se7Kz`#yO@6PNu_UP_o}Q!E|!O%HMBg8FF7BzRfLs3
zrQvk>!ZUyL{+`9g2m2~+#ot}-e->=KKe*WQV5AaT9#?`V%7f1uTNgVjF{QD)eDPVd
zdC|2bE7ASs5jcOwMix61wtd<4bM~*<zwxbJTd#9Ac*-N+c$kKkO|9d<_~4&D_~#F+
zAM(1IkEst;LtT{vD=lm6+2?i6xaYG`@rZt(wap<#I>Q`xzJ)oY#!xqyrZKddIy9HM
z1~rBOV%~HEXqq>#VHBx(ix5-B8m25dh9AU+pUsrj8&_(aj6rW7=Lcg`#|3o*M5G~*
z`3>{ql$*f(#5MC1JtM+rm=gww`MH?Q&&;3k;Q_1+ZIKdkp_Vn8P37kf$b|!VOo&1b
zKIcMSn$8xJJ~gKGrK^38hm|=ajf)L%FN0z$lwuneTd>#$h1!g!p++awrwg+gHKJJw
zW635RV%+GhHR}3!NwT0ulg;98yb$7o*oj3K7D+7jVqusCaUWLEE{N}9aS{uJQ&`=F
z{OxQ8v0g+{rFzjWpfxbefOrVC4`YFAUhKsJIS-iy{Wy<e^|)T(T%<pR!m=FqZCZ}e
z=7*j|>5-o-z36FJK+W48-d?);Xl8j}HKsHVR$Vb%U+#)ai5`IFP;6m*sb2{t%jaLY
z*61SYtZc0w8?JKK752t6HdqaHR+I0pOsugZ&)s$AvcgH*mK9n@{JqG)$DU%!jIppz
z%5`z30A%ue;TnEGY;&Pd_TaCvpzN{`L3ZRa85_%BjRixOoV}OEC~TuQ7$e#FiaAmv
zWf8Vslvy;{WO1Trmo?XrwbYQc5*gml%VyC=G@`aEZmA*LT0^$Y7~N-iTIN&VPTJe;
zObf1r3{5w6iI?b@qwM%jjh(eLCU=bu(NE1<+RUKI=yt!&2(NJklhN&&?4*pYPc!JG
z9^%5bdW4JG>J40@tsdhVp@;VD$OuhbT-WIJ$X#4>Gb1Ot7J2V=M&8G@0#|HPy=SON
zo<E|q#Jq6NVyFohybTO4L9A0=CQNZwkW<JJd@8FMN5d31!6T)nm>6Zl9*gbJptf3!
zvcwllw{lr&nlhY#t+v&sD4``rV#x!O6mn{KqoC2QUMPuaLG?)V*k)_gSu0_?YS3P_
zUzVuw#<gsaBJn0WBPKenxeMfzLQ@8>f_?F0I6+_dIx|KZ>_q@f8nkGy-P*=E_amNZ
zKhUcKJw%laKWK453j)lBAGEl8>ft4O<^w0Sx7?M@oSJ9%CxHIvgi|L*{{wt@X7pQ-
zkXt?VZD4=GxZP9VI+O1uT?DsOH|ZjdfW9~g1yFXEB;;;Oc-MPGL?I;BqEHlpj7GHU
zb?Xul+NB>rQD(l1Zd=U!dEsH9x~;o1y)6HH{;%d&cm3_B)uZQEKdz3AKOMXEWb9V;
za=Mx>REzR&80L;+!1)b5JZG4qW6%kQLFW)2BEwkL4?8IucEBY@ll$Kmoe_sc4hjv7
z^RnudO0($Yh-fj?x{{cqEZ$E)jx_l45_v2D=s?z|3L-dG6yYsjsgwo}Obrza5;$Jq
zX>0YoFhxCYS`X%h5;y}n+Dx9a%7S-wuONb_5X}fO_-e3P2rq%ls9~feRsow!<)=z1
zhzSr9(T{WX-4^7uw#fV>;4n{C9Pu<P6a&8n3(vTQRU{i~l;<HvAW4~QT22eO9M8ks
z2S3OVg6n~w!D;dth-a`P7RYs^Z=mJBeh=O|bq`pksr}`Aq<`hemq0l#0p$oZEc7ky
zRsvn+0UQ0WQW4@H^gg&y=~&Jv!FRzy@I@A4OD@H?yL=9aKe!7qpfC*wz`2OEJv_R!
z^U;ZA*Gj(<J5#=7bs#{udE4T(%D58mDPMlpv}JLi(yuffE|1#IV{MB^Dt$`q5cH03
zeRzH8?4u8tJ61AE{9O4mj1h@1WR}L2@ZR#!vzG0PawVg*^j2L>pu9K^1I7=-;NhnG
zw->nwAHhK3L**gp22<;T*0#ed{c!h<mxy!okgaqmEeBURl<52LIKW$KfgY`gbZ6;%
zaCrj6<w;iFTVqcUp0>Z*{r(zznw;-i`e2RiCZ~HVm)F>nRHIU;zCX6cULiFgdTZ>_
z=Rt4WQy%?IgTsxB5lgxl@f%3PpYbyY-UQf28^OE#y%IbmHLngP1%LvYp&;r%Z46VH
z?UeeV7i7vP+4DkFv)=HzDA}{iqJ%!!Wt0Fm+Qy^wZZjP-gIG;uBzrGP0I`>ql$x`o
zWU$3{B})A4GD`LAGD`LAGD`JcHA_nTwi9CK)BR2o@-y$Go!rSr9qZGj)bCXql=^MZ
znA|yr;I!dqBX)j+UUt-)#dnSHUF~qE0o_xUcTett=YWAao!DVL%tdVVDA!=CH*zss
zy@?BfL??DQLY!;XHGtT4axFmYy0}(33B+zM*9MrsMfF`Rq_e5qSV5BKdv$In5m{nZ
z$Yded3{4OM90!UA;k5k;PRL}k^8N&%MK?Iy0jkUsHQmuB?9TAEAy(|wgLJiso^=Gi
z2<|+Dyk}L`I3vW&t^{+Ubg&+;OHUKX-)w3IN1=R>BIWi?YBp|1LArgjk~S}%0J^$m
zlfrB9ydr#aZ^QF|r8!^9?2z--W_B*Z?A-I=lUI$`Ovs&oe?ot}r^L%JDRB&oD^Mg|
zsuv$B1}5_;G{~HI6{;2{CnEN%jpon>kNXQ$L~Z18iKW;VTR-2r6spXubo|4Czdx|j
z{v}(za_#Ar^ph*;>R6_lDOT^4fWe8t*?_^}0od9d$9X)Qhjb7eA|qJVkJuR8zPDg-
zcVSxe#t4nOf%o8nu=I366p?zW?p$^TNE4eb%%U$LfrBueg1|T-bmE8D9UqCxPN$@4
zIW;9gD7SP=B+G&@I&lJZCb77Q1&;+99Llzec$UFJzyd>~uR*WG+o(4Mg_T5oghDLv
zuB3n8M51<WB2l5({p*Y89{jKpTXC<BE1?mH)fimk$cjq|p8{G!=+l4_+6%2FeNuu4
zH<71c<HE6}GfH3&5U_C5!qn0oC7gs2Kz@GdiW2NB4+3|2HN}E{1|c$Hz~f4!r#uXO
zzYoe4ZNC38M1-5WSDIEcO5_3rmLa<JzsH?U00GiS(uFnlI5|C3?K!u`_LKAO<*?4E
z4lcK?@N4WPBHvemc-RRGb;4EPaO<l8-+XQA^lSJOY}DyzZ$zE!w8#uAn}KIj1EVpf
zhM9#ksmA6w+D2+%#%lB+wY86#gERFgiNai+DWfFn<zt(K%Iu@X%LoxM0^o#*nD?Z`
z&+Iyr&0~k)@o)IqNQ4<AZ=Gy|-Vh|$0h!bxJ7=p~V>zfFfuMTOfOM2ZfhHJ<>t46K
zg=+**-^#`0Z2;=qxh4ShxEemA=ZiT3Jj%>`kB;nM(~6Mtfh3-;K~s8DQzH79&;T(T
z5faUi%`;TMUB^+3*o^_~mM}UEp6G+>J!o3ZPy5pf0nSZBYpCj`;7ZVUvkt~~AOb&I
zRO`)Cg8DUdF~>(o+lf%US%Wn4^mbK0J(7%H?FbE8?f(Phj-NcEKUDfIyD)o6G5g#4
z{o3my(VDxdTnSnzhKoOe`HG)m@dr?-ZqiyC!CN+5!hmrFdP=9EKp5|=xSn<$f6{fl
z+WF2(%WB6j_WtAE)vaItpnBtnPj5^;xiM9}K3$zgB!7D6lP7mRsTO}!{n3xB;ZLjV
zr`D)T`&(f4hMV!+gA{S$9tGP!!rKcKMd5C?P?C6ZpK28H(D^BWhn*5>z;K9ks9{+Y
z@)<spg|874MikBuLT?JoKgF{RU~HySj4(%S9R3tLWAQWilaiZocyq3Eb6zvhaR2Dy
z&IczduH}9u@DAW9(W!g1rBYHFjzDn2%ycTjZi1_TvPYJVD1jtsTf-3$Rwz|urKOLC
z74#T{5<f;F5DhIxQWOBTCOB@&g&LVs8V}f8xunMf_Em1Iu}27A9bDeB#-5_^wtQf<
zd5s+=m7eAEE0b&Nh39?`2?9hM?$ZRxPHTJ)pIvPv`wW~S8<KtXMo4BRND-&phFeFG
z46`sE!!b8=Jf_Ta`b|8u_MuLH*ftKurj0mcy*fpz2F`1_I?d+TWfYz4GV0*lWu}8~
zW@)TrQwQHZOPcM$Sz2>1=AF|30HO}M-0(@`&vu{G_1~Z|xpnLW)wPxuT=YUHL~~PR
zn&0_tMrfZ*Y(Q45Tagv(R%FGxHA{l^21HgJX#rPv5M12}xVj5)wL`;I|0pCqhVxQ(
zYFeHbbWAn8ObV*7GYgW^66{E!d$;U~lCB)!Y<ez1Hizs_%m_u9?C`pkO3&O&i5bYB
z%oZWodn=oRO*!CSL$LRr#AoS6H3&8n*@G4c-iA15)267Wg>li@P<L$BF{W>IlF5b{
z`&cwfLgU*6pzX~yoah=Cqo8YC^8XdoMox~JuNS>!(=o<m2#LKL@JjOa=mt{9j(B`E
z-Q$>LPy<*!u$rXn&>Mku$5O`^ou79uwN|>8`=0g=Jn0>%_6)A(s#kA3y*l;e>Qt4}
zfc0%$<lS!pw`2=~55Je7mbZiLQjiK#{dyJi8<^?X;1hp``};fC1FbwkTyw96HKk{P
zVB#JhM)lXm;RmRV#aHkr-P?r29h>}NW0ONhkD){Oz7K-xv4ar42cR@k0~`F{@A7JO
z&-I!T@70q914>hmo-EMLNwVOG5<6^77VK8y$2X-43@_ct2yET?c^7bno$svnD_e%E
zt`?YYP26tulrO-1UeZeTKT%XaQ2AXQzYkY?``6e30^ePgt~It-L-_VJ_6#{ay!?K(
z|JoXR-Gb_N{}kVZ4b^c=3;tgG8@w7Ts>7ZQx_83~dpGKgeWEVTWx;H7pQ!15dO*Vr
z8Jqs8iPj!VINtP5O`Np0*V-d$ZP&e1t4#M!tuoy^waO^whd8TD_efoqIhr1+%d)#P
zKGsbiowc-}Dn19F5mMH6asaZu*%)Tnn|iFa{iDQZwT7jsM{0znd+T1}kNVEwmeFzK
z-&r5L`1zEMo#|9AU4qCeIG%)_Yula44pV&#0lK4!H^$8Xg&I1l9+$Rzu1UIClxz_t
z`%LMMFoL7j-qerOTt$mdMSWhJD^D*2R#Nj&Mf4`LjQ~~!&2jl-XtiM<t_$Eq><bDC
zZ@v-W)dAujFT@30^9?7>7tuxT@Raz6aAlI~$G4-wTb+56;(chZ?KU==gBief5P=O%
zHUitOrT#BQJ|9^+S(#gjJ$+~R$veZ<<0D^o*@5k&YIvr~&iuA6qT%X__;Bg8@u{kA
zEoq)EiExWDbr8irL8S#OFxx1aZlLJ_5{K~3;AaWP!UDf?sTB%)PJ44W)^PgVuz59b
z;#E#x6A8Z(Ske4B3KN(EH=IB#oDhdlgBjuwmXwww;1CiduseX8Q3Lkn!JgNshlF5X
z2RQDn1lQO>0t|aAMv$|!0{dipo_k%y?F%^EIR^$Xa}N5x@mY-~+vt`^%znWh?71TB
zB^J>`P<_&!99W2HVRp{4SJDGblxdLN3*-eNTcI=|fE`0-$8Ja_iNTFRXeUfVb)AL(
z%|JIQlkJgba3GAU5_g_(3Gzla?4?xMlXOq`d*WY$GQ4R<!lm@Q&f##t9tMZ&KUv1H
z{cC2+zc4qSGB<z8-29r^_chc0D{u3fx3%nA_rx8Zh5mI04(mM)4$q=%oq@yp-eZno
z=kI6M89c7bj-!qd=R$0q!Q;B<*yA|uT!2<Qu5*rq*xIqq;Bh?ztvSa+?AOq$J>mZW
DT}t?7

literal 0
HcmV?d00001

diff --git a/temp_backup/src/db/models.py b/temp_backup/src/db/models.py
new file mode 100644
index 00000000..5cc7fa5f
--- /dev/null
+++ b/temp_backup/src/db/models.py
@@ -0,0 +1,304 @@
+"""
+Database models for analytics data persistence
+"""
+
+from datetime import datetime
+from typing import Optional
+from sqlalchemy import Column, Integer, String, Float, DateTime, JSON, Text, Index, BigInteger
+from sqlalchemy.orm import declarative_base
+from sqlalchemy.sql import func
+
+Base = declarative_base()
+
+
+class OnChainEntity(Base):
+    """
+    Stores on-chain entities (projects and assets), each addressed by a unique, non-null stable_id
+    """
+    __tablename__ = "on_chain_entities"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # Surrogate primary key
+    stable_id = Column(String(255), unique=True, nullable=False, index=True)  # Stable unique ID (e.g., "asset:XLM", "project:stellar")
+    entity_type = Column(String(50), nullable=False, index=True)  # "project" or "asset"
+    name = Column(String(255), nullable=False)  # Human-readable name
+    ticker = Column(String(20), nullable=True, index=True)  # Asset ticker (if applicable)
+    contract_ids = Column(JSON, nullable=True)  # Array of associated contract IDs
+    extra_data = Column(JSON, nullable=True)  # Additional metadata
+    created_at = Column(  # Filled in by the database (server_default) on insert
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    updated_at = Column(  # Same default as created_at; onupdate refreshes it on UPDATEs issued through SQLAlchemy
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+
+    __table_args__ = (
+        Index("idx_entities_type_ticker", "entity_type", "ticker"),  # Composite index: entity_type first, then ticker
+    )
+
+
+class ArticleEntityLink(Base):
+    """
+    Links articles to on-chain entities (many-to-many relationship), one row per article/entity pair
+    """
+    __tablename__ = "article_entity_links"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    article_id = Column(String(255), nullable=False, index=True)  # Holds an articles.article_id value (no ForeignKey constraint is declared here)
+    entity_stable_id = Column(String(255), nullable=False, index=True)  # Holds an on_chain_entities.stable_id value (no ForeignKey constraint is declared here)
+    confidence = Column(Float, nullable=True)  # Confidence score for the link (0-1)
+    context = Column(Text, nullable=True)  # Context snippet where the entity was found
+    created_at = Column(  # Filled in by the database (server_default) on insert
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    __table_args__ = (
+        Index("idx_article_entity_link", "article_id", "entity_stable_id", unique=True),  # Unique: at most one link per (article, entity) pair
+        Index("idx_entity_article_link", "entity_stable_id", "article_id"),  # Reverse column order, for entity -> articles lookups
+    )
+
+
+class Article(Base):
+    """
+    Stores news articles with full content, sentiment scores, detected/linked entities and metadata
+    """
+
+    __tablename__ = "articles"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # Surrogate primary key
+    article_id = Column(String(255), unique=True, nullable=False, index=True)  # External article identifier, unique per row
+    title = Column(Text, nullable=False)
+    content = Column(Text, nullable=True)
+    summary = Column(Text, nullable=True)
+    source = Column(String(100), nullable=True, index=True)
+    url = Column(Text, nullable=True)
+    
+    # Asset information
+    asset_codes = Column(JSON, nullable=True)  # Array of asset codes mentioned in article
+    primary_asset = Column(String(20), nullable=True, index=True)  # Primary asset being discussed
+    categories = Column(JSON, nullable=True)  # Article categories
+    
+    # Sentiment scores
+    sentiment_score = Column(Float, nullable=True)  # compound score -1 to 1
+    positive_score = Column(Float, nullable=True)
+    negative_score = Column(Float, nullable=True)
+    neutral_score = Column(Float, nullable=True)
+    sentiment_label = Column(String(20), nullable=True, index=True)  # positive/negative/neutral
+    
+    # Keywords and metadata
+    keywords = Column(JSON, nullable=True)  # Array of keywords
+    detected_entities = Column(JSON, nullable=True)  # NER entities detected in article text
+    linked_entities = Column(JSON, nullable=True)  # Structured linked entities (array of {stable_id, type, name, ticker, confidence})
+    language = Column(String(10), nullable=True)
+    
+    # Timestamps
+    published_at = Column(DateTime(timezone=True), nullable=True, index=True)
+    fetched_at = Column(DateTime(timezone=True), nullable=True)
+    analyzed_at = Column(DateTime(timezone=True), nullable=True)
+    created_at = Column(  # Filled in by the database (server_default) on insert
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    updated_at = Column(  # Same default as created_at; onupdate refreshes it on UPDATEs issued through SQLAlchemy
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+
+    # Indexes for efficient querying. NOTE(review): the first four duplicate columns already declared with index=True - confirm before dropping either
+    __table_args__ = (
+        Index("idx_articles_published_at", "published_at"),
+        Index("idx_articles_sentiment_label", "sentiment_label"),
+        Index("idx_articles_source", "source"),
+        Index("idx_articles_primary_asset", "primary_asset"),
+        Index("idx_articles_asset_sentiment", "primary_asset", "sentiment_label"),
+        Index("idx_articles_created_at", "created_at"),
+    )
+
+    def __repr__(self):  # title may still be None on an instance that was never populated, so guard the slice
+        return f"<Article(id={self.article_id}, title={(self.title or '')[:50]}, asset={self.primary_asset}, sentiment={self.sentiment_label})>"
+
+
+class SocialPost(Base):
+    """
+    Stores social media posts (Twitter, Reddit, etc.) with engagement counts and sentiment scores
+    """
+
+    __tablename__ = "social_posts"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # Surrogate primary key
+    post_id = Column(String(255), unique=True, nullable=False, index=True)  # External post identifier, unique per row
+    platform = Column(String(50), nullable=False, index=True)  # twitter, reddit, etc.
+    content = Column(Text, nullable=False)
+    author = Column(String(255), nullable=True)
+    url = Column(Text, nullable=True)
+    
+    # Engagement metrics (default=0 is a client-side default; the columns themselves allow NULL)
+    likes = Column(Integer, default=0)
+    comments = Column(Integer, default=0)
+    shares = Column(Integer, default=0)
+    
+    # Asset information
+    asset_codes = Column(JSON, nullable=True)  # Array of asset codes mentioned
+    primary_asset = Column(String(20), nullable=True, index=True)
+    hashtags = Column(JSON, nullable=True)  # Array of hashtags
+    subreddit = Column(String(100), nullable=True)  # For Reddit posts
+    
+    # Sentiment scores
+    sentiment_score = Column(Float, nullable=True)  # compound score -1 to 1
+    positive_score = Column(Float, nullable=True)
+    negative_score = Column(Float, nullable=True)
+    neutral_score = Column(Float, nullable=True)
+    sentiment_label = Column(String(20), nullable=True, index=True)
+    
+    # Timestamps (posted_at is required, unlike Article.published_at)
+    posted_at = Column(DateTime(timezone=True), nullable=False, index=True)
+    fetched_at = Column(DateTime(timezone=True), nullable=True)
+    analyzed_at = Column(DateTime(timezone=True), nullable=True)
+    created_at = Column(  # Filled in by the database (server_default) on insert
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    updated_at = Column(  # Same default as created_at; onupdate refreshes it on UPDATEs issued through SQLAlchemy
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+
+    # Indexes for efficient querying. NOTE(review): the first four duplicate columns already declared with index=True - confirm before dropping either
+    __table_args__ = (
+        Index("idx_social_posts_platform", "platform"),
+        Index("idx_social_posts_posted_at", "posted_at"),
+        Index("idx_social_posts_sentiment_label", "sentiment_label"),
+        Index("idx_social_posts_primary_asset", "primary_asset"),
+        Index("idx_social_posts_platform_asset", "platform", "primary_asset"),
+        Index("idx_social_posts_created_at", "created_at"),
+    )
+
+    def __repr__(self):  # Short debug form: post id, platform, primary asset and sentiment label
+        return f"<SocialPost(id={self.post_id}, platform={self.platform}, asset={self.primary_asset}, sentiment={self.sentiment_label})>"
+
+
+class AnalyticsRecord(Base):
+    """
+    Stores computed analytics and aggregated metrics, one metric value per row
+    """
+
+    __tablename__ = "analytics_records"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # Surrogate primary key
+    record_type = Column(String(50), nullable=False, index=True)  # sentiment_summary, trend, etc.
+    asset = Column(String(50), nullable=True, index=True)  # Asset symbol (e.g., 'XLM', 'BTC')
+    metric_name = Column(String(100), nullable=False)  # e.g., 'sentiment_score', 'volume'
+    window = Column(String(20), nullable=True)  # e.g., '1h', '24h', '7d'
+    
+    # Metric values (only value is required; the comparison fields are optional)
+    value = Column(Float, nullable=False)
+    previous_value = Column(Float, nullable=True)
+    change_percentage = Column(Float, nullable=True)
+    trend_direction = Column(String(20), nullable=True)  # up/down/stable
+    
+    # Additional data
+    extra_data = Column(JSON, nullable=True)  # Additional metadata
+    
+    # Timestamps (timestamp must be supplied by the writer; created_at is set by the database)
+    timestamp = Column(DateTime(timezone=True), nullable=False, index=True)
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # Indexes for efficient querying. NOTE(review): the first three duplicate columns already declared with index=True - confirm before dropping either
+    __table_args__ = (
+        Index("idx_analytics_records_type", "record_type"),
+        Index("idx_analytics_records_asset", "asset"),
+        Index("idx_analytics_records_timestamp", "timestamp"),
+        Index("idx_analytics_records_type_asset", "record_type", "asset"),
+        Index("idx_analytics_records_asset_metric", "asset", "metric_name"),
+    )
+
+    def __repr__(self):  # Short debug form: record type, asset, metric name and value
+        return f"<AnalyticsRecord(type={self.record_type}, asset={self.asset}, metric={self.metric_name}, value={self.value})>"
+
+
+class NewsInsight(Base):
+    """
+    Stores sentiment analysis results for news articles (legacy table, kept for backward compatibility)
+    """
+
+    __tablename__ = "news_insights"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # Surrogate primary key
+    article_id = Column(String(255), nullable=True, index=True)  # Optional and not unique here, unlike Article.article_id
+    article_title = Column(Text, nullable=True)
+    article_url = Column(Text, nullable=True)
+    source = Column(String(100), nullable=True)
+    
+    # Asset information
+    asset_codes = Column(JSON, nullable=True)  # Array of asset codes mentioned in article
+    primary_asset = Column(String(20), nullable=True, index=True)  # Primary asset being discussed
+    
+    # Sentiment scores (all required in this table, unlike the nullable scores on Article)
+    sentiment_score = Column(Float, nullable=False)  # compound score -1 to 1
+    positive_score = Column(Float, nullable=False)
+    negative_score = Column(Float, nullable=False)
+    neutral_score = Column(Float, nullable=False)
+    sentiment_label = Column(String(20), nullable=False)  # positive/negative/neutral
+    
+    # Keywords and metadata
+    keywords = Column(JSON, nullable=True)  # Array of keywords
+    language = Column(String(10), nullable=True)
+    
+    # Timestamps (analyzed_at and created_at both default to the database clock on insert)
+    article_published_at = Column(DateTime(timezone=True), nullable=True)
+    analyzed_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # Indexes for efficient querying. NOTE(review): idx_news_insights_primary_asset duplicates the index=True on primary_asset - confirm before dropping either
+    __table_args__ = (
+        Index("idx_news_insights_analyzed_at", "analyzed_at"),
+        Index("idx_news_insights_sentiment_label", "sentiment_label"),
+        Index("idx_news_insights_source", "source"),
+        Index("idx_news_insights_primary_asset", "primary_asset"),
+        Index("idx_news_insights_asset_sentiment", "primary_asset", "sentiment_label"),
+    )
+
+    def __repr__(self):  # Short debug form: row id, primary asset, sentiment label and score
+        return f"<NewsInsight(id={self.id}, asset={self.primary_asset}, sentiment={self.sentiment_label}, score={self.sentiment_score})>"
+
+
+class AssetTrend(Base):
+    """
+    Stores calculated trends for assets and metrics (legacy table, kept for backward compatibility)
+    """
+
+    __tablename__ = "asset_trends"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # Surrogate primary key
+    asset = Column(String(50), nullable=False, index=True)  # e.g., 'XLM', 'BTC'
+    metric_name = Column(String(100), nullable=False)  # e.g., 'sentiment_score', 'volume'
+    window = Column(String(20), nullable=False)  # e.g., '1h', '24h', '7d'
+    
+    # Trend data (every field is required in this table)
+    trend_direction = Column(String(20), nullable=False)  # up/down/stable
+    score = Column(Float, nullable=False)  # trend score/strength
+    current_value = Column(Float, nullable=False)
+    previous_value = Column(Float, nullable=False)
+    change_percentage = Column(Float, nullable=False)
+    
+    # Additional data (renamed from metadata to avoid SQLAlchemy conflict)
+    extra_data = Column(JSON, nullable=True)  # Additional trend metadata
+    
+    # Timestamps (both default to the database clock on insert)
+    timestamp = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
+    )
+    created_at = Column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # Indexes for efficient querying. NOTE(review): idx_asset_trends_timestamp duplicates the index=True on timestamp - confirm before dropping either
+    __table_args__ = (
+        Index("idx_asset_trends_asset_metric", "asset", "metric_name"),
+        Index("idx_asset_trends_timestamp", "timestamp"),
+        Index("idx_asset_trends_window", "window"),
+    )
+
+    def __repr__(self):  # Short debug form: asset, metric name and trend direction
+        return f"<AssetTrend(asset={self.asset}, metric={self.metric_name}, trend={self.trend_direction})>"
diff --git a/temp_backup/src/db/postgres_service.py b/temp_backup/src/db/postgres_service.py
new file mode 100644
index 00000000..3903c428
--- /dev/null
+++ b/temp_backup/src/db/postgres_service.py
@@ -0,0 +1,1245 @@
+"""
+PostgreSQL service for persisting analytics data
+"""
+
+import logging
+import os
+import time
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta, timezone
+from contextlib import contextmanager
+
+from sqlalchemy import create_engine, select, and_, desc
+from sqlalchemy.orm import sessionmaker, Session
+from sqlalchemy.exc import SQLAlchemyError, OperationalError
+
+from .models import Base, Article, SocialPost, AnalyticsRecord, NewsInsight, AssetTrend, OnChainEntity, ArticleEntityLink
+from src.analytics.ner_service import NERService
+from src.analytics.entity_linker import EntityLinker, measure_precision
+
+logger = logging.getLogger(__name__)
+
+
+class PostgresService:
+    """
+    Service for persisting and retrieving analytics data from PostgreSQL
+    """
+
+    def __init__(self, database_url: Optional[str] = None):
+        """
+        Build the engine, session factory and NLP helpers used by this service
+
+        Args:
+            database_url: PostgreSQL connection URL. When falsy, DATABASE_URL is read
+                from the environment, with a local development URL as the last resort
+
+        Raises:
+            Exception: Any failure during setup is logged and re-raised unchanged
+        """
+        if not database_url:
+            local_default = "postgresql://postgres:postgres@localhost:5432/lumenpulse"
+            database_url = os.getenv("DATABASE_URL", local_default)
+        self.database_url = database_url
+        engine_options = {
+            "pool_pre_ping": True,  # Verify connections before using
+            "pool_size": 5,
+            "max_overflow": 10,
+            "echo": False,  # Set to True for SQL query logging
+        }
+        session_options = {"autocommit": False, "autoflush": False, "expire_on_commit": False}
+        try:
+            self.engine = create_engine(self.database_url, **engine_options)
+            self.SessionLocal = sessionmaker(bind=self.engine, **session_options)
+            self.ner_service = NERService()
+            self.entity_linker = EntityLinker()
+            logger.info("PostgreSQL service initialized successfully")
+        except Exception as init_error:
+            logger.error(f"Failed to initialize PostgreSQL service: {init_error}")
+            raise
+
+    def _ensure_detected_entities(self, article_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Return a shallow copy of article_data whose detected_entities is filled in.
+
+        A non-empty list already present is kept as-is; otherwise the NER service is
+        run over the title, summary and content. The caller's dict is not mutated.
+        """
+        enriched = dict(article_data)
+        current = enriched.get("detected_entities")
+        if not (isinstance(current, list) and current):
+            text_fields = {key: enriched.get(key) for key in ("title", "summary", "content")}
+            extracted = self.ner_service.extract_entities_from_article(**text_fields)
+            enriched["detected_entities"] = extracted
+        return enriched
+
+    @contextmanager
+    def get_session(self):
+        """
+        Provide a session that commits on success and rolls back on any error
+
+        Yields:
+            Session: a new session from SessionLocal, closed when the block exits
+        """
+        db_session = self.SessionLocal()
+        try:
+            yield db_session
+            db_session.commit()  # stays inside the try so a failed commit is rolled back too
+        except Exception as session_error:
+            db_session.rollback()
+            logger.error(f"Session error: {session_error}")
+            raise
+        finally:
+            db_session.close()
+
+    def _retry_operation(self, operation, max_retries=3, retry_delay=1.0):
+        """
+        Run a database operation, retrying OperationalError with exponential backoff
+
+        Args:
+            operation: Zero-argument callable to execute
+            max_retries: Total number of attempts; values below 1 mean one attempt
+            retry_delay: Delay before the first retry in seconds (doubles each retry)
+
+        Returns:
+            Result of the operation
+
+        Raises:
+            OperationalError: If every attempt fails with an operational error
+            SQLAlchemyError: Immediately, for any other SQLAlchemy error
+        """
+        # Always attempt at least once; a non-positive max_retries would otherwise
+        # produce an empty loop and the operation would never be called.
+        attempts = max(1, max_retries)
+        for attempt in range(attempts):
+            try:
+                return operation()
+            except OperationalError as e:
+                if attempt == attempts - 1:
+                    logger.error(f"Database operation failed after {attempts} attempts: {e}")
+                    raise
+                wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
+                logger.warning(
+                    f"Database operation failed (attempt {attempt + 1}/{attempts}): {e}. "
+                    f"Retrying in {wait_time:.1f}s..."
+                )
+                time.sleep(wait_time)
+            except SQLAlchemyError as e:
+                # Non-retryable errors
+                logger.error(f"Database operation failed with non-retryable error: {e}")
+                raise
+
+    def create_tables(self):
+        """Create all tables registered on Base.metadata, using this service's engine."""
+        try:
+            metadata = Base.metadata
+            metadata.create_all(bind=self.engine)
+            logger.info("Database tables created successfully")
+        except Exception as create_error:
+            # Log the failure, then let the caller see the original exception
+            logger.error(f"Failed to create tables: {create_error}")
+            raise
+
+    def drop_tables(self):
+        """Drop all tables registered on Base.metadata (destructive - use with caution!)."""
+        try:
+            metadata = Base.metadata
+            metadata.drop_all(bind=self.engine)
+            logger.warning("All database tables dropped")
+        except Exception as drop_error:
+            # Log the failure, then let the caller see the original exception
+            logger.error(f"Failed to drop tables: {drop_error}")
+            raise
+
+    # Article Methods
+
+    def save_article(
+        self,
+        article_data: Dict[str, Any],
+        sentiment_result: Optional[Dict[str, Any]] = None,
+    ) -> Optional[Article]:
+        """
+        Save an article with optional sentiment analysis and entity linking
+
+        The row is matched on article_data["id"]: an existing row is updated
+        (keys absent from article_data keep their stored value), otherwise a
+        new row is inserted. Entity links are written after the article itself.
+
+        Args:
+            article_data: Article data dictionary
+            sentiment_result: Optional sentiment analysis result
+
+        Returns:
+            Article object if successful, None otherwise
+        """
+        article_data = self._ensure_detected_entities(article_data)
+
+        # Link entities
+        linked_entities = self.entity_linker.link_article(
+            title=article_data.get("title"),
+            summary=article_data.get("summary"),
+            content=article_data.get("content")
+        )
+
+        # Prepare structured linked entities for the article
+        article_data["linked_entities"] = [
+            {
+                "stable_id": e.stable_id,
+                "type": e.entity_type,
+                "name": e.name,
+                "ticker": getattr(e, 'ticker', None),
+                "confidence": getattr(e, 'confidence', None)
+            }
+            for e in linked_entities
+        ]
+
+        def _apply_sentiment(record):
+            # Shared by the update and insert paths so the two cannot drift apart.
+            if not sentiment_result:
+                return
+            record.sentiment_score = sentiment_result.get("compound_score")
+            record.positive_score = sentiment_result.get("positive")
+            record.negative_score = sentiment_result.get("negative")
+            record.neutral_score = sentiment_result.get("neutral")
+            record.sentiment_label = sentiment_result.get("sentiment_label")
+            # Timezone-aware UTC, matching the DateTime(timezone=True) column; a naive
+            # utcnow() value carries no offset for the database to honour.
+            record.analyzed_at = datetime.now(timezone.utc)
+
+        def _save():
+            with self.get_session() as session:
+                # Check if article already exists
+                existing = session.execute(
+                    select(Article).where(Article.article_id == article_data.get("id"))
+                ).scalar_one_or_none()
+
+                if existing:
+                    # Update existing article
+                    existing.title = article_data.get("title", existing.title)
+                    existing.content = article_data.get("content", existing.content)
+                    existing.summary = article_data.get("summary", existing.summary)
+                    existing.source = article_data.get("source", existing.source)
+                    existing.url = article_data.get("url", existing.url)
+                    existing.asset_codes = article_data.get("asset_codes", existing.asset_codes)
+                    existing.primary_asset = article_data.get("primary_asset", existing.primary_asset)
+                    existing.categories = article_data.get("categories", existing.categories)
+                    existing.keywords = article_data.get("keywords", existing.keywords)
+                    existing.detected_entities = article_data.get("detected_entities", existing.detected_entities)
+                    existing.linked_entities = article_data.get("linked_entities", existing.linked_entities)
+                    existing.language = article_data.get("language", existing.language)
+                    existing.published_at = article_data.get("published_at", existing.published_at)
+                    existing.fetched_at = article_data.get("fetched_at", existing.fetched_at)
+                    _apply_sentiment(existing)
+                    session.flush()
+                    logger.debug(f"Updated article: {existing.article_id}")
+                    return existing
+
+                # Create new article
+                article = Article(
+                    article_id=article_data.get("id"),
+                    title=article_data.get("title", ""),
+                    content=article_data.get("content"),
+                    summary=article_data.get("summary"),
+                    source=article_data.get("source"),
+                    url=article_data.get("url"),
+                    asset_codes=article_data.get("asset_codes"),
+                    primary_asset=article_data.get("primary_asset"),
+                    categories=article_data.get("categories"),
+                    keywords=article_data.get("keywords"),
+                    detected_entities=article_data.get("detected_entities"),
+                    linked_entities=article_data.get("linked_entities"),
+                    language=article_data.get("language"),
+                    published_at=article_data.get("published_at"),
+                    fetched_at=article_data.get("fetched_at"),
+                )
+                _apply_sentiment(article)
+                session.add(article)
+                session.flush()
+                logger.debug(f"Saved article: {article.article_id}")
+                return article
+
+        try:
+            article = self._retry_operation(_save)
+            if article:
+                # Link entities in the database
+                self.link_article_to_entities(article.article_id, linked_entities)
+            return article
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save article: {e}")
+            return None
+
+    def save_articles_batch(
+        self,
+        articles_data: List[Dict[str, Any]],
+        sentiment_results: Optional[List[Dict[str, Any]]] = None,
+    ) -> int:
+        """
+        Save multiple articles in a batch
+
+        All rows are written in one transaction: if it fails, nothing is saved
+        and 0 is returned. Unlike save_article, no entity linking is performed.
+
+        Args:
+            articles_data: List of article data dictionaries
+            sentiment_results: Optional list of sentiment analysis results
+
+        Returns:
+            Number of articles saved
+        """
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                # Articles added earlier in this batch. autoflush is disabled, so the
+                # SELECT below cannot see them and a repeated id would be inserted twice.
+                pending = {}
+                for i, article_data in enumerate(articles_data):
+                    article_data = self._ensure_detected_entities(article_data)
+                    sentiment_result = sentiment_results[i] if sentiment_results and i < len(sentiment_results) else None
+                    article_id = article_data.get("id")
+
+                    # Check if article already exists (in this batch or in the database)
+                    existing = pending.get(article_id) or session.execute(
+                        select(Article).where(Article.article_id == article_id)
+                    ).scalar_one_or_none()
+
+                    if existing:
+                        # Update existing article
+                        article = existing
+                        existing.title = article_data.get("title", existing.title)
+                        existing.content = article_data.get("content", existing.content)
+                        existing.summary = article_data.get("summary", existing.summary)
+                        existing.source = article_data.get("source", existing.source)
+                        existing.url = article_data.get("url", existing.url)
+                        existing.asset_codes = article_data.get("asset_codes", existing.asset_codes)
+                        existing.primary_asset = article_data.get("primary_asset", existing.primary_asset)
+                        existing.categories = article_data.get("categories", existing.categories)
+                        existing.keywords = article_data.get("keywords", existing.keywords)
+                        existing.detected_entities = article_data.get("detected_entities", existing.detected_entities)
+                        existing.language = article_data.get("language", existing.language)
+                        existing.published_at = article_data.get("published_at", existing.published_at)
+                        existing.fetched_at = article_data.get("fetched_at", existing.fetched_at)
+                    else:
+                        # Create new article
+                        article = Article(
+                            article_id=article_id,
+                            title=article_data.get("title", ""),
+                            content=article_data.get("content"),
+                            summary=article_data.get("summary"),
+                            source=article_data.get("source"),
+                            url=article_data.get("url"),
+                            asset_codes=article_data.get("asset_codes"),
+                            primary_asset=article_data.get("primary_asset"),
+                            categories=article_data.get("categories"),
+                            keywords=article_data.get("keywords"),
+                            detected_entities=article_data.get("detected_entities"),
+                            language=article_data.get("language"),
+                            published_at=article_data.get("published_at"),
+                            fetched_at=article_data.get("fetched_at"),
+                        )
+                        session.add(article)
+                        pending[article_id] = article
+
+                    if sentiment_result:
+                        article.sentiment_score = sentiment_result.get("compound_score")
+                        article.positive_score = sentiment_result.get("positive")
+                        article.negative_score = sentiment_result.get("negative")
+                        article.neutral_score = sentiment_result.get("neutral")
+                        article.sentiment_label = sentiment_result.get("sentiment_label")
+                        article.analyzed_at = datetime.now(timezone.utc)  # aware UTC for a DateTime(timezone=True) column
+
+                    saved_count += 1
+
+            logger.info(f"Saved {saved_count} articles")  # logged only after get_session() has committed
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save articles batch: {e}")
+            return 0  # the transaction was rolled back, so nothing was saved
+        return saved_count
+
+    def get_recent_articles(
+        self,
+        limit: int = 100,
+        hours: int = 24,
+        asset: Optional[str] = None,
+        entity: Optional[str] = None,
+    ) -> List[Article]:
+        """
+        Get recent articles, newest first
+
+        Args:
+            limit: Maximum number of results
+            hours: Time window in hours, measured against published_at
+            asset: Optional asset filter (exact match on primary_asset)
+            entity: Optional NER entity filter (case-insensitive exact match)
+
+        Returns:
+            List of Article objects, or an empty list if the query fails
+        """
+        try:
+            with self.get_session() as session:
+                # Aware UTC cutoff: published_at is DateTime(timezone=True), and a naive
+                # utcnow() value gives the database no offset to compare against.
+                cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
+                stmt = select(Article).where(Article.published_at >= cutoff_time)
+                if asset:
+                    stmt = stmt.where(Article.primary_asset == asset)
+                # The entity filter runs in Python after the query, so over-fetch
+                # to leave enough candidates to fill `limit`.
+                fetch_limit = limit * 5 if entity else limit
+                stmt = stmt.order_by(desc(Article.published_at)).limit(fetch_limit)
+
+                results = session.execute(stmt).scalars().all()
+                if entity:
+                    target = entity.strip().lower()
+                    results = [
+                        article
+                        for article in results
+                        if any(
+                            str(value).strip().lower() == target
+                            for value in (article.detected_entities or [])
+                        )
+                    ][:limit]
+                logger.debug(f"Retrieved {len(results)} articles")
+                return results
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to retrieve articles: {e}")
+            return []
+
+    # Social Post Methods
+
+    def save_social_post(
+        self,
+        post_data: Dict[str, Any],
+        sentiment_result: Optional[Dict[str, Any]] = None,
+    ) -> Optional[SocialPost]:
+        """
+        Insert a social media post, or refresh it if it is already stored
+
+        The row is matched on ``post_data["id"]`` against
+        ``SocialPost.post_id``. When refreshing, keys absent from
+        ``post_data`` leave the stored value unchanged. Sentiment columns are
+        written only when ``sentiment_result`` is truthy.
+
+        Args:
+            post_data: Social post data dictionary
+            sentiment_result: Optional sentiment analysis result
+
+        Returns:
+            SocialPost object if successful, None if a SQLAlchemyError is raised
+        """
+        # Columns copied from post_data when refreshing an existing row
+        refreshable = (
+            "content",
+            "author",
+            "url",
+            "likes",
+            "comments",
+            "shares",
+            "asset_codes",
+            "primary_asset",
+            "hashtags",
+            "subreddit",
+            "posted_at",
+            "fetched_at",
+        )
+        # (model attribute, key in sentiment_result)
+        sentiment_columns = (
+            ("sentiment_score", "compound_score"),
+            ("positive_score", "positive"),
+            ("negative_score", "negative"),
+            ("neutral_score", "neutral"),
+            ("sentiment_label", "sentiment_label"),
+        )
+
+        def _attach_sentiment(row):
+            # No-op when no (or an empty) sentiment result was supplied
+            if not sentiment_result:
+                return
+            for column, key in sentiment_columns:
+                setattr(row, column, sentiment_result.get(key))
+            row.analyzed_at = datetime.utcnow()
+
+        def _save():
+            with self.get_session() as session:
+                lookup = select(SocialPost).where(
+                    SocialPost.post_id == post_data.get("id")
+                )
+                stored = session.execute(lookup).scalar_one_or_none()
+
+                if not stored:
+                    # First time this post is seen: build a new row
+                    fresh = SocialPost(
+                        post_id=post_data.get("id"),
+                        platform=post_data.get("platform", "unknown"),
+                        content=post_data.get("content", ""),
+                        author=post_data.get("author"),
+                        url=post_data.get("url"),
+                        likes=post_data.get("likes", 0),
+                        comments=post_data.get("comments", 0),
+                        shares=post_data.get("shares", 0),
+                        asset_codes=post_data.get("asset_codes"),
+                        primary_asset=post_data.get("primary_asset"),
+                        hashtags=post_data.get("hashtags"),
+                        subreddit=post_data.get("subreddit"),
+                        posted_at=post_data.get("posted_at"),
+                        fetched_at=post_data.get("fetched_at"),
+                    )
+                    _attach_sentiment(fresh)
+                    session.add(fresh)
+                    session.flush()
+                    logger.debug(f"Saved social post: {fresh.post_id}")
+                    return fresh
+
+                # Known post: overwrite only what post_data provides
+                # (platform is not touched on refresh)
+                for column in refreshable:
+                    setattr(
+                        stored,
+                        column,
+                        post_data.get(column, getattr(stored, column)),
+                    )
+                _attach_sentiment(stored)
+                session.flush()
+                logger.debug(f"Updated social post: {stored.post_id}")
+                return stored
+
+        try:
+            return self._retry_operation(_save)
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save social post: {e}")
+            return None
+
+    def save_social_posts_batch(
+        self,
+        posts_data: List[Dict[str, Any]],
+        sentiment_results: Optional[List[Dict[str, Any]]] = None,
+    ) -> int:
+        """
+        Insert or update multiple social posts inside a single session
+
+        ``sentiment_results`` is matched to ``posts_data`` by index. A post
+        with no corresponding (or an empty) result is stored without its
+        sentiment columns being touched. When refreshing an existing row,
+        keys absent from the post dictionary leave the stored value unchanged.
+
+        Args:
+            posts_data: List of social post data dictionaries
+            sentiment_results: Optional list of sentiment analysis results
+
+        Returns:
+            Number of posts saved; 0 if a SQLAlchemyError interrupted the batch
+        """
+        # Columns copied from post_data when refreshing an existing row
+        refreshable = (
+            "content",
+            "author",
+            "url",
+            "likes",
+            "comments",
+            "shares",
+            "asset_codes",
+            "primary_asset",
+            "hashtags",
+            "subreddit",
+            "posted_at",
+            "fetched_at",
+        )
+        # (model attribute, key in the sentiment result)
+        sentiment_columns = (
+            ("sentiment_score", "compound_score"),
+            ("positive_score", "positive"),
+            ("negative_score", "negative"),
+            ("neutral_score", "neutral"),
+            ("sentiment_label", "sentiment_label"),
+        )
+        sentiments = sentiment_results or []
+
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for i, post_data in enumerate(posts_data):
+                    sentiment_result = sentiments[i] if i < len(sentiments) else None
+
+                    # Check if post already exists
+                    stored = session.execute(
+                        select(SocialPost).where(
+                            SocialPost.post_id == post_data.get("id")
+                        )
+                    ).scalar_one_or_none()
+
+                    if stored:
+                        for column in refreshable:
+                            setattr(
+                                stored,
+                                column,
+                                post_data.get(column, getattr(stored, column)),
+                            )
+                        target = stored
+                    else:
+                        target = SocialPost(
+                            post_id=post_data.get("id"),
+                            platform=post_data.get("platform", "unknown"),
+                            content=post_data.get("content", ""),
+                            author=post_data.get("author"),
+                            url=post_data.get("url"),
+                            likes=post_data.get("likes", 0),
+                            comments=post_data.get("comments", 0),
+                            shares=post_data.get("shares", 0),
+                            asset_codes=post_data.get("asset_codes"),
+                            primary_asset=post_data.get("primary_asset"),
+                            hashtags=post_data.get("hashtags"),
+                            subreddit=post_data.get("subreddit"),
+                            posted_at=post_data.get("posted_at"),
+                            fetched_at=post_data.get("fetched_at"),
+                        )
+                        session.add(target)
+
+                    if sentiment_result:
+                        for column, key in sentiment_columns:
+                            setattr(target, column, sentiment_result.get(key))
+                        target.analyzed_at = datetime.utcnow()
+
+                    saved_count += 1
+
+            # Logged only once the session block has exited without raising,
+            # so the message is never emitted for a batch that then fails.
+            logger.info(f"Saved {saved_count} social posts")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save social posts batch: {e}")
+            # NOTE(review): assumes get_session() discards the whole unit of
+            # work when an error escapes it -- confirm. Under that assumption
+            # nothing from this batch was persisted, so report zero instead of
+            # the number of loop iterations completed.
+            return 0
+
+        return saved_count
+
+    def get_recent_social_posts(
+        self,
+        limit: int = 100,
+        hours: int = 24,
+        platform: Optional[str] = None,
+        asset: Optional[str] = None,
+    ) -> List[SocialPost]:
+        """
+        Fetch the newest social posts inside a look-back window
+
+        Posts are filtered on ``posted_at`` and returned newest first.
+
+        Args:
+            limit: Maximum number of results
+            hours: Time window in hours
+            platform: Optional platform filter (exact match)
+            asset: Optional asset filter (exact match on primary_asset)
+
+        Returns:
+            List of SocialPost objects, or an empty list on SQLAlchemyError
+        """
+        try:
+            with self.get_session() as session:
+                window_start = datetime.utcnow() - timedelta(hours=hours)
+
+                # The time-window condition always applies; the rest are opt-in
+                conditions = [SocialPost.posted_at >= window_start]
+                if platform:
+                    conditions.append(SocialPost.platform == platform)
+                if asset:
+                    conditions.append(SocialPost.primary_asset == asset)
+
+                query = (
+                    select(SocialPost)
+                    .where(*conditions)
+                    .order_by(desc(SocialPost.posted_at))
+                    .limit(limit)
+                )
+                posts = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(posts)} social posts")
+                return posts
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to retrieve social posts: {e}")
+            return []
+
+    # Analytics Record Methods
+
+    def save_analytics_record(
+        self,
+        record_type: str,
+        metric_name: str,
+        value: float,
+        asset: Optional[str] = None,
+        window: Optional[str] = None,
+        previous_value: Optional[float] = None,
+        change_percentage: Optional[float] = None,
+        trend_direction: Optional[str] = None,
+        extra_data: Optional[Dict[str, Any]] = None,
+        timestamp: Optional[datetime] = None,
+    ) -> Optional[AnalyticsRecord]:
+        """
+        Persist one analytics record
+
+        Args:
+            record_type: Type of record (e.g., 'sentiment_summary', 'trend')
+            metric_name: Metric name (e.g., 'sentiment_score', 'volume')
+            value: Metric value
+            asset: Optional asset symbol
+            window: Optional time window
+            previous_value: Optional previous value
+            change_percentage: Optional change percentage
+            trend_direction: Optional trend direction
+            extra_data: Optional additional metadata
+            timestamp: Optional timestamp; a falsy value means "now" (utcnow)
+
+        Returns:
+            AnalyticsRecord object if successful, None if a SQLAlchemyError
+            is raised
+        """
+        def _save():
+            # The fallback timestamp is computed on every attempt, so a
+            # retried save is stamped with the time of the retry.
+            columns = {
+                "record_type": record_type,
+                "metric_name": metric_name,
+                "value": value,
+                "asset": asset,
+                "window": window,
+                "previous_value": previous_value,
+                "change_percentage": change_percentage,
+                "trend_direction": trend_direction,
+                "extra_data": extra_data,
+            }
+            with self.get_session() as session:
+                row = AnalyticsRecord(
+                    timestamp=timestamp or datetime.utcnow(), **columns
+                )
+                session.add(row)
+                session.flush()
+                logger.debug(f"Saved analytics record: {record_type}/{metric_name}")
+                return row
+
+        try:
+            return self._retry_operation(_save)
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save analytics record: {e}")
+            return None
+
+    def save_analytics_records_batch(
+        self,
+        records_data: List[Dict[str, Any]],
+    ) -> int:
+        """
+        Save multiple analytics records inside a single session
+
+        Each dictionary is read with the keys record_type, metric_name, value,
+        asset, window, previous_value, change_percentage, trend_direction,
+        extra_data and timestamp. A missing or falsy ``timestamp`` falls back
+        to the current time (utcnow), matching ``save_analytics_record``.
+
+        Args:
+            records_data: List of analytics record data dictionaries
+
+        Returns:
+            Number of records saved; 0 if a SQLAlchemyError interrupted the
+            batch
+        """
+        column_keys = (
+            "record_type",
+            "metric_name",
+            "value",
+            "asset",
+            "window",
+            "previous_value",
+            "change_percentage",
+            "trend_direction",
+            "extra_data",
+        )
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for record_data in records_data:
+                    columns = {key: record_data.get(key) for key in column_keys}
+                    # ``or`` (not a .get default) so an explicit None also
+                    # receives the fallback timestamp.
+                    columns["timestamp"] = (
+                        record_data.get("timestamp") or datetime.utcnow()
+                    )
+                    session.add(AnalyticsRecord(**columns))
+                    saved_count += 1
+
+            # Logged only once the session block has exited without raising
+            logger.info(f"Saved {saved_count} analytics records")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save analytics records batch: {e}")
+            # NOTE(review): assumes get_session() discards the whole unit of
+            # work when an error escapes it -- confirm. Under that assumption
+            # nothing was persisted, so report zero.
+            return 0
+
+        return saved_count
+
+    def get_analytics_records(
+        self,
+        record_type: Optional[str] = None,
+        asset: Optional[str] = None,
+        metric_name: Optional[str] = None,
+        hours: int = 24,
+        limit: int = 100,
+    ) -> List[AnalyticsRecord]:
+        """
+        Fetch the newest analytics records inside a look-back window
+
+        Records are filtered on ``timestamp`` and returned newest first.
+
+        Args:
+            record_type: Optional record type filter
+            asset: Optional asset filter
+            metric_name: Optional metric name filter
+            hours: Time window in hours
+            limit: Maximum number of results
+
+        Returns:
+            List of AnalyticsRecord objects, or an empty list on
+            SQLAlchemyError
+        """
+        try:
+            with self.get_session() as session:
+                window_start = datetime.utcnow() - timedelta(hours=hours)
+
+                # Mandatory time filter first, then whichever filters were given
+                conditions = [AnalyticsRecord.timestamp >= window_start]
+                if record_type:
+                    conditions.append(AnalyticsRecord.record_type == record_type)
+                if asset:
+                    conditions.append(AnalyticsRecord.asset == asset)
+                if metric_name:
+                    conditions.append(AnalyticsRecord.metric_name == metric_name)
+
+                query = (
+                    select(AnalyticsRecord)
+                    .where(*conditions)
+                    .order_by(desc(AnalyticsRecord.timestamp))
+                    .limit(limit)
+                )
+                records = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(records)} analytics records")
+                return records
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to retrieve analytics records: {e}")
+            return []
+
+    # Legacy News Insights Methods (kept for backward compatibility)
+
+    def save_news_insight(
+        self,
+        sentiment_result: Dict[str, Any],
+        article_data: Optional[Dict[str, Any]] = None,
+    ) -> Optional[NewsInsight]:
+        """
+        Persist one news sentiment analysis result
+
+        ``sentiment_result`` must contain the keys compound_score, positive,
+        negative, neutral and sentiment_label; they are read with ``[]``, so a
+        missing key raises KeyError (which is not caught here). Article
+        metadata is optional and every article column is None without it.
+
+        Args:
+            sentiment_result: Sentiment analysis result dictionary
+            article_data: Optional article metadata
+
+        Returns:
+            NewsInsight object if successful, None if a SQLAlchemyError is
+            raised
+        """
+        # A missing or empty article dict behaves like "no metadata at all"
+        meta = article_data if article_data else {}
+        try:
+            with self.get_session() as session:
+                row = NewsInsight(
+                    article_id=meta.get("id"),
+                    article_title=meta.get("title"),
+                    article_url=meta.get("url"),
+                    source=meta.get("source"),
+                    sentiment_score=sentiment_result["compound_score"],
+                    positive_score=sentiment_result["positive"],
+                    negative_score=sentiment_result["negative"],
+                    neutral_score=sentiment_result["neutral"],
+                    sentiment_label=sentiment_result["sentiment_label"],
+                    keywords=meta.get("keywords"),
+                    language=meta.get("language"),
+                    article_published_at=meta.get("published_at"),
+                )
+                session.add(row)
+                session.flush()
+                logger.debug(f"Saved news insight: {row.id}")
+                return row
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save news insight: {e}")
+            return None
+
+    def save_news_insights_batch(
+        self,
+        sentiment_results: List[Dict[str, Any]],
+        articles_data: Optional[List[Dict[str, Any]]] = None,
+    ) -> int:
+        """
+        Save multiple news insights inside a single session
+
+        ``articles_data`` is matched to ``sentiment_results`` by index; a
+        result with no corresponding (or an empty) article dictionary is
+        stored with all article columns set to None. Each sentiment result
+        must contain compound_score, positive, negative, neutral and
+        sentiment_label; a missing key raises KeyError, which is not caught
+        here.
+
+        Args:
+            sentiment_results: List of sentiment analysis results
+            articles_data: Optional list of article metadata
+
+        Returns:
+            Number of insights saved; 0 if a SQLAlchemyError interrupted the
+            batch
+        """
+        articles = articles_data or []
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for i, result in enumerate(sentiment_results):
+                    # Falls back to an empty dict when metadata is missing/empty
+                    meta = (articles[i] if i < len(articles) else None) or {}
+
+                    insight = NewsInsight(
+                        article_id=meta.get("id"),
+                        article_title=meta.get("title"),
+                        article_url=meta.get("url"),
+                        source=meta.get("source"),
+                        sentiment_score=result["compound_score"],
+                        positive_score=result["positive"],
+                        negative_score=result["negative"],
+                        neutral_score=result["neutral"],
+                        sentiment_label=result["sentiment_label"],
+                        keywords=meta.get("keywords"),
+                        language=meta.get("language"),
+                        article_published_at=meta.get("published_at"),
+                    )
+                    session.add(insight)
+                    saved_count += 1
+
+            # Logged only once the session block has exited without raising
+            logger.info(f"Saved {saved_count} news insights")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save news insights batch: {e}")
+            # NOTE(review): assumes get_session() discards the whole unit of
+            # work when an error escapes it -- confirm. Under that assumption
+            # nothing was persisted, so report zero.
+            return 0
+
+        return saved_count
+
+    def get_recent_news_insights(
+        self, limit: int = 100, hours: int = 24
+    ) -> List[NewsInsight]:
+        """
+        Fetch the newest news insights inside a look-back window
+
+        Insights are filtered on ``analyzed_at`` and returned newest first.
+
+        Args:
+            limit: Maximum number of results
+            hours: Time window in hours
+
+        Returns:
+            List of NewsInsight objects, or an empty list on SQLAlchemyError
+        """
+        try:
+            with self.get_session() as session:
+                window_start = datetime.utcnow() - timedelta(hours=hours)
+                query = select(NewsInsight).where(
+                    NewsInsight.analyzed_at >= window_start
+                )
+                query = query.order_by(desc(NewsInsight.analyzed_at))
+                query = query.limit(limit)
+
+                insights = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(insights)} news insights")
+                return insights
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to retrieve news insights: {e}")
+            return []
+
+    # Legacy Asset Trends Methods (kept for backward compatibility)
+
+    def save_asset_trend(
+        self,
+        asset: str,
+        metric_name: str,
+        window: str,
+        trend_data: Dict[str, Any],
+    ) -> Optional[AssetTrend]:
+        """
+        Persist one asset trend row
+
+        ``trend_data`` must contain trend_direction, current_value,
+        previous_value and change_percentage (read with ``[]``; a missing key
+        raises KeyError, which is not caught here). ``score`` defaults to 0.0,
+        and extra metadata is taken from ``extra_data`` or, failing that,
+        ``metadata``.
+
+        Args:
+            asset: Asset symbol (e.g., 'XLM')
+            metric_name: Metric name (e.g., 'sentiment_score')
+            window: Time window (e.g., '24h')
+            trend_data: Trend data dictionary
+
+        Returns:
+            AssetTrend object if successful, None if a SQLAlchemyError is
+            raised
+        """
+        try:
+            with self.get_session() as session:
+                # Built in the same key order the model constructor is fed
+                measured = {
+                    "trend_direction": trend_data["trend_direction"],
+                    "score": trend_data.get("score", 0.0),
+                    "current_value": trend_data["current_value"],
+                    "previous_value": trend_data["previous_value"],
+                    "change_percentage": trend_data["change_percentage"],
+                    "extra_data": (
+                        trend_data.get("extra_data") or trend_data.get("metadata")
+                    ),
+                }
+                row = AssetTrend(
+                    asset=asset, metric_name=metric_name, window=window, **measured
+                )
+                session.add(row)
+                session.flush()
+                logger.debug(f"Saved asset trend: {asset}/{metric_name}")
+                return row
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save asset trend: {e}")
+            return None
+
+    def save_asset_trends_batch(
+        self, asset: str, window: str, trends: List[Dict[str, Any]]
+    ) -> int:
+        """
+        Save multiple trends for one asset inside a single session
+
+        Every trend dictionary must contain metric_name, trend_direction,
+        current_value, previous_value and change_percentage (read with ``[]``;
+        a missing key raises KeyError, which is not caught here). ``score``
+        defaults to 0.0, and extra metadata is taken from ``extra_data`` or,
+        failing that, ``metadata``.
+
+        Args:
+            asset: Asset symbol
+            window: Time window
+            trends: List of trend dictionaries
+
+        Returns:
+            Number of trends saved; 0 if a SQLAlchemyError interrupted the
+            batch
+        """
+        saved_count = 0
+        try:
+            with self.get_session() as session:
+                for trend_data in trends:
+                    trend = AssetTrend(
+                        asset=asset,
+                        metric_name=trend_data["metric_name"],
+                        window=window,
+                        trend_direction=trend_data["trend_direction"],
+                        score=trend_data.get("score", 0.0),
+                        current_value=trend_data["current_value"],
+                        previous_value=trend_data["previous_value"],
+                        change_percentage=trend_data["change_percentage"],
+                        extra_data=(
+                            trend_data.get("extra_data") or trend_data.get("metadata")
+                        ),
+                    )
+                    session.add(trend)
+                    saved_count += 1
+
+            # Logged only once the session block has exited without raising
+            logger.info(f"Saved {saved_count} asset trends for {asset}")
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to save asset trends batch: {e}")
+            # NOTE(review): assumes get_session() discards the whole unit of
+            # work when an error escapes it -- confirm. Under that assumption
+            # nothing was persisted, so report zero.
+            return 0
+
+        return saved_count
+
+    def get_recent_asset_trends(
+        self, asset: str, metric_name: Optional[str] = None, limit: int = 100
+    ) -> List[AssetTrend]:
+        """
+        Fetch the newest trend rows for one asset
+
+        Rows are ordered by ``timestamp``, newest first. No time window is
+        applied.
+
+        Args:
+            asset: Asset symbol
+            metric_name: Optional metric name filter
+            limit: Maximum number of results
+
+        Returns:
+            List of AssetTrend objects, or an empty list on SQLAlchemyError
+        """
+        try:
+            with self.get_session() as session:
+                conditions = [AssetTrend.asset == asset]
+                if metric_name:
+                    conditions.append(AssetTrend.metric_name == metric_name)
+
+                query = (
+                    select(AssetTrend)
+                    .where(*conditions)
+                    .order_by(desc(AssetTrend.timestamp))
+                    .limit(limit)
+                )
+                trends = session.execute(query).scalars().all()
+                logger.debug(f"Retrieved {len(trends)} asset trends for {asset}")
+                return trends
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to retrieve asset trends: {e}")
+            return []
+
+    def upsert_on_chain_entity(
+        self,
+        stable_id: str,
+        entity_type: str,
+        name: str,
+        ticker: Optional[str] = None,
+        extra_data: Optional[Dict] = None,
+    ) -> OnChainEntity:
+        """
+        Create an on-chain entity, or update it when ``stable_id`` already exists.
+
+        For an existing row only ``name``, ``ticker`` and ``extra_data`` are
+        written; ``entity_type`` is left as stored. A falsy ``ticker`` or
+        ``extra_data`` keeps the stored value. Exceptions raised by the retry
+        wrapper are not caught here.
+
+        Args:
+            stable_id: Stable unique ID for the entity
+            entity_type: "project" or "asset"
+            name: Human-readable name
+            ticker: Optional asset ticker
+            extra_data: Optional additional metadata
+
+        Returns:
+            The OnChainEntity object
+        """
+        def _upsert():
+            with self.get_session() as session:
+                lookup = select(OnChainEntity).where(
+                    OnChainEntity.stable_id == stable_id
+                )
+                record = session.execute(lookup).scalar_one_or_none()
+
+                if not record:
+                    record = OnChainEntity(
+                        stable_id=stable_id,
+                        entity_type=entity_type,
+                        name=name,
+                        ticker=ticker,
+                        extra_data=extra_data,
+                    )
+                    session.add(record)
+                else:
+                    record.name = name
+                    record.ticker = ticker or record.ticker
+                    record.extra_data = extra_data or record.extra_data
+
+                session.flush()
+                return record
+
+        return self._retry_operation(_upsert)
+
+    def link_article_to_entities(self, article_id: str, linked_entities: List) -> None:
+        """
+        Attach an article to each of the given on-chain entities.
+
+        Every entity is upserted first, then a link row is created unless one
+        already exists for the (article, entity) pair. Exceptions raised by
+        the retry wrapper are not caught here.
+
+        Args:
+            article_id: The article's unique ID
+            linked_entities: List of LinkedEntity objects; ``stable_id``,
+                ``entity_type`` and ``name`` are required attributes, while
+                ``ticker`` and ``confidence`` are read if present
+        """
+        def _link():
+            with self.get_session() as session:
+                for candidate in linked_entities:
+                    # Make sure the entity row exists before linking to it.
+                    # The upsert goes through its own get_session() block.
+                    self.upsert_on_chain_entity(
+                        stable_id=candidate.stable_id,
+                        entity_type=candidate.entity_type,
+                        name=candidate.name,
+                        ticker=getattr(candidate, 'ticker', None),
+                    )
+
+                    same_pair = and_(
+                        ArticleEntityLink.article_id == article_id,
+                        ArticleEntityLink.entity_stable_id == candidate.stable_id,
+                    )
+                    already_linked = session.execute(
+                        select(ArticleEntityLink).where(same_pair)
+                    ).scalar_one_or_none()
+                    if already_linked:
+                        continue
+
+                    session.add(
+                        ArticleEntityLink(
+                            article_id=article_id,
+                            entity_stable_id=candidate.stable_id,
+                            confidence=getattr(candidate, 'confidence', None),
+                        )
+                    )
+
+        self._retry_operation(_link)
+
+    def get_article_linked_entities(self, article_id: str) -> List[Dict]:
+        """
+        Get all entities linked to an article.
+
+        Links and entities are fetched together with one joined query instead
+        of one entity lookup per link. Links whose entity row does not exist
+        are omitted from the result.
+
+        Args:
+            article_id: The article's unique ID
+
+        Returns:
+            List of dictionaries with the keys stable_id, type, name, ticker
+            and confidence; an empty list on SQLAlchemyError
+        """
+        try:
+            with self.get_session() as session:
+                stmt = (
+                    select(OnChainEntity, ArticleEntityLink.confidence)
+                    .join(
+                        ArticleEntityLink,
+                        ArticleEntityLink.entity_stable_id == OnChainEntity.stable_id,
+                    )
+                    .where(ArticleEntityLink.article_id == article_id)
+                )
+                # Each result row is an (OnChainEntity, confidence) pair
+                return [
+                    {
+                        "stable_id": entity.stable_id,
+                        "type": entity.entity_type,
+                        "name": entity.name,
+                        "ticker": entity.ticker,
+                        "confidence": confidence,
+                    }
+                    for entity, confidence in session.execute(stmt).all()
+                ]
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to get linked entities for article {article_id}: {e}")
+            return []
+
+    def get_articles_for_entity(self, stable_id: str, limit: int = 100) -> List[Article]:
+        """
+        Get the most recently published articles linked to a specific entity.
+
+        Ordering and ``limit`` are applied to the articles themselves in a
+        single query, so the newest ``limit`` linked articles are returned.
+        (Limiting the link rows first, without an ordering, would pick an
+        arbitrary subset before sorting.)
+
+        Args:
+            stable_id: The entity's stable ID
+            limit: Maximum number of articles to return
+
+        Returns:
+            List of Article objects ordered by published_at, newest first; an
+            empty list on SQLAlchemyError
+        """
+        try:
+            with self.get_session() as session:
+                # IN-subquery rather than a join: an article is returned once
+                # even if more than one link row points at it.
+                linked_article_ids = select(ArticleEntityLink.article_id).where(
+                    ArticleEntityLink.entity_stable_id == stable_id
+                )
+                stmt = (
+                    select(Article)
+                    .where(Article.article_id.in_(linked_article_ids))
+                    .order_by(desc(Article.published_at))
+                    .limit(limit)
+                )
+                return session.execute(stmt).scalars().all()
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to get articles for entity {stable_id}: {e}")
+            return []
+
+    def measure_entity_linker_precision(self) -> Dict[str, float]:
+        """
+        Run the precision measurement for ``self.entity_linker`` and log it.
+
+        The result of ``measure_precision`` is logged at INFO level (keys
+        precision, recall, f1 and test_cases) and handed back unchanged.
+
+        Returns:
+            Precision metrics dictionary
+        """
+        report = logger.info
+        scores = measure_precision(self.entity_linker)
+
+        report("Entity Linker Precision Metrics:")
+        report(f"  Precision: {scores['precision']:.4f}")
+        report(f"  Recall: {scores['recall']:.4f}")
+        report(f"  F1 Score: {scores['f1']:.4f}")
+        report(f"  Test Cases: {scores['test_cases']}")
+
+        return scores
+
+    def get_sentiment_summary(self, hours: int = 24) -> Dict[str, Any]:
+        """
+        Get sentiment summary statistics over recent news insights
+
+        Insights are selected on ``analyzed_at`` within the look-back window.
+
+        Args:
+            hours: Time window in hours
+
+        Returns:
+            Dictionary with total_articles, average_sentiment, the
+            positive/negative/neutral counts and the matching percentages.
+            The same keys are present (all zero) when no insight falls inside
+            the window. An empty dictionary is returned on SQLAlchemyError.
+        """
+        labels = ("positive", "negative", "neutral")
+        try:
+            with self.get_session() as session:
+                cutoff_time = datetime.utcnow() - timedelta(hours=hours)
+
+                insights = session.execute(
+                    select(NewsInsight).where(NewsInsight.analyzed_at >= cutoff_time)
+                ).scalars().all()
+
+                total = len(insights)
+
+                # Single pass over the rows; labels outside the three known
+                # ones count towards the total only.
+                counts = dict.fromkeys(labels, 0)
+                for insight in insights:
+                    if insight.sentiment_label in counts:
+                        counts[insight.sentiment_label] += 1
+
+                if total:
+                    average = round(
+                        sum(i.sentiment_score for i in insights) / total, 4
+                    )
+                else:
+                    average = 0.0
+
+                summary: Dict[str, Any] = {
+                    "total_articles": total,
+                    "average_sentiment": average,
+                }
+                for label in labels:
+                    summary[f"{label}_count"] = counts[label]
+                for label in labels:
+                    # Guard the division so an empty window yields 0.0 and the
+                    # result has the same shape as a populated one.
+                    summary[f"{label}_percentage"] = (
+                        round(counts[label] / total * 100, 2) if total else 0.0
+                    )
+                return summary
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to get sentiment summary: {e}")
+            return {}
+
+    def cleanup_old_data(self, days: int = 30) -> Dict[str, int]:
+        """
+        Delete rows older than the retention period from the analytics tables
+
+        Rows are selected on ``created_at`` and removed from articles, social
+        posts, analytics records, news insights and asset trends, in that
+        order, inside one session.
+
+        Args:
+            days: Number of days to keep
+
+        Returns:
+            Dictionary with counts of deleted records per table; all zeros if
+            a SQLAlchemyError is raised
+        """
+        # (result key, model) pairs in the order the deletes are issued.
+        # NOTE(review): ArticleEntityLink rows are not removed here -- confirm
+        # whether links to deleted articles are cleaned up elsewhere.
+        targets = (
+            ("articles", Article),
+            ("social_posts", SocialPost),
+            ("analytics_records", AnalyticsRecord),
+            ("news_insights", NewsInsight),  # legacy
+            ("asset_trends", AssetTrend),  # legacy
+        )
+        try:
+            cutoff_date = datetime.utcnow() - timedelta(days=days)
+            deleted_counts = {key: 0 for key, _ in targets}
+
+            with self.get_session() as session:
+                for key, model in targets:
+                    expired = session.query(model).filter(
+                        model.created_at < cutoff_date
+                    )
+                    deleted_counts[key] = expired.delete()
+
+                logger.info(f"Cleaned up old data: {deleted_counts}")
+                return deleted_counts
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to cleanup old data: {e}")
+            return {key: 0 for key, _ in targets}
diff --git a/temp_backup/src/fetchers.py b/temp_backup/src/fetchers.py
new file mode 100644
index 00000000..237a1336
--- /dev/null
+++ b/temp_backup/src/fetchers.py
@@ -0,0 +1,116 @@
+"""
+News fetcher module - fetches crypto/market news from various sources
+"""
+
+import requests
+import logging
+from datetime import datetime, timedelta
+from typing import List, Dict, Any
+
+logger = logging.getLogger(__name__)
+
+
+class NewsItem:
+    """A fetched news item: title, content, source, url and publication time."""
+
+    def __init__(
+        self, title: str, content: str, source: str, url: str, published_at: datetime
+    ):
+        self.title = title
+        self.content = content
+        self.source = source
+        self.url = url
+        self.published_at = published_at
+        self.fetched_at = datetime.utcnow()  # naive UTC, stamped at construction
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {  # both datetimes are emitted as ISO-8601 strings
+            "title": self.title,
+            "content": self.content,
+            "source": self.source,
+            "url": self.url,
+            "published_at": self.published_at.isoformat(),
+            "fetched_at": self.fetched_at.isoformat(),
+        }
+
+
+class NewsFetcher:
+    """Fetches news from cryptocurrency and market sources"""
+
+    def __init__(self):
+        self.sources = {  # source key -> endpoint URL read by the fetch_* methods
+            "crypto_news": "https://api.coingecko.com/api/v3/news",
+            "mock_market": "https://jsonplaceholder.typicode.com/posts",
+        }
+
+    def fetch_crypto_news(self) -> List[NewsItem]:
+        """Fetch up to 10 news items from CoinGecko; returns [] if the request fails."""
+        try:
+            response = requests.get(self.sources["crypto_news"], timeout=10)
+            response.raise_for_status()
+            payload = response.json()
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Error fetching crypto news: {e}")
+            return []
+
+        collected = []
+        # The endpoint wraps its articles in a 'data' array; keep the first 10.
+        for entry in payload.get("data", [])[:10]:
+            try:
+                raw_published = entry.get("published_at")
+                if raw_published:
+                    published = datetime.fromisoformat(
+                        raw_published.replace("Z", "+00:00")
+                    )
+                else:
+                    published = datetime.utcnow()
+                collected.append(
+                    NewsItem(
+                        title=entry.get("title", ""),
+                        content=entry.get("description", entry.get("title", "")),
+                        source="CoinGecko",
+                        url=entry.get("url", ""),
+                        published_at=published,
+                    )
+                )
+            except Exception as e:
+                logger.warning(f"Error processing article: {e}")
+                continue
+
+        logger.info(f"Fetched {len(collected)} crypto news items")
+        return collected
+
+    def fetch_market_news(self) -> List[NewsItem]:
+        """Fetch up to 10 items from the mock market feed; returns [] if the request fails."""
+        try:
+            response = requests.get(self.sources["mock_market"], timeout=10)
+            response.raise_for_status()
+            data = response.json()
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Error fetching market news: {e}")
+            return []
+        news_items = []
+        for article in data[:10]:  # Limit to 10 articles
+            try:  # a malformed entry (e.g. non-integer id) is skipped, as in fetch_crypto_news
+                news_item = NewsItem(
+                    title=article.get("title", f"Post {article.get('id', 'N/A')}"),
+                    content=article.get("body", ""),
+                    source="Mock Market Feed",
+                    url=f"https://example.com/news/{article.get('id')}",
+                    published_at=datetime.utcnow() - timedelta(hours=article.get("id", 1) % 24),
+                )
+                news_items.append(news_item)
+            except Exception as e:
+                logger.warning(f"Error processing article: {e}")
+        logger.info(f"Fetched {len(news_items)} market news items")
+        return news_items
+
+    def fetch_all_news(self) -> List[NewsItem]:
+        """Fetch from every source and return one combined list.
+
+        Crypto news items come first, followed by market news items.
+        """
+        combined = self.fetch_crypto_news() + self.fetch_market_news()
+        logger.info(f"Total news items fetched: {len(combined)}")
+
+        return combined
diff --git a/temp_backup/src/ingestion/__init__.py b/temp_backup/src/ingestion/__init__.py
new file mode 100644
index 00000000..2bff5394
--- /dev/null
+++ b/temp_backup/src/ingestion/__init__.py
@@ -0,0 +1,42 @@
+"""
+Data ingestion module for fetching external data.
+"""
+
+from .news_fetcher import NewsFetcher, NewsArticle, fetch_news
+from .stellar_fetcher import (
+    StellarDataFetcher,
+    VolumeData,
+    TransactionRecord,
+    get_asset_volume,
+    get_network_overview,
+)
+from .price_fetcher import PriceFetcher
+from .social_fetcher import (
+    SocialFetcher,
+    SocialPost,
+    TwitterFetcher,
+    RedditFetcher,
+    RateLimiter,
+    SocialPlatform,
+    fetch_social,
+)
+
+__all__ = [
+    "NewsFetcher",
+    "NewsArticle",
+    "fetch_news",
+    "StellarDataFetcher",
+    "VolumeData",
+    "TransactionRecord",
+    "get_asset_volume",
+    "get_network_overview",
+    "PriceFetcher",
+    # Social media fetchers
+    "SocialFetcher",
+    "SocialPost",
+    "TwitterFetcher",
+    "RedditFetcher",
+    "RateLimiter",
+    "SocialPlatform",
+    "fetch_social",
+]
diff --git a/temp_backup/src/ingestion/news_deduplicator.py b/temp_backup/src/ingestion/news_deduplicator.py
new file mode 100644
index 00000000..a94eed1d
--- /dev/null
+++ b/temp_backup/src/ingestion/news_deduplicator.py
@@ -0,0 +1,198 @@
+"""
+News deduplication module - removes duplicate articles to prevent re-processing
+"""
+import hashlib
+import json
+from datetime import datetime, timedelta, timezone
+from typing import List, Dict, Optional, Set
+from pathlib import Path
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class NewsDeduplicator:
+    """
+    Handles deduplication of news articles to prevent re-processing of the same content.
+    Uses SHA-256 hashing of normalized content to identify duplicates.
+    """
+
+    def __init__(self, deduplication_window_days: int = 7, storage_path: str = "./data/deduplication.json"):
+        """
+        Load persisted hashes and drop those older than the window.
+
+        Args:
+            deduplication_window_days: How many days a seen hash is retained
+            storage_path: JSON file holding seen hashes; its parent directory is created
+        """
+        self.deduplication_window_days = deduplication_window_days
+        self.storage_path = Path(storage_path)
+        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Load existing hashes
+        self.seen_hashes: Dict[str, datetime] = {}
+        self._load_seen_hashes()
+
+        # Cutoff is computed once here and is not refreshed afterwards
+        self.cutoff_time = datetime.now(timezone.utc) - timedelta(days=self.deduplication_window_days)
+
+        # Prune expired hashes (runs only at construction, not on a schedule)
+        self._cleanup_old_hashes()
+
+        logger.info(f"Initialized NewsDeduplicator with window of {deduplication_window_days} days")
+
+    def _normalize_article(self, article: Dict) -> str:
+        """
+        Build the canonical string that gets hashed for an article.
+
+        The title, content, url and source are stripped and lower-cased;
+        a missing or None field becomes the empty string.
+
+        Args:
+            article: Article dictionary to normalize
+
+        Returns:
+            Compact JSON with sorted keys holding the four cleaned fields
+        """
+
+        # Shared cleaning step applied to every field
+        def _clean(field: str) -> str:
+            return (article.get(field) or '').strip().lower()
+
+        canonical_data = {
+            name: _clean(name)
+            for name in ('title', 'content', 'url', 'source')
+        }
+
+        # sort_keys makes the output independent of insertion order
+        return json.dumps(canonical_data, sort_keys=True, separators=(',', ':'))
+
+    def _compute_hash(self, article: Dict) -> str:
+        """
+        Compute the SHA-256 digest of an article's normalized form
+
+        Args:
+            article: Article dictionary to hash
+
+        Returns:
+            Digest as a hex string
+        """
+        canonical_bytes = self._normalize_article(article).encode('utf-8')
+        return hashlib.sha256(canonical_bytes).hexdigest()
+
+    def _load_seen_hashes(self):
+        """Load stored hashes; timestamps without an offset are treated as UTC."""
+        if self.storage_path.exists():
+            try:
+                with open(self.storage_path, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+
+                for hash_str, timestamp_str in data.items():
+                    try:
+                        # 'Z' is rewritten because fromisoformat rejects it before Python 3.11
+                        timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
+                        if timestamp.tzinfo is None:
+                            # A naive value would raise TypeError when compared
+                            # with the timezone-aware cutoff in _cleanup_old_hashes
+                            timestamp = timestamp.replace(tzinfo=timezone.utc)
+                        self.seen_hashes[hash_str] = timestamp
+                    except (ValueError, AttributeError):
+                        logger.warning(f"Invalid timestamp format for hash {hash_str}: {timestamp_str}")
+
+                logger.info(f"Loaded {len(self.seen_hashes)} previously seen hashes")
+            except (json.JSONDecodeError, IOError, AttributeError) as e:
+                logger.error(f"Error loading seen hashes from {self.storage_path}: {e}")
+                self.seen_hashes = {}
+
+    def _save_seen_hashes(self):
+        """Write all seen hashes to the storage file as indented JSON.
+
+        An IOError while writing is logged and not re-raised.
+        """
+        # datetime is not JSON serializable, so timestamps become ISO strings
+        serializable = {}
+        for digest, seen_at in self.seen_hashes.items():
+            serializable[digest] = seen_at.isoformat()
+        try:
+            with open(self.storage_path, 'w', encoding='utf-8') as f:
+                json.dump(serializable, f, indent=2)
+        except IOError as e:
+            logger.error(f"Error saving seen hashes to {self.storage_path}: {e}")
+
+    def _cleanup_old_hashes(self):
+        """Drop every hash whose timestamp is not newer than self.cutoff_time."""
+        before = len(self.seen_hashes)
+        retained = {}
+        for digest, seen_at in self.seen_hashes.items():
+            if seen_at > self.cutoff_time:
+                retained[digest] = seen_at
+        self.seen_hashes = retained
+
+        removed_count = before - len(retained)
+        if removed_count > 0:
+            logger.info(f"Removed {removed_count} old hashes outside the {self.deduplication_window_days}-day window")
+
+    def is_duplicate(self, article: Dict) -> bool:
+        """
+        Tell whether an article's hash has already been recorded
+
+        Args:
+            article: Article to check
+
+        Returns:
+            True if the hash is present in seen_hashes, False otherwise
+        """
+        already_seen = self._compute_hash(article) in self.seen_hashes
+        return already_seen
+
+    def mark_seen(self, article: Dict):
+        """
+        Record an article's hash with the current UTC time as its timestamp
+
+        Args:
+            article: Article to mark as seen
+        """
+        digest = self._compute_hash(article)
+        self.seen_hashes[digest] = datetime.now(timezone.utc)
+
+    def filter_duplicates(self, articles: List[Dict]) -> List[Dict]:
+        """
+        Return the articles not seen before, recording each one as seen.
+
+        Repeats within the batch are dropped too; hashes are saved once at the end.
+
+        Args:
+            articles: List of articles to filter
+
+        Returns:
+            List of articles with duplicates removed
+        """
+        filtered_articles = []
+        duplicates_found = 0
+        for article in articles:
+            # Hash once per article rather than once to test and again to record
+            article_hash = self._compute_hash(article)
+            if article_hash in self.seen_hashes:
+                duplicates_found += 1
+                continue
+            self.seen_hashes[article_hash] = datetime.now(timezone.utc)
+            filtered_articles.append(article)
+
+        if duplicates_found > 0:
+            logger.info(f"Filtered out {duplicates_found} duplicate articles")
+        self._save_seen_hashes()
+        return filtered_articles
+
+    def get_statistics(self) -> Dict:
+        """
+        Report the deduplicator's current state
+
+        Returns:
+            Dictionary with the hash count, window, cutoff time and storage path
+        """
+        return dict(
+            seen_hashes_count=len(self.seen_hashes),
+            deduplication_window_days=self.deduplication_window_days,
+            cutoff_time=self.cutoff_time.isoformat(),
+            storage_path=str(self.storage_path),
+        )
\ No newline at end of file
diff --git a/temp_backup/src/ingestion/news_fetcher.py b/temp_backup/src/ingestion/news_fetcher.py
new file mode 100644
index 00000000..0ff1e04c
--- /dev/null
+++ b/temp_backup/src/ingestion/news_fetcher.py
@@ -0,0 +1,333 @@
+"""
+News Fetcher Service for cryptocurrency news.
+Fetches data from external APIs and standardizes the format.
+"""
+
+import os
+import json
+import time
+from typing import List, Dict, Optional
+from dataclasses import dataclass, asdict
+from .news_deduplicator import NewsDeduplicator
+from datetime import datetime
+from src.utils.translator import translate_and_normalize
+import requests
+from requests.exceptions import RequestException
+from src.utils.http_client import RobustHTTPClient
+
+
+@dataclass
+class NewsArticle:
+    """Standardized news article record shared by all news sources"""
+
+    id: str  # source-prefixed identifier ("cc_..." or "na_...")
+    title: str
+    content: Optional[str]
+    summary: Optional[str]
+    source: str
+    url: str
+    published_at: datetime
+    categories: List[str]
+    sentiment_score: Optional[float] = None  # To be filled by sentiment engine
+    tags: Optional[List[str]] = None
+
+    def to_dict(self) -> Dict:
+        """Return the fields as a dict, with published_at as an ISO-8601 string"""
+        data = asdict(self)
+        data["published_at"] = self.published_at.isoformat()
+        return data
+
+
+class APIConfig:
+    """Endpoints and request settings used by NewsFetcher"""
+
+    # API Endpoints
+    CRYPTOCOMPARE_URL = "https://min-api.cryptocompare.com/data/v2/news/"
+    NEWSAPI_URL = "https://newsapi.org/v2/everything"
+
+    # Request pacing and limits
+    RATE_LIMIT_DELAY = 1.0  # minimum seconds between requests
+    MAX_RETRIES = 3  # not referenced anywhere else in this module
+    TIMEOUT = 10  # seconds, passed as the per-request timeout
+
+
+class NewsFetcher:
+    """
+    Fetches cryptocurrency news from multiple APIs.
+
+    Environment Variables Required:
+    - CRYPTOCOMPARE_API_KEY: API key for CryptoCompare
+    - NEWSAPI_API_KEY: API key for NewsAPI
+    """
+
+    def __init__(self, use_cryptocompare: bool = True, use_newsapi: bool = True):
+        """
+        Initialize NewsFetcher with API keys from environment.
+
+        Args:
+            use_cryptocompare: Whether to use CryptoCompare API
+            use_newsapi: Whether to use NewsAPI
+        """
+        self.use_cryptocompare = use_cryptocompare
+        self.use_newsapi = use_newsapi
+
+        # Load API keys from environment
+        self.cryptocompare_key = os.getenv("CRYPTOCOMPARE_API_KEY")
+        self.newsapi_key = os.getenv("NEWSAPI_API_KEY")
+
+        # Fail fast: an enabled source without its key raises ValueError
+        if use_cryptocompare and not self.cryptocompare_key:
+            raise ValueError("CRYPTOCOMPARE_API_KEY environment variable not set")
+        if use_newsapi and not self.newsapi_key:
+            raise ValueError("NEWSAPI_API_KEY environment variable not set")
+
+        # HTTP client shared by every request made through this instance
+        self.session = RobustHTTPClient()
+        self.last_request_time = 0  # time.time() of the latest request; 0 = none yet
+
+        # In-memory set of article IDs already collected by this instance
+        self.seen_articles = set()
+
+        # Content-hash deduplicator whose seen hashes are persisted to a JSON file
+        self.deduplicator = NewsDeduplicator(deduplication_window_days=7)
+
+    def _respect_rate_limit(self):
+        """Sleep as needed so requests are at least RATE_LIMIT_DELAY seconds apart"""
+        elapsed = time.time() - self.last_request_time
+        remaining = APIConfig.RATE_LIMIT_DELAY - elapsed
+
+        if remaining > 0:
+            time.sleep(remaining)
+
+        self.last_request_time = time.time()
+
+    def _handle_api_error(self, response: requests.Response, api_name: str) -> None:
+        """Raise an exception that describes a failed API response"""
+        status = response.status_code
+        if status == 401:
+            raise PermissionError(f"{api_name} API: Invalid API key")
+        if status == 429:
+            raise ConnectionError(f"{api_name} API: Rate limit exceeded")
+        if status >= 500:
+            raise ConnectionError(f"{api_name} API: Server error")
+        response.raise_for_status()
+
+    def _fetch_cryptocompare(self, limit: int) -> List[NewsArticle]:
+        """Fetch CryptoCompare news (EN, ES, PT); published_at is timezone-aware UTC"""
+        from datetime import timezone  # local import: the module imports only `datetime`
+        articles = []
+        try:
+            headers = {"Authorization": f"Apikey {self.cryptocompare_key}"}
+
+            for lang in ["EN", "ES", "PT"]:
+                self._respect_rate_limit()
+
+                params = {
+                    "lang": lang,
+                    "categories": "BTC,ETH,BLOCKCHAIN",
+                    "excludeCategories": "Sponsored",
+                }
+
+                response = self.session.get(
+                    APIConfig.CRYPTOCOMPARE_URL,
+                    params=params,
+                    headers=headers,
+                    timeout=APIConfig.TIMEOUT,
+                )
+
+                if response.status_code != 200:
+                    self._handle_api_error(response, "CryptoCompare")
+
+                data = response.json()
+
+                if data.get("Type") != 100:
+                    raise ValueError(
+                        f"CryptoCompare API returned error: {data.get('Message', 'Unknown error')}"
+                    )
+
+                # Parse articles
+                for item in data.get("Data", [])[:limit]:
+                    try:
+                        article = NewsArticle(
+                            id=f"cc_{item['id']}",
+                            title=translate_and_normalize(item.get("title", "")),
+                            content=translate_and_normalize(item.get("body", "")),
+                            summary=translate_and_normalize(
+                                item.get("short_description", "")
+                            ),
+                            source=item.get("source", "Unknown"),
+                            url=item.get("url", ""),
+                            published_at=datetime.fromtimestamp(
+                                item.get("published_on", 0), tz=timezone.utc
+                            ),
+                            categories=(
+                                item.get("categories", "").split("|")
+                                if item.get("categories")
+                                else []
+                            ),
+                            tags=(
+                                item.get("tags", "").split("|")
+                                if item.get("tags")
+                                else []
+                            ),
+                        )
+
+                        # Avoid duplicates
+                        if article.id not in self.seen_articles:
+                            articles.append(article)
+                            self.seen_articles.add(article.id)
+
+                    except KeyError as e:
+                        print(f"Warning: Missing key in CryptoCompare data: {e}")
+                        continue
+
+        except RequestException as e:
+            print(f"Error fetching from CryptoCompare: {e}")
+        except json.JSONDecodeError as e:
+            print(f"Error parsing CryptoCompare JSON: {e}")
+
+        return articles
+
+    def _fetch_newsapi(self, limit: int) -> List[NewsArticle]:
+        """Fetch NewsAPI news (en, es, pt) of the last 7 days; IDs are stable across runs"""
+        import zlib  # local import: crc32 is a 32-bit checksum that does not vary per process
+        articles = []
+        try:
+            # Calculate date range (last 7 days for recent news)
+            to_date = datetime.now()
+            from_date = datetime.fromtimestamp(to_date.timestamp() - (7 * 24 * 3600))
+
+            for lang in ["en", "es", "pt"]:
+                self._respect_rate_limit()
+
+                params = {
+                    "q": "cryptocurrency OR blockchain OR bitcoin OR ethereum",
+                    "language": lang,
+                    "sortBy": "publishedAt",
+                    "pageSize": min(limit, 100),  # NewsAPI max is 100
+                    "from": from_date.strftime("%Y-%m-%d"),
+                    "to": to_date.strftime("%Y-%m-%d"),
+                    "apiKey": self.newsapi_key,
+                }
+
+                response = self.session.get(
+                    APIConfig.NEWSAPI_URL, params=params, timeout=APIConfig.TIMEOUT
+                )
+
+                if response.status_code != 200:
+                    self._handle_api_error(response, "NewsAPI")
+
+                data = response.json()
+
+                # Parse articles
+                for item in data.get("articles", [])[:limit]:
+                    try:
+                        published_at = datetime.fromisoformat(
+                            item["publishedAt"].replace("Z", "+00:00")
+                        )
+
+                        article = NewsArticle(
+                            id=f"na_{zlib.crc32(str(item['url']).encode('utf-8'))}",
+                            title=translate_and_normalize(item.get("title", "")),
+                            content=translate_and_normalize(item.get("content", "")),
+                            summary=translate_and_normalize(
+                                item.get("description", "")
+                            ),
+                            source=item.get("source", {}).get("name", "Unknown"),
+                            url=item.get("url", ""),
+                            published_at=published_at,
+                            categories=[
+                                "crypto",
+                                "blockchain",
+                            ],  # NewsAPI doesn't provide categories
+                        )
+
+                        # Avoid duplicates
+                        if article.id not in self.seen_articles:
+                            articles.append(article)
+                            self.seen_articles.add(article.id)
+
+                    except (KeyError, ValueError) as e:
+                        print(f"Warning: Error parsing NewsAPI article: {e}")
+                        continue
+
+        except RequestException as e:
+            print(f"Error fetching from NewsAPI: {e}")
+        except json.JSONDecodeError as e:
+            print(f"Error parsing NewsAPI JSON: {e}")
+
+        return articles
+
+    def fetch_latest(self, limit: int = 10) -> List[Dict]:
+        """
+        Fetch latest news articles from configured APIs.
+
+        Args:
+            limit: Maximum number of articles returned in total
+
+        Returns:
+            List of standardized article dictionaries, newest first
+
+        Raises:
+            ConnectionError: If an API reports rate limiting or a server error
+            ValueError: If invalid parameters provided
+        """
+        if limit <= 0:
+            raise ValueError("Limit must be positive")
+
+        all_articles = []
+
+        # Fetch from CryptoCompare
+        if self.use_cryptocompare:
+            articles = self._fetch_cryptocompare(limit)
+            all_articles.extend(articles)
+            print(f"Fetched {len(articles)} articles from CryptoCompare")
+
+        # Fetch from NewsAPI
+        if self.use_newsapi:
+            articles = self._fetch_newsapi(limit)
+            all_articles.extend(articles)
+            print(f"Fetched {len(articles)} articles from NewsAPI")
+
+        # Newest first. Sorting on timestamp() works for naive and aware
+        # datetimes alike; comparing the two kinds directly raises TypeError.
+        all_articles.sort(key=lambda x: x.published_at.timestamp(), reverse=True)
+
+        articles_as_dicts = [article.to_dict() for article in all_articles]
+
+        # Drop known articles BEFORE truncating: filter_duplicates records what
+        # it is given as seen, so it must only receive articles that are returned.
+        fresh = [a for a in articles_as_dicts if not self.deduplicator.is_duplicate(a)]
+        result = self.deduplicator.filter_duplicates(fresh[:limit])
+
+        if not result:
+            print("Warning: No articles fetched from any API")
+
+        return result
+
+    def clear_cache(self):
+        """Clear the in-memory set of seen article IDs (self.deduplicator is left untouched)"""
+        self.seen_articles.clear()
+
+    def close(self):
+        """Close self.session, the HTTP client created in __init__"""
+        self.session.close()
+
+
+# Utility function for easy usage
+def fetch_news(
+    limit: int = 10, use_cryptocompare: bool = True, use_newsapi: bool = True
+) -> List[Dict]:
+    """
+    Build a NewsFetcher, fetch once, and always close it afterwards.
+
+    Example:
+        articles = fetch_news(limit=5)
+        for article in articles:
+            print(f"{article['title']} - {article['source']}")
+    """
+    fetcher = NewsFetcher(use_cryptocompare=use_cryptocompare, use_newsapi=use_newsapi)
+    try:
+        return fetcher.fetch_latest(limit)
+    finally:
+        fetcher.close()
diff --git a/temp_backup/src/ingestion/price_fetcher.py b/temp_backup/src/ingestion/price_fetcher.py
new file mode 100644
index 00000000..2146c4da
--- /dev/null
+++ b/temp_backup/src/ingestion/price_fetcher.py
@@ -0,0 +1,226 @@
+"""
+Off-chain price fetcher for Soroban pricing adapter feeds.
+
+This module supports fetching USD prices for supported Stellar assets,
+scaling them to the pricing adapter base decimals, and handling failures
+with stale cache fallback.
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+from requests.exceptions import RequestException
+from src.utils.http_client import RobustHTTPClient
+
+logger = logging.getLogger(__name__)
+
+BASE_DECIMALS = 7
+DEFAULT_CACHE_TTL_SECONDS = 300
+DEFAULT_STALE_TTL_SECONDS = 600
+DEFAULT_REQUEST_TIMEOUT = 10
+
+COINGECKO_URL = "https://api.coingecko.com/api/v3/simple/price"
+COINCAP_URL = "https://api.coincap.io/v2/assets"
+
+SUPPORTED_ASSETS: Dict[str, Dict[str, Any]] = {
+    "XLM": {
+        "coingecko_id": "stellar",
+        "coincap_id": "stellar",
+        "asset_decimals": 7,
+        "asset_issuer": None,
+    },
+    "USDC": {
+        "coingecko_id": "usd-coin",
+        "coincap_id": "usd-coin",
+        "asset_decimals": 6,
+        "asset_issuer": None,
+    },
+}
+
+
+class PriceFetcher:
+    """Fetch current asset prices and prepare adapter-ready payloads."""
+
+    def __init__(
+        self,
+        cache_ttl_seconds: int = DEFAULT_CACHE_TTL_SECONDS,
+        stale_ttl_seconds: int = DEFAULT_STALE_TTL_SECONDS,
+        request_timeout: int = DEFAULT_REQUEST_TIMEOUT,
+    ):
+        self.cache_ttl_seconds = cache_ttl_seconds  # stored but not read elsewhere in this class
+        self.stale_ttl_seconds = stale_ttl_seconds  # max age of a cached payload served as stale
+        self.request_timeout = request_timeout  # seconds, passed to every HTTP request
+        self.cache: Dict[str, Dict[str, Any]] = {}  # asset_code -> {"payload", "cached_at"}
+        self.session = RobustHTTPClient()
+
+    def fetch_all_prices(
+        self, asset_codes: Optional[List[str]] = None
+    ) -> List[Dict[str, Any]]:
+        """Fetch prices for supported assets and return adapter-ready values."""
+        asset_codes = asset_codes or list(SUPPORTED_ASSETS.keys())
+        # Only known codes are sent to the sources: an unknown code raises
+        # KeyError inside the fetchers and would make both sources look failed.
+        supported = [code for code in asset_codes if code in SUPPORTED_ASSETS]
+        now = datetime.now(timezone.utc)
+        # id_key names the identifier the active source uses to key its prices.
+        source, id_key = "coingecko", "coingecko_id"
+        try:
+            price_map = self._fetch_coingecko(supported) if supported else {}
+        except Exception as primary_error:
+            logger.warning(
+                "Primary price source failed: %s; trying fallback endpoint.",
+                primary_error,
+            )
+            source, id_key = "coincap", "coincap_id"
+            try:
+                price_map = self._fetch_coincap(supported)
+            except Exception as fallback_error:
+                logger.warning(
+                    "Fallback price source failed: %s; using cached stale values if available.",
+                    fallback_error,
+                )
+                price_map = {}
+                source = "cache"
+
+        results: List[Dict[str, Any]] = []
+        for asset_code in asset_codes:
+            asset_config = SUPPORTED_ASSETS.get(asset_code)
+            if not asset_config:
+                logger.warning("Skipping unsupported asset code: %s", asset_code)
+                continue
+
+            price_usd = price_map.get(asset_config[id_key])
+            if price_usd is not None:
+                payload = self._build_price_payload(
+                    asset_code=asset_code,
+                    asset_issuer=asset_config.get("asset_issuer"),
+                    price_usd=price_usd,
+                    scaled_price=self._scale_price(price_usd),
+                    asset_decimals=asset_config["asset_decimals"],
+                    source=source,
+                    timestamp=now,
+                    is_stale=False,
+                )
+                self.cache[asset_code] = {"payload": payload, "cached_at": now}
+                results.append(payload)
+                continue
+
+            stale_payload = self._get_stale_payload(asset_code, now)
+            if stale_payload is not None:
+                results.append(stale_payload)
+                continue
+
+            results.append(
+                {
+                    "asset_code": asset_code,
+                    "asset_issuer": asset_config.get("asset_issuer"),
+                    "success": False,
+                    "error": "price_unavailable",
+                    "source": source,
+                    "is_stale": False,
+                    "timestamp": now.isoformat(),
+                }
+            )
+
+        return results
+
+    def fetch_price(self, asset_code: str) -> Dict[str, Any]:
+        """Fetch one asset's price; an unsupported code yields a failure payload."""
+        results = self.fetch_all_prices([asset_code])
+        if results:
+            return results[0]
+        return {"asset_code": asset_code, "success": False, "error": "unsupported_asset"}
+
+    def _fetch_coingecko(self, asset_codes: List[str]) -> Dict[str, float]:
+        """Return USD prices from CoinGecko, keyed by CoinGecko asset id."""
+        joined_ids = ",".join(self._asset_ids(asset_codes, key="coingecko_id"))
+        response = self.session.get(
+            COINGECKO_URL,
+            params={"ids": joined_ids, "vs_currencies": "usd"},
+            timeout=self.request_timeout,
+        )
+        response.raise_for_status()
+        body = response.json()
+
+        prices: Dict[str, float] = {}
+        for code in asset_codes:
+            gecko_id = SUPPORTED_ASSETS[code]["coingecko_id"]
+            quoted = body.get(gecko_id, {}).get("usd")
+            if quoted is not None:
+                prices[gecko_id] = float(quoted)
+        if not prices:
+            raise RequestException("CoinGecko returned no valid prices")
+        return prices
+
+    def _fetch_coincap(self, asset_codes: List[str]) -> Dict[str, float]:
+        """Return USD prices from the CoinCap fallback, keyed by CoinCap asset id."""
+        query = {"ids": ",".join(self._asset_ids(asset_codes, key="coincap_id"))}
+        response = self.session.get(
+            COINCAP_URL,
+            params=query,
+            timeout=self.request_timeout,
+        )
+        response.raise_for_status()
+        body = response.json()
+        # Entries lacking an id or a (truthy) priceUsd are ignored
+        prices: Dict[str, float] = {
+            entry.get("id"): float(entry.get("priceUsd"))
+            for entry in body.get("data", [])
+            if entry.get("id") and entry.get("priceUsd")
+        }
+        if not prices:
+            raise RequestException("CoinCap returned no valid prices")
+        return prices
+
+    def _scale_price(self, price_usd: float) -> int:
+        return int(round(price_usd * (10**BASE_DECIMALS)))  # integer with BASE_DECIMALS implied decimals
+
+    def _build_price_payload(
+        self,
+        asset_code: str,
+        asset_issuer: Optional[str],
+        price_usd: float,
+        scaled_price: int,
+        asset_decimals: int,
+        source: str,
+        timestamp: datetime,
+        is_stale: bool,
+    ) -> Dict[str, Any]:
+        return {  # success payload; failure payloads are built in fetch_all_prices
+            "asset_code": asset_code,
+            "asset_issuer": asset_issuer,
+            "price_usd": price_usd,  # unscaled USD price as passed in
+            "price": scaled_price,  # scaled integer as passed in by the caller
+            "asset_decimals": asset_decimals,
+            "base_decimals": BASE_DECIMALS,
+            "source": source,
+            "timestamp": timestamp.isoformat(),
+            "is_stale": is_stale,
+            "success": True,
+        }
+
+    def _asset_ids(self, asset_codes: List[str], key: str) -> List[str]:
+        return [SUPPORTED_ASSETS[asset_code][key] for asset_code in asset_codes]  # KeyError on unknown code
+
+    def _get_stale_payload(
+        self, asset_code: str, now: datetime
+    ) -> Optional[Dict[str, Any]]:
+        entry = self.cache.get(asset_code)
+        if not entry:
+            return None
+        age = (now - entry["cached_at"]).total_seconds()
+        if age <= self.stale_ttl_seconds:
+            # Serve a copy re-labelled as stale cache data and stamped with `now`
+            return {
+                **entry["payload"],
+                "is_stale": True,
+                "source": "cache",
+                "timestamp": now.isoformat(),
+            }
+        logger.warning(
+            "Cached price for %s is stale (%.0fs old), discarding.", asset_code, age
+        )
+        return None
diff --git a/temp_backup/src/ingestion/run_ingestion_quality_checks.py b/temp_backup/src/ingestion/run_ingestion_quality_checks.py
new file mode 100644
index 00000000..3431f90d
--- /dev/null
+++ b/temp_backup/src/ingestion/run_ingestion_quality_checks.py
@@ -0,0 +1,28 @@
+"""CLI wrapper to run Stellar ingestion quality checks.
+
+This exists so scheduler/API can invoke a stable entrypoint.
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+# Ensure local imports work when executed from repo root or app root.
+HERE = os.path.dirname(__file__)
+# apps/data-processing/src needs to be on sys.path so `import ingestion...` works.
+# The project uses both import styles; this script uses the direct package under src.
+SRC_ROOT = os.path.abspath(os.path.join(HERE, ".."))
+sys.path.insert(0, SRC_ROOT)
+
+from ingestion.stellar_ingestion_checks import main
+
+
+
+if __name__ == "__main__":
+    raise SystemExit(
+        main(
+            argv=None
+        )
+    )
+
diff --git a/temp_backup/src/ingestion/social_fetcher.py b/temp_backup/src/ingestion/social_fetcher.py
new file mode 100644
index 00000000..f300f3de
--- /dev/null
+++ b/temp_backup/src/ingestion/social_fetcher.py
@@ -0,0 +1,741 @@
+"""
+Social Media Fetcher Service for cryptocurrency sentiment analysis.
+Fetches data from Twitter/X and Reddit APIs with proper rate limiting.
+"""
+
+import json
+import logging
+import math
+import os
+import re
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Dict, List, Optional
+
+from requests.exceptions import RequestException
+from src.utils.http_client import RobustHTTPClient
+from src.utils.translator import translate_and_normalize
+
+logger = logging.getLogger(__name__)
+
+
+class SocialPlatform(Enum):
+    """Supported social media platforms"""
+
+    TWITTER = "twitter"
+    REDDIT = "reddit"
+
+
+@dataclass
+class SocialPost:
+    """
+    Standardized social media post format.
+    Normalizes data from different platforms (Twitter/X, Reddit).
+    """
+
+    id: str
+    platform: str
+    content: str
+    author: str
+    posted_at: datetime
+    url: str
+    # Engagement metrics
+    likes: int = 0
+    comments: int = 0
+    shares: int = 0
+    # Sentiment-related
+    sentiment_score: Optional[float] = None
+    # Platform-specific metadata
+    hashtags: Optional[List[str]] = None
+    subreddit: Optional[str] = None
+    # Tracking
+    fetched_at: datetime = None
+
+    def __post_init__(self):
+        if self.fetched_at is None:
+            self.fetched_at = datetime.now(timezone.utc)
+        if self.hashtags is None:
+            self.hashtags = []
+
+    def to_dict(self) -> Dict:
+        """Convert to dictionary with serialized datetimes"""
+        data = asdict(self)
+        data["posted_at"] = self.posted_at.isoformat()
+        data["fetched_at"] = self.fetched_at.isoformat() if self.fetched_at else None
+        data["platform"] = self.platform
+        return data
+
+    def to_news_article_format(self) -> Dict:
+        """
+        Convert to NewsArticle-compatible format for sentiment pipeline.
+        Allows social posts to flow through the existing sentiment analysis.
+        """
+        return {
+            "id": f"social_{self.platform}_{self.id}",
+            "title": (
+                self.content[:100] + "..." if len(self.content) > 100 else self.content
+            ),
+            "content": self.content,
+            "summary": self.content[:200] if len(self.content) > 200 else self.content,
+            "source": f"{self.platform.title()} - {self.subreddit or 'feed'}",
+            "url": self.url,
+            "published_at": self.posted_at.isoformat(),
+            "categories": self.hashtags or [],
+            "tags": self.hashtags or [],
+            "platform": self.platform,
+            "author": self.author,
+            "engagement": {
+                "likes": self.likes,
+                "comments": self.comments,
+                "shares": self.shares,
+            },
+        }
+
+
+class SocialAPIConfig:
+    """Configuration for social media APIs"""
+
+    # Twitter/X API v2 endpoints
+    TWITTER_BASE_URL = "https://api.twitter.com/2"
+    TWITTER_SEARCH_ENDPOINT = "/tweets/search/recent"
+
+    # Reddit API endpoints (using JSON feed - no auth required for public subreddits)
+    REDDIT_BASE_URL = "https://www.reddit.com"
+    REDDIT_SUBREDDIT_ENDPOINT = "/r/{subreddit}/new.json"
+    REDDIT_SEARCH_ENDPOINT = "/search.json"
+
+    # Rate limiting (per platform)
+    # Twitter: 450 requests/15min = 30/min for app auth
+    TWITTER_RATE_LIMIT_DELAY = 2.0  # 2 seconds between requests (conservative)
+    TWITTER_REQUESTS_PER_WINDOW = 450
+    TWITTER_WINDOW_SECONDS = 900  # 15 minutes
+
+    # Reddit: 60 requests/minute
+    REDDIT_RATE_LIMIT_DELAY = 1.0  # 1 second between requests
+    REDDIT_REQUESTS_PER_MINUTE = 60
+
+    # Common settings
+    MAX_RETRIES = 3
+    TIMEOUT = 15
+    RETRY_BACKOFF_BASE = 2
+
+    # Target hashtags and subreddits for Stellar ecosystem
+    DEFAULT_HASHTAGS = ["#Stellar", "#Soroban", "#XLM", "#StellarLumen", "#DeFi"]
+    DEFAULT_SUBREDDITS = ["Stellar", "StellarLumen", "Soroban", "CryptoCurrency"]
+
+
+class RateLimiter:
+    """
+    Sliding-window rate limiter for API requests.
+    Tracks request timestamps and sleeps to stay within API tier limits.
+    """
+
+    def __init__(
+        self, requests_per_window: int, window_seconds: int, min_delay: float = 0
+    ):
+        """
+        Initialize rate limiter.
+
+        Args:
+            requests_per_window: Maximum requests allowed in the time window
+            window_seconds: Time window in seconds
+            min_delay: Minimum delay between requests (additional throttle)
+        """
+        self.requests_per_window = requests_per_window
+        self.window_seconds = window_seconds
+        self.min_delay = min_delay
+        self.request_times: List[float] = []
+        self.last_request_time = 0
+
+    def wait_if_needed(self) -> float:
+        """
+        Sleep as long as needed to respect both limits, then record the request.
+
+        Returns:
+            Total time slept in seconds (0.0 when no wait was needed)
+        """
+        current_time = time.time()
+        waited = 0.0
+
+        # Ensure minimum delay between requests
+        time_since_last = current_time - self.last_request_time
+        if time_since_last < self.min_delay:
+            wait_time = self.min_delay - time_since_last
+            time.sleep(wait_time)
+            waited += wait_time
+
+        # Drop timestamps outside the window (current_time predates any sleep above)
+        cutoff_time = current_time - self.window_seconds
+        self.request_times = [t for t in self.request_times if t > cutoff_time]
+
+        # Check if we're at the rate limit
+        if len(self.request_times) >= self.requests_per_window:
+            # Wait until oldest request exits the window
+            oldest = self.request_times[0]
+            wait_until = oldest + self.window_seconds
+            wait_time = wait_until - current_time
+            if wait_time > 0:
+                time.sleep(wait_time)
+                waited += wait_time
+                # Clean again after waiting
+                self.request_times = [
+                    t
+                    for t in self.request_times
+                    if t > time.time() - self.window_seconds
+                ]
+
+        # Record this request
+        self.last_request_time = time.time()
+        self.request_times.append(self.last_request_time)
+
+        return waited
+
+
+class TwitterFetcher:
+    """
+    Fetches tweets from Twitter/X API v2.
+    Requires Bearer Token for API access.
+    """
+
+    def __init__(self, bearer_token: Optional[str] = None):
+        """
+        Initialize Twitter fetcher.
+
+        Args:
+            bearer_token: Twitter API Bearer Token (can be from env TWITTER_BEARER_TOKEN)
+        """
+        self.bearer_token = bearer_token or os.getenv("TWITTER_BEARER_TOKEN")
+        if not self.bearer_token:
+            logger.warning(
+                "TWITTER_BEARER_TOKEN not set. Twitter fetching will be disabled."
+            )
+
+        self.session = RobustHTTPClient()
+        self.session.headers.update({"Authorization": f"Bearer {self.bearer_token}"})
+
+        self.rate_limiter = RateLimiter(
+            SocialAPIConfig.TWITTER_REQUESTS_PER_WINDOW,
+            SocialAPIConfig.TWITTER_WINDOW_SECONDS,
+            SocialAPIConfig.TWITTER_RATE_LIMIT_DELAY,
+        )
+
+        self.enabled = bool(self.bearer_token)
+
+    def fetch_hashtag(
+        self, hashtag: str, limit: int = 50, since_id: Optional[str] = None, _retries: int = 0
+    ) -> List[SocialPost]:
+        """
+        Fetch recent tweets containing a hashtag.
+
+        Args:
+            hashtag: Hashtag to search (with or without #)
+            limit: Maximum tweets to return
+            since_id: Fetch tweets newer than this ID
+            _retries: Internal count of 429 retries already made (callers omit it)
+
+        Returns:
+            List of SocialPost objects (empty when disabled, on errors, or still throttled)
+        """
+        if not self.enabled:
+            logger.warning("Twitter API not configured. Skipping Twitter fetch.")
+            return []
+
+        posts = []
+
+        # Normalize hashtag
+        query = hashtag if hashtag.startswith("#") else f"#{hashtag}"
+        query = f"{query} -is:retweet (lang:en OR lang:es OR lang:pt)"  # Exclude retweets, English/Spanish/Portuguese
+
+        params = {
+            "query": query,
+            "max_results": max(10, min(limit, 100)),  # Recent search only accepts 10-100
+            "tweet.fields": "created_at,public_metrics,entities,author_id",
+            "expansions": "author_id",
+            "user.fields": "username,name",
+        }
+
+        if since_id:
+            params["since_id"] = since_id
+
+        try:
+            self.rate_limiter.wait_if_needed()
+            response = self.session.get(
+                f"{SocialAPIConfig.TWITTER_BASE_URL}{SocialAPIConfig.TWITTER_SEARCH_ENDPOINT}",
+                params=params,
+                timeout=SocialAPIConfig.TIMEOUT,
+            )
+
+            if response.status_code == 429:
+                if _retries >= SocialAPIConfig.MAX_RETRIES:  # bound the recursion
+                    logger.error(f"Twitter rate limit persists for {hashtag}. Giving up.")
+                    return posts
+                logger.warning("Twitter rate limit exceeded. Waiting...")
+                reset_time = int(response.headers.get("x-rate-limit-reset", time.time() + 900))
+                wait_seconds = reset_time - time.time()
+                if wait_seconds > 0:
+                    time.sleep(wait_seconds)
+                return self.fetch_hashtag(hashtag, limit, since_id, _retries + 1)
+
+            response.raise_for_status()
+            data = response.json()
+
+            # Parse tweets
+            includes = data.get("includes", {})
+            users_map = {u["id"]: u for u in includes.get("users", [])}
+
+            for tweet in data.get("data", [])[:limit]:
+                author_id = tweet.get("author_id", "")
+                user = users_map.get(author_id, {})
+                metrics = tweet.get("public_metrics", {})
+
+                # Extract hashtags
+                entities = tweet.get("entities", {})
+                hashtags = [f"#{tag['tag']}" for tag in entities.get("hashtags", [])]
+
+                post = SocialPost(
+                    id=tweet["id"],
+                    platform=SocialPlatform.TWITTER.value,
+                    content=translate_and_normalize(tweet.get("text", "")),
+                    author=user.get("username", "unknown"),
+                    posted_at=datetime.fromisoformat(
+                        tweet["created_at"].replace("Z", "+00:00")
+                    ),
+                    url=f"https://twitter.com/user/status/{tweet['id']}",
+                    likes=metrics.get("like_count", 0),
+                    comments=metrics.get("reply_count", 0),
+                    shares=metrics.get("retweet_count", 0),
+                    hashtags=hashtags,
+                )
+                posts.append(post)
+
+            logger.info(f"Fetched {len(posts)} tweets for {hashtag}")
+
+        except RequestException as e:
+            logger.error(f"Error fetching Twitter data for {hashtag}: {e}")
+        except (KeyError, ValueError) as e:  # ValueError covers bad JSON, headers, timestamps
+            logger.error(f"Error parsing Twitter response: {e}")
+
+        return posts
+
+    def fetch_multiple_hashtags(
+        self, hashtags: List[str] = None, limit_per_hashtag: int = 25
+    ) -> List[SocialPost]:
+        """
+        Fetch tweets for multiple hashtags.
+
+        Args:
+            hashtags: List of hashtags to search
+            limit_per_hashtag: Max tweets per hashtag
+
+        Returns:
+            Combined list of SocialPosts
+        """
+        hashtags = hashtags or SocialAPIConfig.DEFAULT_HASHTAGS
+        all_posts = []
+
+        for hashtag in hashtags:
+            posts = self.fetch_hashtag(hashtag, limit=limit_per_hashtag)
+            all_posts.extend(posts)
+            # Small delay between different hashtag searches
+            time.sleep(0.5)
+
+        return all_posts
+
+    def close(self):
+        """Close the session"""
+        self.session.close()
+
+
+class RedditFetcher:
+    """
+    Fetches posts from Reddit.
+    Uses public JSON API (no auth required for public subreddits).
+    """
+
+    def __init__(self):
+        """Initialize Reddit fetcher"""
+        self.session = RobustHTTPClient()
+        self.session.headers.update(
+            {
+                "User-Agent": "LumenPulseSentimentBot/1.0 (cryptocurrency sentiment analysis)"
+            }
+        )
+
+        self.rate_limiter = RateLimiter(
+            SocialAPIConfig.REDDIT_REQUESTS_PER_MINUTE,
+            60,
+            SocialAPIConfig.REDDIT_RATE_LIMIT_DELAY,
+        )
+
+    def fetch_subreddit(
+        self, subreddit: str, limit: int = 50, after: Optional[str] = None, _retries: int = 0
+    ) -> List[SocialPost]:
+        """
+        Fetch recent posts from a subreddit.
+
+        Args:
+            subreddit: Subreddit name (without r/)
+            limit: Maximum posts to return
+            after: Reddit fullname to fetch posts after
+            _retries: Internal count of 429 retries already made (callers omit it)
+
+        Returns:
+            List of SocialPost objects (empty on errors or when still throttled)
+        """
+        posts = []
+        url = f"{SocialAPIConfig.REDDIT_BASE_URL}{SocialAPIConfig.REDDIT_SUBREDDIT_ENDPOINT.format(subreddit=subreddit)}"
+
+        params = {"limit": min(limit, 100)}
+        if after:
+            params["after"] = after
+
+        try:
+            self.rate_limiter.wait_if_needed()
+            response = self.session.get(url, params=params, timeout=SocialAPIConfig.TIMEOUT)
+
+            if response.status_code == 429:
+                if _retries >= SocialAPIConfig.MAX_RETRIES:  # bound the recursion
+                    logger.error(f"Reddit rate limit persists for r/{subreddit}. Giving up.")
+                    return posts
+                logger.warning("Reddit rate limit exceeded. Waiting...")
+                time.sleep(60)
+                return self.fetch_subreddit(subreddit, limit, after, _retries + 1)
+
+            response.raise_for_status()
+            data = response.json()
+
+            # Parse posts
+            for child in data.get("data", {}).get("children", [])[:limit]:
+                post_data = child.get("data", {})
+
+                post = SocialPost(
+                    id=post_data.get("id", ""),
+                    platform=SocialPlatform.REDDIT.value,
+                    content=translate_and_normalize(
+                        post_data.get("selftext", "") or post_data.get("title", "")
+                    ),
+                    author=post_data.get("author", "[deleted]"),
+                    posted_at=datetime.fromtimestamp(
+                        post_data.get("created_utc", time.time()), tz=timezone.utc
+                    ),
+                    url=f"https://reddit.com{post_data.get('permalink', '')}",
+                    likes=post_data.get("ups", 0),
+                    comments=post_data.get("num_comments", 0),
+                    shares=post_data.get("num_crossposts", 0),
+                    subreddit=post_data.get("subreddit", subreddit),
+                    hashtags=self._extract_hashtags(post_data),
+                )
+                posts.append(post)
+
+            logger.info(f"Fetched {len(posts)} posts from r/{subreddit}")
+
+        except RequestException as e:
+            logger.error(f"Error fetching Reddit data from r/{subreddit}: {e}")
+        except (KeyError, json.JSONDecodeError) as e:
+            logger.error(f"Error parsing Reddit response: {e}")
+
+        return posts
+
+    def fetch_search(
+        self, query: str, subreddits: List[str] = None, limit: int = 50
+    ) -> List[SocialPost]:
+        """
+        Search Reddit for specific terms.
+
+        Args:
+            query: Search query
+            subreddits: Restrict to these subreddits (searched via /r/a+b/search.json)
+            limit: Maximum results
+
+        Returns:
+            List of SocialPost objects (empty on request or parse errors)
+        """
+        posts = []
+
+        params = {"q": query, "limit": min(limit, 100), "sort": "new", "type": "link"}
+        # Reddit honours restrict_sr only under /r/<names>/search, so scope via the path.
+        scope = f"/r/{'+'.join(subreddits)}" if subreddits else ""
+        if subreddits:
+            params["restrict_sr"] = "true"
+
+        try:
+            self.rate_limiter.wait_if_needed()
+
+            response = self.session.get(
+                f"{SocialAPIConfig.REDDIT_BASE_URL}{scope}{SocialAPIConfig.REDDIT_SEARCH_ENDPOINT}",
+                params=params,
+                timeout=SocialAPIConfig.TIMEOUT,
+            )
+
+            response.raise_for_status()
+            data = response.json()
+
+            for child in data.get("data", {}).get("children", [])[:limit]:
+                post_data = child.get("data", {})
+
+                post = SocialPost(
+                    id=post_data.get("id", ""),
+                    platform=SocialPlatform.REDDIT.value,
+                    content=translate_and_normalize(
+                        post_data.get("selftext", "") or post_data.get("title", "")
+                    ),
+                    author=post_data.get("author", "[deleted]"),
+                    posted_at=datetime.fromtimestamp(
+                        post_data.get("created_utc", time.time()), tz=timezone.utc
+                    ),
+                    url=f"https://reddit.com{post_data.get('permalink', '')}",
+                    likes=post_data.get("ups", 0),
+                    comments=post_data.get("num_comments", 0),
+                    shares=post_data.get("num_crossposts", 0),
+                    subreddit=post_data.get("subreddit", ""),
+                )
+                posts.append(post)
+
+            logger.info(f"Fetched {len(posts)} Reddit posts for query: {query}")
+
+        except RequestException as e:
+            logger.error(f"Error searching Reddit: {e}")
+        except (KeyError, json.JSONDecodeError) as e:
+            logger.error(f"Error parsing Reddit search response: {e}")
+
+        return posts
+
+    def fetch_multiple_subreddits(
+        self, subreddits: List[str] = None, limit_per_subreddit: int = 25
+    ) -> List[SocialPost]:
+        """
+        Fetch posts from multiple subreddits.
+
+        Args:
+            subreddits: List of subreddit names
+            limit_per_subreddit: Max posts per subreddit
+
+        Returns:
+            Combined list of SocialPosts
+        """
+        subreddits = subreddits or SocialAPIConfig.DEFAULT_SUBREDDITS
+        all_posts = []
+
+        for subreddit in subreddits:
+            posts = self.fetch_subreddit(subreddit, limit=limit_per_subreddit)
+            all_posts.extend(posts)
+            # Small delay between subreddit fetches
+            time.sleep(0.5)
+
+        return all_posts
+
+    def _extract_hashtags(self, post_data: Dict) -> List[str]:
+        """Extract unique hashtags from a Reddit post, in first-seen order.
+
+        Scans the title and body for ``#word`` tokens and appends the link
+        flair (spaces removed) as an extra hashtag when one is present.
+        """
+        text = f"{post_data.get('title', '')} {post_data.get('selftext', '')}"
+        hashtags = re.findall(r"#\w+", text)
+        if post_data.get("link_flair_text"):
+            hashtags.append(f"#{post_data['link_flair_text'].replace(' ', '')}")
+
+        # dict.fromkeys de-duplicates while keeping a deterministic order.
+        return list(dict.fromkeys(hashtags))
+
+    def close(self):
+        """Close the session"""
+        self.session.close()
+
+
+class SocialFetcher:
+    """
+    Main social media fetcher that coordinates Twitter and Reddit fetching.
+    Provides a unified interface for collecting social sentiment data.
+    """
+
+    def __init__(
+        self,
+        use_twitter: bool = True,
+        use_reddit: bool = True,
+        twitter_token: Optional[str] = None,
+    ):
+        """
+        Initialize SocialFetcher.
+
+        Args:
+            use_twitter: Enable Twitter/X fetching
+            use_reddit: Enable Reddit fetching
+            twitter_token: Twitter Bearer Token (optional, uses env)
+        """
+        self.use_twitter = use_twitter
+        self.use_reddit = use_reddit
+
+        # Initialize fetchers
+        self.twitter = (
+            TwitterFetcher(bearer_token=twitter_token) if use_twitter else None
+        )
+        self.reddit = RedditFetcher() if use_reddit else None
+
+        # Deduplication tracking
+        self.seen_post_ids: set = set()
+
+    def fetch_all(
+        self,
+        hashtags: List[str] = None,
+        subreddits: List[str] = None,
+        limit_per_source: int = 25,
+    ) -> List[Dict]:
+        """
+        Fetch social posts from all configured sources.
+
+        Args:
+            hashtags: Twitter hashtags to search
+            subreddits: Reddit subreddits to fetch
+            limit_per_source: Max posts per source/hashtag/subreddit
+
+        Returns:
+            List of normalized post dictionaries
+        """
+        all_posts = []
+
+        # Fetch from Twitter
+        if self.twitter and self.use_twitter:
+            twitter_posts = self.twitter.fetch_multiple_hashtags(
+                hashtags=hashtags, limit_per_hashtag=limit_per_source
+            )
+            all_posts.extend(twitter_posts)
+
+        # Fetch from Reddit
+        if self.reddit and self.use_reddit:
+            reddit_posts = self.reddit.fetch_multiple_subreddits(
+                subreddits=subreddits, limit_per_subreddit=limit_per_source
+            )
+            all_posts.extend(reddit_posts)
+
+        # Deduplicate and sort by date
+        unique_posts = []
+        for post in all_posts:
+            post_id = f"{post.platform}_{post.id}"
+            if post_id not in self.seen_post_ids:
+                self.seen_post_ids.add(post_id)
+                unique_posts.append(post)
+
+        # Sort by posted_at (newest first)
+        unique_posts.sort(key=lambda p: p.posted_at, reverse=True)
+
+        logger.info(f"Total unique social posts: {len(unique_posts)}")
+
+        return [post.to_dict() for post in unique_posts]
+
+    def fetch_as_articles(
+        self,
+        hashtags: List[str] = None,
+        subreddits: List[str] = None,
+        limit_per_source: int = 25,
+    ) -> List[Dict]:
+        """
+        Fetch posts in NewsArticle-compatible format.
+        Useful for feeding into existing sentiment analysis pipeline.
+
+        Args:
+            hashtags: Twitter hashtags to search
+            subreddits: Reddit subreddits to fetch
+            limit_per_source: Max posts per source
+
+        Returns:
+            List of posts in article-compatible format
+        """
+        posts = self.fetch_all(
+            hashtags=hashtags, subreddits=subreddits, limit_per_source=limit_per_source
+        )
+
+        return [
+            SocialPost(
+                id=p["id"],
+                platform=p["platform"],
+                content=p["content"],
+                author=p["author"],
+                posted_at=datetime.fromisoformat(p["posted_at"].replace("Z", "+00:00")),
+                url=p["url"],
+                likes=p.get("likes", 0),
+                comments=p.get("comments", 0),
+                shares=p.get("shares", 0),
+                hashtags=p.get("hashtags", []),
+                subreddit=p.get("subreddit"),
+            ).to_news_article_format()
+            for p in posts
+        ]
+
+    def get_sentiment_weight(self, post: SocialPost) -> float:
+        """
+        Calculate sentiment weight based on engagement.
+        Higher engagement = more weight for sentiment scoring.
+
+        Args:
+            post: SocialPost to weight
+
+        Returns:
+            Weight multiplier for sentiment scoring (never above 3.0)
+        """
+        # Base weight
+        weight = 1.0
+
+        # Engagement bonus (logarithmic scaling; comments count 2x, shares 3x)
+        total_engagement = post.likes + (post.comments * 2) + (post.shares * 3)
+        if total_engagement > 0:
+            weight += math.log10(total_engagement + 1) / 2  # +0.5 at 9, +1.0 at 99 (unbounded)
+
+        # Platform-specific weights
+        if post.platform == SocialPlatform.REDDIT.value:
+            # Reddit tends to have more detailed analysis
+            weight *= 1.2
+
+        return min(weight, 3.0)  # Cap at 3x
+
+    def clear_cache(self):
+        """Clear the seen post cache"""
+        self.seen_post_ids.clear()
+
+    def close(self):
+        """Close all fetcher sessions"""
+        if self.twitter:
+            self.twitter.close()
+        if self.reddit:
+            self.reddit.close()
+
+
+# Convenience function for easy usage
+def fetch_social(
+    hashtags: List[str] = None,
+    subreddits: List[str] = None,
+    limit_per_source: int = 25,
+    use_twitter: bool = True,
+    use_reddit: bool = True,
+) -> List[Dict]:
+    """
+    Convenience function to fetch social posts.
+
+    Example:
+        posts = fetch_social(
+            hashtags=["#Stellar", "#Soroban"],
+            subreddits=["Stellar"],
+            limit_per_source=10
+        )
+        for post in posts:
+            print(f"{post['platform']}: {post['content'][:50]}...")
+
+    Args:
+        hashtags: Twitter hashtags to search
+        subreddits: Reddit subreddits to fetch
+        limit_per_source: Max posts per source
+        use_twitter: Enable Twitter fetching
+        use_reddit: Enable Reddit fetching
+
+    Returns:
+        List of social post dictionaries
+    """
+    fetcher = SocialFetcher(use_twitter=use_twitter, use_reddit=use_reddit)
+    try:
+        return fetcher.fetch_all(
+            hashtags=hashtags, subreddits=subreddits, limit_per_source=limit_per_source
+        )
+    finally:
+        fetcher.close()
diff --git a/temp_backup/src/ingestion/soroban_event_indexer.py b/temp_backup/src/ingestion/soroban_event_indexer.py
new file mode 100644
index 00000000..41bbf0f6
--- /dev/null
+++ b/temp_backup/src/ingestion/soroban_event_indexer.py
@@ -0,0 +1,267 @@
+"""
+Soroban Event Indexer for incremental sync
+Polls Soroban RPC for new events and sends them to backend for processing
+"""
+
+import os
+import time
+import json
+import logging
+from pathlib import Path
+from datetime import datetime, timezone
+import requests
+from typing import List, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+class SorobanEventIndexer:
+    def __init__(
+        self,
+        rpc_url: str,
+        backend_url: str,
+        ingest_secret: str,
+        contract_ids: Optional[List[str]] = None,
+        state_file: str = "./data/soroban_indexer_state.json",
+        poll_interval: int = 30
+    ):
+        self.rpc_url = rpc_url
+        self.backend_url = backend_url
+        self.ingest_secret = ingest_secret
+        self.contract_ids = contract_ids or []
+        self.state_file = Path(state_file)
+        self.poll_interval = poll_interval
+        self.last_ledger: int = self._load_last_ledger()
+
+    def _load_last_ledger(self) -> int:
+        """Load last processed ledger from state file (0 if missing or malformed)"""
+        if self.state_file.exists():
+            try:
+                with open(self.state_file, 'r') as f:
+                    state = json.load(f)
+                    return int(state.get("last_ledger", 0))
+            except (ValueError, TypeError, AttributeError):  # bad JSON / wrong shape
+                logger.warning("Failed to load state file, starting from ledger 0")
+        return 0
+
+    def _save_last_ledger(self, ledger: int):
+        """Save last processed ledger to state file"""
+        self.state_file.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.state_file, 'w') as f:
+            json.dump({"last_ledger": ledger, "timestamp": datetime.now(timezone.utc).isoformat()}, f)
+        self.last_ledger = ledger
+
+    def fetch_latest_ledger(self) -> int:
+        """Get the latest ledger sequence from Soroban RPC.
+
+        Raises RuntimeError on a JSON-RPC error reply; transport errors propagate.
+        """
+        payload = {"jsonrpc": "2.0", "id": 1, "method": "getLatestLedger"}
+        try:
+            response = requests.post(self.rpc_url, json=payload, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+            if "error" in data:
+                raise RuntimeError(f"RPC Error: {data['error']}")
+            return int(data.get("result", {}).get("sequence", 0))
+        except Exception as e:
+            logger.error(f"Failed to fetch latest ledger: {e}")
+            raise
+
+    def fetch_events_since(self, start_ledger: int) -> List[Dict]:
+        """Fetch all events at or after ``start_ledger``, following pagination.
+
+        Soroban RPC rejects a getEvents request carrying both ``startLedger``
+        and a pagination cursor, so follow-up pages send the cursor only.
+        Raises on transport failures and RuntimeError on a JSON-RPC error.
+        """
+        page_size = 100
+        all_events: List[Dict] = []
+        cursor = None
+        filters = []
+        if self.contract_ids:
+            filters.append({"type": "contract", "contractIds": self.contract_ids})
+
+        while True:
+            pagination: Dict = {"limit": page_size}
+            params: Dict = {"filters": filters, "pagination": pagination}
+            if cursor:
+                pagination["cursor"] = cursor
+            else:
+                params["startLedger"] = start_ledger
+            payload = {
+                "jsonrpc": "2.0",
+                "id": 1,
+                "method": "getEvents",
+                "params": params,
+            }
+
+            try:
+                response = requests.post(self.rpc_url, json=payload, timeout=30)
+                response.raise_for_status()
+                data = response.json()
+            except Exception as e:
+                logger.error(f"RPC Request failed: {e}")
+                raise
+
+            if "error" in data:
+                logger.error(f"RPC Error: {data['error']}")
+                raise RuntimeError(f"RPC Error: {data['error']}")
+
+            result = data.get("result", {})
+            events = result.get("events", [])
+            all_events.extend(events)
+
+            # A short page is treated as the end of the available events.
+            if len(events) < page_size:
+                break
+
+            # Prefer the response-level cursor; fall back to the per-event
+            # paging token (or id) for RPC builds that do not return one.
+            last = events[-1]
+            cursor = result.get("cursor") or last.get("pagingToken") or last.get("id")
+            if not cursor:
+                break
+
+            time.sleep(0.5)  # Rate limiting
+
+        return all_events
+
+    def send_event_to_backend(self, event: Dict, event_index: int) -> bool:
+        """Send a single event to the backend ingest endpoint"""
+        tx_hash = event.get("transactionHash", "")
+        ledger_sequence = int(event.get("ledger", 0))
+        contract_id = event.get("contractId")
+        event_type = event.get("type")
+        raw_payload = event
+
+        ingest_payload = {
+            "txHash": tx_hash,
+            "eventIndex": event_index,
+            "ledgerSequence": ledger_sequence,
+            "contractId": contract_id,
+            "eventType": event_type,
+            "rawPayload": raw_payload
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-ingest-secret": self.ingest_secret
+        }
+
+        try:
+            response = requests.post(
+                f"{self.backend_url}/soroban-events/ingest",
+                json=ingest_payload,
+                headers=headers,
+                timeout=30
+            )
+            response.raise_for_status()
+            logger.debug(f"Successfully sent event {tx_hash}:{event_index} to backend")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to send event {tx_hash}:{event_index} to backend: {e}")
+            return False
+
+    def run_once(self) -> Dict:
+        """Run one incremental sync: fetch new events and forward them.
+
+        The checkpoint only moves past ledgers whose events were all
+        delivered. If any send fails, the saved ledger stops just before the
+        earliest failing ledger so the next run retries it (at-least-once
+        delivery; events from that ledger onwards may be sent again).
+
+        Returns:
+            A status dict. ``status`` is "no_new_ledgers", "success" or
+            "error"; on success it also carries the event counts and the
+            ledger that was checkpointed.
+        """
+        logger.info("=" * 60)
+        logger.info("SOROBAN EVENT INDEXER - INCREMENTAL SYNC")
+        logger.info("=" * 60)
+
+        try:
+            latest_ledger = self.fetch_latest_ledger()
+            logger.info(f"Latest ledger: {latest_ledger}")
+            logger.info(f"Last processed ledger: {self.last_ledger}")
+
+            if latest_ledger <= self.last_ledger:
+                logger.info("No new ledgers to process")
+                return {"status": "no_new_ledgers", "events_processed": 0}
+
+            start_ledger = self.last_ledger + 1
+            logger.info(f"Fetching events from ledger {start_ledger} to {latest_ledger}")
+            events = self.fetch_events_since(start_ledger)
+            logger.info(f"Found {len(events)} new events")
+
+            sent_count = 0
+            failed_ledgers = []
+            highest_ledger = self.last_ledger
+            for idx, event in enumerate(events):
+                event_ledger = int(event.get("ledger", 0))
+                if self.send_event_to_backend(event, idx):
+                    sent_count += 1
+                else:
+                    failed_ledgers.append(event_ledger)
+                highest_ledger = max(highest_ledger, event_ledger)
+
+            failed_count = len(failed_ledgers)
+            if failed_ledgers:
+                # Never checkpoint past an undelivered event, and never move backwards.
+                highest_ledger = max(self.last_ledger, min(failed_ledgers) - 1)
+            self._save_last_ledger(highest_ledger)
+
+            logger.info(f"Sent {sent_count} events to backend, {failed_count} failed")
+            logger.info(f"Updated last processed ledger to {highest_ledger}")
+            logger.info("=" * 60)
+            return {"status": "success", "events_found": len(events), "events_sent": sent_count,
+                    "events_failed": failed_count, "last_ledger": highest_ledger}
+        except Exception as e:
+            logger.error(f"Error in indexer run: {e}", exc_info=True)
+            return {"status": "error", "error": str(e)}
+
+    def run_forever(self):
+        """Run the indexer continuously: sync, sleep ``poll_interval`` seconds, repeat."""
+        logger.info("Starting Soroban event indexer (continuous mode)")
+        logger.info(f"Poll interval: {self.poll_interval} seconds")
+        # Loops until the process is stopped; run_once() logs and swallows sync errors.
+        while True:
+            self.run_once()
+            time.sleep(self.poll_interval)
+
+def main():
+    """Parse CLI/environment settings and run the Soroban event indexer.
+
+    Every option falls back to an environment variable or built-in default.
+    ``--contract-ids`` accepts space- and/or comma-separated contract IDs.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Soroban Event Indexer")
+    parser.add_argument("--rpc-url", type=str, default=os.getenv("SOROBAN_RPC_URL", "https://soroban-testnet.stellar.org"), help="Soroban RPC URL")
+    parser.add_argument("--backend-url", type=str, default=os.getenv("BACKEND_URL", "http://localhost:3000"), help="Backend API URL")
+    parser.add_argument("--ingest-secret", type=str, default=os.getenv("SOROBAN_INGEST_SECRET", ""), help="Secret for backend ingest endpoint")
+    parser.add_argument("--contract-ids", nargs="*", default=os.getenv("SOROBAN_CONTRACT_IDS", "").split(","), help="List of contract IDs to index (comma-separated)")
+    parser.add_argument("--state-file", type=str, default="./data/soroban_indexer_state.json", help="Path to state file")
+    parser.add_argument("--poll-interval", type=int, default=30, help="Poll interval in seconds")
+    parser.add_argument("--once", action="store_true", help="Run once and exit")
+    args = parser.parse_args()
+
+    # "--contract-ids A,B" arrives as the single item "A,B", so re-split every
+    # item on commas before dropping blanks (the env default is already split).
+    raw_ids = ",".join(args.contract_ids).split(",")
+    contract_ids = [cid.strip() for cid in raw_ids if cid.strip()]
+    indexer = SorobanEventIndexer(
+        rpc_url=args.rpc_url, backend_url=args.backend_url, ingest_secret=args.ingest_secret,
+        contract_ids=contract_ids, state_file=args.state_file, poll_interval=args.poll_interval,
+    )
+    if args.once:
+        indexer.run_once()
+    else:
+        indexer.run_forever()
+
+if __name__ == "__main__":
+    # Script entry point: configure root logging once (timestamped, INFO
+    # level) before main() builds the indexer and starts emitting records.
+    # basicConfig() does nothing if the root logger already has handlers.
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+    main()
diff --git a/temp_backup/src/ingestion/stellar_fetcher.py b/temp_backup/src/ingestion/stellar_fetcher.py
new file mode 100644
index 00000000..16460a30
--- /dev/null
+++ b/temp_backup/src/ingestion/stellar_fetcher.py
@@ -0,0 +1,565 @@
+"""
+Stellar Blockchain Data Fetcher
+Fetches historical transaction and volume data from Stellar Horizon API.
+"""
+
+import time
+from typing import Dict, List, Optional, Tuple, Any
+from datetime import datetime, timedelta
+from dataclasses import dataclass
+import json
+from stellar_sdk import Server, Asset
+from stellar_sdk.exceptions import NotFoundError, BadRequestError, ConnectionError
+from stellar_sdk.call_builder.call_builder_async import PaymentsCallBuilder
+
+
+@dataclass
+class VolumeData:
+    """Aggregated volume of one asset over a look-back window."""
+
+    asset_code: str
+    asset_issuer: Optional[str]
+    time_period_hours: int
+    total_volume: float
+    transaction_count: int
+    start_time: datetime
+    end_time: datetime
+    volume_by_hour: Dict[str, float]  # bucket label -> volume in that bucket
+
+    def to_dict(self) -> Dict:
+        """Return a plain dict: ISO-8601 timestamps plus the hourly average.
+
+        ``average_hourly_volume`` is 0 when ``time_period_hours`` is not
+        positive, which avoids dividing by zero.
+        """
+        if self.time_period_hours > 0:
+            hourly_average = self.total_volume / self.time_period_hours
+        else:
+            hourly_average = 0
+        payload = {name: getattr(self, name) for name in (
+            "asset_code", "asset_issuer", "time_period_hours", "total_volume", "transaction_count")}
+        payload["start_time"] = self.start_time.isoformat()
+        payload["end_time"] = self.end_time.isoformat()
+        payload["volume_by_hour"] = self.volume_by_hour
+        payload["average_hourly_volume"] = hourly_average
+        return payload
+
+
+@dataclass
+class TransactionRecord:
+    """One Stellar transaction, reduced to the fields this module reports."""
+
+    id: str
+    hash: str
+    created_at: datetime
+    source_account: str
+    operation_count: int
+    total_amount: float
+    fee_charged: float
+    memo: Optional[str]
+    successful: bool
+
+    def to_dict(self) -> Dict:
+        """Return the record as a JSON-friendly dict.
+
+        ``created_at`` is rendered as an ISO-8601 string; every other field
+        is copied through unchanged, in declaration order.
+        """
+        serialized = {"id": self.id, "hash": self.hash}
+        serialized["created_at"] = self.created_at.isoformat()
+        # Remaining fields need no conversion
+        for field_name in ("source_account", "operation_count", "total_amount",
+                           "fee_charged", "memo", "successful"):
+            serialized[field_name] = getattr(self, field_name)
+        return serialized
+
+
+class StellarDataFetcher:
+    """
+    Fetches on-chain data from Stellar blockchain via Horizon API.
+
+    Features:
+    - Fetch volume data for specific assets
+    - Handle pagination for large datasets
+    - Aggregate data by time periods
+    - Error handling and retry logic
+    """
+
+    # Default Horizon servers (public instances)
+    HORIZON_SERVERS = [
+        "https://horizon.stellar.org",  # Mainnet - Stellar Development Foundation
+        "https://horizon-testnet.stellar.org",  # Testnet
+    ]
+
+    # Rate limiting
+    MAX_RETRIES = 3
+    RETRY_DELAY = 1  # seconds
+    REQUEST_TIMEOUT = 30  # seconds
+
+    def __init__(
+        self,
+        horizon_url: Optional[str] = None,
+        network: str = "public",
+        timeout: Optional[float] = None,
+    ):
+        """
+        Set up the Horizon client and an empty in-memory result cache.
+
+        Args:
+            horizon_url: Explicit Horizon URL; takes precedence over ``network``
+            network: 'testnet' selects the testnet server, anything else mainnet
+            timeout: Request timeout in seconds; None falls back to the
+                class-level REQUEST_TIMEOUT
+        """
+        if not horizon_url:
+            server_index = 1 if network == "testnet" else 0
+            horizon_url = self.HORIZON_SERVERS[server_index]
+        self.horizon_url = horizon_url
+
+        print(f"Connecting to Horizon server: {self.horizon_url}")
+
+        # Only None triggers the class-level default, so an explicit timeout
+        # (even 0) is passed to the SDK unchanged.
+        self.timeout = self.REQUEST_TIMEOUT if timeout is None else timeout
+        self.server = Server(horizon_url=self.horizon_url, timeout=self.timeout)
+
+        # Result cache: key -> (time stored, value); see cache_ttl
+        self.cache = {}
+        self.cache_ttl = 300  # 5 minutes
+
+    def _handle_pagination(self, callable_func, *args, **kwargs) -> List[Dict]:
+        """
+        Collect records from every page of a Horizon API response.
+
+        Paging stops after ``max_pages`` pages, on an empty page, or when the
+        next link carries no new cursor.
+
+        Args:
+            callable_func: A call builder (exposes ``call``/``cursor``) or a
+                plain function returning a Horizon page dict
+            *args, **kwargs: Arguments forwarded to a plain function
+
+        Returns:
+            Records of all pages fetched; a partial list if a request fails
+            (the error is printed, not raised)
+        """
+        records = []
+        cursor = None
+        max_pages = 100  # Safety limit
+        is_builder = hasattr(callable_func, "call")
+
+        try:
+            for _ in range(max_pages):
+                if is_builder:
+                    # Move the builder forward; without this every call()
+                    # would return the first page again.
+                    if cursor:
+                        callable_func.cursor(cursor)
+                    response = callable_func.call()
+                else:
+                    query_params = kwargs.copy()
+                    if cursor:
+                        query_params["cursor"] = cursor
+                    response = callable_func(*args, **query_params)
+
+                page_records = response["_embedded"]["records"]
+                if not page_records:
+                    break  # Horizon keeps a "next" link even after the last page
+                records.extend(page_records)
+
+                # The cursor for the following page is embedded in the next link
+                next_url = response["_links"].get("next", {}).get("href") or ""
+                if "cursor=" not in next_url:
+                    break
+                next_cursor = next_url.split("cursor=")[1].split("&")[0]
+                if not next_cursor or next_cursor == cursor:
+                    break  # No progress possible
+                cursor = next_cursor
+
+                # Small delay to be nice to the API
+                time.sleep(0.1)
+
+        except (ConnectionError, BadRequestError) as e:
+            print(f"Error during pagination: {e}")
+        except Exception as e:
+            print(f"Unexpected error during pagination: {e}")
+
+        return records
+
+    def _retry_request(self, func, *args, **kwargs):
+        """
+        Call ``func``, retrying on any exception with a linearly growing delay.
+        The last error is re-raised once MAX_RETRIES attempts have failed.
+
+        Args:
+            func: Function to retry
+            *args, **kwargs: Arguments for the function
+
+        Returns:
+            Function result
+        """
+        for attempt in range(1, self.MAX_RETRIES + 1):
+            try:
+                return func(*args, **kwargs)
+            except Exception as e:
+                if attempt >= self.MAX_RETRIES:
+                    # functools.partial and other callables may lack __name__
+                    print(f"All retry attempts failed for {getattr(func, '__name__', repr(func))}")
+                    raise
+                delay = self.RETRY_DELAY * attempt
+                print(f"Attempt {attempt} failed: {e}. Retrying in {delay}s...")
+                time.sleep(delay)
+
+    def get_asset_volume(self, asset_code: str, hours: int = 24) -> VolumeData:
+        """
+        Get trading volume for a specific asset over the last N hours.
+
+        Args:
+            asset_code: Asset code (e.g., 'XLM', 'USDC')
+            hours: Number of hours to look back
+
+        Returns:
+            VolumeData object with aggregated volume information. Its
+            start_time/end_time are timezone-aware UTC; if fetching fails
+            unexpectedly the totals are zero and the error is printed.
+        """
+        from datetime import timezone  # not among this file's top-level imports
+
+        # Generate cache key
+        cache_key = f"volume_{asset_code}_{hours}_{datetime.now().strftime('%Y%m%d%H')}"
+
+        # Check cache
+        if cache_key in self.cache:
+            cached_time, cached_data = self.cache[cache_key]
+            if time.time() - cached_time < self.cache_ttl:
+                print(f"Returning cached data for {asset_code} (last {hours}h)")
+                return cached_data
+
+        print(f"Fetching volume data for {asset_code} (last {hours}h)...")
+
+        # Horizon timestamps are UTC and parse to aware datetimes, so the window
+        # must be aware too: mixing naive and aware values raises TypeError.
+        end_time = datetime.now(timezone.utc)
+        start_time = end_time - timedelta(hours=hours)
+
+        # Initialize volume tracking
+        total_volume = 0.0
+        transaction_count = 0
+        volume_by_hour = {f"hour_{i}": 0.0 for i in range(hours)}
+
+        try:
+            # For XLM (native asset)
+            if asset_code == "XLM":
+                # Get payments (XLM transactions)
+                payments = self._get_payments_for_period(
+                    start_time, end_time, asset_code="native"
+                )
+                for payment in payments:
+                    try:
+                        amount = float(payment.get("amount", "0"))
+                        if amount > 0:
+                            total_volume += amount
+                            transaction_count += 1
+                            # Add to hourly bucket
+                            created_at = datetime.fromisoformat(
+                                payment["created_at"].replace("Z", "+00:00")
+                            )
+                            hours_ago = int(
+                                (end_time - created_at).total_seconds() / 3600
+                            )
+                            if 0 <= hours_ago < hours:
+                                volume_by_hour[f"hour_{hours_ago}"] += amount
+                    except (KeyError, ValueError, TypeError) as e:
+                        print(f"Error processing payment: {e}")
+                        continue
+
+            else:
+                # For other assets, we need to look at trades and path payments
+                # This is a simplified approach - in production you'd want more sophisticated logic
+                trades = self._get_trades_for_asset(asset_code, start_time, end_time)
+
+                for trade in trades:
+                    try:
+                        # Check if this is buying or selling our target asset
+                        base_asset = trade.get("base_asset_code")
+                        counter_asset = trade.get("counter_asset_code")
+
+                        if base_asset == asset_code:
+                            amount = float(trade.get("base_amount", "0"))
+                        elif counter_asset == asset_code:
+                            amount = float(trade.get("counter_amount", "0"))
+                        else:
+                            continue
+
+                        if amount > 0:
+                            total_volume += amount
+                            transaction_count += 1
+                            # Add to hourly bucket
+                            ledger_close_time = datetime.fromisoformat(
+                                trade["ledger_close_time"].replace("Z", "+00:00")
+                            )
+                            hours_ago = int(
+                                (end_time - ledger_close_time).total_seconds() / 3600
+                            )
+                            if 0 <= hours_ago < hours:
+                                volume_by_hour[f"hour_{hours_ago}"] += amount
+
+                    except (KeyError, ValueError, TypeError) as e:
+                        print(f"Error processing trade: {e}")
+                        continue
+
+            # Create VolumeData object
+            volume_data = VolumeData(
+                asset_code=asset_code,
+                asset_issuer=None,  # Native XLM has no issuer, for others we'd need issuer info
+                time_period_hours=hours,
+                total_volume=total_volume,
+                transaction_count=transaction_count,
+                start_time=start_time,
+                end_time=end_time,
+                volume_by_hour=volume_by_hour,
+            )
+
+            # Cache the result
+            self.cache[cache_key] = (time.time(), volume_data)
+
+            return volume_data
+
+        except Exception as e:
+            print(f"Error fetching volume for {asset_code}: {e}")
+            import traceback
+            traceback.print_exc()
+            # Return empty volume data on error
+            return VolumeData(
+                asset_code=asset_code,
+                asset_issuer=None,
+                time_period_hours=hours,
+                total_volume=0.0,
+                transaction_count=0,
+                start_time=start_time,
+                end_time=end_time,
+                volume_by_hour={f"hour_{i}": 0.0 for i in range(hours)},
+            )
+
+    def _get_payments_for_period(
+        self, start_time: datetime, end_time: datetime, asset_code: str = "native"
+    ) -> List[Dict]:
+        """
+        Get payments for a specific asset within a time period.
+
+        Records are requested newest-first so the paging cap covers the most
+        recent history, then filtered locally because the SDK's payments
+        builder has no per-asset filter.
+
+        Args:
+            start_time: Start of time period (naive values are read as local time)
+            end_time: End of time period (naive values are read as local time)
+            asset_code: Asset code or 'native' for XLM
+
+        Returns:
+            Matching payment records, oldest first; empty if the fetch fails
+        """
+        from datetime import timezone  # not among this file's top-level imports
+
+        payments = []
+        try:
+            # Horizon timestamps parse to aware datetimes; comparing them with
+            # naive bounds raises TypeError, so normalise the bounds to UTC.
+            window_start = start_time.astimezone(timezone.utc)
+            window_end = end_time.astimezone(timezone.utc)
+            payments_call = self.server.payments().order(desc=True).limit(200)
+            records = self._retry_request(self._handle_pagination, payments_call)
+
+            for payment in records:
+                if asset_code == "native":
+                    if payment.get("asset_type") != "native":
+                        continue
+                elif payment.get("asset_code") != asset_code:
+                    continue  # issuer is unknown here, so match on the code only
+                try:
+                    created_at = datetime.fromisoformat(
+                        payment["created_at"].replace("Z", "+00:00")
+                    )
+                    if window_start <= created_at <= window_end:
+                        payments.append(payment)
+                except (KeyError, ValueError, TypeError) as e:
+                    print(f"Error parsing payment timestamp: {e}")
+            payments.reverse()  # restore chronological order
+        except Exception as e:
+            print(f"Error getting payments: {e}")
+
+        return payments
+
+    def _get_trades_for_asset(
+        self, asset_code: str, start_time: datetime, end_time: datetime
+    ) -> List[Dict]:
+        """
+        Get trades involving a specific asset.
+
+        Trades are requested newest-first so the paging cap covers the most
+        recent history; the result is returned in chronological order.
+
+        Args:
+            asset_code: Asset code to filter by
+            start_time: Start of time period (naive values are read as local time)
+            end_time: End of time period (naive values are read as local time)
+
+        Returns:
+            Matching trade records, oldest first; empty if the fetch fails
+        """
+        from datetime import timezone  # not among this file's top-level imports
+
+        trades = []
+        try:
+            # Horizon timestamps parse to aware datetimes; comparing them with
+            # naive bounds raises TypeError, so normalise the bounds to UTC.
+            window_start = start_time.astimezone(timezone.utc)
+            window_end = end_time.astimezone(timezone.utc)
+
+            trades_call = self.server.trades().order(desc=True).limit(200)
+            records = self._retry_request(self._handle_pagination, trades_call)
+
+            for trade in records:
+                if asset_code not in (trade.get("base_asset_code"), trade.get("counter_asset_code")):
+                    continue
+                try:
+                    closed_at = datetime.fromisoformat(trade["ledger_close_time"].replace("Z", "+00:00"))
+                    if window_start <= closed_at <= window_end:
+                        trades.append(trade)
+                except (KeyError, ValueError, TypeError) as e:
+                    print(f"Error parsing trade: {e}")
+
+            trades.reverse()  # restore chronological order
+        except Exception as e:
+            print(f"Error getting trades: {e}")
+
+        return trades
+
+    def get_network_stats(self) -> Dict[str, Any]:
+        """
+        Get general Stellar network statistics.
+
+        Returns:
+            Dictionary with network metrics, or {} if either request fails
+        """
+        try:
+            # Latest ledger: newest first, one record
+            ledgers_call = self.server.ledgers().order(desc=True).limit(1)
+            ledgers = self._retry_request(ledgers_call.call)
+            ledger_records = ledgers["_embedded"]["records"]
+            latest_ledger = ledger_records[0] if ledger_records else {}
+
+            # fee_stats() only builds the request; .call performs it
+            fee_stats = self._retry_request(self.server.fee_stats().call)
+
+            # Horizon names the counter "successful_transaction_count"; the
+            # older "transaction_count" key is kept as a fallback.
+            tx_count = latest_ledger.get("successful_transaction_count", latest_ledger.get("transaction_count", 0))
+
+            return {
+                "latest_ledger": latest_ledger.get("sequence", 0),
+                "ledger_close_time": latest_ledger.get("closed_at", ""),
+                "transaction_count": tx_count,
+                "operation_count": latest_ledger.get("operation_count", 0),
+                "base_fee": fee_stats.get("last_ledger_base_fee", 0),
+                "fee_pool": fee_stats.get("fee_charged", {}).get("max", 0),
+                "protocol_version": latest_ledger.get("protocol_version", ""),
+                "total_coins": latest_ledger.get("total_coins", "0"),
+            }
+        except Exception as e:
+            print(f"Error getting network stats: {e}")
+            return {}
+
+    def get_account_transactions(
+        self, account_id: str, limit: int = 100
+    ) -> List[TransactionRecord]:
+        """
+        Get recent transactions for a specific account.
+
+        Args:
+            account_id: Stellar account ID
+            limit: Maximum number of transactions to return
+
+        Returns:
+            List of TransactionRecord objects, newest first; records that
+            cannot be parsed are skipped and a failed fetch yields an empty list
+        """
+        transactions = []
+
+        try:
+            transactions_call = (
+                self.server.transactions()
+                .for_account(account_id)
+                .order(desc=True)  # order() takes a bool, not the string "desc"
+                .limit(min(limit, 200))
+            )
+            records = self._retry_request(self._handle_pagination, transactions_call)
+
+            for tx in records[:limit]:
+                try:
+                    fee_xlm = float(tx.get("fee_charged", 0)) * 0.0000001  # Convert stroops to XLM
+                    transaction = TransactionRecord(
+                        id=tx.get("id", ""),
+                        hash=tx.get("hash", ""),
+                        created_at=datetime.fromisoformat(
+                            tx["created_at"].replace("Z", "+00:00")
+                        ),
+                        source_account=tx.get("source_account", ""),
+                        operation_count=int(tx.get("operation_count", 0)),
+                        total_amount=fee_xlm,  # NOTE(review): filled with the fee - confirm intent
+                        fee_charged=fee_xlm,
+                        memo=tx.get("memo", ""),
+                        successful=tx.get("successful", False),
+                    )
+                    transactions.append(transaction)
+                except (KeyError, ValueError, TypeError) as e:
+                    # TypeError covers null fields, e.g. float(None); skip just this record
+                    print(f"Error parsing transaction: {e}")
+                    continue
+
+        except Exception as e:
+            print(f"Error getting account transactions: {e}")
+
+        return transactions
+
+    def clear_cache(self):
+        """Remove every entry from the in-memory request cache (``self.cache``)."""
+        self.cache.clear()
+
+    def test_connection(self) -> bool:
+        """Return True if Horizon's root endpoint answers with a ``horizon_version``."""
+        try:
+            root = self._retry_request(self.server.root().call)  # root() only builds the request
+            return "horizon_version" in root
+        except Exception as e:
+            print(f"Connection test failed: {e}")
+            return False
+
+
+# Convenience functions
+def get_asset_volume(asset_code: str = "XLM", hours: int = 24) -> Dict:
+    """
+    Fetch an asset's volume with a throwaway fetcher and return it as a dict.
+
+    Args:
+        asset_code: Asset code (default: 'XLM')
+        hours: Hours to look back (default: 24)
+
+    Returns:
+        ``VolumeData.to_dict()`` output for the requested window
+    """
+    one_shot_fetcher = StellarDataFetcher()
+    try:
+        return one_shot_fetcher.get_asset_volume(asset_code, hours).to_dict()
+    finally:
+        # Runs on success and on error alike
+        one_shot_fetcher.clear_cache()
+
+
+def get_network_overview() -> Dict:
+    """Return network statistics gathered by a short-lived fetcher."""
+    one_shot_fetcher = StellarDataFetcher()
+    try:
+        return one_shot_fetcher.get_network_stats()
+    finally:
+        one_shot_fetcher.clear_cache()
diff --git a/temp_backup/src/ingestion/stellar_ingestion_checks.py b/temp_backup/src/ingestion/stellar_ingestion_checks.py
new file mode 100644
index 00000000..3025ea78
--- /dev/null
+++ b/temp_backup/src/ingestion/stellar_ingestion_checks.py
@@ -0,0 +1,463 @@
+"""Stellar ingestion quality checks for testnet.
+
+MVP goals (idempotent + low-noise):
+- Detect missing ledger ranges / ingestion lag (best-effort via Horizon ledger + pipeline lag)
+- Detect duplicate events (best-effort; this pipeline currently ingests aggregates, not raw ops)
+- Detect drift between raw events and materialized views (best-effort; currently only aggregates exist)
+- Produce a clear report to stdout + persisted JSON file
+
+This repository's current ingestion pipeline stores *aggregated* on-chain metrics (e.g. XLM volume windows)
+rather than per-transaction/per-operation raw events. Therefore, checks are implemented against
+what we actually persist:
+- network/ledger freshness via Horizon latest ledger close time
+- analytics drift between raw fetched volume vs stored recent analytics/materializations (analytics_records)
+
+If/when raw event tables are added, these checks can be extended without changing the report schema.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+from src.db import PostgresService
+from src.ingestion.stellar_fetcher import StellarDataFetcher
+
+
+REPORT_DIR_DEFAULT = "./data/ingestion_reports"
+
+
+@dataclass
+class CheckFinding:
+    """Outcome of one ingestion quality check, serializable via to_dict()."""
+
+    check_id: str
+    severity: str  # "warning" | "error"
+    passed: bool
+    metric: Optional[str] = None
+    details: Optional[Dict[str, Any]] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the finding as a plain dict; missing details become ``{}``."""
+        core_fields = ("check_id", "severity", "passed", "metric")
+        serialized = {name: getattr(self, name) for name in core_fields}
+        serialized["details"] = self.details if self.details else {}
+        return serialized
+
+
+def _parse_iso_datetime(s: str) -> Optional[datetime]:
+    """Parse an ISO-8601/RFC 3339 timestamp; return None if empty or invalid."""
+    if not s:
+        return None
+    try:
+        # fromisoformat() on older Pythons rejects the "Z" suffix Horizon uses
+        iso_text = s.replace("Z", "+00:00") if s.endswith("Z") else s
+        return datetime.fromisoformat(iso_text)
+    except Exception:
+        return None
+
+
+def _horizon_latest_ledger(fetcher: StellarDataFetcher) -> Dict[str, Any]:
+    """Summarize the newest ledger reported by ``fetcher.get_network_stats()``.
+
+    Best-effort: alternative key names are tried, and a close time that
+    cannot be parsed leaves ``ledger_close_time_dt`` as None.
+    """
+    stats = fetcher.get_network_stats() or {}
+    sequence = stats.get("latest_ledger") or stats.get("latest_ledger_sequence")
+    close_time = stats.get("ledger_close_time") or stats.get("closed_at")
+    # Non-string close times are reported as-is but never parsed
+    parsed = _parse_iso_datetime(close_time) if isinstance(close_time, str) else None
+    close_time_iso = parsed.isoformat() if parsed else None
+    return {"latest_ledger_sequence": sequence, "ledger_close_time": close_time,
+            "ledger_close_time_dt": close_time_iso}
+
+
+def check_ingestion_lag(
+    *,
+    fetcher: StellarDataFetcher,
+    allowed_lag_seconds: int,
+) -> CheckFinding:
+    """Detect ingestion lag.
+
+    We can only reliably measure *network freshness* (latest ledger close time).
+    The current codebase does not persist per-ledger ingestion cursors.
+
+    Heuristic: ingestion is considered stale if Horizon's latest ledger closed_at is
+    older than allowed_lag_seconds.
+
+    Returns an "error" finding when the close time cannot be parsed, otherwise a
+    "warning"-severity finding whose ``passed`` flag says whether the lag is acceptable.
+    """
+    check_id = "missing_ledger_ranges_or_ingestion_lag"
+    latest = _horizon_latest_ledger(fetcher)
+    dt = _parse_iso_datetime(latest.get("ledger_close_time") or "")
+    if dt is None:
+        return CheckFinding(
+            check_id=check_id,
+            severity="error",
+            passed=False,
+            metric="horizon_latest_ledger_close_time",
+            details={"reason": "Could not parse ledger_close_time from Horizon", "latest": latest},
+        )
+    if dt.tzinfo is None:
+        # Horizon reports UTC; an offset-less value would make the subtraction below raise.
+        dt = dt.replace(tzinfo=timezone.utc)
+
+    now = datetime.now(timezone.utc)
+    lag = (now - dt).total_seconds()
+    details = {"now_utc": now.isoformat(), "latest_ledger_close_time": dt.isoformat(),
+               "lag_seconds": lag, "allowed_lag_seconds": allowed_lag_seconds,
+               "latest_ledger_sequence": latest.get("latest_ledger_sequence")}
+
+    return CheckFinding(
+        check_id=check_id, severity="warning", passed=lag <= allowed_lag_seconds,
+        metric="ingestion_lag_seconds", details=details,
+    )
+
+
+def check_duplicate_events_best_effort(
+    *,
+    postgres: Optional[PostgresService],
+    window_hours: int,
+) -> CheckFinding:
+    """Detect duplicates.
+
+    The current ingestion pipeline persists analytics_records (aggregates) and
+    legacy tables (articles, social posts, insights).
+
+    There is no canonical raw event table (tx hash + event index) to dedupe.
+    Therefore we detect likely duplicates by looking for repeated analytics_records
+    with same (record_type, metric_name, asset, window, timestamp bucket).
+
+    Idempotent + safe: read-only. Always reported at "warning" severity.
+    """
+    from collections import Counter  # local import: keeps the file-level imports untouched
+
+    if postgres is None:
+        return CheckFinding(
+            check_id="duplicate_events",
+            severity="warning",
+            passed=True,
+            details={"note": "PostgreSQL unavailable; skipping duplicate event checks"},
+        )
+
+    # Aware UTC "now": datetime.utcnow() is deprecated and returns a naive value.
+    cutoff = datetime.now(timezone.utc) - timedelta(hours=window_hours)
+
+    # PostgresService only exposes get_analytics_records(...)
+    # We'll fetch recent records and compute duplicates in-memory.
+    records = postgres.get_analytics_records(hours=window_hours, limit=5000)
+    if not records:
+        return CheckFinding(
+            check_id="duplicate_events",
+            severity="warning",
+            passed=True,
+            details={"note": "No analytics_records found in window"},
+        )
+
+    # Bucket timestamp to the minute to keep noise low (a missing timestamp stays None).
+    def bucket(ts: Optional[datetime]) -> Optional[str]:
+        return ts.replace(second=0, microsecond=0).isoformat() if ts is not None else None
+
+    seen = Counter(
+        (r.record_type, r.asset, r.metric_name, r.window, bucket(r.timestamp)) for r in records
+    )
+    dupes = [{"key": list(k), "count": c} for k, c in seen.items() if c > 1]
+
+    return CheckFinding(
+        check_id="duplicate_events",
+        severity="warning",
+        passed=not dupes,
+        metric="duplicate_analytics_record_groups",
+        details={
+            "window_hours": window_hours,
+            "records_fetched": len(records),
+            "duplicate_groups": len(dupes),
+            "examples": dupes[:10],
+            "cutoff_utc": cutoff.isoformat(),
+        },
+    )
+
+
+def _compute_expected_volume_windows(asset: str, hours_list: List[int], network: str) -> Dict[str, float]:
+    """Return fresh on-chain volume per look-back, keyed like ``"24h"``."""
+    volume_fetcher = StellarDataFetcher(network=network)
+    try:
+        # One fetch per requested look-back; keys follow the order of hours_list
+        return {
+            f"{h}h": float(volume_fetcher.get_asset_volume(asset, hours=h).total_volume)
+            for h in hours_list
+        }
+    finally:
+        volume_fetcher.clear_cache()
+
+
+def check_drift_between_raw_and_materialized(
+    *,
+    postgres: Optional[PostgresService],
+    asset: str,
+    network: str,
+    hours_list: List[int],
+    compare_window_hours: int,
+    drift_ratio_threshold: float,
+) -> CheckFinding:
+    """Compare freshly fetched volumes against the latest persisted analytics records.
+
+    "Materialized views" are approximated by analytics_records rows whose asset
+    equals ``asset``, whose window is set, and whose metric_name / record_type fall
+    in the allow-lists used below (compared case-insensitively). For each horizon
+    in ``hours_list`` the newest matching row must satisfy
+    ``abs(materialized - expected) / expected <= drift_ratio_threshold``.
+    Passes with a note when ``postgres`` is None or when no row matches at all.
+    """
+    if postgres is None:
+        return CheckFinding(
+            check_id="drift_between_raw_and_materialized_views",
+            severity="warning",
+            passed=True,
+            details={"note": "PostgreSQL unavailable; skipping drift checks"},
+        )
+
+    # Fresh per-horizon volumes from the fetcher, keyed "<hours>h".
+    raw = _compute_expected_volume_windows(asset, hours_list, network)
+
+    # Load the analytics records of the last compare_window_hours (limit=8000)
+    # and narrow them down in memory; no record_type/asset/metric_name filter
+    # is passed to get_analytics_records here.
+    recent = postgres.get_analytics_records(hours=compare_window_hours, limit=8000)
+
+    # Best-effort matching: same asset, a non-null window, and metric_name /
+    # record_type within the accepted spellings (lower-cased before comparing).
+    matches = [
+        r
+        for r in recent
+        if (r.asset == asset)
+        and (str(r.metric_name).lower() in {"volume", "onchain_volume", "xlm_volume"})
+        and (r.window is not None)
+        and (str(r.record_type).lower() in {"onchain_volume", "ingestion_onchain_volume", "stellar_volume"})
+    ]
+
+    if not matches:
+        return CheckFinding(
+            check_id="drift_between_raw_and_materialized_views",
+            severity="warning",
+            passed=True,
+            details={
+                "note": "No matching analytics_records for on-chain volume found; skipping drift check to avoid noise.",
+                "raw": raw,
+                "compare_window_hours": compare_window_hours,
+            },
+        )
+
+    # Keep only the newest record (by timestamp) for each window label.
+    latest_by_window: Dict[str, Any] = {}
+    for r in matches:
+        latest_by_window[r.window] = max(
+            latest_by_window.get(r.window, r),
+            r,
+            key=lambda x: x.timestamp,
+        )
+
+    drift_reports: List[Dict[str, Any]] = []
+    passed_all = True
+    for h in hours_list:
+        window_key_candidates = [f"{h}h", f"{h}h_window", f"{h}h".upper()]  # accepted label spellings
+        found = None
+        for w in window_key_candidates:
+            if w in latest_by_window:
+                found = latest_by_window[w]
+                break
+        if found is None:
+            passed_all = False  # a horizon with no persisted record fails the check
+            drift_reports.append({
+                "window": f"{h}h",
+                "status": "missing_materialization",
+            })
+            continue
+
+        materialized = float(found.value)
+        expected = float(raw[f"{h}h"])
+        if expected == 0:
+            ratio = None  # relative drift is undefined against a zero baseline
+            abs_diff = abs(materialized - expected)
+            passed = abs_diff == 0
+        else:
+            ratio = abs(materialized - expected) / expected
+            passed = ratio <= drift_ratio_threshold
+        passed_all = passed_all and passed
+
+        drift_reports.append({
+            "window": f"{h}h",
+            "expected_raw_volume": expected,
+            "materialized_volume": materialized,
+            "abs_diff": abs(materialized - expected),
+            "drift_ratio": ratio,
+            "threshold": drift_ratio_threshold,
+            "passed": passed,
+        })
+
+    return CheckFinding(
+        check_id="drift_between_raw_and_materialized_views",
+        severity="warning" if not passed_all else "warning",  # NOTE(review): both branches are "warning"
+        passed=passed_all,
+        metric="drift_ratio",
+        details={
+            "asset": asset,
+            "network": network,
+            "raw": raw,
+            "compare_window_hours": compare_window_hours,
+            "drift_ratio_threshold": drift_ratio_threshold,
+            "drift_reports": drift_reports,
+        },
+    )
+
+
+def run_all_checks(
+    *,
+    network: str,
+    asset: str,
+    ingestion_lag_seconds: int,
+    dup_window_hours: int,
+    drift_compare_window_hours: int,
+    drift_ratio_threshold: float,
+    hours_list: List[int],
+    report_dir: str,
+    manual_run_id: Optional[str],
+) -> Dict[str, Any]:
+    """Run the lag, duplicate and drift checks, write a JSON report under ``report_dir`` and return it with an ``exit_code``."""
+
+    report_ts = datetime.now(timezone.utc).isoformat()
+
+    report_path = Path(report_dir)
+    report_path.mkdir(parents=True, exist_ok=True)
+
+    out_file = report_path / f"stellar_ingestion_quality_{report_ts.replace(':','-')}.json"  # no ":" in the file name
+
+    # The fetcher and the PostgreSQL service are built here rather than passed in by the caller.
+    fetcher = StellarDataFetcher(network=network)
+
+    postgres: Optional[PostgresService] = None
+    try:
+        postgres = PostgresService()
+    except Exception:
+        postgres = None  # best-effort: the DB-backed checks are then called with postgres=None
+
+    findings: List[CheckFinding] = []
+
+    findings.append(
+        check_ingestion_lag(
+            fetcher=fetcher,
+            allowed_lag_seconds=ingestion_lag_seconds,
+        )
+    )
+
+    findings.append(
+        check_duplicate_events_best_effort(
+            postgres=postgres,
+            window_hours=dup_window_hours,
+        )
+    )
+
+    findings.append(
+        check_drift_between_raw_and_materialized(
+            postgres=postgres,
+            asset=asset,
+            network=network,
+            hours_list=hours_list,
+            compare_window_hours=drift_compare_window_hours,
+            drift_ratio_threshold=drift_ratio_threshold,
+        )
+    )
+
+    passed = all(f.passed for f in findings)
+
+    report: Dict[str, Any] = {
+        "schema_version": 1,
+        "generated_at": report_ts,
+        "network": network,
+        "asset": asset,
+        "manual_run_id": manual_run_id,
+        "thresholds": {
+            "ingestion_lag_seconds": ingestion_lag_seconds,
+            "duplicate_check_window_hours": dup_window_hours,
+            "drift_compare_window_hours": drift_compare_window_hours,
+            "drift_ratio_threshold": drift_ratio_threshold,
+            "drift_hours_list": hours_list,
+        },
+        "summary": {
+            "passed": passed,
+            "findings_total": len(findings),
+            "findings_failed": sum(1 for f in findings if not f.passed),
+        },
+        "findings": [f.to_dict() for f in findings],
+    }
+
+    # Persist the report as indented UTF-8 JSON (written before exit_code is added).
+    with open(out_file, "w", encoding="utf-8") as f:
+        json.dump(report, f, indent=2, ensure_ascii=False)
+
+    # Human-readable summary on stdout, one line per finding.
+    print("\n=== Stellar Ingestion Quality Report ===")
+    print(f"generated_at: {report_ts}")
+    print(f"network: {network} | asset: {asset}")
+    print(f"passed: {passed}")
+    print(f"report_file: {str(out_file)}")
+    for fi in findings:
+        status = "PASS" if fi.passed else "FAIL"
+        print(f"- [{status}] {fi.check_id} severity={fi.severity} metric={fi.metric}")
+
+    # Low-noise exit policy (MVP): only a failed finding whose check_id is
+    # "missing_ledger_ranges_or_ingestion_lag" makes the exit code non-zero;
+    # failed duplicate/drift findings are reported but still exit 0.
+    critical_fail = any((f.check_id == "missing_ledger_ranges_or_ingestion_lag") and (not f.passed) for f in findings)
+    return {
+        **report,
+        "exit_code": 1 if critical_fail else 0,
+    }
+
+
+def main(argv: Optional[List[str]] = None) -> int:
+    """Parse CLI/env options, run every quality check and return the process exit code."""
+    parser = argparse.ArgumentParser(description="Run Stellar ingestion quality checks (testnet-focused).")
+    parser.add_argument("--network", default=os.getenv("STELLAR_NETWORK", "testnet"), choices=["testnet", "public"], help="Horizon network selector")
+    parser.add_argument("--asset", default=os.getenv("ONCHAIN_ASSET", "XLM"), help="Asset code")
+    parser.add_argument("--ingestion-lag-seconds", type=int, default=int(os.getenv("INGESTION_LAG_SECONDS", "300")), help="Max allowed lag between Horizon latest ledger close time and now")
+    parser.add_argument("--duplicate-window-hours", type=int, default=int(os.getenv("DUPLICATE_WINDOW_HOURS", "24")), help="Lookback window for duplicate analytics record grouping")
+    parser.add_argument("--drift-compare-window-hours", type=int, default=int(os.getenv("DRIFT_COMPARE_WINDOW_HOURS", "24")), help="Lookback for materialized view records")
+    parser.add_argument("--drift-ratio-threshold", type=float, default=float(os.getenv("DRIFT_RATIO_THRESHOLD", "0.05")), help="Max allowed relative drift (abs(diff)/expected)")
+    parser.add_argument("--drift-hours", default=os.getenv("DRIFT_HOURS_LIST", "24,48"), help="Comma-separated list of horizons to compare, e.g. 24,48")
+    parser.add_argument("--report-dir", default=os.getenv("INGESTION_REPORT_DIR", REPORT_DIR_DEFAULT), help="Directory to persist reports")
+    parser.add_argument("--manual-run-id", default=os.getenv("MANUAL_RUN_ID"), help="Optional run identifier")
+    args = parser.parse_args(argv)
+
+    # A malformed --drift-hours / DRIFT_HOURS_LIST value is a usage error, not a crash.
+    try:
+        hours_list = [int(x.strip()) for x in str(args.drift_hours).split(",") if x.strip()]
+    except ValueError:
+        parser.error(f"--drift-hours must be a comma-separated list of integers, got {args.drift_hours!r}")
+    if not hours_list:
+        hours_list = [24, 48]
+
+    result = run_all_checks(
+        network=args.network,
+        asset=str(args.asset).upper(),
+        ingestion_lag_seconds=args.ingestion_lag_seconds,
+        dup_window_hours=args.duplicate_window_hours,
+        drift_compare_window_hours=args.drift_compare_window_hours,
+        drift_ratio_threshold=args.drift_ratio_threshold,
+        hours_list=hours_list,
+        report_dir=args.report_dir,
+        manual_run_id=args.manual_run_id,
+    )
+    return int(result.get("exit_code", 0))
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s integer return value becomes the process exit status
+
diff --git a/temp_backup/src/main.py b/temp_backup/src/main.py
new file mode 100644
index 00000000..233b7293
--- /dev/null
+++ b/temp_backup/src/main.py
@@ -0,0 +1,382 @@
+"""
+Main entry point for the data processing pipeline with both single-run and scheduled modes.
+"""
+
+import os
+import sys
+import logging
+import signal
+import time
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
+from dotenv import load_dotenv
+
+# Add the src directory to the Python path
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
+# Import both pipeline and scheduler
+from src.ingestion.news_fetcher import fetch_news
+from src.ingestion.price_fetcher import PriceFetcher
+from src.ingestion.stellar_fetcher import get_asset_volume, get_network_overview
+from src.validators import validate_news_article, validate_onchain_metric
+from src.analytics.market_analyzer import MarketAnalyzer, MarketData
+from src.analytics.market_analyzer import get_explanation
+from src.sentiment import SentimentAnalyzer
+from src.anomaly_detector import AnomalyDetector
+from src.alert_notifier import notifier
+from scheduler import AnalyticsScheduler
+
+from src.utils.logger import setup_logger, CorrelationIdFilter
+from src.utils.metrics import API_FAILURES_TOTAL, start_metrics_server
+from pythonjsonlogger import jsonlogger
+
+# Configure logging: the module logger additionally writes JSON records to ./logs/data_processor.log
+logger = setup_logger(__name__)
+os.makedirs("./logs", exist_ok=True)  # the directory must exist before FileHandler opens the file
+file_handler = logging.FileHandler("./logs/data_processor.log")
+formatter = jsonlogger.JsonFormatter(
+    "%(asctime)s %(levelname)s %(name)s %(correlation_id)s %(message)s",
+    rename_fields={"levelname": "level"}
+)
+file_handler.addFilter(CorrelationIdFilter())  # NOTE(review): presumably supplies correlation_id for the format string — confirm
+file_handler.setFormatter(formatter)
+logger.addHandler(file_handler)
+
+# Module-level detector so it accumulates rolling window data across
+# scheduled pipeline runs (meaningful baselines build up over time).
+anomaly_detector = AnomalyDetector(window_size_hours=24, z_threshold=2.5)
+
+# Global scheduler instance; assigned in start_scheduler() and read by the signal handler
+scheduler = None
+
+
+def setup_signal_handlers():
+    """Install SIGINT/SIGTERM handlers that stop the scheduler (if any) and exit with status 0."""
+
+    def _shutdown(signum, frame):
+        logger.info("Received shutdown signal, cleaning up...")
+        if scheduler:
+            scheduler.stop()
+        sys.exit(0)
+
+    for sig_id in (signal.SIGINT, signal.SIGTERM):
+        signal.signal(sig_id, _shutdown)
+
+
+def run_data_pipeline():
+    """Run one pipeline pass (fetch, validate, sentiment, trend, anomalies) and return a result dict whose "success" is True or False."""
+    print("=" * 60)
+    print("DATA PROCESSING PIPELINE")
+    print("=" * 60)
+    print(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    print()
+
+    try:
+        pipeline_start = time.perf_counter()
+
+        # Steps 1 & 2: fetch news, on-chain volumes, network overview and prices concurrently
+        print("1. FETCHING DATA (news + on-chain in parallel)")
+        print("-" * 40)
+
+        price_fetcher = PriceFetcher()
+        with ThreadPoolExecutor(max_workers=5) as io_pool:
+            news_future = io_pool.submit(fetch_news, limit=5)
+            vol_24h_future = io_pool.submit(get_asset_volume, "XLM", hours=24)
+            vol_48h_future = io_pool.submit(get_asset_volume, "XLM", hours=48)
+            network_future = io_pool.submit(get_network_overview)
+            price_future = io_pool.submit(price_fetcher.fetch_all_prices, ["XLM", "USDC"])
+
+            raw_news_articles = news_future.result()
+            raw_volume_24h = vol_24h_future.result()
+            raw_volume_48h = vol_48h_future.result()
+            network_stats = network_future.result()
+            raw_price_feed = price_future.result()
+
+        fetch_elapsed = time.perf_counter() - pipeline_start
+        print(f"All fetches completed in {fetch_elapsed:.2f}s (parallel)")
+
+        # Keep only the articles the validator accepts; rejected ones are logged and dropped
+        news_articles = []
+        for idx, article in enumerate(raw_news_articles):
+            validated = validate_news_article(article)
+            if validated:
+                news_articles.append(validated.dict())
+            else:
+                logger.warning(f"Dropped invalid news article at index {idx}")
+
+        print(f"Fetched {len(raw_news_articles)} raw → {len(news_articles)} validated articles")
+
+        print("\n2. PRICE FEED")
+        print("-" * 40)
+        if raw_price_feed:
+            for price_point in raw_price_feed:
+                status = "stale" if price_point.get("is_stale") else "fresh"
+                print(
+                    f"{price_point['asset_code']}: ${price_point['price_usd']:.7f} "
+                    f"({price_point['price']} scaled, decimals={price_point['asset_decimals']}, {status})"
+                )
+        else:
+            print("Price feed unavailable")
+
+        # Step 3: sentiment analysis over title + summary of every validated article
+        print("\n3. SENTIMENT ANALYSIS")
+        print("-" * 40)
+
+        sentiment_analyzer = SentimentAnalyzer()
+        if news_articles:
+            article_texts = [
+                (a.get("title", "") + " " + a.get("summary", "")).strip()
+                for a in news_articles
+            ]
+            sentiment_results = sentiment_analyzer.analyze_batch_parallel(article_texts)
+            summary = sentiment_analyzer.get_sentiment_summary(sentiment_results)
+            avg_sentiment = summary["average_compound_score"]
+            print(f"Avg sentiment: {avg_sentiment:.4f} "
+                  f"(+{summary['positive_count']} / "
+                  f"-{summary['negative_count']} / "
+                  f"~{summary['neutral_count']})")
+        else:
+            avg_sentiment = 0.0
+            sentiment_results = []
+            print("No valid articles, using neutral sentiment")
+
+        # Step 4: validate on-chain metrics. NOTE(review): the validated .dict() is indexed by "total_volume" below — confirm the validator's model exposes that key
+        print("\n4. STELLAR ON-CHAIN DATA")
+        print("-" * 40)
+
+        validated_volume_24h = validate_onchain_metric({
+            "metric_id": "xlm_volume_24h",
+            "value": raw_volume_24h.get("total_volume", 0.0),
+            "timestamp": raw_volume_24h.get("end_time", ""),
+            "chain": "stellar",
+            "extra": raw_volume_24h,
+        })
+        if validated_volume_24h:
+            volume_24h = validated_volume_24h.dict()
+        else:
+            logger.warning("Invalid on-chain metric for 24h volume, using defaults.")
+            volume_24h = {"total_volume": 0.0, "transaction_count": 0}
+
+        print(f"XLM Volume (24h): {volume_24h.get('total_volume', 0.0):,.2f}")
+        print(f"Transactions: {volume_24h.get('transaction_count', 0)}")
+
+        validated_volume_48h = validate_onchain_metric({
+            "metric_id": "xlm_volume_48h",
+            "value": raw_volume_48h.get("total_volume", 0.0),
+            "timestamp": raw_volume_48h.get("end_time", ""),
+            "chain": "stellar",
+            "extra": raw_volume_48h,
+        })
+        if validated_volume_48h:
+            volume_48h = validated_volume_48h.dict()
+        else:
+            logger.warning("Invalid on-chain metric for 48h volume, using defaults.")
+            volume_48h = {"total_volume": 0.0}
+
+        # Relative volume change. NOTE(review): if the 48h figure is cumulative, this ratio can never be positive — confirm the intended baseline
+        if volume_48h["total_volume"] > 0:
+            volume_change = (
+                volume_24h["total_volume"] - volume_48h["total_volume"]
+            ) / volume_48h["total_volume"]
+            print(f"Volume Change (24h vs 48h): {volume_change:.2%}")
+        else:
+            volume_change = 0.0
+            print("Insufficient data for volume change calculation")
+
+        if network_stats:
+            print(f"Latest Ledger: {network_stats.get('latest_ledger', 'N/A')}")
+            print(f"Transaction Count: {network_stats.get('transaction_count', 0)}")
+
+        # Step 5: Market Analysis
+        print("\n5. MARKET ANALYSIS")
+        print("-" * 40)
+
+        # Inputs of the trend model: average sentiment and relative volume change
+        market_data = MarketData(
+            sentiment_score=avg_sentiment, volume_change=volume_change
+        )
+
+        # Analyze market trend
+        trend, score, metrics = MarketAnalyzer.analyze_trend(market_data)
+
+        print(f"Market Health Score: {score:.2f}")
+        print(f"Trend: {trend.value.upper()}")
+        print(f"Sentiment Component: {metrics['sentiment_component']:.2f}")
+        print(f"Volume Component: {metrics['volume_component']:.2f}")
+
+        # Generate explanation
+        explanation = get_explanation(score, trend)
+        print(f"\nAnalysis: {explanation}")
+
+        # Step 6: Anomaly Detection
+        print("\n6. ANOMALY DETECTION")
+        print("-" * 40)
+
+        current_volume = float(volume_24h["total_volume"])
+        now = datetime.utcnow()  # naive UTC timestamp shared by the data point and both detections
+
+        # Feed current data point into the rolling window detector
+        anomaly_detector.add_data_point(
+            volume=current_volume,
+            sentiment_score=avg_sentiment,
+            timestamp=now,
+        )
+
+        # Run detection on both metrics
+        volume_anomaly = anomaly_detector.detect_volume_anomaly(current_volume, now)
+        sentiment_anomaly = anomaly_detector.detect_sentiment_anomaly(avg_sentiment, now)
+
+        anomalies_found = []
+
+        for result in [volume_anomaly, sentiment_anomaly]:
+            status = "⚠️  ANOMALY" if result.is_anomaly else "✓  Normal"
+            print(
+                f"{status} | {result.metric_name.capitalize():<10} | "
+                f"value={result.current_value:.4f} | "
+                f"z={result.z_score:.2f} | "
+                f"severity={result.severity_score:.2f}"
+            )
+            if result.is_anomaly:
+                anomalies_found.append(result.to_dict())
+                logger.warning(
+                    f"Anomaly detected — metric={result.metric_name}, "
+                    f"value={result.current_value:.4f}, "
+                    f"z_score={result.z_score:.2f}, "
+                    f"severity={result.severity_score:.2f}"
+                )
+
+        # Alert once per run; both results are handed over even when only one is anomalous
+        if anomalies_found:
+            notifier.notify_batch([volume_anomaly, sentiment_anomaly])
+
+        window_stats = anomaly_detector.get_window_stats()
+        print(f"Detector window: {window_stats['data_points_count']} data points")
+
+        if not anomalies_found:
+            print("No anomalies detected in current pipeline run.")
+
+        # Step 7: Output summary
+        total_elapsed = time.perf_counter() - pipeline_start
+        print("\n7. PIPELINE SUMMARY")
+        print("-" * 40)
+        print(f"✓ News Articles Processed: {len(news_articles)}")
+        print(f"✓ Sentiment Scores Computed: {len(sentiment_results)}")
+        print(f"✓ XLM Volume Analyzed: {volume_24h['total_volume']:,.2f}")
+        print(f"✓ Market Trend: {trend.value.upper()}")
+        print(f"✓ Anomalies Detected: {len(anomalies_found)}")
+        print(f"✓ Total Pipeline Time: {total_elapsed:.2f}s")
+        print(f"✓ Analysis Complete: {datetime.now().strftime('%H:%M:%S')}")
+
+        result = {
+            "success": True,
+            "news_count": len(news_articles),
+            "volume_xlm": volume_24h["total_volume"],
+            "price_feed": raw_price_feed,
+            "market_trend": trend.value,
+            "health_score": score,
+            "anomalies": anomalies_found,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+        logger.info(f"Pipeline completed successfully: {result}")
+        return result
+
+    except Exception as e:
+        error_msg = f"Pipeline Error: {e}"
+        print(f"\n❌ {error_msg}")
+        import traceback
+
+        traceback.print_exc()
+        logger.error(error_msg, exc_info=True)
+        API_FAILURES_TOTAL.labels(method="worker", endpoint="pipeline").inc()
+        return {
+            "success": False,
+            "error": str(e),
+            "timestamp": datetime.now().isoformat(),
+        }
+
+
+def start_scheduler():
+    """Start the metrics server and the scheduler, then block until the process is stopped."""
+    global scheduler
+
+    # Start metrics server on port 9091 for background worker
+    start_metrics_server(port=9091)
+
+    logger.info("=" * 70)
+    logger.info("LumenPulse Data Processing Service Starting")
+    logger.info("=" * 70)
+
+    try:
+        # Build the scheduler around run_data_pipeline and install the signal handlers before starting it
+        scheduler = AnalyticsScheduler(run_data_pipeline)
+        setup_signal_handlers()
+
+        # Option to run immediately on startup (useful for testing)
+        run_on_startup = os.getenv("RUN_IMMEDIATELY", "false").lower() == "true"
+
+        if run_on_startup:
+            logger.info("Running analyzer immediately on startup...")
+            scheduler.run_immediately()
+
+        # Start the scheduler
+        scheduler.start()
+
+        logger.info("Data processing service is running. Press Ctrl+C to stop.")
+        logger.info("The Market Analyzer will run automatically every hour.")
+
+        # Park the main thread; the process ends through the signal handler's
+        # sys.exit(0) or through the error path below.
+
+        while True:
+            time.sleep(1)
+
+    except Exception as e:
+        logger.error(f"Fatal error in data processing service: {e}", exc_info=True)
+        if scheduler:
+            scheduler.stop()
+        sys.exit(1)
+
+
+def main():
+    """Dispatch on the first CLI argument: ``run``, ``serve``, ``help`` or no argument.
+
+    Returns the pipeline result dict for ``run`` and for no argument,
+    ``{"help": True}`` for ``help``, an ``{"error": ...}`` dict for an unknown
+    command, and ``None`` for ``serve``.
+    """
+    load_dotenv()
+    os.makedirs("./logs", exist_ok=True)  # create the logs directory if it doesn't exist
+
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        # No sub-command: run once, then print a completion banner.
+        outcome = run_data_pipeline()
+        print("\n" + "=" * 60)
+        print("PIPELINE COMPLETE")
+        print("=" * 60)
+        return outcome
+
+    command = cli_args[0].lower()
+    if command == "run":
+        return run_data_pipeline()
+    if command == "serve":
+        start_scheduler()
+        return None
+    if command == "help":
+        print("Usage:")
+        print("  python pipeline.py run     - Run pipeline once")
+        print("  python pipeline.py serve   - Start scheduled service")
+        print("  python pipeline.py help    - Show this help")
+        return {"help": True}
+    print(f"Unknown command: {command}")
+    print("Use 'python pipeline.py help' for usage instructions")
+    return {"error": f"Unknown command: {command}"}
+
+
+if __name__ == "__main__":
+    result = main()  # dict for run/help/unknown command; None for serve
+    if result and result.get("help"):
+        sys.exit(0)
+    elif result and ("error" in result or not result.get("success", True)):
+        sys.exit(1)  # failed pipeline runs and unknown commands both signal failure
\ No newline at end of file
diff --git a/temp_backup/src/ml/__init__.py b/temp_backup/src/ml/__init__.py
new file mode 100644
index 00000000..3a6ef8f9
--- /dev/null
+++ b/temp_backup/src/ml/__init__.py
@@ -0,0 +1,28 @@
+"""
+ML module for price prediction and other data-driven models.
+"""
+
+from .price_predictor import PricePredictor
+from .model_registry import (
+    save_model,
+    load_model,
+    promote_model,
+    get_live_model,
+    list_versions,
+    get_current_version,
+    get_registry_status,
+)
+from .retraining_pipeline import run_retraining, get_last_run_status
+
+__all__ = [
+    "PricePredictor",
+    "save_model",
+    "load_model",
+    "promote_model",
+    "get_live_model",
+    "list_versions",
+    "get_current_version",
+    "get_registry_status",
+    "run_retraining",
+    "get_last_run_status",
+]
diff --git a/temp_backup/src/ml/feature_store.py b/temp_backup/src/ml/feature_store.py
new file mode 100644
index 00000000..a7d29cb0
--- /dev/null
+++ b/temp_backup/src/ml/feature_store.py
@@ -0,0 +1,83 @@
+import pandas as pd
+from sqlalchemy.orm import Session
+from sqlalchemy import text
+from datetime import datetime, timedelta, timezone
+
+class FeatureStore:
+    """Assembles per-asset feature frames (sentiment, volume, volatility) from SQL views."""
+
+    def __init__(self, db_session: Session):
+        """Store ``db_session``, the SQLAlchemy session all queries run on."""
+        self.db = db_session
+
+    def _parse_window_to_datetime(self, window: str) -> datetime:
+        """Return the aware UTC time one ``window`` ago ('24h', '7d'); ValueError on other suffixes."""
+        now = datetime.now(timezone.utc)
+        if window.endswith('h'):
+            return now - timedelta(hours=int(window[:-1]))
+        elif window.endswith('d'):
+            return now - timedelta(days=int(window[:-1]))
+        else:
+            raise ValueError("Unsupported window format. Use 'h' (hours) or 'd' (days).")
+
+    def _ensure_columns(self, df: pd.DataFrame, expected_col: str) -> pd.DataFrame:
+        """Add empty ``timestamp`` / ``expected_col`` columns to ``df`` (in place) when missing."""
+        if 'timestamp' not in df.columns:
+            df['timestamp'] = pd.Series(dtype='datetime64[ns]')
+        if expected_col not in df.columns:
+            df[expected_col] = pd.Series(dtype='float64')
+        return df
+
+    def get_features_for_asset(self, asset: str, window: str) -> pd.DataFrame:
+        """Return sentiment, volume and volatility for ``asset`` merged on ``timestamp``.
+
+        Reads ``asset_sentiment_view``, ``asset_volume_view`` and ``asset_volatility_view``
+        from ``window`` ago (e.g. ``'24h'``, ``'7d'``) onwards, outer-joins them, sorts by
+        time, forward-fills and zero-fills the rest. A failing query is logged and yields
+        an empty frame with the expected columns. Raises ``ValueError`` for a bad ``window``.
+        """
+        import logging  # local import: this module has no top-level logging import
+
+        start_time = self._parse_window_to_datetime(window)
+        sentiment_query = text("""
+            SELECT timestamp, sentiment_score FROM asset_sentiment_view
+            WHERE asset = :asset AND timestamp >= :start_time
+        """)
+        volume_query = text("""
+            SELECT timestamp, volume FROM asset_volume_view
+            WHERE asset = :asset AND timestamp >= :start_time
+        """)
+        volatility_query = text("""
+            SELECT timestamp, volatility FROM asset_volatility_view
+            WHERE asset = :asset AND timestamp >= :start_time
+        """)
+
+        conn = self.db.connection()
+        try:
+            params = {"asset": asset, "start_time": start_time}
+            sentiment_df = pd.read_sql(sentiment_query, conn, params=params)
+            volume_df = pd.read_sql(volume_query, conn, params=params)
+            volatility_df = pd.read_sql(volatility_query, conn, params=params)
+        except Exception:
+            # Stay best-effort (callers still get an empty frame) but leave a trace of the failure.
+            logging.getLogger(__name__).exception("Feature queries failed for asset %r (window %r)", asset, window)
+            sentiment_df = pd.DataFrame()
+            volume_df = pd.DataFrame()
+            volatility_df = pd.DataFrame()
+
+        # Guarantee the join key and the value column exist even for empty frames.
+        sentiment_df = self._ensure_columns(sentiment_df, 'sentiment_score')
+        volume_df = self._ensure_columns(volume_df, 'volume')
+        volatility_df = self._ensure_columns(volatility_df, 'volatility')
+
+        # Outer joins keep every timestamp that appears in any of the three sources.
+        features_df = pd.merge(sentiment_df, volume_df, on='timestamp', how='outer')
+        features_df = pd.merge(features_df, volatility_df, on='timestamp', how='outer')
+        if features_df.empty:
+            return features_df  # nothing to sort or fill; the headers are already in place
+
+        features_df.sort_values('timestamp', inplace=True)
+        features_df.ffill(inplace=True)
+        features_df.fillna(0, inplace=True)  # leading gaps that ffill cannot reach
+
+        return features_df
\ No newline at end of file
diff --git a/temp_backup/src/ml/model_registry.py b/temp_backup/src/ml/model_registry.py
new file mode 100644
index 00000000..077c662b
--- /dev/null
+++ b/temp_backup/src/ml/model_registry.py
@@ -0,0 +1,223 @@
+"""
+Model Registry - versioned model storage with atomic zero-downtime swap.
+
+Versions follow semver-lite: v<major>.<minor>  (e.g. v1.0, v1.1, v2.0)
+Each model type (sentiment, price_predictor) is stored independently.
+
+Directory layout:
+  models/
+    sentiment/
+      v1.0.pkl
+      v1.1.pkl
+      current -> v1.1.pkl   (symlink, updated atomically)
+    price_predictor/
+      v1.0.pkl
+      current -> v1.0.pkl
+"""
+
+import os
+import pickle
+import shutil
+import threading
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, Optional, Tuple
+
+from src.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+_MODELS_ROOT = Path(os.getenv("MODEL_REGISTRY_PATH", "./models"))
+
+# In-memory hot-swap: the live model is held here so the API never reads disk
+# during inference. A reentrant mutex (threading.RLock) guards concurrent access.
+_live_models: Dict[str, Any] = {}
+_live_versions: Dict[str, str] = {}
+_lock = threading.RLock()
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+def _model_dir(model_type: str) -> Path:
+    """Return (creating it if needed) the directory that stores one model type's versions."""
+    os.makedirs(_MODELS_ROOT / model_type, exist_ok=True)
+    return _MODELS_ROOT / model_type
+
+
+def _symlink_path(model_type: str) -> Path:
+    return _model_dir(model_type).joinpath("current")  # the 'current' pointer for this type
+
+
+def _version_path(model_type: str, version: str) -> Path:
+    return _model_dir(model_type).joinpath(f"{version}.pkl")  # pickle file for one version
+
+
+def _next_version(model_type: str) -> str:
+    """Return the highest saved ``v<major>.<minor>`` with its minor bumped ("v1.0" if none)."""
+    import re
+
+    # save_model() accepts arbitrary explicit names (e.g. "v2.0-rc1"); skip anything that
+    # is not v<major>.<minor> instead of crashing in int(), and compare numerically.
+    matches = (re.fullmatch(r"v(\d+)\.(\d+)", v) for v in list_versions(model_type))
+    parsed = [(int(m.group(1)), int(m.group(2))) for m in matches if m]
+    if not parsed:
+        return "v1.0"
+    major, minor = max(parsed)
+    return f"v{major}.{minor + 1}"
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def save_model(model_type: str, model_obj: Any, version: Optional[str] = None) -> str:
+    """
+    Persist a trained model to disk and return the version string.
+
+    Args:
+        model_type: e.g. "sentiment" or "price_predictor"
+        model_obj:  The object to pickle (sklearn pipeline, VADER lexicon dict, …)
+        version:    Explicit version string; auto-incremented if omitted.
+
+    Returns:
+        The version string that was saved (e.g. "v1.2").
+    """
+    version = _next_version(model_type) if version is None else version
+    path = _version_path(model_type, version)
+    part = path.with_name(path.name + ".part")  # staged copy; not matched by the v*.pkl glob
+    with open(part, "wb") as fh:
+        pickle.dump(model_obj, fh, protocol=pickle.HIGHEST_PROTOCOL)
+    os.replace(part, path)  # atomic publish: a failed dump never leaves a truncated .pkl
+
+    logger.info(f"Model saved: type={model_type} version={version} path={path}")
+    return version
+
+
+def load_model(model_type: str, version: str = "current") -> Any:
+    """
+    Read a pickled model for ``model_type`` back from the registry directory.
+
+    Args:
+        model_type: e.g. "sentiment" or "price_predictor"
+        version:    A saved version string, or "current" to follow the symlink.
+
+    Returns:
+        The unpickled model object.
+
+    Raises:
+        FileNotFoundError: no 'current' symlink, or the resolved file is missing.
+    """
+    if version != "current":
+        path = _version_path(model_type, version)
+    else:
+        current_link = _symlink_path(model_type)
+        if not current_link.exists():
+            raise FileNotFoundError(
+                f"No current model for '{model_type}'. Run retraining first."
+            )
+        path = current_link.resolve()
+    if not path.exists():
+        raise FileNotFoundError(f"Model not found: {path}")
+    with open(path, "rb") as fh:  # NOTE: unpickling runs code; registry files must be trusted
+        obj = pickle.load(fh)
+    logger.info(f"Model loaded from disk: type={model_type} version={version}")
+    return obj
+
+
+def promote_model(model_type: str, version: str) -> None:
+    """
+    Atomically promote a saved version to 'current' (zero-downtime swap).
+
+    The model is unpickled first, so a corrupt file aborts the promotion before
+    anything changes. The on-disk symlink is then replaced atomically and the
+    in-memory hot model swapped under the same lock, so in-flight requests
+    finish with the old model while new requests immediately use the new one.
+
+    Args:
+        model_type: e.g. "sentiment" or "price_predictor"
+        version:    The version to promote (must already be saved).
+
+    Raises:
+        FileNotFoundError: ``version`` has not been saved for ``model_type``.
+    """
+    target = _version_path(model_type, version)
+    if not target.exists():
+        raise FileNotFoundError(
+            f"Cannot promote {model_type}@{version}: file not found at {target}"
+        )
+    new_model = load_model(model_type, version)  # validate before touching 'current'
+
+    sym = _symlink_path(model_type)
+    tmp_sym = sym.with_suffix(".tmp")
+    with _lock:  # serialises concurrent promotions; disk and memory change together
+        if tmp_sym.exists() or tmp_sym.is_symlink():
+            tmp_sym.unlink()
+        tmp_sym.symlink_to(target.name)  # relative link, resolved inside the model dir
+        tmp_sym.replace(sym)  # atomic on POSIX; unlike rename() also overwrites on Windows
+        _live_models[model_type] = new_model
+        _live_versions[model_type] = version
+
+    logger.info(f"Model promoted: type={model_type} version={version} (zero-downtime swap complete)")
+
+
+def get_live_model(model_type: str) -> Any:
+    """
+    Return the currently active in-memory model.
+    Falls back to loading from disk if not yet warm.
+
+    Args:
+        model_type: e.g. "sentiment" or "price_predictor"
+
+    Returns:
+        The live model object.
+    """
+    with _lock:
+        if model_type in _live_models:
+            return _live_models[model_type]
+    # Cold start: load outside the lock so slow disk I/O does not block other readers.
+    model = load_model(model_type, "current")
+    with _lock:
+        if model_type not in _live_models:  # else a promotion won the race: keep its model
+            _live_models[model_type] = model
+            sym = _symlink_path(model_type)
+            if sym.exists():
+                _live_versions[model_type] = sym.resolve().stem  # filename without .pkl
+        return _live_models[model_type]
+
+
+def list_versions(model_type: str) -> list:
+    """Return saved version strings for a model type, in numeric version order."""
+    def _key(v: str):  # numeric parts compare as ints, so v1.10 sorts after v1.9
+        return [(0, int(p)) if p.isdecimal() else (1, p) for p in v[1:].split(".")]
+
+    d = _model_dir(model_type)
+    return sorted((p.stem for p in d.glob("v*.pkl")), key=_key)
+
+
+def get_current_version(model_type: str) -> Optional[str]:
+    """Return the promoted version: the in-memory record if warm, else the symlink target."""
+    with _lock:
+        try:
+            return _live_versions[model_type]
+        except KeyError:
+            pass  # not warm yet; fall back to the on-disk symlink
+
+    current_link = _symlink_path(model_type)
+    return current_link.resolve().stem if current_link.exists() else None
+
+
+def get_registry_status() -> Dict[str, Any]:
+    """Return a status snapshot of all registered model types."""
+    if not _MODELS_ROOT.exists():
+        return {}
+    return {
+        entry.name: {
+            "current_version": get_current_version(entry.name),
+            "available_versions": list_versions(entry.name),
+            "live_in_memory": entry.name in _live_models,
+        }
+        for entry in _MODELS_ROOT.iterdir()
+        if entry.is_dir()
+    }
diff --git a/temp_backup/src/ml/price_predictor.py b/temp_backup/src/ml/price_predictor.py
new file mode 100644
index 00000000..be9e1e14
--- /dev/null
+++ b/temp_backup/src/ml/price_predictor.py
@@ -0,0 +1,93 @@
+import logging
+import pandas as pd
+import numpy as np
+from typing import Dict, Any, List, Optional
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, r2_score
+
+logger = logging.getLogger(__name__)
+
+class PricePredictor:
+    """
+    Asset price regressor: a StandardScaler feeding a LinearRegression in one sklearn Pipeline.
+    """
+
+    def __init__(self, model_name: str = "linear_regression"):
+        self.model_name = model_name  # label used in log messages
+        self.pipeline = self._build_pipeline()
+        self.is_trained = False  # flipped by a successful fit(); predict() refuses until then
+        self.metrics: Dict[str, float] = {}  # hold-out "mse" / "r2" from the latest fit()
+
+    def _build_pipeline(self) -> Pipeline:
+        """
+        Assemble the unfitted pipeline: feature standardisation, then linear regression.
+        """
+        scaling_step = ('scaler', StandardScaler())
+        regression_step = ('regressor', LinearRegression())
+        return Pipeline([scaling_step, regression_step])
+
+    def fit(self, data: pd.DataFrame, target_column: str = 'target') -> Dict[str, float]:
+        """
+        Fit the pipeline on 80% of ``data`` and score it on the remaining 20%.
+
+        Args:
+            data: DataFrame containing features and the target column.
+            target_column: The name of the column to predict.
+
+        Returns:
+            A dictionary with the hold-out "mse" and "r2".
+
+        Raises:
+            ValueError: ``data`` is empty or lacks ``target_column``.
+        """
+        if data.empty:
+            raise ValueError("Training data is empty.")
+        if target_column not in data.columns:
+            raise ValueError(f"Target column '{target_column}' not found in data.")
+
+        logger.info(f"Training PricePredictor model: {self.model_name}")
+
+        labels = data[target_column]
+        inputs = data.drop(columns=[target_column])
+        # Fixed seed keeps the train/hold-out partition reproducible between runs.
+        train_in, holdout_in, train_out, holdout_out = train_test_split(
+            inputs, labels, test_size=0.2, random_state=42
+        )
+
+        self.pipeline.fit(train_in, train_out)
+        predicted = self.pipeline.predict(holdout_in)
+        mse = float(mean_squared_error(holdout_out, predicted))
+        r2 = float(r2_score(holdout_out, predicted))
+        self.metrics = {"mse": mse, "r2": r2}
+        self.is_trained = True
+
+        logger.info(f"Model trained successfully. Metrics: {self.metrics}")
+        return self.metrics
+
+    def predict(self, features: pd.DataFrame) -> np.ndarray:
+        """
+        Run the fitted pipeline over ``features``.
+
+        Args:
+            features: DataFrame containing the features for prediction.
+
+        Returns:
+            Array of predicted values (an empty array when ``features`` is empty).
+
+        Raises:
+            RuntimeError: fit() has not completed on this instance.
+        """
+        if not self.is_trained:
+            raise RuntimeError("Model must be trained before calling predict.")
+        if features.empty:
+            return np.array([])
+
+        logger.info(f"Predicting with model: {self.model_name}")
+        return self.pipeline.predict(features)
+
+    def get_metrics(self) -> Dict[str, float]:
+        """Return the metrics dict stored by the most recent fit()."""
+        return self.metrics
diff --git a/temp_backup/src/ml/retraining_pipeline.py b/temp_backup/src/ml/retraining_pipeline.py
new file mode 100644
index 00000000..6b16e61d
--- /dev/null
+++ b/temp_backup/src/ml/retraining_pipeline.py
@@ -0,0 +1,274 @@
+"""
+Automated Model Retraining Pipeline (Issue #454)
+
+Retrains both models on fresh data, evaluates quality gates,
+versions the artifacts, and promotes them with zero downtime.
+
+Models:
+  - sentiment   : VADER lexicon + custom crypto slang dictionary
+  - price_predictor : scikit-learn LinearRegression pipeline
+"""
+
+import os
+import json
+import threading
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any, Dict, Optional, Tuple
+
+import pandas as pd
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+from src.ml.model_registry import (
+    save_model,
+    promote_model,
+    get_current_version,
+    get_registry_status,
+)
+from src.ml.price_predictor import PricePredictor
+from src.utils.logger import setup_logger
+from src.utils.metrics import JOBS_RUN_TOTAL, MODEL_RETRAINING_TOTAL, MODEL_RETRAINING_DURATION
+
+logger = setup_logger(__name__)
+
+# Path to the custom crypto-slang lexicon file (JSON: {"word": score, ...})
+_SLANG_LEXICON_PATH = Path(
+    os.getenv("CRYPTO_SLANG_LEXICON", "./data/crypto_slang_lexicon.json")
+)
+
+# Quality gates: minimum acceptable metrics before promotion
+_MIN_SENTIMENT_COVERAGE = float(os.getenv("MIN_SENTIMENT_COVERAGE", "0.0"))
+_MIN_PRICE_R2 = float(os.getenv("MIN_PRICE_R2", "-1.0"))  # permissive default
+
+# Thread-safety: only one retraining run at a time
+_retrain_lock = threading.Lock()
+
+# Last run metadata (in-memory only; lost when the process restarts)
+_last_run: Dict[str, Any] = {}
+
+
+# ---------------------------------------------------------------------------
+# Sentiment model retraining
+# ---------------------------------------------------------------------------
+
+def _load_crypto_slang() -> Dict[str, float]:
+    """
+    Load the custom crypto-slang lexicon from disk.
+    Returns an empty dict if the file is missing; raises ValueError if it is not a JSON object.
+    """
+    if not _SLANG_LEXICON_PATH.exists():
+        logger.warning(
+            f"Crypto slang lexicon not found at {_SLANG_LEXICON_PATH}. "
+            "Using base VADER lexicon only."
+        )
+        return {}
+    with open(_SLANG_LEXICON_PATH, encoding="utf-8") as fh:  # do not depend on the locale encoding
+        lexicon = json.load(fh)
+    if not isinstance(lexicon, dict):  # fail here, not later inside lexicon.update()
+        raise ValueError(f"{_SLANG_LEXICON_PATH} must contain a JSON object of word -> score")
+    logger.info(f"Loaded {len(lexicon)} custom crypto-slang entries")
+    return lexicon
+
+
+def _build_sentiment_model() -> Tuple[SentimentIntensityAnalyzer, Dict[str, Any]]:
+    """
+    Build a VADER analyzer enriched with the latest crypto-slang lexicon.
+
+    Returns:
+        (analyzer, metrics_dict)
+    """
+    analyzer = SentimentIntensityAnalyzer()
+    base_size = len(analyzer.lexicon)  # counted before enrichment; no second analyzer needed
+    slang = _load_crypto_slang()
+    if slang:
+        analyzer.lexicon.update(slang)
+        logger.info(f"Enriched VADER lexicon with {len(slang)} crypto-slang terms")
+
+    metrics = {
+        "base_lexicon_size": base_size,
+        "custom_terms_added": len(slang),
+        "total_lexicon_size": len(analyzer.lexicon),
+        "coverage_ratio": len(slang) / max(len(analyzer.lexicon), 1),
+    }
+    return analyzer, metrics
+
+
+# ---------------------------------------------------------------------------
+# Price predictor retraining
+# ---------------------------------------------------------------------------
+
+def _fetch_training_data(db_session=None) -> pd.DataFrame:
+    """
+    Fetch recent feature data for the price predictor.
+
+    In production this queries the feature store; falls back to a
+    synthetic dataset so the pipeline never hard-fails in CI/dev.
+    """
+    if db_session is not None:
+        try:
+            from src.ml.feature_store import FeatureStore
+            df = FeatureStore(db_session).get_features_for_asset("XLM", "30d")
+            if len(df) >= 20:
+                # Same numeric columns as the synthetic path; drops non-features such as timestamp.
+                df = df[["sentiment_score", "volume", "volatility"]].copy()
+                df["target"] = df["sentiment_score"].shift(-1)  # next-period sentiment
+                df = df.dropna()
+                logger.info(f"Fetched {len(df)} rows from feature store for retraining")
+                return df
+            logger.warning(f"Only {len(df)} feature rows (need 20), using synthetic data")
+        except Exception as exc:
+            logger.warning(f"Feature store unavailable, using synthetic data: {exc}")
+    # Synthetic fallback — keeps the pipeline runnable without a live DB
+    import numpy as np
+    rng = np.random.default_rng(seed=int(datetime.utcnow().timestamp()) % 10_000)
+    n = 200
+    df = pd.DataFrame({
+        "sentiment_score": rng.uniform(-1, 1, n),
+        "volume": rng.uniform(1_000, 100_000, n),
+        "volatility": rng.uniform(0, 0.5, n),
+        "target": rng.uniform(-1, 1, n),
+    })
+    logger.info("Using synthetic training data (no live DB session provided)")
+    return df
+
+
+def _build_price_predictor(db_session=None) -> Tuple[PricePredictor, Dict[str, Any]]:
+    """
+    Train a fresh PricePredictor on the frame returned by ``_fetch_training_data``.
+
+    Returns:
+        (predictor, metrics_dict) — the metrics are whatever ``fit`` reports.
+    """
+    predictor = PricePredictor(model_name="linear_regression")
+    training_frame = _fetch_training_data(db_session)
+    metrics = predictor.fit(training_frame, target_column="target")
+    logger.info(f"PricePredictor retrained: {metrics}")
+    return predictor, metrics
+
+
+# ---------------------------------------------------------------------------
+# Orchestrator
+# ---------------------------------------------------------------------------
+
+def run_retraining(
+    db_session=None,
+    force: bool = False,
+) -> Dict[str, Any]:
+    """
+    Full retraining run: train → evaluate → version → promote (one run at a time).
+
+    Args:
+        db_session: Optional SQLAlchemy session for the feature store.
+        force:      Skip quality gates and always promote.
+
+    Returns:
+        A result dict with versions, metrics, and status; errors are reported in it, not raised.
+    """
+    global _last_run
+
+    if not _retrain_lock.acquire(blocking=False):  # non-blocking: never queue behind a running retrain
+        logger.warning("Retraining already in progress, skipping this trigger")
+        return {"status": "skipped", "reason": "already_running"}  # _last_run is left untouched
+
+    started_at = datetime.utcnow()
+    result: Dict[str, Any] = {
+        "status": "started",
+        "started_at": started_at.isoformat(),
+        "models": {},
+    }
+
+    try:
+        logger.info("=" * 60)
+        logger.info("Automated Model Retraining Pipeline — START")
+        logger.info(f"Timestamp: {started_at.isoformat()}")
+
+        # ── 1. Sentiment model ──────────────────────────────────────────────
+        logger.info("Step 1: Retraining sentiment model …")
+        with MODEL_RETRAINING_DURATION.labels(model_type="sentiment").time():
+            sentiment_model, sentiment_metrics = _build_sentiment_model()
+
+        passes_sentiment_gate = (
+            force
+            or sentiment_metrics["coverage_ratio"] >= _MIN_SENTIMENT_COVERAGE
+        )
+
+        if passes_sentiment_gate:
+            s_version = save_model("sentiment", sentiment_model)
+            promote_model("sentiment", s_version)
+            MODEL_RETRAINING_TOTAL.labels(model_type="sentiment", status="success").inc()
+            result["models"]["sentiment"] = {
+                "version": s_version,
+                "metrics": sentiment_metrics,
+                "promoted": True,
+            }
+            logger.info(f"Sentiment model promoted: {s_version}")
+        else:
+            MODEL_RETRAINING_TOTAL.labels(model_type="sentiment", status="failed").inc()
+            result["models"]["sentiment"] = {
+                "metrics": sentiment_metrics,
+                "promoted": False,
+                "reason": "quality_gate_failed",
+            }
+            logger.warning("Sentiment model did NOT pass quality gate — skipping promotion")
+
+        # ── 2. Price predictor ──────────────────────────────────────────────
+        logger.info("Step 2: Retraining price predictor …")
+        with MODEL_RETRAINING_DURATION.labels(model_type="price_predictor").time():
+            price_model, price_metrics = _build_price_predictor(db_session)
+
+        passes_price_gate = force or price_metrics.get("r2", -999) >= _MIN_PRICE_R2  # missing "r2" counts as -999
+
+        if passes_price_gate:
+            p_version = save_model("price_predictor", price_model)
+            promote_model("price_predictor", p_version)
+            MODEL_RETRAINING_TOTAL.labels(model_type="price_predictor", status="success").inc()
+            result["models"]["price_predictor"] = {
+                "version": p_version,
+                "metrics": price_metrics,
+                "promoted": True,
+            }
+            logger.info(f"PricePredictor promoted: {p_version}")
+        else:
+            MODEL_RETRAINING_TOTAL.labels(model_type="price_predictor", status="failed").inc()
+            result["models"]["price_predictor"] = {
+                "metrics": price_metrics,
+                "promoted": False,
+                "reason": "quality_gate_failed",
+            }
+            logger.warning("PricePredictor did NOT pass quality gate — skipping promotion")
+
+        # ── 3. Finalise ─────────────────────────────────────────────────────
+        finished_at = datetime.utcnow()
+        result.update(
+            {
+                "status": "completed",
+                "finished_at": finished_at.isoformat(),
+                "duration_seconds": (finished_at - started_at).total_seconds(),
+                "registry": get_registry_status(),
+            }
+        )
+
+        JOBS_RUN_TOTAL.inc()  # only reached when no step raised
+        logger.info("Automated Model Retraining Pipeline — DONE")
+        logger.info("=" * 60)
+
+    except Exception as exc:  # any step raising aborts the remaining steps; reported via result
+        result.update(
+            {
+                "status": "failed",
+                "error": str(exc),
+                "finished_at": datetime.utcnow().isoformat(),
+            }
+        )
+        logger.error(f"Retraining pipeline failed: {exc}", exc_info=True)
+
+    finally:
+        _last_run = result  # what get_last_run_status() returns afterwards
+        _retrain_lock.release()
+
+    return result
+
+
+def get_last_run_status() -> Dict[str, Any]:
+    """Return the latest recorded run result, or a "never_run" marker if none exists yet."""
+    return _last_run if _last_run else {"status": "never_run"}
diff --git a/temp_backup/src/qa_exporter.py b/temp_backup/src/qa_exporter.py
new file mode 100644
index 00000000..017be4a3
--- /dev/null
+++ b/temp_backup/src/qa_exporter.py
@@ -0,0 +1,256 @@
+"""
+QA Dataset Exporter
+
+Exports raw events, materialized views, and KPIs for a given Stellar ledger range.
+Intended for QA engineers and contributor debugging.
+
+Output format: JSON files written to output_dir/
+  - events_<start>_<end>.json      : raw contract events (from AnalyticsRecord where record_type='event')
+  - views_<start>_<end>.json       : materialized views (aggregated Article + SocialPost sentiment)
+  - kpis_<start>_<end>.json        : computed KPIs (from AssetTrend)
+
+Each file has the envelope:
+  {
+    "status": "completed",
+    "exported_at": "<ISO-8601>",
+    "start_ledger": <int>, "end_ledger": <int>,
+    "dataset": "events" | "views" | "kpis",
+    "count": <int>,
+    "records": [ ... ]    (for views: an object holding three lists)
+  }
+"""
+
+import json
+import logging
+import sys
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from sqlalchemy import create_engine, select, and_
+from sqlalchemy.orm import sessionmaker
+
+from src.db.models import AnalyticsRecord, Article, AssetTrend, SocialPost
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ExportResult:
+    """Outcome of one dataset export: what was written, where, and how many rows."""
+
+    dataset: str  # dataset label, e.g. "events"
+    path: str  # output file path, as a string
+    count: int  # number of exported rows
+    status: str  # outcome label, e.g. "completed"
+
+    def to_dict(self) -> Dict[str, Any]:
+        return dict(
+            dataset=self.dataset,
+            path=self.path,
+            count=self.count,
+            status=self.status,
+        )
+
+
+class QAExporter:
+    """
+    Exports QA datasets (events, views, KPIs) for a Stellar ledger range.
+
+    Ledger numbers are mapped to AnalyticsRecord / AssetTrend rows via the
+    ``extra_data->>'ledger'`` JSON field written by the ingestion pipeline.
+    Articles and SocialPosts are included in the views export regardless of
+    ledger (they carry no ledger field) when no ledger filter can be applied.
+    """
+
+    def __init__(
+        self,
+        start_ledger: int,
+        end_ledger: int,
+        output_dir: str,
+        database_url: Optional[str] = None,
+    ):
+        import os
+
+        self.start_ledger = start_ledger
+        self.end_ledger = end_ledger
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        db_url = database_url or os.getenv(
+            "DATABASE_URL",
+            "postgresql://postgres:postgres@localhost:5432/lumenpulse",
+        )
+        engine = create_engine(db_url, pool_pre_ping=True, echo=False)
+        self.Session = sessionmaker(bind=engine, expire_on_commit=False)
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _envelope(self, records: List[Dict], dataset: str) -> Dict[str, Any]:
+        return {
+            "status": "completed",
+            "exported_at": datetime.now(timezone.utc).isoformat(),
+            "start_ledger": self.start_ledger,
+            "end_ledger": self.end_ledger,
+            "dataset": dataset,
+            "count": len(records),
+            "records": records,
+        }
+
+    def _write(self, data: Dict, name: str) -> Path:
+        """Write ``data`` as UTF-8 JSON to <name>_<start>_<end>.json and return that path."""
+        path = self.output_dir / f"{name}_{self.start_ledger}_{self.end_ledger}.json"
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2, default=str)
+        return path
+
+    def _ledger_filter(self, model):
+        """
+        Return a SQLAlchemy filter that restricts rows whose extra_data JSON
+        contains a 'ledger' key within [start_ledger, end_ledger].
+        Returns None (logging a warning) if the filter cannot be built.
+        """
+        from sqlalchemy import Integer
+
+        try:
+            ledger_col = model.extra_data["ledger"].astext.cast(Integer)
+            return and_(
+                ledger_col >= self.start_ledger,
+                ledger_col <= self.end_ledger,
+            )
+        except Exception:
+            logger.warning("No ledger filter for %s; exporting all rows", getattr(model, "__name__", model))
+            return None  # no ledger field on this model; caller handles it
+
+    # ------------------------------------------------------------------
+    # Export methods
+    # ------------------------------------------------------------------
+
+    def export_events(self) -> ExportResult:
+        """Export raw events (AnalyticsRecord rows with record_type='event')."""
+        with self.Session() as session:
+            q = select(AnalyticsRecord).where(
+                AnalyticsRecord.record_type == "event"
+            )
+            ledger_f = self._ledger_filter(AnalyticsRecord)
+            if ledger_f is not None:
+                q = q.where(ledger_f)
+
+            rows = session.execute(q).scalars().all()
+            records = [
+                {
+                    "id": r.id,
+                    "record_type": r.record_type,
+                    "asset": r.asset,
+                    "metric_name": r.metric_name,
+                    "window": r.window,
+                    "value": r.value,
+                    "previous_value": r.previous_value,
+                    "change_percentage": r.change_percentage,
+                    "trend_direction": r.trend_direction,
+                    "extra_data": r.extra_data,
+                    "timestamp": r.timestamp.isoformat() if r.timestamp else None,
+                }
+                for r in rows
+            ]
+
+        data = self._envelope(records, "events")
+        path = self._write(data, "events")
+        logger.info("Exported %d events → %s", len(records), path)
+        return ExportResult("events", str(path), len(records), "completed")
+
+    def export_views(self) -> ExportResult:
+        """
+        Export materialized views: aggregated sentiment from Articles and
+        SocialPosts, plus all non-event AnalyticsRecord rows.
+        """
+        with self.Session() as session:
+            articles = session.execute(select(Article)).scalars().all()
+            posts = session.execute(select(SocialPost)).scalars().all()
+
+            analytics_q = select(AnalyticsRecord).where(
+                AnalyticsRecord.record_type != "event"
+            )
+            analytics = session.execute(analytics_q).scalars().all()
+
+            records = {
+                "articles": [
+                    {
+                        "article_id": a.article_id,
+                        "title": a.title,
+                        "source": a.source,
+                        "primary_asset": a.primary_asset,
+                        "sentiment_score": a.sentiment_score,
+                        "sentiment_label": a.sentiment_label,
+                        "published_at": a.published_at.isoformat() if a.published_at else None,
+                    }
+                    for a in articles
+                ],
+                "social_posts": [
+                    {
+                        "post_id": p.post_id,
+                        "platform": p.platform,
+                        "primary_asset": p.primary_asset,
+                        "sentiment_score": p.sentiment_score,
+                        "sentiment_label": p.sentiment_label,
+                        "posted_at": p.posted_at.isoformat() if p.posted_at else None,
+                    }
+                    for p in posts
+                ],
+                "analytics_records": [
+                    {
+                        "id": r.id,
+                        "record_type": r.record_type,
+                        "asset": r.asset,
+                        "metric_name": r.metric_name,
+                        "window": r.window,
+                        "value": r.value,
+                        "timestamp": r.timestamp.isoformat() if r.timestamp else None,
+                    }
+                    for r in analytics
+                ],
+            }
+
+        total = len(records["articles"]) + len(records["social_posts"]) + len(records["analytics_records"])
+        data = self._envelope(records, "views")  # type: ignore[arg-type]
+        data["count"] = total
+        path = self._write(data, "views")
+        logger.info("Exported views (%d total rows) → %s", total, path)
+        return ExportResult("views", str(path), total, "completed")
+
+    def export_kpis(self) -> ExportResult:
+        """Export KPIs from AssetTrend rows within the ledger range."""
+        with self.Session() as session:
+            q = select(AssetTrend)
+            ledger_f = self._ledger_filter(AssetTrend)  # same range rule as export_events
+            if ledger_f is not None:
+                q = q.where(ledger_f)
+            rows = session.execute(q).scalars().all()
+            records = [
+                {
+                    "id": r.id,
+                    "asset": r.asset,
+                    "metric_name": r.metric_name,
+                    "window": r.window,
+                    "trend_direction": r.trend_direction,
+                    "score": r.score,
+                    "current_value": r.current_value,
+                    "previous_value": r.previous_value,
+                    "change_percentage": r.change_percentage,
+                    "extra_data": r.extra_data,
+                    "timestamp": r.timestamp.isoformat() if r.timestamp else None,
+                }
+                for r in rows
+            ]
+
+        data = self._envelope(records, "kpis")
+        path = self._write(data, "kpis")
+        logger.info("Exported %d KPIs → %s", len(records), path)
+        return ExportResult("kpis", str(path), len(records), "completed")
+
+    def run(self) -> List[ExportResult]:
+        """Run all three exports and return results."""
+        return [self.export_events(), self.export_views(), self.export_kpis()]
diff --git a/temp_backup/src/scheduler.py b/temp_backup/src/scheduler.py
new file mode 100644
index 00000000..1bb768d6
--- /dev/null
+++ b/temp_backup/src/scheduler.py
@@ -0,0 +1,285 @@
+"""
+Job scheduler module - schedules and manages background jobs
+"""
+
+from src.utils.logger import setup_logger
+from src.utils.metrics import JOBS_RUN_TOTAL
+from datetime import datetime
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.interval import IntervalTrigger
+from apscheduler.triggers.cron import CronTrigger
+from apscheduler.job import Job
+
+from fetchers import NewsFetcher
+from sentiment import SentimentAnalyzer
+from trends import TrendCalculator
+from database import DatabaseService, AnalyticsRecord
+from anomaly_detector import AnomalyDetector, AnomalyResult
+from alertbot import AlertBot
+from src.ml.retraining_pipeline import run_retraining, get_last_run_status
+from src.ingestion.run_ingestion_quality_checks import main as run_ingestion_quality_checks
+
+
+logger = setup_logger(__name__)
+
+
+class MarketAnalyzer:
+    """Main job that orchestrates the entire analysis pipeline.
+
+    Holds one instance of each collaborator (news fetcher, sentiment analyzer,
+    trend calculator, database service, anomaly detector, alert bot) and drives
+    them in sequence from :meth:`run`.
+    """
+
+    def __init__(self):
+        self.fetcher = NewsFetcher()
+        self.sentiment_analyzer = SentimentAnalyzer()
+        self.trend_calculator = TrendCalculator()
+        self.db_service = DatabaseService()
+        # Detector is configured with a 24-hour window and a z-threshold of 2.5.
+        self.anomaly_detector = AnomalyDetector(window_size_hours=24, z_threshold=2.5)
+        self.alert_bot = AlertBot()
+
+    def run(self):
+        """
+        Execute the full analysis pipeline:
+        1. Fetch news (returns early when nothing was fetched)
+        2. Analyze sentiment and build the summary
+        3. Calculate trends
+        4. Detect anomalies
+        5. Send the sentiment alert check, then save the record to the DB
+
+        Any ``Exception`` raised along the way is logged with its traceback
+        and not re-raised. ``JOBS_RUN_TOTAL`` is incremented only when the
+        database save reports success.
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("Starting MarketAnalyzer job")
+            logger.info(f"Timestamp: {datetime.utcnow().isoformat()}")
+
+            # Step 1: Fetch News
+            logger.info("Step 1: Fetching news...")
+            news_items = self.fetcher.fetch_all_news()
+
+            # Nothing to analyze: stop before any scoring, alerting or DB write.
+            if not news_items:
+                logger.warning("No news items fetched")
+                return
+
+            # Step 2: Analyze Sentiment
+            logger.info(
+                f"Step 2: Analyzing sentiment for {len(news_items)} articles..."
+            )
+            # Each article is scored on its title and content joined by a space.
+            news_texts = [f"{item.title} {item.content}" for item in news_items]
+            sentiment_results = self.sentiment_analyzer.analyze_batch(news_texts)
+            sentiment_summary = self.sentiment_analyzer.get_sentiment_summary(
+                sentiment_results
+            )
+
+            # Step 3: Calculate Trends
+            logger.info("Step 3: Calculating trends...")
+            trends = self.trend_calculator.calculate_all_trends(sentiment_summary)
+            trends_dict = [trend.to_dict() for trend in trends]
+
+            # Step 4: Detect Anomalies
+            logger.info("Step 4: Detecting market anomalies...")
+
+            # Get volume data (mock for demo - in real implementation, fetch actual volume)
+            current_volume = 1000.0  # This would come from Stellar fetcher
+            current_sentiment = sentiment_summary.get("average_compound_score", 0)
+
+            # Detect anomalies
+            anomalies = self.anomaly_detector.detect_anomalies(
+                volume=current_volume, sentiment_score=current_sentiment
+            )
+
+            # Log every result; only flagged anomalies are collected for the
+            # final "Anomalies detected" count.
+            anomaly_alerts = []
+            for anomaly in anomalies:
+                if anomaly.is_anomaly:
+                    logger.warning(
+                        f"🚨 ANOMALY DETECTED: {anomaly.metric_name} "
+                        f"(Severity: {anomaly.severity_score:.2f}, "
+                        f"Z-Score: {anomaly.z_score:.2f})"
+                    )
+                    anomaly_alerts.append(anomaly.to_dict())
+                else:
+                    logger.debug(
+                        f"Normal {anomaly.metric_name} behavior "
+                        f"(Z-Score: {anomaly.z_score:.2f})"
+                    )
+
+            # Step 5: Save to Database
+            logger.info("Step 5: Saving analytics to database...")
+
+            # Work on a copy so the anomaly fields are not added to
+            # sentiment_summary itself. "anomaly_details" holds every result,
+            # flagged or not; "anomalies_detected" counts only the flagged ones.
+            enhanced_sentiment_data = sentiment_summary.copy()
+            enhanced_sentiment_data["anomalies_detected"] = len(
+                [a for a in anomalies if a.is_anomaly]
+            )
+            enhanced_sentiment_data["anomaly_details"] = [
+                a.to_dict() for a in anomalies
+            ]
+
+            # Step 5.5: Check for high sentiment alerts
+            # The first calculated trend supplies the direction; "Unknown" is
+            # used when there are no trends or the attribute is missing.
+            trend_direction = "Unknown"
+            if trends:
+                primary_trend = trends[0]
+                trend_direction = getattr(primary_trend, "trend_direction", "Unknown")
+
+            # The alert payload is a second copy, so the two extra keys below
+            # are not part of the data saved in the analytics record.
+            alert_sentiment_data = enhanced_sentiment_data.copy()
+            alert_sentiment_data["trend_direction"] = trend_direction
+            alert_sentiment_data["total_analyzed"] = len(news_items)
+
+            # The alert check runs before the database save.
+            self.alert_bot.check_and_alert(
+                analyzer_score=current_sentiment,
+                sentiment_data=alert_sentiment_data,
+                timestamp=datetime.utcnow(),
+            )
+
+            # NOTE(review): this is a separate utcnow() call, so the record's
+            # timestamp is not the same value that was passed to the alert bot.
+            record = AnalyticsRecord(
+                timestamp=datetime.utcnow(),
+                news_count=len(news_items),
+                sentiment_data=enhanced_sentiment_data,
+                trends=trends_dict,
+            )
+
+            success = self.db_service.save_analytics(record)
+
+            if success:
+                logger.info("✓ Analytics job completed successfully")
+                logger.info(f"  - News items: {len(news_items)}")
+                logger.info(
+                    f"  - Average sentiment: {sentiment_summary.get('average_compound_score', 0):.4f}"
+                )
+                logger.info(
+                    f"  - Positive: {sentiment_summary.get('sentiment_distribution', {}).get('positive', 0):.1%}"
+                )
+                logger.info(
+                    f"  - Negative: {sentiment_summary.get('sentiment_distribution', {}).get('negative', 0):.1%}"
+                )
+                logger.info(f"  - Anomalies detected: {len(anomaly_alerts)}")
+                # Counted only for runs whose record was saved.
+                JOBS_RUN_TOTAL.inc()
+            else:
+                logger.error("✗ Failed to save analytics to database")
+
+            logger.info("=" * 60)
+        except Exception as e:
+            # Log with traceback and swallow, so the failure does not propagate
+            # to whoever invoked the job.
+            logger.error(f"Error in MarketAnalyzer job: {e}", exc_info=True)
+
+
+def _retraining_job() -> None:
+    """
+    Scheduled retraining job wrapper.
+    Runs the full retraining pipeline and logs the outcome.
+    Errors are caught so a failed retrain never crashes the scheduler.
+    """
+    logger.info("Scheduled model retraining job triggered")
+    try:
+        result = run_retraining()
+        if result.get("status") == "completed":
+            logger.info(
+                f"Scheduled retraining completed in "
+                f"{result.get('duration_seconds', 0):.1f}s — "
+                f"models: {list(result.get('models', {}).keys())}"
+            )
+        else:
+            logger.warning(f"Scheduled retraining ended with status: {result.get('status')}")
+    except Exception as exc:
+        logger.error(f"Scheduled retraining job raised an exception: {exc}", exc_info=True)
+
+
+def _ingestion_quality_checks_job() -> None:
+    """Run Stellar testnet ingestion quality checks.
+
+    Scheduled wrapper. Errors are caught so the scheduler keeps running.
+    """
+    try:
+        run_ingestion_quality_checks(argv=None)
+    except SystemExit:
+        # CLI may call sys.exit; ignore to keep scheduler alive.
+        pass
+    except Exception as e:
+        logger.error(f"Ingestion quality checks failed: {e}", exc_info=True)
+
+
+class AnalyticsScheduler:
+
+    """Manages the APScheduler scheduler for analytics jobs"""
+
+    def __init__(self, pipeline_fn=None):
+        self.scheduler = BackgroundScheduler()
+        self.analyzer = MarketAnalyzer()
+        # Allow injecting a custom pipeline function (used by main.py)
+        self._pipeline_fn = pipeline_fn
+
+    def start(self):
+        """Start the scheduler with all registered jobs."""
+        try:
+            # ── Market Analyzer: every hour ──────────────────────────────
+            run_fn = self._pipeline_fn if self._pipeline_fn else self.analyzer.run
+            market_job = self.scheduler.add_job(
+                func=run_fn,
+                trigger=IntervalTrigger(hours=1),
+                id="market_analyzer_hourly",
+                name="Market Analyzer - Hourly Analytics",
+                replace_existing=True,
+            )
+
+            # ── Stellar ingestion quality checks: every hour ──────────
+            # Low-noise: only fails CI/process when ingestion lag is critical.
+            quality_job = self.scheduler.add_job(
+                func=self._ingestion_quality_checks_job,
+                trigger=IntervalTrigger(hours=1),
+                id="stellar_ingestion_quality_checks_hourly",
+                name="Stellar Ingestion Quality Checks - Hourly",
+                replace_existing=True,
+            )
+
+            # ── Model Retraining: daily at 02:00 UTC ─────────────────────
+            retrain_job = self.scheduler.add_job(
+                func=_retraining_job,
+                trigger=CronTrigger(hour=2, minute=0, timezone="UTC"),
+                id="model_retraining_daily",
+                name="Automated Model Retraining - Daily",
+                replace_existing=True,
+            )
+
+            self.scheduler.start()
+            logger.info("✓ Analytics scheduler started")
+            logger.info(f"  - Job: {market_job.name} | Next: {market_job.next_run_time}")
+            logger.info(f"  - Job: {retrain_job.name} | Next: {retrain_job.next_run_time}")
+        except Exception as e:
+            logger.error(f"Error starting scheduler: {e}")
+            raise
+
+    def run_immediately(self):
+        """Run the analyzer job immediately (useful for testing)"""
+        logger.info("Running MarketAnalyzer immediately...")
+        if self._pipeline_fn:
+            self._pipeline_fn()
+        else:
+            self.analyzer.run()
+
+    def trigger_retraining(self, force: bool = False) -> dict:
+        """Manually trigger a retraining run (e.g. from the API)."""
+        logger.info(f"Manual retraining triggered (force={force})")
+        return run_retraining(force=force)
+
+    def stop(self):
+        """Stop the scheduler"""
+        try:
+            self.scheduler.shutdown(wait=True)
+            logger.info("✓ Analytics scheduler stopped")
+        except Exception as e:
+            logger.error(f"Error stopping scheduler: {e}")
+
+    def get_jobs(self) -> list:
+        """Get list of scheduled jobs"""
+        return self.scheduler.get_jobs()
+
+    def get_job_status(self, job_id: str) -> dict:
+        """Get status of a specific job"""
+        job = self.scheduler.get_job(job_id)
+        if job:
+            return {
+                "id": job.id,
+                "name": job.name,
+                "next_run_time": str(job.next_run_time),
+                "trigger": str(job.trigger),
+            }
+        return None
+
+    def get_retraining_status(self) -> dict:
+        """Return the last retraining run metadata."""
+        return get_last_run_status()
diff --git a/temp_backup/src/security.py b/temp_backup/src/security.py
new file mode 100644
index 00000000..e9ccd55b
--- /dev/null
+++ b/temp_backup/src/security.py
@@ -0,0 +1,215 @@
+"""
+Security middleware for API key authentication and rate limiting.
+"""
+
+import os
+import re
+from typing import Optional, Callable, Dict, Any
+from functools import wraps
+from fastapi import Request, HTTPException, status
+from fastapi.responses import JSONResponse
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.util import get_remote_address
+from slowapi.errors import RateLimitExceeded
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+
+class SecurityConfig:
+    """Security configuration manager."""
+    
+    def __init__(self):
+        self.api_key = os.getenv("API_KEY", "")
+        self.rate_limit_enabled = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
+        self.rate_limit_default = os.getenv("RATE_LIMIT_DEFAULT", "100/minute")
+        self.rate_limit_strict = os.getenv("RATE_LIMIT_STRICT", "10/minute")
+        
+        # Parse rate limit strings
+        self._validate_rate_limit(self.rate_limit_default)
+        self._validate_rate_limit(self.rate_limit_strict)
+    
+    def _validate_rate_limit(self, limit_string: str) -> None:
+        """Validate rate limit string format (e.g., '100/minute')."""
+        pattern = r'^\d+/(second|minute|hour|day)$'
+        if not re.match(pattern, limit_string):
+            raise ValueError(
+                f"Invalid rate limit format: {limit_string}. "
+                "Expected format: 'N/second', 'N/minute', 'N/hour', or 'N/day'"
+            )
+    
+    @property
+    def limiter(self) -> Optional[Limiter]:
+        """Create and configure the rate limiter."""
+        if not self.rate_limit_enabled:
+            return None
+        
+        limiter = Limiter(
+            key_func=get_remote_address,
+            default_limits=[self.rate_limit_default],
+            storage_uri="memory://",  # In-memory storage (use redis:// for production)
+        )
+        return limiter
+    
+    def validate_api_key(self, request: Request) -> bool:
+        """
+        Validate API key from request headers.
+        
+        Args:
+            request: FastAPI request object
+            
+        Returns:
+            True if API key is valid
+            
+        Raises:
+            HTTPException: If API key is missing or invalid
+        """
+        if not self.api_key:
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+                detail="API is not configured: API_KEY environment variable is missing.",
+            )
+
+        api_key_header = request.headers.get("X-API-Key")
+        
+        if not api_key_header:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Missing API key. Please provide X-API-Key header.",
+                headers={"WWW-Authenticate": "ApiKey"},
+            )
+        
+        if api_key_header != self.api_key:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Invalid API key",
+                headers={"WWW-Authenticate": "ApiKey"},
+            )
+        
+        return True
+    
+    def get_limiter_for_endpoint(self, endpoint_type: str = "default") -> Optional[Limiter]:
+        """
+        Get a limiter configured for a specific endpoint type.
+        
+        Args:
+            endpoint_type: Type of endpoint ('default' or 'strict')
+            
+        Returns:
+            Configured Limiter instance or None if rate limiting is disabled
+        """
+        if not self.rate_limit_enabled:
+            return None
+        
+        limit_string = (
+            self.rate_limit_strict 
+            if endpoint_type == "strict" 
+            else self.rate_limit_default
+        )
+        
+        limiter = Limiter(
+            key_func=get_remote_address,
+            default_limits=[limit_string],
+            storage_uri="memory://",
+        )
+        return limiter
+
+
+# Global security config instance
+security_config = SecurityConfig()
+
+
+def require_api_key(func: Callable) -> Callable:
+    """
+    Decorator to require API key authentication for an endpoint.
+    
+    Usage:
+        @app.get("/protected")
+        @require_api_key
+        async def protected_endpoint(request: Request):
+            ...
+    """
+    @wraps(func)
+    async def wrapper(request: Request, *args, **kwargs) -> Any:
+        security_config.validate_api_key(request)
+        return await func(request, *args, **kwargs)
+    return wrapper
+
+
+def setup_security_middleware(app) -> None:
+    """
+    Setup security middleware for a FastAPI application.
+    
+    Args:
+        app: FastAPI application instance
+    """
+    @app.middleware("http")
+    async def api_key_middleware(request: Request, call_next):
+        """Middleware to check API key for all requests except health/metrics."""
+        # Skip API key check for health checks and metrics
+        excluded_paths = [
+            "/health",
+            "/metrics",
+            "/",
+            "/docs",
+            "/redoc",
+            "/openapi.json",
+            "/sentiment/legend",
+        ]
+        
+        if request.url.path in excluded_paths:
+            return await call_next(request)
+        
+        # Validate API key
+        try:
+            security_config.validate_api_key(request)
+        except HTTPException as exc:
+            return JSONResponse(
+                status_code=exc.status_code,
+                content={"detail": exc.detail},
+                headers=exc.headers,
+            )
+        
+        # Continue processing
+        return await call_next(request)
+
+
+def setup_rate_limiter(app, limiter: Limiter) -> None:
+    """
+    Setup rate limiting for a FastAPI application.
+    
+    Args:
+        app: FastAPI application instance
+        limiter: Slowapi Limiter instance
+    """
+    app.state.limiter = limiter
+    app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+    
+    @app.exception_handler(RateLimitExceeded)
+    async def rate_limit_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse:
+        """Custom rate limit exceeded handler."""
+        return JSONResponse(
+            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+            content={
+                "detail": "Rate limit exceeded",
+                "message": "Too many requests. Please try again later.",
+                "retry_after": str(exc.detail),
+            },
+        )
+
+
+def get_rate_limit_decorator(limiter: Limiter, limit_string: Optional[str] = None):
+    """
+    Get a rate limit decorator for specific endpoints.
+    
+    Args:
+        limiter: Slowapi Limiter instance
+        limit_string: Optional custom limit (e.g., "10/minute")
+        
+    Returns:
+        Decorator function for rate limiting
+    """
+    if limit_string:
+        return limiter.limit(limit_string)
+    return limiter.limit
diff --git a/temp_backup/src/sentiment.py b/temp_backup/src/sentiment.py
new file mode 100644
index 00000000..f1b490bd
--- /dev/null
+++ b/temp_backup/src/sentiment.py
@@ -0,0 +1,283 @@
+"""
+Sentiment analyzer module - analyzes sentiment of news articles
+"""
+
+import os
+import logging
+from typing import List, Dict, Any, Optional, Tuple
+from concurrent.futures import ProcessPoolExecutor
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+from dataclasses import dataclass
+
+# Import keyword extractor for asset filtering
+from src.analytics.keywords import KeywordExtractor
+
+logger = logging.getLogger(__name__)
+
+# Minimum batch size to justify spawning worker processes.
+_PARALLEL_THRESHOLD = 20
+
+
+def _analyze_in_worker(args: Tuple[str, Optional[str]]) -> dict:
+    """Process-safe sentiment analysis for a single text.
+
+    Each worker initialises its own VADER analyzer and KeywordExtractor
+    because they cannot be pickled across process boundaries.  Redis cache
+    is intentionally skipped in workers to avoid per-process connections.
+    """
+    text, asset_filter = args
+
+    extractor = KeywordExtractor()
+    asset_codes = extractor.extract_tickers_only(text)
+
+    if asset_filter:
+        asset_filter = asset_filter.upper()
+        if asset_filter not in asset_codes:
+            return {
+                "text": text[:100],
+                "compound_score": 0.0,
+                "positive": 0.0,
+                "negative": 0.0,
+                "neutral": 1.0,
+                "sentiment_label": "neutral",
+                "asset_codes": [],
+            }
+
+    analyzer = SentimentIntensityAnalyzer()
+    scores = analyzer.polarity_scores(text)
+    compound = scores["compound"]
+
+    if compound >= 0.05:
+        label = "positive"
+    elif compound <= -0.05:
+        label = "negative"
+    else:
+        label = "neutral"
+
+    return {
+        "text": text[:100],
+        "compound_score": compound,
+        "positive": scores["pos"],
+        "negative": scores["neg"],
+        "neutral": scores["neu"],
+        "sentiment_label": label,
+        "asset_codes": asset_codes,
+    }
+
+
+@dataclass
+class SentimentResult:
+    """Sentiment analysis result"""
+
+    text: str
+    compound_score: float  # -1 to 1
+    positive: float  # 0 to 1
+    negative: float  # 0 to 1
+    neutral: float  # 0 to 1
+    sentiment_label: str  # 'positive', 'negative', 'neutral'
+    asset_codes: List[str] = None  # List of asset codes mentioned in text
+
+    def __post_init__(self):
+        if self.asset_codes is None:
+            self.asset_codes = []
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "text": self.text,
+            "compound_score": self.compound_score,
+            "positive": self.positive,
+            "negative": self.negative,
+            "neutral": self.neutral,
+            "sentiment_label": self.sentiment_label,
+            "asset_codes": self.asset_codes,
+        }
+
+
+class SentimentAnalyzer:
+    """Analyzes sentiment of text using VADER sentiment analysis"""
+
+    def __init__(self):
+        self.analyzer = SentimentIntensityAnalyzer()
+        self.keyword_extractor = KeywordExtractor()
+        self.cache: object | None = None
+        try:
+            from cache_manager import CacheManager
+        except ImportError:
+            logger.info("CacheManager unavailable - sentiment caching disabled")
+        else:
+            try:
+                self.cache = CacheManager(namespace="sentiment")
+            except Exception as e:
+                logger.warning("Redis unavailable - sentiment caching disabled: %s", e)
+            else:
+                logger.info("Sentiment cache ready")
+
+    def analyze(self, text: str, asset_filter: Optional[str] = None) -> SentimentResult:
+        """
+        Analyze sentiment of a single text
+
+        Args:
+            text: Text to analyze
+            asset_filter: Optional asset code to filter results (e.g., 'XLM', 'USDC')
+
+        Returns:
+            SentimentResult object
+        """
+        # Extract asset codes from text
+        asset_codes = self.keyword_extractor.extract_tickers_only(text)
+        
+        # If asset_filter is specified, check if text mentions that asset
+        if asset_filter:
+            asset_filter = asset_filter.upper()
+            if asset_filter not in asset_codes:
+                # Return neutral result if asset not mentioned
+                return SentimentResult(
+                    text=text[:100],
+                    compound_score=0.0,
+                    positive=0.0,
+                    negative=0.0,
+                    neutral=1.0,
+                    sentiment_label="neutral",
+                    asset_codes=[],
+                )
+        
+        cache_key = f"{text}:{asset_filter}" if asset_filter else text
+        if self.cache:
+            cached = self.cache.get(cache_key)
+            if cached:
+                return SentimentResult(**cached)
+
+        scores = self.analyzer.polarity_scores(text)
+        compound = scores["compound"]
+        if compound >= 0.05:
+            label = "positive"
+        elif compound <= -0.05:
+            label = "negative"
+        else:
+            label = "neutral"
+
+        result = SentimentResult(
+            text=text[:100],
+            compound_score=compound,
+            positive=scores["pos"],
+            negative=scores["neg"],
+            neutral=scores["neu"],
+            sentiment_label=label,
+            asset_codes=asset_codes,
+        )
+
+        if self.cache:
+            self.cache.set(cache_key, result.to_dict())
+
+        return result
+
+    def analyze_batch(self, texts: List[str], asset_filter: Optional[str] = None) -> List[SentimentResult]:
+        """
+        Analyze sentiment of multiple texts
+
+        Args:
+            texts: List of texts to analyze
+            asset_filter: Optional asset code to filter results (e.g., 'XLM', 'USDC')
+
+        Returns:
+            List of SentimentResult objects
+        """
+        results = [self.analyze(t, asset_filter) for t in texts]
+        logger.info("Analyzed %d texts for sentiment", len(results))
+        if asset_filter:
+            logger.info("Filtered for asset: %s", asset_filter)
+        return results
+
+    def analyze_batch_parallel(
+        self,
+        texts: List[str],
+        asset_filter: Optional[str] = None,
+        max_workers: Optional[int] = None,
+    ) -> List[SentimentResult]:
+        """Analyze sentiment using ProcessPoolExecutor for large batches.
+
+        Falls back to sequential processing when the batch is smaller than
+        ``_PARALLEL_THRESHOLD`` or when running inside a child process.
+
+        Args:
+            texts: List of texts to analyze.
+            asset_filter: Optional asset code to filter results.
+            max_workers: Max worker processes (defaults to CPU count).
+
+        Returns:
+            List of SentimentResult objects.
+        """
+        if not texts:
+            return []
+
+        # Fall back to sequential for small batches (overhead > benefit).
+        if len(texts) < _PARALLEL_THRESHOLD:
+            return self.analyze_batch(texts, asset_filter)
+
+        if max_workers is None:
+            max_workers = min(os.cpu_count() or 2, 8)
+
+        args = [(text, asset_filter) for text in texts]
+
+        results: List[SentimentResult] = []
+        try:
+            with ProcessPoolExecutor(max_workers=max_workers) as pool:
+                for result_dict in pool.map(_analyze_in_worker, args):
+                    results.append(SentimentResult(**result_dict))
+        except Exception:
+            logger.warning(
+                "ProcessPoolExecutor failed, falling back to sequential",
+                exc_info=True,
+            )
+            return self.analyze_batch(texts, asset_filter)
+
+        logger.info(
+            "Analyzed %d texts in parallel (%d workers)", len(results), max_workers
+        )
+        return results
+
+    def get_sentiment_summary(self, results: List[SentimentResult]) -> Dict[str, Any]:
+        """
+        Get summary statistics from sentiment analysis results
+
+        Args:
+            results: List of SentimentResult objects
+
+        Returns:
+            Summary statistics
+        """
+        if not results:
+            return {
+                "total_items": 0,
+                "average_compound_score": 0,
+                "positive_count": 0,
+                "negative_count": 0,
+                "neutral_count": 0,
+                "sentiment_distribution": {"positive": 0, "negative": 0, "neutral": 0},
+                "asset_distribution": {},
+            }
+
+        positive_count = sum(1 for r in results if r.sentiment_label == "positive")
+        negative_count = sum(1 for r in results if r.sentiment_label == "negative")
+        neutral_count = sum(1 for r in results if r.sentiment_label == "neutral")
+        avg_compound = sum(r.compound_score for r in results) / len(results)
+
+        # Calculate asset distribution
+        asset_distribution = {}
+        for result in results:
+            for asset in result.asset_codes:
+                asset_distribution[asset] = asset_distribution.get(asset, 0) + 1
+
+        return {
+            "total_items": len(results),
+            "average_compound_score": round(avg_compound, 4),
+            "positive_count": positive_count,
+            "negative_count": negative_count,
+            "neutral_count": neutral_count,
+            "sentiment_distribution": {
+                "positive": round(positive_count / len(results), 4),
+                "negative": round(negative_count / len(results), 4),
+                "neutral": round(neutral_count / len(results), 4),
+            },
+            "asset_distribution": asset_distribution,
+        }
diff --git a/temp_backup/src/trends.py b/temp_backup/src/trends.py
new file mode 100644
index 00000000..44dcb736
--- /dev/null
+++ b/temp_backup/src/trends.py
@@ -0,0 +1,153 @@
+"""
+Trend calculator module - calculates market trends from sentiment and data
+"""
+
+import json
+import logging
+from typing import List, Dict, Any
+from datetime import datetime, timezone
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class Trend:
+    """Market trend information"""
+
+    metric_name: str
+    current_value: float
+    previous_value: float
+    change_percentage: float
+    trend_direction: str  # 'up', 'down', 'stable'
+    timestamp: datetime
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "metric_name": self.metric_name,
+            "current_value": self.current_value,
+            "previous_value": self.previous_value,
+            "change_percentage": self.change_percentage,
+            "trend_direction": self.trend_direction,
+            "timestamp": self.timestamp.isoformat(),
+        }
+
+
+class TrendCalculator:
+    """Calculates trends from sentiment analysis and market data"""
+
+    def __init__(self):
+        self.trend_history: Dict[str, Any] = {}
+        self.cache: object | None = None
+        try:
+            from cache_manager import CacheManager
+        except ImportError:
+            logger.info("CacheManager unavailable - trends caching disabled")
+        else:
+            try:
+                self.cache = CacheManager(namespace="trends")
+            except Exception as e:
+                logger.warning("Redis unavailable - trends caching disabled: %s", e)
+            else:
+                logger.info("Trends cache ready")
+
+    @staticmethod
+    def _summary_cache_key(sentiment_summary: Dict[str, Any]) -> str:
+        """Deterministic key from a sentiment summary dict."""
+        return json.dumps(sentiment_summary, sort_keys=True, default=str)
+
+    def _compute_trend(
+        self,
+        metric_name: str,
+        current_value: float,
+    ) -> Trend:
+        previous_value = self.trend_history.get(metric_name, {}).get(
+            "value", current_value
+        )
+
+        # Calculate change
+        if previous_value != 0:
+            change_pct = ((current_value - previous_value) / abs(previous_value)) * 100
+        else:
+            change_pct = 0.0
+
+        # Determine trend direction
+        if change_pct > 2:
+            direction = "up"
+        elif change_pct < -2:
+            direction = "down"
+        else:
+            direction = "stable"
+
+        # Update trend history
+        self.trend_history[metric_name] = {
+            "value": current_value,
+            "timestamp": datetime.now(timezone.utc),
+        }
+
+        trend = Trend(
+            metric_name=metric_name,
+            current_value=round(current_value, 4),
+            previous_value=round(previous_value, 4),
+            change_percentage=round(change_pct, 2),
+            trend_direction=direction,
+            timestamp=datetime.now(timezone.utc),
+        )
+        logger.info("%s trend: %s (%.2f%%)", metric_name, direction, change_pct)
+        return trend
+
+    def calculate_sentiment_trend(self, sentiment_summary: Dict[str, Any]) -> Trend:
+        current = sentiment_summary.get("average_compound_score", 0)
+        return self._compute_trend("sentiment_score", current)
+
+    def calculate_positive_sentiment_trend(
+        self, sentiment_summary: Dict[str, Any]
+    ) -> Trend:
+        share = sentiment_summary.get("sentiment_distribution", {}).get("positive", 0)
+        return self._compute_trend("positive_sentiment_percentage", share)
+
+    def calculate_negative_sentiment_trend(
+        self, sentiment_summary: Dict[str, Any]
+    ) -> Trend:
+        share = sentiment_summary.get("sentiment_distribution", {}).get("negative", 0)
+        return self._compute_trend("negative_sentiment_percentage", share)
+
+    def calculate_all_trends(self, sentiment_summary: Dict[str, Any]) -> List[Trend]:
+        """
+        Calculate the overall, positive and negative sentiment trends.
+
+        A result cached under the same summary key is returned without recomputing.
+
+        Args:
+            sentiment_summary: Summary from SentimentAnalyzer
+
+        Returns:
+            List of Trend objects
+        """
+        key = self._summary_cache_key(sentiment_summary)
+        stored = self.cache.get(key) if self.cache else None
+        if stored:
+            # Cached entries are plain dicts whose timestamp is an ISO string.
+            return [
+                Trend(
+                    metric_name=entry["metric_name"],
+                    current_value=entry["current_value"],
+                    previous_value=entry["previous_value"],
+                    change_percentage=entry["change_percentage"],
+                    trend_direction=entry["trend_direction"],
+                    timestamp=datetime.fromisoformat(entry["timestamp"]),
+                )
+                for entry in stored
+            ]
+
+        calculators = (
+            self.calculate_sentiment_trend,
+            self.calculate_positive_sentiment_trend,
+            self.calculate_negative_sentiment_trend,
+        )
+        trends = [calculate(sentiment_summary) for calculate in calculators]
+        if self.cache:
+            self.cache.set(key, [trend.to_dict() for trend in trends])
+
+        logger.info("Calculated %d trends", len(trends))
+        return trends
diff --git a/temp_backup/src/utils/__pycache__/logger.cpython-314.pyc b/temp_backup/src/utils/__pycache__/logger.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..89e9cc4259b28de511215182cb9a5155e0813062
GIT binary patch
literal 2989
zcma)8Uu+ab7@ysp+g|Tq|Ip(A+p^`LaG))b29O|XD3xMu%cAGY(an0dZ4bA5=gb^|
z#)LB=DNz%7@S!9oy!b-mn>=V@qOT?<0&UVVU_v4<ywwzgkA5?|_ovv1^Dy(x%s1bB
z^ZUN<w|gKFk05AQzuSJ{b_k(A#H7|>F5CK#FjUbLGL=!p{83?woU2TQQ61GzV}^I4
z^U9>O7n}@+K4fYpHbX~Le}U;q1Z#Q<nJwqhWMZSXm<?%xX4njaM(4jZ4yk4<oR6fC
z+wLkXVn$&W6EhYy;~*!b95a<P%CtQZ2F5cAk6fT7&n?Z&ID{wiB|;q6X2p_c6$@Oa
zmnn1RK&z$TOxqQg<(uzSg30=R5NrVCJYtH80DuV7wxSZi!?3BQ5DS?a)3b=Rm>4b(
z@hvAC7mXH1iY{}={Qgnzf|F;|X!61s9X5&{D;aFoF~CcMIIvsD2e?TGgbe6}*N=iw
zMLD@By2${u`T__;!6d7AbsHuEr<C_H8t-hn8UV2J>=M^z97Zr4%cvYv$DQW7?*tVF
z`uq9#$z!?qsY7UP_|({#ab)P=!Q8MzKVqeFZmcrrc;$*qot#}R(_F!3_A6yl$~%-6
zy_p;(`CNq+U7GVlFi^hC^=XL*-?0fu1O8qdZl+tItD=X-t1HIgyT;)q<LEVgIoxx<
zZOc0Bpj!~$Fb`XnXU{p7#UqwAS1MFoA;&E1<BIJDJ8hOVT_lvbMbGg{u-#%=g;L(K
zNE_JUNz0-Paw2cpjFIAp73NS->aeW5Yg1Y%=2_lxUCV+zi8n36F_9r}5|Pd%T<b(R
zP?5iXFM#{^@z67@hPu`ebflxE#3ODUxxBDa%QYQj5!Urd=&C4-rqrWX5h}o1H#)1#
zDyGs%08`C1f~@5*9Fc8OSb44uYLEo@iO}a*0K-GBbJ1}{v{{9C8O$}x2-hGTd5ee^
z#E<J^vU1?%`gDoR*-Sopmwd87+4j5=U<a6!ey}W_P4_@oMbA*T()p+8zM^YlxSf{6
zuc_a9f9aJQ5{?b2E(i!-ir23K9vf1<{!Y*cS|6Y@6yR1?v+891{WT4Q_6DMlUsg>c
z&}(Vz*QBgcv$PYcI;(nhTMe`~C%Rg}m9<=~W^FTq1Nu`1DGn+PS}fzNHYsxAxt7HZ
zZ*p5|oAsMQ@P=e<wJ~NxjZ<LvMH=d4s&Td^Wwj<7fL2DI?>z_kQ8sJ?vZIoRRB;N%
znR654g06s6N9Nz!v(Kh^R-AJ()Y!96W|c@VIjajBnMdx;0ZG_1pvPP5hsaKFMHCuw
zWc5xL9k)O;F_MOL-U?jhXCx2L+Frp0R^nj_i!zm4fqBaI4CEt^pJT+a=T7@d1d<Bz
z_!;;VjL0+wKISnvSD(rH<*jxhU~3)$F*u#kNiUq2u5sOeHg5^;=dFHWfJI3sRMcL7
zzX2_%61b*Pw)|t{Fzm`I`4)sKdK~XsiTB=%_b#%f`0nb+nvTMWg)0kJs#jLqlGoyo
zaAaZP(~0Y+mhq0&RCje`8FxKSBv%sYdx`W?Vo&w$r#k9*X{CMNz4m>p;ddUzQj4iI
zO-)6fhC&^Bb#yI;La`OR?JnN7xP2L?K|Rs86yH@HS;dhR+;tasUC%D#?o}LJ!O6Qg
zd3|~r_rSHmY-Snvu0}hm@A}2sOjV*p1g`7D@+tqHsx~XsL6K}4=>bzd*ak?j&?go|
z?8$Dipb)_>O(Y~~M|KCrhKQ5k9z_9)f$pnwYhw8NgPT(~rsPWF9iS?kc!PY$HV}Yx
z0!l$m^pZxm6!@8t(St$UB7NxhLmYv$tOfWO>`7O|Hw->s(bOVdiuT;WJ&l`dHwaRb
zH;cEui0lUyKuGlwGC&{v5a9e5B!^(13jXjR=@9_*e6vLkzBjN}Zx|%$%?S{iacF#G
zHj7+e{hy;o9ce^f0~0~?IIdLSUn?Z~AtG{LUhN~7nsGdbh!4m{ejsmvUX%L|qlqur
zm-C;^fBwl*wEGV3mNqhRpBAF|lE<7&?4nI5c^yvj_VQ&mTk;zJem(<4PWp;jn-KAx
z=DPoH=2t%+9UqyHF(x94(ue0A4j=b_GER_FpdtcE&qBARDT?v{B_5#82PpbG+6#re
zwQnVP;9l~;otKaQJiDBnxPv;M>LI0TaqrE68v{=fh->YrcmMY-KgE8C$?xS~$01pt
K=@2FPzJCBtIcVGf

literal 0
HcmV?d00001

diff --git a/temp_backup/src/utils/http_client.py b/temp_backup/src/utils/http_client.py
new file mode 100644
index 00000000..2d91e074
--- /dev/null
+++ b/temp_backup/src/utils/http_client.py
@@ -0,0 +1,138 @@
+import logging
+import random
+import time
+from typing import Optional
+
+import requests
+
+logger = logging.getLogger("RobustHTTPClient")
+
+
+class CircuitBreakerOpenException(Exception):
+    """
+    Signals that the circuit breaker is OPEN and the request was rejected.
+    """
+
+
+class RobustHTTPClient(requests.Session):
+    """A requests.Session with retry and circuit-breaker protection.
+    - Connection errors, timeouts and ``status_forcelist`` statuses are retried up
+      to ``max_retries`` times with exponential backoff plus jitter.
+    - After ``failure_threshold`` consecutive requests exhaust their retries the
+      breaker opens and rejects calls for ``recovery_timeout`` seconds.
+    """
+
+    def __init__(
+        self,
+        max_retries: int = 4,
+        backoff_factor: float = 1.5,
+        status_forcelist: Optional[set[int]] = None,
+        failure_threshold: int = 5,
+        recovery_timeout: float = 30.0,
+    ):
+        super().__init__()
+        self.max_retries = max_retries
+        self.backoff_factor = backoff_factor
+        # Only None selects the defaults, so an explicit empty set disables status retries.
+        if status_forcelist is None:
+            status_forcelist = {429, 500, 502, 503, 504}
+        self.status_forcelist = status_forcelist
+
+        # Circuit Breaker state
+        self.failure_threshold = failure_threshold
+        self.recovery_timeout = recovery_timeout
+        self.failure_count = 0
+        self.state = "CLOSED"  # CLOSED, OPEN, HALF-OPEN
+        self.last_state_change = time.time()
+        self._circuit_opened_at = 0.0
+
+    def _check_circuit(self) -> None:
+        """Check and update circuit breaker state."""
+        if self.state == "OPEN":
+            elapsed = time.time() - self._circuit_opened_at
+            if elapsed > self.recovery_timeout:
+                self.state = "HALF-OPEN"
+                self.last_state_change = time.time()
+                logger.warning(
+                    "Circuit breaker transitioning to HALF-OPEN. Allowing trial request."
+                )
+            else:
+                raise CircuitBreakerOpenException(
+                    f"Circuit breaker is OPEN. Fast-failing request. Time remaining: {self.recovery_timeout - elapsed:.1f}s"
+                )
+
+    def _record_success(self) -> None:
+        """Record a successful request and reset breaker state if needed."""
+        if self.state == "HALF-OPEN":
+            logger.info(
+                "Trial request succeeded. Circuit breaker transitioning to CLOSED."
+            )
+        self.failure_count = 0
+        self.state = "CLOSED"
+        self.last_state_change = time.time()
+
+    def _record_failure(self) -> None:
+        """Record a failed request and trip breaker if threshold exceeded."""
+        self.failure_count += 1
+        if self.state == "HALF-OPEN" or self.failure_count >= self.failure_threshold:
+            self.state = "OPEN"
+            self._circuit_opened_at = time.time()
+            self.last_state_change = time.time()
+            logger.error(
+                f"Circuit breaker tripped to OPEN. Failure count: {self.failure_count}. "
+                f"Will reject requests for next {self.recovery_timeout} seconds."
+            )
+
+    def request(self, method: str, url: str, **kwargs) -> requests.Response:
+        """Send an HTTP request with retry logic and circuit breaker protection.
+
+        Raises CircuitBreakerOpenException while the breaker is OPEN, and the last
+        connection, timeout or transient-status error once max_retries is exceeded.
+        """
+        self._check_circuit()
+        kwargs.setdefault("timeout", 10.0)  # 10s unless the caller set a timeout
+
+        retries = 0
+        while True:
+            try:
+                response = super().request(method, url, **kwargs)
+                # A transient status is raised so it shares the retry path below.
+                if response.status_code in self.status_forcelist:
+                    raise requests.exceptions.HTTPError(
+                        f"Transient status {response.status_code}", response=response
+                    )
+
+                # If we get here, it's a successful response (or non-retryable error like 400/404)
+                self._record_success()
+                return response
+
+            except (
+                requests.exceptions.ConnectionError,
+                requests.exceptions.Timeout,
+                requests.exceptions.HTTPError,
+            ) as e:
+                # An HTTPError can carry no response; it is then retried like a
+                # connection failure rather than failing on the status lookup.
+                if isinstance(e, requests.exceptions.HTTPError) and e.response is not None:
+                    if e.response.status_code not in self.status_forcelist:
+                        # Non-retryable HTTP error: the server responded normally.
+                        self._record_success()
+                        raise
+
+                retries += 1
+                if retries > self.max_retries:
+                    logger.error(
+                        f"Max retries ({self.max_retries}) exceeded for {url}. Failure details: {str(e)}"
+                    )
+                    self._record_failure()
+                    raise
+
+                # Calculate exponential backoff with jitter
+                sleep_time = self.backoff_factor * (2 ** (retries - 1))
+                sleep_time += random.uniform(0, 0.5)  # Add jitter
+
+                logger.warning(
+                    f"Request to {url} failed: {str(e)}. "
+                    f"Retrying in {sleep_time:.2f}s... (Attempt {retries}/{self.max_retries})"
+                )
+                time.sleep(sleep_time)
diff --git a/temp_backup/src/utils/logger.py b/temp_backup/src/utils/logger.py
new file mode 100644
index 00000000..1b6f0f8a
--- /dev/null
+++ b/temp_backup/src/utils/logger.py
@@ -0,0 +1,52 @@
+import logging
+import contextvars
+import uuid
+from pythonjsonlogger import jsonlogger
+
+# Context variable for correlation ID
+correlation_id_ctx = contextvars.ContextVar("correlation_id", default="system")
+
+
+class CorrelationIdFilter(logging.Filter):
+    """Stamp each log record with the correlation ID of the current context."""
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        record.correlation_id = correlation_id_ctx.get()
+        return True  # never drops a record; the filter only annotates it
+
+
+def setup_logger(name: str = "lumenpulse", level: int = logging.INFO) -> logging.Logger:
+    """Return the named logger, configuring JSON output the first time.
+
+    A logger that already has handlers is returned as-is, so repeated calls
+    never attach duplicate handlers.
+    """
+    logger = logging.getLogger(name)
+    if logger.handlers:
+        return logger
+
+    logger.setLevel(level)
+    logger.propagate = False
+
+    handler = logging.StreamHandler()
+    handler.setFormatter(
+        jsonlogger.JsonFormatter(
+            "%(asctime)s %(levelname)s %(name)s %(correlation_id)s %(message)s",
+            rename_fields={"levelname": "level"},
+        )
+    )
+
+    # Attached to the logger and to the handler: the formatter needs a
+    # correlation_id on every record it receives.
+    correlation_filter = CorrelationIdFilter()
+    logger.addFilter(correlation_filter)
+    handler.addFilter(correlation_filter)
+
+    logger.addHandler(handler)
+    return logger
+
+def get_logger(name: str) -> logging.Logger:
+    return setup_logger(name)  # default INFO level; an already configured logger is reused
+
+def generate_correlation_id() -> str:
+    return str(uuid.uuid4())  # random UUID in its canonical string form
diff --git a/temp_backup/src/utils/metrics.py b/temp_backup/src/utils/metrics.py
new file mode 100644
index 00000000..9f36b944
--- /dev/null
+++ b/temp_backup/src/utils/metrics.py
@@ -0,0 +1,42 @@
+from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
+from prometheus_client import start_http_server
+
+# Prometheus metrics for the pipeline, created when this module is imported:
+# four counters and one duration histogram.
+JOBS_RUN_TOTAL = Counter("jobs_run", "Total number of jobs run in the pipeline")
+
+# Failures are broken down by the "method" and "endpoint" labels.
+API_FAILURES_TOTAL = Counter(
+    "api_failures",
+    "Total number of API request failures",
+    ["method", "endpoint"],
+)
+
+ANOMALIES_DETECTED_TOTAL = Counter(
+    "anomalies_detected",
+    "Total number of anomalies detected",
+    ["metric_name"],
+)
+
+MODEL_RETRAINING_TOTAL = Counter(
+    "model_retraining_total",
+    "Total number of model retraining runs",
+    ["model_type", "status"],  # status: success | failed | skipped
+)
+
+# Bucket bounds are in seconds and span 1 second to 10 minutes.
+MODEL_RETRAINING_DURATION = Histogram(
+    "model_retraining_duration_seconds",
+    "Duration of model retraining runs in seconds",
+    ["model_type"],
+    buckets=[1, 5, 10, 30, 60, 120, 300, 600],
+)
+
+def start_metrics_server(port: int = 9090):
+    """Start the standalone Prometheus metrics server; warn if it cannot start."""
+    import logging
+
+    try:
+        start_http_server(port)
+    except Exception as exc:  # best effort, e.g. the server is already running
+        logging.getLogger(__name__).warning("Metrics server could not start: %s", exc)
diff --git a/temp_backup/src/utils/translator.py b/temp_backup/src/utils/translator.py
new file mode 100644
index 00000000..928fc1dc
--- /dev/null
+++ b/temp_backup/src/utils/translator.py
@@ -0,0 +1,88 @@
+import logging
+import unicodedata
+from src.utils.http_client import RobustHTTPClient
+
+from langdetect import detect
+
+logger = logging.getLogger(__name__)
+
+_client = RobustHTTPClient()
+
+
+def normalize_text(text: str) -> str:
+    """
+    Return ``text`` NFKD-normalized with its whitespace collapsed.
+
+    Each run of whitespace inside a line becomes one space, lines left empty
+    are dropped, and the rest are rejoined with newlines. Casing is kept
+    because it is valuable for sentiment analysis.
+    """
+    if not text:
+        return ""
+
+    # NFKD normalization decomposes characters (e.g. accented characters)
+    decomposed = unicodedata.normalize("NFKD", text)
+
+    # str.split() without a separator collapses each whitespace run in a line.
+    collapsed = (" ".join(line.split()) for line in decomposed.splitlines())
+    # Lines that held only whitespace are empty by now and are left out.
+    kept = [line for line in collapsed if line]
+
+    return "\n".join(kept).strip()
+
+
+def translate_to_english(text: str, source_lang: str = "auto") -> str:
+    """
+    Translate ``text`` to English via Google's public translation endpoint.
+
+    Blank input is returned as-is; any failure is logged and the original text returned.
+    """
+    if not text or not text.strip():
+        return text
+
+    endpoint = "https://translate.googleapis.com/translate_a/single"
+    query = {"client": "gtx", "sl": source_lang, "tl": "en", "dt": "t", "q": text}
+    try:
+        response = _client.get(endpoint, params=query, timeout=5)
+        response.raise_for_status()
+        payload = response.json()
+        # The first payload element lists chunks; each chunk's first item is kept.
+        if payload and len(payload) > 0 and payload[0]:
+            pieces = [
+                chunk[0]
+                for chunk in payload[0]
+                if chunk and len(chunk) > 0 and chunk[0]
+            ]
+            if pieces:
+                return "".join(pieces)
+
+    except Exception as e:
+        logger.warning(f"Translation failed, falling back to original text. Error: {e}")
+
+    return text
+
+
+def translate_and_normalize(text: str) -> str:
+    """
+    Normalize ``text`` and, when it is not detected as English, translate it.
+
+    Blank input yields "". Text whose language cannot be detected is treated
+    as English and is only normalized.
+    """
+    if not text or not text.strip():
+        return ""
+
+    # Normalize first (helpful for language detection)
+    cleaned = normalize_text(text)
+
+    try:
+        detected = detect(cleaned)
+    except Exception:
+        # Default to English if detection fails (e.g. no letters)
+        detected = "en"
+
+    if detected == "en":
+        return cleaned
+
+    logger.info(f"Detected language '{detected}'. Translating to English.")
+    return translate_to_english(cleaned, source_lang=detected)
diff --git a/temp_backup/src/validators.py b/temp_backup/src/validators.py
new file mode 100644
index 00000000..bfa7cd8d
--- /dev/null
+++ b/temp_backup/src/validators.py
@@ -0,0 +1,58 @@
+"""
+validators.py
+
+Provides data validation and sanitization for ingested records using Pydantic models.
+Schemas:
+- NewsArticle
+- OnChainMetric
+
+Invalid records are logged and handled safely.
+"""
+from typing import Optional, Any
+from pydantic import BaseModel, ValidationError, validator
+import logging
+
+logger = logging.getLogger("data_validation")
+
+class NewsArticle(BaseModel):
+    id: str
+    title: str
+    content: str
+    published_at: str  # ISO8601 string
+    source: Optional[str] = None  # explicit default: optional on pydantic v1 and v2
+    url: Optional[str] = None
+
+    @validator("published_at")
+    def validate_published_at(cls, v):
+        # Only emptiness is rejected; the ISO8601 format itself is not checked.
+        if not v or not isinstance(v, str):
+            raise ValueError("published_at must be a non-empty string")
+        return v
+
+
+class OnChainMetric(BaseModel):
+    metric_id: str
+    value: float
+    timestamp: str  # ISO8601 string
+    chain: str
+    extra: Optional[Any] = None  # free-form payload, accepted as-is
+
+    @validator("timestamp")
+    def validate_timestamp(cls, v):
+        if v and isinstance(v, str):  # only emptiness is rejected, not the format
+            return v
+        raise ValueError("timestamp must be a non-empty string")
+
+def validate_news_article(data: dict) -> Optional[NewsArticle]:
+    try:
+        return NewsArticle(**data)
+    except ValidationError as exc:  # invalid records are logged, never raised
+        logger.warning(f"Invalid NewsArticle: {exc.errors()}")
+        return None
+
+def validate_onchain_metric(data: dict) -> Optional[OnChainMetric]:
+    try:
+        return OnChainMetric(**data)
+    except ValidationError as exc:  # invalid records are logged, never raised
+        logger.warning(f"Invalid OnChainMetric: {exc.errors()}")
+        return None