diff --git a/financial_loss_functions/config/hparams.json b/financial_loss_functions/config/hparams.json
index e03b678e..a63df1c0 100644
--- a/financial_loss_functions/config/hparams.json
+++ b/financial_loss_functions/config/hparams.json
@@ -18,7 +18,10 @@
             "train_batch_size": 256,
             "val_batch_size": 1,
             "clip_grad_norm": 0.5,
-            "epochs": 100
+            "epochs": 300,
+            "early_stopping_patience": 15,
+            "lr_scheduler_patience": 7,
+            "lr_scheduler_factor": 0.5
         }
     },
     "AttentionLSTM": {
@@ -35,7 +38,59 @@
             "train_batch_size": 256,
             "val_batch_size": 1,
             "clip_grad_norm": 0.5,
-            "epochs": 100
+            "epochs": 300,
+            "early_stopping_patience": 15,
+            "lr_scheduler_patience": 7,
+            "lr_scheduler_factor": 0.5
         }
+    },
+    "DeformTime": {
+        "model": {
+            "max_seq_len": 200,
+            "e_layers": 3,
+            "d_layers": 2,
+            "d_model": 64,
+            "nheads": 4,
+            "kernel_size": 3,
+            "dropout": 0.2,
+            "n_reshape": 2,
+            "patch_len": 16,
+            "stride": 8
+        },
+        "optimizer": {
+            "lr": 1e-4,
+            "weight_decay": 3e-4
+        },
+        "train": {
+            "train_batch_size": 256,
+            "val_batch_size": 1,
+            "clip_grad_norm": 0.5,
+            "epochs": 300,
+            "early_stopping_patience": 15,
+            "lr_scheduler_patience": 7,
+            "lr_scheduler_factor": 0.5
+        }
+    },
+    "CompositeSRLoss": {
+        "alpha": 0.03,
+        "beta": 0.02,
+        "gamma": 0.05,
+        "delta": 0.01,
+        "psych_sigma": 1.5,
+        "psych_thresholds": [0.0, 2.0, -2.0, 5.0, -5.0, 8.0, -8.0, 12.0, -12.0],
+        "ema_span": 10,
+        "sr_use_multi_timeframe": true,
+        "sr_lookback_windows": [5, 10, 21, 42, 63, 105],
+        "sr_pivot_threshold": 0.02,
+        "sr_importance_hidden": 8,
+        "use_macro_override": true,
+        "macro_override_hidden": 8
+    },
+    "CVaRBenchmark": {
+        "confidence": 0.95,
+        "risk_aversion": 1.0,
+        "w_min": 0.0,
+        "w_max": 0.30,
+        "L_tar": 1.6
     }
 }
\ No newline at end of file
diff --git a/financial_loss_functions/config/paths.json b/financial_loss_functions/config/paths.json
index 2cfe918e..fce01ca3 100644
--- a/financial_loss_functions/config/paths.json
+++ b/financial_loss_functions/config/paths.json
@@ -4,6 +4,7 @@
         "raw_dir": "data/raw/",
         "processed_dir": "data/processed/",
         "raw_macro_dir": "data/raw/macro/",
+        "sec_filings_dir": "data/raw/sec_filings/",
         "crsp_dir": ""
     },
     "raw_files": {
@@ -28,6 +29,7 @@
     "artifacts": {
         "artifact_dir": "artifacts/",
         "results": "artifacts/results/",
-        "plots": "artifacts/results/plots/"
+        "plots": "artifacts/results/plots/",
+        "pyfolio_output": "artifacts/results/plots/pyfolio/"
     }
 }
\ No newline at end of file
diff --git a/financial_loss_functions/exploration/feature_selection_findings.ipynb b/financial_loss_functions/exploration/feature_selection_findings.ipynb
index 7457506b..cb02d0d1 100644
--- a/financial_loss_functions/exploration/feature_selection_findings.ipynb
+++ b/financial_loss_functions/exploration/feature_selection_findings.ipynb
@@ -2650,7 +2650,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "757fab41",
    "metadata": {
     "execution": {
@@ -2725,7 +2725,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "55af6918",
    "metadata": {
     "execution": {
@@ -2790,7 +2790,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "de47886a",
    "metadata": {
     "execution": {
@@ -2850,7 +2850,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "id": "c5ed9f54",
    "metadata": {
     "execution": {
@@ -2916,7 +2916,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "b46ff82a",
    "metadata": {
     "execution": {
@@ -3029,7 +3029,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "a94d8a19",
    "metadata": {
     "execution": {
@@ -3098,7 +3098,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "id": "610307a0",
    "metadata": {
     "execution": {
@@ -3197,7 +3197,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "id": "968d3180",
    "metadata": {
     "execution": {
@@ -3312,7 +3312,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.9"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,
diff --git a/financial_loss_functions/requirements.txt b/financial_loss_functions/requirements.txt
index bba07a16..751867d1 100644
--- a/financial_loss_functions/requirements.txt
+++ b/financial_loss_functions/requirements.txt
@@ -3,6 +3,7 @@ fredapi==0.5.2
 numpy==2.3.4
 pandas==2.3.3
 cvxopt==1.3.2
+cvxpy
 scikit-learn==1.7.2
 scipy==1.16.3
 pytest==8.4.2
@@ -12,4 +13,7 @@ seaborn==0.13.0
 statsmodels==0.14.5
 torch==2.9.1
 torchvision==0.24.1
-optuna==4.6.0
\ No newline at end of file
+optuna==4.6.0
+edgartools
+pyfolio-reloaded
+empyrical-reloaded
\ No newline at end of file
diff --git a/financial_loss_functions/scripts/run_sec_collection.py b/financial_loss_functions/scripts/run_sec_collection.py
new file mode 100644
index 00000000..2336e3e0
--- /dev/null
+++ b/financial_loss_functions/scripts/run_sec_collection.py
@@ -0,0 +1,75 @@
+"""
+CLI entry point for SEC filing data collection.
+
+Usage:
+    python -m scripts.run_sec_collection [--identity "Name email@domain.com"]
+"""
+
+import argparse
+import json
+import logging
+from pathlib import Path
+
+import pandas as pd
+
+from scripts.utils import load_config
+from src.data_collection.sec_filings import run_sec_filing_pipeline
+
+logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+logger = logging.getLogger(__name__)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Fetch SEC filings and compute fundamental features")
+    parser.add_argument(
+        "--identity",
+        default="FinLossFunctions research@example.com",
+        help="SEC EDGAR identity string (Name + email)",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default=None,
+        help="Override output directory for cached Parquet files",
+    )
+    args = parser.parse_args()
+
+    paths_config = load_config("paths")
+
+    sector_path = Path("config/sector_classification.json")
+    with open(sector_path, "r") as f:
+        sector_map = json.load(f)
+    tickers = sorted(sector_map.keys())
+    logger.info("Found %d tickers from sector classification", len(tickers))
+
+    crsp_dir = Path(paths_config["data"]["crsp_dir"])
+    if not crsp_dir.exists():
+        crsp_dir = Path(paths_config["data"]["raw_dir"]) / "sample"
+
+    train_file = crsp_dir / paths_config["raw_files"]["train"]
+    if not train_file.exists():
+        logger.error("Training data not found at %s", train_file)
+        return
+
+    train_df = pd.read_csv(train_file)
+    if "date" in train_df.columns:
+        train_df["date"] = pd.to_datetime(train_df["date"])
+        train_df = train_df.set_index("date")
+
+    cache_dir = Path(args.output_dir) if args.output_dir else Path(
+        paths_config.get("data", {}).get("sec_filings_dir", "data/raw/sec_filings")
+    )
+
+    scores = run_sec_filing_pipeline(
+        tickers=tickers,
+        target_index=train_df.index,
+        cache_dir=cache_dir,
+        identity=args.identity,
+    )
+
+    output_path = cache_dir / "composite_fundamental_scores.csv"
+    scores.to_csv(output_path)
+    logger.info("Composite scores saved to %s", output_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/financial_loss_functions/scripts/run_training.py b/financial_loss_functions/scripts/run_training.py
index 9afa375e..be14c73c 100644
--- a/financial_loss_functions/scripts/run_training.py
+++ b/financial_loss_functions/scripts/run_training.py
@@ -1,13 +1,23 @@
+import argparse
 import os
 # from dotenv import load_dotenv
 from scripts.utils import load_path_config, load_config
-from src.training.pipeline import run_training_pipeline
+from src.training.pipeline import ALL_MODELS, run_training_pipeline
 
 if __name__ == '__main__':
     # load_dotenv()
 
-    paths_config = load_path_config(os.path.join('config', 'paths.json'))
+    parser = argparse.ArgumentParser(description="Run training pipeline")
+    parser.add_argument(
+        '--models',
+        nargs='+',
+        choices=list(ALL_MODELS.keys()),
+        default=None,
+        help=f"Models to train. Choices: {list(ALL_MODELS.keys())}. Default: all",
+    )
+    args = parser.parse_args()
 
+    paths_config = load_path_config(os.path.join('config', 'paths.json'))
     hparams_config = load_config(os.path.join('config', 'hparams.json'))
 
-    run_training_pipeline(paths_config, hparams_config)
\ No newline at end of file
+    run_training_pipeline(paths_config, hparams_config, models=args.models)
\ No newline at end of file
diff --git a/financial_loss_functions/src/data_collection/sec_filings.py b/financial_loss_functions/src/data_collection/sec_filings.py
new file mode 100644
index 00000000..e55e4d50
--- /dev/null
+++ b/financial_loss_functions/src/data_collection/sec_filings.py
@@ -0,0 +1,475 @@
+"""
+SEC filing data pipeline using edgartools.
+
+Fetches 10-K, 10-Q, and 8-K filings for a set of tickers, extracts
+XBRL financial statements, computes fundamental features, aligns them
+to a daily business-day grid, and caches results as Parquet.
+"""
+
+import json
+import logging
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+FUNDAMENTAL_WEIGHTS = {
+    "revenue_growth": 0.30,
+    "operating_margin": 0.25,
+    "debt_to_equity": -0.20,
+    "fcf_yield": 0.15,
+    "event_signal": 0.10,
+}
+
+_EVENT_WINDOW_DAYS = 2
+
+
+# ---------------------------------------------------------------------------
+# Filing retrieval helpers
+# ---------------------------------------------------------------------------
+
+def _init_edgar(identity: str = "FinLossFunctions research@example.com"):
+    """Lazy-import edgartools and set SEC identity."""
+    try:
+        import edgar
+    except ImportError as exc:
+        raise ImportError(
+            "edgartools is required for the SEC filing pipeline. "
+            "Install with: pip install edgartools"
+        ) from exc
+    edgar.set_identity(identity)
+    return edgar
+
+
+def fetch_filings_for_ticker(
+    ticker: str,
+    forms: Tuple[str, ...] = ("10-K", "10-Q", "8-K"),
+    *,
+    edgar_module=None,
+) -> Dict[str, list]:
+    """
+    Retrieve filings for *ticker* grouped by form type.
+
+    Returns dict mapping form string to a list of Filing objects.
+    """
+    if edgar_module is None:
+        edgar_module = _init_edgar()
+
+    company = edgar_module.Company(ticker)
+    result: Dict[str, list] = {}
+    for form in forms:
+        try:
+            filings = company.get_filings(form=form)
+            result[form] = list(filings) if filings is not None else []
+        except Exception as exc:
+            logger.warning("Could not fetch %s for %s: %s", form, ticker, exc)
+            result[form] = []
+    return result
+
+
+# ---------------------------------------------------------------------------
+# XBRL extraction helpers
+# ---------------------------------------------------------------------------
+
+def _safe_float(value) -> Optional[float]:
+    """Convert a value to float, returning None on failure."""
+    if value is None:
+        return None
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def _extract_statement_value(statement, *concept_labels) -> Optional[float]:
+    """
+    Try to pull a numeric value from an XBRL statement by trying
+    multiple concept label variants.
+    """
+    if statement is None:
+        return None
+    try:
+        df = statement.to_dataframe() if hasattr(statement, "to_dataframe") else None
+    except Exception:
+        return None
+    if df is None or df.empty:
+        return None
+
+    for label in concept_labels:
+        for col in df.columns:
+            if label.lower() in str(col).lower():
+                vals = df[col].dropna()
+                if not vals.empty:
+                    return _safe_float(vals.iloc[0])
+    return None
+
+
+def extract_financials_from_filing(filing) -> Dict[str, Optional[float]]:
+    """
+    Extract key financial metrics from a single 10-K or 10-Q filing
+    via its XBRL data.
+
+    Returns a dict with keys:
+        revenue, operating_income, net_income,
+        total_assets, total_liabilities, stockholders_equity,
+        current_assets, current_liabilities,
+        operating_cashflow, capex
+    """
+    metrics: Dict[str, Optional[float]] = {
+        "revenue": None,
+        "operating_income": None,
+        "net_income": None,
+        "total_assets": None,
+        "total_liabilities": None,
+        "stockholders_equity": None,
+        "current_assets": None,
+        "current_liabilities": None,
+        "operating_cashflow": None,
+        "capex": None,
+    }
+
+    try:
+        obj = filing.obj()
+    except Exception:
+        return metrics
+
+    financials = getattr(obj, "financials", None)
+    if financials is None:
+        try:
+            from edgar.financials import Financials
+            financials = Financials.extract(filing)
+        except Exception:
+            return metrics
+
+    if financials is None:
+        return metrics
+
+    # Income statement
+    try:
+        inc = financials.income_statement()
+        metrics["revenue"] = _extract_statement_value(
+            inc, "Revenue", "Revenues", "Net Revenue", "SalesRevenueNet",
+            "RevenueFromContractWithCustomer",
+        )
+        metrics["operating_income"] = _extract_statement_value(
+            inc, "OperatingIncome", "Operating Income",
+        )
+        metrics["net_income"] = _extract_statement_value(
+            inc, "NetIncome", "Net Income",
+        )
+    except Exception:
+        pass
+
+    # Balance sheet
+    try:
+        bs = financials.balance_sheet()
+        metrics["total_assets"] = _extract_statement_value(
+            bs, "Assets", "TotalAssets",
+        )
+        metrics["total_liabilities"] = _extract_statement_value(
+            bs, "Liabilities", "TotalLiabilities",
+        )
+        metrics["stockholders_equity"] = _extract_statement_value(
+            bs, "StockholdersEquity", "Equity",
+        )
+        metrics["current_assets"] = _extract_statement_value(
+            bs, "CurrentAssets", "AssetsCurrent",
+        )
+        metrics["current_liabilities"] = _extract_statement_value(
+            bs, "CurrentLiabilities", "LiabilitiesCurrent",
+        )
+    except Exception:
+        pass
+
+    # Cash flow statement
+    try:
+        cf = financials.cashflow_statement()
+        metrics["operating_cashflow"] = _extract_statement_value(
+            cf, "OperatingCashFlow", "NetCashFromOperating",
+            "CashFromOperatingActivities",
+        )
+        metrics["capex"] = _extract_statement_value(
+            cf, "CapitalExpenditure", "Capex",
+            "PaymentsToAcquirePropertyPlantAndEquipment",
+        )
+    except Exception:
+        pass
+
+    return metrics
+
+
+# ---------------------------------------------------------------------------
+# Feature computation
+# ---------------------------------------------------------------------------
+
+def _compute_ticker_fundamentals(
+    filings_10k: list,
+    filings_10q: list,
+) -> pd.DataFrame:
+    """
+    Build a time-indexed DataFrame of fundamental features from 10-K
+    and 10-Q filings for a single ticker.
+
+    Columns: revenue_growth, operating_margin, debt_to_equity,
+             current_ratio, fcf_yield
+    """
+    records = []
+    all_filings = []
+    for f in filings_10k:
+        all_filings.append(("10-K", f))
+    for f in filings_10q:
+        all_filings.append(("10-Q", f))
+
+    for form_type, filing in all_filings:
+        try:
+            filing_date = pd.Timestamp(filing.filing_date)
+        except Exception:
+            continue
+
+        data = extract_financials_from_filing(filing)
+        records.append({"date": filing_date, "form": form_type, **data})
+
+    if not records:
+        return pd.DataFrame()
+
+    df = pd.DataFrame(records).sort_values("date").drop_duplicates(
+        subset=["date"], keep="last"
+    )
+    df = df.set_index("date")
+
+    # Revenue growth (QoQ)
+    if "revenue" in df.columns:
+        df["revenue_growth"] = df["revenue"].pct_change()
+    else:
+        df["revenue_growth"] = np.nan
+
+    # Operating margin
+    rev = df.get("revenue")
+    op_inc = df.get("operating_income")
+    if rev is not None and op_inc is not None:
+        df["operating_margin"] = np.where(
+            (rev != 0) & rev.notna() & op_inc.notna(),
+            op_inc / rev,
+            np.nan,
+        )
+    else:
+        df["operating_margin"] = np.nan
+
+    # Debt-to-equity
+    liab = df.get("total_liabilities")
+    equity = df.get("stockholders_equity")
+    if liab is not None and equity is not None:
+        df["debt_to_equity"] = np.where(
+            (equity != 0) & equity.notna() & liab.notna(),
+            liab / equity,
+            np.nan,
+        )
+    else:
+        df["debt_to_equity"] = np.nan
+
+    # Current ratio
+    ca = df.get("current_assets")
+    cl = df.get("current_liabilities")
+    if ca is not None and cl is not None:
+        df["current_ratio"] = np.where(
+            (cl != 0) & cl.notna() & ca.notna(),
+            ca / cl,
+            np.nan,
+        )
+    else:
+        df["current_ratio"] = np.nan
+
+    # FCF yield (proxy: (operating CF - capex) / total_assets as stand-in)
+    ocf = df.get("operating_cashflow")
+    capex = df.get("capex")
+    ta = df.get("total_assets")
+    if ocf is not None and ta is not None:
+        capex_vals = capex.fillna(0) if capex is not None else 0
+        df["fcf_yield"] = np.where(
+            (ta != 0) & ta.notna() & ocf.notna(),
+            (ocf - capex_vals.abs()) / ta,
+            np.nan,
+        )
+    else:
+        df["fcf_yield"] = np.nan
+
+    feature_cols = [
+        "revenue_growth",
+        "operating_margin",
+        "debt_to_equity",
+        "current_ratio",
+        "fcf_yield",
+    ]
+    return df[feature_cols]
+
+
+def _compute_event_signal(
+    filings_8k: list,
+    target_index: pd.DatetimeIndex,
+    window_days: int = _EVENT_WINDOW_DAYS,
+) -> pd.Series:
+    """
+    Create a binary event indicator that is 1 within +/- *window_days*
+    of any 8-K filing date.
+    """
+    event_dates = set()
+    for f in filings_8k:
+        try:
+            event_dates.add(pd.Timestamp(f.filing_date))
+        except Exception:
+            continue
+
+    signal = pd.Series(0.0, index=target_index, name="event_signal")
+    for ed in event_dates:
+        mask = (target_index >= ed - pd.Timedelta(days=window_days)) & (
+            target_index <= ed + pd.Timedelta(days=window_days)
+        )
+        signal.loc[mask] = 1.0
+    return signal
+
+
+# ---------------------------------------------------------------------------
+# Full pipeline
+# ---------------------------------------------------------------------------
+
+def run_sec_pipeline_for_ticker(
+    ticker: str,
+    target_index: pd.DatetimeIndex,
+    cache_dir: Optional[Path] = None,
+    edgar_module=None,
+) -> pd.DataFrame:
+    """
+    End-to-end: fetch filings, extract features, align to daily grid.
+
+    Returns DataFrame indexed by *target_index* with columns:
+        revenue_growth, operating_margin, debt_to_equity,
+        current_ratio, fcf_yield, event_signal
+    """
+    cache_path = None
+    if cache_dir is not None:
+        cache_path = cache_dir / f"{ticker}_fundamentals.parquet"
+        if cache_path.exists():
+            logger.info("Loading cached SEC data for %s", ticker)
+            cached = pd.read_parquet(cache_path)
+            cached.index = pd.to_datetime(cached.index)
+            return cached.reindex(target_index).ffill().bfill()
+
+    if edgar_module is None:
+        edgar_module = _init_edgar()
+
+    filings_by_form = fetch_filings_for_ticker(
+        ticker, ("10-K", "10-Q", "8-K"), edgar_module=edgar_module
+    )
+
+    fund_df = _compute_ticker_fundamentals(
+        filings_by_form.get("10-K", []),
+        filings_by_form.get("10-Q", []),
+    )
+
+    event_signal = _compute_event_signal(
+        filings_by_form.get("8-K", []),
+        target_index,
+    )
+
+    if fund_df.empty:
+        result = pd.DataFrame(
+            0.0,
+            index=target_index,
+            columns=[
+                "revenue_growth", "operating_margin", "debt_to_equity",
+                "current_ratio", "fcf_yield", "event_signal",
+            ],
+        )
+    else:
+        aligned = fund_df.reindex(target_index).ffill().bfill()
+        aligned["event_signal"] = event_signal.values
+        result = aligned
+
+    if cache_path is not None:
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        result.to_parquet(cache_path)
+
+    return result
+
+
+def compute_composite_fundamental_scores(
+    all_ticker_fundamentals: Dict[str, pd.DataFrame],
+    tickers: List[str],
+    target_index: pd.DatetimeIndex,
+    weights: Optional[Dict[str, float]] = None,
+) -> pd.DataFrame:
+    """
+    Combine per-ticker fundamental DataFrames into a single composite
+    score DataFrame with columns = tickers, index = target_index.
+
+    Each ticker's features are z-scored cross-sectionally, then
+    combined with *weights* (defaults to FUNDAMENTAL_WEIGHTS).
+    """
+    if weights is None:
+        weights = FUNDAMENTAL_WEIGHTS
+
+    feature_names = [
+        "revenue_growth", "operating_margin", "debt_to_equity",
+        "fcf_yield", "event_signal",
+    ]
+
+    panels: Dict[str, pd.DataFrame] = {}
+    for feat in feature_names:
+        feat_df = pd.DataFrame(index=target_index)
+        for t in tickers:
+            if t in all_ticker_fundamentals and feat in all_ticker_fundamentals[t].columns:
+                feat_df[t] = all_ticker_fundamentals[t][feat].reindex(target_index)
+            else:
+                feat_df[t] = 0.0
+        feat_df = feat_df.ffill().bfill().fillna(0.0)
+        panels[feat] = feat_df
+
+    # Cross-sectional z-score per date and feature
+    z_panels: Dict[str, pd.DataFrame] = {}
+    for feat, df in panels.items():
+        row_mean = df.mean(axis=1)
+        row_std = df.std(axis=1).replace(0, 1)
+        z_panels[feat] = df.sub(row_mean, axis=0).div(row_std, axis=0)
+
+    composite = pd.DataFrame(0.0, index=target_index, columns=tickers)
+    for feat, w in weights.items():
+        if feat in z_panels:
+            composite += w * z_panels[feat]
+
+    return composite
+
+
+def run_sec_filing_pipeline(
+    tickers: List[str],
+    target_index: pd.DatetimeIndex,
+    cache_dir: Optional[Path] = None,
+    identity: str = "FinLossFunctions research@example.com",
+) -> pd.DataFrame:
+    """
+    Top-level entry: fetch SEC data for all *tickers*, compute composite
+    fundamental scores, return DataFrame (dates x tickers).
+    """
+    edgar_module = _init_edgar(identity)
+
+    all_funds: Dict[str, pd.DataFrame] = {}
+    for ticker in tickers:
+        logger.info("Processing SEC filings for %s", ticker)
+        try:
+            df = run_sec_pipeline_for_ticker(
+                ticker, target_index, cache_dir=cache_dir,
+                edgar_module=edgar_module,
+            )
+            all_funds[ticker] = df
+        except Exception as exc:
+            logger.warning("SEC pipeline failed for %s: %s", ticker, exc)
+
+    return compute_composite_fundamental_scores(
+        all_funds, tickers, target_index
+    )
diff --git a/financial_loss_functions/src/data_processing/dataset.py b/financial_loss_functions/src/data_processing/dataset.py
index 10967068..70ab10af 100644
--- a/financial_loss_functions/src/data_processing/dataset.py
+++ b/financial_loss_functions/src/data_processing/dataset.py
@@ -70,6 +70,8 @@ def extract_features(self, train_df: pd.DataFrame):
         features = []
 
         for col in train_df.columns:
+            if self.col_sep not in col:
+                continue
             t, f = self._split_col(col)
             tickers.append(t)
             features.append(f)
@@ -79,6 +81,8 @@ def extract_features(self, train_df: pd.DataFrame):
         # Features must be identical for all tickers
         features_by_ticker = {t: set() for t in self.tickers}
         for col in train_df.columns:
+            if self.col_sep not in col:
+                continue
             t, f = self._split_col(col)
             features_by_ticker[t].add(f)
         
diff --git a/financial_loss_functions/src/evaluation/__init__.py b/financial_loss_functions/src/evaluation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/financial_loss_functions/src/evaluation/pyfolio_viz.py b/financial_loss_functions/src/evaluation/pyfolio_viz.py
new file mode 100644
index 00000000..8494c745
--- /dev/null
+++ b/financial_loss_functions/src/evaluation/pyfolio_viz.py
@@ -0,0 +1,220 @@
+"""
+pyfolio integration for portfolio strategy visualization.
+
+Converts model allocation weights and return arrays into the
+pandas-based formats that pyfolio expects, then generates
+comparison tearsheets across strategies.
+"""
+
+import logging
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Data conversion
+# ---------------------------------------------------------------------------
+
+def weights_to_pyfolio(
+    weights: np.ndarray,
+    returns: np.ndarray,
+    tickers: List[str],
+    window_dates: List[pd.DatetimeIndex],
+    benchmark_returns: pd.Series,
+    initial_capital: float = 1_000_000.0,
+) -> Dict[str, pd.DataFrame]:
+    """
+    Convert rolling-window model outputs into pyfolio-compatible data.
+
+    @param weights np.ndarray (W, N) portfolio weights per window.
+    @param returns np.ndarray (W, T_out, N) future returns per window.
+    @param tickers list[str] ordered ticker symbols.
+    @param window_dates list[pd.DatetimeIndex] dates for each window's
+        T_out period.  ``len(window_dates)`` must equal W, and each
+        element has length T_out.
+    @param benchmark_returns pd.Series daily benchmark returns indexed
+        by date (e.g. sprtrn).
+    @param initial_capital float notional starting capital for the
+        positions table.
+
+    @return dict with keys:
+        ``returns``  – pd.Series (daily non-cumulative portfolio returns)
+        ``positions`` – pd.DataFrame (daily dollar notionals per ticker + cash)
+        ``benchmark_rets`` – pd.Series (aligned benchmark returns)
+    """
+    W, T_out, N = returns.shape
+
+    all_port_rets: Dict[pd.Timestamp, float] = {}
+    all_positions: Dict[pd.Timestamp, Dict[str, float]] = {}
+
+    portfolio_value = initial_capital
+
+    for w_idx in range(W):
+        w = weights[w_idx]  # (N,)
+        for t in range(T_out):
+            date = window_dates[w_idx][t]
+            if date in all_port_rets:
+                continue
+
+            daily_ret = float((w * returns[w_idx, t, :]).sum())
+            all_port_rets[date] = daily_ret
+
+            pos = {}
+            for k, ticker in enumerate(tickers):
+                pos[ticker] = portfolio_value * w[k]
+            pos["cash"] = 0.0
+            all_positions[date] = pos
+
+            portfolio_value *= (1 + daily_ret)
+
+    ret_series = pd.Series(all_port_rets).sort_index()
+    ret_series.index = pd.to_datetime(ret_series.index)
+    ret_series.index.name = None
+
+    pos_df = pd.DataFrame.from_dict(all_positions, orient="index").sort_index()
+    pos_df.index = pd.to_datetime(pos_df.index)
+    pos_df.index.name = None
+
+    aligned_bench = benchmark_returns.reindex(ret_series.index).fillna(0.0)
+
+    return {
+        "returns": ret_series,
+        "positions": pos_df,
+        "benchmark_rets": aligned_bench,
+    }
+
+
+def build_window_dates(
+    full_date_index: pd.DatetimeIndex,
+    good_starts: np.ndarray,
+    in_size: int,
+    out_size: int,
+) -> List[pd.DatetimeIndex]:
+    """
+    Compute the T_out date ranges for each rolling window.
+
+    @param full_date_index pd.DatetimeIndex  All dates in the split.
+    @param good_starts np.ndarray  Starting indices of valid windows.
+    @param in_size int  Input window length.
+    @param out_size int  Output window length.
+
+    @return list[pd.DatetimeIndex] one DatetimeIndex per window.
+    """
+    window_dates = []
+    for s in good_starts:
+        start = s + in_size
+        end = start + out_size
+        window_dates.append(full_date_index[start:end])
+    return window_dates
+
+
+# ---------------------------------------------------------------------------
+# Tearsheet generation
+# ---------------------------------------------------------------------------
+
+def _try_import_pyfolio():
+    """Lazy-import pyfolio (the reloaded fork)."""
+    try:
+        import pyfolio as pf
+        return pf
+    except ImportError as exc:
+        raise ImportError(
+            "pyfolio-reloaded is required for tearsheet generation. "
+            "Install with: pip install pyfolio-reloaded"
+        ) from exc
+
+
+def generate_returns_tearsheet(
+    strategy_data: Dict,
+    title: str = "",
+    output_path: Optional[Path] = None,
+):
+    """
+    Generate a pyfolio returns tearsheet for a single strategy.
+
+    @param strategy_data dict  Output of ``weights_to_pyfolio``.
+    @param title str  Optional plot title prefix.
+    @param output_path Path  If provided, save the figure to this path.
+    """
+    pf = _try_import_pyfolio()
+
+    fig = pf.create_returns_tear_sheet(
+        strategy_data["returns"],
+        benchmark_rets=strategy_data["benchmark_rets"],
+        return_fig=True,
+    )
+
+    if fig is not None and output_path is not None:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(str(output_path), dpi=150, bbox_inches="tight")
+        logger.info("Tearsheet saved to %s", output_path)
+
+    plt.close("all")
+
+
+def generate_comparison_tearsheets(
+    strategies: Dict[str, Dict],
+    output_dir: Path,
+):
+    """
+    Generate and save a pyfolio returns tearsheet for each strategy.
+
+    @param strategies dict  Mapping strategy name -> pyfolio data dict.
+    @param output_dir Path  Directory to save tearsheet PNGs.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    for name, data in strategies.items():
+        safe_name = name.replace(" ", "_").replace("/", "_")
+        out_path = output_dir / f"tearsheet_{safe_name}.png"
+        try:
+            generate_returns_tearsheet(data, title=name, output_path=out_path)
+            logger.info("Generated tearsheet for %s", name)
+        except Exception as exc:
+            logger.warning("Tearsheet generation failed for %s: %s", name, exc)
+
+
+# ---------------------------------------------------------------------------
+# Comparison summary table
+# ---------------------------------------------------------------------------
+
+def comparison_summary(
+    strategies: Dict[str, Dict],
+) -> pd.DataFrame:
+    """
+    Compute summary performance metrics for all strategies.
+
+    @param strategies dict  strategy name -> pyfolio data dict.
+    @return pd.DataFrame  Rows = strategies, columns = metrics.
+    """
+    rows = []
+    for name, data in strategies.items():
+        rets = data["returns"]
+        ann_factor = 252
+        total_ret = float((1 + rets).prod() - 1)
+        ann_ret = float((1 + total_ret) ** (ann_factor / max(len(rets), 1)) - 1)
+        ann_vol = float(rets.std() * np.sqrt(ann_factor))
+        sharpe = ann_ret / ann_vol if ann_vol > 0 else 0.0
+
+        cum = (1 + rets).cumprod()
+        peak = cum.cummax()
+        dd = (cum - peak) / peak
+        max_dd = float(dd.min())
+
+        rows.append({
+            "strategy": name,
+            "total_return": round(total_ret, 4),
+            "ann_return": round(ann_ret, 4),
+            "ann_volatility": round(ann_vol, 4),
+            "sharpe_ratio": round(sharpe, 4),
+            "max_drawdown": round(max_dd, 4),
+        })
+    return pd.DataFrame(rows).set_index("strategy")
diff --git a/financial_loss_functions/src/models/DeformTime/DeformTime.py b/financial_loss_functions/src/models/DeformTime/DeformTime.py
new file mode 100644
index 00000000..8adcda11
--- /dev/null
+++ b/financial_loss_functions/src/models/DeformTime/DeformTime.py
@@ -0,0 +1,189 @@
+import torch
+from torch import nn
+from src.models.registry import NNModelLibrary
+
+from src.models.DeformTime.layers.TemporalDeformAttention import Encoder, CrossDeformAttn
+from src.models.DeformTime.layers.Embed import Deform_Temporal_Embedding, Local_Temporal_Embedding
+from math import ceil
+
+class Layernorm(nn.Module):
+    def __init__(self, dim):
+        super(Layernorm, self).__init__()
+        self.layernorm = nn.LayerNorm(dim)
+
+    def forward(self, x):
+        x_hat = self.layernorm(x)
+        bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
+        return x_hat - bias
+
+
+@NNModelLibrary.register(category='transformer')
+class DeformTime(nn.Module):
+    def __init__(
+            self,
+            input_size: int,
+            num_stocks: int,
+            max_seq_len: int,
+            e_layers: int,
+            d_layers: int,
+            d_model: int,
+            nheads: int,
+            kernel_size: int,
+            dropout: float,
+            n_reshape: int,
+            patch_len: int,
+            stride: int
+    ) -> None:
+        super().__init__()
+
+        self.input_size = input_size
+        self.num_stocks = num_stocks
+        
+        self.d_layers = d_layers
+        self.d_model = d_model
+
+        # Embedding
+        if self.input_size == 1:
+            self.enc_value_embedding = Deform_Temporal_Embedding(self.input_size, self.d_model, freq='d')
+        else:
+            self.s_group = 4
+            assert self.d_model % self.s_group == 0
+            # Embedding local patches
+            self.pad_in_len = ceil(1.0 * self.input_size / self.s_group) * self.s_group
+            self.enc_value_embedding = Local_Temporal_Embedding(self.pad_in_len//self.s_group, self.d_model, self.pad_in_len-self.input_size, self.s_group)
+
+        self.pre_norm = nn.LayerNorm(self.d_model)
+        # Encoder
+        n_days = [1,n_reshape,n_reshape]
+        assert len(n_days) > e_layers-1
+        drop_path_rate=dropout
+        dpr = [x.item() for x in torch.linspace(drop_path_rate, drop_path_rate, e_layers)]
+        self.encoder = Encoder(
+            [
+                CrossDeformAttn(seq_len=max_seq_len, 
+                                d_model=self.d_model, 
+                                n_heads=nheads, 
+                                dropout=dropout, 
+                                droprate=dpr[l], 
+                                n_days=n_days[l], 
+                                window_size=kernel_size, 
+                                patch_len=patch_len, 
+                                stride=stride) for l in range(e_layers)
+            ],
+            norm_layer=Layernorm(self.d_model)
+        )
+
+        # GRU layers
+        self.gru = torch.nn.GRU(
+            self.d_model, self.d_model, self.d_layers, batch_first=True, dropout=dropout
+        )
+
+        # @author: Atharva Vaidya - This head helps in converting the encoded DeformTime context into portfolio logits expected by the loss pipeline.
+        self.fc = nn.Sequential(
+            nn.Linear(self.d_model, self.d_model),
+            nn.LeakyReLU(),
+            nn.Linear(self.d_model, self.num_stocks)
+        )
+
+    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
+        """
+        Variables
+            • x_enc
+                type: tensor of numbers
+                usage: used to store the input feature window that is passed from the
+                       trainer for DeformTime encoding
+            • x_mark_enc
+                type: tensor of numbers or empty value
+                usage: reserved encoder marker input that is accepted for interface
+                       compatibility but is not used in this runtime path
+            • x_dec
+                type: tensor of numbers or empty value
+                usage: reserved decoder input kept for compatibility with the
+                       surrounding model interface
+            • x_mark_dec
+                type: tensor of numbers or empty value
+                usage: reserved decoder marker input kept for compatibility with the
+                       surrounding model interface
+            • mean_enc
+                type: tensor of numbers
+                usage: used to store the per-window mean so the input can be
+                       stationarized before attention is applied
+            • std_enc
+                type: tensor of numbers
+                usage: used to store the per-window standard deviation so the input
+                       scaling remains numerically stable
+            • enc_out
+                type: tensor of numbers
+                usage: used to store the encoded deformable-attention representation
+                       produced by the encoder
+            • h0
+                type: tensor of numbers
+                usage: used to store the initial hidden state passed into the GRU
+            • out
+                type: tensor of numbers
+                usage: used to store the GRU sequence output before collapsing it to
+                       a single allocation context
+            • context
+                type: tensor of numbers
+                usage: used to store the last GRU state which becomes the compact
+                       portfolio-allocation context for the final head
+
+        forecast now extracts a portfolio-allocation context from the encoded sequence
+        instead of reconstructing the original feature space. @author: Atharva Vaidya
+        """
+        assert x_enc.shape[-1] == self.input_size
+
+        # Series Stationarization adopted from NSformer, optional
+        mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E
+        x_enc = x_enc - mean_enc
+        std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
+        x_enc = x_enc / std_enc
+
+        x_enc = self.enc_value_embedding(x_enc)
+        x_enc = self.pre_norm(x_enc)
+
+        # Deformed attention
+        enc_out, _ = self.encoder(x_enc) 
+
+        # Decoder
+        h0 = torch.zeros(self.d_layers, x_enc.size(0), self.d_model).requires_grad_().to(x_enc.device)
+        out, _ = self.gru(enc_out, h0.detach())
+        # Extract the final GRU state so one context vector represents the allocation decision.
+        context = out[:, -1, :]
+        # Apply the portfolio head to convert the context vector into allocation logits.
+        return self.fc(context)
+    
+    def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None, mask=None):
+        """
+        Variables
+            • x_enc
+                type: tensor of numbers
+                usage: used to store the encoded input window passed from the trainer
+                       for a portfolio-weight prediction
+            • x_mark_enc
+                type: tensor of numbers or empty value
+                usage: reserved encoder marker input that is kept for interface
+                       compatibility
+            • x_dec
+                type: tensor of numbers or empty value
+                usage: reserved decoder input that is kept for interface compatibility
+            • x_mark_dec
+                type: tensor of numbers or empty value
+                usage: reserved decoder marker input that is kept for interface
+                       compatibility
+            • mask
+                type: tensor of numbers or empty value
+                usage: reserved mask input accepted for compatibility with the model
+                       interface
+            • logits
+                type: tensor of numbers
+                usage: used to store the raw portfolio scores returned from forecast
+                       before normalization
+
+        forward normalizes the DeformTime logits into portfolio weights expected by
+        the training and loss pipeline. @author: Atharva Vaidya
+        """
+        logits = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
+        # Normalize the logits so the downstream losses receive portfolio weights.
+        return torch.softmax(logits, dim=-1)
+
diff --git a/financial_loss_functions/src/models/DeformTime/__init__.py b/financial_loss_functions/src/models/DeformTime/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/financial_loss_functions/src/models/DeformTime/layers/Embed.py b/financial_loss_functions/src/models/DeformTime/layers/Embed.py
new file mode 100644
index 00000000..fd754c02
--- /dev/null
+++ b/financial_loss_functions/src/models/DeformTime/layers/Embed.py
@@ -0,0 +1,231 @@
+import torch
+import torch.nn as nn
+import math
+from einops import rearrange
+
+class PositionalEmbedding(nn.Module):
+    def __init__(self, d_model, max_len=5000):
+        super(PositionalEmbedding, self).__init__()
+        # Compute the positional encodings once in log space.
+        pe = torch.zeros(max_len, d_model).float()
+        pe.require_grad = False
+
+        position = torch.arange(0, max_len).float().unsqueeze(1)
+        div_term = (torch.arange(0, d_model, 2).float()
+                    * -(math.log(10000.0) / d_model)).exp()
+
+        pe[:, 0::2] = torch.sin(position * div_term)
+        pe[:, 1::2] = torch.cos(position * div_term)
+
+        pe = pe.unsqueeze(0)
+        self.register_buffer('pe', pe)
+
+    def forward(self, x):
+        return self.pe[:, :x.size(1)]
+
+
+class TokenEmbedding(nn.Module):
+    def __init__(self, c_in, d_model):
+        super(TokenEmbedding, self).__init__()
+        padding = 1 if torch.__version__ >= '1.5.0' else 2
+        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
+                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
+        for m in self.modules():
+            if isinstance(m, nn.Conv1d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode='fan_in', nonlinearity='leaky_relu')
+
+    def forward(self, x):
+        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
+        return x
+
+
+class FixedEmbedding(nn.Module):
+    def __init__(self, c_in, d_model):
+        super(FixedEmbedding, self).__init__()
+
+        w = torch.zeros(c_in, d_model).float()
+        w.require_grad = False
+
+        position = torch.arange(0, c_in).float().unsqueeze(1)
+        div_term = (torch.arange(0, d_model, 2).float()
+                    * -(math.log(10000.0) / d_model)).exp()
+
+        w[:, 0::2] = torch.sin(position * div_term)
+        w[:, 1::2] = torch.cos(position * div_term)
+
+        self.emb = nn.Embedding(c_in, d_model)
+        self.emb.weight = nn.Parameter(w, requires_grad=False)
+
+    def forward(self, x):
+        return self.emb(x).detach()
+
+
+class TemporalEmbedding(nn.Module):
+    def __init__(self, d_model, embed_type='fixed', freq='h'):
+        super(TemporalEmbedding, self).__init__()
+
+        minute_size = 4
+        hour_size = 24
+        weekday_size = 7
+        day_size = 32
+        month_size = 13
+
+        Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
+        if freq == 't':
+            self.minute_embed = Embed(minute_size, d_model)
+        self.hour_embed = Embed(hour_size, d_model)
+        self.weekday_embed = Embed(weekday_size, d_model)
+        self.day_embed = Embed(day_size, d_model)
+        self.month_embed = Embed(month_size, d_model)
+
+    def forward(self, x):
+        x = x.long()
+        minute_x = self.minute_embed(x[:, :, 4]) if hasattr(
+            self, 'minute_embed') else 0.
+        hour_x = self.hour_embed(x[:, :, 3])
+        weekday_x = self.weekday_embed(x[:, :, 2])
+        day_x = self.day_embed(x[:, :, 1])
+        month_x = self.month_embed(x[:, :, 0])
+
+        return hour_x + weekday_x + day_x + month_x + minute_x
+
+
+class TimeFeatureEmbedding(nn.Module):
+    def __init__(self, d_model, embed_type='timeF', freq='h'):
+        super(TimeFeatureEmbedding, self).__init__()
+
+        freq_map = {'h': 4, 't': 5, 's': 6,
+                    'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
+        d_inp = freq_map[freq]
+        self.embed = nn.Linear(d_inp, d_model, bias=False)
+
+    def forward(self, x):
+        return self.embed(x)
+
+
+class DataEmbedding(nn.Module):
+    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
+        super(DataEmbedding, self).__init__()
+
+        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
+        self.position_embedding = PositionalEmbedding(d_model=d_model)
+        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
+                                                    freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
+            d_model=d_model, embed_type=embed_type, freq=freq)
+        self.dropout = nn.Dropout(p=dropout)
+
+    def forward(self, x, x_mark):
+        if x_mark is None:
+            x = self.value_embedding(x) + self.position_embedding(x)
+        else:
+            x = self.value_embedding(
+                x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
+        return self.dropout(x)
+
+
+class DataEmbedding_inverted(nn.Module):
+    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
+        super(DataEmbedding_inverted, self).__init__()
+        self.value_embedding = nn.Linear(c_in, d_model)
+        self.dropout = nn.Dropout(p=dropout)
+
+    def forward(self, x, x_mark):
+        x = x.permute(0, 2, 1)
+        # x: [Batch Variate Time]
+        if x_mark is None:
+            x = self.value_embedding(x)
+        else:
+            x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1))
+        # x: [Batch Variate d_model]
+        return self.dropout(x)
+
+
+class DataEmbedding_wo_pos(nn.Module):
+    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
+        super(DataEmbedding_wo_pos, self).__init__()
+
+        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
+        self.position_embedding = PositionalEmbedding(d_model=d_model)
+        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
+                                                    freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
+            d_model=d_model, embed_type=embed_type, freq=freq)
+        self.dropout = nn.Dropout(p=dropout)
+
+    def forward(self, x, x_mark):
+        if x_mark is None:
+            x = self.value_embedding(x)
+        else:
+            x = self.value_embedding(x) + self.temporal_embedding(x_mark)
+        return self.dropout(x)
+
+
+class PatchEmbedding(nn.Module):
+    def __init__(self, d_model, patch_len, stride, padding, dropout):
+        super(PatchEmbedding, self).__init__()
+        # Patching
+        self.patch_len = patch_len
+        self.stride = stride
+        self.padding_patch_layer = nn.ReplicationPad1d((0, padding))
+
+        # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space
+        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
+
+        # Positional embedding
+        self.position_embedding = PositionalEmbedding(d_model)
+
+        # Residual dropout
+        self.dropout = nn.Dropout(dropout)
+
+    def forward(self, x):
+        # do patching
+        n_vars = x.shape[1]
+        x = self.padding_patch_layer(x)
+        x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
+        x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
+        # Input encoding
+        x = self.value_embedding(x) + self.position_embedding(x)
+        return self.dropout(x), n_vars
+    
+
+class Deform_Temporal_Embedding(nn.Module):
+    def __init__(self, d_inp, d_model, embed_type='fixed', freq='h', dropout=0.1):
+        super(Deform_Temporal_Embedding, self).__init__()
+
+        self.value_embedding = nn.Linear(d_inp, d_model, bias=False)
+        self.position_embedding = PositionalEmbedding(d_model)
+
+        self.dropout = nn.Dropout(p=dropout)
+
+    def forward(self, x):
+        x = self.value_embedding(x) + self.position_embedding(x)
+        return self.dropout(x)
+
+class Local_Temporal_Embedding(nn.Module):
+    def __init__(self, d_inp, d_model, padding, sub_groups=8, dropout=0.1):
+        super(Local_Temporal_Embedding, self).__init__()
+
+        d_out = d_model // sub_groups if d_model % sub_groups == 0 else d_model // sub_groups + 1
+        self.sub_seqlen = d_inp
+        self.padding_patch_layer = nn.ReplicationPad1d((0, padding))
+        self.value_embedding = nn.Linear(d_inp, d_out, bias=False)
+                
+        self.position_embedding = PositionalEmbedding(d_model)
+        self.dropout = nn.Dropout(p=dropout)
+        self.d_model = d_model
+
+    def forward(self, x):
+        # The in_channel is still fully conv with the out channel
+        # the number of output channels (out_channels) determines 
+        # the number of filters applied to the input, and each filter 
+        # processes the input across all input channels
+        B, L, C = x.shape
+        x = self.padding_patch_layer(x)
+        x = x.unfold(dimension=-1, size=self.sub_seqlen, step=self.sub_seqlen)
+        x = rearrange(x, 'b l g c -> (b g) l c')
+        # x = x.permute(0, 2, 1)
+        x = self.value_embedding(x)
+        # .permute(0, 2, 1)
+        x = rearrange(x, '(b g) l c -> b l (g c)', b = B)[:,:,:self.d_model]
+        x = x + self.position_embedding(x)
+        return self.dropout(x)
\ No newline at end of file
diff --git a/financial_loss_functions/src/models/DeformTime/layers/MLP.py b/financial_loss_functions/src/models/DeformTime/layers/MLP.py
new file mode 100644
index 00000000..f971bc89
--- /dev/null
+++ b/financial_loss_functions/src/models/DeformTime/layers/MLP.py
@@ -0,0 +1,132 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+activation_functions = {
+    'tanh': nn.Tanh(),
+    'relu': nn.ReLU(),
+    'elu': nn.ELU(),
+    'sigmoid': nn.Sigmoid(),
+    'gelu':nn.GELU()
+    }
+    
+class LipSwish(torch.nn.Module):
+    def forward(self, x):
+        return 0.909 * F.silu(x)
+
+class MLPLipSwish(torch.nn.Module):
+    def __init__(self, in_size, out_size, mlp_size, num_layers, tanh):
+        super().__init__()
+
+        model = [torch.nn.Linear(in_size, mlp_size),
+                 LipSwish()]
+        for _ in range(num_layers - 1):
+            model.append(torch.nn.Linear(mlp_size, mlp_size))
+            ###################
+            # LipSwish activations are useful to constrain the Lipschitz constant of the discriminator.
+            # (For simplicity we additionally use them in the generator, but that's less important.)
+            ###################
+            model.append(LipSwish())
+        model.append(torch.nn.Linear(mlp_size, out_size))
+        if tanh:
+            model.append(torch.nn.Tanh())
+        self._model = torch.nn.Sequential(*model)
+
+    def forward(self, x):
+        return self._model(x)
+    
+class MLP(nn.Module):
+    # layer_sizes[0] is the dimension of the input
+    # layer_sizes[-1] is the dimension of the output
+    def __init__(self, layer_sizes, final_relu=False, drop_out=0.7):
+        super().__init__()
+        layer_list = []
+        layer_sizes = [int(x) for x in layer_sizes]
+        num_layers = len(layer_sizes) - 1
+        final_relu_layer = num_layers if final_relu else num_layers - 1
+        for i in range(len(layer_sizes) - 1):
+            input_size = layer_sizes[i]
+            curr_size = layer_sizes[i + 1]
+            if i < final_relu_layer:
+                layer_list.append(nn.ReLU(inplace=False))
+            if drop_out != 0:
+                layer_list.append(nn.Dropout(drop_out))
+            layer_list.append(nn.Linear(input_size, curr_size))
+        self.net = nn.Sequential(*layer_list)
+        self.last_linear = self.net[-1]
+
+    def forward(self, x):
+        return self.net(x)
+    
+class FreqMLP(nn.Module):
+    def __init__(self, layer_sizes, final_relu=False, drop_out=0.7) -> None:
+        super().__init__()
+        layer_list = []
+        layer_sizes = [int(x) for x in layer_sizes]
+        num_layers = len(layer_sizes) - 1
+        final_relu_layer = num_layers if final_relu else num_layers - 1
+        for i in range(len(layer_sizes) - 1):
+            input_size = layer_sizes[i]
+            curr_size = layer_sizes[i + 1]
+            if i < final_relu_layer:
+                layer_list.append(nn.ReLU(inplace=False))
+            if drop_out != 0:
+                layer_list.append(nn.Dropout(drop_out))
+            layer_list.append(nn.Linear(input_size, curr_size))
+        self.net = nn.Sequential(*layer_list)
+        self.last_linear = self.net[-1]
+
+        self.r1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
+        self.i1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
+        self.rb1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
+        self.ib1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
+        self.r2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
+        self.i2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
+        self.rb2 = nn.Parameter(self.scale * torch.randn(self.embed_size))
+        self.ib2 = nn.Parameter(self.scale * torch.randn(self.embed_size))
+    
+    # frequency temporal learner
+    def MLP_temporal(self, x, B, N, L):
+        # [B, N, T, D]
+        x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on L dimension
+        y = self.FreMLP(B, N, L, x, self.r2, self.i2, self.rb2, self.ib2)
+        x = torch.fft.irfft(y, n=self.seq_len, dim=2, norm="ortho")
+        return x
+    
+    # frequency channel learner
+    def MLP_channel(self, x, B, N, L):
+        # [B, N, T, D]
+        x = x.permute(0, 2, 1, 3)
+        # [B, T, N, D]
+        x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on N dimension
+        y = self.FreMLP(B, L, N, x, self.r1, self.i1, self.rb1, self.ib1)
+        x = torch.fft.irfft(y, n=self.feature_size, dim=2, norm="ortho")
+        x = x.permute(0, 2, 1, 3)
+        # [B, N, T, D]
+        return x
+    
+    # frequency-domain MLPs
+    # dimension: FFT along the dimension, r: the real part of weights, i: the imaginary part of weights
+    # rb: the real part of bias, ib: the imaginary part of bias
+    def FreMLP(self, B, nd, dimension, x, r, i, rb, ib):
+        o1_real = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size],
+                              device=x.device)
+        o1_imag = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size],
+                              device=x.device)
+
+        o1_real = F.relu(
+            torch.einsum('bijd,dd->bijd', x.real, r) - \
+            torch.einsum('bijd,dd->bijd', x.imag, i) + \
+            rb
+        )
+
+        o1_imag = F.relu(
+            torch.einsum('bijd,dd->bijd', x.imag, r) + \
+            torch.einsum('bijd,dd->bijd', x.real, i) + \
+            ib
+        )
+
+        y = torch.stack([o1_real, o1_imag], dim=-1)
+        y = F.softshrink(y, lambd=self.sparsity_threshold)
+        y = torch.view_as_complex(y)
+        return y
\ No newline at end of file
diff --git a/financial_loss_functions/src/models/DeformTime/layers/TemporalDeformAttention.py b/financial_loss_functions/src/models/DeformTime/layers/TemporalDeformAttention.py
new file mode 100644
index 00000000..61b27b3c
--- /dev/null
+++ b/financial_loss_functions/src/models/DeformTime/layers/TemporalDeformAttention.py
@@ -0,0 +1,457 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from timm.layers import trunc_normal_
+from src.models.DeformTime.layers.MLP import MLP
+from src.models.DeformTime.utils.functions import num_patches
+
+
+def normal_init(module, mean=0, std=1, bias=0):
+    if hasattr(module, 'weight') and module.weight is not None:
+        nn.init.normal_(module.weight, mean, std)
+    if hasattr(module, 'bias') and module.bias is not None:
+        nn.init.constant_(module.bias, bias)
+
+def constant_init(module, val, bias=0):
+    if hasattr(module, 'weight') and module.weight is not None:
+        nn.init.constant_(module.weight, val)
+    if hasattr(module, 'bias') and module.bias is not None:
+        nn.init.constant_(module.bias, bias)
+
+class series_decomp(nn.Module):
+    """
+    Series decomposition block
+    """
+
+    def __init__(self, kernel_size):
+        super(series_decomp, self).__init__()
+        self.moving_avg = moving_avg(kernel_size, stride=1)
+
+    def forward(self, x):
+        moving_mean = self.moving_avg(x)
+        res = x - moving_mean
+        return res, moving_mean
+
+class moving_avg(nn.Module):
+    """
+    Moving average block to highlight the trend of time series
+    """
+
+    def __init__(self, kernel_size, stride):
+        super(moving_avg, self).__init__()
+        self.kernel_size = kernel_size
+        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)
+
+    def forward(self, x):
+        # padding on the both ends of time series
+        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
+        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
+        x = torch.cat([front, x, end], dim=1)
+        x = self.avg(x.permute(0, 2, 1))
+        x = x.permute(0, 2, 1)
+        return x
+    
+def drop_path(x, drop_prob: float = 0., training: bool = False):
+    """
+    From: https://github.com/huggingface/pytorch-image-models
+
+    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+
+    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+    'survival rate' as the argument.
+
+    DropPath is dropping an entire sample from the batch while Dropout is dropping random values
+    """
+    if drop_prob == 0. or not training:
+        return x
+    keep_prob = 1 - drop_prob
+    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  
+    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
+    random_tensor.floor_()  # binarize
+    output = x.div(keep_prob) * random_tensor
+    return output
+
+class DropPath(nn.Module):
+    """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
+    """
+    def __init__(self, drop_prob=None):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training)
+
+class LayerScale(nn.Module):
+    def __init__(self,
+                 dim: int,
+                 inplace: bool = False,
+                 init_values: float = 1e-5):
+        super().__init__()
+        self.inplace = inplace
+        self.weight = nn.Parameter(torch.ones(dim) * init_values)
+
+    def forward(self, x):
+        if self.inplace:
+            return x.mul_(self.weight.view(-1, 1, 1))
+        else:
+            return x * self.weight.view(-1, 1, 1)
+
+class LayerNorm(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)
+
+    def forward(self, x):
+        x = self.norm(x)
+        return x
+
+class LayerNormProxy(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)
+
+    def forward(self, x):
+        x = rearrange(x, 'b c l -> b l c')
+        x = self.norm(x)
+        return rearrange(x, 'b l c -> b c l')
+
+
+class LayerNormProxy2D(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)
+
+    def forward(self, x):
+        x = rearrange(x, 'b c h w -> b h w c')
+        x = self.norm(x)
+        return rearrange(x, 'b h w c -> b c h w')
+
+
+class Encoder(nn.Module):
+    def __init__(self, attn_layers, norm_layer=None):
+        super(Encoder, self).__init__()
+        self.attn_layers = nn.ModuleList(attn_layers)
+        self.norm = norm_layer
+
+    def forward(self, x, attn_mask=None, tau=None, delta=None):
+        # x [B, L, D]
+        attns = []
+        for attn_layer in self.attn_layers:
+            x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
+            attns.append(attn)
+
+        if self.norm is not None:
+            x = self.norm(x)
+        return x, attns
+
+
+class DeformAtten1D(nn.Module):
+    '''
+        max_offset (int): The maximum magnitude of the offset residue. Default: 14.
+    '''
+    def __init__(self, seq_len, d_model, n_heads, dropout, kernel=5, n_groups=4, no_off=False, rpb=True) -> None:
+        super().__init__()
+        self.offset_range_factor = kernel
+        self.no_off = no_off
+        self.seq_len = seq_len
+        self.d_model = d_model 
+        self.n_groups = n_groups
+        self.n_group_channels = self.d_model // self.n_groups
+        self.n_heads = n_heads
+        self.n_head_channels = self.d_model // self.n_heads
+        self.n_group_heads = self.n_heads // self.n_groups
+        self.scale = self.n_head_channels ** -0.5
+        self.rpb = rpb
+
+        self.proj_q = nn.Conv1d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0)
+        self.proj_k = nn.Conv1d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0)
+        self.proj_v = nn.Conv1d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0)
+        self.proj_out = nn.Linear(self.d_model, self.d_model)
+        kernel_size = kernel
+        self.stride = 1
+        pad_size = kernel_size // 2 if kernel_size != self.stride else 0
+        self.proj_offset = nn.Sequential(
+            nn.Conv1d(self.n_group_channels, self.n_group_channels, kernel_size=kernel_size, stride=self.stride, padding=pad_size),
+            nn.Conv1d(self.n_group_channels, 1, kernel_size=1, stride=self.stride, padding=pad_size),
+        )
+
+        self.scale_factor = self.d_model ** -0.5  # 1/np.sqrt(dim)
+
+        if self.rpb:
+            self.relative_position_bias_table = nn.Parameter(
+                torch.zeros(1, self.d_model, self.seq_len))
+            trunc_normal_(self.relative_position_bias_table, std=.02)
+
+    def forward(self, x, mask=None):
+        B, L, C = x.shape
+        dtype, device = x.dtype, x.device
+        x = x.permute(0,2,1) # B, C, L
+
+        q = self.proj_q(x) # B, C, L
+
+        group = lambda t: rearrange(t, 'b (g d) n -> (b g) d n', g = self.n_groups)
+
+        grouped_queries = group(q)
+
+        offset = self.proj_offset(grouped_queries) # B * g 1 Lg
+        offset = rearrange(offset, 'b 1 n -> b n')
+
+        def grid_sample_1d(feats, grid, *args, **kwargs):
+            # does 1d grid sample by reshaping it to 2d
+            grid = rearrange(grid, '... -> ... 1 1')
+            grid = F.pad(grid, (1, 0), value = 0.)
+            feats = rearrange(feats, '... -> ... 1')
+            # the backward of F.grid_sample is non-deterministic
+            # See for details: https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html
+            out = F.grid_sample(feats, grid, **kwargs) 
+            return rearrange(out, '... 1 -> ...')
+        
+        def normalize_grid(arange, dim = 1, out_dim = -1):
+            # normalizes 1d sequence to range of -1 to 1
+            n = arange.shape[-1]
+            return 2.0 * arange / max(n - 1, 1) - 1.0
+
+        if self.offset_range_factor >= 0 and not self.no_off:
+            offset = offset.tanh().mul(self.offset_range_factor)
+
+        if self.no_off:
+            x_sampled = F.avg_pool1d(x, kernel_size=self.stride, stride=self.stride)
+        else:
+            grid = torch.arange(offset.shape[-1], device = device)
+            vgrid = grid + offset
+            vgrid_scaled = normalize_grid(vgrid)
+
+            x_sampled = grid_sample_1d(
+                group(x),
+                vgrid_scaled,
+            mode = 'bilinear', padding_mode = 'zeros', align_corners = False)[:,:,:L]
+            
+        if not self.no_off:
+            x_sampled = rearrange(x_sampled,'(b g) d n -> b (g d) n', g = self.n_groups)
+        q = q.reshape(B * self.n_heads, self.n_head_channels, L)
+        k = self.proj_k(x_sampled).reshape(B * self.n_heads, self.n_head_channels, L)
+        if self.rpb:
+            v = self.proj_v(x_sampled)
+            v = (v + self.relative_position_bias_table).reshape(B * self.n_heads, self.n_head_channels, L)
+        else:
+            v = self.proj_v(x_sampled).reshape(B * self.n_heads, self.n_head_channels, L)
+
+        scaled_dot_prod = torch.einsum('b i d , b j d -> b i j', q, k) * self.scale_factor
+
+        if mask is not None:
+            assert mask.shape == scaled_dot_prod.shape[1:]
+            scaled_dot_prod = scaled_dot_prod.masked_fill(mask, -np.inf)
+
+        attention = torch.softmax(scaled_dot_prod, dim=-1) # softmax: attention[0,0,:].sum() = 1
+
+        out = torch.einsum('b i j , b j d -> b i d', attention, v) 
+        
+        return self.proj_out(rearrange(out, '(b g) l c -> b c (g l)', b=B))
+
+
+class DeformAtten2D(nn.Module):
+    '''
+        max_offset (int): The maximum magnitude of the offset residue. Default: 14.
+    '''
+    def __init__(self, seq_len, d_model, n_heads, dropout, kernel=5, n_groups=4, no_off=False, rpb=True) -> None:
+        super().__init__()
+        self.offset_range_factor = kernel
+        self.no_off = no_off
+        self.f_sample = False
+        self.seq_len = seq_len
+        self.d_model = d_model # (512)
+        self.n_groups = n_groups
+        self.n_group_channels = self.d_model // self.n_groups
+        self.n_heads = n_heads
+        self.n_head_channels = self.d_model // self.n_heads
+        self.n_group_heads = self.n_heads // self.n_groups
+        self.scale = self.n_head_channels ** -0.5
+        self.rpb = rpb
+
+        self.proj_q = nn.Conv2d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0)
+        self.proj_k = nn.Conv2d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0)
+        self.proj_v = nn.Conv2d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0)
+        self.proj_out = nn.Linear(self.d_model, self.d_model)
+        kernel_size = kernel
+        self.stride = 1
+        pad_size = kernel_size // 2 if kernel_size != self.stride else 0
+        self.proj_offset = nn.Sequential( 
+            nn.Conv2d(self.n_group_channels, self.n_group_channels, kernel_size=kernel_size, stride=self.stride, padding=pad_size),
+            nn.Conv2d(self.n_group_channels, 2, kernel_size=1, stride=1, padding=0, bias=False)
+        )
+
+        self.scale_factor = self.d_model ** -0.5  # 1/np.sqrt(dim)
+
+        if self.rpb:
+            self.relative_position_bias_table = nn.Parameter(
+                torch.zeros(1, self.d_model, self.seq_len, 1))
+            trunc_normal_(self.relative_position_bias_table, std=.02)
+
+
+    def forward(self, x, mask=None):
+        B, H, W, C = x.shape
+        x = x.permute(0, 3, 1, 2) # B, C, H, W
+
+        q = self.proj_q(x) # B, 1, H, W
+
+        offset = self.proj_offset(q) # B, 2, H, W
+
+        if self.offset_range_factor >= 0 and not self.no_off:
+            offset = offset.tanh().mul(self.offset_range_factor)
+
+        def create_grid_like(t, dim = 0):
+            h, w, device = *t.shape[-2:], t.device
+
+            grid = torch.stack(torch.meshgrid(
+                torch.arange(w, device = device),
+                torch.arange(h, device = device),
+            indexing = 'xy'), dim = dim)
+
+            grid.requires_grad = False
+            grid = grid.type_as(t)
+            return grid
+        
+        def normalize_grid(grid, dim = 1, out_dim = -1):
+            # normalizes a grid to range from -1 to 1
+            h, w = grid.shape[-2:]
+            grid_h, grid_w = grid.unbind(dim = dim)
+
+            grid_h = 2.0 * grid_h / max(h - 1, 1) - 1.0
+            grid_w = 2.0 * grid_w / max(w - 1, 1) - 1.0
+
+            return torch.stack((grid_h, grid_w), dim = out_dim)
+
+        if self.no_off:
+            x_sampled = F.avg_pool2d(x, kernel_size=self.stride, stride=self.stride)
+        else:
+            grid =create_grid_like(offset)
+            vgrid = grid + offset
+            vgrid_scaled = normalize_grid(vgrid)
+            # the backward of F.grid_sample is non-deterministic
+            x_sampled = F.grid_sample(
+                x,
+                vgrid_scaled,
+            mode = 'bilinear', padding_mode = 'zeros', align_corners = False)[:,:,:H,:W]
+              
+        if not self.no_off:
+            x_sampled = rearrange(x_sampled, '(b g) c h w -> b (g c) h w', g=self.n_groups)
+        q = q.reshape(B * self.n_heads, H, W)
+        k = self.proj_k(x_sampled).reshape(B * self.n_heads, H, W)
+        if self.rpb:
+            v = self.proj_v(x_sampled)
+            v = (v + self.relative_position_bias_table).reshape(B * self.n_heads, H, W)
+        else:
+            v = self.proj_v(x_sampled).reshape(B * self.n_heads, H, W)
+
+        scaled_dot_prod = torch.einsum('b i d , b j d -> b i j', q, k) * self.scale_factor
+
+        if mask is not None:
+            assert mask.shape == scaled_dot_prod.shape[1:]
+            scaled_dot_prod = scaled_dot_prod.masked_fill(mask, -np.inf)
+
+        attention = torch.softmax(scaled_dot_prod, dim=-1)
+
+        out = torch.einsum('b i j , b j d -> b i d', attention, v)
+        
+        return self.proj_out(out.reshape(B, H, W, C))
+
+
+class CrossDeformAttn(nn.Module):
+    def __init__(self, seq_len, d_model, n_heads, dropout, droprate, 
+                 n_days=1, window_size=4, patch_len=7, stride=3, no_off=False) -> None:
+        super().__init__()
+        self.n_days = n_days
+        self.seq_len = seq_len
+        # 1d size: B*n_days, subseq_len, C
+        # 2d size: B*num_patches, 1, patch_len, C
+        self.subseq_len = seq_len // n_days + (1 if seq_len % n_days != 0 else 0)
+        self.patch_len = patch_len
+        self.stride = stride
+        self.num_patches = num_patches(self.seq_len, self.patch_len, self.stride)
+
+        self.layer_norm = LayerNorm(d_model)
+
+        # 1D
+        self.ff1 = nn.Linear(d_model, d_model, bias=True)
+        self.ff2 = nn.Linear(self.subseq_len, self.subseq_len, bias=True)
+        # Deform attention
+        self.deform_attn = DeformAtten1D(self.subseq_len, d_model, n_heads, dropout, kernel=window_size, no_off=no_off) 
+        self.attn_layers1d = nn.ModuleList([self.deform_attn])
+
+        self.mlps1d = nn.ModuleList(
+            [ 
+                MLP([d_model, d_model], final_relu=True, drop_out=0.0) for _ in range(len(self.attn_layers1d))
+            ]
+        )
+        self.drop_path1d = nn.ModuleList(
+            [
+                DropPath(droprate) if droprate > 0.0 else nn.Identity() for _ in range(len(self.attn_layers1d))
+            ]
+        )
+        #######################################
+        # 2D
+        d_route = 1
+        self.conv_in = nn.Conv2d(1, d_route, kernel_size=1, bias=True)
+        self.conv_out = nn.Conv2d(d_route, 1, kernel_size=1, bias=True)
+        self.deform_attn2d = DeformAtten2D(self.patch_len, d_route, n_heads=1, dropout=dropout, kernel=window_size, n_groups=1, no_off=no_off)
+        self.write_out = nn.Linear(self.num_patches*self.patch_len, self.seq_len)
+
+        self.attn_layers2d = nn.ModuleList([self.deform_attn2d])
+
+        self.mlps2d = nn.ModuleList(
+            [ 
+                MLP([d_model, d_model], final_relu=True, drop_out=0.0) for _ in range(len(self.attn_layers2d))
+            ]
+        )
+        self.drop_path2d = nn.ModuleList(
+            [
+                DropPath(droprate) if droprate > 0.0 else nn.Identity() for _ in range(len(self.attn_layers2d))
+            ]
+        )
+
+        self.fc = nn.Linear(2*d_model, d_model)
+        
+    def forward(self, x, attn_mask=None, tau=None, delta=None):
+        n_day = self.n_days 
+        B, L, C = x.shape
+
+        x = self.layer_norm(x)
+
+        padding_len = (n_day - (L % n_day)) % n_day
+        x_padded = torch.cat((x, x[:, [0], :].expand(-1, padding_len, -1)), dim=1)
+        x_1d = rearrange(x_padded, 'b (seg_num ts_d) d_model -> (b ts_d) seg_num d_model', ts_d=n_day) 
+        # attn on 1D
+        for d, attn_layer in enumerate(self.attn_layers1d):
+            x0 = x_1d
+            x_1d = attn_layer(x_1d)
+            x_1d = self.drop_path1d[d](x_1d) + x0
+            x0 = x_1d
+            x_1d = self.mlps1d[d](self.layer_norm(x_1d))
+            x_1d = self.drop_path1d[d](x_1d) + x0
+        x_1d = rearrange(x_1d, '(b ts_d) seg_num d_model -> b (seg_num ts_d) d_model', ts_d=n_day)[:,:L,:]
+
+        # Patch attn on 2D
+        x_unfold = x.unfold(dimension=-2, size=self.patch_len, step=self.stride)
+        x_2d = rearrange(x_unfold, 'b n c l -> (b n) l c').unsqueeze(-3)
+        x_2d = rearrange(x_2d, 'b c h w -> b h w c')
+        for d, attn_layer in enumerate(self.attn_layers2d):
+            x0 = x_2d
+            x_2d = attn_layer(x_2d)
+            x_2d = self.drop_path2d[d](x_2d) + x0
+            x0 = x_2d
+            x_2d = self.mlps2d[d](self.layer_norm(x_2d.permute(0,1,3,2))).permute(0,1,3,2)
+            x_2d = self.drop_path2d[d](x_2d) + x0
+        x_2d = rearrange(x_2d, 'b h w c -> b c h w')
+        x_2d = rearrange(x_2d, '(b n) 1 l c -> b (n l) c', b=B)
+        x_2d = self.write_out(x_2d.permute(0,2,1)).permute(0,2,1)
+
+        x = torch.concat([x_1d, x_2d], dim=-1)
+        x = self.fc(x)
+
+        return x, None
+    
diff --git a/financial_loss_functions/src/models/DeformTime/utils/functions.py b/financial_loss_functions/src/models/DeformTime/utils/functions.py
new file mode 100644
index 00000000..4148dd6a
--- /dev/null
+++ b/financial_loss_functions/src/models/DeformTime/utils/functions.py
@@ -0,0 +1,31 @@
+import torch
+
+
+def grid_sample1D(tensor, grid):
+    """Given an input and a flow-field grid, computes the output using input
+    values and pixel locations from grid.
+
+    Args:
+        tensor: (N, C, L_in) tensor
+        grid: (N, L_out, 2) tensor in the range of [-1, 1]
+
+    Returns:
+        (N, C, L_out) tensor
+
+    """
+    b, c, l_in = tensor.shape
+    b_, l_out, w_ = grid.shape
+    assert b == b_ 
+    out = []
+    for (t, g) in zip(tensor, grid):
+        x_ = 0.5 * (l_in - 1) * (g[:, 0] + 1)
+        ix = torch.floor(x_).to(torch.int32).clamp(0, l_in - 2)
+        dx = x_ - ix
+        out.append((1 - dx) * t[..., ix] + dx * t[..., ix + 1])
+    return torch.stack(out, dim=0)
+
+def num_patches(seq_len, patch_len, stride):
+    return (seq_len - patch_len) // stride + 1
+
+# print(num_patches(96, 7, 4))
+
diff --git a/financial_loss_functions/src/models/cvar_benchmark.py b/financial_loss_functions/src/models/cvar_benchmark.py
new file mode 100644
index 00000000..94149c33
--- /dev/null
+++ b/financial_loss_functions/src/models/cvar_benchmark.py
@@ -0,0 +1,164 @@
+"""
+Lightweight Mean-CVaR portfolio optimizer for benchmarking.
+
+Uses CVXPY with the Rockafellar-Uryasev linearization, inspired by
+the formulation in ``quantitative-portfolio-optimization`` (NVIDIA cufolio)
+but without any cuOpt / GPU dependency.
+
+The optimizer solves:
+
+    minimise   lambda * CVaR_alpha  -  E[w^T r]
+    s.t.       sum(w) + cash = 1
+               w >= w_min,  w <= w_max
+               cash >= 0
+               ||w||_1 <= L_tar
+
+where CVaR is expressed via auxiliary variables (t, u):
+
+    CVaR = t + (1 / ((1 - alpha) * S)) * sum(u_s)
+    u_s >= 0
+    u_s >= -(R_s^T w + t)          for each scenario s
+"""
+
+import logging
+from dataclasses import dataclass
+from typing import Optional
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CVaRParams:
+    """Tunable parameters for CVaR benchmark optimization."""
+
+    confidence: float = 0.95
+    risk_aversion: float = 1.0
+    w_min: float = 0.0
+    w_max: float = 0.30
+    L_tar: float = 1.6
+
+
+class CVaRBenchmark:
+    """
+    Mean-CVaR portfolio optimizer using CVXPY (CPU-only).
+
+    @param params CVaRParams  Optimization parameters.
+    """
+
+    def __init__(self, params: Optional[CVaRParams] = None):
+        self.params = params or CVaRParams()
+        self._cp = None
+
+    def _ensure_cvxpy(self):
+        if self._cp is None:
+            try:
+                import cvxpy as cp
+            except ImportError as exc:
+                raise ImportError(
+                    "cvxpy is required for the CVaR benchmark. "
+                    "Install with: pip install cvxpy"
+                ) from exc
+            self._cp = cp
+        return self._cp
+
+    def optimize(self, returns: np.ndarray) -> np.ndarray:
+        """
+        Solve Mean-CVaR optimization given historical return scenarios.
+
+        @param returns np.ndarray (S, N) -- S scenarios, N assets.
+            Each row is one historical daily return vector.
+
+        @return np.ndarray (N,) optimal portfolio weights.
+            Falls back to equal-weight if the solver fails.
+        """
+        cp = self._ensure_cvxpy()
+
+        S, N = returns.shape
+        alpha = self.params.confidence
+        lam = self.params.risk_aversion
+
+        mu = returns.mean(axis=0)
+
+        w = cp.Variable(N, name="w")
+        c = cp.Variable(1, name="cash")
+        t = cp.Variable(1, name="VaR")
+        u = cp.Variable(S, name="u")
+
+        scenario_ptf_returns = returns @ w  # (S,)
+
+        cvar_risk = t + (1.0 / ((1 - alpha) * S)) * cp.sum(u)
+        expected_return = mu @ w
+
+        constraints = [
+            u >= 0,
+            u + t + scenario_ptf_returns >= 0,
+            cp.sum(w) + c == 1,
+            w >= self.params.w_min,
+            w <= self.params.w_max,
+            c >= 0,
+            cp.norm1(w) <= self.params.L_tar,
+        ]
+
+        objective = cp.Minimize(lam * cvar_risk - expected_return)
+        problem = cp.Problem(objective, constraints)
+
+        try:
+            problem.solve(solver=cp.CLARABEL, verbose=False)
+        except Exception:
+            try:
+                problem.solve(solver=cp.SCS, verbose=False)
+            except Exception as exc:
+                logger.warning("CVaR solve failed: %s – returning equal weights", exc)
+                return np.full(N, 1.0 / N)
+
+        if w.value is None:
+            logger.warning("CVaR solver returned None – returning equal weights")
+            return np.full(N, 1.0 / N)
+
+        weights = np.array(w.value).flatten()
+        weights = np.clip(weights, 0, None)
+        total = weights.sum()
+        if total > 0:
+            weights /= total
+        else:
+            weights = np.full(N, 1.0 / N)
+
+        # Re-clip to w_max after normalization and redistribute excess
+        w_max = self.params.w_max
+        for _ in range(10):
+            excess_mask = weights > w_max
+            if not excess_mask.any():
+                break
+            excess = (weights[excess_mask] - w_max).sum()
+            weights[excess_mask] = w_max
+            free_mask = ~excess_mask & (weights < w_max)
+            if free_mask.any():
+                weights[free_mask] += excess / free_mask.sum()
+            else:
+                break
+
+        return weights
+
+    def rolling_optimize(
+        self,
+        X_returns: np.ndarray,
+    ) -> np.ndarray:
+        """
+        Run CVaR optimization on each window's historical returns.
+
+        @param X_returns np.ndarray (num_windows, T_in, N)
+            Per-window historical return data (extracted from the input
+            windows used by the LSTM).
+
+        @return np.ndarray (num_windows, N) benchmark weights.
+        """
+        num_windows, T_in, N = X_returns.shape
+        all_weights = np.zeros((num_windows, N))
+
+        for i in range(num_windows):
+            scenarios = X_returns[i]  # (T_in, N)
+            all_weights[i] = self.optimize(scenarios)
+
+        return all_weights
diff --git a/financial_loss_functions/src/models/registry.py b/financial_loss_functions/src/models/registry.py
new file mode 100644
index 00000000..62b4a622
--- /dev/null
+++ b/financial_loss_functions/src/models/registry.py
@@ -0,0 +1,219 @@
+# models/registry.py
+import pkgutil
+import importlib
+from typing import Dict, Type, Any, Set
+
+NN_Registry = Dict[str, Dict[str, Type]] # category -> name -> class
+Trad_Registry = Dict[str, Type]
+
+class NNModelLibrary:
+    """
+    Neural Network Model Library to contain all available models 
+    like a library that can be queried.
+    """
+    _registry: NN_Registry = {}
+    _discovered_packages: Set[str] = set()
+
+    @classmethod
+    def register(cls, category: str, name: str | None = None):
+        """
+        Register a neural network model using this decorator.
+
+        Args:
+            category (str): Name of category the model architecture belongs to.
+            name (str | None): Name of the model. Default = None. 
+                If None, name of the class will be used as default.
+        
+        Raise:
+            KeyError: If a model already exists in the library.
+        """
+        def decorator(model_cls: Type):
+            key = name or model_cls.__name__
+            if key in cls._registry.get(category, {}):
+                raise KeyError(f"Model '{key}' already registered in category '{category}'")
+            cls._registry.setdefault(category, {})[key] = model_cls
+            return model_cls
+        return decorator
+
+    @classmethod
+    def autodiscover(cls, package: str):
+        """
+        Import all modules in a package exactly once per package.
+        Safe to call multiple times with different packages.
+        This method MUST be executed in a main file to register all models.
+
+        Args:
+            package (str): Name or pythonic module path to detect models.
+                Example: 'src.models'
+        """
+        if package in cls._discovered_packages:
+            return
+
+        pkg = importlib.import_module(package)
+
+        for _, modname, ispkg in pkgutil.walk_packages(pkg.__path__, pkg.__name__ + "."):
+            if not ispkg:
+                importlib.import_module(modname)
+
+        cls._discovered_packages.add(package)
+
+    @classmethod
+    def items(cls) -> NN_Registry:
+        """
+        Get entire library of neural network models.
+
+        Returns:
+            NN_Registry: Dictionary of all categories and their models.
+        """
+        return cls._registry
+    
+    @classmethod
+    def list_categories(cls) -> list[str]:
+        """
+        Get a list of available categories.
+
+        Returns:
+            list[str]: List of categories present in the library.
+        """
+        return list(cls._registry.keys())
+
+    @classmethod
+    def list_models(cls, category: str) -> list[str]:
+        """
+        Get a list of all available models in a particular category.
+
+        Args:
+            category (str): Category name of the required neural networks.
+        
+        Returns:
+            list[str]: List of all available neural network models 
+                which belong to the give category.
+        """
+        return list(cls._registry.get(category, {}).keys())
+
+    @classmethod
+    def get(cls, category: str, name: str) -> Type:
+        """
+        Get a particular model for the given category name.
+
+        Args:
+            category (str): Category name of the required model.
+            name (str): Name of the required model.
+        
+        Returns:
+            Class of the required neural network model.    
+        """
+        return cls._registry[category].get(name)
+
+    @classmethod
+    def instantiate(cls, category: str, name: str, *args, **kwargs) -> Any:
+        """
+        Instantiate a particular model with its arguments and hyperparameters.
+
+        Args:
+            category (str): Category name of the required model.
+            name (str): Name of the required model.
+            *args: Positional arguments to be passed on to the model object.
+            **kwargs: Key word arguments to be passed on to the model object.
+        
+        Returns:
+            Instantiated model object.
+        """
+        return cls.get(category, name)(*args, **kwargs)
+
+class TradModelLibrary:
+    """
+    Tradional Model Library to contain all available tradional portfolio 
+    optimization models/methods like a library that can be queried.
+    """
+    _registry: Trad_Registry = {}
+    _discovered_packages: Set[str] = set()
+
+    @classmethod
+    def register(cls, name: str = None):
+        """
+        Register a tradional portfolio optimization model/method using this decorator.
+
+        Args:
+            name (str | None): Name of the model. Default = None. 
+                If None, name of the class will be used as default.
+        
+        Raise:
+            KeyError: If a model already exists in the library.
+        """
+        def decorator(model_cls: Type):
+            key = name or model_cls.__name__
+            if key in cls._registry:
+                raise KeyError(f"Model '{key}' already registered in")
+            cls._registry[key] = model_cls
+            return model_cls
+        return decorator
+
+    @classmethod
+    def autodiscover(cls, package: str):
+        """
+        Import all modules in a package exactly once per package.
+        Safe to call multiple times with different packages.
+        This method MUST be executed in a main file to register all models.
+
+        Args:
+            package (str): Name or pythonic module path to detect models.
+                Example: 'src.models'
+        """
+        if package in cls._discovered_packages:
+            return
+
+        pkg = importlib.import_module(package)
+
+        for _, modname, ispkg in pkgutil.walk_packages(pkg.__path__, pkg.__name__ + "."):
+            if not ispkg:
+                importlib.import_module(modname)
+
+        cls._discovered_packages.add(package)
+
+    @classmethod
+    def items(cls) -> Trad_Registry:
+        """
+        Get entire library of tradional portfolio optimization models/methods.
+
+        Returns:
+            Trad_Registry: Dictionary of all categories and their models.
+        """
+        return cls._registry
+
+    @classmethod
+    def list_models(cls) -> list[str]:
+        """
+        Get a list of all available models.        
+        Returns:
+            list[str]: List of all available tradional portfolio optimization models.
+        """
+        return list(cls._registry.keys())
+
+    @classmethod
+    def get(cls, name: str) -> Type:
+        """
+        Get a particular model by its name.
+
+        Args:
+            name (str): Name of the reqired tradional model.
+        
+        Returns:
+            Class of the required tradional model.    
+        """
+        return cls._registry.get(name)
+
+    @classmethod
+    def instantiate(cls, name: str, *args, **kwargs) -> Any:
+        """
+        Instantiate a particular model with its arguments and hyperparameters.
+
+        Args:
+            name (str): Name of the required model.
+            *args: Positional arguments to be passed on to the model object.
+            **kwargs: Key word arguments to be passed on to the model object.
+        
+        Returns:
+            Instantiated model object.
+        """
+        return cls.get(name)(*args, **kwargs)
\ No newline at end of file
diff --git a/financial_loss_functions/src/training/loss_functions.py b/financial_loss_functions/src/training/loss_functions.py
index b560aba1..e3f947e2 100644
--- a/financial_loss_functions/src/training/loss_functions.py
+++ b/financial_loss_functions/src/training/loss_functions.py
@@ -1,5 +1,14 @@
+from typing import List, Optional
+
+import torch
+import torch.nn as nn
 from torch import clamp, tensor
 
+
+# ---------------------------------------------------------------------------
+# Original standalone loss functions (unchanged)
+# ---------------------------------------------------------------------------
+
 def raw_sharpe_loss(
         weights: tensor, returns: tensor, eps: float = 1e-8
     ):
@@ -7,12 +16,12 @@ def raw_sharpe_loss(
     @param weights torch.tensor (B, N)
     @param returns torch.tensor (B, T_out, N) -- raw returns
 
-    @return batch average Sharpe Ratio. 
+    @return batch average Sharpe Ratio.
         Negative since NN has to maximize Sharpe Ratio but minimize loss
     """
     # portfolio returns per step
     port = (weights.unsqueeze(1) * returns).sum(dim=-1)  # (B, T_out)
-    
+
     mean = port.mean(dim=1)          # (B,)
     std = port.std(dim=1) + eps      # (B,)
     sharpe = mean / std              # (B,)
@@ -39,18 +48,584 @@ def raw_sortino_loss(
     @param target float Minimum acceptable return (MAR), often 0 for risk-free rate adjusted.
     @param eps float Epsilon value to avoid divide by zero error
 
-    @return batch average Sortino Ratio. 
+    @return batch average Sortino Ratio.
         Negative since NN has to maximize Sortino but minimize loss
     """
     # Portfolio returns per step
     port = (weights.unsqueeze(1) * returns).sum(dim=-1)  # (B, T_out)
-    
+
     # Downside deviation: std of negative deviations from target
     downside = clamp(target - port, min=0.0)  # (B, T_out), only positive for downside
     downside_std = downside.std(dim=1) + eps  # (B,)
-    
+
     mean = port.mean(dim=1)  # (B,)
     sortino = mean / downside_std  # (B,)
-    
+
     # Maximize Sortino → minimize negative Sortino
-    return -sortino.mean()
\ No newline at end of file
+    return -sortino.mean()
+
+
+# ---------------------------------------------------------------------------
+# Backward-compatible wrappers so existing functions accept the new
+# 3-or-4 arg call signature used by the modified Trainer.
+# ---------------------------------------------------------------------------
+
+def sharpe_loss_compat(weights, returns, features=None, fundamentals=None):
+    return differentiable_sharpe_loss(weights, returns)
+
+
+def sortino_loss_compat(weights, returns, features=None, fundamentals=None):
+    return raw_sortino_loss(weights, returns)
+
+
+# ---------------------------------------------------------------------------
+# Learnable modules for multi-timeframe S/R and macro override
+# ---------------------------------------------------------------------------
+
+class TimeframeImportanceFn(nn.Module):
+    """
+    Maps normalised lookback duration in [0, 1] to an importance weight
+    in (0, 1).  Initialised with a monotonic bias so that longer
+    lookbacks produce higher importance by default.
+
+    Architecture: Linear(1, H) -> Softplus -> Linear(H, 1) -> Sigmoid
+    """
+
+    def __init__(self, hidden: int = 8):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(1, hidden),
+            nn.Softplus(),
+            nn.Linear(hidden, 1),
+        )
+        # Monotonic-bias init: positive weights so larger inputs → larger outputs
+        with torch.no_grad():
+            self.net[0].weight.fill_(1.0)
+            self.net[0].bias.fill_(0.0)
+            self.net[2].weight.fill_(1.0)
+            self.net[2].bias.fill_(0.0)
+
+    def forward(self, normed_lookbacks: torch.Tensor) -> torch.Tensor:
+        """
+        @param normed_lookbacks (W,) values in [0, 1]
+        @return (W,) importance weights in (0, 1)
+        """
+        out = self.net(normed_lookbacks.unsqueeze(-1))  # (W, 1)
+        return torch.sigmoid(out.squeeze(-1))            # (W,)
+
+
+class MacroOverrideGate(nn.Module):
+    """
+    Learnable gate that maps macro rate-of-change to an override
+    weight omega in (0, 1).
+
+    omega → 1 : macro signals dominate (extreme macro conditions)
+    omega → 0 : technical/fundamental signals dominate (calm macro)
+    """
+
+    def __init__(self, num_macro_features: int, hidden: int = 8):
+        super().__init__()
+        self.baseline = nn.Parameter(torch.tensor(0.0))
+        self.proj = nn.Sequential(
+            nn.Linear(num_macro_features, hidden),
+            nn.Softplus(),
+            nn.Linear(hidden, 1),
+        )
+
+    def forward(self, delta_macro: torch.Tensor) -> torch.Tensor:
+        """
+        @param delta_macro (B, M) macro rate-of-change
+        @return (B,) override weight in (0, 1)
+        """
+        activation = self.proj(delta_macro).squeeze(-1)  # (B,)
+        return torch.sigmoid(self.baseline + activation)
+
+
+# ---------------------------------------------------------------------------
+# Composite S/R Loss
+# ---------------------------------------------------------------------------
+
+class CompositeSRLoss(nn.Module):
+    """
+    Multi-component loss that augments the differentiable Sharpe loss
+    with four S/R-aware regularization penalties:
+
+    L_total = L_sharpe
+              + alpha * L_price_action
+              + beta  * L_psychological
+              + gamma * L_macro
+              + delta * L_fundamental
+
+    Optional enhancements (all backward-compatible, disabled by default):
+    - Multi-timeframe S/R hierarchy via rolling-window pivots
+    - Adaptive macro-override gate that re-weights penalties
+    - Sector-aware penalty scaling
+    - Ticker-specific macro sensitivity
+    - Cross-ticker correlation guard
+    """
+
+    def __init__(
+        self,
+        num_tickers: int,
+        num_features_per_ticker: int,
+        ret_feature_idx: int,
+        turnover_feature_idx: int,
+        illiq_feature_idx: int,
+        ba_spread_feature_idx: int,
+        macro_col_indices: Optional[List[int]] = None,
+        alpha: float = 0.10,
+        beta: float = 0.05,
+        gamma: float = 0.10,
+        delta: float = 0.10,
+        psych_thresholds: Optional[List[float]] = None,
+        psych_sigma: float = 0.01,
+        ema_span: int = 10,
+        # --- Multi-timeframe S/R ---
+        sr_use_multi_timeframe: bool = False,
+        sr_lookback_windows: Optional[List[int]] = None,
+        sr_pivot_threshold: float = 0.02,
+        sr_importance_hidden: int = 8,
+        # --- Macro override gate ---
+        use_macro_override: bool = False,
+        macro_override_hidden: int = 8,
+        # --- Sector-aware penalty ---
+        sector_ids: Optional[List[int]] = None,
+        # --- Ticker macro sensitivity ---
+        ticker_macro_sensitivity: Optional[torch.Tensor] = None,
+        # --- Correlation guard ---
+        corr_matrix: Optional[torch.Tensor] = None,
+    ):
+        """
+        @param num_tickers int  Number of assets N.
+        @param num_features_per_ticker int  Features F per ticker in the
+            flat layout produced by Reshaper (T_NxF).
+        @param ret_feature_idx int  Index of the RET feature within a
+            ticker's F-wide feature block.
+        @param turnover_feature_idx int  Index of TURNOVER.
+        @param illiq_feature_idx int  Index of ILLIQUIDITY.
+        @param ba_spread_feature_idx int  Index of BA_SPREAD.
+        @param macro_col_indices list[int]  Indices of macro features
+            within a ticker's feature block (after broadcast).
+        @param alpha float  Weight for price-action S/R penalty.
+        @param beta float  Weight for psychological-level penalty.
+        @param gamma float  Weight for macro-regime penalty.
+        @param delta float  Weight for SEC-fundamental penalty.
+        @param psych_thresholds list[float]  Return-space thresholds.
+        @param psych_sigma float  Gaussian kernel bandwidth.
+        @param ema_span int  EMA lookback for return smoothing.
+        @param sr_use_multi_timeframe bool  Enable multi-TF S/R hierarchy.
+        @param sr_lookback_windows list[int]  Lookback periods in days.
+        @param sr_pivot_threshold float  Soft activation threshold for S/R.
+        @param sr_importance_hidden int  Hidden size for importance MLP.
+        @param use_macro_override bool  Enable adaptive macro override gate.
+        @param macro_override_hidden int  Hidden size for gate MLP.
+        @param sector_ids list[int]  Integer sector ID per ticker (len N).
+        @param ticker_macro_sensitivity Tensor  (N,) macro sensitivity per ticker.
+        @param corr_matrix Tensor  (N, N) return correlation matrix.
+        """
+        super().__init__()
+        self.N = num_tickers
+        self.F = num_features_per_ticker
+        self.ret_idx = ret_feature_idx
+        self.turn_idx = turnover_feature_idx
+        self.illiq_idx = illiq_feature_idx
+        self.spread_idx = ba_spread_feature_idx
+        self.macro_idx = macro_col_indices or []
+
+        self.alpha = alpha
+        self.beta = beta
+        self.gamma = gamma
+        self.delta = delta
+        self.psych_sigma = psych_sigma
+        self.ema_span = ema_span
+
+        if psych_thresholds is None:
+            psych_thresholds = [
+                0.0, 0.01, -0.01, 0.02, -0.02,
+                0.05, -0.05, 0.10, -0.10, 0.20, -0.20,
+            ]
+        self.register_buffer(
+            "psych_thresholds",
+            torch.tensor(psych_thresholds, dtype=torch.float32),
+        )
+
+        # Learnable regime weights (one per macro feature)
+        if self.macro_idx:
+            self.regime_weights = nn.Parameter(
+                torch.zeros(len(self.macro_idx))
+            )
+        else:
+            self.regime_weights = None
+
+        # EMA decay factor
+        ema_alpha = 2.0 / (ema_span + 1)
+        self.register_buffer(
+            "ema_alpha", torch.tensor(ema_alpha, dtype=torch.float32)
+        )
+
+        # ---- Multi-timeframe S/R hierarchy ----
+        self.use_multi_tf = sr_use_multi_timeframe
+        self.sr_pivot_threshold = sr_pivot_threshold
+        if self.use_multi_tf:
+            windows = sr_lookback_windows or [5, 10, 21, 42, 63, 105]
+            self.sr_windows = sorted(windows)
+            max_w = float(max(self.sr_windows))
+            self.register_buffer(
+                "sr_lookback_normed",
+                torch.tensor([w / max_w for w in self.sr_windows], dtype=torch.float32),
+            )
+            self.importance_fn = TimeframeImportanceFn(sr_importance_hidden)
+        else:
+            self.sr_windows = []
+            self.importance_fn = None
+
+        # ---- Macro override gate ----
+        self.use_macro_override = use_macro_override
+        if use_macro_override and self.macro_idx:
+            self.macro_override = MacroOverrideGate(
+                len(self.macro_idx), macro_override_hidden
+            )
+        else:
+            self.macro_override = None
+
+        # ---- Sector-aware penalty scaling ----
+        if sector_ids is not None:
+            self.register_buffer(
+                "sector_ids",
+                torch.tensor(sector_ids, dtype=torch.long),
+            )
+            self.num_sectors = len(set(sector_ids))
+        else:
+            self.sector_ids = None
+            self.num_sectors = 0
+
+        # ---- Ticker macro sensitivity ----
+        if ticker_macro_sensitivity is not None:
+            self.register_buffer("ticker_macro_sensitivity", ticker_macro_sensitivity)
+        else:
+            self.ticker_macro_sensitivity = None
+
+        # ---- Correlation guard ----
+        if corr_matrix is not None:
+            self.register_buffer("corr_matrix", corr_matrix)
+        else:
+            self.corr_matrix = None
+
+    # -- helpers -------------------------------------------------------------
+
+    def _extract_ticker_feature(
+        self, features: torch.Tensor, feat_idx: int,
+    ) -> torch.Tensor:
+        """
+        From flat (B, T, N*F) extract one feature for all tickers -> (B, T, N).
+        """
+        B, T, _ = features.shape
+        indices = [
+            ticker_i * self.F + feat_idx for ticker_i in range(self.N)
+        ]
+        idx_tensor = torch.tensor(
+            indices, device=features.device, dtype=torch.long
+        )
+        return features[:, :, idx_tensor]  # (B, T, N)
+
+    def _ema_smooth(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Exponential moving average along dim=1 (time).
+        x: (B, T, N)
+        """
+        B, T, N = x.shape
+        ema = torch.zeros(B, N, device=x.device, dtype=x.dtype)
+        outputs = []
+        for t in range(T):
+            ema = self.ema_alpha * x[:, t, :] + (1 - self.ema_alpha) * ema
+            outputs.append(ema)
+        return torch.stack(outputs, dim=1)  # (B, T, N)
+
+    @staticmethod
+    def _z_score(x: torch.Tensor, dim: int = 1, eps: float = 1e-8):
+        """Z-score along *dim*."""
+        mu = x.mean(dim=dim, keepdim=True)
+        sigma = x.std(dim=dim, keepdim=True) + eps
+        return (x - mu) / sigma
+
+    def _compute_delta_macro(
+        self, features: torch.Tensor,
+    ) -> Optional[torch.Tensor]:
+        """
+        Shared helper: macro rate-of-change over the input window.
+
+        @return (B, M) or None if no macro features.
+        """
+        if not self.macro_idx:
+            return None
+        macro_list = []
+        for idx in self.macro_idx:
+            mf = self._extract_ticker_feature(features, idx)  # (B, T, N)
+            macro_list.append(mf.mean(dim=-1))  # average across tickers -> (B, T)
+        macro_stack = torch.stack(macro_list, dim=-1)  # (B, T, M)
+        return macro_stack[:, -1, :] - macro_stack[:, 0, :]  # (B, M)
+
+    def _detect_pivots(
+        self, prices: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Detect support/resistance levels at multiple lookback windows.
+
+        @param prices (B, T, N) cumulative price proxy
+        @return (B, W, N)  normalised position in [-1, +1] per window.
+            +1 = at window high (resistance), -1 = at window low (support).
+        """
+        B, T, N = prices.shape
+        current = prices[:, -1, :]  # (B, N)
+        scores = []
+        for w in self.sr_windows:
+            w_clamped = min(w, T)
+            window_slice = prices[:, -w_clamped:, :]         # (B, w', N)
+            w_high = window_slice.max(dim=1).values           # (B, N)
+            w_low = window_slice.min(dim=1).values            # (B, N)
+            w_range = (w_high - w_low).clamp(min=1e-8)
+            midpoint = (w_high + w_low) / 2.0
+            score = ((current - midpoint) / (w_range / 2.0)).clamp(-1.0, 1.0)
+            scores.append(score)
+        return torch.stack(scores, dim=1)  # (B, W, N)
+
+    def _sector_avg_confidence(
+        self, confidence: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Compute per-sector average confidence and map back to per-ticker.
+
+        @param confidence (B, N)
+        @return (B, N) sector-average confidence for each ticker's sector.
+        """
+        B, N = confidence.shape
+        sector_avg = torch.zeros(B, self.num_sectors, device=confidence.device)
+        sector_count = torch.zeros(self.num_sectors, device=confidence.device)
+        for s in range(self.num_sectors):
+            mask = (self.sector_ids == s)  # (N,)
+            if mask.any():
+                sector_avg[:, s] = confidence[:, mask].mean(dim=-1)
+                sector_count[s] = mask.float().sum()
+        # Map sector averages back to each ticker
+        return sector_avg[:, self.sector_ids]  # (B, N)
+
+    # -- sub-losses ----------------------------------------------------------
+
+    def _price_action_penalty(
+        self,
+        weights: torch.Tensor,
+        features: torch.Tensor,
+        returns: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Detect S/R conditions from microstructure features and penalise
+        portfolio weights that ignore these signals.
+
+        When multi-timeframe is enabled, rolling-window pivot detection
+        augments the microstructure confidence with S/R level strength
+        weighted by a learnable importance function.
+        """
+        turnover = self._extract_ticker_feature(features, self.turn_idx)
+        illiq = self._extract_ticker_feature(features, self.illiq_idx)
+        spread = self._extract_ticker_feature(features, self.spread_idx)
+        rets = self._extract_ticker_feature(features, self.ret_idx)
+
+        turn_z = self._z_score(turnover)[:, -1, :]     # (B, N)
+        illiq_z = self._z_score(illiq)[:, -1, :]
+        spread_z = self._z_score(spread)[:, -1, :]
+
+        smoothed = self._ema_smooth(rets)
+
+        confidence = torch.sigmoid(turn_z + illiq_z + spread_z)
+        direction = torch.tanh(smoothed[:, -1, :])
+
+        # ---- Multi-timeframe S/R hierarchy ----
+        sr_active = None
+        if self.use_multi_tf and self.importance_fn is not None:
+            cum_prices = (1 + rets).cumprod(dim=1)                  # (B, T, N)
+            pivot_scores = self._detect_pivots(cum_prices)          # (B, W, N)
+            importance = self.importance_fn(self.sr_lookback_normed) # (W,)
+            imp_sum = importance.sum().clamp(min=1e-8)
+
+            # Importance-weighted aggregation
+            imp = importance[None, :, None]                         # (1, W, 1)
+            weighted_strength = (pivot_scores.abs() * imp).sum(dim=1) / imp_sum  # (B, N)
+            weighted_direction = (pivot_scores * imp).sum(dim=1) / imp_sum       # (B, N)
+
+            # Soft gate: activates when aggregate S/R strength exceeds threshold
+            sr_active = torch.sigmoid(
+                (weighted_strength - self.sr_pivot_threshold) / 0.01
+            )  # (B, N)
+
+            # Multiplicative boost to microstructure confidence
+            confidence = confidence * (1.0 + sr_active)
+            # Blend EMA direction with S/R direction
+            sr_dir = torch.tanh(weighted_direction)
+            direction = (1.0 - sr_active) * direction + sr_active * sr_dir
+
+        # ---- Sector-aware scaling ----
+        if self.sector_ids is not None:
+            sec_avg = self._sector_avg_confidence(confidence)
+            confidence = confidence * (1.0 + sec_avg)
+
+        equal_w = 1.0 / self.N
+        weight_diff = weights - equal_w
+
+        penalty = -(confidence * direction * weight_diff).mean()
+
+        # ---- Correlation guard (only when multi-TF active) ----
+        if (
+            self.corr_matrix is not None
+            and sr_active is not None
+        ):
+            w_col = weights.unsqueeze(-1)                          # (B, N, 1)
+            port_corr = (
+                w_col * self.corr_matrix.unsqueeze(0) * w_col.transpose(-1, -2)
+            ).sum(dim=(-1, -2))                                    # (B,)
+            sr_corr_penalty = (sr_active.mean(dim=-1) * port_corr).mean()
+            penalty = penalty + sr_corr_penalty
+
+        return penalty
+
+    def _psychological_penalty(
+        self,
+        weights: torch.Tensor,
+        features: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Penalise when cumulative returns sit near psychological thresholds
+        and the model ignores this.
+        """
+        rets = self._extract_ticker_feature(features, self.ret_idx)  # (B, T, N)
+
+        # Use cumulative sum (not product) — features are RobustScaler-normalized,
+        # so (1+rets).prod() diverges. Sum over the window is well-behaved.
+        cum_ret = rets.sum(dim=1)  # (B, N)
+
+        # Distance to nearest threshold: (B, N, K)
+        thresholds = self.psych_thresholds  # (K,)
+        diffs = cum_ret.unsqueeze(-1) - thresholds.unsqueeze(0).unsqueeze(0)
+        abs_diffs = diffs.abs()
+        min_dist, nearest_idx = abs_diffs.min(dim=-1)  # (B, N)
+
+        proximity = torch.exp(-min_dist.pow(2) / (2 * self.psych_sigma ** 2))
+
+        nearest_thresh = thresholds[nearest_idx]  # (B, N)
+        direction = torch.sign(cum_ret - nearest_thresh)
+        direction = torch.where(direction == 0, torch.ones_like(direction), direction)
+
+        equal_w = 1.0 / self.N
+        weight_diff = weights - equal_w
+
+        return -(proximity * direction * weight_diff).mean()
+
+    def _macro_regime_penalty(
+        self,
+        weights: torch.Tensor,
+        returns: torch.Tensor,
+        features: torch.Tensor,
+        delta_macro: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        """
+        Compute risk-on/risk-off regime from macro features and penalise
+        portfolio returns that are misaligned with the regime direction.
+
+        @param delta_macro (B, M) pre-computed macro delta (avoids recomputing).
+        """
+        if self.regime_weights is None or not self.macro_idx:
+            return torch.tensor(0.0, device=weights.device)
+
+        if delta_macro is None:
+            delta_macro = self._compute_delta_macro(features)
+
+        regime = torch.tanh(
+            (delta_macro * self.regime_weights.unsqueeze(0)).sum(dim=-1)
+        )  # (B,)
+
+        port_ret = (weights.unsqueeze(1) * returns).sum(dim=-1)  # (B, T_out)
+
+        # ---- Ticker macro sensitivity ----
+        if self.ticker_macro_sensitivity is not None:
+            # Weight individual returns by macro sensitivity before portfolio sum
+            sens = self.ticker_macro_sensitivity.unsqueeze(0)  # (1, N)
+            weighted_port = (weights * sens).sum(dim=-1)       # (B,)
+            # Combine: portfolio return scaled by sensitivity-weighted exposure
+            mean_port_ret = (weighted_port * port_ret.mean(dim=1))
+        else:
+            mean_port_ret = port_ret.mean(dim=1)  # (B,)
+
+        return -(regime * mean_port_ret).mean()
+
+    def _fundamental_penalty(
+        self,
+        weights: torch.Tensor,
+        fundamentals: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Penalise allocations misaligned with fundamental quality scores.
+
+        @param fundamentals torch.Tensor (B, N) composite z-scored scores
+            (positive = improving, negative = deteriorating).
+        """
+        # Skip if fundamentals have no cross-sectional variance (e.g. broken SEC data)
+        if fundamentals.var(dim=-1).mean() < 1e-4:
+            return torch.tensor(0.0, device=weights.device)
+
+        equal_w = 1.0 / self.N
+        weight_diff = weights - equal_w
+        return -(fundamentals * weight_diff).mean()
+
+    # -- forward -------------------------------------------------------------
+
+    def forward(
+        self,
+        weights: torch.Tensor,
+        returns: torch.Tensor,
+        features: Optional[torch.Tensor] = None,
+        fundamentals: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        """
+        @param weights (B, N) portfolio allocation weights
+        @param returns (B, T_out, N) future returns
+        @param features (B, T_in, N*F) raw input window (from xb)
+        @param fundamentals (B, N) per-ticker composite fundamental scores
+        """
+        loss = differentiable_sharpe_loss(weights, returns)
+
+        if features is not None:
+            # Pre-compute macro delta (shared by override gate and regime penalty)
+            delta_macro = self._compute_delta_macro(features)
+
+            # ---- Adaptive macro override gate ----
+            if self.macro_override is not None and delta_macro is not None:
+                omega = self.macro_override(delta_macro).mean()  # scalar
+            else:
+                omega = torch.tensor(0.0, device=weights.device)
+
+            # Scale penalty weights: macro boosted, others reduced
+            alpha_eff = self.alpha * (1.0 - omega)
+            beta_eff = self.beta * (1.0 - omega)
+            gamma_eff = self.gamma * (1.0 + omega)
+            delta_eff = self.delta * (1.0 - omega)
+
+            loss = loss + alpha_eff * self._price_action_penalty(
+                weights, features, returns
+            )
+            loss = loss + beta_eff * self._psychological_penalty(
+                weights, features
+            )
+            loss = loss + gamma_eff * self._macro_regime_penalty(
+                weights, returns, features, delta_macro
+            )
+
+            if fundamentals is not None:
+                loss = loss + delta_eff * self._fundamental_penalty(
+                    weights, fundamentals
+                )
+        elif fundamentals is not None:
+            # No features but fundamentals provided
+            loss = loss + self.delta * self._fundamental_penalty(
+                weights, fundamentals
+            )
+
+        return loss
diff --git a/financial_loss_functions/src/training/pipeline.py b/financial_loss_functions/src/training/pipeline.py
index 8f60ba96..c2a69f21 100644
--- a/financial_loss_functions/src/training/pipeline.py
+++ b/financial_loss_functions/src/training/pipeline.py
@@ -1,11 +1,16 @@
+import numpy as np
+import pandas as pd
+import torch
 from torch import optim
-from typing import Dict
+from typing import Dict, List, Optional
 from pathlib import Path
 from src.utils import create_directory
 from src.data_processing.dataset import Reshaper
 from src.data_processing.dataset import WindowDataset
 from src.data_processing.loading import load_csv_files
 from src.models.lstm import BaseLSTM, AttentionLSTM
+from src.models.DeformTime.DeformTime import DeformTime
+from src.models.cvar_benchmark import CVaRBenchmark, CVaRParams
 from src.training.train import (
     Trainer,
     train_val_losses_plot,
@@ -14,20 +19,256 @@
 from src.training.loss_functions import (
     raw_sharpe_loss,
     raw_sortino_loss,
-    differentiable_sharpe_loss
+    differentiable_sharpe_loss,
+    CompositeSRLoss,
 )
+from src.evaluation.pyfolio_viz import (
+    weights_to_pyfolio,
+    build_window_dates,
+    generate_comparison_tearsheets,
+    comparison_summary,
+)
+
+# Registry of available model keys (used by CLI --models flag)
+ALL_MODELS = {
+    'baseLSTM_sharpe': 'BaseLSTM (Sharpe)',
+    'baseLSTM_composite': 'BaseLSTM (Composite)',
+    'attentionLSTM_composite': 'AttentionLSTM (Composite)',
+    'deformTime_composite': 'DeformTime (Composite)',
+    'cvar': 'CVaR Benchmark',
+}
+
+
+def _load_sector_ids(
+    paths_config: Dict, tickers: list,
+) -> Optional[list]:
+    """
+    Load sector assignments and return integer sector IDs aligned to *tickers*.
+    Returns None if the file is missing.
+    """
+    feature_sel_dir = Path(paths_config.get("data", {}).get(
+        "feature_selection_dir", "data/feature_selection"
+    ))
+    sector_path = feature_sel_dir / "sector_assignment_50.csv"
+    if not sector_path.exists():
+        return None
+
+    df = pd.read_csv(sector_path)
+    if "ticker" not in df.columns or "sector" not in df.columns:
+        return None
+
+    sector_map = dict(zip(df["ticker"], df["sector"]))
+    unique_sectors = sorted(set(sector_map.values()))
+    sector_to_id = {s: i for i, s in enumerate(unique_sectors)}
+
+    ids = []
+    for t in tickers:
+        sector = sector_map.get(t)
+        if sector is not None:
+            ids.append(sector_to_id[sector])
+        else:
+            ids.append(0)
+    return ids
+
+
+def _load_macro_sensitivity(
+    paths_config: Dict, tickers: list,
+) -> Optional[torch.Tensor]:
+    """
+    Compute per-ticker macro sensitivity from ticker_macro_rankings.csv.
+    Returns a (N,) tensor normalised to [0, 1], or None if unavailable.
+    """
+    feature_sel_dir = Path(paths_config.get("data", {}).get(
+        "feature_selection_dir", "data/feature_selection"
+    ))
+    rank_path = feature_sel_dir / "ticker_macro_rankings.csv"
+    if not rank_path.exists():
+        return None
+
+    df = pd.read_csv(rank_path)
+    if "ticker" not in df.columns or "composite_score" not in df.columns:
+        return None
+
+    # Average composite score across each ticker's top-ranked features
+    avg_scores = df.groupby("ticker")["composite_score"].mean()
+    scores = []
+    for t in tickers:
+        scores.append(avg_scores.get(t, 0.0))
+    arr = np.array(scores, dtype=np.float32)
+    # Normalise to [0, 1]
+    rng = arr.max() - arr.min()
+    if rng > 0:
+        arr = (arr - arr.min()) / rng
+    return torch.tensor(arr, dtype=torch.float32)
+
+
+def _compute_corr_matrix(
+    returns_df: pd.DataFrame, tickers: list,
+) -> Optional[torch.Tensor]:
+    """
+    Compute a static (N, N) return correlation matrix from training data.
+    Returns None if the data is insufficient.
+    """
+    available = [t for t in tickers if t in returns_df.columns]
+    if len(available) < 2:
+        return None
+    corr = returns_df[available].corr().values.astype(np.float32)
+    # Reindex to match full ticker list (fill missing with identity)
+    N = len(tickers)
+    full_corr = np.eye(N, dtype=np.float32)
+    avail_idx = {t: i for i, t in enumerate(tickers) if t in available}
+    for i, ti in enumerate(available):
+        for j, tj in enumerate(available):
+            full_corr[avail_idx[ti], avail_idx[tj]] = corr[i, j]
+    return torch.tensor(full_corr, dtype=torch.float32)
+
+
+def _build_composite_loss(
+    reshaper: Reshaper,
+    hparams_config: Dict,
+    paths_config: Optional[Dict] = None,
+    returns_train_df: Optional[pd.DataFrame] = None,
+) -> CompositeSRLoss:
+    """Instantiate CompositeSRLoss using feature indices from the Reshaper."""
+    features = reshaper.get_features()
+    tickers = reshaper.get_tickers()
+
+    feat_index = {f: i for i, f in enumerate(features)}
+    ret_idx = feat_index.get("RET", 0)
+    turn_idx = feat_index.get("TURNOVER", 1)
+    illiq_idx = feat_index.get("ILLIQUIDITY", 2)
+    spread_idx = feat_index.get("BA_SPREAD", 3)
+
+    known_stock_features = {"RET", "TURNOVER", "ILLIQUIDITY", "BA_SPREAD", "VOL_CHANGE", "sprtrn"}
+    macro_indices = [
+        feat_index[f] for f in features
+        if f not in known_stock_features
+    ]
+
+    loss_cfg = hparams_config.get("CompositeSRLoss", {})
+
+    # ---- Data-driven extras (all optional) ----
+    sector_ids = None
+    ticker_macro_sensitivity = None
+    corr_matrix = None
+
+    if paths_config is not None:
+        sector_ids = _load_sector_ids(paths_config, tickers)
+        ticker_macro_sensitivity = _load_macro_sensitivity(paths_config, tickers)
+
+    if returns_train_df is not None:
+        corr_matrix = _compute_corr_matrix(returns_train_df, tickers)
+
+    return CompositeSRLoss(
+        num_tickers=len(tickers),
+        num_features_per_ticker=len(features),
+        ret_feature_idx=ret_idx,
+        turnover_feature_idx=turn_idx,
+        illiq_feature_idx=illiq_idx,
+        ba_spread_feature_idx=spread_idx,
+        macro_col_indices=macro_indices if macro_indices else None,
+        alpha=loss_cfg.get("alpha", 0.10),
+        beta=loss_cfg.get("beta", 0.05),
+        gamma=loss_cfg.get("gamma", 0.10),
+        delta=loss_cfg.get("delta", 0.10),
+        psych_thresholds=loss_cfg.get("psych_thresholds"),
+        psych_sigma=loss_cfg.get("psych_sigma", 0.01),
+        ema_span=loss_cfg.get("ema_span", 10),
+        sr_use_multi_timeframe=loss_cfg.get("sr_use_multi_timeframe", False),
+        sr_lookback_windows=loss_cfg.get("sr_lookback_windows"),
+        sr_pivot_threshold=loss_cfg.get("sr_pivot_threshold", 0.02),
+        sr_importance_hidden=loss_cfg.get("sr_importance_hidden", 8),
+        use_macro_override=loss_cfg.get("use_macro_override", False),
+        macro_override_hidden=loss_cfg.get("macro_override_hidden", 8),
+        sector_ids=sector_ids,
+        ticker_macro_sensitivity=ticker_macro_sensitivity,
+        corr_matrix=corr_matrix,
+    )
+
+
+def _extract_returns_from_X(
+    X: np.ndarray, reshaper: Reshaper,
+) -> np.ndarray:
+    """
+    Extract per-ticker returns from the flat (W, T, N*F) input array.
+    Returns (W, T, N).
+    """
+    features = reshaper.get_features()
+    tickers = reshaper.get_tickers()
+    F = len(features)
+    ret_idx = sorted(features).index("RET") if "RET" in features else 0
+
+    W, T, _ = X.shape
+    N = len(tickers)
+    out = np.zeros((W, T, N), dtype=X.dtype)
+    for j in range(N):
+        out[:, :, j] = X[:, :, j * F + ret_idx]
+    return out
+
+
+def _load_sec_fundamentals(
+    paths_config: Dict,
+    tickers: list,
+    good_starts_train: np.ndarray,
+    good_starts_val: np.ndarray,
+    in_size: int,
+    date_index_train: Optional[pd.DatetimeIndex] = None,
+    date_index_val: Optional[pd.DatetimeIndex] = None,
+):
+    """
+    Load pre-computed composite fundamental scores and window them.
+
+    Returns (fund_train, fund_val) as numpy arrays of shape (W, N),
+    or (None, None) if the file doesn't exist.
+    """
+    sec_dir = Path(paths_config.get("data", {}).get("sec_filings_dir", "data/raw/sec_filings"))
+    score_path = sec_dir / "composite_fundamental_scores.csv"
+    if not score_path.exists():
+        return None, None
+
+    scores = pd.read_csv(score_path, index_col=0, parse_dates=True)
+    available_tickers = [t for t in tickers if t in scores.columns]
+    if not available_tickers:
+        return None, None
+
+    scores = scores[available_tickers]
+
+    def _window_scores(starts, date_index):
+        if date_index is None:
+            return None
+        aligned = scores.reindex(date_index).ffill().bfill().fillna(0.0)
+        windows = []
+        for s in starts:
+            idx = s + in_size - 1
+            if idx < len(aligned):
+                windows.append(aligned.iloc[idx].values)
+            else:
+                windows.append(aligned.iloc[-1].values)
+        return np.array(windows) if windows else None
+
+    fund_train = _window_scores(good_starts_train, date_index_train)
+    fund_val = _window_scores(good_starts_val, date_index_val)
+    return fund_train, fund_val
 
-def run_training_pipeline(paths_config: Dict, hparams_config: Dict):
+
+def run_training_pipeline(
+    paths_config: Dict,
+    hparams_config: Dict,
+    models: Optional[List[str]] = None,
+):
     """
-    All models training pipeline entry point
+    All models training pipeline entry point.
 
     @param paths_config Dict Dictionary containing paths
-    @param features_config Dictionary containing hyperparameter information
+    @param hparams_config Dict Dictionary containing hyperparameter information
+    @param models Optional list of model keys to train (see ALL_MODELS). None = all.
     """
+    enabled = set(models) if models else set(ALL_MODELS.keys())
     print('\n', '=' * 20, ' Training Pipeline ', '=' * 20)
     
     # Create plots directory if it doesnt exist
-    create_directory(Path(paths_config['artifacts']['plots']))
+    plots_dir = Path(paths_config['artifacts']['plots'])
+    create_directory(plots_dir)
     
     # -------------------- Loading Processed Data -------------------- #
     processed_files = {
@@ -38,124 +279,257 @@ def run_training_pipeline(paths_config: Dict, hparams_config: Dict):
     }
 
     processed_dfs = load_csv_files(processed_files)
-    train_data = processed_dfs['processed_train']
-    returns_train = processed_dfs['returns_train']
+    train_data = processed_dfs['processed_train'].fillna(0.0)
+    returns_train = processed_dfs['returns_train'].fillna(0.0)
 
-    val_data = processed_dfs['processed_val']
-    returns_val = processed_dfs['returns_val']
+    val_data = processed_dfs['processed_val'].fillna(0.0)
+    returns_val = processed_dfs['returns_val'].fillna(0.0)
 
     print('Train shape:', train_data.shape)
     print('Val shape:', val_data.shape)
 
     # -------------------- Preprocessing (Reshaping) -------------------- #
+    in_size = hparams_config['rolling_windows']['in_size']
+    out_size = hparams_config['rolling_windows']['out_size']
+
     reshaper = Reshaper(
-        hparams_config['rolling_windows']['in_size'],
-        hparams_config['rolling_windows']['out_size'],
+        in_size,
+        out_size,
         hparams_config['rolling_windows']['stride']
     )
     reshaper.extract_features(train_data)
     
-    X_train, y_train, _ = reshaper.reshape(train_data, returns_train)
+    X_train, y_train, good_starts_train = reshaper.reshape(train_data, returns_train)
     print('-'*10, ' train shapes ', '-'*10)
-    print('X_train shpe:', X_train.shape)
+    print('X_train shape:', X_train.shape)
     print('y_train shape:', y_train.shape)
 
-
-    X_val, y_val, _ = reshaper.reshape(val_data, returns_val)
+    X_val, y_val, good_starts_val = reshaper.reshape(val_data, returns_val)
     print('-'*10, ' val shapes ', '-'*10)
-    print('X_val shape', X_val.shape)
+    print('X_val shape:', X_val.shape)
     print('y_val shape:', y_val.shape)
 
+    # -------------------- SEC Fundamentals (optional) -------------------- #
+    tickers = reshaper.get_tickers()
+    fund_train, fund_val = _load_sec_fundamentals(
+        paths_config, tickers,
+        good_starts_train, good_starts_val, in_size,
+        date_index_train=train_data.index if hasattr(train_data, 'index') else None,
+        date_index_val=val_data.index if hasattr(val_data, 'index') else None,
+    )
+    if fund_train is not None:
+        print(f'SEC fundamentals loaded: train={fund_train.shape}, val={fund_val.shape}')
+    else:
+        print('SEC fundamentals not available -- skipping L_fundamental')
+
+    # -------------------- Build Composite Loss -------------------- #
+    composite_loss = _build_composite_loss(
+        reshaper, hparams_config,
+        paths_config=paths_config,
+        returns_train_df=returns_train,
+    )
+
     # -------------------- Training Models -------------------- #
-    # Converting to pytorch tensors
     train_ds = WindowDataset(X_train, y_train)
     val_ds   = WindowDataset(X_val, y_val)
 
-    # Initializing once to compare all models together
     evaluator = Evaluator(y_val)
+    all_strategy_weights: Dict[str, np.ndarray] = {}
 
-    #### BaseLSTM ####
-    model1_name = 'BaseLSTM'
-    print('\n', '-'*10, f' Training {model1_name} ', '-'*10)
-    try:
-        trainer = Trainer(
-            model=BaseLSTM,
-            optimizer=optim.AdamW,
-            loss=differentiable_sharpe_loss,
-            model_hparams=hparams_config[model1_name]['model'],
-            optimizer_hparams=hparams_config[model1_name]['optimizer'],
-            train_hparams=hparams_config[model1_name]['train'],
-            in_size=X_train.shape[2],
-            num_stocks=y_train.shape[2]
-        )
-
-        trainer.train(train_ds)
-        trainer.evaluate(val_ds)
-
-        # Plot loss curves
-        train_val_losses_plot(
-            trainer.train_losses,
-            trainer.val_losses,
-            model1_name + ' Loss Curves',
-            Path(paths_config['artifacts']['plots']) /
-            (model1_name + ' Loss Curves' + '.png')
-        )
-
-        alloc_weights = trainer.get_val_alloc_weights()
-
-        # Call on every models output allocation weights to caluclated weighted returns
-        # Add daily returns for BaseLSTM generated weights
-        evaluator.calc_pf_daily_rets(alloc_weights, model1_name)
-    
-    except Exception as error:
-        print(f'DEBUG: Error while training {model1_name}. Skipping.', error)
-    
+    # ---- BaseLSTM (Sharpe-only, baseline) ----
+    if 'baseLSTM_sharpe' in enabled:
+        model1_name = 'BaseLSTM'
+        print('\n', '-'*10, f' Training {model1_name} (Sharpe) ', '-'*10)
+        try:
+            trainer = Trainer(
+                model=BaseLSTM,
+                optimizer=optim.AdamW,
+                loss=differentiable_sharpe_loss,
+                model_hparams=hparams_config[model1_name]['model'],
+                optimizer_hparams=hparams_config[model1_name]['optimizer'],
+                train_hparams=hparams_config[model1_name]['train'],
+                in_size=X_train.shape[2],
+                num_stocks=y_train.shape[2]
+            )
+            trainer.train(train_ds, val_ds)
+            trainer.evaluate(val_ds)
 
-    #### Attention LSTM ####
-    model2_name = 'AttentionLSTM'
-    print('\n', '-'*10, f' Training {model2_name} ', '-'*10)
-    try:
-        trainer = Trainer(
-            model=AttentionLSTM,
-            optimizer=optim.AdamW,
-            loss=differentiable_sharpe_loss,
-            model_hparams=hparams_config[model2_name]['model'],
-            optimizer_hparams=hparams_config[model2_name]['optimizer'],
-            train_hparams=hparams_config[model2_name]['train'],
-            in_size=X_train.shape[2],
-            num_stocks=y_train.shape[2]
-        )
-
-        trainer.train(train_ds)
-        trainer.evaluate(val_ds)
-
-        # Plot loss curves
-        train_val_losses_plot(
-            trainer.train_losses,
-            trainer.val_losses,
-            model2_name + ' Loss Curves',
-            Path(paths_config['artifacts']['plots']) /
-            (model2_name + ' Loss Curves' + '.png')
-        )
-
-        alloc_weights = trainer.get_val_alloc_weights()
-
-        # Add daily returns for AttentionLSTM generated weights
-        evaluator.calc_pf_daily_rets(alloc_weights, model2_name)
-    except Exception as error:
-        print(f'DEBUG: Error while training {model2_name}. Skipping.', error)
-    
-    # Evaluation/Comparison starts here
+            train_val_losses_plot(
+                trainer.train_losses, trainer.val_losses,
+                model1_name + ' (Sharpe) Loss Curves',
+                plots_dir / (model1_name + '_Sharpe_Loss.png'),
+            )
+            alloc_weights = trainer.get_val_alloc_weights()
+            evaluator.calc_pf_daily_rets(alloc_weights, model1_name + ' (Sharpe)')
+            all_strategy_weights[model1_name + ' (Sharpe)'] = alloc_weights
+        except Exception as error:
+            print(f'DEBUG: Error training {model1_name} (Sharpe). Skipping.', error)
+
+    # ---- BaseLSTM (Composite S/R Loss) ----
+    if 'baseLSTM_composite' in enabled:
+        model1c_name = 'BaseLSTM (Composite)'
+        print('\n', '-'*10, f' Training {model1c_name} ', '-'*10)
+        try:
+            trainer = Trainer(
+                model=BaseLSTM,
+                optimizer=optim.AdamW,
+                loss=composite_loss,
+                model_hparams=hparams_config['BaseLSTM']['model'],
+                optimizer_hparams=hparams_config['BaseLSTM']['optimizer'],
+                train_hparams=hparams_config['BaseLSTM']['train'],
+                in_size=X_train.shape[2],
+                num_stocks=y_train.shape[2],
+                fundamentals_train=fund_train,
+                fundamentals_val=fund_val,
+            )
+            trainer.train(train_ds, val_ds)
+            trainer.evaluate(val_ds)
+
+            train_val_losses_plot(
+                trainer.train_losses, trainer.val_losses,
+                model1c_name + ' Loss Curves',
+                plots_dir / 'BaseLSTM_Composite_Loss.png',
+            )
+            alloc_weights = trainer.get_val_alloc_weights()
+            evaluator.calc_pf_daily_rets(alloc_weights, model1c_name)
+            all_strategy_weights[model1c_name] = alloc_weights
+        except Exception as error:
+            print(f'DEBUG: Error training {model1c_name}. Skipping.', error)
+
+    # ---- AttentionLSTM (Composite S/R Loss) ----
+    if 'attentionLSTM_composite' in enabled:
+        model2_name = 'AttentionLSTM (Composite)'
+        print('\n', '-'*10, f' Training {model2_name} ', '-'*10)
+        try:
+            trainer = Trainer(
+                model=AttentionLSTM,
+                optimizer=optim.AdamW,
+                loss=composite_loss,
+                model_hparams=hparams_config['AttentionLSTM']['model'],
+                optimizer_hparams=hparams_config['AttentionLSTM']['optimizer'],
+                train_hparams=hparams_config['AttentionLSTM']['train'],
+                in_size=X_train.shape[2],
+                num_stocks=y_train.shape[2],
+                fundamentals_train=fund_train,
+                fundamentals_val=fund_val,
+            )
+            trainer.train(train_ds, val_ds)
+            trainer.evaluate(val_ds)
+
+            train_val_losses_plot(
+                trainer.train_losses, trainer.val_losses,
+                model2_name + ' Loss Curves',
+                plots_dir / 'AttentionLSTM_Composite_Loss.png',
+            )
+            alloc_weights = trainer.get_val_alloc_weights()
+            evaluator.calc_pf_daily_rets(alloc_weights, model2_name)
+            all_strategy_weights[model2_name] = alloc_weights
+        except Exception as error:
+            print(f'DEBUG: Error training {model2_name}. Skipping.', error)
+
+    # ---- DeformTime (Composite S/R Loss) ----
+    if 'deformTime_composite' in enabled:
+        model3_name = 'DeformTime (Composite)'
+        print('\n', '-'*10, f' Training {model3_name} ', '-'*10)
+        try:
+            trainer = Trainer(
+                model=DeformTime,
+                optimizer=optim.AdamW,
+                loss=composite_loss,
+                model_hparams=hparams_config['DeformTime']['model'],
+                optimizer_hparams=hparams_config['DeformTime']['optimizer'],
+                train_hparams=hparams_config['DeformTime']['train'],
+                in_size=X_train.shape[2],
+                num_stocks=y_train.shape[2],
+                fundamentals_train=fund_train,
+                fundamentals_val=fund_val,
+            )
+            trainer.train(train_ds, val_ds)
+            trainer.evaluate(val_ds)
+
+            train_val_losses_plot(
+                trainer.train_losses, trainer.val_losses,
+                model3_name + ' Loss Curves',
+                plots_dir / 'DeformTime_Composite_Loss.png',
+            )
+            alloc_weights = trainer.get_val_alloc_weights()
+            evaluator.calc_pf_daily_rets(alloc_weights, model3_name)
+            all_strategy_weights[model3_name] = alloc_weights
+        except Exception as error:
+            print(f'DEBUG: Error training {model3_name}. Skipping.', error)
+
+    # -------------------- CVaR Benchmark -------------------- #
+    if 'cvar' in enabled:
+        print('\n', '-'*10, ' CVaR Benchmark ', '-'*10)
+        try:
+            cvar_cfg = hparams_config.get('CVaRBenchmark', {})
+            cvar_params = CVaRParams(
+                confidence=cvar_cfg.get('confidence', 0.95),
+                risk_aversion=cvar_cfg.get('risk_aversion', 1.0),
+                w_min=cvar_cfg.get('w_min', 0.0),
+                w_max=cvar_cfg.get('w_max', 0.30),
+                L_tar=cvar_cfg.get('L_tar', 1.6),
+            )
+            cvar_bench = CVaRBenchmark(params=cvar_params)
+
+            X_val_rets = _extract_returns_from_X(X_val, reshaper)
+            cvar_weights = cvar_bench.rolling_optimize(X_val_rets)
+
+            evaluator.calc_pf_daily_rets(cvar_weights, 'CVaR Benchmark')
+            all_strategy_weights['CVaR Benchmark'] = cvar_weights
+            print(f'CVaR Benchmark weights shape: {cvar_weights.shape}')
+        except Exception as error:
+            print(f'DEBUG: CVaR Benchmark failed. Skipping.', error)
+
+    # -------------------- Equal Weight -------------------- #
     evaluator.calc_eq_wt_daily_rets()
-    
-    evaluator.plot_windowed_comparison(
-        Path(paths_config['artifacts']['plots']) /
-        (f'Daily Returns' + '.png')
-    )
+    eq_w = np.full((y_val.shape[0], y_val.shape[2]), 1.0 / y_val.shape[2])
+    all_strategy_weights['Equal Weight'] = eq_w
+
+    # -------------------- Windowed Comparison Plot -------------------- #
+    evaluator.plot_windowed_comparison(plots_dir / 'Daily_Returns.png')
 
     total_returns = evaluator.calc_total_performance('returns')
     total_sharpes = evaluator.calc_total_performance('sharpe')
 
-    print('\n', '-'*10, ' Portfolio Perfomance Metrics ', '-'*10)
+    print('\n', '-'*10, ' Portfolio Performance Metrics ', '-'*10)
     print('\n', 'Compounded returns for each window:\n', total_returns)
-    print('\n', 'Basic sharpe ratios for each window:\n', total_sharpes)
\ No newline at end of file
+    print('\n', 'Basic sharpe ratios for each window:\n', total_sharpes)
+
+    # -------------------- pyfolio Tearsheets -------------------- #
+    print('\n', '-'*10, ' Generating pyfolio Tearsheets ', '-'*10)
+    try:
+        val_dates = val_data.index if hasattr(val_data, 'index') else pd.RangeIndex(len(val_data))
+        window_dates = build_window_dates(val_dates, good_starts_val, in_size, out_size)
+
+        benchmark_col = 'sprtrn'
+        if benchmark_col in val_data.columns:
+            bench_rets = val_data[benchmark_col]
+        elif hasattr(val_data, 'index'):
+            bench_rets = pd.Series(0.0, index=val_data.index)
+        else:
+            bench_rets = pd.Series(0.0, index=pd.RangeIndex(len(val_data)))
+
+        strategies: Dict[str, dict] = {}
+        for name, w_arr in all_strategy_weights.items():
+            try:
+                pf_data = weights_to_pyfolio(
+                    weights=w_arr,
+                    returns=y_val,
+                    tickers=tickers,
+                    window_dates=window_dates,
+                    benchmark_returns=bench_rets,
+                )
+                strategies[name] = pf_data
+            except Exception as exc:
+                print(f'DEBUG: pyfolio conversion failed for {name}: {exc}')
+
+        if strategies:
+            pyfolio_dir = plots_dir / 'pyfolio'
+            generate_comparison_tearsheets(strategies, pyfolio_dir)
+            summary = comparison_summary(strategies)
+            print('\n', '-'*10, ' Strategy Comparison Summary ', '-'*10)
+            print(summary.to_string())
+            summary.to_csv(plots_dir / 'strategy_comparison.csv')
+    except Exception as error:
+        print(f'DEBUG: pyfolio tearsheets failed. Skipping.', error)
\ No newline at end of file
diff --git a/financial_loss_functions/src/training/train.py b/financial_loss_functions/src/training/train.py
index c4eb5b85..9beb2e81 100644
--- a/financial_loss_functions/src/training/train.py
+++ b/financial_loss_functions/src/training/train.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 from torch.utils.data import DataLoader
-from typing import List, Dict, Callable
+from typing import List, Dict, Callable, Optional
 from src.data_processing.dataset import WindowDataset
 
 if torch.mps.is_available():
@@ -31,7 +31,9 @@ def __init__(
         optimizer_hparams: Dict,  # Specific to optimizer
         train_hparams: Dict,      # Generic training params (epochs, batch_size, etc.)
         in_size: int,
-        num_stocks: int
+        num_stocks: int,
+        fundamentals_train: Optional[np.ndarray] = None,
+        fundamentals_val: Optional[np.ndarray] = None,
     ):
         """
         Initialize Trainer instance to train given model.
@@ -41,7 +43,7 @@ def __init__(
         @param optimizer torch.optim
             Pytorch optimization class to be used to loss optimization
         @param loss Callable
-            Custom loss function
+            Custom loss function (2-arg legacy or 4-arg CompositeSRLoss)
         @param model_hparams Dict
             Dictionary containing hyperparameters required for model initialization
         @param optimizer_hparams Dict
@@ -51,7 +53,11 @@ def __init__(
         @param in_size int
             Size of input window
         @param num_stocks int
-            Number of stocks, i.e, number of output nodes 
+            Number of stocks, i.e, number of output nodes
+        @param fundamentals_train np.ndarray (num_windows, N) optional
+            Pre-computed per-ticker fundamental scores for training windows
+        @param fundamentals_val np.ndarray (num_windows, N) optional
+            Pre-computed per-ticker fundamental scores for validation windows
         """
         self.device = DEVICE
         print('Model hyperparameters:\n', model_hparams)
@@ -66,13 +72,27 @@ def __init__(
         ).to(self.device)
         
         # Initialize optimizer with its specific hyperparameters
+        all_params = list(self.model.parameters())
+        if hasattr(loss, 'parameters'):
+            all_params += list(loss.parameters())
         self.optimizer = optimizer(
-            self.model.parameters(),
+            all_params,
             **optimizer_hparams
         )
+        if hasattr(loss, 'to'):
+            loss = loss.to(self.device)
         self.loss = loss
 
         self.train_hparams = train_hparams
+
+        self.fundamentals_train = (
+            torch.tensor(fundamentals_train, dtype=torch.float32)
+            if fundamentals_train is not None else None
+        )
+        self.fundamentals_val = (
+            torch.tensor(fundamentals_val, dtype=torch.float32)
+            if fundamentals_val is not None else None
+        )
         
         self.train_losses = []
         self.val_losses = []
@@ -81,13 +101,57 @@ def __init__(
 
         self.val_alloc_weights = []
     
-    def train(self, train_ds: WindowDataset):
+    def _call_loss(self, weights, yb, xb, sample_indices=None, fundamentals_source=None):
+        """
+        Invoke the loss function with the correct signature.
+
+        Legacy losses accept (weights, returns).
+        CompositeSRLoss accepts (weights, returns, features, fundamentals).
+        """
+        fund_batch = None
+        if fundamentals_source is not None and sample_indices is not None:
+            fund_batch = fundamentals_source[sample_indices].to(self.device)
+
+        try:
+            return self.loss(weights, yb, xb, fund_batch)
+        except TypeError:
+            return self.loss(weights, yb)
+
+    def _validate_epoch(self, val_loader: DataLoader) -> float:
+        """Run one validation pass and return average loss."""
+        self.model.eval()
+        total_loss, total_samples, sample_offset = 0.0, 0, 0
+        with torch.no_grad():
+            for xb, yb in val_loader:
+                b = xb.size(0)
+                xb, yb = xb.to(self.device), yb.to(self.device)
+
+                indices = None
+                if self.fundamentals_val is not None:
+                    indices = torch.arange(sample_offset, sample_offset + b)
+
+                weights = self.model(xb)
+                loss = self._call_loss(
+                    weights, yb, xb,
+                    sample_indices=indices,
+                    fundamentals_source=self.fundamentals_val,
+                )
+                total_loss += loss.item() * b
+                total_samples += b
+                sample_offset += b
+        return total_loss / total_samples
+
+    def train(self, train_ds: WindowDataset, val_ds: Optional[WindowDataset] = None):
         """
         Train inistalized model using a train data split.
 
         @param train_ds WindowDataset
             Training data split converted to windowed dataset tensors
+        @param val_ds Optional[WindowDataset]
+            Validation data split. If provided, enables early stopping and LR scheduling.
         """
+        import copy
+
         start_time = time.time()
         train_loader = DataLoader(
             train_ds,
@@ -95,18 +159,52 @@ def train(self, train_ds: WindowDataset):
             shuffle=False
         )
 
+        val_loader = None
+        if val_ds is not None:
+            val_loader = DataLoader(
+                val_ds,
+                batch_size=self.train_hparams['val_batch_size'],
+                shuffle=False
+            )
+
+        # Early stopping and LR scheduling setup
+        es_patience = self.train_hparams.get('early_stopping_patience', 15)
+        lr_patience = self.train_hparams.get('lr_scheduler_patience', 7)
+        lr_factor = self.train_hparams.get('lr_scheduler_factor', 0.5)
+
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            self.optimizer, mode='min', factor=lr_factor,
+            patience=lr_patience, min_lr=1e-6,
+        )
+
+        best_val_loss = float('inf')
+        best_model_state = None
+        patience_counter = 0
+
         for epoch in range(self.train_hparams['epochs']):
             epoch_start = time.time()
             self.model.train()
             total_loss_sum = 0.0
             total_samples = 0
+            sample_offset = 0
 
             for xb, yb in train_loader:
                 xb = xb.to(self.device)
                 yb = yb.to(self.device)
+                batch_size = xb.size(0)
+
+                indices = None
+                if self.fundamentals_train is not None:
+                    indices = torch.arange(
+                        sample_offset, sample_offset + batch_size
+                    )
 
                 weights = self.model(xb)  # (B, N)
-                loss = self.loss(weights, yb)
+                loss = self._call_loss(
+                    weights, yb, xb,
+                    sample_indices=indices,
+                    fundamentals_source=self.fundamentals_train,
+                )
 
                 self.optimizer.zero_grad()
                 loss.backward()
@@ -116,20 +214,41 @@ def train(self, train_ds: WindowDataset):
                 )
                 self.optimizer.step()
 
-                batch_size = xb.size(0)
-
                 total_loss_sum += loss.item() * batch_size
                 total_samples += batch_size
+                sample_offset += batch_size
 
-            epoch_end = time.time()
-            epoch_time = round(epoch_end - epoch_start, 3)
-            
             epoch_avg_loss = total_loss_sum / total_samples
             self.train_losses.append(epoch_avg_loss)
-            print(f'Epoch {epoch} | Train Loss: {epoch_avg_loss:.4f} | Took: {epoch_time}s')
-
             self.avg_train_loss = epoch_avg_loss
 
+            # Validation + early stopping
+            val_msg = ''
+            if val_loader is not None:
+                val_loss = self._validate_epoch(val_loader)
+                self.val_losses.append(val_loss)
+                scheduler.step(val_loss)
+                val_msg = f' | Val Loss: {val_loss:.4f} | LR: {self.optimizer.param_groups[0]["lr"]:.1e}'
+
+                if val_loss < best_val_loss:
+                    best_val_loss = val_loss
+                    best_model_state = copy.deepcopy(self.model.state_dict())
+                    patience_counter = 0
+                else:
+                    patience_counter += 1
+
+            epoch_time = round(time.time() - epoch_start, 3)
+            print(f'Epoch {epoch} | Train Loss: {epoch_avg_loss:.4f}{val_msg} | Took: {epoch_time}s')
+
+            if val_loader is not None and patience_counter >= es_patience:
+                print(f'Early stopping at epoch {epoch} (no improvement for {es_patience} epochs)')
+                break
+
+        # Restore best model
+        if best_model_state is not None:
+            self.model.load_state_dict(best_model_state)
+            print(f'Restored best model (val loss: {best_val_loss:.4f})')
+
         end_time = time.time()
         time_taken = round(end_time - start_time, 3)
         print(f'Average Train Loss: {self.avg_train_loss:.4f}, Time Taken: {time_taken}s')
@@ -153,12 +272,22 @@ def evaluate(self, val_ds: WindowDataset):
         with torch.no_grad():
             self.val_losses = []
             total_loss, total_samples = 0.0, 0
+            sample_offset = 0
 
             for xb, yb in val_loader:
                 b = xb.size(0)
                 xb, yb = xb.to(self.device), yb.to(self.device)
+
+                indices = None
+                if self.fundamentals_val is not None:
+                    indices = torch.arange(sample_offset, sample_offset + b)
+
                 weights = self.model(xb)
-                loss = self.loss(weights, yb)
+                loss = self._call_loss(
+                    weights, yb, xb,
+                    sample_indices=indices,
+                    fundamentals_source=self.fundamentals_val,
+                )
 
                 # detach & move to CPU BEFORE appending
                 self.val_alloc_weights.append(weights.detach().cpu()) 
@@ -169,6 +298,7 @@ def evaluate(self, val_ds: WindowDataset):
                 # --- accumulate weighted sum for overall avg ---
                 total_loss += loss.item() * b
                 total_samples += b
+                sample_offset += b
 
             # --- weighted average over all samples ---
             self.avg_val_loss = total_loss / total_samples
diff --git a/financial_loss_functions/tests/unit/test_composite_loss.py b/financial_loss_functions/tests/unit/test_composite_loss.py
new file mode 100644
index 00000000..99eabaa5
--- /dev/null
+++ b/financial_loss_functions/tests/unit/test_composite_loss.py
@@ -0,0 +1,417 @@
+"""
+Unit tests for CompositeSRLoss and backward-compatible loss wrappers.
+"""
+
+import torch
+import pytest
+import numpy as np
+
+from src.training.loss_functions import (
+    CompositeSRLoss,
+    TimeframeImportanceFn,
+    MacroOverrideGate,
+    differentiable_sharpe_loss,
+    sharpe_loss_compat,
+    sortino_loss_compat,
+)
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+B, T_IN, T_OUT, N, F = 4, 20, 10, 5, 6  # batch, time-in, time-out, tickers, features
+
+
+def _random_weights():
+    w = torch.softmax(torch.randn(B, N), dim=-1)
+    w.requires_grad_(True)
+    return w
+
+
+def _random_returns():
+    return torch.randn(B, T_OUT, N) * 0.02
+
+
+def _random_features():
+    return torch.randn(B, T_IN, N * F)
+
+
+def _random_fundamentals():
+    return torch.randn(B, N)
+
+
+def _make_loss(**kwargs):
+    defaults = dict(
+        num_tickers=N,
+        num_features_per_ticker=F,
+        ret_feature_idx=2,
+        turnover_feature_idx=3,
+        illiq_feature_idx=1,
+        ba_spread_feature_idx=0,
+        macro_col_indices=[4, 5],
+    )
+    defaults.update(kwargs)
+    return CompositeSRLoss(**defaults)
+
+
+# ---------------------------------------------------------------------------
+# Tests: CompositeSRLoss (original)
+# ---------------------------------------------------------------------------
+
+class TestCompositeSRLoss:
+
+    def test_forward_returns_scalar(self):
+        loss_fn = _make_loss()
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+        fund = _random_fundamentals()
+
+        loss = loss_fn(w, r, f, fund)
+        assert loss.dim() == 0, "Loss should be a scalar"
+        assert torch.isfinite(loss), "Loss should be finite"
+
+    def test_gradients_flow_to_weights(self):
+        loss_fn = _make_loss()
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        loss.backward()
+
+        assert w.grad is not None, "Gradients must reach weights"
+        assert torch.isfinite(w.grad).all(), "Gradients must be finite"
+
+    def test_regime_weights_receive_gradients(self):
+        loss_fn = _make_loss()
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        loss.backward()
+
+        assert loss_fn.regime_weights.grad is not None
+        assert torch.isfinite(loss_fn.regime_weights.grad).all()
+
+    def test_reduces_to_sharpe_when_penalties_zero(self):
+        loss_fn = _make_loss(alpha=0.0, beta=0.0, gamma=0.0, delta=0.0)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        composite = loss_fn(w, r, f)
+        sharpe_only = differentiable_sharpe_loss(w, r)
+
+        assert torch.allclose(composite, sharpe_only, atol=1e-6)
+
+    def test_no_features_still_works(self):
+        """When features=None, only the Sharpe component should fire."""
+        loss_fn = _make_loss()
+        w = _random_weights()
+        r = _random_returns()
+
+        loss = loss_fn(w, r)
+        assert torch.isfinite(loss)
+
+    def test_fundamentals_penalty_direction(self):
+        """Overweighting stocks with positive fundamentals should lower the loss."""
+        loss_fn = _make_loss(alpha=0, beta=0, gamma=0, delta=1.0)
+
+        fund = torch.ones(B, N)
+        w_overweight = torch.ones(B, N) * 0.5
+        w_equal = torch.ones(B, N) * (1.0 / N)
+
+        r = _random_returns()
+
+        loss_over = loss_fn(w_overweight, r, fundamentals=fund)
+        loss_eq = loss_fn(w_equal, r, fundamentals=fund)
+
+        assert loss_over.item() != loss_eq.item()
+
+    def test_no_macro_indices(self):
+        loss_fn = _make_loss(macro_col_indices=None)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        assert torch.isfinite(loss)
+
+
+# ---------------------------------------------------------------------------
+# Tests: Backward-compatible wrappers
+# ---------------------------------------------------------------------------
+
+class TestCompatWrappers:
+
+    def test_sharpe_compat_ignores_extra_args(self):
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = sharpe_loss_compat(w, r, f, _random_fundamentals())
+        expected = differentiable_sharpe_loss(w, r)
+        assert torch.allclose(loss, expected, atol=1e-7)
+
+    def test_sortino_compat_ignores_extra_args(self):
+        w = _random_weights()
+        r = _random_returns()
+
+        loss = sortino_loss_compat(w, r, _random_features())
+        assert torch.isfinite(loss)
+
+
+# ---------------------------------------------------------------------------
+# Tests: Multi-Timeframe S/R Hierarchy
+# ---------------------------------------------------------------------------
+
+class TestMultiTimeframeSR:
+
+    def test_backward_compat_flag_off(self):
+        """With sr_use_multi_timeframe=False, loss matches existing behavior."""
+        torch.manual_seed(42)
+        loss_fn_old = _make_loss(sr_use_multi_timeframe=False)
+
+        torch.manual_seed(0)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn_old(w, r, f)
+        assert torch.isfinite(loss)
+        assert loss_fn_old.importance_fn is None
+
+    def test_multi_tf_returns_scalar(self):
+        loss_fn = _make_loss(sr_use_multi_timeframe=True)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        assert loss.dim() == 0
+        assert torch.isfinite(loss)
+
+    def test_multi_tf_gradients_flow(self):
+        loss_fn = _make_loss(sr_use_multi_timeframe=True)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        loss.backward()
+
+        assert w.grad is not None
+        assert torch.isfinite(w.grad).all()
+        # Importance MLP params should receive gradients
+        for p in loss_fn.importance_fn.parameters():
+            assert p.grad is not None, "Importance fn params must get gradients"
+
+    def test_importance_fn_monotonic_init(self):
+        """At init, importance should be non-decreasing for increasing lookbacks."""
+        fn = TimeframeImportanceFn(hidden=8)
+        x = torch.linspace(0.0, 1.0, steps=10)
+        with torch.no_grad():
+            y = fn(x)
+        diffs = y[1:] - y[:-1]
+        assert (diffs >= -1e-6).all(), "Importance should be non-decreasing at init"
+
+    def test_pivot_detection_at_high(self):
+        """Monotonically increasing prices → scores near +1."""
+        loss_fn = _make_loss(sr_use_multi_timeframe=True)
+        # Create strictly increasing prices
+        prices = torch.linspace(1.0, 2.0, steps=T_IN).unsqueeze(0).unsqueeze(-1)
+        prices = prices.expand(B, T_IN, N)
+        scores = loss_fn._detect_pivots(prices)  # (B, W, N)
+        assert (scores >= 0.9).all(), "Increasing prices should be near +1 (resistance)"
+
+    def test_pivot_detection_at_low(self):
+        """Monotonically decreasing prices → scores near -1."""
+        loss_fn = _make_loss(sr_use_multi_timeframe=True)
+        prices = torch.linspace(2.0, 1.0, steps=T_IN).unsqueeze(0).unsqueeze(-1)
+        prices = prices.expand(B, T_IN, N)
+        scores = loss_fn._detect_pivots(prices)
+        assert (scores <= -0.9).all(), "Decreasing prices should be near -1 (support)"
+
+    def test_custom_lookback_windows(self):
+        loss_fn = _make_loss(
+            sr_use_multi_timeframe=True,
+            sr_lookback_windows=[3, 7, 15],
+        )
+        assert loss_fn.sr_windows == [3, 7, 15]
+        assert loss_fn.sr_lookback_normed.shape == (3,)
+
+    def test_reduces_to_sharpe_when_alpha_zero_multi_tf(self):
+        loss_fn = _make_loss(
+            alpha=0.0, beta=0.0, gamma=0.0, delta=0.0,
+            sr_use_multi_timeframe=True,
+        )
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        composite = loss_fn(w, r, f)
+        sharpe_only = differentiable_sharpe_loss(w, r)
+        assert torch.allclose(composite, sharpe_only, atol=1e-5)
+
+
+# ---------------------------------------------------------------------------
+# Tests: Macro Override Gate
+# ---------------------------------------------------------------------------
+
+class TestMacroOverrideGate:
+
+    def test_gate_output_bounded(self):
+        gate = MacroOverrideGate(num_macro_features=3, hidden=8)
+        delta_macro = torch.randn(B, 3)
+        omega = gate(delta_macro)
+        assert (omega >= 0.0).all() and (omega <= 1.0).all()
+
+    def test_gate_gradients_flow(self):
+        loss_fn = _make_loss(use_macro_override=True)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        loss.backward()
+
+        for p in loss_fn.macro_override.parameters():
+            assert p.grad is not None, "Override gate params must get gradients"
+
+    def test_override_off_matches_baseline(self):
+        """use_macro_override=False → standard penalty weights."""
+        loss_fn = _make_loss(use_macro_override=False)
+        assert loss_fn.macro_override is None
+
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        assert torch.isfinite(loss)
+
+    def test_override_extreme_macro_boosts_gamma(self):
+        """With extreme macro delta, the macro penalty should dominate."""
+        loss_fn = _make_loss(
+            use_macro_override=True,
+            alpha=0.10, beta=0.05, gamma=0.10, delta=0.10,
+        )
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+        # Make macro features have huge range to push omega toward 1
+        for idx in [4, 5]:
+            for n_idx in range(N):
+                col = n_idx * F + idx
+                f[:, :, col] = torch.linspace(-100, 100, T_IN).unsqueeze(0)
+
+        loss = loss_fn(w, r, f)
+        assert torch.isfinite(loss)
+
+
+# ---------------------------------------------------------------------------
+# Tests: Sector-Aware Penalty
+# ---------------------------------------------------------------------------
+
+class TestSectorAware:
+
+    def test_sector_ids_affect_penalty(self):
+        sector_ids = [0, 0, 1, 1, 2]
+        loss_fn_sec = _make_loss(sector_ids=sector_ids)
+        loss_fn_nosec = _make_loss()
+
+        torch.manual_seed(99)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss_sec = loss_fn_sec(w, r, f)
+        loss_nosec = loss_fn_nosec(w, r, f)
+
+        assert torch.isfinite(loss_sec)
+        # With sectors, the penalty scaling differs
+        assert not torch.allclose(loss_sec, loss_nosec, atol=1e-8)
+
+    def test_no_sector_ids_unchanged(self):
+        loss_fn = _make_loss(sector_ids=None)
+        assert loss_fn.sector_ids is None
+
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        assert torch.isfinite(loss)
+
+
+# ---------------------------------------------------------------------------
+# Tests: Ticker Macro Sensitivity
+# ---------------------------------------------------------------------------
+
+class TestTickerMacroSensitivity:
+
+    def test_sensitivity_affects_macro_penalty(self):
+        sens = torch.tensor([0.1, 0.2, 0.8, 0.9, 1.0])
+        loss_fn = _make_loss(ticker_macro_sensitivity=sens)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        assert torch.isfinite(loss)
+
+    def test_no_sensitivity_unchanged(self):
+        loss_fn = _make_loss(ticker_macro_sensitivity=None)
+        assert loss_fn.ticker_macro_sensitivity is None
+
+
+# ---------------------------------------------------------------------------
+# Tests: Correlation Guard
+# ---------------------------------------------------------------------------
+
+class TestCorrelationGuard:
+
+    def test_corr_matrix_integration(self):
+        corr = torch.eye(N)
+        corr[0, 1] = 0.9
+        corr[1, 0] = 0.9
+        loss_fn = _make_loss(
+            sr_use_multi_timeframe=True,
+            corr_matrix=corr,
+        )
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss = loss_fn(w, r, f)
+        assert torch.isfinite(loss)
+
+    def test_no_corr_matrix_unchanged(self):
+        loss_fn = _make_loss(corr_matrix=None)
+        assert loss_fn.corr_matrix is None
+
+    def test_corr_requires_multi_tf(self):
+        """Correlation guard only activates when multi-TF is enabled."""
+        corr = torch.eye(N)
+        loss_fn_no_tf = _make_loss(
+            sr_use_multi_timeframe=False,
+            corr_matrix=corr,
+        )
+        loss_fn_tf = _make_loss(
+            sr_use_multi_timeframe=True,
+            corr_matrix=corr,
+        )
+
+        torch.manual_seed(42)
+        w = _random_weights()
+        r = _random_returns()
+        f = _random_features()
+
+        loss_no_tf = loss_fn_no_tf(w, r, f)
+        loss_tf = loss_fn_tf(w, r, f)
+
+        assert torch.isfinite(loss_no_tf)
+        assert torch.isfinite(loss_tf)
diff --git a/financial_loss_functions/tests/unit/test_cvar_benchmark.py b/financial_loss_functions/tests/unit/test_cvar_benchmark.py
new file mode 100644
index 00000000..960db1d7
--- /dev/null
+++ b/financial_loss_functions/tests/unit/test_cvar_benchmark.py
@@ -0,0 +1,54 @@
+"""
+Unit tests for the CVaR benchmark optimizer.
+"""
+
+import pytest
+import numpy as np
+
+
+class TestCVaRBenchmark:
+
+    def _make_benchmark(self, **kwargs):
+        from src.models.cvar_benchmark import CVaRBenchmark, CVaRParams
+        params = CVaRParams(**kwargs)
+        return CVaRBenchmark(params=params)
+
+    def test_optimize_returns_valid_weights(self):
+        bench = self._make_benchmark()
+        rng = np.random.default_rng(42)
+        returns = rng.normal(0.001, 0.02, size=(100, 5))
+
+        weights = bench.optimize(returns)
+
+        assert weights.shape == (5,)
+        assert np.all(weights >= -1e-6), "Weights should be non-negative"
+        assert abs(weights.sum() - 1.0) < 1e-4, "Weights should sum to 1"
+
+    def test_equal_weight_fallback_on_degenerate_input(self):
+        bench = self._make_benchmark(w_max=1.0)
+        returns = np.zeros((10, 5))
+
+        weights = bench.optimize(returns)
+
+        assert weights.shape == (5,)
+        assert abs(weights.sum() - 1.0) < 1e-4
+
+    def test_rolling_optimize_shape(self):
+        bench = self._make_benchmark()
+        rng = np.random.default_rng(99)
+        X_returns = rng.normal(0.0, 0.01, size=(3, 50, 4))
+
+        result = bench.rolling_optimize(X_returns)
+
+        assert result.shape == (3, 4)
+        for i in range(3):
+            assert abs(result[i].sum() - 1.0) < 1e-4
+
+    def test_respects_w_max_constraint(self):
+        bench = self._make_benchmark(w_max=0.25)
+        rng = np.random.default_rng(7)
+        returns = rng.normal(0.001, 0.02, size=(200, 8))
+
+        weights = bench.optimize(returns)
+
+        assert np.all(weights <= 0.25 + 1e-4)
diff --git a/financial_loss_functions/tests/unit/test_pyfolio_viz.py b/financial_loss_functions/tests/unit/test_pyfolio_viz.py
new file mode 100644
index 00000000..0257aa31
--- /dev/null
+++ b/financial_loss_functions/tests/unit/test_pyfolio_viz.py
@@ -0,0 +1,80 @@
+"""
+Unit tests for pyfolio data conversion utilities.
+"""
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from src.evaluation.pyfolio_viz import (
+    weights_to_pyfolio,
+    build_window_dates,
+    comparison_summary,
+)
+
+
+class TestWeightsToPyfolio:
+
+    def test_returns_series_has_correct_dates(self):
+        W, T_OUT, N = 2, 5, 3
+        tickers = ["A", "B", "C"]
+        weights = np.full((W, N), 1.0 / N)
+        returns = np.random.default_rng(0).normal(0, 0.01, (W, T_OUT, N))
+
+        all_dates = pd.bdate_range("2020-01-01", periods=20)
+        window_dates = [all_dates[0:5], all_dates[5:10]]
+        bench = pd.Series(0.001, index=all_dates)
+
+        result = weights_to_pyfolio(weights, returns, tickers, window_dates, bench)
+
+        assert isinstance(result["returns"], pd.Series)
+        assert len(result["returns"]) == T_OUT * W
+        assert isinstance(result["positions"], pd.DataFrame)
+        assert "cash" in result["positions"].columns
+
+    def test_positions_columns_match_tickers(self):
+        W, T_OUT, N = 1, 3, 2
+        tickers = ["X", "Y"]
+        weights = np.array([[0.6, 0.4]])
+        returns = np.random.default_rng(1).normal(0, 0.01, (W, T_OUT, N))
+
+        dates = pd.bdate_range("2021-06-01", periods=10)
+        window_dates = [dates[0:3]]
+        bench = pd.Series(0.0, index=dates)
+
+        result = weights_to_pyfolio(weights, returns, tickers, window_dates, bench)
+
+        pos_cols = set(result["positions"].columns)
+        assert {"X", "Y", "cash"}.issubset(pos_cols)
+
+
+class TestBuildWindowDates:
+
+    def test_correct_number_of_windows(self):
+        full_idx = pd.bdate_range("2020-01-01", periods=100)
+        starts = np.array([0, 10, 20])
+        in_size, out_size = 30, 10
+
+        wd = build_window_dates(full_idx, starts, in_size, out_size)
+
+        assert len(wd) == 3
+        for dates in wd:
+            assert len(dates) == out_size
+
+
+class TestComparisonSummary:
+
+    def test_summary_columns(self):
+        dates = pd.bdate_range("2020-01-01", periods=50)
+        rets = pd.Series(np.random.default_rng(5).normal(0.001, 0.02, 50), index=dates)
+
+        strategies = {
+            "A": {"returns": rets, "benchmark_rets": rets * 0},
+        }
+
+        summary = comparison_summary(strategies)
+
+        assert "total_return" in summary.columns
+        assert "sharpe_ratio" in summary.columns
+        assert "max_drawdown" in summary.columns
+        assert summary.index[0] == "A"
diff --git a/financial_loss_functions/tests/unit/test_sec_filings.py b/financial_loss_functions/tests/unit/test_sec_filings.py
new file mode 100644
index 00000000..43d78449
--- /dev/null
+++ b/financial_loss_functions/tests/unit/test_sec_filings.py
@@ -0,0 +1,102 @@
+"""
+Unit tests for SEC filing feature extraction utilities.
+
+These tests exercise the pure computation functions without requiring
+a live edgartools connection or SEC API access.
+"""
+
+import pytest
+import numpy as np
+import pandas as pd
+
+from src.data_collection.sec_filings import (
+    _compute_event_signal,
+    compute_composite_fundamental_scores,
+    FUNDAMENTAL_WEIGHTS,
+)
+
+
+class TestEventSignal:
+
+    def _mock_filing(self, date_str):
+        class _F:
+            filing_date = date_str
+        return _F()
+
+    def test_event_within_window(self):
+        idx = pd.bdate_range("2020-01-01", "2020-01-31")
+        filings = [self._mock_filing("2020-01-15")]
+
+        signal = _compute_event_signal(filings, idx, window_days=2)
+
+        assert signal.loc[pd.Timestamp("2020-01-15")] == 1.0
+        assert signal.loc[pd.Timestamp("2020-01-13")] == 1.0
+        assert signal.loc[pd.Timestamp("2020-01-10")] == 0.0
+
+    def test_no_filings_returns_zeros(self):
+        idx = pd.bdate_range("2020-06-01", "2020-06-30")
+        signal = _compute_event_signal([], idx)
+        assert (signal == 0.0).all()
+
+
+class TestCompositeScores:
+
+    def test_shape_and_columns(self):
+        tickers = ["A", "B"]
+        idx = pd.bdate_range("2020-01-01", "2020-01-10")
+        funds = {
+            "A": pd.DataFrame(
+                {"revenue_growth": 0.1, "operating_margin": 0.2,
+                 "debt_to_equity": 1.0, "fcf_yield": 0.05,
+                 "event_signal": 0.0},
+                index=idx,
+            ),
+            "B": pd.DataFrame(
+                {"revenue_growth": -0.1, "operating_margin": 0.1,
+                 "debt_to_equity": 2.0, "fcf_yield": -0.02,
+                 "event_signal": 1.0},
+                index=idx,
+            ),
+        }
+
+        scores = compute_composite_fundamental_scores(funds, tickers, idx)
+
+        assert list(scores.columns) == tickers
+        assert len(scores) == len(idx)
+        assert scores.isna().sum().sum() == 0
+
+    def test_positive_growth_yields_higher_score(self):
+        tickers = ["GOOD", "BAD"]
+        idx = pd.bdate_range("2020-01-01", "2020-01-05")
+        funds = {
+            "GOOD": pd.DataFrame(
+                {"revenue_growth": 0.5, "operating_margin": 0.3,
+                 "debt_to_equity": 0.5, "fcf_yield": 0.10,
+                 "event_signal": 0.0},
+                index=idx,
+            ),
+            "BAD": pd.DataFrame(
+                {"revenue_growth": -0.5, "operating_margin": -0.1,
+                 "debt_to_equity": 5.0, "fcf_yield": -0.10,
+                 "event_signal": 0.0},
+                index=idx,
+            ),
+        }
+
+        scores = compute_composite_fundamental_scores(funds, tickers, idx)
+        assert (scores["GOOD"] > scores["BAD"]).all()
+
+    def test_missing_ticker_defaults_to_zero(self):
+        tickers = ["X", "Y"]
+        idx = pd.bdate_range("2020-03-01", "2020-03-05")
+        funds = {
+            "X": pd.DataFrame(
+                {"revenue_growth": 0.1, "operating_margin": 0.2,
+                 "debt_to_equity": 1.0, "fcf_yield": 0.05,
+                 "event_signal": 0.0},
+                index=idx,
+            ),
+        }
+
+        scores = compute_composite_fundamental_scores(funds, tickers, idx)
+        assert "Y" in scores.columns