diff --git a/financial_loss_functions/config/hparams.json b/financial_loss_functions/config/hparams.json index e03b678e..a63df1c0 100644 --- a/financial_loss_functions/config/hparams.json +++ b/financial_loss_functions/config/hparams.json @@ -18,7 +18,10 @@ "train_batch_size": 256, "val_batch_size": 1, "clip_grad_norm": 0.5, - "epochs": 100 + "epochs": 300, + "early_stopping_patience": 15, + "lr_scheduler_patience": 7, + "lr_scheduler_factor": 0.5 } }, "AttentionLSTM": { @@ -35,7 +38,59 @@ "train_batch_size": 256, "val_batch_size": 1, "clip_grad_norm": 0.5, - "epochs": 100 + "epochs": 300, + "early_stopping_patience": 15, + "lr_scheduler_patience": 7, + "lr_scheduler_factor": 0.5 } + }, + "DeformTime": { + "model": { + "max_seq_len": 200, + "e_layers": 3, + "d_layers": 2, + "d_model": 64, + "nheads": 4, + "kernel_size": 3, + "dropout": 0.2, + "n_reshape": 2, + "patch_len": 16, + "stride": 8 + }, + "optimizer": { + "lr": 1e-4, + "weight_decay": 3e-4 + }, + "train": { + "train_batch_size": 256, + "val_batch_size": 1, + "clip_grad_norm": 0.5, + "epochs": 300, + "early_stopping_patience": 15, + "lr_scheduler_patience": 7, + "lr_scheduler_factor": 0.5 + } + }, + "CompositeSRLoss": { + "alpha": 0.03, + "beta": 0.02, + "gamma": 0.05, + "delta": 0.01, + "psych_sigma": 1.5, + "psych_thresholds": [0.0, 2.0, -2.0, 5.0, -5.0, 8.0, -8.0, 12.0, -12.0], + "ema_span": 10, + "sr_use_multi_timeframe": true, + "sr_lookback_windows": [5, 10, 21, 42, 63, 105], + "sr_pivot_threshold": 0.02, + "sr_importance_hidden": 8, + "use_macro_override": true, + "macro_override_hidden": 8 + }, + "CVaRBenchmark": { + "confidence": 0.95, + "risk_aversion": 1.0, + "w_min": 0.0, + "w_max": 0.30, + "L_tar": 1.6 } } \ No newline at end of file diff --git a/financial_loss_functions/config/paths.json b/financial_loss_functions/config/paths.json index 2cfe918e..fce01ca3 100644 --- a/financial_loss_functions/config/paths.json +++ b/financial_loss_functions/config/paths.json @@ -4,6 +4,7 @@ "raw_dir": "data/raw/", "processed_dir": "data/processed/", "raw_macro_dir": "data/raw/macro/", + "sec_filings_dir": "data/raw/sec_filings/", "crsp_dir": "" }, "raw_files": { @@ -28,6 +29,7 @@ "artifacts": { "artifact_dir": "artifacts/", "results": "artifacts/results/", - "plots": "artifacts/results/plots/" + "plots": "artifacts/results/plots/", + "pyfolio_output": "artifacts/results/plots/pyfolio/" } } \ No newline at end of file diff --git a/financial_loss_functions/exploration/feature_selection_findings.ipynb b/financial_loss_functions/exploration/feature_selection_findings.ipynb index 7457506b..cb02d0d1 100644 --- a/financial_loss_functions/exploration/feature_selection_findings.ipynb +++ b/financial_loss_functions/exploration/feature_selection_findings.ipynb @@ -2650,7 +2650,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "757fab41", "metadata": { "execution": { @@ -2725,7 +2725,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "55af6918", "metadata": { "execution": { @@ -2790,7 +2790,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "de47886a", "metadata": { "execution": { @@ -2850,7 +2850,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "c5ed9f54", "metadata": { "execution": { @@ -2916,7 +2916,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "b46ff82a", "metadata": { "execution": { @@ -3029,7 +3029,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "a94d8a19", "metadata": { "execution": { @@ -3098,7 +3098,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "610307a0", "metadata": { "execution": { @@ -3197,7 +3197,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "968d3180", "metadata": { "execution": { @@ -3312,7 +3312,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.9" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/financial_loss_functions/requirements.txt b/financial_loss_functions/requirements.txt index bba07a16..751867d1 100644 --- a/financial_loss_functions/requirements.txt +++ b/financial_loss_functions/requirements.txt @@ -3,6 +3,7 @@ fredapi==0.5.2 numpy==2.3.4 pandas==2.3.3 cvxopt==1.3.2 +cvxpy scikit-learn==1.7.2 scipy==1.16.3 pytest==8.4.2 @@ -12,4 +13,7 @@ seaborn==0.13.0 statsmodels==0.14.5 torch==2.9.1 torchvision==0.24.1 -optuna==4.6.0 \ No newline at end of file +optuna==4.6.0 +edgartools +pyfolio-reloaded +empyrical-reloaded \ No newline at end of file diff --git a/financial_loss_functions/scripts/run_sec_collection.py b/financial_loss_functions/scripts/run_sec_collection.py new file mode 100644 index 00000000..2336e3e0 --- /dev/null +++ b/financial_loss_functions/scripts/run_sec_collection.py @@ -0,0 +1,75 @@ +""" +CLI entry point for SEC filing data collection. + +Usage: + python -m scripts.run_sec_collection [--identity "Name email@domain.com"] +""" + +import argparse +import json +import logging +from pathlib import Path + +import pandas as pd + +from scripts.utils import load_config +from src.data_collection.sec_filings import run_sec_filing_pipeline + +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + + +def main(): + parser = argparse.ArgumentParser(description="Fetch SEC filings and compute fundamental features") + parser.add_argument( + "--identity", + default="FinLossFunctions research@example.com", + help="SEC EDGAR identity string (Name + email)", + ) + parser.add_argument( + "--output-dir", + default=None, + help="Override output directory for cached Parquet files", + ) + args = parser.parse_args() + + paths_config = load_config("paths") + + sector_path = Path("config/sector_classification.json") + with open(sector_path, "r") as f: + sector_map = json.load(f) + tickers = sorted(sector_map.keys()) + logger.info("Found %d tickers from sector classification", len(tickers)) + + crsp_dir = Path(paths_config["data"]["crsp_dir"]) + if not crsp_dir.exists(): + crsp_dir = Path(paths_config["data"]["raw_dir"]) / "sample" + + train_file = crsp_dir / paths_config["raw_files"]["train"] + if not train_file.exists(): + logger.error("Training data not found at %s", train_file) + return + + train_df = pd.read_csv(train_file) + if "date" in train_df.columns: + train_df["date"] = pd.to_datetime(train_df["date"]) + train_df = train_df.set_index("date") + + cache_dir = Path(args.output_dir) if args.output_dir else Path( + paths_config.get("data", {}).get("sec_filings_dir", "data/raw/sec_filings") + ) + + scores = run_sec_filing_pipeline( + tickers=tickers, + target_index=train_df.index, + cache_dir=cache_dir, + identity=args.identity, + ) + + output_path = cache_dir / "composite_fundamental_scores.csv" + scores.to_csv(output_path) + logger.info("Composite scores saved to %s", output_path) + + +if __name__ == "__main__": + main() diff --git a/financial_loss_functions/scripts/run_training.py b/financial_loss_functions/scripts/run_training.py index 9afa375e..be14c73c 100644 --- a/financial_loss_functions/scripts/run_training.py +++ b/financial_loss_functions/scripts/run_training.py @@ -1,13 +1,23 @@ +import argparse import os # from dotenv import load_dotenv from scripts.utils import load_path_config, load_config -from src.training.pipeline import run_training_pipeline +from src.training.pipeline import ALL_MODELS, run_training_pipeline if __name__ == '__main__': # load_dotenv() - paths_config = load_path_config(os.path.join('config', 'paths.json')) + parser = argparse.ArgumentParser(description="Run training pipeline") + parser.add_argument( + '--models', + nargs='+', + choices=list(ALL_MODELS.keys()), + default=None, + help=f"Models to train. Choices: {list(ALL_MODELS.keys())}. Default: all", + ) + args = parser.parse_args() + paths_config = load_path_config(os.path.join('config', 'paths.json')) hparams_config = load_config(os.path.join('config', 'hparams.json')) - run_training_pipeline(paths_config, hparams_config) \ No newline at end of file + run_training_pipeline(paths_config, hparams_config, models=args.models) \ No newline at end of file diff --git a/financial_loss_functions/src/data_collection/sec_filings.py b/financial_loss_functions/src/data_collection/sec_filings.py new file mode 100644 index 00000000..e55e4d50 --- /dev/null +++ b/financial_loss_functions/src/data_collection/sec_filings.py @@ -0,0 +1,475 @@ +""" +SEC filing data pipeline using edgartools. + +Fetches 10-K, 10-Q, and 8-K filings for a set of tickers, extracts +XBRL financial statements, computes fundamental features, aligns them +to a daily business-day grid, and caches results as Parquet. +""" + +import json +import logging +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +FUNDAMENTAL_WEIGHTS = { + "revenue_growth": 0.30, + "operating_margin": 0.25, + "debt_to_equity": -0.20, + "fcf_yield": 0.15, + "event_signal": 0.10, +} + +_EVENT_WINDOW_DAYS = 2 + + +# --------------------------------------------------------------------------- +# Filing retrieval helpers +# --------------------------------------------------------------------------- + +def _init_edgar(identity: str = "FinLossFunctions research@example.com"): + """Lazy-import edgartools and set SEC identity.""" + try: + import edgar + except ImportError as exc: + raise ImportError( + "edgartools is required for the SEC filing pipeline. " + "Install with: pip install edgartools" + ) from exc + edgar.set_identity(identity) + return edgar + + +def fetch_filings_for_ticker( + ticker: str, + forms: Tuple[str, ...] = ("10-K", "10-Q", "8-K"), + *, + edgar_module=None, +) -> Dict[str, list]: + """ + Retrieve filings for *ticker* grouped by form type. + + Returns dict mapping form string to a list of Filing objects. + """ + if edgar_module is None: + edgar_module = _init_edgar() + + company = edgar_module.Company(ticker) + result: Dict[str, list] = {} + for form in forms: + try: + filings = company.get_filings(form=form) + result[form] = list(filings) if filings is not None else [] + except Exception as exc: + logger.warning("Could not fetch %s for %s: %s", form, ticker, exc) + result[form] = [] + return result + + +# --------------------------------------------------------------------------- +# XBRL extraction helpers +# --------------------------------------------------------------------------- + +def _safe_float(value) -> Optional[float]: + """Convert a value to float, returning None on failure.""" + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _extract_statement_value(statement, *concept_labels) -> Optional[float]: + """ + Try to pull a numeric value from an XBRL statement by trying + multiple concept label variants. + """ + if statement is None: + return None + try: + df = statement.to_dataframe() if hasattr(statement, "to_dataframe") else None + except Exception: + return None + if df is None or df.empty: + return None + + for label in concept_labels: + for col in df.columns: + if label.lower() in str(col).lower(): + vals = df[col].dropna() + if not vals.empty: + return _safe_float(vals.iloc[0]) + return None + + +def extract_financials_from_filing(filing) -> Dict[str, Optional[float]]: + """ + Extract key financial metrics from a single 10-K or 10-Q filing + via its XBRL data. + + Returns a dict with keys: + revenue, operating_income, net_income, + total_assets, total_liabilities, stockholders_equity, + current_assets, current_liabilities, + operating_cashflow, capex + """ + metrics: Dict[str, Optional[float]] = { + "revenue": None, + "operating_income": None, + "net_income": None, + "total_assets": None, + "total_liabilities": None, + "stockholders_equity": None, + "current_assets": None, + "current_liabilities": None, + "operating_cashflow": None, + "capex": None, + } + + try: + obj = filing.obj() + except Exception: + return metrics + + financials = getattr(obj, "financials", None) + if financials is None: + try: + from edgar.financials import Financials + financials = Financials.extract(filing) + except Exception: + return metrics + + if financials is None: + return metrics + + # Income statement + try: + inc = financials.income_statement() + metrics["revenue"] = _extract_statement_value( + inc, "Revenue", "Revenues", "Net Revenue", "SalesRevenueNet", + "RevenueFromContractWithCustomer", + ) + metrics["operating_income"] = _extract_statement_value( + inc, "OperatingIncome", "Operating Income", + ) + metrics["net_income"] = _extract_statement_value( + inc, "NetIncome", "Net Income", + ) + except Exception: + pass + + # Balance sheet + try: + bs = financials.balance_sheet() + metrics["total_assets"] = _extract_statement_value( + bs, "Assets", "TotalAssets", + ) + metrics["total_liabilities"] = _extract_statement_value( + bs, "Liabilities", "TotalLiabilities", + ) + metrics["stockholders_equity"] = _extract_statement_value( + bs, "StockholdersEquity", "Equity", + ) + metrics["current_assets"] = _extract_statement_value( + bs, "CurrentAssets", "AssetsCurrent", + ) + metrics["current_liabilities"] = _extract_statement_value( + bs, "CurrentLiabilities", "LiabilitiesCurrent", + ) + except Exception: + pass + + # Cash flow statement + try: + cf = financials.cashflow_statement() + metrics["operating_cashflow"] = _extract_statement_value( + cf, "OperatingCashFlow", "NetCashFromOperating", + "CashFromOperatingActivities", + ) + metrics["capex"] = _extract_statement_value( + cf, "CapitalExpenditure", "Capex", + "PaymentsToAcquirePropertyPlantAndEquipment", + ) + except Exception: + pass + + return metrics + + +# --------------------------------------------------------------------------- +# Feature computation +# --------------------------------------------------------------------------- + +def _compute_ticker_fundamentals( + filings_10k: list, + filings_10q: list, +) -> pd.DataFrame: + """ + Build a time-indexed DataFrame of fundamental features from 10-K + and 10-Q filings for a single ticker. + + Columns: revenue_growth, operating_margin, debt_to_equity, + current_ratio, fcf_yield + """ + records = [] + all_filings = [] + for f in filings_10k: + all_filings.append(("10-K", f)) + for f in filings_10q: + all_filings.append(("10-Q", f)) + + for form_type, filing in all_filings: + try: + filing_date = pd.Timestamp(filing.filing_date) + except Exception: + continue + + data = extract_financials_from_filing(filing) + records.append({"date": filing_date, "form": form_type, **data}) + + if not records: + return pd.DataFrame() + + df = pd.DataFrame(records).sort_values("date").drop_duplicates( + subset=["date"], keep="last" + ) + df = df.set_index("date") + + # Revenue growth (QoQ) + if "revenue" in df.columns: + df["revenue_growth"] = df["revenue"].pct_change() + else: + df["revenue_growth"] = np.nan + + # Operating margin + rev = df.get("revenue") + op_inc = df.get("operating_income") + if rev is not None and op_inc is not None: + df["operating_margin"] = np.where( + (rev != 0) & rev.notna() & op_inc.notna(), + op_inc / rev, + np.nan, + ) + else: + df["operating_margin"] = np.nan + + # Debt-to-equity + liab = df.get("total_liabilities") + equity = df.get("stockholders_equity") + if liab is not None and equity is not None: + df["debt_to_equity"] = np.where( + (equity != 0) & equity.notna() & liab.notna(), + liab / equity, + np.nan, + ) + else: + df["debt_to_equity"] = np.nan + + # Current ratio + ca = df.get("current_assets") + cl = df.get("current_liabilities") + if ca is not None and cl is not None: + df["current_ratio"] = np.where( + (cl != 0) & cl.notna() & ca.notna(), + ca / cl, + np.nan, + ) + else: + df["current_ratio"] = np.nan + + # FCF yield (proxy: (operating CF - capex) / total_assets as stand-in) + ocf = df.get("operating_cashflow") + capex = df.get("capex") + ta = df.get("total_assets") + if ocf is not None and ta is not None: + capex_vals = capex.fillna(0) if capex is not None else 0 + df["fcf_yield"] = np.where( + (ta != 0) & ta.notna() & ocf.notna(), + (ocf - capex_vals.abs()) / ta, + np.nan, + ) + else: + df["fcf_yield"] = np.nan + + feature_cols = [ + "revenue_growth", + "operating_margin", + "debt_to_equity", + "current_ratio", + "fcf_yield", + ] + return df[feature_cols] + + +def _compute_event_signal( + filings_8k: list, + target_index: pd.DatetimeIndex, + window_days: int = _EVENT_WINDOW_DAYS, +) -> pd.Series: + """ + Create a binary event indicator that is 1 within +/- *window_days* + of any 8-K filing date. + """ + event_dates = set() + for f in filings_8k: + try: + event_dates.add(pd.Timestamp(f.filing_date)) + except Exception: + continue + + signal = pd.Series(0.0, index=target_index, name="event_signal") + for ed in event_dates: + mask = (target_index >= ed - pd.Timedelta(days=window_days)) & ( + target_index <= ed + pd.Timedelta(days=window_days) + ) + signal.loc[mask] = 1.0 + return signal + + +# --------------------------------------------------------------------------- +# Full pipeline +# --------------------------------------------------------------------------- + +def run_sec_pipeline_for_ticker( + ticker: str, + target_index: pd.DatetimeIndex, + cache_dir: Optional[Path] = None, + edgar_module=None, +) -> pd.DataFrame: + """ + End-to-end: fetch filings, extract features, align to daily grid. + + Returns DataFrame indexed by *target_index* with columns: + revenue_growth, operating_margin, debt_to_equity, + current_ratio, fcf_yield, event_signal + """ + cache_path = None + if cache_dir is not None: + cache_path = cache_dir / f"{ticker}_fundamentals.parquet" + if cache_path.exists(): + logger.info("Loading cached SEC data for %s", ticker) + cached = pd.read_parquet(cache_path) + cached.index = pd.to_datetime(cached.index) + return cached.reindex(target_index).ffill().bfill() + + if edgar_module is None: + edgar_module = _init_edgar() + + filings_by_form = fetch_filings_for_ticker( + ticker, ("10-K", "10-Q", "8-K"), edgar_module=edgar_module + ) + + fund_df = _compute_ticker_fundamentals( + filings_by_form.get("10-K", []), + filings_by_form.get("10-Q", []), + ) + + event_signal = _compute_event_signal( + filings_by_form.get("8-K", []), + target_index, + ) + + if fund_df.empty: + result = pd.DataFrame( + 0.0, + index=target_index, + columns=[ + "revenue_growth", "operating_margin", "debt_to_equity", + "current_ratio", "fcf_yield", "event_signal", + ], + ) + else: + aligned = fund_df.reindex(target_index).ffill().bfill() + aligned["event_signal"] = event_signal.values + result = aligned + + if cache_path is not None: + cache_path.parent.mkdir(parents=True, exist_ok=True) + result.to_parquet(cache_path) + + return result + + +def compute_composite_fundamental_scores( + all_ticker_fundamentals: Dict[str, pd.DataFrame], + tickers: List[str], + target_index: pd.DatetimeIndex, + weights: Optional[Dict[str, float]] = None, +) -> pd.DataFrame: + """ + Combine per-ticker fundamental DataFrames into a single composite + score DataFrame with columns = tickers, index = target_index. + + Each ticker's features are z-scored cross-sectionally, then + combined with *weights* (defaults to FUNDAMENTAL_WEIGHTS). + """ + if weights is None: + weights = FUNDAMENTAL_WEIGHTS + + feature_names = [ + "revenue_growth", "operating_margin", "debt_to_equity", + "fcf_yield", "event_signal", + ] + + panels: Dict[str, pd.DataFrame] = {} + for feat in feature_names: + feat_df = pd.DataFrame(index=target_index) + for t in tickers: + if t in all_ticker_fundamentals and feat in all_ticker_fundamentals[t].columns: + feat_df[t] = all_ticker_fundamentals[t][feat].reindex(target_index) + else: + feat_df[t] = 0.0 + feat_df = feat_df.ffill().bfill().fillna(0.0) + panels[feat] = feat_df + + # Cross-sectional z-score per date and feature + z_panels: Dict[str, pd.DataFrame] = {} + for feat, df in panels.items(): + row_mean = df.mean(axis=1) + row_std = df.std(axis=1).replace(0, 1) + z_panels[feat] = df.sub(row_mean, axis=0).div(row_std, axis=0) + + composite = pd.DataFrame(0.0, index=target_index, columns=tickers) + for feat, w in weights.items(): + if feat in z_panels: + composite += w * z_panels[feat] + + return composite + + +def run_sec_filing_pipeline( + tickers: List[str], + target_index: pd.DatetimeIndex, + cache_dir: Optional[Path] = None, + identity: str = "FinLossFunctions research@example.com", +) -> pd.DataFrame: + """ + Top-level entry: fetch SEC data for all *tickers*, compute composite + fundamental scores, return DataFrame (dates x tickers). + """ + edgar_module = _init_edgar(identity) + + all_funds: Dict[str, pd.DataFrame] = {} + for ticker in tickers: + logger.info("Processing SEC filings for %s", ticker) + try: + df = run_sec_pipeline_for_ticker( + ticker, target_index, cache_dir=cache_dir, + edgar_module=edgar_module, + ) + all_funds[ticker] = df + except Exception as exc: + logger.warning("SEC pipeline failed for %s: %s", ticker, exc) + + return compute_composite_fundamental_scores( + all_funds, tickers, target_index + ) diff --git a/financial_loss_functions/src/data_processing/dataset.py b/financial_loss_functions/src/data_processing/dataset.py index 10967068..70ab10af 100644 --- a/financial_loss_functions/src/data_processing/dataset.py +++ b/financial_loss_functions/src/data_processing/dataset.py @@ -70,6 +70,8 @@ def extract_features(self, train_df: pd.DataFrame): features = [] for col in train_df.columns: + if self.col_sep not in col: + continue t, f = self._split_col(col) tickers.append(t) features.append(f) @@ -79,6 +81,8 @@ def extract_features(self, train_df: pd.DataFrame): # Features must be identical for all tickers features_by_ticker = {t: set() for t in self.tickers} for col in train_df.columns: + if self.col_sep not in col: + continue t, f = self._split_col(col) features_by_ticker[t].add(f) diff --git a/financial_loss_functions/src/evaluation/__init__.py b/financial_loss_functions/src/evaluation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/financial_loss_functions/src/evaluation/pyfolio_viz.py b/financial_loss_functions/src/evaluation/pyfolio_viz.py new file mode 100644 index 00000000..8494c745 --- /dev/null +++ b/financial_loss_functions/src/evaluation/pyfolio_viz.py @@ -0,0 +1,220 @@ +""" +pyfolio integration for portfolio strategy visualization. + +Converts model allocation weights and return arrays into the +pandas-based formats that pyfolio expects, then generates +comparison tearsheets across strategies. +""" + +import logging +from pathlib import Path +from typing import Dict, List, Optional + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Data conversion +# --------------------------------------------------------------------------- + +def weights_to_pyfolio( + weights: np.ndarray, + returns: np.ndarray, + tickers: List[str], + window_dates: List[pd.DatetimeIndex], + benchmark_returns: pd.Series, + initial_capital: float = 1_000_000.0, +) -> Dict[str, pd.DataFrame]: + """ + Convert rolling-window model outputs into pyfolio-compatible data. + + @param weights np.ndarray (W, N) portfolio weights per window. + @param returns np.ndarray (W, T_out, N) future returns per window. + @param tickers list[str] ordered ticker symbols. + @param window_dates list[pd.DatetimeIndex] dates for each window's + T_out period. ``len(window_dates)`` must equal W, and each + element has length T_out. + @param benchmark_returns pd.Series daily benchmark returns indexed + by date (e.g. sprtrn). + @param initial_capital float notional starting capital for the + positions table. + + @return dict with keys: + ``returns`` – pd.Series (daily non-cumulative portfolio returns) + ``positions`` – pd.DataFrame (daily dollar notionals per ticker + cash) + ``benchmark_rets`` – pd.Series (aligned benchmark returns) + """ + W, T_out, N = returns.shape + + all_port_rets: Dict[pd.Timestamp, float] = {} + all_positions: Dict[pd.Timestamp, Dict[str, float]] = {} + + portfolio_value = initial_capital + + for w_idx in range(W): + w = weights[w_idx] # (N,) + for t in range(T_out): + date = window_dates[w_idx][t] + if date in all_port_rets: + continue + + daily_ret = float((w * returns[w_idx, t, :]).sum()) + all_port_rets[date] = daily_ret + + pos = {} + for k, ticker in enumerate(tickers): + pos[ticker] = portfolio_value * w[k] + pos["cash"] = 0.0 + all_positions[date] = pos + + portfolio_value *= (1 + daily_ret) + + ret_series = pd.Series(all_port_rets).sort_index() + ret_series.index = pd.to_datetime(ret_series.index) + ret_series.index.name = None + + pos_df = pd.DataFrame.from_dict(all_positions, orient="index").sort_index() + pos_df.index = pd.to_datetime(pos_df.index) + pos_df.index.name = None + + aligned_bench = benchmark_returns.reindex(ret_series.index).fillna(0.0) + + return { + "returns": ret_series, + "positions": pos_df, + "benchmark_rets": aligned_bench, + } + + +def build_window_dates( + full_date_index: pd.DatetimeIndex, + good_starts: np.ndarray, + in_size: int, + out_size: int, +) -> List[pd.DatetimeIndex]: + """ + Compute the T_out date ranges for each rolling window. + + @param full_date_index pd.DatetimeIndex All dates in the split. + @param good_starts np.ndarray Starting indices of valid windows. + @param in_size int Input window length. + @param out_size int Output window length. + + @return list[pd.DatetimeIndex] one DatetimeIndex per window. + """ + window_dates = [] + for s in good_starts: + start = s + in_size + end = start + out_size + window_dates.append(full_date_index[start:end]) + return window_dates + + +# --------------------------------------------------------------------------- +# Tearsheet generation +# --------------------------------------------------------------------------- + +def _try_import_pyfolio(): + """Lazy-import pyfolio (the reloaded fork).""" + try: + import pyfolio as pf + return pf + except ImportError as exc: + raise ImportError( + "pyfolio-reloaded is required for tearsheet generation. " + "Install with: pip install pyfolio-reloaded" + ) from exc + + +def generate_returns_tearsheet( + strategy_data: Dict, + title: str = "", + output_path: Optional[Path] = None, +): + """ + Generate a pyfolio returns tearsheet for a single strategy. + + @param strategy_data dict Output of ``weights_to_pyfolio``. + @param title str Optional plot title prefix. + @param output_path Path If provided, save the figure to this path. + """ + pf = _try_import_pyfolio() + + fig = pf.create_returns_tear_sheet( + strategy_data["returns"], + benchmark_rets=strategy_data["benchmark_rets"], + return_fig=True, + ) + + if fig is not None and output_path is not None: + output_path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(str(output_path), dpi=150, bbox_inches="tight") + logger.info("Tearsheet saved to %s", output_path) + + plt.close("all") + + +def generate_comparison_tearsheets( + strategies: Dict[str, Dict], + output_dir: Path, +): + """ + Generate and save a pyfolio returns tearsheet for each strategy. + + @param strategies dict Mapping strategy name -> pyfolio data dict. + @param output_dir Path Directory to save tearsheet PNGs. + """ + output_dir.mkdir(parents=True, exist_ok=True) + + for name, data in strategies.items(): + safe_name = name.replace(" ", "_").replace("/", "_") + out_path = output_dir / f"tearsheet_{safe_name}.png" + try: + generate_returns_tearsheet(data, title=name, output_path=out_path) + logger.info("Generated tearsheet for %s", name) + except Exception as exc: + logger.warning("Tearsheet generation failed for %s: %s", name, exc) + + +# --------------------------------------------------------------------------- +# Comparison summary table +# --------------------------------------------------------------------------- + +def comparison_summary( + strategies: Dict[str, Dict], +) -> pd.DataFrame: + """ + Compute summary performance metrics for all strategies. + + @param strategies dict strategy name -> pyfolio data dict. + @return pd.DataFrame Rows = strategies, columns = metrics. + """ + rows = [] + for name, data in strategies.items(): + rets = data["returns"] + ann_factor = 252 + total_ret = float((1 + rets).prod() - 1) + ann_ret = float((1 + total_ret) ** (ann_factor / max(len(rets), 1)) - 1) + ann_vol = float(rets.std() * np.sqrt(ann_factor)) + sharpe = ann_ret / ann_vol if ann_vol > 0 else 0.0 + + cum = (1 + rets).cumprod() + peak = cum.cummax() + dd = (cum - peak) / peak + max_dd = float(dd.min()) + + rows.append({ + "strategy": name, + "total_return": round(total_ret, 4), + "ann_return": round(ann_ret, 4), + "ann_volatility": round(ann_vol, 4), + "sharpe_ratio": round(sharpe, 4), + "max_drawdown": round(max_dd, 4), + }) + return pd.DataFrame(rows).set_index("strategy") diff --git a/financial_loss_functions/src/models/DeformTime/DeformTime.py b/financial_loss_functions/src/models/DeformTime/DeformTime.py new file mode 100644 index 00000000..8adcda11 --- /dev/null +++ b/financial_loss_functions/src/models/DeformTime/DeformTime.py @@ -0,0 +1,189 @@ +import torch +from torch import nn +from src.models.registry import NNModelLibrary + +from src.models.DeformTime.layers.TemporalDeformAttention import Encoder, CrossDeformAttn +from src.models.DeformTime.layers.Embed import Deform_Temporal_Embedding, Local_Temporal_Embedding +from math import ceil + +class Layernorm(nn.Module): + def __init__(self, dim): + super(Layernorm, self).__init__() + self.layernorm = nn.LayerNorm(dim) + + def forward(self, x): + x_hat = self.layernorm(x) + bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1) + return x_hat - bias + + +@NNModelLibrary.register(category='transformer') +class DeformTime(nn.Module): + def __init__( + self, + input_size: int, + num_stocks: int, + max_seq_len: int, + e_layers: int, + d_layers: int, + d_model: int, + nheads: int, + kernel_size: int, + dropout: float, + n_reshape: int, + patch_len: int, + stride: int + ) -> None: + super().__init__() + + self.input_size = input_size + self.num_stocks = num_stocks + + self.d_layers = d_layers + self.d_model = d_model + + # Embedding + if self.input_size == 1: + self.enc_value_embedding = Deform_Temporal_Embedding(self.input_size, self.d_model, freq='d') + else: + self.s_group = 4 + assert self.d_model % self.s_group == 0 + # Embedding local patches + self.pad_in_len = ceil(1.0 * self.input_size / self.s_group) * self.s_group + self.enc_value_embedding = Local_Temporal_Embedding(self.pad_in_len//self.s_group, self.d_model, self.pad_in_len-self.input_size, self.s_group) + + self.pre_norm = nn.LayerNorm(self.d_model) + # Encoder + n_days = [1,n_reshape,n_reshape] + assert len(n_days) > e_layers-1 + drop_path_rate=dropout + dpr = [x.item() for x in torch.linspace(drop_path_rate, drop_path_rate, e_layers)] + self.encoder = Encoder( + [ + CrossDeformAttn(seq_len=max_seq_len, + d_model=self.d_model, + n_heads=nheads, + dropout=dropout, + droprate=dpr[l], + n_days=n_days[l], + window_size=kernel_size, + patch_len=patch_len, + stride=stride) for l in range(e_layers) + ], + norm_layer=Layernorm(self.d_model) + ) + + # GRU layers + self.gru = torch.nn.GRU( + self.d_model, self.d_model, self.d_layers, batch_first=True, dropout=dropout + ) + + # @author: Atharva Vaidya - This head helps in converting the encoded DeformTime context into portfolio logits expected by the loss pipeline. + self.fc = nn.Sequential( + nn.Linear(self.d_model, self.d_model), + nn.LeakyReLU(), + nn.Linear(self.d_model, self.num_stocks) + ) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + """ + Variables + • x_enc + type: tensor of numbers + usage: used to store the input feature window that is passed from the + trainer for DeformTime encoding + • x_mark_enc + type: tensor of numbers or empty value + usage: reserved encoder marker input that is accepted for interface + compatibility but is not used in this runtime path + • x_dec + type: tensor of numbers or empty value + usage: reserved decoder input kept for compatibility with the + surrounding model interface + • x_mark_dec + type: tensor of numbers or empty value + usage: reserved decoder marker input kept for compatibility with the + surrounding model interface + • mean_enc + type: tensor of numbers + usage: used to store the per-window mean so the input can be + stationarized before attention is applied + • std_enc + type: tensor of numbers + usage: used to store the per-window standard deviation so the input + scaling remains numerically stable + • enc_out + type: tensor of numbers + usage: used to store the encoded deformable-attention representation + produced by the encoder + • h0 + type: tensor of numbers + usage: used to store the initial hidden state passed into the GRU + • out + type: tensor of numbers + usage: used to store the GRU sequence output before collapsing it to + a single allocation context + • context + type: tensor of numbers + usage: used to store the last GRU state which becomes the compact + portfolio-allocation context for the final head + + forecast now extracts a portfolio-allocation context from the encoded sequence + instead of reconstructing the original feature space. @author: Atharva Vaidya + """ + assert x_enc.shape[-1] == self.input_size + + # Series Stationarization adopted from NSformer, optional + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() + x_enc = x_enc / std_enc + + x_enc = self.enc_value_embedding(x_enc) + x_enc = self.pre_norm(x_enc) + + # Deformed attention + enc_out, _ = self.encoder(x_enc) + + # Decoder + h0 = torch.zeros(self.d_layers, x_enc.size(0), self.d_model).requires_grad_().to(x_enc.device) + out, _ = self.gru(enc_out, h0.detach()) + # Extract the final GRU state so one context vector represents the allocation decision. + context = out[:, -1, :] + # Apply the portfolio head to convert the context vector into allocation logits. + return self.fc(context) + + def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None, mask=None): + """ + Variables + • x_enc + type: tensor of numbers + usage: used to store the encoded input window passed from the trainer + for a portfolio-weight prediction + • x_mark_enc + type: tensor of numbers or empty value + usage: reserved encoder marker input that is kept for interface + compatibility + • x_dec + type: tensor of numbers or empty value + usage: reserved decoder input that is kept for interface compatibility + • x_mark_dec + type: tensor of numbers or empty value + usage: reserved decoder marker input that is kept for interface + compatibility + • mask + type: tensor of numbers or empty value + usage: reserved mask input accepted for compatibility with the model + interface + • logits + type: tensor of numbers + usage: used to store the raw portfolio scores returned from forecast + before normalization + + forward normalizes the DeformTime logits into portfolio weights expected by + the training and loss pipeline. @author: Atharva Vaidya + """ + logits = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + # Normalize the logits so the downstream losses receive portfolio weights. + return torch.softmax(logits, dim=-1) + diff --git a/financial_loss_functions/src/models/DeformTime/__init__.py b/financial_loss_functions/src/models/DeformTime/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/financial_loss_functions/src/models/DeformTime/layers/Embed.py b/financial_loss_functions/src/models/DeformTime/layers/Embed.py new file mode 100644 index 00000000..fd754c02 --- /dev/null +++ b/financial_loss_functions/src/models/DeformTime/layers/Embed.py @@ -0,0 +1,231 @@ +import torch +import torch.nn as nn +import math +from einops import rearrange + +class PositionalEmbedding(nn.Module): + def __init__(self, d_model, max_len=5000): + super(PositionalEmbedding, self).__init__() + # Compute the positional encodings once in log space. + pe = torch.zeros(max_len, d_model).float() + pe.require_grad = False + + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + + pe = pe.unsqueeze(0) + self.register_buffer('pe', pe) + + def forward(self, x): + return self.pe[:, :x.size(1)] + + +class TokenEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(TokenEmbedding, self).__init__() + padding = 1 if torch.__version__ >= '1.5.0' else 2 + self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, + kernel_size=3, padding=padding, padding_mode='circular', bias=False) + for m in self.modules(): + if isinstance(m, nn.Conv1d): + nn.init.kaiming_normal_( + m.weight, mode='fan_in', nonlinearity='leaky_relu') + + def forward(self, x): + x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) + return x + + +class FixedEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(FixedEmbedding, self).__init__() + + w = torch.zeros(c_in, d_model).float() + w.require_grad = False + + position = torch.arange(0, c_in).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + w[:, 0::2] = torch.sin(position * div_term) + w[:, 1::2] = torch.cos(position * div_term) + + self.emb = nn.Embedding(c_in, d_model) + self.emb.weight = nn.Parameter(w, requires_grad=False) + + def forward(self, x): + return self.emb(x).detach() + + +class TemporalEmbedding(nn.Module): + def __init__(self, d_model, embed_type='fixed', freq='h'): + super(TemporalEmbedding, self).__init__() + + minute_size = 4 + hour_size = 24 + weekday_size = 7 + day_size = 32 + month_size = 13 + + Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding + if freq == 't': + self.minute_embed = Embed(minute_size, d_model) + self.hour_embed = Embed(hour_size, d_model) + self.weekday_embed = Embed(weekday_size, d_model) + self.day_embed = Embed(day_size, d_model) + self.month_embed = Embed(month_size, d_model) + + def forward(self, x): + x = x.long() + minute_x = self.minute_embed(x[:, :, 4]) if hasattr( + self, 'minute_embed') else 0. + hour_x = self.hour_embed(x[:, :, 3]) + weekday_x = self.weekday_embed(x[:, :, 2]) + day_x = self.day_embed(x[:, :, 1]) + month_x = self.month_embed(x[:, :, 0]) + + return hour_x + weekday_x + day_x + month_x + minute_x + + +class TimeFeatureEmbedding(nn.Module): + def __init__(self, d_model, embed_type='timeF', freq='h'): + super(TimeFeatureEmbedding, self).__init__() + + freq_map = {'h': 4, 't': 5, 's': 6, + 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} + d_inp = freq_map[freq] + self.embed = nn.Linear(d_inp, d_model, bias=False) + + def forward(self, x): + return self.embed(x) + + +class DataEmbedding(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + self.position_embedding(x) + else: + x = self.value_embedding( + x) + self.temporal_embedding(x_mark) + self.position_embedding(x) + return self.dropout(x) + + +class DataEmbedding_inverted(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_inverted, self).__init__() + self.value_embedding = nn.Linear(c_in, d_model) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + x = x.permute(0, 2, 1) + # x: [Batch Variate Time] + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) + # x: [Batch Variate d_model] + return self.dropout(x) + + +class DataEmbedding_wo_pos(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_wo_pos, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(x) + self.temporal_embedding(x_mark) + return self.dropout(x) + + +class PatchEmbedding(nn.Module): + def __init__(self, d_model, patch_len, stride, padding, dropout): + super(PatchEmbedding, self).__init__() + # Patching + self.patch_len = patch_len + self.stride = stride + self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) + + # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space + self.value_embedding = nn.Linear(patch_len, d_model, bias=False) + + # Positional embedding + self.position_embedding = PositionalEmbedding(d_model) + + # Residual dropout + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # do patching + n_vars = x.shape[1] + x = self.padding_patch_layer(x) + x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) + # Input encoding + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x), n_vars + + +class Deform_Temporal_Embedding(nn.Module): + def __init__(self, d_inp, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(Deform_Temporal_Embedding, self).__init__() + + self.value_embedding = nn.Linear(d_inp, d_model, bias=False) + self.position_embedding = PositionalEmbedding(d_model) + + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x) + +class Local_Temporal_Embedding(nn.Module): + def __init__(self, d_inp, d_model, padding, sub_groups=8, dropout=0.1): + super(Local_Temporal_Embedding, self).__init__() + + d_out = d_model // sub_groups if d_model % sub_groups == 0 else d_model // sub_groups + 1 + self.sub_seqlen = d_inp + self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) + self.value_embedding = nn.Linear(d_inp, d_out, bias=False) + + self.position_embedding = PositionalEmbedding(d_model) + self.dropout = nn.Dropout(p=dropout) + self.d_model = d_model + + def forward(self, x): + # The in_channel is still fully conv with the out channel + # the number of output channels (out_channels) determines + # the number of filters applied to the input, and each filter + # processes the input across all input channels + B, L, C = x.shape + x = self.padding_patch_layer(x) + x = x.unfold(dimension=-1, size=self.sub_seqlen, step=self.sub_seqlen) + x = rearrange(x, 'b l g c -> (b g) l c') + # x = x.permute(0, 2, 1) + x = self.value_embedding(x) + # .permute(0, 2, 1) + x = rearrange(x, '(b g) l c -> b l (g c)', b = B)[:,:,:self.d_model] + x = x + self.position_embedding(x) + return self.dropout(x) \ No newline at end of file diff --git a/financial_loss_functions/src/models/DeformTime/layers/MLP.py b/financial_loss_functions/src/models/DeformTime/layers/MLP.py new file mode 100644 index 00000000..f971bc89 --- /dev/null +++ b/financial_loss_functions/src/models/DeformTime/layers/MLP.py @@ -0,0 +1,132 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +activation_functions = { + 'tanh': nn.Tanh(), + 'relu': nn.ReLU(), + 'elu': nn.ELU(), + 'sigmoid': nn.Sigmoid(), + 'gelu':nn.GELU() + } + +class LipSwish(torch.nn.Module): + def forward(self, x): + return 0.909 * F.silu(x) + +class MLPLipSwish(torch.nn.Module): + def __init__(self, in_size, out_size, mlp_size, num_layers, tanh): + super().__init__() + + model = [torch.nn.Linear(in_size, mlp_size), + LipSwish()] + for _ in range(num_layers - 1): + model.append(torch.nn.Linear(mlp_size, mlp_size)) + ################### + # LipSwish activations are useful to constrain the Lipschitz constant of the discriminator. + # (For simplicity we additionally use them in the generator, but that's less important.) + ################### + model.append(LipSwish()) + model.append(torch.nn.Linear(mlp_size, out_size)) + if tanh: + model.append(torch.nn.Tanh()) + self._model = torch.nn.Sequential(*model) + + def forward(self, x): + return self._model(x) + +class MLP(nn.Module): + # layer_sizes[0] is the dimension of the input + # layer_sizes[-1] is the dimension of the output + def __init__(self, layer_sizes, final_relu=False, drop_out=0.7): + super().__init__() + layer_list = [] + layer_sizes = [int(x) for x in layer_sizes] + num_layers = len(layer_sizes) - 1 + final_relu_layer = num_layers if final_relu else num_layers - 1 + for i in range(len(layer_sizes) - 1): + input_size = layer_sizes[i] + curr_size = layer_sizes[i + 1] + if i < final_relu_layer: + layer_list.append(nn.ReLU(inplace=False)) + if drop_out != 0: + layer_list.append(nn.Dropout(drop_out)) + layer_list.append(nn.Linear(input_size, curr_size)) + self.net = nn.Sequential(*layer_list) + self.last_linear = self.net[-1] + + def forward(self, x): + return self.net(x) + +class FreqMLP(nn.Module): + def __init__(self, layer_sizes, final_relu=False, drop_out=0.7) -> None: + super().__init__() + layer_list = [] + layer_sizes = [int(x) for x in layer_sizes] + num_layers = len(layer_sizes) - 1 + final_relu_layer = num_layers if final_relu else num_layers - 1 + for i in range(len(layer_sizes) - 1): + input_size = layer_sizes[i] + curr_size = layer_sizes[i + 1] + if i < final_relu_layer: + layer_list.append(nn.ReLU(inplace=False)) + if drop_out != 0: + layer_list.append(nn.Dropout(drop_out)) + layer_list.append(nn.Linear(input_size, curr_size)) + self.net = nn.Sequential(*layer_list) + self.last_linear = self.net[-1] + + self.r1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size)) + self.i1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size)) + self.rb1 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + self.ib1 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + self.r2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size)) + self.i2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size)) + self.rb2 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + self.ib2 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + + # frequency temporal learner + def MLP_temporal(self, x, B, N, L): + # [B, N, T, D] + x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on L dimension + y = self.FreMLP(B, N, L, x, self.r2, self.i2, self.rb2, self.ib2) + x = torch.fft.irfft(y, n=self.seq_len, dim=2, norm="ortho") + return x + + # frequency channel learner + def MLP_channel(self, x, B, N, L): + # [B, N, T, D] + x = x.permute(0, 2, 1, 3) + # [B, T, N, D] + x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on N dimension + y = self.FreMLP(B, L, N, x, self.r1, self.i1, self.rb1, self.ib1) + x = torch.fft.irfft(y, n=self.feature_size, dim=2, norm="ortho") + x = x.permute(0, 2, 1, 3) + # [B, N, T, D] + return x + + # frequency-domain MLPs + # dimension: FFT along the dimension, r: the real part of weights, i: the imaginary part of weights + # rb: the real part of bias, ib: the imaginary part of bias + def FreMLP(self, B, nd, dimension, x, r, i, rb, ib): + o1_real = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size], + device=x.device) + o1_imag = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size], + device=x.device) + + o1_real = F.relu( + torch.einsum('bijd,dd->bijd', x.real, r) - \ + torch.einsum('bijd,dd->bijd', x.imag, i) + \ + rb + ) + + o1_imag = F.relu( + torch.einsum('bijd,dd->bijd', x.imag, r) + \ + torch.einsum('bijd,dd->bijd', x.real, i) + \ + ib + ) + + y = torch.stack([o1_real, o1_imag], dim=-1) + y = F.softshrink(y, lambd=self.sparsity_threshold) + y = torch.view_as_complex(y) + return y \ No newline at end of file diff --git a/financial_loss_functions/src/models/DeformTime/layers/TemporalDeformAttention.py b/financial_loss_functions/src/models/DeformTime/layers/TemporalDeformAttention.py new file mode 100644 index 00000000..61b27b3c --- /dev/null +++ b/financial_loss_functions/src/models/DeformTime/layers/TemporalDeformAttention.py @@ -0,0 +1,457 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange +from timm.layers import trunc_normal_ +from src.models.DeformTime.layers.MLP import MLP +from src.models.DeformTime.utils.functions import num_patches + + +def normal_init(module, mean=0, std=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + +def constant_init(module, val, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + +class series_decomp(nn.Module): + """ + Series decomposition block + """ + + def __init__(self, kernel_size): + super(series_decomp, self).__init__() + self.moving_avg = moving_avg(kernel_size, stride=1) + + def forward(self, x): + moving_mean = self.moving_avg(x) + res = x - moving_mean + return res, moving_mean + +class moving_avg(nn.Module): + """ + Moving average block to highlight the trend of time series + """ + + def __init__(self, kernel_size, stride): + super(moving_avg, self).__init__() + self.kernel_size = kernel_size + self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) + + def forward(self, x): + # padding on the both ends of time series + front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) + end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) + x = torch.cat([front, x, end], dim=1) + x = self.avg(x.permute(0, 2, 1)) + x = x.permute(0, 2, 1) + return x + +def drop_path(x, drop_prob: float = 0., training: bool = False): + """ + From: https://github.com/huggingface/pytorch-image-models + + Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + + This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for + changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + + DropPath is dropping an entire sample from the batch while Dropout is dropping random values + """ + if drop_prob == 0. or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(keep_prob) * random_tensor + return output + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + """ + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + +class LayerScale(nn.Module): + def __init__(self, + dim: int, + inplace: bool = False, + init_values: float = 1e-5): + super().__init__() + self.inplace = inplace + self.weight = nn.Parameter(torch.ones(dim) * init_values) + + def forward(self, x): + if self.inplace: + return x.mul_(self.weight.view(-1, 1, 1)) + else: + return x * self.weight.view(-1, 1, 1) + +class LayerNorm(nn.Module): + def __init__(self, dim): + super().__init__() + self.norm = nn.LayerNorm(dim) + + def forward(self, x): + x = self.norm(x) + return x + +class LayerNormProxy(nn.Module): + def __init__(self, dim): + super().__init__() + self.norm = nn.LayerNorm(dim) + + def forward(self, x): + x = rearrange(x, 'b c l -> b l c') + x = self.norm(x) + return rearrange(x, 'b l c -> b c l') + + +class LayerNormProxy2D(nn.Module): + def __init__(self, dim): + super().__init__() + self.norm = nn.LayerNorm(dim) + + def forward(self, x): + x = rearrange(x, 'b c h w -> b h w c') + x = self.norm(x) + return rearrange(x, 'b h w c -> b c h w') + + +class Encoder(nn.Module): + def __init__(self, attn_layers, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.norm = norm_layer + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # x [B, L, D] + attns = [] + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + return x, attns + + +class DeformAtten1D(nn.Module): + ''' + max_offset (int): The maximum magnitude of the offset residue. Default: 14. + ''' + def __init__(self, seq_len, d_model, n_heads, dropout, kernel=5, n_groups=4, no_off=False, rpb=True) -> None: + super().__init__() + self.offset_range_factor = kernel + self.no_off = no_off + self.seq_len = seq_len + self.d_model = d_model + self.n_groups = n_groups + self.n_group_channels = self.d_model // self.n_groups + self.n_heads = n_heads + self.n_head_channels = self.d_model // self.n_heads + self.n_group_heads = self.n_heads // self.n_groups + self.scale = self.n_head_channels ** -0.5 + self.rpb = rpb + + self.proj_q = nn.Conv1d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0) + self.proj_k = nn.Conv1d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0) + self.proj_v = nn.Conv1d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0) + self.proj_out = nn.Linear(self.d_model, self.d_model) + kernel_size = kernel + self.stride = 1 + pad_size = kernel_size // 2 if kernel_size != self.stride else 0 + self.proj_offset = nn.Sequential( + nn.Conv1d(self.n_group_channels, self.n_group_channels, kernel_size=kernel_size, stride=self.stride, padding=pad_size), + nn.Conv1d(self.n_group_channels, 1, kernel_size=1, stride=self.stride, padding=pad_size), + ) + + self.scale_factor = self.d_model ** -0.5 # 1/np.sqrt(dim) + + if self.rpb: + self.relative_position_bias_table = nn.Parameter( + torch.zeros(1, self.d_model, self.seq_len)) + trunc_normal_(self.relative_position_bias_table, std=.02) + + def forward(self, x, mask=None): + B, L, C = x.shape + dtype, device = x.dtype, x.device + x = x.permute(0,2,1) # B, C, L + + q = self.proj_q(x) # B, C, L + + group = lambda t: rearrange(t, 'b (g d) n -> (b g) d n', g = self.n_groups) + + grouped_queries = group(q) + + offset = self.proj_offset(grouped_queries) # B * g 1 Lg + offset = rearrange(offset, 'b 1 n -> b n') + + def grid_sample_1d(feats, grid, *args, **kwargs): + # does 1d grid sample by reshaping it to 2d + grid = rearrange(grid, '... -> ... 1 1') + grid = F.pad(grid, (1, 0), value = 0.) + feats = rearrange(feats, '... -> ... 1') + # the backward of F.grid_sample is non-deterministic + # See for details: https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html + out = F.grid_sample(feats, grid, **kwargs) + return rearrange(out, '... 1 -> ...') + + def normalize_grid(arange, dim = 1, out_dim = -1): + # normalizes 1d sequence to range of -1 to 1 + n = arange.shape[-1] + return 2.0 * arange / max(n - 1, 1) - 1.0 + + if self.offset_range_factor >= 0 and not self.no_off: + offset = offset.tanh().mul(self.offset_range_factor) + + if self.no_off: + x_sampled = F.avg_pool1d(x, kernel_size=self.stride, stride=self.stride) + else: + grid = torch.arange(offset.shape[-1], device = device) + vgrid = grid + offset + vgrid_scaled = normalize_grid(vgrid) + + x_sampled = grid_sample_1d( + group(x), + vgrid_scaled, + mode = 'bilinear', padding_mode = 'zeros', align_corners = False)[:,:,:L] + + if not self.no_off: + x_sampled = rearrange(x_sampled,'(b g) d n -> b (g d) n', g = self.n_groups) + q = q.reshape(B * self.n_heads, self.n_head_channels, L) + k = self.proj_k(x_sampled).reshape(B * self.n_heads, self.n_head_channels, L) + if self.rpb: + v = self.proj_v(x_sampled) + v = (v + self.relative_position_bias_table).reshape(B * self.n_heads, self.n_head_channels, L) + else: + v = self.proj_v(x_sampled).reshape(B * self.n_heads, self.n_head_channels, L) + + scaled_dot_prod = torch.einsum('b i d , b j d -> b i j', q, k) * self.scale_factor + + if mask is not None: + assert mask.shape == scaled_dot_prod.shape[1:] + scaled_dot_prod = scaled_dot_prod.masked_fill(mask, -np.inf) + + attention = torch.softmax(scaled_dot_prod, dim=-1) # softmax: attention[0,0,:].sum() = 1 + + out = torch.einsum('b i j , b j d -> b i d', attention, v) + + return self.proj_out(rearrange(out, '(b g) l c -> b c (g l)', b=B)) + + +class DeformAtten2D(nn.Module): + ''' + max_offset (int): The maximum magnitude of the offset residue. Default: 14. + ''' + def __init__(self, seq_len, d_model, n_heads, dropout, kernel=5, n_groups=4, no_off=False, rpb=True) -> None: + super().__init__() + self.offset_range_factor = kernel + self.no_off = no_off + self.f_sample = False + self.seq_len = seq_len + self.d_model = d_model # (512) + self.n_groups = n_groups + self.n_group_channels = self.d_model // self.n_groups + self.n_heads = n_heads + self.n_head_channels = self.d_model // self.n_heads + self.n_group_heads = self.n_heads // self.n_groups + self.scale = self.n_head_channels ** -0.5 + self.rpb = rpb + + self.proj_q = nn.Conv2d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0) + self.proj_k = nn.Conv2d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0) + self.proj_v = nn.Conv2d(self.d_model, self.d_model, kernel_size=1, stride=1, padding=0) + self.proj_out = nn.Linear(self.d_model, self.d_model) + kernel_size = kernel + self.stride = 1 + pad_size = kernel_size // 2 if kernel_size != self.stride else 0 + self.proj_offset = nn.Sequential( + nn.Conv2d(self.n_group_channels, self.n_group_channels, kernel_size=kernel_size, stride=self.stride, padding=pad_size), + nn.Conv2d(self.n_group_channels, 2, kernel_size=1, stride=1, padding=0, bias=False) + ) + + self.scale_factor = self.d_model ** -0.5 # 1/np.sqrt(dim) + + if self.rpb: + self.relative_position_bias_table = nn.Parameter( + torch.zeros(1, self.d_model, self.seq_len, 1)) + trunc_normal_(self.relative_position_bias_table, std=.02) + + + def forward(self, x, mask=None): + B, H, W, C = x.shape + x = x.permute(0, 3, 1, 2) # B, C, H, W + + q = self.proj_q(x) # B, 1, H, W + + offset = self.proj_offset(q) # B, 2, H, W + + if self.offset_range_factor >= 0 and not self.no_off: + offset = offset.tanh().mul(self.offset_range_factor) + + def create_grid_like(t, dim = 0): + h, w, device = *t.shape[-2:], t.device + + grid = torch.stack(torch.meshgrid( + torch.arange(w, device = device), + torch.arange(h, device = device), + indexing = 'xy'), dim = dim) + + grid.requires_grad = False + grid = grid.type_as(t) + return grid + + def normalize_grid(grid, dim = 1, out_dim = -1): + # normalizes a grid to range from -1 to 1 + h, w = grid.shape[-2:] + grid_h, grid_w = grid.unbind(dim = dim) + + grid_h = 2.0 * grid_h / max(h - 1, 1) - 1.0 + grid_w = 2.0 * grid_w / max(w - 1, 1) - 1.0 + + return torch.stack((grid_h, grid_w), dim = out_dim) + + if self.no_off: + x_sampled = F.avg_pool2d(x, kernel_size=self.stride, stride=self.stride) + else: + grid =create_grid_like(offset) + vgrid = grid + offset + vgrid_scaled = normalize_grid(vgrid) + # the backward of F.grid_sample is non-deterministic + x_sampled = F.grid_sample( + x, + vgrid_scaled, + mode = 'bilinear', padding_mode = 'zeros', align_corners = False)[:,:,:H,:W] + + if not self.no_off: + x_sampled = rearrange(x_sampled, '(b g) c h w -> b (g c) h w', g=self.n_groups) + q = q.reshape(B * self.n_heads, H, W) + k = self.proj_k(x_sampled).reshape(B * self.n_heads, H, W) + if self.rpb: + v = self.proj_v(x_sampled) + v = (v + self.relative_position_bias_table).reshape(B * self.n_heads, H, W) + else: + v = self.proj_v(x_sampled).reshape(B * self.n_heads, H, W) + + scaled_dot_prod = torch.einsum('b i d , b j d -> b i j', q, k) * self.scale_factor + + if mask is not None: + assert mask.shape == scaled_dot_prod.shape[1:] + scaled_dot_prod = scaled_dot_prod.masked_fill(mask, -np.inf) + + attention = torch.softmax(scaled_dot_prod, dim=-1) + + out = torch.einsum('b i j , b j d -> b i d', attention, v) + + return self.proj_out(out.reshape(B, H, W, C)) + + +class CrossDeformAttn(nn.Module): + def __init__(self, seq_len, d_model, n_heads, dropout, droprate, + n_days=1, window_size=4, patch_len=7, stride=3, no_off=False) -> None: + super().__init__() + self.n_days = n_days + self.seq_len = seq_len + # 1d size: B*n_days, subseq_len, C + # 2d size: B*num_patches, 1, patch_len, C + self.subseq_len = seq_len // n_days + (1 if seq_len % n_days != 0 else 0) + self.patch_len = patch_len + self.stride = stride + self.num_patches = num_patches(self.seq_len, self.patch_len, self.stride) + + self.layer_norm = LayerNorm(d_model) + + # 1D + self.ff1 = nn.Linear(d_model, d_model, bias=True) + self.ff2 = nn.Linear(self.subseq_len, self.subseq_len, bias=True) + # Deform attention + self.deform_attn = DeformAtten1D(self.subseq_len, d_model, n_heads, dropout, kernel=window_size, no_off=no_off) + self.attn_layers1d = nn.ModuleList([self.deform_attn]) + + self.mlps1d = nn.ModuleList( + [ + MLP([d_model, d_model], final_relu=True, drop_out=0.0) for _ in range(len(self.attn_layers1d)) + ] + ) + self.drop_path1d = nn.ModuleList( + [ + DropPath(droprate) if droprate > 0.0 else nn.Identity() for _ in range(len(self.attn_layers1d)) + ] + ) + ####################################### + # 2D + d_route = 1 + self.conv_in = nn.Conv2d(1, d_route, kernel_size=1, bias=True) + self.conv_out = nn.Conv2d(d_route, 1, kernel_size=1, bias=True) + self.deform_attn2d = DeformAtten2D(self.patch_len, d_route, n_heads=1, dropout=dropout, kernel=window_size, n_groups=1, no_off=no_off) + self.write_out = nn.Linear(self.num_patches*self.patch_len, self.seq_len) + + self.attn_layers2d = nn.ModuleList([self.deform_attn2d]) + + self.mlps2d = nn.ModuleList( + [ + MLP([d_model, d_model], final_relu=True, drop_out=0.0) for _ in range(len(self.attn_layers2d)) + ] + ) + self.drop_path2d = nn.ModuleList( + [ + DropPath(droprate) if droprate > 0.0 else nn.Identity() for _ in range(len(self.attn_layers2d)) + ] + ) + + self.fc = nn.Linear(2*d_model, d_model) + + def forward(self, x, attn_mask=None, tau=None, delta=None): + n_day = self.n_days + B, L, C = x.shape + + x = self.layer_norm(x) + + padding_len = (n_day - (L % n_day)) % n_day + x_padded = torch.cat((x, x[:, [0], :].expand(-1, padding_len, -1)), dim=1) + x_1d = rearrange(x_padded, 'b (seg_num ts_d) d_model -> (b ts_d) seg_num d_model', ts_d=n_day) + # attn on 1D + for d, attn_layer in enumerate(self.attn_layers1d): + x0 = x_1d + x_1d = attn_layer(x_1d) + x_1d = self.drop_path1d[d](x_1d) + x0 + x0 = x_1d + x_1d = self.mlps1d[d](self.layer_norm(x_1d)) + x_1d = self.drop_path1d[d](x_1d) + x0 + x_1d = rearrange(x_1d, '(b ts_d) seg_num d_model -> b (seg_num ts_d) d_model', ts_d=n_day)[:,:L,:] + + # Patch attn on 2D + x_unfold = x.unfold(dimension=-2, size=self.patch_len, step=self.stride) + x_2d = rearrange(x_unfold, 'b n c l -> (b n) l c').unsqueeze(-3) + x_2d = rearrange(x_2d, 'b c h w -> b h w c') + for d, attn_layer in enumerate(self.attn_layers2d): + x0 = x_2d + x_2d = attn_layer(x_2d) + x_2d = self.drop_path2d[d](x_2d) + x0 + x0 = x_2d + x_2d = self.mlps2d[d](self.layer_norm(x_2d.permute(0,1,3,2))).permute(0,1,3,2) + x_2d = self.drop_path2d[d](x_2d) + x0 + x_2d = rearrange(x_2d, 'b h w c -> b c h w') + x_2d = rearrange(x_2d, '(b n) 1 l c -> b (n l) c', b=B) + x_2d = self.write_out(x_2d.permute(0,2,1)).permute(0,2,1) + + x = torch.concat([x_1d, x_2d], dim=-1) + x = self.fc(x) + + return x, None + diff --git a/financial_loss_functions/src/models/DeformTime/utils/functions.py b/financial_loss_functions/src/models/DeformTime/utils/functions.py new file mode 100644 index 00000000..4148dd6a --- /dev/null +++ b/financial_loss_functions/src/models/DeformTime/utils/functions.py @@ -0,0 +1,31 @@ +import torch + + +def grid_sample1D(tensor, grid): + """Given an input and a flow-field grid, computes the output using input + values and pixel locations from grid. + + Args: + tensor: (N, C, L_in) tensor + grid: (N, L_out, 2) tensor in the range of [-1, 1] + + Returns: + (N, C, L_out) tensor + + """ + b, c, l_in = tensor.shape + b_, l_out, w_ = grid.shape + assert b == b_ + out = [] + for (t, g) in zip(tensor, grid): + x_ = 0.5 * (l_in - 1) * (g[:, 0] + 1) + ix = torch.floor(x_).to(torch.int32).clamp(0, l_in - 2) + dx = x_ - ix + out.append((1 - dx) * t[..., ix] + dx * t[..., ix + 1]) + return torch.stack(out, dim=0) + +def num_patches(seq_len, patch_len, stride): + return (seq_len - patch_len) // stride + 1 + +# print(num_patches(96, 7, 4)) + diff --git a/financial_loss_functions/src/models/cvar_benchmark.py b/financial_loss_functions/src/models/cvar_benchmark.py new file mode 100644 index 00000000..94149c33 --- /dev/null +++ b/financial_loss_functions/src/models/cvar_benchmark.py @@ -0,0 +1,164 @@ +""" +Lightweight Mean-CVaR portfolio optimizer for benchmarking. + +Uses CVXPY with the Rockafellar-Uryasev linearization, inspired by +the formulation in ``quantitative-portfolio-optimization`` (NVIDIA cufolio) +but without any cuOpt / GPU dependency. + +The optimizer solves: + + minimise lambda * CVaR_alpha - E[w^T r] + s.t. sum(w) + cash = 1 + w >= w_min, w <= w_max + cash >= 0 + ||w||_1 <= L_tar + +where CVaR is expressed via auxiliary variables (t, u): + + CVaR = t + (1 / ((1 - alpha) * S)) * sum(u_s) + u_s >= 0 + u_s >= -(R_s^T w + t) for each scenario s +""" + +import logging +from dataclasses import dataclass +from typing import Optional + +import numpy as np + +logger = logging.getLogger(__name__) + + +@dataclass +class CVaRParams: + """Tunable parameters for CVaR benchmark optimization.""" + + confidence: float = 0.95 + risk_aversion: float = 1.0 + w_min: float = 0.0 + w_max: float = 0.30 + L_tar: float = 1.6 + + +class CVaRBenchmark: + """ + Mean-CVaR portfolio optimizer using CVXPY (CPU-only). + + @param params CVaRParams Optimization parameters. + """ + + def __init__(self, params: Optional[CVaRParams] = None): + self.params = params or CVaRParams() + self._cp = None + + def _ensure_cvxpy(self): + if self._cp is None: + try: + import cvxpy as cp + except ImportError as exc: + raise ImportError( + "cvxpy is required for the CVaR benchmark. " + "Install with: pip install cvxpy" + ) from exc + self._cp = cp + return self._cp + + def optimize(self, returns: np.ndarray) -> np.ndarray: + """ + Solve Mean-CVaR optimization given historical return scenarios. + + @param returns np.ndarray (S, N) -- S scenarios, N assets. + Each row is one historical daily return vector. + + @return np.ndarray (N,) optimal portfolio weights. + Falls back to equal-weight if the solver fails. + """ + cp = self._ensure_cvxpy() + + S, N = returns.shape + alpha = self.params.confidence + lam = self.params.risk_aversion + + mu = returns.mean(axis=0) + + w = cp.Variable(N, name="w") + c = cp.Variable(1, name="cash") + t = cp.Variable(1, name="VaR") + u = cp.Variable(S, name="u") + + scenario_ptf_returns = returns @ w # (S,) + + cvar_risk = t + (1.0 / ((1 - alpha) * S)) * cp.sum(u) + expected_return = mu @ w + + constraints = [ + u >= 0, + u + t + scenario_ptf_returns >= 0, + cp.sum(w) + c == 1, + w >= self.params.w_min, + w <= self.params.w_max, + c >= 0, + cp.norm1(w) <= self.params.L_tar, + ] + + objective = cp.Minimize(lam * cvar_risk - expected_return) + problem = cp.Problem(objective, constraints) + + try: + problem.solve(solver=cp.CLARABEL, verbose=False) + except Exception: + try: + problem.solve(solver=cp.SCS, verbose=False) + except Exception as exc: + logger.warning("CVaR solve failed: %s – returning equal weights", exc) + return np.full(N, 1.0 / N) + + if w.value is None: + logger.warning("CVaR solver returned None – returning equal weights") + return np.full(N, 1.0 / N) + + weights = np.array(w.value).flatten() + weights = np.clip(weights, 0, None) + total = weights.sum() + if total > 0: + weights /= total + else: + weights = np.full(N, 1.0 / N) + + # Re-clip to w_max after normalization and redistribute excess + w_max = self.params.w_max + for _ in range(10): + excess_mask = weights > w_max + if not excess_mask.any(): + break + excess = (weights[excess_mask] - w_max).sum() + weights[excess_mask] = w_max + free_mask = ~excess_mask & (weights < w_max) + if free_mask.any(): + weights[free_mask] += excess / free_mask.sum() + else: + break + + return weights + + def rolling_optimize( + self, + X_returns: np.ndarray, + ) -> np.ndarray: + """ + Run CVaR optimization on each window's historical returns. + + @param X_returns np.ndarray (num_windows, T_in, N) + Per-window historical return data (extracted from the input + windows used by the LSTM). + + @return np.ndarray (num_windows, N) benchmark weights. + """ + num_windows, T_in, N = X_returns.shape + all_weights = np.zeros((num_windows, N)) + + for i in range(num_windows): + scenarios = X_returns[i] # (T_in, N) + all_weights[i] = self.optimize(scenarios) + + return all_weights diff --git a/financial_loss_functions/src/models/registry.py b/financial_loss_functions/src/models/registry.py new file mode 100644 index 00000000..62b4a622 --- /dev/null +++ b/financial_loss_functions/src/models/registry.py @@ -0,0 +1,219 @@ +# models/registry.py +import pkgutil +import importlib +from typing import Dict, Type, Any, Set + +NN_Registry = Dict[str, Dict[str, Type]] # category -> name -> class +Trad_Registry = Dict[str, Type] + +class NNModelLibrary: + """ + Neural Network Model Library to contain all available models + like a library that can be queried. + """ + _registry: NN_Registry = {} + _discovered_packages: Set[str] = set() + + @classmethod + def register(cls, category: str, name: str | None = None): + """ + Register a neural network model using this decorator. + + Args: + category (str): Name of category the model architecture belongs to. + name (str | None): Name of the model. Default = None. + If None, name of the class will be used as default. + + Raise: + KeyError: If a model already exists in the library. + """ + def decorator(model_cls: Type): + key = name or model_cls.__name__ + if key in cls._registry.get(category, {}): + raise KeyError(f"Model '{key}' already registered in category '{category}'") + cls._registry.setdefault(category, {})[key] = model_cls + return model_cls + return decorator + + @classmethod + def autodiscover(cls, package: str): + """ + Import all modules in a package exactly once per package. + Safe to call multiple times with different packages. + This method MUST be executed in a main file to register all models. + + Args: + package (str): Name or pythonic module path to detect models. + Example: 'src.models' + """ + if package in cls._discovered_packages: + return + + pkg = importlib.import_module(package) + + for _, modname, ispkg in pkgutil.walk_packages(pkg.__path__, pkg.__name__ + "."): + if not ispkg: + importlib.import_module(modname) + + cls._discovered_packages.add(package) + + @classmethod + def items(cls) -> NN_Registry: + """ + Get entire library of neural network models. + + Returns: + NN_Registry: Dictionary of all categories and their models. + """ + return cls._registry + + @classmethod + def list_categories(cls) -> list[str]: + """ + Get a list of available categories. + + Returns: + list[str]: List of categories present in the library. + """ + return list(cls._registry.keys()) + + @classmethod + def list_models(cls, category: str) -> list[str]: + """ + Get a list of all available models in a particular category. + + Args: + category (str): Category name of the required neural networks. + + Returns: + list[str]: List of all available neural network models + which belong to the give category. + """ + return list(cls._registry.get(category, {}).keys()) + + @classmethod + def get(cls, category: str, name: str) -> Type: + """ + Get a particular model for the given category name. + + Args: + category (str): Category name of the required model. + name (str): Name of the required model. + + Returns: + Class of the required neural network model. + """ + return cls._registry[category].get(name) + + @classmethod + def instantiate(cls, category: str, name: str, *args, **kwargs) -> Any: + """ + Instantiate a particular model with its arguments and hyperparameters. + + Args: + category (str): Category name of the required model. + name (str): Name of the required model. + *args: Positional arguments to be passed on to the model object. + **kwargs: Key word arguments to be passed on to the model object. + + Returns: + Instantiated model object. + """ + return cls.get(category, name)(*args, **kwargs) + +class TradModelLibrary: + """ + Tradional Model Library to contain all available tradional portfolio + optimization models/methods like a library that can be queried. + """ + _registry: Trad_Registry = {} + _discovered_packages: Set[str] = set() + + @classmethod + def register(cls, name: str = None): + """ + Register a tradional portfolio optimization model/method using this decorator. + + Args: + name (str | None): Name of the model. Default = None. + If None, name of the class will be used as default. + + Raise: + KeyError: If a model already exists in the library. + """ + def decorator(model_cls: Type): + key = name or model_cls.__name__ + if key in cls._registry: + raise KeyError(f"Model '{key}' already registered in") + cls._registry[key] = model_cls + return model_cls + return decorator + + @classmethod + def autodiscover(cls, package: str): + """ + Import all modules in a package exactly once per package. + Safe to call multiple times with different packages. + This method MUST be executed in a main file to register all models. + + Args: + package (str): Name or pythonic module path to detect models. + Example: 'src.models' + """ + if package in cls._discovered_packages: + return + + pkg = importlib.import_module(package) + + for _, modname, ispkg in pkgutil.walk_packages(pkg.__path__, pkg.__name__ + "."): + if not ispkg: + importlib.import_module(modname) + + cls._discovered_packages.add(package) + + @classmethod + def items(cls) -> Trad_Registry: + """ + Get entire library of tradional portfolio optimization models/methods. + + Returns: + Trad_Registry: Dictionary of all categories and their models. + """ + return cls._registry + + @classmethod + def list_models(cls) -> list[str]: + """ + Get a list of all available models. + Returns: + list[str]: List of all available tradional portfolio optimization models. + """ + return list(cls._registry.keys()) + + @classmethod + def get(cls, name: str) -> Type: + """ + Get a particular model by its name. + + Args: + name (str): Name of the reqired tradional model. + + Returns: + Class of the required tradional model. + """ + return cls._registry.get(name) + + @classmethod + def instantiate(cls, name: str, *args, **kwargs) -> Any: + """ + Instantiate a particular model with its arguments and hyperparameters. + + Args: + name (str): Name of the required model. + *args: Positional arguments to be passed on to the model object. + **kwargs: Key word arguments to be passed on to the model object. + + Returns: + Instantiated model object. + """ + return cls.get(name)(*args, **kwargs) \ No newline at end of file diff --git a/financial_loss_functions/src/training/loss_functions.py b/financial_loss_functions/src/training/loss_functions.py index b560aba1..e3f947e2 100644 --- a/financial_loss_functions/src/training/loss_functions.py +++ b/financial_loss_functions/src/training/loss_functions.py @@ -1,5 +1,14 @@ +from typing import List, Optional + +import torch +import torch.nn as nn from torch import clamp, tensor + +# --------------------------------------------------------------------------- +# Original standalone loss functions (unchanged) +# --------------------------------------------------------------------------- + def raw_sharpe_loss( weights: tensor, returns: tensor, eps: float = 1e-8 ): @@ -7,12 +16,12 @@ def raw_sharpe_loss( @param weights torch.tensor (B, N) @param returns torch.tensor (B, T_out, N) -- raw returns - @return batch average Sharpe Ratio. + @return batch average Sharpe Ratio. Negative since NN has to maximize Sharpe Ratio but minimize loss """ # portfolio returns per step port = (weights.unsqueeze(1) * returns).sum(dim=-1) # (B, T_out) - + mean = port.mean(dim=1) # (B,) std = port.std(dim=1) + eps # (B,) sharpe = mean / std # (B,) @@ -39,18 +48,584 @@ def raw_sortino_loss( @param target float Minimum acceptable return (MAR), often 0 for risk-free rate adjusted. @param eps float Epsilon value to avoid divide by zero error - @return batch average Sortino Ratio. + @return batch average Sortino Ratio. Negative since NN has to maximize Sortino but minimize loss """ # Portfolio returns per step port = (weights.unsqueeze(1) * returns).sum(dim=-1) # (B, T_out) - + # Downside deviation: std of negative deviations from target downside = clamp(target - port, min=0.0) # (B, T_out), only positive for downside downside_std = downside.std(dim=1) + eps # (B,) - + mean = port.mean(dim=1) # (B,) sortino = mean / downside_std # (B,) - + # Maximize Sortino → minimize negative Sortino - return -sortino.mean() \ No newline at end of file + return -sortino.mean() + + +# --------------------------------------------------------------------------- +# Backward-compatible wrappers so existing functions accept the new +# 3-or-4 arg call signature used by the modified Trainer. +# --------------------------------------------------------------------------- + +def sharpe_loss_compat(weights, returns, features=None, fundamentals=None): + return differentiable_sharpe_loss(weights, returns) + + +def sortino_loss_compat(weights, returns, features=None, fundamentals=None): + return raw_sortino_loss(weights, returns) + + +# --------------------------------------------------------------------------- +# Learnable modules for multi-timeframe S/R and macro override +# --------------------------------------------------------------------------- + +class TimeframeImportanceFn(nn.Module): + """ + Maps normalised lookback duration in [0, 1] to an importance weight + in (0, 1). Initialised with a monotonic bias so that longer + lookbacks produce higher importance by default. + + Architecture: Linear(1, H) -> Softplus -> Linear(H, 1) -> Sigmoid + """ + + def __init__(self, hidden: int = 8): + super().__init__() + self.net = nn.Sequential( + nn.Linear(1, hidden), + nn.Softplus(), + nn.Linear(hidden, 1), + ) + # Monotonic-bias init: positive weights so larger inputs → larger outputs + with torch.no_grad(): + self.net[0].weight.fill_(1.0) + self.net[0].bias.fill_(0.0) + self.net[2].weight.fill_(1.0) + self.net[2].bias.fill_(0.0) + + def forward(self, normed_lookbacks: torch.Tensor) -> torch.Tensor: + """ + @param normed_lookbacks (W,) values in [0, 1] + @return (W,) importance weights in (0, 1) + """ + out = self.net(normed_lookbacks.unsqueeze(-1)) # (W, 1) + return torch.sigmoid(out.squeeze(-1)) # (W,) + + +class MacroOverrideGate(nn.Module): + """ + Learnable gate that maps macro rate-of-change to an override + weight omega in (0, 1). + + omega → 1 : macro signals dominate (extreme macro conditions) + omega → 0 : technical/fundamental signals dominate (calm macro) + """ + + def __init__(self, num_macro_features: int, hidden: int = 8): + super().__init__() + self.baseline = nn.Parameter(torch.tensor(0.0)) + self.proj = nn.Sequential( + nn.Linear(num_macro_features, hidden), + nn.Softplus(), + nn.Linear(hidden, 1), + ) + + def forward(self, delta_macro: torch.Tensor) -> torch.Tensor: + """ + @param delta_macro (B, M) macro rate-of-change + @return (B,) override weight in (0, 1) + """ + activation = self.proj(delta_macro).squeeze(-1) # (B,) + return torch.sigmoid(self.baseline + activation) + + +# --------------------------------------------------------------------------- +# Composite S/R Loss +# --------------------------------------------------------------------------- + +class CompositeSRLoss(nn.Module): + """ + Multi-component loss that augments the differentiable Sharpe loss + with four S/R-aware regularization penalties: + + L_total = L_sharpe + + alpha * L_price_action + + beta * L_psychological + + gamma * L_macro + + delta * L_fundamental + + Optional enhancements (all backward-compatible, disabled by default): + - Multi-timeframe S/R hierarchy via rolling-window pivots + - Adaptive macro-override gate that re-weights penalties + - Sector-aware penalty scaling + - Ticker-specific macro sensitivity + - Cross-ticker correlation guard + """ + + def __init__( + self, + num_tickers: int, + num_features_per_ticker: int, + ret_feature_idx: int, + turnover_feature_idx: int, + illiq_feature_idx: int, + ba_spread_feature_idx: int, + macro_col_indices: Optional[List[int]] = None, + alpha: float = 0.10, + beta: float = 0.05, + gamma: float = 0.10, + delta: float = 0.10, + psych_thresholds: Optional[List[float]] = None, + psych_sigma: float = 0.01, + ema_span: int = 10, + # --- Multi-timeframe S/R --- + sr_use_multi_timeframe: bool = False, + sr_lookback_windows: Optional[List[int]] = None, + sr_pivot_threshold: float = 0.02, + sr_importance_hidden: int = 8, + # --- Macro override gate --- + use_macro_override: bool = False, + macro_override_hidden: int = 8, + # --- Sector-aware penalty --- + sector_ids: Optional[List[int]] = None, + # --- Ticker macro sensitivity --- + ticker_macro_sensitivity: Optional[torch.Tensor] = None, + # --- Correlation guard --- + corr_matrix: Optional[torch.Tensor] = None, + ): + """ + @param num_tickers int Number of assets N. + @param num_features_per_ticker int Features F per ticker in the + flat layout produced by Reshaper (T_NxF). + @param ret_feature_idx int Index of the RET feature within a + ticker's F-wide feature block. + @param turnover_feature_idx int Index of TURNOVER. + @param illiq_feature_idx int Index of ILLIQUIDITY. + @param ba_spread_feature_idx int Index of BA_SPREAD. + @param macro_col_indices list[int] Indices of macro features + within a ticker's feature block (after broadcast). + @param alpha float Weight for price-action S/R penalty. + @param beta float Weight for psychological-level penalty. + @param gamma float Weight for macro-regime penalty. + @param delta float Weight for SEC-fundamental penalty. + @param psych_thresholds list[float] Return-space thresholds. + @param psych_sigma float Gaussian kernel bandwidth. + @param ema_span int EMA lookback for return smoothing. + @param sr_use_multi_timeframe bool Enable multi-TF S/R hierarchy. + @param sr_lookback_windows list[int] Lookback periods in days. + @param sr_pivot_threshold float Soft activation threshold for S/R. + @param sr_importance_hidden int Hidden size for importance MLP. + @param use_macro_override bool Enable adaptive macro override gate. + @param macro_override_hidden int Hidden size for gate MLP. + @param sector_ids list[int] Integer sector ID per ticker (len N). + @param ticker_macro_sensitivity Tensor (N,) macro sensitivity per ticker. + @param corr_matrix Tensor (N, N) return correlation matrix. + """ + super().__init__() + self.N = num_tickers + self.F = num_features_per_ticker + self.ret_idx = ret_feature_idx + self.turn_idx = turnover_feature_idx + self.illiq_idx = illiq_feature_idx + self.spread_idx = ba_spread_feature_idx + self.macro_idx = macro_col_indices or [] + + self.alpha = alpha + self.beta = beta + self.gamma = gamma + self.delta = delta + self.psych_sigma = psych_sigma + self.ema_span = ema_span + + if psych_thresholds is None: + psych_thresholds = [ + 0.0, 0.01, -0.01, 0.02, -0.02, + 0.05, -0.05, 0.10, -0.10, 0.20, -0.20, + ] + self.register_buffer( + "psych_thresholds", + torch.tensor(psych_thresholds, dtype=torch.float32), + ) + + # Learnable regime weights (one per macro feature) + if self.macro_idx: + self.regime_weights = nn.Parameter( + torch.zeros(len(self.macro_idx)) + ) + else: + self.regime_weights = None + + # EMA decay factor + ema_alpha = 2.0 / (ema_span + 1) + self.register_buffer( + "ema_alpha", torch.tensor(ema_alpha, dtype=torch.float32) + ) + + # ---- Multi-timeframe S/R hierarchy ---- + self.use_multi_tf = sr_use_multi_timeframe + self.sr_pivot_threshold = sr_pivot_threshold + if self.use_multi_tf: + windows = sr_lookback_windows or [5, 10, 21, 42, 63, 105] + self.sr_windows = sorted(windows) + max_w = float(max(self.sr_windows)) + self.register_buffer( + "sr_lookback_normed", + torch.tensor([w / max_w for w in self.sr_windows], dtype=torch.float32), + ) + self.importance_fn = TimeframeImportanceFn(sr_importance_hidden) + else: + self.sr_windows = [] + self.importance_fn = None + + # ---- Macro override gate ---- + self.use_macro_override = use_macro_override + if use_macro_override and self.macro_idx: + self.macro_override = MacroOverrideGate( + len(self.macro_idx), macro_override_hidden + ) + else: + self.macro_override = None + + # ---- Sector-aware penalty scaling ---- + if sector_ids is not None: + self.register_buffer( + "sector_ids", + torch.tensor(sector_ids, dtype=torch.long), + ) + self.num_sectors = len(set(sector_ids)) + else: + self.sector_ids = None + self.num_sectors = 0 + + # ---- Ticker macro sensitivity ---- + if ticker_macro_sensitivity is not None: + self.register_buffer("ticker_macro_sensitivity", ticker_macro_sensitivity) + else: + self.ticker_macro_sensitivity = None + + # ---- Correlation guard ---- + if corr_matrix is not None: + self.register_buffer("corr_matrix", corr_matrix) + else: + self.corr_matrix = None + + # -- helpers ------------------------------------------------------------- + + def _extract_ticker_feature( + self, features: torch.Tensor, feat_idx: int, + ) -> torch.Tensor: + """ + From flat (B, T, N*F) extract one feature for all tickers -> (B, T, N). + """ + B, T, _ = features.shape + indices = [ + ticker_i * self.F + feat_idx for ticker_i in range(self.N) + ] + idx_tensor = torch.tensor( + indices, device=features.device, dtype=torch.long + ) + return features[:, :, idx_tensor] # (B, T, N) + + def _ema_smooth(self, x: torch.Tensor) -> torch.Tensor: + """ + Exponential moving average along dim=1 (time). + x: (B, T, N) + """ + B, T, N = x.shape + ema = torch.zeros(B, N, device=x.device, dtype=x.dtype) + outputs = [] + for t in range(T): + ema = self.ema_alpha * x[:, t, :] + (1 - self.ema_alpha) * ema + outputs.append(ema) + return torch.stack(outputs, dim=1) # (B, T, N) + + @staticmethod + def _z_score(x: torch.Tensor, dim: int = 1, eps: float = 1e-8): + """Z-score along *dim*.""" + mu = x.mean(dim=dim, keepdim=True) + sigma = x.std(dim=dim, keepdim=True) + eps + return (x - mu) / sigma + + def _compute_delta_macro( + self, features: torch.Tensor, + ) -> Optional[torch.Tensor]: + """ + Shared helper: macro rate-of-change over the input window. + + @return (B, M) or None if no macro features. + """ + if not self.macro_idx: + return None + macro_list = [] + for idx in self.macro_idx: + mf = self._extract_ticker_feature(features, idx) # (B, T, N) + macro_list.append(mf.mean(dim=-1)) # average across tickers -> (B, T) + macro_stack = torch.stack(macro_list, dim=-1) # (B, T, M) + return macro_stack[:, -1, :] - macro_stack[:, 0, :] # (B, M) + + def _detect_pivots( + self, prices: torch.Tensor, + ) -> torch.Tensor: + """ + Detect support/resistance levels at multiple lookback windows. + + @param prices (B, T, N) cumulative price proxy + @return (B, W, N) normalised position in [-1, +1] per window. + +1 = at window high (resistance), -1 = at window low (support). + """ + B, T, N = prices.shape + current = prices[:, -1, :] # (B, N) + scores = [] + for w in self.sr_windows: + w_clamped = min(w, T) + window_slice = prices[:, -w_clamped:, :] # (B, w', N) + w_high = window_slice.max(dim=1).values # (B, N) + w_low = window_slice.min(dim=1).values # (B, N) + w_range = (w_high - w_low).clamp(min=1e-8) + midpoint = (w_high + w_low) / 2.0 + score = ((current - midpoint) / (w_range / 2.0)).clamp(-1.0, 1.0) + scores.append(score) + return torch.stack(scores, dim=1) # (B, W, N) + + def _sector_avg_confidence( + self, confidence: torch.Tensor, + ) -> torch.Tensor: + """ + Compute per-sector average confidence and map back to per-ticker. + + @param confidence (B, N) + @return (B, N) sector-average confidence for each ticker's sector. + """ + B, N = confidence.shape + sector_avg = torch.zeros(B, self.num_sectors, device=confidence.device) + sector_count = torch.zeros(self.num_sectors, device=confidence.device) + for s in range(self.num_sectors): + mask = (self.sector_ids == s) # (N,) + if mask.any(): + sector_avg[:, s] = confidence[:, mask].mean(dim=-1) + sector_count[s] = mask.float().sum() + # Map sector averages back to each ticker + return sector_avg[:, self.sector_ids] # (B, N) + + # -- sub-losses ---------------------------------------------------------- + + def _price_action_penalty( + self, + weights: torch.Tensor, + features: torch.Tensor, + returns: torch.Tensor, + ) -> torch.Tensor: + """ + Detect S/R conditions from microstructure features and penalise + portfolio weights that ignore these signals. + + When multi-timeframe is enabled, rolling-window pivot detection + augments the microstructure confidence with S/R level strength + weighted by a learnable importance function. + """ + turnover = self._extract_ticker_feature(features, self.turn_idx) + illiq = self._extract_ticker_feature(features, self.illiq_idx) + spread = self._extract_ticker_feature(features, self.spread_idx) + rets = self._extract_ticker_feature(features, self.ret_idx) + + turn_z = self._z_score(turnover)[:, -1, :] # (B, N) + illiq_z = self._z_score(illiq)[:, -1, :] + spread_z = self._z_score(spread)[:, -1, :] + + smoothed = self._ema_smooth(rets) + + confidence = torch.sigmoid(turn_z + illiq_z + spread_z) + direction = torch.tanh(smoothed[:, -1, :]) + + # ---- Multi-timeframe S/R hierarchy ---- + sr_active = None + if self.use_multi_tf and self.importance_fn is not None: + cum_prices = (1 + rets).cumprod(dim=1) # (B, T, N) + pivot_scores = self._detect_pivots(cum_prices) # (B, W, N) + importance = self.importance_fn(self.sr_lookback_normed) # (W,) + imp_sum = importance.sum().clamp(min=1e-8) + + # Importance-weighted aggregation + imp = importance[None, :, None] # (1, W, 1) + weighted_strength = (pivot_scores.abs() * imp).sum(dim=1) / imp_sum # (B, N) + weighted_direction = (pivot_scores * imp).sum(dim=1) / imp_sum # (B, N) + + # Soft gate: activates when aggregate S/R strength exceeds threshold + sr_active = torch.sigmoid( + (weighted_strength - self.sr_pivot_threshold) / 0.01 + ) # (B, N) + + # Multiplicative boost to microstructure confidence + confidence = confidence * (1.0 + sr_active) + # Blend EMA direction with S/R direction + sr_dir = torch.tanh(weighted_direction) + direction = (1.0 - sr_active) * direction + sr_active * sr_dir + + # ---- Sector-aware scaling ---- + if self.sector_ids is not None: + sec_avg = self._sector_avg_confidence(confidence) + confidence = confidence * (1.0 + sec_avg) + + equal_w = 1.0 / self.N + weight_diff = weights - equal_w + + penalty = -(confidence * direction * weight_diff).mean() + + # ---- Correlation guard (only when multi-TF active) ---- + if ( + self.corr_matrix is not None + and sr_active is not None + ): + w_col = weights.unsqueeze(-1) # (B, N, 1) + port_corr = ( + w_col * self.corr_matrix.unsqueeze(0) * w_col.transpose(-1, -2) + ).sum(dim=(-1, -2)) # (B,) + sr_corr_penalty = (sr_active.mean(dim=-1) * port_corr).mean() + penalty = penalty + sr_corr_penalty + + return penalty + + def _psychological_penalty( + self, + weights: torch.Tensor, + features: torch.Tensor, + ) -> torch.Tensor: + """ + Penalise when cumulative returns sit near psychological thresholds + and the model ignores this. + """ + rets = self._extract_ticker_feature(features, self.ret_idx) # (B, T, N) + + # Use cumulative sum (not product) — features are RobustScaler-normalized, + # so (1+rets).prod() diverges. Sum over the window is well-behaved. + cum_ret = rets.sum(dim=1) # (B, N) + + # Distance to nearest threshold: (B, N, K) + thresholds = self.psych_thresholds # (K,) + diffs = cum_ret.unsqueeze(-1) - thresholds.unsqueeze(0).unsqueeze(0) + abs_diffs = diffs.abs() + min_dist, nearest_idx = abs_diffs.min(dim=-1) # (B, N) + + proximity = torch.exp(-min_dist.pow(2) / (2 * self.psych_sigma ** 2)) + + nearest_thresh = thresholds[nearest_idx] # (B, N) + direction = torch.sign(cum_ret - nearest_thresh) + direction = torch.where(direction == 0, torch.ones_like(direction), direction) + + equal_w = 1.0 / self.N + weight_diff = weights - equal_w + + return -(proximity * direction * weight_diff).mean() + + def _macro_regime_penalty( + self, + weights: torch.Tensor, + returns: torch.Tensor, + features: torch.Tensor, + delta_macro: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Compute risk-on/risk-off regime from macro features and penalise + portfolio returns that are misaligned with the regime direction. + + @param delta_macro (B, M) pre-computed macro delta (avoids recomputing). + """ + if self.regime_weights is None or not self.macro_idx: + return torch.tensor(0.0, device=weights.device) + + if delta_macro is None: + delta_macro = self._compute_delta_macro(features) + + regime = torch.tanh( + (delta_macro * self.regime_weights.unsqueeze(0)).sum(dim=-1) + ) # (B,) + + port_ret = (weights.unsqueeze(1) * returns).sum(dim=-1) # (B, T_out) + + # ---- Ticker macro sensitivity ---- + if self.ticker_macro_sensitivity is not None: + # Weight individual returns by macro sensitivity before portfolio sum + sens = self.ticker_macro_sensitivity.unsqueeze(0) # (1, N) + weighted_port = (weights * sens).sum(dim=-1) # (B,) + # Combine: portfolio return scaled by sensitivity-weighted exposure + mean_port_ret = (weighted_port * port_ret.mean(dim=1)) + else: + mean_port_ret = port_ret.mean(dim=1) # (B,) + + return -(regime * mean_port_ret).mean() + + def _fundamental_penalty( + self, + weights: torch.Tensor, + fundamentals: torch.Tensor, + ) -> torch.Tensor: + """ + Penalise allocations misaligned with fundamental quality scores. + + @param fundamentals torch.Tensor (B, N) composite z-scored scores + (positive = improving, negative = deteriorating). + """ + # Skip if fundamentals have no cross-sectional variance (e.g. broken SEC data) + if fundamentals.var(dim=-1).mean() < 1e-4: + return torch.tensor(0.0, device=weights.device) + + equal_w = 1.0 / self.N + weight_diff = weights - equal_w + return -(fundamentals * weight_diff).mean() + + # -- forward ------------------------------------------------------------- + + def forward( + self, + weights: torch.Tensor, + returns: torch.Tensor, + features: Optional[torch.Tensor] = None, + fundamentals: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + @param weights (B, N) portfolio allocation weights + @param returns (B, T_out, N) future returns + @param features (B, T_in, N*F) raw input window (from xb) + @param fundamentals (B, N) per-ticker composite fundamental scores + """ + loss = differentiable_sharpe_loss(weights, returns) + + if features is not None: + # Pre-compute macro delta (shared by override gate and regime penalty) + delta_macro = self._compute_delta_macro(features) + + # ---- Adaptive macro override gate ---- + if self.macro_override is not None and delta_macro is not None: + omega = self.macro_override(delta_macro).mean() # scalar + else: + omega = torch.tensor(0.0, device=weights.device) + + # Scale penalty weights: macro boosted, others reduced + alpha_eff = self.alpha * (1.0 - omega) + beta_eff = self.beta * (1.0 - omega) + gamma_eff = self.gamma * (1.0 + omega) + delta_eff = self.delta * (1.0 - omega) + + loss = loss + alpha_eff * self._price_action_penalty( + weights, features, returns + ) + loss = loss + beta_eff * self._psychological_penalty( + weights, features + ) + loss = loss + gamma_eff * self._macro_regime_penalty( + weights, returns, features, delta_macro + ) + + if fundamentals is not None: + loss = loss + delta_eff * self._fundamental_penalty( + weights, fundamentals + ) + elif fundamentals is not None: + # No features but fundamentals provided + loss = loss + self.delta * self._fundamental_penalty( + weights, fundamentals + ) + + return loss diff --git a/financial_loss_functions/src/training/pipeline.py b/financial_loss_functions/src/training/pipeline.py index 8f60ba96..c2a69f21 100644 --- a/financial_loss_functions/src/training/pipeline.py +++ b/financial_loss_functions/src/training/pipeline.py @@ -1,11 +1,16 @@ +import numpy as np +import pandas as pd +import torch from torch import optim -from typing import Dict +from typing import Dict, List, Optional from pathlib import Path from src.utils import create_directory from src.data_processing.dataset import Reshaper from src.data_processing.dataset import WindowDataset from src.data_processing.loading import load_csv_files from src.models.lstm import BaseLSTM, AttentionLSTM +from src.models.DeformTime.DeformTime import DeformTime +from src.models.cvar_benchmark import CVaRBenchmark, CVaRParams from src.training.train import ( Trainer, train_val_losses_plot, @@ -14,20 +19,256 @@ from src.training.loss_functions import ( raw_sharpe_loss, raw_sortino_loss, - differentiable_sharpe_loss + differentiable_sharpe_loss, + CompositeSRLoss, ) +from src.evaluation.pyfolio_viz import ( + weights_to_pyfolio, + build_window_dates, + generate_comparison_tearsheets, + comparison_summary, +) + +# Registry of available model keys (used by CLI --models flag) +ALL_MODELS = { + 'baseLSTM_sharpe': 'BaseLSTM (Sharpe)', + 'baseLSTM_composite': 'BaseLSTM (Composite)', + 'attentionLSTM_composite': 'AttentionLSTM (Composite)', + 'deformTime_composite': 'DeformTime (Composite)', + 'cvar': 'CVaR Benchmark', +} + + +def _load_sector_ids( + paths_config: Dict, tickers: list, +) -> Optional[list]: + """ + Load sector assignments and return integer sector IDs aligned to *tickers*. + Returns None if the file is missing. + """ + feature_sel_dir = Path(paths_config.get("data", {}).get( + "feature_selection_dir", "data/feature_selection" + )) + sector_path = feature_sel_dir / "sector_assignment_50.csv" + if not sector_path.exists(): + return None + + df = pd.read_csv(sector_path) + if "ticker" not in df.columns or "sector" not in df.columns: + return None + + sector_map = dict(zip(df["ticker"], df["sector"])) + unique_sectors = sorted(set(sector_map.values())) + sector_to_id = {s: i for i, s in enumerate(unique_sectors)} + + ids = [] + for t in tickers: + sector = sector_map.get(t) + if sector is not None: + ids.append(sector_to_id[sector]) + else: + ids.append(0) + return ids + + +def _load_macro_sensitivity( + paths_config: Dict, tickers: list, +) -> Optional[torch.Tensor]: + """ + Compute per-ticker macro sensitivity from ticker_macro_rankings.csv. + Returns a (N,) tensor normalised to [0, 1], or None if unavailable. + """ + feature_sel_dir = Path(paths_config.get("data", {}).get( + "feature_selection_dir", "data/feature_selection" + )) + rank_path = feature_sel_dir / "ticker_macro_rankings.csv" + if not rank_path.exists(): + return None + + df = pd.read_csv(rank_path) + if "ticker" not in df.columns or "composite_score" not in df.columns: + return None + + # Average composite score across each ticker's top-ranked features + avg_scores = df.groupby("ticker")["composite_score"].mean() + scores = [] + for t in tickers: + scores.append(avg_scores.get(t, 0.0)) + arr = np.array(scores, dtype=np.float32) + # Normalise to [0, 1] + rng = arr.max() - arr.min() + if rng > 0: + arr = (arr - arr.min()) / rng + return torch.tensor(arr, dtype=torch.float32) + + +def _compute_corr_matrix( + returns_df: pd.DataFrame, tickers: list, +) -> Optional[torch.Tensor]: + """ + Compute a static (N, N) return correlation matrix from training data. + Returns None if the data is insufficient. + """ + available = [t for t in tickers if t in returns_df.columns] + if len(available) < 2: + return None + corr = returns_df[available].corr().values.astype(np.float32) + # Reindex to match full ticker list (fill missing with identity) + N = len(tickers) + full_corr = np.eye(N, dtype=np.float32) + avail_idx = {t: i for i, t in enumerate(tickers) if t in available} + for i, ti in enumerate(available): + for j, tj in enumerate(available): + full_corr[avail_idx[ti], avail_idx[tj]] = corr[i, j] + return torch.tensor(full_corr, dtype=torch.float32) + + +def _build_composite_loss( + reshaper: Reshaper, + hparams_config: Dict, + paths_config: Optional[Dict] = None, + returns_train_df: Optional[pd.DataFrame] = None, +) -> CompositeSRLoss: + """Instantiate CompositeSRLoss using feature indices from the Reshaper.""" + features = reshaper.get_features() + tickers = reshaper.get_tickers() + + feat_index = {f: i for i, f in enumerate(features)} + ret_idx = feat_index.get("RET", 0) + turn_idx = feat_index.get("TURNOVER", 1) + illiq_idx = feat_index.get("ILLIQUIDITY", 2) + spread_idx = feat_index.get("BA_SPREAD", 3) + + known_stock_features = {"RET", "TURNOVER", "ILLIQUIDITY", "BA_SPREAD", "VOL_CHANGE", "sprtrn"} + macro_indices = [ + feat_index[f] for f in features + if f not in known_stock_features + ] + + loss_cfg = hparams_config.get("CompositeSRLoss", {}) + + # ---- Data-driven extras (all optional) ---- + sector_ids = None + ticker_macro_sensitivity = None + corr_matrix = None + + if paths_config is not None: + sector_ids = _load_sector_ids(paths_config, tickers) + ticker_macro_sensitivity = _load_macro_sensitivity(paths_config, tickers) + + if returns_train_df is not None: + corr_matrix = _compute_corr_matrix(returns_train_df, tickers) + + return CompositeSRLoss( + num_tickers=len(tickers), + num_features_per_ticker=len(features), + ret_feature_idx=ret_idx, + turnover_feature_idx=turn_idx, + illiq_feature_idx=illiq_idx, + ba_spread_feature_idx=spread_idx, + macro_col_indices=macro_indices if macro_indices else None, + alpha=loss_cfg.get("alpha", 0.10), + beta=loss_cfg.get("beta", 0.05), + gamma=loss_cfg.get("gamma", 0.10), + delta=loss_cfg.get("delta", 0.10), + psych_thresholds=loss_cfg.get("psych_thresholds"), + psych_sigma=loss_cfg.get("psych_sigma", 0.01), + ema_span=loss_cfg.get("ema_span", 10), + sr_use_multi_timeframe=loss_cfg.get("sr_use_multi_timeframe", False), + sr_lookback_windows=loss_cfg.get("sr_lookback_windows"), + sr_pivot_threshold=loss_cfg.get("sr_pivot_threshold", 0.02), + sr_importance_hidden=loss_cfg.get("sr_importance_hidden", 8), + use_macro_override=loss_cfg.get("use_macro_override", False), + macro_override_hidden=loss_cfg.get("macro_override_hidden", 8), + sector_ids=sector_ids, + ticker_macro_sensitivity=ticker_macro_sensitivity, + corr_matrix=corr_matrix, + ) + + +def _extract_returns_from_X( + X: np.ndarray, reshaper: Reshaper, +) -> np.ndarray: + """ + Extract per-ticker returns from the flat (W, T, N*F) input array. + Returns (W, T, N). + """ + features = reshaper.get_features() + tickers = reshaper.get_tickers() + F = len(features) + ret_idx = sorted(features).index("RET") if "RET" in features else 0 + + W, T, _ = X.shape + N = len(tickers) + out = np.zeros((W, T, N), dtype=X.dtype) + for j in range(N): + out[:, :, j] = X[:, :, j * F + ret_idx] + return out + + +def _load_sec_fundamentals( + paths_config: Dict, + tickers: list, + good_starts_train: np.ndarray, + good_starts_val: np.ndarray, + in_size: int, + date_index_train: Optional[pd.DatetimeIndex] = None, + date_index_val: Optional[pd.DatetimeIndex] = None, +): + """ + Load pre-computed composite fundamental scores and window them. + + Returns (fund_train, fund_val) as numpy arrays of shape (W, N), + or (None, None) if the file doesn't exist. + """ + sec_dir = Path(paths_config.get("data", {}).get("sec_filings_dir", "data/raw/sec_filings")) + score_path = sec_dir / "composite_fundamental_scores.csv" + if not score_path.exists(): + return None, None + + scores = pd.read_csv(score_path, index_col=0, parse_dates=True) + available_tickers = [t for t in tickers if t in scores.columns] + if not available_tickers: + return None, None + + scores = scores[available_tickers] + + def _window_scores(starts, date_index): + if date_index is None: + return None + aligned = scores.reindex(date_index).ffill().bfill().fillna(0.0) + windows = [] + for s in starts: + idx = s + in_size - 1 + if idx < len(aligned): + windows.append(aligned.iloc[idx].values) + else: + windows.append(aligned.iloc[-1].values) + return np.array(windows) if windows else None + + fund_train = _window_scores(good_starts_train, date_index_train) + fund_val = _window_scores(good_starts_val, date_index_val) + return fund_train, fund_val -def run_training_pipeline(paths_config: Dict, hparams_config: Dict): + +def run_training_pipeline( + paths_config: Dict, + hparams_config: Dict, + models: Optional[List[str]] = None, +): """ - All models training pipeline entry point + All models training pipeline entry point. @param paths_config Dict Dictionary containing paths - @param features_config Dictionary containing hyperparameter information + @param hparams_config Dict Dictionary containing hyperparameter information + @param models Optional list of model keys to train (see ALL_MODELS). None = all. """ + enabled = set(models) if models else set(ALL_MODELS.keys()) print('\n', '=' * 20, ' Training Pipeline ', '=' * 20) # Create plots directory if it doesnt exist - create_directory(Path(paths_config['artifacts']['plots'])) + plots_dir = Path(paths_config['artifacts']['plots']) + create_directory(plots_dir) # -------------------- Loading Processed Data -------------------- # processed_files = { @@ -38,124 +279,257 @@ def run_training_pipeline(paths_config: Dict, hparams_config: Dict): } processed_dfs = load_csv_files(processed_files) - train_data = processed_dfs['processed_train'] - returns_train = processed_dfs['returns_train'] + train_data = processed_dfs['processed_train'].fillna(0.0) + returns_train = processed_dfs['returns_train'].fillna(0.0) - val_data = processed_dfs['processed_val'] - returns_val = processed_dfs['returns_val'] + val_data = processed_dfs['processed_val'].fillna(0.0) + returns_val = processed_dfs['returns_val'].fillna(0.0) print('Train shape:', train_data.shape) print('Val shape:', val_data.shape) # -------------------- Preprocessing (Reshaping) -------------------- # + in_size = hparams_config['rolling_windows']['in_size'] + out_size = hparams_config['rolling_windows']['out_size'] + reshaper = Reshaper( - hparams_config['rolling_windows']['in_size'], - hparams_config['rolling_windows']['out_size'], + in_size, + out_size, hparams_config['rolling_windows']['stride'] ) reshaper.extract_features(train_data) - X_train, y_train, _ = reshaper.reshape(train_data, returns_train) + X_train, y_train, good_starts_train = reshaper.reshape(train_data, returns_train) print('-'*10, ' train shapes ', '-'*10) - print('X_train shpe:', X_train.shape) + print('X_train shape:', X_train.shape) print('y_train shape:', y_train.shape) - - X_val, y_val, _ = reshaper.reshape(val_data, returns_val) + X_val, y_val, good_starts_val = reshaper.reshape(val_data, returns_val) print('-'*10, ' val shapes ', '-'*10) - print('X_val shape', X_val.shape) + print('X_val shape:', X_val.shape) print('y_val shape:', y_val.shape) + # -------------------- SEC Fundamentals (optional) -------------------- # + tickers = reshaper.get_tickers() + fund_train, fund_val = _load_sec_fundamentals( + paths_config, tickers, + good_starts_train, good_starts_val, in_size, + date_index_train=train_data.index if hasattr(train_data, 'index') else None, + date_index_val=val_data.index if hasattr(val_data, 'index') else None, + ) + if fund_train is not None: + print(f'SEC fundamentals loaded: train={fund_train.shape}, val={fund_val.shape}') + else: + print('SEC fundamentals not available -- skipping L_fundamental') + + # -------------------- Build Composite Loss -------------------- # + composite_loss = _build_composite_loss( + reshaper, hparams_config, + paths_config=paths_config, + returns_train_df=returns_train, + ) + # -------------------- Training Models -------------------- # - # Converting to pytorch tensors train_ds = WindowDataset(X_train, y_train) val_ds = WindowDataset(X_val, y_val) - # Initializing once to compare all models together evaluator = Evaluator(y_val) + all_strategy_weights: Dict[str, np.ndarray] = {} - #### BaseLSTM #### - model1_name = 'BaseLSTM' - print('\n', '-'*10, f' Training {model1_name} ', '-'*10) - try: - trainer = Trainer( - model=BaseLSTM, - optimizer=optim.AdamW, - loss=differentiable_sharpe_loss, - model_hparams=hparams_config[model1_name]['model'], - optimizer_hparams=hparams_config[model1_name]['optimizer'], - train_hparams=hparams_config[model1_name]['train'], - in_size=X_train.shape[2], - num_stocks=y_train.shape[2] - ) - - trainer.train(train_ds) - trainer.evaluate(val_ds) - - # Plot loss curves - train_val_losses_plot( - trainer.train_losses, - trainer.val_losses, - model1_name + ' Loss Curves', - Path(paths_config['artifacts']['plots']) / - (model1_name + ' Loss Curves' + '.png') - ) - - alloc_weights = trainer.get_val_alloc_weights() - - # Call on every models output allocation weights to caluclated weighted returns - # Add daily returns for BaseLSTM generated weights - evaluator.calc_pf_daily_rets(alloc_weights, model1_name) - - except Exception as error: - print(f'DEBUG: Error while training {model1_name}. Skipping.', error) - + # ---- BaseLSTM (Sharpe-only, baseline) ---- + if 'baseLSTM_sharpe' in enabled: + model1_name = 'BaseLSTM' + print('\n', '-'*10, f' Training {model1_name} (Sharpe) ', '-'*10) + try: + trainer = Trainer( + model=BaseLSTM, + optimizer=optim.AdamW, + loss=differentiable_sharpe_loss, + model_hparams=hparams_config[model1_name]['model'], + optimizer_hparams=hparams_config[model1_name]['optimizer'], + train_hparams=hparams_config[model1_name]['train'], + in_size=X_train.shape[2], + num_stocks=y_train.shape[2] + ) + trainer.train(train_ds, val_ds) + trainer.evaluate(val_ds) - #### Attention LSTM #### - model2_name = 'AttentionLSTM' - print('\n', '-'*10, f' Training {model2_name} ', '-'*10) - try: - trainer = Trainer( - model=AttentionLSTM, - optimizer=optim.AdamW, - loss=differentiable_sharpe_loss, - model_hparams=hparams_config[model2_name]['model'], - optimizer_hparams=hparams_config[model2_name]['optimizer'], - train_hparams=hparams_config[model2_name]['train'], - in_size=X_train.shape[2], - num_stocks=y_train.shape[2] - ) - - trainer.train(train_ds) - trainer.evaluate(val_ds) - - # Plot loss curves - train_val_losses_plot( - trainer.train_losses, - trainer.val_losses, - model2_name + ' Loss Curves', - Path(paths_config['artifacts']['plots']) / - (model2_name + ' Loss Curves' + '.png') - ) - - alloc_weights = trainer.get_val_alloc_weights() - - # Add daily returns for AttentionLSTM generated weights - evaluator.calc_pf_daily_rets(alloc_weights, model2_name) - except Exception as error: - print(f'DEBUG: Error while training {model2_name}. Skipping.', error) - - # Evaluation/Comparison starts here + train_val_losses_plot( + trainer.train_losses, trainer.val_losses, + model1_name + ' (Sharpe) Loss Curves', + plots_dir / (model1_name + '_Sharpe_Loss.png'), + ) + alloc_weights = trainer.get_val_alloc_weights() + evaluator.calc_pf_daily_rets(alloc_weights, model1_name + ' (Sharpe)') + all_strategy_weights[model1_name + ' (Sharpe)'] = alloc_weights + except Exception as error: + print(f'DEBUG: Error training {model1_name} (Sharpe). Skipping.', error) + + # ---- BaseLSTM (Composite S/R Loss) ---- + if 'baseLSTM_composite' in enabled: + model1c_name = 'BaseLSTM (Composite)' + print('\n', '-'*10, f' Training {model1c_name} ', '-'*10) + try: + trainer = Trainer( + model=BaseLSTM, + optimizer=optim.AdamW, + loss=composite_loss, + model_hparams=hparams_config['BaseLSTM']['model'], + optimizer_hparams=hparams_config['BaseLSTM']['optimizer'], + train_hparams=hparams_config['BaseLSTM']['train'], + in_size=X_train.shape[2], + num_stocks=y_train.shape[2], + fundamentals_train=fund_train, + fundamentals_val=fund_val, + ) + trainer.train(train_ds, val_ds) + trainer.evaluate(val_ds) + + train_val_losses_plot( + trainer.train_losses, trainer.val_losses, + model1c_name + ' Loss Curves', + plots_dir / 'BaseLSTM_Composite_Loss.png', + ) + alloc_weights = trainer.get_val_alloc_weights() + evaluator.calc_pf_daily_rets(alloc_weights, model1c_name) + all_strategy_weights[model1c_name] = alloc_weights + except Exception as error: + print(f'DEBUG: Error training {model1c_name}. Skipping.', error) + + # ---- AttentionLSTM (Composite S/R Loss) ---- + if 'attentionLSTM_composite' in enabled: + model2_name = 'AttentionLSTM (Composite)' + print('\n', '-'*10, f' Training {model2_name} ', '-'*10) + try: + trainer = Trainer( + model=AttentionLSTM, + optimizer=optim.AdamW, + loss=composite_loss, + model_hparams=hparams_config['AttentionLSTM']['model'], + optimizer_hparams=hparams_config['AttentionLSTM']['optimizer'], + train_hparams=hparams_config['AttentionLSTM']['train'], + in_size=X_train.shape[2], + num_stocks=y_train.shape[2], + fundamentals_train=fund_train, + fundamentals_val=fund_val, + ) + trainer.train(train_ds, val_ds) + trainer.evaluate(val_ds) + + train_val_losses_plot( + trainer.train_losses, trainer.val_losses, + model2_name + ' Loss Curves', + plots_dir / 'AttentionLSTM_Composite_Loss.png', + ) + alloc_weights = trainer.get_val_alloc_weights() + evaluator.calc_pf_daily_rets(alloc_weights, model2_name) + all_strategy_weights[model2_name] = alloc_weights + except Exception as error: + print(f'DEBUG: Error training {model2_name}. Skipping.', error) + + # ---- DeformTime (Composite S/R Loss) ---- + if 'deformTime_composite' in enabled: + model3_name = 'DeformTime (Composite)' + print('\n', '-'*10, f' Training {model3_name} ', '-'*10) + try: + trainer = Trainer( + model=DeformTime, + optimizer=optim.AdamW, + loss=composite_loss, + model_hparams=hparams_config['DeformTime']['model'], + optimizer_hparams=hparams_config['DeformTime']['optimizer'], + train_hparams=hparams_config['DeformTime']['train'], + in_size=X_train.shape[2], + num_stocks=y_train.shape[2], + fundamentals_train=fund_train, + fundamentals_val=fund_val, + ) + trainer.train(train_ds, val_ds) + trainer.evaluate(val_ds) + + train_val_losses_plot( + trainer.train_losses, trainer.val_losses, + model3_name + ' Loss Curves', + plots_dir / 'DeformTime_Composite_Loss.png', + ) + alloc_weights = trainer.get_val_alloc_weights() + evaluator.calc_pf_daily_rets(alloc_weights, model3_name) + all_strategy_weights[model3_name] = alloc_weights + except Exception as error: + print(f'DEBUG: Error training {model3_name}. Skipping.', error) + + # -------------------- CVaR Benchmark -------------------- # + if 'cvar' in enabled: + print('\n', '-'*10, ' CVaR Benchmark ', '-'*10) + try: + cvar_cfg = hparams_config.get('CVaRBenchmark', {}) + cvar_params = CVaRParams( + confidence=cvar_cfg.get('confidence', 0.95), + risk_aversion=cvar_cfg.get('risk_aversion', 1.0), + w_min=cvar_cfg.get('w_min', 0.0), + w_max=cvar_cfg.get('w_max', 0.30), + L_tar=cvar_cfg.get('L_tar', 1.6), + ) + cvar_bench = CVaRBenchmark(params=cvar_params) + + X_val_rets = _extract_returns_from_X(X_val, reshaper) + cvar_weights = cvar_bench.rolling_optimize(X_val_rets) + + evaluator.calc_pf_daily_rets(cvar_weights, 'CVaR Benchmark') + all_strategy_weights['CVaR Benchmark'] = cvar_weights + print(f'CVaR Benchmark weights shape: {cvar_weights.shape}') + except Exception as error: + print(f'DEBUG: CVaR Benchmark failed. Skipping.', error) + + # -------------------- Equal Weight -------------------- # evaluator.calc_eq_wt_daily_rets() - - evaluator.plot_windowed_comparison( - Path(paths_config['artifacts']['plots']) / - (f'Daily Returns' + '.png') - ) + eq_w = np.full((y_val.shape[0], y_val.shape[2]), 1.0 / y_val.shape[2]) + all_strategy_weights['Equal Weight'] = eq_w + + # -------------------- Windowed Comparison Plot -------------------- # + evaluator.plot_windowed_comparison(plots_dir / 'Daily_Returns.png') total_returns = evaluator.calc_total_performance('returns') total_sharpes = evaluator.calc_total_performance('sharpe') - print('\n', '-'*10, ' Portfolio Perfomance Metrics ', '-'*10) + print('\n', '-'*10, ' Portfolio Performance Metrics ', '-'*10) print('\n', 'Compounded returns for each window:\n', total_returns) - print('\n', 'Basic sharpe ratios for each window:\n', total_sharpes) \ No newline at end of file + print('\n', 'Basic sharpe ratios for each window:\n', total_sharpes) + + # -------------------- pyfolio Tearsheets -------------------- # + print('\n', '-'*10, ' Generating pyfolio Tearsheets ', '-'*10) + try: + val_dates = val_data.index if hasattr(val_data, 'index') else pd.RangeIndex(len(val_data)) + window_dates = build_window_dates(val_dates, good_starts_val, in_size, out_size) + + benchmark_col = 'sprtrn' + if benchmark_col in val_data.columns: + bench_rets = val_data[benchmark_col] + elif hasattr(val_data, 'index'): + bench_rets = pd.Series(0.0, index=val_data.index) + else: + bench_rets = pd.Series(0.0, index=pd.RangeIndex(len(val_data))) + + strategies: Dict[str, dict] = {} + for name, w_arr in all_strategy_weights.items(): + try: + pf_data = weights_to_pyfolio( + weights=w_arr, + returns=y_val, + tickers=tickers, + window_dates=window_dates, + benchmark_returns=bench_rets, + ) + strategies[name] = pf_data + except Exception as exc: + print(f'DEBUG: pyfolio conversion failed for {name}: {exc}') + + if strategies: + pyfolio_dir = plots_dir / 'pyfolio' + generate_comparison_tearsheets(strategies, pyfolio_dir) + summary = comparison_summary(strategies) + print('\n', '-'*10, ' Strategy Comparison Summary ', '-'*10) + print(summary.to_string()) + summary.to_csv(plots_dir / 'strategy_comparison.csv') + except Exception as error: + print(f'DEBUG: pyfolio tearsheets failed. Skipping.', error) \ No newline at end of file diff --git a/financial_loss_functions/src/training/train.py b/financial_loss_functions/src/training/train.py index c4eb5b85..9beb2e81 100644 --- a/financial_loss_functions/src/training/train.py +++ b/financial_loss_functions/src/training/train.py @@ -4,7 +4,7 @@ import pandas as pd import matplotlib.pyplot as plt from torch.utils.data import DataLoader -from typing import List, Dict, Callable +from typing import List, Dict, Callable, Optional from src.data_processing.dataset import WindowDataset if torch.mps.is_available(): @@ -31,7 +31,9 @@ def __init__( optimizer_hparams: Dict, # Specific to optimizer train_hparams: Dict, # Generic training params (epochs, batch_size, etc.) in_size: int, - num_stocks: int + num_stocks: int, + fundamentals_train: Optional[np.ndarray] = None, + fundamentals_val: Optional[np.ndarray] = None, ): """ Initialize Trainer instance to train given model. @@ -41,7 +43,7 @@ def __init__( @param optimizer torch.optim Pytorch optimization class to be used to loss optimization @param loss Callable - Custom loss function + Custom loss function (2-arg legacy or 4-arg CompositeSRLoss) @param model_hparams Dict Dictionary containing hyperparameters required for model initialization @param optimizer_hparams Dict @@ -51,7 +53,11 @@ def __init__( @param in_size int Size of input window @param num_stocks int - Number of stocks, i.e, number of output nodes + Number of stocks, i.e, number of output nodes + @param fundamentals_train np.ndarray (num_windows, N) optional + Pre-computed per-ticker fundamental scores for training windows + @param fundamentals_val np.ndarray (num_windows, N) optional + Pre-computed per-ticker fundamental scores for validation windows """ self.device = DEVICE print('Model hyperparameters:\n', model_hparams) @@ -66,13 +72,27 @@ def __init__( ).to(self.device) # Initialize optimizer with its specific hyperparameters + all_params = list(self.model.parameters()) + if hasattr(loss, 'parameters'): + all_params += list(loss.parameters()) self.optimizer = optimizer( - self.model.parameters(), + all_params, **optimizer_hparams ) + if hasattr(loss, 'to'): + loss = loss.to(self.device) self.loss = loss self.train_hparams = train_hparams + + self.fundamentals_train = ( + torch.tensor(fundamentals_train, dtype=torch.float32) + if fundamentals_train is not None else None + ) + self.fundamentals_val = ( + torch.tensor(fundamentals_val, dtype=torch.float32) + if fundamentals_val is not None else None + ) self.train_losses = [] self.val_losses = [] @@ -81,13 +101,57 @@ def __init__( self.val_alloc_weights = [] - def train(self, train_ds: WindowDataset): + def _call_loss(self, weights, yb, xb, sample_indices=None, fundamentals_source=None): + """ + Invoke the loss function with the correct signature. + + Legacy losses accept (weights, returns). + CompositeSRLoss accepts (weights, returns, features, fundamentals). + """ + fund_batch = None + if fundamentals_source is not None and sample_indices is not None: + fund_batch = fundamentals_source[sample_indices].to(self.device) + + try: + return self.loss(weights, yb, xb, fund_batch) + except TypeError: + return self.loss(weights, yb) + + def _validate_epoch(self, val_loader: DataLoader) -> float: + """Run one validation pass and return average loss.""" + self.model.eval() + total_loss, total_samples, sample_offset = 0.0, 0, 0 + with torch.no_grad(): + for xb, yb in val_loader: + b = xb.size(0) + xb, yb = xb.to(self.device), yb.to(self.device) + + indices = None + if self.fundamentals_val is not None: + indices = torch.arange(sample_offset, sample_offset + b) + + weights = self.model(xb) + loss = self._call_loss( + weights, yb, xb, + sample_indices=indices, + fundamentals_source=self.fundamentals_val, + ) + total_loss += loss.item() * b + total_samples += b + sample_offset += b + return total_loss / total_samples + + def train(self, train_ds: WindowDataset, val_ds: Optional[WindowDataset] = None): """ Train inistalized model using a train data split. @param train_ds WindowDataset Training data split converted to windowed dataset tensors + @param val_ds Optional[WindowDataset] + Validation data split. If provided, enables early stopping and LR scheduling. """ + import copy + start_time = time.time() train_loader = DataLoader( train_ds, @@ -95,18 +159,52 @@ def train(self, train_ds: WindowDataset): shuffle=False ) + val_loader = None + if val_ds is not None: + val_loader = DataLoader( + val_ds, + batch_size=self.train_hparams['val_batch_size'], + shuffle=False + ) + + # Early stopping and LR scheduling setup + es_patience = self.train_hparams.get('early_stopping_patience', 15) + lr_patience = self.train_hparams.get('lr_scheduler_patience', 7) + lr_factor = self.train_hparams.get('lr_scheduler_factor', 0.5) + + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( + self.optimizer, mode='min', factor=lr_factor, + patience=lr_patience, min_lr=1e-6, + ) + + best_val_loss = float('inf') + best_model_state = None + patience_counter = 0 + for epoch in range(self.train_hparams['epochs']): epoch_start = time.time() self.model.train() total_loss_sum = 0.0 total_samples = 0 + sample_offset = 0 for xb, yb in train_loader: xb = xb.to(self.device) yb = yb.to(self.device) + batch_size = xb.size(0) + + indices = None + if self.fundamentals_train is not None: + indices = torch.arange( + sample_offset, sample_offset + batch_size + ) weights = self.model(xb) # (B, N) - loss = self.loss(weights, yb) + loss = self._call_loss( + weights, yb, xb, + sample_indices=indices, + fundamentals_source=self.fundamentals_train, + ) self.optimizer.zero_grad() loss.backward() @@ -116,20 +214,41 @@ def train(self, train_ds: WindowDataset): ) self.optimizer.step() - batch_size = xb.size(0) - total_loss_sum += loss.item() * batch_size total_samples += batch_size + sample_offset += batch_size - epoch_end = time.time() - epoch_time = round(epoch_end - epoch_start, 3) - epoch_avg_loss = total_loss_sum / total_samples self.train_losses.append(epoch_avg_loss) - print(f'Epoch {epoch} | Train Loss: {epoch_avg_loss:.4f} | Took: {epoch_time}s') - self.avg_train_loss = epoch_avg_loss + # Validation + early stopping + val_msg = '' + if val_loader is not None: + val_loss = self._validate_epoch(val_loader) + self.val_losses.append(val_loss) + scheduler.step(val_loss) + val_msg = f' | Val Loss: {val_loss:.4f} | LR: {self.optimizer.param_groups[0]["lr"]:.1e}' + + if val_loss < best_val_loss: + best_val_loss = val_loss + best_model_state = copy.deepcopy(self.model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + + epoch_time = round(time.time() - epoch_start, 3) + print(f'Epoch {epoch} | Train Loss: {epoch_avg_loss:.4f}{val_msg} | Took: {epoch_time}s') + + if val_loader is not None and patience_counter >= es_patience: + print(f'Early stopping at epoch {epoch} (no improvement for {es_patience} epochs)') + break + + # Restore best model + if best_model_state is not None: + self.model.load_state_dict(best_model_state) + print(f'Restored best model (val loss: {best_val_loss:.4f})') + end_time = time.time() time_taken = round(end_time - start_time, 3) print(f'Average Train Loss: {self.avg_train_loss:.4f}, Time Taken: {time_taken}s') @@ -153,12 +272,22 @@ def evaluate(self, val_ds: WindowDataset): with torch.no_grad(): self.val_losses = [] total_loss, total_samples = 0.0, 0 + sample_offset = 0 for xb, yb in val_loader: b = xb.size(0) xb, yb = xb.to(self.device), yb.to(self.device) + + indices = None + if self.fundamentals_val is not None: + indices = torch.arange(sample_offset, sample_offset + b) + weights = self.model(xb) - loss = self.loss(weights, yb) + loss = self._call_loss( + weights, yb, xb, + sample_indices=indices, + fundamentals_source=self.fundamentals_val, + ) # detach & move to CPU BEFORE appending self.val_alloc_weights.append(weights.detach().cpu()) @@ -169,6 +298,7 @@ def evaluate(self, val_ds: WindowDataset): # --- accumulate weighted sum for overall avg --- total_loss += loss.item() * b total_samples += b + sample_offset += b # --- weighted average over all samples --- self.avg_val_loss = total_loss / total_samples diff --git a/financial_loss_functions/tests/unit/test_composite_loss.py b/financial_loss_functions/tests/unit/test_composite_loss.py new file mode 100644 index 00000000..99eabaa5 --- /dev/null +++ b/financial_loss_functions/tests/unit/test_composite_loss.py @@ -0,0 +1,417 @@ +""" +Unit tests for CompositeSRLoss and backward-compatible loss wrappers. +""" + +import torch +import pytest +import numpy as np + +from src.training.loss_functions import ( + CompositeSRLoss, + TimeframeImportanceFn, + MacroOverrideGate, + differentiable_sharpe_loss, + sharpe_loss_compat, + sortino_loss_compat, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +B, T_IN, T_OUT, N, F = 4, 20, 10, 5, 6 # batch, time-in, time-out, tickers, features + + +def _random_weights(): + w = torch.softmax(torch.randn(B, N), dim=-1) + w.requires_grad_(True) + return w + + +def _random_returns(): + return torch.randn(B, T_OUT, N) * 0.02 + + +def _random_features(): + return torch.randn(B, T_IN, N * F) + + +def _random_fundamentals(): + return torch.randn(B, N) + + +def _make_loss(**kwargs): + defaults = dict( + num_tickers=N, + num_features_per_ticker=F, + ret_feature_idx=2, + turnover_feature_idx=3, + illiq_feature_idx=1, + ba_spread_feature_idx=0, + macro_col_indices=[4, 5], + ) + defaults.update(kwargs) + return CompositeSRLoss(**defaults) + + +# --------------------------------------------------------------------------- +# Tests: CompositeSRLoss (original) +# --------------------------------------------------------------------------- + +class TestCompositeSRLoss: + + def test_forward_returns_scalar(self): + loss_fn = _make_loss() + w = _random_weights() + r = _random_returns() + f = _random_features() + fund = _random_fundamentals() + + loss = loss_fn(w, r, f, fund) + assert loss.dim() == 0, "Loss should be a scalar" + assert torch.isfinite(loss), "Loss should be finite" + + def test_gradients_flow_to_weights(self): + loss_fn = _make_loss() + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + loss.backward() + + assert w.grad is not None, "Gradients must reach weights" + assert torch.isfinite(w.grad).all(), "Gradients must be finite" + + def test_regime_weights_receive_gradients(self): + loss_fn = _make_loss() + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + loss.backward() + + assert loss_fn.regime_weights.grad is not None + assert torch.isfinite(loss_fn.regime_weights.grad).all() + + def test_reduces_to_sharpe_when_penalties_zero(self): + loss_fn = _make_loss(alpha=0.0, beta=0.0, gamma=0.0, delta=0.0) + w = _random_weights() + r = _random_returns() + f = _random_features() + + composite = loss_fn(w, r, f) + sharpe_only = differentiable_sharpe_loss(w, r) + + assert torch.allclose(composite, sharpe_only, atol=1e-6) + + def test_no_features_still_works(self): + """When features=None, only the Sharpe component should fire.""" + loss_fn = _make_loss() + w = _random_weights() + r = _random_returns() + + loss = loss_fn(w, r) + assert torch.isfinite(loss) + + def test_fundamentals_penalty_direction(self): + """Overweighting stocks with positive fundamentals should lower the loss.""" + loss_fn = _make_loss(alpha=0, beta=0, gamma=0, delta=1.0) + + fund = torch.ones(B, N) + w_overweight = torch.ones(B, N) * 0.5 + w_equal = torch.ones(B, N) * (1.0 / N) + + r = _random_returns() + + loss_over = loss_fn(w_overweight, r, fundamentals=fund) + loss_eq = loss_fn(w_equal, r, fundamentals=fund) + + assert loss_over.item() != loss_eq.item() + + def test_no_macro_indices(self): + loss_fn = _make_loss(macro_col_indices=None) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + assert torch.isfinite(loss) + + +# --------------------------------------------------------------------------- +# Tests: Backward-compatible wrappers +# --------------------------------------------------------------------------- + +class TestCompatWrappers: + + def test_sharpe_compat_ignores_extra_args(self): + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = sharpe_loss_compat(w, r, f, _random_fundamentals()) + expected = differentiable_sharpe_loss(w, r) + assert torch.allclose(loss, expected, atol=1e-7) + + def test_sortino_compat_ignores_extra_args(self): + w = _random_weights() + r = _random_returns() + + loss = sortino_loss_compat(w, r, _random_features()) + assert torch.isfinite(loss) + + +# --------------------------------------------------------------------------- +# Tests: Multi-Timeframe S/R Hierarchy +# --------------------------------------------------------------------------- + +class TestMultiTimeframeSR: + + def test_backward_compat_flag_off(self): + """With sr_use_multi_timeframe=False, loss matches existing behavior.""" + torch.manual_seed(42) + loss_fn_old = _make_loss(sr_use_multi_timeframe=False) + + torch.manual_seed(0) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn_old(w, r, f) + assert torch.isfinite(loss) + assert loss_fn_old.importance_fn is None + + def test_multi_tf_returns_scalar(self): + loss_fn = _make_loss(sr_use_multi_timeframe=True) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + assert loss.dim() == 0 + assert torch.isfinite(loss) + + def test_multi_tf_gradients_flow(self): + loss_fn = _make_loss(sr_use_multi_timeframe=True) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + loss.backward() + + assert w.grad is not None + assert torch.isfinite(w.grad).all() + # Importance MLP params should receive gradients + for p in loss_fn.importance_fn.parameters(): + assert p.grad is not None, "Importance fn params must get gradients" + + def test_importance_fn_monotonic_init(self): + """At init, importance should be non-decreasing for increasing lookbacks.""" + fn = TimeframeImportanceFn(hidden=8) + x = torch.linspace(0.0, 1.0, steps=10) + with torch.no_grad(): + y = fn(x) + diffs = y[1:] - y[:-1] + assert (diffs >= -1e-6).all(), "Importance should be non-decreasing at init" + + def test_pivot_detection_at_high(self): + """Monotonically increasing prices → scores near +1.""" + loss_fn = _make_loss(sr_use_multi_timeframe=True) + # Create strictly increasing prices + prices = torch.linspace(1.0, 2.0, steps=T_IN).unsqueeze(0).unsqueeze(-1) + prices = prices.expand(B, T_IN, N) + scores = loss_fn._detect_pivots(prices) # (B, W, N) + assert (scores >= 0.9).all(), "Increasing prices should be near +1 (resistance)" + + def test_pivot_detection_at_low(self): + """Monotonically decreasing prices → scores near -1.""" + loss_fn = _make_loss(sr_use_multi_timeframe=True) + prices = torch.linspace(2.0, 1.0, steps=T_IN).unsqueeze(0).unsqueeze(-1) + prices = prices.expand(B, T_IN, N) + scores = loss_fn._detect_pivots(prices) + assert (scores <= -0.9).all(), "Decreasing prices should be near -1 (support)" + + def test_custom_lookback_windows(self): + loss_fn = _make_loss( + sr_use_multi_timeframe=True, + sr_lookback_windows=[3, 7, 15], + ) + assert loss_fn.sr_windows == [3, 7, 15] + assert loss_fn.sr_lookback_normed.shape == (3,) + + def test_reduces_to_sharpe_when_alpha_zero_multi_tf(self): + loss_fn = _make_loss( + alpha=0.0, beta=0.0, gamma=0.0, delta=0.0, + sr_use_multi_timeframe=True, + ) + w = _random_weights() + r = _random_returns() + f = _random_features() + + composite = loss_fn(w, r, f) + sharpe_only = differentiable_sharpe_loss(w, r) + assert torch.allclose(composite, sharpe_only, atol=1e-5) + + +# --------------------------------------------------------------------------- +# Tests: Macro Override Gate +# --------------------------------------------------------------------------- + +class TestMacroOverrideGate: + + def test_gate_output_bounded(self): + gate = MacroOverrideGate(num_macro_features=3, hidden=8) + delta_macro = torch.randn(B, 3) + omega = gate(delta_macro) + assert (omega >= 0.0).all() and (omega <= 1.0).all() + + def test_gate_gradients_flow(self): + loss_fn = _make_loss(use_macro_override=True) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + loss.backward() + + for p in loss_fn.macro_override.parameters(): + assert p.grad is not None, "Override gate params must get gradients" + + def test_override_off_matches_baseline(self): + """use_macro_override=False → standard penalty weights.""" + loss_fn = _make_loss(use_macro_override=False) + assert loss_fn.macro_override is None + + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + assert torch.isfinite(loss) + + def test_override_extreme_macro_boosts_gamma(self): + """With extreme macro delta, the macro penalty should dominate.""" + loss_fn = _make_loss( + use_macro_override=True, + alpha=0.10, beta=0.05, gamma=0.10, delta=0.10, + ) + w = _random_weights() + r = _random_returns() + f = _random_features() + # Make macro features have huge range to push omega toward 1 + for idx in [4, 5]: + for n_idx in range(N): + col = n_idx * F + idx + f[:, :, col] = torch.linspace(-100, 100, T_IN).unsqueeze(0) + + loss = loss_fn(w, r, f) + assert torch.isfinite(loss) + + +# --------------------------------------------------------------------------- +# Tests: Sector-Aware Penalty +# --------------------------------------------------------------------------- + +class TestSectorAware: + + def test_sector_ids_affect_penalty(self): + sector_ids = [0, 0, 1, 1, 2] + loss_fn_sec = _make_loss(sector_ids=sector_ids) + loss_fn_nosec = _make_loss() + + torch.manual_seed(99) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss_sec = loss_fn_sec(w, r, f) + loss_nosec = loss_fn_nosec(w, r, f) + + assert torch.isfinite(loss_sec) + # With sectors, the penalty scaling differs + assert not torch.allclose(loss_sec, loss_nosec, atol=1e-8) + + def test_no_sector_ids_unchanged(self): + loss_fn = _make_loss(sector_ids=None) + assert loss_fn.sector_ids is None + + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + assert torch.isfinite(loss) + + +# --------------------------------------------------------------------------- +# Tests: Ticker Macro Sensitivity +# --------------------------------------------------------------------------- + +class TestTickerMacroSensitivity: + + def test_sensitivity_affects_macro_penalty(self): + sens = torch.tensor([0.1, 0.2, 0.8, 0.9, 1.0]) + loss_fn = _make_loss(ticker_macro_sensitivity=sens) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + assert torch.isfinite(loss) + + def test_no_sensitivity_unchanged(self): + loss_fn = _make_loss(ticker_macro_sensitivity=None) + assert loss_fn.ticker_macro_sensitivity is None + + +# --------------------------------------------------------------------------- +# Tests: Correlation Guard +# --------------------------------------------------------------------------- + +class TestCorrelationGuard: + + def test_corr_matrix_integration(self): + corr = torch.eye(N) + corr[0, 1] = 0.9 + corr[1, 0] = 0.9 + loss_fn = _make_loss( + sr_use_multi_timeframe=True, + corr_matrix=corr, + ) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss = loss_fn(w, r, f) + assert torch.isfinite(loss) + + def test_no_corr_matrix_unchanged(self): + loss_fn = _make_loss(corr_matrix=None) + assert loss_fn.corr_matrix is None + + def test_corr_requires_multi_tf(self): + """Correlation guard only activates when multi-TF is enabled.""" + corr = torch.eye(N) + loss_fn_no_tf = _make_loss( + sr_use_multi_timeframe=False, + corr_matrix=corr, + ) + loss_fn_tf = _make_loss( + sr_use_multi_timeframe=True, + corr_matrix=corr, + ) + + torch.manual_seed(42) + w = _random_weights() + r = _random_returns() + f = _random_features() + + loss_no_tf = loss_fn_no_tf(w, r, f) + loss_tf = loss_fn_tf(w, r, f) + + assert torch.isfinite(loss_no_tf) + assert torch.isfinite(loss_tf) diff --git a/financial_loss_functions/tests/unit/test_cvar_benchmark.py b/financial_loss_functions/tests/unit/test_cvar_benchmark.py new file mode 100644 index 00000000..960db1d7 --- /dev/null +++ b/financial_loss_functions/tests/unit/test_cvar_benchmark.py @@ -0,0 +1,54 @@ +""" +Unit tests for the CVaR benchmark optimizer. +""" + +import pytest +import numpy as np + + +class TestCVaRBenchmark: + + def _make_benchmark(self, **kwargs): + from src.models.cvar_benchmark import CVaRBenchmark, CVaRParams + params = CVaRParams(**kwargs) + return CVaRBenchmark(params=params) + + def test_optimize_returns_valid_weights(self): + bench = self._make_benchmark() + rng = np.random.default_rng(42) + returns = rng.normal(0.001, 0.02, size=(100, 5)) + + weights = bench.optimize(returns) + + assert weights.shape == (5,) + assert np.all(weights >= -1e-6), "Weights should be non-negative" + assert abs(weights.sum() - 1.0) < 1e-4, "Weights should sum to 1" + + def test_equal_weight_fallback_on_degenerate_input(self): + bench = self._make_benchmark(w_max=1.0) + returns = np.zeros((10, 5)) + + weights = bench.optimize(returns) + + assert weights.shape == (5,) + assert abs(weights.sum() - 1.0) < 1e-4 + + def test_rolling_optimize_shape(self): + bench = self._make_benchmark() + rng = np.random.default_rng(99) + X_returns = rng.normal(0.0, 0.01, size=(3, 50, 4)) + + result = bench.rolling_optimize(X_returns) + + assert result.shape == (3, 4) + for i in range(3): + assert abs(result[i].sum() - 1.0) < 1e-4 + + def test_respects_w_max_constraint(self): + bench = self._make_benchmark(w_max=0.25) + rng = np.random.default_rng(7) + returns = rng.normal(0.001, 0.02, size=(200, 8)) + + weights = bench.optimize(returns) + + assert np.all(weights <= 0.25 + 1e-4) diff --git a/financial_loss_functions/tests/unit/test_pyfolio_viz.py b/financial_loss_functions/tests/unit/test_pyfolio_viz.py new file mode 100644 index 00000000..0257aa31 --- /dev/null +++ b/financial_loss_functions/tests/unit/test_pyfolio_viz.py @@ -0,0 +1,80 @@ +""" +Unit tests for pyfolio data conversion utilities. +""" + +import numpy as np +import pandas as pd +import pytest + +from src.evaluation.pyfolio_viz import ( + weights_to_pyfolio, + build_window_dates, + comparison_summary, +) + + +class TestWeightsToPyfolio: + + def test_returns_series_has_correct_dates(self): + W, T_OUT, N = 2, 5, 3 + tickers = ["A", "B", "C"] + weights = np.full((W, N), 1.0 / N) + returns = np.random.default_rng(0).normal(0, 0.01, (W, T_OUT, N)) + + all_dates = pd.bdate_range("2020-01-01", periods=20) + window_dates = [all_dates[0:5], all_dates[5:10]] + bench = pd.Series(0.001, index=all_dates) + + result = weights_to_pyfolio(weights, returns, tickers, window_dates, bench) + + assert isinstance(result["returns"], pd.Series) + assert len(result["returns"]) == T_OUT * W + assert isinstance(result["positions"], pd.DataFrame) + assert "cash" in result["positions"].columns + + def test_positions_columns_match_tickers(self): + W, T_OUT, N = 1, 3, 2 + tickers = ["X", "Y"] + weights = np.array([[0.6, 0.4]]) + returns = np.random.default_rng(1).normal(0, 0.01, (W, T_OUT, N)) + + dates = pd.bdate_range("2021-06-01", periods=10) + window_dates = [dates[0:3]] + bench = pd.Series(0.0, index=dates) + + result = weights_to_pyfolio(weights, returns, tickers, window_dates, bench) + + pos_cols = set(result["positions"].columns) + assert {"X", "Y", "cash"}.issubset(pos_cols) + + +class TestBuildWindowDates: + + def test_correct_number_of_windows(self): + full_idx = pd.bdate_range("2020-01-01", periods=100) + starts = np.array([0, 10, 20]) + in_size, out_size = 30, 10 + + wd = build_window_dates(full_idx, starts, in_size, out_size) + + assert len(wd) == 3 + for dates in wd: + assert len(dates) == out_size + + +class TestComparisonSummary: + + def test_summary_columns(self): + dates = pd.bdate_range("2020-01-01", periods=50) + rets = pd.Series(np.random.default_rng(5).normal(0.001, 0.02, 50), index=dates) + + strategies = { + "A": {"returns": rets, "benchmark_rets": rets * 0}, + } + + summary = comparison_summary(strategies) + + assert "total_return" in summary.columns + assert "sharpe_ratio" in summary.columns + assert "max_drawdown" in summary.columns + assert summary.index[0] == "A" diff --git a/financial_loss_functions/tests/unit/test_sec_filings.py b/financial_loss_functions/tests/unit/test_sec_filings.py new file mode 100644 index 00000000..43d78449 --- /dev/null +++ b/financial_loss_functions/tests/unit/test_sec_filings.py @@ -0,0 +1,102 @@ +""" +Unit tests for SEC filing feature extraction utilities. + +These tests exercise the pure computation functions without requiring +a live edgartools connection or SEC API access. +""" + +import pytest +import numpy as np +import pandas as pd + +from src.data_collection.sec_filings import ( + _compute_event_signal, + compute_composite_fundamental_scores, + FUNDAMENTAL_WEIGHTS, +) + + +class TestEventSignal: + + def _mock_filing(self, date_str): + class _F: + filing_date = date_str + return _F() + + def test_event_within_window(self): + idx = pd.bdate_range("2020-01-01", "2020-01-31") + filings = [self._mock_filing("2020-01-15")] + + signal = _compute_event_signal(filings, idx, window_days=2) + + assert signal.loc[pd.Timestamp("2020-01-15")] == 1.0 + assert signal.loc[pd.Timestamp("2020-01-13")] == 1.0 + assert signal.loc[pd.Timestamp("2020-01-10")] == 0.0 + + def test_no_filings_returns_zeros(self): + idx = pd.bdate_range("2020-06-01", "2020-06-30") + signal = _compute_event_signal([], idx) + assert (signal == 0.0).all() + + +class TestCompositeScores: + + def test_shape_and_columns(self): + tickers = ["A", "B"] + idx = pd.bdate_range("2020-01-01", "2020-01-10") + funds = { + "A": pd.DataFrame( + {"revenue_growth": 0.1, "operating_margin": 0.2, + "debt_to_equity": 1.0, "fcf_yield": 0.05, + "event_signal": 0.0}, + index=idx, + ), + "B": pd.DataFrame( + {"revenue_growth": -0.1, "operating_margin": 0.1, + "debt_to_equity": 2.0, "fcf_yield": -0.02, + "event_signal": 1.0}, + index=idx, + ), + } + + scores = compute_composite_fundamental_scores(funds, tickers, idx) + + assert list(scores.columns) == tickers + assert len(scores) == len(idx) + assert scores.isna().sum().sum() == 0 + + def test_positive_growth_yields_higher_score(self): + tickers = ["GOOD", "BAD"] + idx = pd.bdate_range("2020-01-01", "2020-01-05") + funds = { + "GOOD": pd.DataFrame( + {"revenue_growth": 0.5, "operating_margin": 0.3, + "debt_to_equity": 0.5, "fcf_yield": 0.10, + "event_signal": 0.0}, + index=idx, + ), + "BAD": pd.DataFrame( + {"revenue_growth": -0.5, "operating_margin": -0.1, + "debt_to_equity": 5.0, "fcf_yield": -0.10, + "event_signal": 0.0}, + index=idx, + ), + } + + scores = compute_composite_fundamental_scores(funds, tickers, idx) + assert (scores["GOOD"] > scores["BAD"]).all() + + def test_missing_ticker_defaults_to_zero(self): + tickers = ["X", "Y"] + idx = pd.bdate_range("2020-03-01", "2020-03-05") + funds = { + "X": pd.DataFrame( + {"revenue_growth": 0.1, "operating_margin": 0.2, + "debt_to_equity": 1.0, "fcf_yield": 0.05, + "event_signal": 0.0}, + index=idx, + ), + } + + scores = compute_composite_fundamental_scores(funds, tickers, idx) + assert "Y" in scores.columns