Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions financial_loss_functions/config/hparams.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
"train_batch_size": 256,
"val_batch_size": 1,
"clip_grad_norm": 0.5,
"epochs": 100
"epochs": 300,
"early_stopping_patience": 15,
"lr_scheduler_patience": 7,
"lr_scheduler_factor": 0.5
}
},
"AttentionLSTM": {
Expand All @@ -35,7 +38,59 @@
"train_batch_size": 256,
"val_batch_size": 1,
"clip_grad_norm": 0.5,
"epochs": 100
"epochs": 300,
"early_stopping_patience": 15,
"lr_scheduler_patience": 7,
"lr_scheduler_factor": 0.5
}
},
"DeformTime": {
"model": {
"max_seq_len": 200,
"e_layers": 3,
"d_layers": 2,
"d_model": 64,
"nheads": 4,
"kernel_size": 3,
"dropout": 0.2,
"n_reshape": 2,
"patch_len": 16,
"stride": 8
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 3e-4
},
"train": {
"train_batch_size": 256,
"val_batch_size": 1,
"clip_grad_norm": 0.5,
"epochs": 300,
"early_stopping_patience": 15,
"lr_scheduler_patience": 7,
"lr_scheduler_factor": 0.5
}
},
"CompositeSRLoss": {
"alpha": 0.03,
"beta": 0.02,
"gamma": 0.05,
"delta": 0.01,
"psych_sigma": 1.5,
"psych_thresholds": [0.0, 2.0, -2.0, 5.0, -5.0, 8.0, -8.0, 12.0, -12.0],
"ema_span": 10,
"sr_use_multi_timeframe": true,
"sr_lookback_windows": [5, 10, 21, 42, 63, 105],
"sr_pivot_threshold": 0.02,
"sr_importance_hidden": 8,
"use_macro_override": true,
"macro_override_hidden": 8
},
"CVaRBenchmark": {
"confidence": 0.95,
"risk_aversion": 1.0,
"w_min": 0.0,
"w_max": 0.30,
"L_tar": 1.6
}
}
4 changes: 3 additions & 1 deletion financial_loss_functions/config/paths.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"raw_dir": "data/raw/",
"processed_dir": "data/processed/",
"raw_macro_dir": "data/raw/macro/",
"sec_filings_dir": "data/raw/sec_filings/",
"crsp_dir": ""
},
"raw_files": {
Expand All @@ -28,6 +29,7 @@
"artifacts": {
"artifact_dir": "artifacts/",
"results": "artifacts/results/",
"plots": "artifacts/results/plots/"
"plots": "artifacts/results/plots/",
"pyfolio_output": "artifacts/results/plots/pyfolio/"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2650,7 +2650,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "757fab41",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2725,7 +2725,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "55af6918",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2790,7 +2790,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "de47886a",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2850,7 +2850,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"id": "c5ed9f54",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2916,7 +2916,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "b46ff82a",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3029,7 +3029,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "a94d8a19",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3098,7 +3098,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"id": "610307a0",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3197,7 +3197,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"id": "968d3180",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3312,7 +3312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down
6 changes: 5 additions & 1 deletion financial_loss_functions/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ fredapi==0.5.2
numpy==2.3.4
pandas==2.3.3
cvxopt==1.3.2
cvxpy
scikit-learn==1.7.2
scipy==1.16.3
pytest==8.4.2
Expand All @@ -12,4 +13,7 @@ seaborn==0.13.0
statsmodels==0.14.5
torch==2.9.1
torchvision==0.24.1
optuna==4.6.0
optuna==4.6.0
edgartools
pyfolio-reloaded
empyrical-reloaded
75 changes: 75 additions & 0 deletions financial_loss_functions/scripts/run_sec_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
CLI entry point for SEC filing data collection.

Usage:
python -m scripts.run_sec_collection [--identity "Name email@domain.com"]
"""

import argparse
import json
import logging
from pathlib import Path

import pandas as pd

from scripts.utils import load_config
from src.data_collection.sec_filings import run_sec_filing_pipeline

logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)


def main():
    """Fetch SEC filings for all classified tickers and save composite scores.

    Reads the ticker universe from ``config/sector_classification.json``,
    aligns the fundamental features to the training data's date index via
    ``run_sec_filing_pipeline``, and writes the resulting composite scores
    to ``composite_fundamental_scores.csv`` inside the cache directory.

    Returns early (after logging an error) when the training CSV is missing.
    """
    parser = argparse.ArgumentParser(
        description="Fetch SEC filings and compute fundamental features"
    )
    parser.add_argument(
        "--identity",
        default="FinLossFunctions research@example.com",
        help="SEC EDGAR identity string (Name + email)",
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        help="Override output directory for cached Parquet files",
    )
    args = parser.parse_args()

    paths_config = load_config("paths")

    # The ticker universe is the set of keys in the sector classification map.
    sector_path = Path("config/sector_classification.json")
    with open(sector_path, "r") as f:
        sector_map = json.load(f)
    tickers = sorted(sector_map.keys())
    logger.info("Found %d tickers from sector classification", len(tickers))

    # Fall back to the bundled sample data when no CRSP directory is configured
    # (crsp_dir is "" by default in paths.json, which never exists).
    crsp_dir = Path(paths_config["data"]["crsp_dir"])
    if not crsp_dir.exists():
        crsp_dir = Path(paths_config["data"]["raw_dir"]) / "sample"

    train_file = crsp_dir / paths_config["raw_files"]["train"]
    if not train_file.exists():
        logger.error("Training data not found at %s", train_file)
        return

    train_df = pd.read_csv(train_file)
    if "date" in train_df.columns:
        train_df["date"] = pd.to_datetime(train_df["date"])
        train_df = train_df.set_index("date")

    # CLI override takes precedence; otherwise use the configured cache path.
    cache_dir = Path(args.output_dir) if args.output_dir else Path(
        paths_config.get("data", {}).get("sec_filings_dir", "data/raw/sec_filings")
    )
    # Fix: ensure the cache directory exists before the pipeline and
    # scores.to_csv() write into it — a fresh --output-dir would otherwise
    # raise OSError.
    cache_dir.mkdir(parents=True, exist_ok=True)

    scores = run_sec_filing_pipeline(
        tickers=tickers,
        target_index=train_df.index,
        cache_dir=cache_dir,
        identity=args.identity,
    )

    output_path = cache_dir / "composite_fundamental_scores.csv"
    scores.to_csv(output_path)
    logger.info("Composite scores saved to %s", output_path)


if __name__ == "__main__":
    main()
16 changes: 13 additions & 3 deletions financial_loss_functions/scripts/run_training.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
"""CLI entry point for the model training pipeline.

Usage:
    python -m scripts.run_training [--models MODEL [MODEL ...]]
"""
import argparse
import os
# from dotenv import load_dotenv
from scripts.utils import load_path_config, load_config
from src.training.pipeline import ALL_MODELS, run_training_pipeline

if __name__ == '__main__':
    # load_dotenv()

    parser = argparse.ArgumentParser(description="Run training pipeline")
    parser.add_argument(
        '--models',
        nargs='+',
        choices=list(ALL_MODELS.keys()),
        default=None,
        help=f"Models to train. Choices: {list(ALL_MODELS.keys())}. Default: all",
    )
    args = parser.parse_args()

    # Fix: the diff text carried the load_path_config(...) line twice (old and
    # new diff side); the configs are each loaded exactly once here.
    paths_config = load_path_config(os.path.join('config', 'paths.json'))
    hparams_config = load_config(os.path.join('config', 'hparams.json'))

    # models=None means "train all" inside the pipeline.
    run_training_pipeline(paths_config, hparams_config, models=args.models)
Loading
Loading