Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions financial_loss_functions/config/hparams.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
"train_batch_size": 256,
"val_batch_size": 1,
"clip_grad_norm": 0.5,
"epochs": 100
"epochs": 300,
"early_stopping_patience": 15,
"lr_scheduler_patience": 7,
"lr_scheduler_factor": 0.5
}
},
"AttentionLSTM": {
Expand All @@ -35,7 +38,59 @@
"train_batch_size": 256,
"val_batch_size": 1,
"clip_grad_norm": 0.5,
"epochs": 100
"epochs": 300,
"early_stopping_patience": 15,
"lr_scheduler_patience": 7,
"lr_scheduler_factor": 0.5
}
},
"DeformTime": {
"model": {
"max_seq_len": 200,
"e_layers": 3,
"d_layers": 2,
"d_model": 64,
"nheads": 4,
"kernel_size": 3,
"dropout": 0.2,
"n_reshape": 2,
"patch_len": 16,
"stride": 8
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 3e-4
},
"train": {
"train_batch_size": 256,
"val_batch_size": 1,
"clip_grad_norm": 0.5,
"epochs": 300,
"early_stopping_patience": 15,
"lr_scheduler_patience": 7,
"lr_scheduler_factor": 0.5
}
},
"CompositeSRLoss": {
"alpha": 0.03,
"beta": 0.02,
"gamma": 0.05,
"delta": 0.01,
"psych_sigma": 1.5,
"psych_thresholds": [0.0, 2.0, -2.0, 5.0, -5.0, 8.0, -8.0, 12.0, -12.0],
"ema_span": 10,
"sr_use_multi_timeframe": true,
"sr_lookback_windows": [5, 10, 21, 42, 63, 105],
"sr_pivot_threshold": 0.02,
"sr_importance_hidden": 8,
"use_macro_override": true,
"macro_override_hidden": 8
},
"CVaRBenchmark": {
"confidence": 0.95,
"risk_aversion": 1.0,
"w_min": 0.0,
"w_max": 0.30,
"L_tar": 1.6
}
}
4 changes: 3 additions & 1 deletion financial_loss_functions/config/paths.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"raw_dir": "data/raw/",
"processed_dir": "data/processed/",
"raw_macro_dir": "data/raw/macro/",
"sec_filings_dir": "data/raw/sec_filings/",
"crsp_dir": ""
},
"raw_files": {
Expand All @@ -28,6 +29,7 @@
"artifacts": {
"artifact_dir": "artifacts/",
"results": "artifacts/results/",
"plots": "artifacts/results/plots/"
"plots": "artifacts/results/plots/",
"pyfolio_output": "artifacts/results/plots/pyfolio/"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2650,7 +2650,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "757fab41",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2725,7 +2725,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "55af6918",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2790,7 +2790,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "de47886a",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2850,7 +2850,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"id": "c5ed9f54",
"metadata": {
"execution": {
Expand Down Expand Up @@ -2916,7 +2916,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "b46ff82a",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3029,7 +3029,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "a94d8a19",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3098,7 +3098,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"id": "610307a0",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3197,7 +3197,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"id": "968d3180",
"metadata": {
"execution": {
Expand Down Expand Up @@ -3312,7 +3312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down
6 changes: 5 additions & 1 deletion financial_loss_functions/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ fredapi==0.5.2
numpy==2.3.4
pandas==2.3.3
cvxopt==1.3.2
cvxpy
scikit-learn==1.7.2
scipy==1.16.3
pytest==8.4.2
Expand All @@ -12,4 +13,7 @@ seaborn==0.13.0
statsmodels==0.14.5
torch==2.9.1
torchvision==0.24.1
optuna==4.6.0
optuna==4.6.0
edgartools
pyfolio-reloaded
empyrical-reloaded
75 changes: 75 additions & 0 deletions financial_loss_functions/scripts/run_sec_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
CLI entry point for SEC filing data collection.

Usage:
python -m scripts.run_sec_collection [--identity "Name email@domain.com"]
"""

import argparse
import json
import logging
from pathlib import Path

import pandas as pd

from scripts.utils import load_config
from src.data_collection.sec_filings import run_sec_filing_pipeline

logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)


def main():
    """Fetch SEC filings for all classified tickers and save composite scores.

    Reads the ticker universe from ``config/sector_classification.json``,
    aligns the fundamental features to the training data's date index via
    ``run_sec_filing_pipeline``, and writes the resulting composite scores
    to ``composite_fundamental_scores.csv`` inside the cache directory.

    Returns early (after logging an error) when the training CSV is missing.
    """
    parser = argparse.ArgumentParser(
        description="Fetch SEC filings and compute fundamental features"
    )
    parser.add_argument(
        "--identity",
        default="FinLossFunctions research@example.com",
        help="SEC EDGAR identity string (Name + email)",
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        help="Override output directory for cached Parquet files",
    )
    args = parser.parse_args()

    paths_config = load_config("paths")

    # The ticker universe is the set of keys in the sector classification map.
    sector_path = Path("config/sector_classification.json")
    with open(sector_path, "r") as f:
        sector_map = json.load(f)
    tickers = sorted(sector_map.keys())
    logger.info("Found %d tickers from sector classification", len(tickers))

    # Fall back to the bundled sample data when no CRSP directory is configured
    # (crsp_dir is "" by default in paths.json, which never exists).
    crsp_dir = Path(paths_config["data"]["crsp_dir"])
    if not crsp_dir.exists():
        crsp_dir = Path(paths_config["data"]["raw_dir"]) / "sample"

    train_file = crsp_dir / paths_config["raw_files"]["train"]
    if not train_file.exists():
        logger.error("Training data not found at %s", train_file)
        return

    train_df = pd.read_csv(train_file)
    if "date" in train_df.columns:
        train_df["date"] = pd.to_datetime(train_df["date"])
        train_df = train_df.set_index("date")

    # CLI override takes precedence; otherwise use the configured cache path.
    cache_dir = Path(args.output_dir) if args.output_dir else Path(
        paths_config.get("data", {}).get("sec_filings_dir", "data/raw/sec_filings")
    )
    # Fix: ensure the cache directory exists before the pipeline and
    # scores.to_csv() write into it — a fresh --output-dir would otherwise
    # raise OSError.
    cache_dir.mkdir(parents=True, exist_ok=True)

    scores = run_sec_filing_pipeline(
        tickers=tickers,
        target_index=train_df.index,
        cache_dir=cache_dir,
        identity=args.identity,
    )

    output_path = cache_dir / "composite_fundamental_scores.csv"
    scores.to_csv(output_path)
    logger.info("Composite scores saved to %s", output_path)


if __name__ == "__main__":
    main()
16 changes: 13 additions & 3 deletions financial_loss_functions/scripts/run_training.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
"""CLI entry point for the model training pipeline.

Usage:
    python -m scripts.run_training [--models MODEL [MODEL ...]]
"""
import argparse
import os
# from dotenv import load_dotenv
from scripts.utils import load_path_config, load_config
from src.training.pipeline import ALL_MODELS, run_training_pipeline

if __name__ == '__main__':
    # load_dotenv()

    parser = argparse.ArgumentParser(description="Run training pipeline")
    parser.add_argument(
        '--models',
        nargs='+',
        choices=list(ALL_MODELS.keys()),
        default=None,
        help=f"Models to train. Choices: {list(ALL_MODELS.keys())}. Default: all",
    )
    args = parser.parse_args()

    # Fix: the diff text carried the load_path_config(...) line twice (old and
    # new diff side); the configs are each loaded exactly once here.
    paths_config = load_path_config(os.path.join('config', 'paths.json'))
    hparams_config = load_config(os.path.join('config', 'hparams.json'))

    # models=None means "train all" inside the pipeline.
    run_training_pipeline(paths_config, hparams_config, models=args.models)
Loading
Loading