-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_data.py
More file actions
36 lines (29 loc) · 1.09 KB
/
load_data.py
File metadata and controls
36 lines (29 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import yfinance as yf
import pandas as pd
import os
FILENAME = "market_data.parquet"  # Parquet is much faster and smaller than CSV


def get_data(tickers, start_date, end_date, *, force_refresh: bool = False) -> pd.DataFrame:
    """Return market data, preferring the local Parquet cache over a download.

    When ``FILENAME`` exists and ``force_refresh`` is false, the file is read
    and returned as-is.

    NOTE: the cache is keyed on the file name only. On a cache hit,
    ``tickers``, ``start_date`` and ``end_date`` are NOT compared with what
    the file contains, so a file written for a different request is returned
    unchanged. Pass ``force_refresh=True`` (or delete the file) whenever any
    of those arguments change.

    Args:
        tickers: Symbols forwarded unchanged to ``yf.download``.
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.
        force_refresh: If true, skip the cache lookup, download again and
            overwrite ``FILENAME`` with the new result. Defaults to ``False``,
            which keeps the original cache-first behaviour.

    Returns:
        The cached frame, or the freshly downloaded one (which is also
        written to ``FILENAME`` before being returned).

    Raises:
        ValueError: If the download produces an empty frame. Nothing is
            written to disk in that case, so an existing cache file is kept.
    """
    cache_exists = os.path.exists(FILENAME)

    # Cache hit: return the stored frame without touching the network.
    if cache_exists and not force_refresh:
        print("Loading data from local disk...")
        return pd.read_parquet(FILENAME)

    # Cache miss (or explicit refresh): download it.
    print("Downloading data from yfinance...")
    df = yf.download(tickers, start=start_date, end=end_date, auto_adjust=True)

    if df.empty:
        if cache_exists:
            # Only reachable with force_refresh=True; the old file is untouched.
            raise ValueError("Download failed; the existing local file was left unchanged.")
        raise ValueError("Download failed and no local file found.")

    # Save it for next time.
    df.to_parquet(FILENAME)
    return df
# Usage
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA"]


def _sector_of(symbol):
    """Return the 'sector' entry of yfinance's info for *symbol*.

    Falls back to 'Unknown' when the entry is missing or when the lookup
    raises, so one failing symbol does not abort the whole script.
    """
    try:
        return yf.Ticker(symbol).info.get('sector', 'Unknown')
    except Exception:
        return 'Unknown'


# Map every ticker to its sector label (best effort, see _sector_of).
sector_map = {}
for ticker in tickers:
    sector_map[ticker] = _sector_of(ticker)

# Load the price data (from the local cache or a download) for the full window.
df = get_data(tickers, "2013-01-01", "2024-12-31")

# Reshape to long format, one step at a time:
df = df.stack(level=1)                    # move column level 1 into the row index
df = df.rename_axis(['Date', 'Ticker'])   # name the two row-index levels
df = df.sort_index()                      # cleaner for returns

# Per-ticker groups for downstream calculations.
g = df.groupby('Ticker')