Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion benchmarks/liang/README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,39 @@
# Benchmarks
This folder contains code to replicate the benchmarks from the paper.
This folder contains code to replicate the benchmarks from the paper.


```
# requires the dataset to be created beforehand
mkdir plot data
NB_FEATURE=100
# for parallel execution use the script: sim_parallel.py 0 100 100 100 --reset --nthreads 2
for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE; done
for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10; done
for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --robust 10; done
for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10 --robust 10; done
python sim_liang_agg.py # aggregate all the simulated data

# for adding results with Shapley values
for trial in {0..100}; do python sim_shapley.py $trial $NB_FEATURE; done

# create robust: sweep_robust / requires a normal sim run to provide X, Y, truth
for trial in {0..100}; do python sim_robust.py $trial $NB_FEATURE --reset-models; done
python sim_robust_agg.py # aggregate the result


# require result cv, sweep_robust and normal
python sim_agg.py

# Predictor:
python sim_predictors.py 0 100 0 100 --reset --nthreads 8 # create data
python sim_predictors_agg.py # aggregate data
python sim_predictors_importance.py
python sim_predictors_order.py

# requires predictor results
python sim_knockoffs.py

# helper function: sim_model.py


```
6 changes: 3 additions & 3 deletions benchmarks/liang/sim_knockoffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def run(trial):
ModelInfo(trial, 'Random Forest', None, 'rf')
]

folds = get_model(infos[0], X, y, None, False).folds
folds = get_model(infos[0], X, y, [], False).folds
models = [get_model(info, X, y, folds, False) for info in infos]

# Get the knockoffs for the OLS and neural net models
LINEAR_PATH = 'data/{}/cv_linear.pt'.format(trial)
NONLINEAR_PATH = 'data/{}/cv_nonlinear.pt'.format(trial)
ols_model = torch.load(LINEAR_PATH)
nn_model = torch.load(NONLINEAR_PATH)
ols_model = torch.load(LINEAR_PATH, weights_only=False)
nn_model = torch.load(NONLINEAR_PATH, weights_only=False)
models.append(ols_model)
models.append(nn_model)
infos.append(ModelInfo(trial, 'OLS', None, 'linear'))
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/liang/sim_liang.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def run(trial, feature, reset, cv, robust):

# Load the checkpoint if available
if not reset and os.path.exists(LINEAR_PATH):
linear_model = torch.load(LINEAR_PATH)
nonlinear_model = torch.load(NONLINEAR_PATH)
linear_model = torch.load(LINEAR_PATH, weights_only=False)
nonlinear_model = torch.load(NONLINEAR_PATH, weights_only=False)
else:
# Train the model
print('Fitting models with N={} P={} S={} T={} nperms={}'.format(N, P, S, T, nperms))
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/liang/sim_liang_agg.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def bounds_plot(bounds):
plt.rc('axes', lw=2)
lower = bounds[:,:,0][~np.isnan(bounds[:,:,0])].flatten()
upper = bounds[:,:,1][~np.isnan(bounds[:,:,1])].flatten()
plt.hist(lower, label='Lower band', color='blue', bins=np.linspace(0,50,51), normed=True)
plt.hist(upper, label='Upper band', color='orange', bins=np.linspace(50,100,51), normed=True)
plt.hist(lower, label='Lower band', color='blue', bins=np.linspace(0,50,51), density=True)
plt.hist(upper, label='Upper band', color='orange', bins=np.linspace(50,100,51), density=True)
plt.xlabel('Band value', fontsize=18, weight='bold')
plt.ylabel('Proportion', fontsize=18, weight='bold')
plt.legend(loc='upper right')
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/liang/sim_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def fit_nn(X, y, nepochs=100, batch_size=10, val_pct=0.1,
if verbose:
print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss))

model = torch.load(tmp_file)
model = torch.load(tmp_file, weights_only=False)
os.remove(tmp_file)
return model

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/liang/sim_predictors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import torch
from sim_liang import load_or_create_dataset
from sklearn.externals import joblib
import joblib
from pyhrt.utils import create_folds
from pyhrt.hrt import hrt

Expand Down Expand Up @@ -125,7 +125,7 @@ def get_r2(trial, info):
return np.load(r2_path + '.npy')
from sklearn.metrics import r2_score
X, y, truth = load_or_create_dataset(trial, None, None, None)
model = get_model(info, X, y, None, False)
model = get_model(info, X, y, [], False)
y_pred = model.predict(X)
score = r2_score(y, y_pred)
np.save(r2_path, score)
Expand Down
1 change: 1 addition & 0 deletions benchmarks/liang/sim_predictors_agg.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def r2_scatter(tpr_vals, r2_vals, names):
for info in infos:
r2_scores[info.name].append(get_r2(trial, info))
all_p_filename = 'data/{}/{}.npy'.format(trial, info.prefix)
print('data/{}/{}.npy'.format(trial, info.prefix))
if not os.path.exists(all_p_filename):
np.save(all_p_filename, np.full(P, np.nan))
p_values[info.name][trial] = np.load(all_p_filename)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/liang/sim_predictors_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def rf_importance(models):
ModelInfo(trial, 'Elastic Net', None, 'enet'),
ModelInfo(trial, 'Lasso', None, 'lasso')]

models = [get_model(info, None, None, None, False) for info in infos]
models = [get_model(info, None, None, [], False) for info in infos]

# Load the p-values for the predictor models
for info, model in zip(infos, models):
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/liang/sim_shapley.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def main():
X = np.loadtxt(X_PATH, delimiter=',')
y = np.loadtxt(Y_PATH, delimiter=',')
truth = np.loadtxt(TRUTH_PATH, delimiter=',')
nonlinear_model = torch.load(NONLINEAR_PATH)
nonlinear_model = torch.load(NONLINEAR_PATH, weights_only=False)
yhat = nonlinear_model.predict(X)

# Check if all of the results have already been generated and compiled
Expand Down
2 changes: 1 addition & 1 deletion pyhrt/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def fit_mdn(X, y, ncomponents=5,
if verbose:
print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss))

model = torch.load(tmp_file)
model = torch.load(tmp_file, weights_only=False)
os.remove(tmp_file)
return model

Expand Down