From 27759bf2625253c37cb22d2cc2c5db387d02cb0f Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 18 Jun 2025 14:27:57 +0200 Subject: [PATCH 1/2] update the script with the new packages --- benchmarks/liang/README.md | 17 ++++++++++++++++- benchmarks/liang/sim_knockoffs.py | 2 +- benchmarks/liang/sim_liang.py | 4 ++-- benchmarks/liang/sim_liang_agg.py | 4 ++-- benchmarks/liang/sim_model.py | 2 +- benchmarks/liang/sim_predictors.py | 4 ++-- pyhrt/continuous.py | 2 +- 7 files changed, 25 insertions(+), 10 deletions(-) diff --git a/benchmarks/liang/README.md b/benchmarks/liang/README.md index 2c28510..76ed3e2 100644 --- a/benchmarks/liang/README.md +++ b/benchmarks/liang/README.md @@ -1,2 +1,17 @@ # Benchmarks -This folder contains code to replicate the benchmarks from the paper. \ No newline at end of file +This folder contains code to replicate the benchmarks from the paper. + + +``` +# require to create the dataset before +mkdir plot data +NB_FEATURE=100 +for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE; done +for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10; done +for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --robust 10; done +for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10 --robust 10; done + +python sim_liang_agg.py +python sim_liang_model.py + +``` \ No newline at end of file diff --git a/benchmarks/liang/sim_knockoffs.py b/benchmarks/liang/sim_knockoffs.py index f0cf751..a1c8aba 100644 --- a/benchmarks/liang/sim_knockoffs.py +++ b/benchmarks/liang/sim_knockoffs.py @@ -25,7 +25,7 @@ def run(trial): ModelInfo(trial, 'Random Forest', None, 'rf') ] - folds = get_model(infos[0], X, y, None, False).folds + folds = get_model(infos[0], X, y, [], False).folds models = [get_model(info, X, y, folds, False) for info in infos] # Get the knockoffs for the OLS and neural net models diff --git a/benchmarks/liang/sim_liang.py b/benchmarks/liang/sim_liang.py index 06cdb53..a31c342 100644 --- 
a/benchmarks/liang/sim_liang.py +++ b/benchmarks/liang/sim_liang.py @@ -116,8 +116,8 @@ def run(trial, feature, reset, cv, robust): # Load the checkpoint if available if not reset and os.path.exists(LINEAR_PATH): - linear_model = torch.load(LINEAR_PATH) - nonlinear_model = torch.load(NONLINEAR_PATH) + linear_model = torch.load(LINEAR_PATH, weights_only=False) + nonlinear_model = torch.load(NONLINEAR_PATH, weights_only=False) else: # Train the model print('Fitting models with N={} P={} S={} T={} nperms={}'.format(N, P, S, T, nperms)) diff --git a/benchmarks/liang/sim_liang_agg.py b/benchmarks/liang/sim_liang_agg.py index 03680cd..f8945d4 100644 --- a/benchmarks/liang/sim_liang_agg.py +++ b/benchmarks/liang/sim_liang_agg.py @@ -36,8 +36,8 @@ def bounds_plot(bounds): plt.rc('axes', lw=2) lower = bounds[:,:,0][~np.isnan(bounds[:,:,0])].flatten() upper = bounds[:,:,1][~np.isnan(bounds[:,:,1])].flatten() - plt.hist(lower, label='Lower band', color='blue', bins=np.linspace(0,50,51), normed=True) - plt.hist(upper, label='Upper band', color='orange', bins=np.linspace(50,100,51), normed=True) + plt.hist(lower, label='Lower band', color='blue', bins=np.linspace(0,50,51), density=True) + plt.hist(upper, label='Upper band', color='orange', bins=np.linspace(50,100,51), density=True) plt.xlabel('Band value', fontsize=18, weight='bold') plt.ylabel('Proportion', fontsize=18, weight='bold') plt.legend(loc='upper right') diff --git a/benchmarks/liang/sim_model.py b/benchmarks/liang/sim_model.py index 64d57e2..402bb8e 100644 --- a/benchmarks/liang/sim_model.py +++ b/benchmarks/liang/sim_model.py @@ -152,7 +152,7 @@ def fit_nn(X, y, nepochs=100, batch_size=10, val_pct=0.1, if verbose: print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss)) - model = torch.load(tmp_file) + model = torch.load(tmp_file, weights_only=False) os.remove(tmp_file) return model diff --git a/benchmarks/liang/sim_predictors.py b/benchmarks/liang/sim_predictors.py index 4b6b16b..5bd887c 100644 
--- a/benchmarks/liang/sim_predictors.py +++ b/benchmarks/liang/sim_predictors.py @@ -2,7 +2,7 @@ import numpy as np import torch from sim_liang import load_or_create_dataset -from sklearn.externals import joblib +import joblib from pyhrt.utils import create_folds from pyhrt.hrt import hrt @@ -125,7 +125,7 @@ def get_r2(trial, info): return np.load(r2_path + '.npy') from sklearn.metrics import r2_score X, y, truth = load_or_create_dataset(trial, None, None, None) - model = get_model(info, X, y, None, False) + model = get_model(info, X, y, [], False) y_pred = model.predict(X) score = r2_score(y, y_pred) np.save(r2_path, score) diff --git a/pyhrt/continuous.py b/pyhrt/continuous.py index 125df78..53937b4 100644 --- a/pyhrt/continuous.py +++ b/pyhrt/continuous.py @@ -194,7 +194,7 @@ def fit_mdn(X, y, ncomponents=5, if verbose: print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss)) - model = torch.load(tmp_file) + model = torch.load(tmp_file, weights_only=False) os.remove(tmp_file) return model From de9d19e04dd7e7f340d1b2ddd75b527983fa50a5 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Jun 2025 18:31:09 +0200 Subject: [PATCH 2/2] Fix additional errors Update the way to run the benchmark --- benchmarks/liang/README.md | 28 +++++++++++++++++-- benchmarks/liang/sim_knockoffs.py | 4 +-- benchmarks/liang/sim_predictors_agg.py | 1 + benchmarks/liang/sim_predictors_importance.py | 2 +- benchmarks/liang/sim_shapley.py | 2 +- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/benchmarks/liang/README.md b/benchmarks/liang/README.md index 76ed3e2..5f214a2 100644 --- a/benchmarks/liang/README.md +++ b/benchmarks/liang/README.md @@ -6,12 +6,34 @@ This folder contains code to replicate the benchmarks from the paper. 
# require to create the dataset before mkdir plot data NB_FEATURE=100 +# to run the trials in parallel, use the script: sim_parallel.py 0 100 100 100 --reset --nthreads 2 for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE; done for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10; done for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --robust 10; done for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10 --robust 10; done +python sim_liang_agg.py # aggregate all the simulated data -python sim_liang_agg.py -python sim_liang_model.py +# to add results with Shapley values +for trial in {0..100}; do python sim_shapley.py $trial $NB_FEATURE; done -``` \ No newline at end of file +# create robust: sweep_robust / requires the normal sim to have X, Y, truth +for trial in {0..100}; do python sim_robust.py $trial $NB_FEATURE --reset-models; done +python sim_robust_agg.py # aggregate the result + + +# requires the cv, sweep_robust and normal results +python sim_agg.py + +# Predictor: +python sim_predictors.py 0 100 0 100 --reset --nthreads 8 # create data +python sim_predictors_agg.py # aggregate data +python sim_predictors_importance.py +python sim_predictors_order.py + +# requires the predictor results +python sim_knockoffs.py + +# helper function: sim_model.py + + +``` diff --git a/benchmarks/liang/sim_knockoffs.py b/benchmarks/liang/sim_knockoffs.py index a1c8aba..da00b38 100644 --- a/benchmarks/liang/sim_knockoffs.py +++ b/benchmarks/liang/sim_knockoffs.py @@ -31,8 +31,8 @@ def run(trial): # Get the knockoffs for the OLS and neural net models LINEAR_PATH = 'data/{}/cv_linear.pt'.format(trial) NONLINEAR_PATH = 'data/{}/cv_nonlinear.pt'.format(trial) - ols_model = torch.load(LINEAR_PATH) - nn_model = torch.load(NONLINEAR_PATH) + ols_model = torch.load(LINEAR_PATH, weights_only=False) + nn_model = torch.load(NONLINEAR_PATH, weights_only=False) models.append(ols_model) models.append(nn_model) infos.append(ModelInfo(trial, 'OLS', None, 
'linear')) diff --git a/benchmarks/liang/sim_predictors_agg.py b/benchmarks/liang/sim_predictors_agg.py index f4468b8..543b48e 100644 --- a/benchmarks/liang/sim_predictors_agg.py +++ b/benchmarks/liang/sim_predictors_agg.py @@ -143,6 +143,7 @@ def r2_scatter(tpr_vals, r2_vals, names): for info in infos: r2_scores[info.name].append(get_r2(trial, info)) all_p_filename = 'data/{}/{}.npy'.format(trial, info.prefix) + print('data/{}/{}.npy'.format(trial, info.prefix)) if not os.path.exists(all_p_filename): np.save(all_p_filename, np.full(P, np.nan)) p_values[info.name][trial] = np.load(all_p_filename) diff --git a/benchmarks/liang/sim_predictors_importance.py b/benchmarks/liang/sim_predictors_importance.py index 5e53037..ebb269b 100644 --- a/benchmarks/liang/sim_predictors_importance.py +++ b/benchmarks/liang/sim_predictors_importance.py @@ -97,7 +97,7 @@ def rf_importance(models): ModelInfo(trial, 'Elastic Net', None, 'enet'), ModelInfo(trial, 'Lasso', None, 'lasso')] - models = [get_model(info, None, None, None, False) for info in infos] + models = [get_model(info, None, None, [], False) for info in infos] # Load the p-values for the predictor models for info, model in zip(infos, models): diff --git a/benchmarks/liang/sim_shapley.py b/benchmarks/liang/sim_shapley.py index 5a68286..0d15921 100644 --- a/benchmarks/liang/sim_shapley.py +++ b/benchmarks/liang/sim_shapley.py @@ -28,7 +28,7 @@ def main(): X = np.loadtxt(X_PATH, delimiter=',') y = np.loadtxt(Y_PATH, delimiter=',') truth = np.loadtxt(TRUTH_PATH, delimiter=',') - nonlinear_model = torch.load(NONLINEAR_PATH) + nonlinear_model = torch.load(NONLINEAR_PATH, weights_only=False) yhat = nonlinear_model.predict(X) # Check if all of the results have already been generated and compiled