From 27759bf2625253c37cb22d2cc2c5db387d02cb0f Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Wed, 18 Jun 2025 14:27:57 +0200
Subject: [PATCH 1/2] update the script with the new packages

---
 benchmarks/liang/README.md         | 17 ++++++++++++++++-
 benchmarks/liang/sim_knockoffs.py  |  2 +-
 benchmarks/liang/sim_liang.py      |  4 ++--
 benchmarks/liang/sim_liang_agg.py  |  4 ++--
 benchmarks/liang/sim_model.py      |  2 +-
 benchmarks/liang/sim_predictors.py |  4 ++--
 pyhrt/continuous.py                |  2 +-
 7 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/benchmarks/liang/README.md b/benchmarks/liang/README.md
index 2c28510..76ed3e2 100644
--- a/benchmarks/liang/README.md
+++ b/benchmarks/liang/README.md
@@ -1,2 +1,17 @@
 # Benchmarks
-This folder contains code to replicate the benchmarks from the paper.
\ No newline at end of file
+This folder contains code to replicate the benchmarks from the paper.
+
+
+```
+# require to create the dataset before 
+mkdir plot data
+NB_FEATURE=100
+for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE; done
+for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10; done
+for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --robust 10; done
+for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10 --robust 10; done
+
+python sim_liang_agg.py
+python sim_liang_model.py
+
+```
\ No newline at end of file
diff --git a/benchmarks/liang/sim_knockoffs.py b/benchmarks/liang/sim_knockoffs.py
index f0cf751..a1c8aba 100644
--- a/benchmarks/liang/sim_knockoffs.py
+++ b/benchmarks/liang/sim_knockoffs.py
@@ -25,7 +25,7 @@ def run(trial):
              ModelInfo(trial, 'Random Forest', None, 'rf') 
                ]
 
-    folds = get_model(infos[0], X, y, None, False).folds
+    folds = get_model(infos[0], X, y, [], False).folds
     models = [get_model(info, X, y, folds, False) for info in infos]
 
     # Get the knockoffs for the OLS and neural net models
diff --git a/benchmarks/liang/sim_liang.py b/benchmarks/liang/sim_liang.py
index 06cdb53..a31c342 100644
--- a/benchmarks/liang/sim_liang.py
+++ b/benchmarks/liang/sim_liang.py
@@ -116,8 +116,8 @@ def run(trial, feature, reset, cv, robust):
 
     # Load the checkpoint if available
     if not reset and os.path.exists(LINEAR_PATH):
-        linear_model = torch.load(LINEAR_PATH)
-        nonlinear_model = torch.load(NONLINEAR_PATH)
+        linear_model = torch.load(LINEAR_PATH, weights_only=False)
+        nonlinear_model = torch.load(NONLINEAR_PATH, weights_only=False)
     else:
         # Train the model
         print('Fitting models with N={} P={} S={} T={} nperms={}'.format(N, P, S, T, nperms))
diff --git a/benchmarks/liang/sim_liang_agg.py b/benchmarks/liang/sim_liang_agg.py
index 03680cd..f8945d4 100644
--- a/benchmarks/liang/sim_liang_agg.py
+++ b/benchmarks/liang/sim_liang_agg.py
@@ -36,8 +36,8 @@ def bounds_plot(bounds):
         plt.rc('axes', lw=2)
         lower = bounds[:,:,0][~np.isnan(bounds[:,:,0])].flatten()
         upper = bounds[:,:,1][~np.isnan(bounds[:,:,1])].flatten()
-        plt.hist(lower, label='Lower band', color='blue', bins=np.linspace(0,50,51), normed=True)
-        plt.hist(upper, label='Upper band', color='orange', bins=np.linspace(50,100,51), normed=True)
+        plt.hist(lower, label='Lower band', color='blue', bins=np.linspace(0,50,51), density=True)
+        plt.hist(upper, label='Upper band', color='orange', bins=np.linspace(50,100,51), density=True)
         plt.xlabel('Band value', fontsize=18, weight='bold')
         plt.ylabel('Proportion', fontsize=18, weight='bold')
         plt.legend(loc='upper right')
diff --git a/benchmarks/liang/sim_model.py b/benchmarks/liang/sim_model.py
index 64d57e2..402bb8e 100644
--- a/benchmarks/liang/sim_model.py
+++ b/benchmarks/liang/sim_model.py
@@ -152,7 +152,7 @@ def fit_nn(X, y, nepochs=100, batch_size=10, val_pct=0.1,
         if verbose:
             print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss))
 
-    model = torch.load(tmp_file)
+    model = torch.load(tmp_file, weights_only=False)
     os.remove(tmp_file)
     return model
 
diff --git a/benchmarks/liang/sim_predictors.py b/benchmarks/liang/sim_predictors.py
index 4b6b16b..5bd887c 100644
--- a/benchmarks/liang/sim_predictors.py
+++ b/benchmarks/liang/sim_predictors.py
@@ -2,7 +2,7 @@
 import numpy as np
 import torch
 from sim_liang import load_or_create_dataset
-from sklearn.externals import joblib
+import joblib
 from pyhrt.utils import create_folds
 from pyhrt.hrt import hrt
 
@@ -125,7 +125,7 @@ def get_r2(trial, info):
         return np.load(r2_path + '.npy')
     from sklearn.metrics import r2_score
     X, y, truth = load_or_create_dataset(trial, None, None, None)
-    model = get_model(info, X, y, None, False)
+    model = get_model(info, X, y, [], False)
     y_pred = model.predict(X)
     score = r2_score(y, y_pred)
     np.save(r2_path, score)
diff --git a/pyhrt/continuous.py b/pyhrt/continuous.py
index 125df78..53937b4 100644
--- a/pyhrt/continuous.py
+++ b/pyhrt/continuous.py
@@ -194,7 +194,7 @@ def fit_mdn(X, y, ncomponents=5,
         if verbose:
             print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss))
 
-    model = torch.load(tmp_file)
+    model = torch.load(tmp_file, weights_only=False)
     os.remove(tmp_file)
     return model
 

From de9d19e04dd7e7f340d1b2ddd75b527983fa50a5 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 23 Jun 2025 18:31:09 +0200
Subject: [PATCH 2/2] Fix addition error

Update the way to run the benchmark
---
 benchmarks/liang/README.md                    | 28 +++++++++++++++++--
 benchmarks/liang/sim_knockoffs.py             |  4 +--
 benchmarks/liang/sim_predictors_agg.py        |  1 +
 benchmarks/liang/sim_predictors_importance.py |  2 +-
 benchmarks/liang/sim_shapley.py               |  2 +-
 5 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/benchmarks/liang/README.md b/benchmarks/liang/README.md
index 76ed3e2..5f214a2 100644
--- a/benchmarks/liang/README.md
+++ b/benchmarks/liang/README.md
@@ -6,12 +6,34 @@ This folder contains code to replicate the benchmarks from the paper.
 # require to create the dataset before 
 mkdir plot data
 NB_FEATURE=100
+# to run in parallel, use the script: sim_parallel.py 0 100 100 100 --reset --nthreads 2
 for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE; done
 for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10; done
 for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --robust 10; done
 for trial in {0..100}; do python sim_liang.py $trial $NB_FEATURE --cv 10 --robust 10; done
+python sim_liang_agg.py # aggregate all the simulated data
 
-python sim_liang_agg.py
-python sim_liang_model.py
+# add results with Shapley values
+for trial in {0..100}; do python sim_shapley.py $trial $NB_FEATURE; done
 
-```
\ No newline at end of file
+# create robust results (sweep_robust); requires the normal simulation first to have X, y, truth
+for trial in {0..100}; do python sim_robust.py $trial $NB_FEATURE --reset-models; done
+python sim_robust_agg.py # aggregate the result
+
+
+# requires the cv, sweep_robust and normal results
+python sim_agg.py
+
+# Predictor:
+python sim_predictors.py 0 100 0 100 --reset --nthreads 8 # create data
+python sim_predictors_agg.py # aggregate data
+python sim_predictors_importance.py
+python sim_predictors_order.py
+
+# requires the predictor results
+python sim_knockoffs.py
+
+# helper function: sim_model.py
+
+
+```
diff --git a/benchmarks/liang/sim_knockoffs.py b/benchmarks/liang/sim_knockoffs.py
index a1c8aba..da00b38 100644
--- a/benchmarks/liang/sim_knockoffs.py
+++ b/benchmarks/liang/sim_knockoffs.py
@@ -31,8 +31,8 @@ def run(trial):
     # Get the knockoffs for the OLS and neural net models
     LINEAR_PATH = 'data/{}/cv_linear.pt'.format(trial)
     NONLINEAR_PATH = 'data/{}/cv_nonlinear.pt'.format(trial)
-    ols_model = torch.load(LINEAR_PATH)
-    nn_model = torch.load(NONLINEAR_PATH)
+    ols_model = torch.load(LINEAR_PATH, weights_only=False)
+    nn_model = torch.load(NONLINEAR_PATH, weights_only=False)
     models.append(ols_model)
     models.append(nn_model)
     infos.append(ModelInfo(trial, 'OLS', None, 'linear'))
diff --git a/benchmarks/liang/sim_predictors_agg.py b/benchmarks/liang/sim_predictors_agg.py
index f4468b8..543b48e 100644
--- a/benchmarks/liang/sim_predictors_agg.py
+++ b/benchmarks/liang/sim_predictors_agg.py
@@ -143,6 +143,7 @@ def r2_scatter(tpr_vals, r2_vals, names):
         for info in infos:
             r2_scores[info.name].append(get_r2(trial, info))
             all_p_filename = 'data/{}/{}.npy'.format(trial, info.prefix)
+            print('data/{}/{}.npy'.format(trial, info.prefix))
             if not os.path.exists(all_p_filename):
                 np.save(all_p_filename, np.full(P, np.nan))
             p_values[info.name][trial] = np.load(all_p_filename)
diff --git a/benchmarks/liang/sim_predictors_importance.py b/benchmarks/liang/sim_predictors_importance.py
index 5e53037..ebb269b 100644
--- a/benchmarks/liang/sim_predictors_importance.py
+++ b/benchmarks/liang/sim_predictors_importance.py
@@ -97,7 +97,7 @@ def rf_importance(models):
                  ModelInfo(trial, 'Elastic Net', None, 'enet'),
                  ModelInfo(trial, 'Lasso', None, 'lasso')]
 
-        models = [get_model(info, None, None, None, False) for info in infos]
+        models = [get_model(info, None, None, [], False) for info in infos]
 
         # Load the p-values for the predictor models
         for info, model in zip(infos, models):
diff --git a/benchmarks/liang/sim_shapley.py b/benchmarks/liang/sim_shapley.py
index 5a68286..0d15921 100644
--- a/benchmarks/liang/sim_shapley.py
+++ b/benchmarks/liang/sim_shapley.py
@@ -28,7 +28,7 @@ def main():
     X = np.loadtxt(X_PATH, delimiter=',')
     y = np.loadtxt(Y_PATH, delimiter=',')
     truth = np.loadtxt(TRUTH_PATH, delimiter=',')
-    nonlinear_model = torch.load(NONLINEAR_PATH)
+    nonlinear_model = torch.load(NONLINEAR_PATH, weights_only=False)
     yhat = nonlinear_model.predict(X)
 
     # Check if all of the results have already been generated and compiled