From 3894b18b7b3acfc11d37a131669efcf27e8790a8 Mon Sep 17 00:00:00 2001
From: Luca Dovichi <lucadovichi@gmail.com>
Date: Sat, 15 Nov 2025 15:32:54 -0600
Subject: [PATCH 1/4] [ndm] add warnings regarding inconsistent random_state
 usage

---
 src/netml/ndm/ae.py    |  8 ++++++--
 src/netml/ndm/kde.py   |  8 ++++++--
 src/netml/ndm/model.py | 10 ++++++++--
 src/netml/ndm/ocsvm.py | 10 ++++++----
 4 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/netml/ndm/ae.py b/src/netml/ndm/ae.py
index 10ffa39..cc86a96 100644
--- a/src/netml/ndm/ae.py
+++ b/src/netml/ndm/ae.py
@@ -71,7 +71,7 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3,
                  loss='mse',
                  dropout_rate=0.2,
                  l2_regularizer=0.1, validation_size=0.1,
-                 verbose=1, random_state=42, contamination=0.1, hid_dim=16, lat_dim=8):
+                 verbose=1, contamination=0.1, hid_dim=16, lat_dim=8, **kwargs):
         """AutoEncoder
 
         Parameters
@@ -120,7 +120,6 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3,
         self.l2_regularizer = l2_regularizer
         self.validation_size = validation_size
         self.verbose = verbose
-        self.random_state = random_state
         self.lr = lr
         self.contamination = contamination
         self.hid_dim = hid_dim
@@ -128,6 +127,11 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3,
 
         check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', include_left=True)
 
+        if "random_state" in kwargs and verbose > 5:
+            print(
+                "Warning: 'random_state' passed to AutoEncoder. Use torch.manual_seed() instead."
+            )
+
         if self.loss == 'mse' or (not self.loss):
             self.criterion = nn.MSELoss()
 
diff --git a/src/netml/ndm/kde.py b/src/netml/ndm/kde.py
index 8f4655e..45ccb4f 100644
--- a/src/netml/ndm/kde.py
+++ b/src/netml/ndm/kde.py
@@ -16,7 +16,7 @@ class KDE(KernelDensity, BaseDetector):
 
     def __init__(self, bandwidth=1.0, algorithm='auto',
                  kernel='gaussian', metric="euclidean", atol=0, rtol=0, contamination=0.1,
-                 breadth_first=True, leaf_size=40, metric_params=None, random_state=42):
+                 breadth_first=True, leaf_size=40, metric_params=None, verbose=0, **kwargs):
         """Kernel density estimation (KDE)
         Parameters
         ----------
@@ -63,7 +63,11 @@ def __init__(self, bandwidth=1.0, algorithm='auto',
         self.leaf_size = leaf_size
         self.metric_params = metric_params
         self.contamination = contamination
-        self.random_state = random_state
+
+        if "random_state" in kwargs and verbose > 5:
+            print(
+                "Warning: 'random_state' passed to KDE has no effect."
+            )
 
         # run the choose algorithm code so that exceptions will happen here
         # we're using clone() in the GenerativeBayes classifier,
diff --git a/src/netml/ndm/model.py b/src/netml/ndm/model.py
index a4a5532..8df29c0 100644
--- a/src/netml/ndm/model.py
+++ b/src/netml/ndm/model.py
@@ -13,7 +13,7 @@
 
 class MODEL:
 
-    def __init__(self, model=None, *, score_metric='auc', verbose=1, random_state=42):
+    def __init__(self, model=None, *, score_metric='auc', verbose=1, **kwargs):
         """Train and test a model on a given data.
 
         Parameters
@@ -40,10 +40,16 @@ def __init__(self, model=None, *, score_metric='auc', verbose=1, random_state=42
         self.model_name = model.name
         self.score_metric = score_metric
         self.verbose = verbose
-        self.random_state = random_state
         # store all data generated during training and testing the model.
         self.history = {}
 
+        if "random_state" in kwargs:
+            self.random_state = kwargs["random_state"]
+            if verbose > 5:
+                print("Warning: setting random_state for a model wrapper doesn't affect the underlying predictions.")
+        else:
+            self.random_state = 42
+
     @timing
     def _train(self, X_train, y_train=None):
         """fit the model on the train set
diff --git a/src/netml/ndm/ocsvm.py b/src/netml/ndm/ocsvm.py
index bc4bcc6..c98d250 100644
--- a/src/netml/ndm/ocsvm.py
+++ b/src/netml/ndm/ocsvm.py
@@ -12,7 +12,7 @@ class OCSVM(OneClassSVM):
 
     def __init__(self, kernel='rbf', degree=3, gamma='scale',
                  coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200,
-                 verbose=False, max_iter=-1, random_state=100):
+                 verbose=False, max_iter=-1, **kwargs):
         """One Class SVM (OCSVM)
 
         Parameters
@@ -49,10 +49,13 @@ def __init__(self, kernel='rbf', degree=3, gamma='scale',
 
         verbose: bool (default is False)
             Enable verbose output.
+        """
 
-        random_state: int (default is 42)
+        if "random_state" in kwargs and verbose > 5:
+            print(
+                "Warning: 'random_state' passed to OCSVM has no effect."
+            )
 
-        """
         super(OCSVM, self).__init__(
             kernel=kernel,
             degree=degree,
@@ -66,7 +69,6 @@ def __init__(self, kernel='rbf', degree=3, gamma='scale',
             max_iter=max_iter,
         )
 
-        self.random_state = random_state
         self.verbose = verbose
 
     # override decision_function. because test and grid_search will use decision_function first

From 245741817ba02d57aefa543b10cea9063129310c Mon Sep 17 00:00:00 2001
From: Luca Dovichi <lucadovichi@gmail.com>
Date: Sat, 15 Nov 2025 15:40:51 -0600
Subject: [PATCH 2/4] clean up random_state documentation within source files

---
 src/netml/ndm/ae.py    | 4 +---
 src/netml/ndm/gmm.py   | 2 ++
 src/netml/ndm/kde.py   | 2 +-
 src/netml/ndm/model.py | 5 -----
 src/netml/ndm/ocsvm.py | 2 +-
 src/netml/ndm/pca.py   | 4 ++--
 6 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/netml/ndm/ae.py b/src/netml/ndm/ae.py
index cc86a96..b8c0e00 100644
--- a/src/netml/ndm/ae.py
+++ b/src/netml/ndm/ae.py
@@ -109,9 +109,7 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3,
         verbose: int (default is 1)
             A print level is to control what information should be printed according to the given value.
             The higher the value is, the more info is printed.
-
-        random_state: int (default is 42)
-
+
         """
         self.epochs = epochs
         self.batch_size = batch_size
diff --git a/src/netml/ndm/gmm.py b/src/netml/ndm/gmm.py
index cba4278..724eda2 100644
--- a/src/netml/ndm/gmm.py
+++ b/src/netml/ndm/gmm.py
@@ -71,6 +71,8 @@ def __init__(self, n_components=1, covariance_type='full', tol=1e-3,
         contamination: float (default is 0.1)
              It's in range (0,1). A threshold used to decide the normal score (not used).
 
+        random_state: int (default is 42)
+
         """
         self.n_components = n_components
         self.covariance_type = covariance_type
diff --git a/src/netml/ndm/kde.py b/src/netml/ndm/kde.py
index 45ccb4f..d670597 100644
--- a/src/netml/ndm/kde.py
+++ b/src/netml/ndm/kde.py
@@ -66,7 +66,7 @@ def __init__(self, bandwidth=1.0, algorithm='auto',
 
         if "random_state" in kwargs and verbose > 5:
             print(
-                "Warning: 'random_state' passed to KDE has no effect."
+                "Warning: argument 'random_state' passed to KDE has no effect."
             )
 
         # run the choose algorithm code so that exceptions will happen here
diff --git a/src/netml/ndm/model.py b/src/netml/ndm/model.py
index 8df29c0..72d108f 100644
--- a/src/netml/ndm/model.py
+++ b/src/netml/ndm/model.py
@@ -28,9 +28,6 @@ def __init__(self, model=None, *, score_metric='auc', verbose=1, **kwargs):
             a print level is to control what information should be printed according to the given value.
             The higher the value is, the more info is printed.
 
-        random_state: int
-            a value is to make your experiments more reproducible.
-
         Returns
         -------
             a MODEL instance
@@ -47,8 +44,6 @@ def __init__(self, model=None, *, score_metric='auc', verbose=1, **kwargs):
             self.random_state = kwargs["random_state"]
             if verbose > 5:
                 print("Warning: setting random_state for a model wrapper doesn't affect the underlying predictions.")
-        else:
-            self.random_state = 42
 
     @timing
     def _train(self, X_train, y_train=None):
diff --git a/src/netml/ndm/ocsvm.py b/src/netml/ndm/ocsvm.py
index c98d250..00bbc4b 100644
--- a/src/netml/ndm/ocsvm.py
+++ b/src/netml/ndm/ocsvm.py
@@ -53,7 +53,7 @@ def __init__(self, kernel='rbf', degree=3, gamma='scale',
 
         if "random_state" in kwargs and verbose > 5:
             print(
-                "Warning: 'random_state' passed to OCSVM has no effect."
+                "Warning: argument 'random_state' passed to OCSVM has no effect."
             )
 
         super(OCSVM, self).__init__(
diff --git a/src/netml/ndm/pca.py b/src/netml/ndm/pca.py
index 53aa3ee..d505366 100644
--- a/src/netml/ndm/pca.py
+++ b/src/netml/ndm/pca.py
@@ -14,7 +14,7 @@ class PCA(BaseDetector):
 
     def __init__(self, n_components=None, n_selected_components=None,
                  contamination=0.1, copy=True, whiten=False, svd_solver='auto',
-                 tol=0.0, iterated_power='auto', random_state=None,
+                 tol=0.0, iterated_power='auto', random_state=42,
                  weighted=True, standardization=True):
         """Principal component analysis (PCA)
 
@@ -48,7 +48,7 @@ def __init__(self, n_components=None, n_selected_components=None,
             Number of iterations for the power method computed by
             svd_solver == 'randomized'.
 
-        random_state : int
+        random_state : int, optional (default=42)
 
         weighted : bool, optional (default=True)
             If True, the eigenvalues are used in score computation.

From 3900e4e6d6e55252bf7938e7cdff51177f778dc3 Mon Sep 17 00:00:00 2001
From: Luca Dovichi <lucadovichi@gmail.com>
Date: Sat, 15 Nov 2025 15:49:04 -0600
Subject: [PATCH 3/4] add expected verbose arg to PCA

---
 src/netml/ndm/pca.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/netml/ndm/pca.py b/src/netml/ndm/pca.py
index d505366..821fbf6 100644
--- a/src/netml/ndm/pca.py
+++ b/src/netml/ndm/pca.py
@@ -15,7 +15,7 @@ class PCA(BaseDetector):
     def __init__(self, n_components=None, n_selected_components=None,
                  contamination=0.1, copy=True, whiten=False, svd_solver='auto',
                  tol=0.0, iterated_power='auto', random_state=42,
-                 weighted=True, standardization=True):
+                 weighted=True, standardization=True, verbose=1):
         """Principal component analysis (PCA)
 
         Parameters
@@ -70,6 +70,7 @@ def __init__(self, n_components=None, n_selected_components=None,
         self.weighted = weighted
         self.standardization = standardization
         self.score_name = "reconstructed"  # the way to obtain outlier scores
+        self.verbose = verbose
 
         self.contamination = contamination
 

From 1ab3ea688c8c0bd833767e999b00ff1c2af9909a Mon Sep 17 00:00:00 2001
From: Luca Dovichi <lucadovichi@gmail.com>
Date: Sat, 15 Nov 2025 16:07:53 -0600
Subject: [PATCH 4/4] ensure that verbose defaults to 0 for signatures where
 it's recently been added

---
 src/netml/ndm/pca.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/netml/ndm/pca.py b/src/netml/ndm/pca.py
index 821fbf6..68f8915 100644
--- a/src/netml/ndm/pca.py
+++ b/src/netml/ndm/pca.py
@@ -15,7 +15,7 @@ class PCA(BaseDetector):
     def __init__(self, n_components=None, n_selected_components=None,
                  contamination=0.1, copy=True, whiten=False, svd_solver='auto',
                  tol=0.0, iterated_power='auto', random_state=42,
-                 weighted=True, standardization=True, verbose=1):
+                 weighted=True, standardization=True, verbose=0):
         """Principal component analysis (PCA)
 
         Parameters