diff --git a/src/netml/ndm/ae.py b/src/netml/ndm/ae.py index 10ffa39..b8c0e00 100644 --- a/src/netml/ndm/ae.py +++ b/src/netml/ndm/ae.py @@ -71,7 +71,7 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3, loss='mse', dropout_rate=0.2, l2_regularizer=0.1, validation_size=0.1, - verbose=1, random_state=42, contamination=0.1, hid_dim=16, lat_dim=8): + verbose=1, contamination=0.1, hid_dim=16, lat_dim=8, **kwargs): """AutoEncoder Parameters @@ -109,9 +109,7 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3, verbose: int (default is 1) A print level is to control what information should be printed according to the given value. The higher the value is, the more info is printed. - - random_state: int (default is 42) - + """ self.epochs = epochs self.batch_size = batch_size @@ -120,7 +118,6 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3, self.l2_regularizer = l2_regularizer self.validation_size = validation_size self.verbose = verbose - self.random_state = random_state self.lr = lr self.contamination = contamination self.hid_dim = hid_dim @@ -128,6 +125,11 @@ def __init__(self, epochs=100, batch_size=32, lr=1e-3, check_parameter(dropout_rate, 0, 1, param_name='dropout_rate', include_left=True) + if "random_state" in kwargs and verbose > 5: + print( + "Warning: 'random_state' passed to AutoEncoder. Use torch.manual_seed() instead." + ) + if self.loss == 'mse' or (not self.loss): self.criterion = nn.MSELoss() diff --git a/src/netml/ndm/gmm.py b/src/netml/ndm/gmm.py index cba4278..724eda2 100644 --- a/src/netml/ndm/gmm.py +++ b/src/netml/ndm/gmm.py @@ -71,6 +71,8 @@ def __init__(self, n_components=1, covariance_type='full', tol=1e-3, contamination: float (default is 0.1) It's in range (0,1). A threshold used to decide the normal score (not used). + random_state: int (default is 42) + """ self.n_components = n_components self.covariance_type = covariance_type diff --git a/src/netml/ndm/kde.py b/src/netml/ndm/kde.py index 8f4655e..d670597 100644 --- a/src/netml/ndm/kde.py +++ b/src/netml/ndm/kde.py @@ -16,7 +16,7 @@ class KDE(KernelDensity, BaseDetector): def __init__(self, bandwidth=1.0, algorithm='auto', kernel='gaussian', metric="euclidean", atol=0, rtol=0, contamination=0.1, - breadth_first=True, leaf_size=40, metric_params=None, random_state=42): + breadth_first=True, leaf_size=40, metric_params=None, verbose=0, **kwargs): """Kernel density estimation (KDE) Parameters ---------- @@ -63,7 +63,11 @@ def __init__(self, bandwidth=1.0, algorithm='auto', self.leaf_size = leaf_size self.metric_params = metric_params self.contamination = contamination - self.random_state = random_state + + if "random_state" in kwargs and verbose > 5: + print( + "Warning: argument 'random_state' passed to KDE has no effect." + ) # run the choose algorithm code so that exceptions will happen here # we're using clone() in the GenerativeBayes classifier, diff --git a/src/netml/ndm/model.py b/src/netml/ndm/model.py index a4a5532..72d108f 100644 --- a/src/netml/ndm/model.py +++ b/src/netml/ndm/model.py @@ -13,7 +13,7 @@ class MODEL: - def __init__(self, model=None, *, score_metric='auc', verbose=1, random_state=42): + def __init__(self, model=None, *, score_metric='auc', verbose=1, **kwargs): """Train and test a model on a given data. Parameters @@ -28,9 +28,6 @@ def __init__(self, model=None, *, score_metric='auc', verbose=1, random_state=42 a print level is to control what information should be printed according to the given value. The higher the value is, the more info is printed. - random_state: int - a value is to make your experiments more reproducible. - Returns ------- a MODEL instance @@ -40,10 +37,14 @@ def __init__(self, model=None, *, score_metric='auc', verbose=1, random_state=42 self.model_name = model.name self.score_metric = score_metric self.verbose = verbose - self.random_state = random_state # store all data generated during training and testing the model. self.history = {} + if "random_state" in kwargs: + self.random_state = kwargs["random_state"] + if verbose > 5: + print("Warning: setting random_state for a model wrapper doesn't affect the underlying predictions.") + @timing def _train(self, X_train, y_train=None): """fit the model on the train set diff --git a/src/netml/ndm/ocsvm.py b/src/netml/ndm/ocsvm.py index bc4bcc6..00bbc4b 100644 --- a/src/netml/ndm/ocsvm.py +++ b/src/netml/ndm/ocsvm.py @@ -12,7 +12,7 @@ class OCSVM(OneClassSVM): def __init__(self, kernel='rbf', degree=3, gamma='scale', coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, - verbose=False, max_iter=-1, random_state=100): + verbose=False, max_iter=-1, **kwargs): """One Class SVM (OCSVM) Parameters @@ -49,10 +49,13 @@ def __init__(self, kernel='rbf', degree=3, gamma='scale', verbose: bool (default is False) Enable verbose output. + """ - random_state: int (default is 42) + if "random_state" in kwargs and verbose > 5: + print( + "Warning: argument 'random_state' passed to OCSVM has no effect." + ) - """ super(OCSVM, self).__init__( kernel=kernel, degree=degree, @@ -66,7 +69,6 @@ def __init__(self, kernel='rbf', degree=3, gamma='scale', max_iter=max_iter, ) - self.random_state = random_state self.verbose = verbose # override decision_function. because test and grid_search will use decision_function first diff --git a/src/netml/ndm/pca.py b/src/netml/ndm/pca.py index 53aa3ee..68f8915 100644 --- a/src/netml/ndm/pca.py +++ b/src/netml/ndm/pca.py @@ -14,8 +14,8 @@ class PCA(BaseDetector): def __init__(self, n_components=None, n_selected_components=None, contamination=0.1, copy=True, whiten=False, svd_solver='auto', - tol=0.0, iterated_power='auto', random_state=None, - weighted=True, standardization=True): + tol=0.0, iterated_power='auto', random_state=42, + weighted=True, standardization=True, verbose=0): """Principal component analysis (PCA) Parameters @@ -48,7 +48,7 @@ def __init__(self, n_components=None, n_selected_components=None, Number of iterations for the power method computed by svd_solver == 'randomized'. - random_state : int + random_state: int (default is 42) weighted : bool, optional (default=True) If True, the eigenvalues are used in score computation. @@ -70,6 +70,7 @@ def __init__(self, n_components=None, n_selected_components=None, self.weighted = weighted self.standardization = standardization self.score_name = "reconstructed" # the way to obtain outlier scores + self.verbose = verbose self.contamination = contamination