From 5f283c26d5f1868edc5dbf06f3b720fdb5fc3188 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sat, 10 Jan 2026 00:30:40 +0300
Subject: [PATCH 01/14] Native categorical support part 1

---
 flexml/config/ml_models.py           |   4 +-
 flexml/structures/supervised_base.py | 189 ++++++++++++++++++++++++---
 2 files changed, 172 insertions(+), 21 deletions(-)

diff --git a/flexml/config/ml_models.py b/flexml/config/ml_models.py
index 9e4147e..752572e 100644
--- a/flexml/config/ml_models.py
+++ b/flexml/config/ml_models.py
@@ -308,8 +308,8 @@ def get_ml_models(
 
         # Quick Classification Models
         LOGISTIC_REGRESSION = LogisticRegression(max_iter=1000, random_state=random_state, n_jobs=n_jobs)
-        XGBOOST_CLASSIFIER = XGBClassifier(objective=xgb_objective, random_state=random_state, n_jobs=n_jobs)
-        LIGHTGBM_CLASSIFIER = LGBMClassifier(verbose=-1, random_state=random_state, n_jobs=n_jobs)
+        XGBOOST_CLASSIFIER = XGBClassifier(enable_categorical=True, objective=xgb_objective, random_state=random_state, n_jobs=n_jobs)
+        LIGHTGBM_CLASSIFIER = LGBMClassifier(verbose=-1, random_state=random_state, n_jobs=n_jobs)  # no enable_categorical: that is an XGBoost-only flag; LightGBM reads pandas 'category' columns via categorical_feature='auto'
         CATBOOST_CLASSIFIER = CatBoostClassifier(allow_writing_files=False, silent=True, random_seed=random_state, thread_count=n_jobs)
         DECISION_TREE_CLASSIFIER = DecisionTreeClassifier(random_state=random_state)
         RANDOM_FOREST_CLASSIFIER = RandomForestClassifier(random_state=random_state, n_jobs=n_jobs)
diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index 78e7c82..9c797f4 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -41,6 +41,33 @@
 pd.set_option('display.max_columns', None)
 
 
+# Models that support native categorical features (no encoding needed)
+NATIVE_CATEGORICAL_MODELS = {
+    'CatBoostRegressor', 'CatBoostClassifier',
+    'LGBMRegressor', 'LGBMClassifier', 
+    'XGBRegressor', 'XGBClassifier',
+    'HistGradientBoostingRegressor', 'HistGradientBoostingClassifier'  # sklearn also supports!
+}
+
+def _get_encoded_categorical_columns(encoded_columns, original_cat_cols):
+    """
+    Identifies which columns in the encoded dataframe came from categorical encoding.
+    Handles both label encoding (same name) and one-hot encoding (prefix_value format).
+    """
+    encoded_cat_cols = []
+    for col in encoded_columns:
+        # Check if it's the original column name (label/ordinal encoding)
+        if col in original_cat_cols:
+            encoded_cat_cols.append(col)
+        else:
+            # Check if it's a one-hot encoded column (format: original_value)
+            for orig_col in original_cat_cols:
+                if col.startswith(f"{orig_col}_"):
+                    encoded_cat_cols.append(col)
+                    break
+    return encoded_cat_cols
+
+
 class SupervisedBase:
     """
     Base class for Supervised tasks (regression & classification)
@@ -325,9 +352,15 @@ def __prepare_holdout_data(self, test_size: Optional[float] = None):
         ], axis=1)
 
         self.feature_engineer.setup(data=train_data)
+        self.categorical_columns = self.feature_engineer.categorical_columns
+        
+        # Store raw categorical columns for holdout data (for native-categorical models)
+        self.X_train_cat_raw = self._get_raw_categorical_data(train_data)
+        self.X_test_cat_raw = self._get_raw_categorical_data(test_data)
 
         self.X_train, self.y_train = self.feature_engineer.fit_transform()
         self.X_test, self.y_test = self.feature_engineer.transform(test_data=test_data, y_included=True)
+        self.encoded_categorical_columns = _get_encoded_categorical_columns(self.X_train.columns, self.categorical_columns)
         self.feature_names = list(self.X_train.columns)
         self.y_class_mapping = self.feature_engineer.y_class_mapping
         
@@ -380,6 +413,90 @@ def __top_n_models_checker(self, top_n_models: Optional[int]) -> int:
         
         return top_n_models
     
+    def _get_raw_categorical_data(self, data: pd.DataFrame) -> Optional[pd.DataFrame]:
+        """
+        Extract raw categorical columns from data and convert to category dtype.
+        Used to preserve original categoricals before encoding for native-categorical models.
+        
+        Parameters
+        ----------
+        data : pd.DataFrame
+            The data containing categorical columns
+            
+        Returns
+        -------
+        Optional[pd.DataFrame]
+            DataFrame with categorical columns converted to 'category' dtype, or None if no categorical columns
+        """
+        if not hasattr(self, 'categorical_columns') or not self.categorical_columns:
+            return None
+        cat_df = data[self.categorical_columns].copy()
+        for col in self.categorical_columns:
+            cat_df[col] = cat_df[col].astype('category')
+        return cat_df
+    
+    def _prepare_data_for_model(
+        self, 
+        model_name: str, 
+        X_encoded: pd.DataFrame, 
+        X_cat_raw: Optional[pd.DataFrame] = None
+    ) -> pd.DataFrame:
+        """
+        Prepares data for a specific model by swapping encoded categoricals
+        with raw ones for models that support native categorical features.
+        
+        Parameters
+        ----------
+        model_name : str
+            The name of the model
+        X_encoded : pd.DataFrame
+            The encoded feature data
+        X_cat_raw : Optional[pd.DataFrame]
+            The raw categorical columns (with 'category' dtype)
+            
+        Returns
+        -------
+        pd.DataFrame
+            Prepared feature data for the model
+        """
+        if model_name in NATIVE_CATEGORICAL_MODELS and X_cat_raw is not None and hasattr(self, 'encoded_categorical_columns') and self.encoded_categorical_columns:
+            # Drop encoded categorical columns
+            X_final = X_encoded.drop(columns=self.encoded_categorical_columns, errors='ignore')
+            # Merge with raw categorical columns (aligned by index)
+            X_final = pd.concat([X_final, X_cat_raw.loc[X_final.index]], axis=1)
+            return X_final
+        return X_encoded
+    
+    def _fit_model(
+        self, 
+        model: object, 
+        X: pd.DataFrame, 
+        y: pd.Series,
+        model_name: Optional[str] = None
+    ):
+        """
+        Fits a model with proper categorical feature handling.
+        Passes cat_features to CatBoost models for native categorical support.
+        
+        Parameters
+        ----------
+        model : object
+            The model to fit
+        X : pd.DataFrame
+            The feature data
+        y : pd.Series
+            The target data
+        model_name : Optional[str]
+            The name of the model (if None, uses model's class name)
+        """
+        if model_name is None:
+            model_name = model.__class__.__name__
+        
+        if 'CatBoost' in model_name and hasattr(self, 'categorical_columns') and self.categorical_columns:
+            model.fit(X, y, cat_features=self.categorical_columns)
+        else:
+            model.fit(X, y)
+    
     def __process_experiment_result(self, experiment_stats: dict):
         """
         Processes and aggregates the results of an experiment, calculating average metrics and selecting the best model.
@@ -625,9 +742,13 @@ def start_experiment(
                 ], axis=1)
                 
                 self.feature_engineer.setup(data=train_data)
+
+                # Save raw categorical columns BEFORE encoding (for native-categorical models)
+                X_train_cat_raw = self._get_raw_categorical_data(train_data)
+                X_test_cat_raw = self._get_raw_categorical_data(test_data)
                 
-                X_train, y_train = self.feature_engineer.fit_transform()
-                X_test, y_test = self.feature_engineer.transform(test_data=test_data, y_included=True)
+                X_train_encoded, y_train = self.feature_engineer.fit_transform()
+                X_test_encoded, y_test = self.feature_engineer.transform(test_data=test_data, y_included=True)
 
                 for model_idx in range(len(self.__ML_MODELS)):
                     model_info = self.__ML_MODELS[model_idx]
@@ -638,19 +759,24 @@ def start_experiment(
                         continue  # Skip already trained or raised error models
 
                     model = model_info['model']
+
+                    # Prepare data based on model type (native categorical vs encoded)
+                    X_train_final = self._prepare_data_for_model(model_name, X_train_encoded, X_train_cat_raw)
+                    X_test_final = self._prepare_data_for_model(model_name, X_test_encoded, X_test_cat_raw)
+
                     try:
                         all_metrics = []
                         all_times = []
 
                         t_start = time()
-                        model.fit(X_train, y_train)
+                        self._fit_model(model, X_train_final, y_train, model_name)
                         t_end = time()
 
                         time_taken = round(t_end - t_start, 2)
                         if self.__ML_TASK_TYPE == "Classification" and hasattr(model, 'predict_proba'):
-                            y_pred = model.predict_proba(X_test)
+                            y_pred = model.predict_proba(X_test_final)
                         else:
-                            y_pred = model.predict(X_test)
+                            y_pred = model.predict(X_test_final)
 
                         model_perf = evaluate_model_perf(
                             self.__ML_TASK_TYPE,
@@ -860,8 +986,17 @@ def save_model(
             if not already_trained:
                 self.__logger.info("Training the model using the whole data")
                 self.feature_engineer.setup(data=self.data)
-                X_train, y_train = self.feature_engineer.fit_transform()
-                model.fit(X_train, y_train)
+                
+                # Get raw categoricals before encoding
+                X_cat_raw = self._get_raw_categorical_data(self.data)
+                
+                X_train_encoded, y_train = self.feature_engineer.fit_transform()
+                
+                # Prepare data for this specific model
+                X_train_final = self._prepare_data_for_model(model_name, X_train_encoded, X_cat_raw)
+                
+                # Fit with proper cat_features handling
+                self._fit_model(model, X_train_final, y_train, model_name)
 
                 # find the model in leaderboard and update the full_train to True, and update the model object in there
                 for model_info in self.__model_training_info:
@@ -975,8 +1110,13 @@ def _predict_helper(
             if not already_trained:
                 self.__logger.info("Training the model using the whole data")
                 self.feature_engineer.setup(data=self.data)
-                X_train, y_train = self.feature_engineer.fit_transform()
-                model.fit(X_train, y_train)
+                
+                # Get raw categoricals before encoding
+                X_cat_raw = self._get_raw_categorical_data(self.data)
+                
+                X_train_encoded, y_train = self.feature_engineer.fit_transform()
+                X_train_final = self._prepare_data_for_model(model_name, X_train_encoded, X_cat_raw)
+                self._fit_model(model, X_train_final, y_train, model_name)
 
                 # find the model in leaderboard and update the full_train to True, and update the model object in there
                 for model_info in self.__model_training_info:
@@ -987,10 +1127,11 @@ def _predict_helper(
                             break
                 # Update leaderboard
                 self.get_best_models()
-            X_test = self.feature_engineer.transform(test_data)
 
-        else:
-            X_test = self.feature_engineer.transform(test_data)
+        # Transform test data and prepare for model
+        X_test_encoded = self.feature_engineer.transform(test_data)
+        X_test_cat_raw = self._get_raw_categorical_data(test_data)
+        X_test = self._prepare_data_for_model(model_name, X_test_encoded, X_test_cat_raw)
 
         return model, X_test
 
@@ -1067,7 +1208,13 @@ def __add_holdout_model_to_stats(self, model: object, model_name: Optional[str]
             model_name = model.__class__.__name__
 
         model_copy = deepcopy(model)
-        model_copy.fit(self.X_train, self.y_train)
+        
+        # Prepare holdout data for this model (use raw categoricals for native-categorical models)
+        X_train_final = self._prepare_data_for_model(model_name, self.X_train, self.X_train_cat_raw)
+        
+        # Fit with proper categorical handling
+        self._fit_model(model_copy, X_train_final, self.y_train, model_name)
+        
         self._holdout_model_objects[model_name] = model_copy
         return model_copy
     
@@ -1147,11 +1294,15 @@ def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_
         else:
             model = self.__add_holdout_model_to_stats(model, model_name)
 
-        # If kind expects predictions
+        # Prepare holdout data for this model (use raw categoricals for native-categorical models)
+        X_train_final = self._prepare_data_for_model(model_name, self.X_train, self.X_train_cat_raw)
+        X_test_final = self._prepare_data_for_model(model_name, self.X_test, self.X_test_cat_raw)
+
+        # If kind expects predictions
         if kind in ["confusion_matrix"]:
-            preds = model.predict(self.X_test)
+            preds = model.predict(X_test_final)
         elif kind in ["roc_curve", "calibration_curve"]:
-            preds = model.predict_proba(self.X_test)
+            preds = model.predict_proba(X_test_final)
 
         graph = None
 
@@ -1164,13 +1315,13 @@ def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_
         elif kind == "roc_curve":
             graph = plot_roc_curve(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif kind == "residuals":
-            graph = plot_residuals(model, self.X_train, self.y_train, self.X_test, self.y_test, **kwargs)
+            graph = plot_residuals(model, X_train_final, self.y_train, X_test_final, self.y_test, **kwargs)
         elif kind == "prediction_error":
-            graph = plot_prediction_error(model, self.X_train, self.y_train, self.X_test, self.y_test, **kwargs)
+            graph = plot_prediction_error(model, X_train_final, self.y_train, X_test_final, self.y_test, **kwargs)
         elif kind == "calibration_curve":
             graph = plot_calibration_curve(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif 'shap' in kind:
-            graph = plot_shap(model, self.X_test, kind, **kwargs)
+            graph = plot_shap(model, X_test_final, kind, **kwargs)
         else:
             error_msg = f"Invalid plot type: {kind}. Available plot types: {available_plot_types}"
             self.__logger.error(error_msg)

From 97a82d2ab0ceef41857bd4a9d10ae835489c242e Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <56040583+ozguraslank@users.noreply.github.com>
Date: Sat, 10 Jan 2026 18:29:53 +0300
Subject: [PATCH 02/14] Native categorical support part 2

---
 flexml/_feature_engineer.py          | 27 +++++++++++++++++++
 flexml/_model_tuner.py               |  6 ++++-
 flexml/structures/supervised_base.py | 40 ++++++++++++++++++++++++++--
 3 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/flexml/_feature_engineer.py b/flexml/_feature_engineer.py
index f2d8897..e306e33 100644
--- a/flexml/_feature_engineer.py
+++ b/flexml/_feature_engineer.py
@@ -27,6 +27,33 @@ def transform(self, X):
             A DataFrame with the specified columns dropped
         """
         return X.drop(columns=self.drop_columns, axis=1, errors='ignore')
+
+
+class CategoricalTypeConverter(BaseEstimator, TransformerMixin):
+    """
+    A transformer that casts the given columns to pandas 'category' dtype.
+    Used for tree-based models that support native categorical features.
+    """
+    def __init__(self, categorical_columns: Optional[List[str]] = None):
+        # Stored unmodified: sklearn's clone() rejects constructors that alter their parameters
+        self.categorical_columns = categorical_columns
+
+    def fit(self, X, y=None):
+        """Nothing is learned from the data; returns self so the step fits into a Pipeline"""
+        return self
+
+    def transform(self, X):
+        """
+        Converts specified categorical columns to 'category' dtype
+        
+        Returns
+        -------
+        pd.DataFrame
+            A new DataFrame where every listed column that exists in X is cast to 'category'
+        """
+        present = [col for col in (self.categorical_columns or []) if col in X.columns]
+        # astype returns a new frame, so the caller's X is left untouched
+        return X.astype({col: 'category' for col in present})
     
 
 class ColumnImputer(BaseEstimator, TransformerMixin):
diff --git a/flexml/_model_tuner.py b/flexml/_model_tuner.py
index 6153719..c769c67 100644
--- a/flexml/_model_tuner.py
+++ b/flexml/_model_tuner.py
@@ -204,7 +204,11 @@ def _setup_tuning(
             model = model.named_steps['model']
 
         if "CatBoost" in model.__class__.__name__:
-            model_params = model.get_all_params()
+            # Use get_all_params() only if the model is fitted, otherwise use get_params()
+            if model.is_fitted():
+                model_params = model.get_all_params()
+            else:
+                model_params = model.get_params()
         else:
             model_params = model.get_params()
         
diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index 9c797f4..ef4b41f 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -1020,6 +1020,14 @@ def save_model(
             
             return model
 
+        # Warn users about native categorical models in Pipeline mode
+        if model_name in NATIVE_CATEGORICAL_MODELS and hasattr(self, 'categorical_columns') and self.categorical_columns:
+            self.__logger.warning(
+                f"'{model_name}' supports native categorical features, but Pipeline mode encodes categorical data. "
+                f"For optimal performance, consider using 'model_only=True' and handle feature engineering separately, "
+                f"or use the 'predict()' method directly which handles native categoricals automatically."
+            )
+
         # Add the model to the pipeline
         pipeline_steps.append(('model', model))
 
@@ -1749,8 +1757,36 @@ def _show_tuning_report(tuning_report: Optional[dict] = None):
                 y_encoded = self.y # No need to encode the target for regression or if the target is already encoded
             self.model_tuner = ModelTuner(self.__ML_TASK_TYPE, self.X, y_encoded, self.logging_to_file)
 
-        pipeline = self.feature_engineer.pipeline
-        pipeline = Pipeline(steps=pipeline.steps + [('model', model)])
+        # Get model name for native categorical check
+        model_name = model.__class__.__name__
+        
+        # Check if model supports native categorical features
+        if model_name in NATIVE_CATEGORICAL_MODELS and hasattr(self, 'categorical_columns') and self.categorical_columns:
+            # Clone the model to avoid modifying the fitted model
+            from sklearn.base import clone
+            model = clone(model)
+            
+            # For CatBoost, set cat_features parameter on the cloned model
+            if 'CatBoost' in model_name:
+                model.set_params(cat_features=list(self.categorical_columns))
+            
+            # Create pipeline WITHOUT the encoder step (keep other steps like imputer, normalizer)
+            # and add a step to convert categoricals to 'category' dtype
+            from flexml._feature_engineer import CategoricalTypeConverter
+            pipeline_steps_without_encoder = [
+                (name, step) for name, step in self.feature_engineer.pipeline.steps 
+                if name != 'encoder'
+            ]
+            # Add categorical type converter for native categorical models
+            pipeline_steps_without_encoder.append(
+                ('cat_type_converter', CategoricalTypeConverter(list(self.categorical_columns)))
+            )
+            pipeline = Pipeline(steps=pipeline_steps_without_encoder + [('model', model)])
+            
+            self.__logger.info(f"Using native categorical features for {model_name} during tuning (encoding step removed)")
+        else:
+            # Standard pipeline with encoding for non-native categorical models
+            pipeline = Pipeline(steps=self.feature_engineer.pipeline.steps + [('model', model)])
 
         self.__logger.info(f"[PROCESS] Model Tuning process started with '{tuning_method}' method")
         tuning_method = tuning_method.lower()

From a27e1eac27beda39de3e31eb93ac1dc7fa255af1 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sat, 10 Jan 2026 22:48:11 +0300
Subject: [PATCH 03/14] Native categorical support part 3

---
 flexml/_model_tuner.py               |  8 +-------
 flexml/structures/supervised_base.py | 24 +++++++-----------------
 2 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/flexml/_model_tuner.py b/flexml/_model_tuner.py
index c769c67..a8bc0cd 100644
--- a/flexml/_model_tuner.py
+++ b/flexml/_model_tuner.py
@@ -198,17 +198,11 @@ def _setup_tuning(
             
             * 'tuned_model_evaluation_metric': The evaluation metric that is used to evaluate the tuned model
         """
-        model_params = None
-        
         if isinstance(model, Pipeline):
             model = model.named_steps['model']
 
         if "CatBoost" in model.__class__.__name__:
-            # Use get_all_params() only if the model is fitted, otherwise use get_params()
-            if model.is_fitted():
-                model_params = model.get_all_params()
-            else:
-                model_params = model.get_params()
+            model_params = model.get_all_params()
         else:
             model_params = model.get_params()
         
diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index ef4b41f..649eaa0 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -34,7 +34,7 @@
 )
 from flexml.structures.custom_score import CustomScore
 from flexml._model_tuner import ModelTuner
-from flexml._feature_engineer import FeatureEngineering
+from flexml._feature_engineer import FeatureEngineering, CategoricalTypeConverter
 
 import warnings
 warnings.filterwarnings("ignore")
@@ -493,7 +493,10 @@ def _fit_model(
             model_name = model.__class__.__name__
         
         if 'CatBoost' in model_name and hasattr(self, 'categorical_columns') and self.categorical_columns:
-            model.fit(X, y, cat_features=self.categorical_columns)
+            # check if model is fitted:
+            if not model.is_fitted():
+                model.set_params(cat_features=list(self.categorical_columns))
+            model.fit(X, y)
         else:
             model.fit(X, y)
     
@@ -1759,20 +1762,9 @@ def _show_tuning_report(tuning_report: Optional[dict] = None):
 
         # Get model name for native categorical check
         model_name = model.__class__.__name__
-        
+
         # Check if model supports native categorical features
-        if model_name in NATIVE_CATEGORICAL_MODELS and hasattr(self, 'categorical_columns') and self.categorical_columns:
-            # Clone the model to avoid modifying the fitted model
-            from sklearn.base import clone
-            model = clone(model)
-            
-            # For CatBoost, set cat_features parameter on the cloned model
-            if 'CatBoost' in model_name:
-                model.set_params(cat_features=list(self.categorical_columns))
-            
-            # Create pipeline WITHOUT the encoder step (keep other steps like imputer, normalizer)
-            # and add a step to convert categoricals to 'category' dtype
-            from flexml._feature_engineer import CategoricalTypeConverter
+        if model_name in NATIVE_CATEGORICAL_MODELS and hasattr(self, 'categorical_columns') and len(self.categorical_columns) > 0:
             pipeline_steps_without_encoder = [
                 (name, step) for name, step in self.feature_engineer.pipeline.steps 
                 if name != 'encoder'
@@ -1782,8 +1774,6 @@ def _show_tuning_report(tuning_report: Optional[dict] = None):
                 ('cat_type_converter', CategoricalTypeConverter(list(self.categorical_columns)))
             )
             pipeline = Pipeline(steps=pipeline_steps_without_encoder + [('model', model)])
-            
-            self.__logger.info(f"Using native categorical features for {model_name} during tuning (encoding step removed)")
         else:
             # Standard pipeline with encoding for non-native categorical models
             pipeline = Pipeline(steps=self.feature_engineer.pipeline.steps + [('model', model)])

From e79cb86e1913516fdebe64dde5f0fec3d069ee47 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sun, 11 Jan 2026 17:23:17 +0300
Subject: [PATCH 04/14] Native categorical support part 4

---
 flexml/config/__init__.py            |   1 +
 flexml/config/supervised_config.py   |   8 +
 flexml/structures/supervised_base.py | 395 +++++++++++++++------------
 3 files changed, 226 insertions(+), 178 deletions(-)

diff --git a/flexml/config/__init__.py b/flexml/config/__init__.py
index f56a684..2346f85 100644
--- a/flexml/config/__init__.py
+++ b/flexml/config/__init__.py
@@ -3,6 +3,7 @@
 )
 
 from flexml.config.supervised_config import (
+    NATIVE_CATEGORICAL_MODELS,
     EVALUATION_METRICS,
     TUNING_METRIC_TRANSFORMATIONS,
     CROSS_VALIDATION_METHODS,
diff --git a/flexml/config/supervised_config.py b/flexml/config/supervised_config.py
index 9c61dca..25b066e 100644
--- a/flexml/config/supervised_config.py
+++ b/flexml/config/supervised_config.py
@@ -1,3 +1,11 @@
+# Models that support native categorical features
+NATIVE_CATEGORICAL_MODELS = {
+    'CatBoostRegressor', 'CatBoostClassifier',
+    'LGBMRegressor', 'LGBMClassifier', 
+    'XGBRegressor', 'XGBClassifier',
+    'HistGradientBoostingRegressor', 'HistGradientBoostingClassifier'
+}
+
 # Regression & Classification Evaluation Metrics
 EVALUATION_METRICS = {
     "Regression": {"DEFAULT": "R2",
diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index 649eaa0..bd5709f 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -12,6 +12,7 @@
 from flexml.logger import get_logger
 from flexml.config import (
     get_ml_models,
+    NATIVE_CATEGORICAL_MODELS,
     EVALUATION_METRICS,
     CROSS_VALIDATION_METHODS,
     PLOT_TYPES
@@ -41,33 +42,6 @@
 pd.set_option('display.max_columns', None)
 
 
-# Models that support native categorical features (no encoding needed)
-NATIVE_CATEGORICAL_MODELS = {
-    'CatBoostRegressor', 'CatBoostClassifier',
-    'LGBMRegressor', 'LGBMClassifier', 
-    'XGBRegressor', 'XGBClassifier',
-    'HistGradientBoostingRegressor', 'HistGradientBoostingClassifier'  # sklearn also supports!
-}
-
-def _get_encoded_categorical_columns(encoded_columns, original_cat_cols):
-    """
-    Identifies which columns in the encoded dataframe came from categorical encoding.
-    Handles both label encoding (same name) and one-hot encoding (prefix_value format).
-    """
-    encoded_cat_cols = []
-    for col in encoded_columns:
-        # Check if it's the original column name (label/ordinal encoding)
-        if col in original_cat_cols:
-            encoded_cat_cols.append(col)
-        else:
-            # Check if it's a one-hot encoded column (format: original_value)
-            for orig_col in original_cat_cols:
-                if col.startswith(f"{orig_col}_"):
-                    encoded_cat_cols.append(col)
-                    break
-    return encoded_cat_cols
-
-
 class SupervisedBase:
     """
     Base class for Supervised tasks (regression & classification)
@@ -354,14 +328,15 @@ def __prepare_holdout_data(self, test_size: Optional[float] = None):
         self.feature_engineer.setup(data=train_data)
         self.categorical_columns = self.feature_engineer.categorical_columns
         
-        # Store raw categorical columns for holdout data (for native-categorical models)
-        self.X_train_cat_raw = self._get_raw_categorical_data(train_data)
-        self.X_test_cat_raw = self._get_raw_categorical_data(test_data)
-
-        self.X_train, self.y_train = self.feature_engineer.fit_transform()
-        self.X_test, self.y_test = self.feature_engineer.transform(test_data=test_data, y_included=True)
-        self.encoded_categorical_columns = _get_encoded_categorical_columns(self.X_train.columns, self.categorical_columns)
-        self.feature_names = list(self.X_train.columns)
+        # Store raw holdout data (preprocessing will be done per-model when needed)
+        self.X_train_raw = train_data.drop(columns=[self.target_col])
+        self.X_test_raw = test_data.drop(columns=[self.target_col])
+        self.y_train, self.y_test = self._encode_target(
+            train_data[self.target_col], 
+            test_data[self.target_col]
+        )
+        
+        self.feature_names = list(self.X_train_raw.columns)
         self.y_class_mapping = self.feature_engineer.y_class_mapping
         
     def __prepare_models(self, experiment_size: str, num_class: int, random_state: Optional[int] = None, n_jobs: Optional[int] = -1):
@@ -413,60 +388,6 @@ def __top_n_models_checker(self, top_n_models: Optional[int]) -> int:
         
         return top_n_models
     
-    def _get_raw_categorical_data(self, data: pd.DataFrame) -> Optional[pd.DataFrame]:
-        """
-        Extract raw categorical columns from data and convert to category dtype.
-        Used to preserve original categoricals before encoding for native-categorical models.
-        
-        Parameters
-        ----------
-        data : pd.DataFrame
-            The data containing categorical columns
-            
-        Returns
-        -------
-        Optional[pd.DataFrame]
-            DataFrame with categorical columns converted to 'category' dtype, or None if no categorical columns
-        """
-        if not hasattr(self, 'categorical_columns') or not self.categorical_columns:
-            return None
-        cat_df = data[self.categorical_columns].copy()
-        for col in self.categorical_columns:
-            cat_df[col] = cat_df[col].astype('category')
-        return cat_df
-    
-    def _prepare_data_for_model(
-        self, 
-        model_name: str, 
-        X_encoded: pd.DataFrame, 
-        X_cat_raw: Optional[pd.DataFrame] = None
-    ) -> pd.DataFrame:
-        """
-        Prepares data for a specific model by swapping encoded categoricals
-        with raw ones for models that support native categorical features.
-        
-        Parameters
-        ----------
-        model_name : str
-            The name of the model
-        X_encoded : pd.DataFrame
-            The encoded feature data
-        X_cat_raw : Optional[pd.DataFrame]
-            The raw categorical columns (with 'category' dtype)
-            
-        Returns
-        -------
-        pd.DataFrame
-            Prepared feature data for the model
-        """
-        if model_name in NATIVE_CATEGORICAL_MODELS and X_cat_raw is not None and hasattr(self, 'encoded_categorical_columns') and self.encoded_categorical_columns:
-            # Drop encoded categorical columns
-            X_final = X_encoded.drop(columns=self.encoded_categorical_columns, errors='ignore')
-            # Merge with raw categorical columns (aligned by index)
-            X_final = pd.concat([X_final, X_cat_raw.loc[X_final.index]], axis=1)
-            return X_final
-        return X_encoded
-    
     def _fit_model(
         self, 
         model: object, 
@@ -500,6 +421,58 @@ def _fit_model(
         else:
             model.fit(X, y)
     
+    def _encode_target(
+        self, 
+        y: pd.Series, 
+        y_test: Optional[pd.Series] = None,
+        fit: bool = True
+    ) -> Union[pd.Series, tuple]:
+        """
+        Encodes the target variable for classification tasks.
+        
+        Parameters
+        ----------
+        y : pd.Series
+            The target variable to encode
+        y_test : pd.Series, optional
+            Test target to transform (uses already fitted encoder)
+        fit : bool
+            If True, fits the encoder on y. If False, only transforms.
+            
+        Returns
+        -------
+        pd.Series or tuple
+            Encoded y, or (encoded_y, encoded_y_test) if y_test provided
+        """
+        # Skip encoding for non-classification tasks or targets whose dtype is not object/category
+        if self.__ML_TASK_TYPE != 'Classification' or y.dtype not in ['object', 'category']:
+            return (y, y_test) if y_test is not None else y
+        
+        # Encode y
+        if fit:
+            encoded_y = pd.Series(
+                self.feature_engineer.target_encoder.fit_transform(y),
+                name=y.name,
+                index=y.index
+            )
+        else:
+            encoded_y = pd.Series(
+                self.feature_engineer.target_encoder.transform(y),
+                name=y.name,
+                index=y.index
+            )
+        
+        # Encode y_test if provided
+        if y_test is not None:
+            encoded_y_test = pd.Series(
+                self.feature_engineer.target_encoder.transform(y_test),
+                name=y_test.name,
+                index=y_test.index
+            )
+            return encoded_y, encoded_y_test
+        
+        return encoded_y
+    
     def __process_experiment_result(self, experiment_stats: dict):
         """
         Processes and aggregates the results of an experiment, calculating average metrics and selecting the best model.
@@ -745,13 +718,14 @@ def start_experiment(
                 ], axis=1)
                 
                 self.feature_engineer.setup(data=train_data)
-
-                # Save raw categorical columns BEFORE encoding (for native-categorical models)
-                X_train_cat_raw = self._get_raw_categorical_data(train_data)
-                X_test_cat_raw = self._get_raw_categorical_data(test_data)
                 
-                X_train_encoded, y_train = self.feature_engineer.fit_transform()
-                X_test_encoded, y_test = self.feature_engineer.transform(test_data=test_data, y_included=True)
+                # Get raw X and y from train/test data
+                X_train_raw = train_data.drop(columns=[self.target_col])
+                X_test_raw = test_data.drop(columns=[self.target_col])
+                y_train, y_test = self._encode_target(
+                    train_data[self.target_col],
+                    test_data[self.target_col]
+                )
 
                 for model_idx in range(len(self.__ML_MODELS)):
                     model_info = self.__ML_MODELS[model_idx]
@@ -763,9 +737,12 @@ def start_experiment(
 
                     model = model_info['model']
 
-                    # Prepare data based on model type (native categorical vs encoded)
-                    X_train_final = self._prepare_data_for_model(model_name, X_train_encoded, X_train_cat_raw)
-                    X_test_final = self._prepare_data_for_model(model_name, X_test_encoded, X_test_cat_raw)
+                    # Get preprocessing pipeline for this specific model
+                    preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
+                    
+                    # Transform data using model-specific preprocessing
+                    X_train_final = preprocessing_pipeline.fit_transform(X_train_raw)
+                    X_test_final = preprocessing_pipeline.transform(X_test_raw)
 
                     try:
                         all_metrics = []
@@ -974,32 +951,51 @@ def save_model(
                 raise ValueError(error_msg)
         else: # If model is an object, we can't know its name, so we use its class name
             model_name = model.__class__.__name__
-            
-        # Initialize pipeline steps
-        pipeline_steps = []
-
-        # Initialize and setup feature engineering if needed
-        if not model_only:
-            # Add the feature engineering pipeline directly
-            pipeline_steps.extend(self.feature_engineer.pipeline.steps)
 
         # Handle full training scenario if required
         if full_train:
             already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
-            if not already_trained:
-                self.__logger.info("Training the model using the whole data")
+            
+            # Check if this is a native categorical model that will use a different pipeline structure
+            is_native_cat_model = (
+                model_name in NATIVE_CATEGORICAL_MODELS and 
+                hasattr(self, 'categorical_columns') and 
+                len(self.categorical_columns) > 0 and
+                not model_only  # Only use special flow if we're saving a pipeline
+            )
+            
+            # For native categorical models being saved as pipeline:
+            # ALWAYS retrain using the pipeline structure, even if previously "full trained"
+            # because the previous training used encode→swap, not the pipeline structure
+            needs_training = not already_trained or is_native_cat_model
+            
+            if needs_training:
+                if is_native_cat_model and already_trained:
+                    self.__logger.info(
+                        f"Retraining '{model_name}' to match pipeline structure for native categorical support."
+                    )
+                else:
+                    self.__logger.info("Training the model using the whole data")
+                    
                 self.feature_engineer.setup(data=self.data)
                 
-                # Get raw categoricals before encoding
-                X_cat_raw = self._get_raw_categorical_data(self.data)
+                # Get preprocessing pipeline for this model
+                preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
                 
-                X_train_encoded, y_train = self.feature_engineer.fit_transform()
+                # Prepare training data
+                X_raw = self.data.drop(columns=[self.target_col])
+                y_train = self._encode_target(self.data[self.target_col])
                 
-                # Prepare data for this specific model
-                X_train_final = self._prepare_data_for_model(model_name, X_train_encoded, X_cat_raw)
+                # Fit and transform data through the preprocessing pipeline
+                X_train_final = preprocessing_pipeline.fit_transform(X_raw)
                 
-                # Fit with proper cat_features handling
+                # Fit model with proper cat_features handling
                 self._fit_model(model, X_train_final, y_train, model_name)
+                
+                if is_native_cat_model:
+                    self.__logger.info(f"Model '{model_name}' trained using native categorical pipeline.")
+                else:
+                    self.__logger.info(f"Model '{model_name}' trained with full data.")
 
                 # find the model in leaderboard and update the full_train to True, and update the model object in there
                 for model_info in self.__model_training_info:
@@ -1023,19 +1019,8 @@ def save_model(
             
             return model
 
-        # Warn users about native categorical models in Pipeline mode
-        if model_name in NATIVE_CATEGORICAL_MODELS and hasattr(self, 'categorical_columns') and self.categorical_columns:
-            self.__logger.warning(
-                f"'{model_name}' supports native categorical features, but Pipeline mode encodes categorical data. "
-                f"For optimal performance, consider using 'model_only=True' and handle feature engineering separately, "
-                f"or use the 'predict()' method directly which handles native categoricals automatically."
-            )
-
-        # Add the model to the pipeline
-        pipeline_steps.append(('model', model))
-
-        # Create the pipeline
-        pipeline = Pipeline(pipeline_steps)
+        # Build pipeline with proper handling for native categorical models
+        pipeline = self._get_model_pipeline(model, include_model=True)
 
         # Save the pipeline
         try:
@@ -1074,6 +1059,68 @@ def _check_if_model_is_full_trained(self, model_name: str, model_taken_from_lead
                     return True
         return False
 
+    def _is_native_categorical_model(self, model_name: str) -> bool:
+        """Check if model supports native categorical features and the data has categorical columns."""
+        return (
+            model_name in NATIVE_CATEGORICAL_MODELS and 
+            hasattr(self, 'categorical_columns') and 
+            len(self.categorical_columns) > 0
+        )
+    
+    def _get_preprocessing_steps(self, model_name: str) -> list:
+        """
+        Returns the appropriate preprocessing steps for a given model.
+        
+        For native categorical models: no encoder, uses CategoricalTypeConverter
+        For other models: includes encoder
+        
+        Parameters
+        ----------
+        model_name : str
+            The name of the model
+            
+        Returns
+        -------
+        list
+            List of preprocessing steps as (name, transformer) tuples
+        """
+        if self._is_native_categorical_model(model_name):
+            # Pipeline without encoder, with CategoricalTypeConverter
+            steps = [
+                (name, step) for name, step in self.feature_engineer.pipeline.steps 
+                if name != 'encoder'
+            ]
+            steps.append(('cat_type_converter', CategoricalTypeConverter(list(self.categorical_columns))))
+        else:
+            # Standard pipeline with encoder
+            steps = list(self.feature_engineer.pipeline.steps)
+        
+        return steps
+    
+    def _get_model_pipeline(self, model, include_model: bool = True) -> Pipeline:
+        """
+        Returns a complete Pipeline for a given model.
+        
+        Parameters
+        ----------
+        model : object
+            The model object
+        include_model : bool, optional
+            Whether to include the model as the last step (default: True)
+            
+        Returns
+        -------
+        Pipeline
+            sklearn Pipeline with preprocessing steps (and optionally the model)
+        """
+        model_name = model.__class__.__name__
+        steps = self._get_preprocessing_steps(model_name)
+        
+        if include_model:
+            steps.append(('model', model))
+        
+        return Pipeline(steps)
+
     def _predict_helper(
         self,
         test_data: pd.DataFrame,
@@ -1097,8 +1144,8 @@ def _predict_helper(
             if extra: error_msg += f" Extra: {extra}."
             raise ValueError(error_msg)
         
-        model_taken_from_leaderboard = False # If the model object is from leaderboard, track this
-
+        # Get model from leaderboard or use provided model
+        model_taken_from_leaderboard = False
         if model is None:
             model = self.get_best_models()
             model_name = self.__last_searched_model_name
@@ -1111,38 +1158,38 @@ def _predict_helper(
             model_name = model
             model = self.get_model_by_name(model)
             model_taken_from_leaderboard = True
-        else: # If model is an object, we can't know its name, so we use its class name
+        else:
             model_name = model.__class__.__name__
         
-        # Prepare training data if needed
+        # Get the preprocessing pipeline for this model (consistent with save_model)
+        self.feature_engineer.setup(data=self.data)
+        preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
+        
+        # Train model on full data if needed
         if full_train:
-            # Check If model_taken_from_leaderboard is True and Full Train in self.__model_training_info is True, then we don't need to train the model again
             already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
             if not already_trained:
                 self.__logger.info("Training the model using the whole data")
-                self.feature_engineer.setup(data=self.data)
                 
-                # Get raw categoricals before encoding
-                X_cat_raw = self._get_raw_categorical_data(self.data)
+                # Prepare training data
+                X_raw = self.data.drop(columns=[self.target_col])
+                y_train = self._encode_target(self.data[self.target_col])
                 
-                X_train_encoded, y_train = self.feature_engineer.fit_transform()
-                X_train_final = self._prepare_data_for_model(model_name, X_train_encoded, X_cat_raw)
+                # Fit and transform through preprocessing pipeline
+                X_train_final = preprocessing_pipeline.fit_transform(X_raw)
                 self._fit_model(model, X_train_final, y_train, model_name)
 
-                # find the model in leaderboard and update the full_train to True, and update the model object in there
+                # Update leaderboard
                 for model_info in self.__model_training_info:
                     for name, info in model_info.items():
                         if name == model_name:
                             info["model_stats"]["Full Train"] = True
                             info["model"] = model
                             break
-                # Update leaderboard
                 self.get_best_models()
 
-        # Transform test data and prepare for model
-        X_test_encoded = self.feature_engineer.transform(test_data)
-        X_test_cat_raw = self._get_raw_categorical_data(test_data)
-        X_test = self._prepare_data_for_model(model_name, X_test_encoded, X_test_cat_raw)
+        # Transform test data through the same preprocessing pipeline
+        X_test = preprocessing_pipeline.transform(test_data)
 
         return model, X_test
 
@@ -1209,7 +1256,8 @@ def predict_proba(
         model, X_test = self._predict_helper(test_data, model, full_train)
         return model.predict_proba(X_test)
 
-    def __get_holdout_model_from_stats(self, model_name: str) -> object:
+    def __get_holdout_model_from_stats(self, model_name: str) -> Optional[dict]:
+        """Returns dict with 'model' and 'preprocessing_pipeline' keys, or None."""
         if self._holdout_model_objects is None or self._holdout_model_objects == {}:
             return None
         return self._holdout_model_objects.get(model_name)
@@ -1220,13 +1268,18 @@ def __add_holdout_model_to_stats(self, model: object, model_name: Optional[str]
 
         model_copy = deepcopy(model)
         
-        # Prepare holdout data for this model (use raw categoricals for native-categorical models)
-        X_train_final = self._prepare_data_for_model(model_name, self.X_train, self.X_train_cat_raw)
+        # Get preprocessing pipeline for this model
+        preprocessing_pipeline = self._get_model_pipeline(model_copy, include_model=False)
         
-        # Fit with proper categorical handling
+        # Transform holdout training data and fit model
+        X_train_final = preprocessing_pipeline.fit_transform(self.X_train_raw)
         self._fit_model(model_copy, X_train_final, self.y_train, model_name)
         
-        self._holdout_model_objects[model_name] = model_copy
+        # Store the fitted preprocessing pipeline with the model for later use
+        self._holdout_model_objects[model_name] = {
+            'model': model_copy,
+            'preprocessing_pipeline': preprocessing_pipeline
+        }
         return model_copy
     
     def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_importance", **kwargs):
@@ -1297,17 +1350,22 @@ def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_
         elif isinstance(model, str):
             model_name = model
             model = self.get_model_by_name(model)
-        else: # If model is an object, we can't know its name, so we use its class name
+        else:
             model_name = model.__class__.__name__
         
-        if self.__get_holdout_model_from_stats(model_name) is not None:
-            model = self.__get_holdout_model_from_stats(model_name)
+        # Get or create holdout model with its preprocessing pipeline
+        holdout_data = self.__get_holdout_model_from_stats(model_name)
+        if holdout_data is not None:
+            model = holdout_data['model']
+            preprocessing_pipeline = holdout_data['preprocessing_pipeline']
         else:
             model = self.__add_holdout_model_to_stats(model, model_name)
+            holdout_data = self.__get_holdout_model_from_stats(model_name)
+            preprocessing_pipeline = holdout_data['preprocessing_pipeline']
 
-        # Prepare holdout data for this model (use raw categoricals for native-categorical models)
-        X_train_final = self._prepare_data_for_model(model_name, self.X_train, self.X_train_cat_raw)
-        X_test_final = self._prepare_data_for_model(model_name, self.X_test, self.X_test_cat_raw)
+        # Transform holdout data using the model's preprocessing pipeline
+        X_train_final = preprocessing_pipeline.transform(self.X_train_raw)
+        X_test_final = preprocessing_pipeline.transform(self.X_test_raw)
 
         # If kind expects predictions
         if kind in ["confusion_matrix"]:
@@ -1319,7 +1377,7 @@ def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_
 
         if kind == "feature_importance":
             if not hasattr(self, 'feature_names'):
-                self.feature_names = list(self.X_train.columns)
+                self.feature_names = list(self.X_train_raw.columns)
             graph = plot_feature_importance(model, self.feature_names, **kwargs)
         elif kind == "confusion_matrix":
             graph = plot_confusion_matrix(self.y_test, preds, self.y_class_mapping, **kwargs)
@@ -1751,32 +1809,13 @@ def _show_tuning_report(tuning_report: Optional[dict] = None):
                 logging_to_file=self.logging_to_file
             ))
 
-        # Create the ModelTuner object If It's not created before, avoid creating it everytime tune_model() function is called
+        # Create the ModelTuner object only if it has not been created yet
         if not hasattr(self, 'model_tuner'):
-            if self.__ML_TASK_TYPE == 'Classification' and self.y.dtype in ['object', 'category']:
-                y_encoded = pd.Series(self.feature_engineer.target_encoder.fit_transform(self.y), name=self.target_col)
-                y_encoded.index = self.y.index
-            else:
-                y_encoded = self.y # No need to encode the target for regression or if the target is already encoded
+            y_encoded = self._encode_target(self.y)
             self.model_tuner = ModelTuner(self.__ML_TASK_TYPE, self.X, y_encoded, self.logging_to_file)
 
-        # Get model name for native categorical check
-        model_name = model.__class__.__name__
-
-        # Check if model supports native categorical features
-        if model_name in NATIVE_CATEGORICAL_MODELS and hasattr(self, 'categorical_columns') and len(self.categorical_columns) > 0:
-            pipeline_steps_without_encoder = [
-                (name, step) for name, step in self.feature_engineer.pipeline.steps 
-                if name != 'encoder'
-            ]
-            # Add categorical type converter for native categorical models
-            pipeline_steps_without_encoder.append(
-                ('cat_type_converter', CategoricalTypeConverter(list(self.categorical_columns)))
-            )
-            pipeline = Pipeline(steps=pipeline_steps_without_encoder + [('model', model)])
-        else:
-            # Standard pipeline with encoding for non-native categorical models
-            pipeline = Pipeline(steps=self.feature_engineer.pipeline.steps + [('model', model)])
+        # Build pipeline with proper handling for native categorical models
+        pipeline = self._get_model_pipeline(model, include_model=True)
 
         self.__logger.info(f"[PROCESS] Model Tuning process started with '{tuning_method}' method")
         tuning_method = tuning_method.lower()

From 447d9f2299c358194d572d82641b46bf8c12015d Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sun, 11 Jan 2026 18:45:15 +0300
Subject: [PATCH 05/14] Delete unnecessary retrain in save_model

---
 flexml/structures/supervised_base.py | 34 ++++------------------------
 1 file changed, 5 insertions(+), 29 deletions(-)

diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index bd5709f..aed3b4f 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -956,27 +956,9 @@ def save_model(
         if full_train:
             already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
             
-            # Check if this is a native categorical model that will use a different pipeline structure
-            is_native_cat_model = (
-                model_name in NATIVE_CATEGORICAL_MODELS and 
-                hasattr(self, 'categorical_columns') and 
-                len(self.categorical_columns) > 0 and
-                not model_only  # Only use special flow if we're saving a pipeline
-            )
-            
-            # For native categorical models being saved as pipeline:
-            # ALWAYS retrain using the pipeline structure, even if previously "full trained"
-            # because the previous training used encode→swap, not the pipeline structure
-            needs_training = not already_trained or is_native_cat_model
-            
-            if needs_training:
-                if is_native_cat_model and already_trained:
-                    self.__logger.info(
-                        f"Retraining '{model_name}' to match pipeline structure for native categorical support."
-                    )
-                else:
-                    self.__logger.info("Training the model using the whole data")
-                    
+            if not already_trained:
+                self.__logger.info("Training the model using the whole data")
+                
                 self.feature_engineer.setup(data=self.data)
                 
                 # Get preprocessing pipeline for this model
@@ -989,22 +971,16 @@ def save_model(
                 # Fit and transform data through the preprocessing pipeline
                 X_train_final = preprocessing_pipeline.fit_transform(X_raw)
                 
-                # Fit model with proper cat_features handling
+                # Fit model
                 self._fit_model(model, X_train_final, y_train, model_name)
-                
-                if is_native_cat_model:
-                    self.__logger.info(f"Model '{model_name}' trained using native categorical pipeline.")
-                else:
-                    self.__logger.info(f"Model '{model_name}' trained with full data.")
 
-                # find the model in leaderboard and update the full_train to True, and update the model object in there
+                # Update leaderboard
                 for model_info in self.__model_training_info:
                     for name, info in model_info.items():
                         if name == model_name:
                             info["model_stats"]["Full Train"] = True
                             info["model"] = model
                             break
-                # Update leaderboard
                 self.get_best_models()
 
         # If no feature pipeline is included, return the model directly

From fe52039544931a650546ce41d8b820cad3df1b24 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sun, 11 Jan 2026 18:59:37 +0300
Subject: [PATCH 06/14] Improve code quality

---
 flexml/structures/supervised_base.py | 56 +++++++++-------------------
 1 file changed, 18 insertions(+), 38 deletions(-)

diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index aed3b4f..3609586 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -316,24 +316,17 @@ def __prepare_holdout_data(self, test_size: Optional[float] = None):
         )[0]
         train_labels, test_labels = holdout_cv_splits[0], holdout_cv_splits[1]
 
-        train_data = pd.concat([
-            self.X.loc[train_labels], 
-            self.y.loc[train_labels]
-        ], axis=1)
-        test_data = pd.concat([
-            self.X.loc[test_labels],
-            self.y.loc[test_labels]
-        ], axis=1)
-
+        # Setup feature engineer with train data
+        train_data = pd.concat([self.X.loc[train_labels], self.y.loc[train_labels]], axis=1)
         self.feature_engineer.setup(data=train_data)
         self.categorical_columns = self.feature_engineer.categorical_columns
         
-        # Store raw holdout data (preprocessing will be done per-model when needed)
-        self.X_train_raw = train_data.drop(columns=[self.target_col])
-        self.X_test_raw = test_data.drop(columns=[self.target_col])
+        # Store raw holdout data (use X/y directly instead of concat→drop)
+        self.X_train_raw = self.X.loc[train_labels]
+        self.X_test_raw = self.X.loc[test_labels]
         self.y_train, self.y_test = self._encode_target(
-            train_data[self.target_col], 
-            test_data[self.target_col]
+            self.y.loc[train_labels], 
+            self.y.loc[test_labels]
         )
         
         self.feature_names = list(self.X_train_raw.columns)
@@ -708,23 +701,16 @@ def start_experiment(
                     train_labels = train_idx
                     test_labels = test_idx
                 
-                train_data = pd.concat([
-                    self.X.loc[train_labels], 
-                    self.y.loc[train_labels]
-                ], axis=1)
-                test_data = pd.concat([
-                    self.X.loc[test_labels],
-                    self.y.loc[test_labels]
-                ], axis=1)
-                
+                # Setup feature engineer with train data
+                train_data = pd.concat([self.X.loc[train_labels], self.y.loc[train_labels]], axis=1)
                 self.feature_engineer.setup(data=train_data)
                 
-                # Get raw X and y from train/test data
-                X_train_raw = train_data.drop(columns=[self.target_col])
-                X_test_raw = test_data.drop(columns=[self.target_col])
+                # Use X/y directly instead of concat→drop
+                X_train_raw = self.X.loc[train_labels]
+                X_test_raw = self.X.loc[test_labels]
                 y_train, y_test = self._encode_target(
-                    train_data[self.target_col],
-                    test_data[self.target_col]
+                    self.y.loc[train_labels],
+                    self.y.loc[test_labels]
                 )
 
                 for model_idx in range(len(self.__ML_MODELS)):
@@ -964,12 +950,9 @@ def save_model(
                 # Get preprocessing pipeline for this model
                 preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
                 
-                # Prepare training data
-                X_raw = self.data.drop(columns=[self.target_col])
-                y_train = self._encode_target(self.data[self.target_col])
-                
                 # Fit and transform data through the preprocessing pipeline
-                X_train_final = preprocessing_pipeline.fit_transform(X_raw)
+                X_train_final = preprocessing_pipeline.fit_transform(self.X)
+                y_train = self._encode_target(self.y)
                 
                 # Fit model
                 self._fit_model(model, X_train_final, y_train, model_name)
@@ -1147,12 +1130,9 @@ def _predict_helper(
             if not already_trained:
                 self.__logger.info("Training the model using the whole data")
                 
-                # Prepare training data
-                X_raw = self.data.drop(columns=[self.target_col])
-                y_train = self._encode_target(self.data[self.target_col])
-                
                 # Fit and transform through preprocessing pipeline
-                X_train_final = preprocessing_pipeline.fit_transform(X_raw)
+                X_train_final = preprocessing_pipeline.fit_transform(self.X)
+                y_train = self._encode_target(self.y)
                 self._fit_model(model, X_train_final, y_train, model_name)
 
                 # Update leaderboard

From b108716f13c1f4dcff707c22775444f01b0b89a7 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sun, 11 Jan 2026 19:02:45 +0300
Subject: [PATCH 07/14] Improve supervised test coverage

---
 tests/test_supervised.py | 46 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/tests/test_supervised.py b/tests/test_supervised.py
index 992760a..1cb787b 100644
--- a/tests/test_supervised.py
+++ b/tests/test_supervised.py
@@ -266,4 +266,48 @@ def test_25_plot_multiclass_classification_shap_summary(self):
 
     def test_26_plot_multiclass_classification_shap_violin(self):
         exp_obj = self.test_config['MulticlassClassification']['exp_obj']
-        exp_obj.plot("RandomForestClassifier", kind="shap_violin")
\ No newline at end of file
+        exp_obj.plot("RandomForestClassifier", kind="shap_violin")
+
+    def test_27_native_categorical_pipeline_consistency(self):
+        """Test that saved pipeline predictions match exp.predict_proba() for native categorical models"""
+        exp_obj = self.test_config['BinaryClassification']['exp_obj']
+        test_data = self.test_config['BinaryClassification'].get('data').drop(columns=['target'])
+        
+        # Get predictions via FlexML (this trains with full data)
+        flexml_probs = exp_obj.predict_proba(test_data, model='LGBMClassifier', full_train=True)
+        
+        # Save pipeline (should use already trained model, no retraining)
+        save_path = "test_native_cat_pipeline.pkl"
+        exp_obj.save_model(model='LGBMClassifier', save_path=save_path, model_only=False, full_train=True)
+        
+        # Load and predict via pipeline
+        with open(save_path, 'rb') as f:
+            loaded_pipeline = pickle.load(f)
+        pipeline_probs = loaded_pipeline.predict_proba(test_data)
+        
+        # Predictions should match
+        np.testing.assert_array_almost_equal(flexml_probs, pipeline_probs, decimal=5,
+            err_msg="Loaded pipeline predictions don't match FlexML predictions")
+        os.remove(save_path)
+
+    def test_28_predict_column_mismatch_error(self):
+        """Test that predict raises proper error for column mismatch"""
+        exp_obj = self.test_config['Regression']['exp_obj']
+        test_data = self.test_config['Regression'].get('data').drop(columns=['target'])
+        
+        # Remove a column to create mismatch
+        bad_data = test_data.drop(columns=[test_data.columns[0]])
+        
+        with self.assertRaises(ValueError) as context:
+            exp_obj.predict(bad_data, full_train=False)
+        
+        self.assertIn("Missing", str(context.exception))
+
+    def test_29_get_model_by_invalid_name(self):
+        """Test get_model_by_name raises error for invalid model name"""
+        exp_obj = self.test_config['Regression']['exp_obj']
+        
+        with self.assertRaises(ValueError) as context:
+            exp_obj.get_model_by_name("NonExistentModel")
+        
+        self.assertIn("not found", str(context.exception))
\ No newline at end of file

From 46b593f352be12fda12fcbca9a8132798fbee797 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sun, 11 Jan 2026 19:30:20 +0300
Subject: [PATCH 08/14] Error fix for ordinal encoding

---
 flexml/_feature_engineer.py          | 36 ++++++++++++++++++++++++----
 flexml/structures/supervised_base.py |  7 +++++-
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/flexml/_feature_engineer.py b/flexml/_feature_engineer.py
index e306e33..b9ef060 100644
--- a/flexml/_feature_engineer.py
+++ b/flexml/_feature_engineer.py
@@ -33,16 +33,19 @@ class CategoricalTypeConverter(BaseEstimator, TransformerMixin):
     """
     A transformer to convert categorical columns to 'category' dtype.
     Used for tree-based models that support native categorical features.
+    Supports ordered categories via ordinal_encode_map.
     """
-    def __init__(self, categorical_columns: Optional[List[str]] = None):
+    def __init__(self, categorical_columns: Optional[List[str]] = None, ordinal_encode_map: Optional[Dict[str, List]] = None):
         self.categorical_columns = categorical_columns or []
+        self.ordinal_encode_map = ordinal_encode_map or {}
 
     def fit(self, X, y=None):
         return self
 
     def transform(self, X):
         """
-        Converts specified categorical columns to 'category' dtype
+        Converts specified categorical columns to 'category' dtype.
+        For columns in ordinal_encode_map, creates ordered categorical with specified order.
         
         Returns
         -------
@@ -52,7 +55,13 @@ def transform(self, X):
         X = X.copy()
         for col in self.categorical_columns:
             if col in X.columns:
-                X[col] = X[col].astype('category')
+                if col in self.ordinal_encode_map:
+                    # Create ordered categorical with specified order
+                    categories = self.ordinal_encode_map[col]
+                    X[col] = pd.Categorical(X[col].astype(str), categories=categories, ordered=True)
+                else:
+                    # Regular unordered categorical
+                    X[col] = X[col].astype('category')
         return X
     
 
@@ -74,6 +83,8 @@ def fit(self, X, y=None):
         return self
 
     def transform(self, X) -> pd.DataFrame:
+        X = X.copy()  # Avoid modifying original data
+        
         # Categorical columns are converted to string
         categorical_cols = X.select_dtypes(exclude=['number']).columns
         X[categorical_cols] = X[categorical_cols].astype(str)
@@ -131,9 +142,17 @@ def __init__(
         self.ordinal_encoders = {}
 
     def fit(self, X, y=None):
-        # Categorical columns are converted to string
+        X = X.copy()  # Avoid modifying original data
+        
+        # First, convert all non-numeric columns to string (original behavior)
         categorical_cols = X.select_dtypes(exclude=['number']).columns
         X[categorical_cols] = X[categorical_cols].astype(str)
+        
+        # Also ensure columns in encoding_method_mapper are string 
+        # (handles case where column is numeric but needs encoding)
+        for col in self.encoding_method_mapper.keys():
+            if col in X.columns and col not in categorical_cols:
+                X[col] = X[col].astype(str)
 
         for col, method in self.encoding_method_mapper.items():
             if method == "label_encoder":
@@ -160,9 +179,16 @@ def fit(self, X, y=None):
         return self
 
     def transform(self, X) -> pd.DataFrame:
-        # Categorical columns are converted to string
+        X = X.copy()  # Avoid modifying original data
+        
+        # First, convert all non-numeric columns to string (original behavior)
         categorical_cols = X.select_dtypes(exclude=['number']).columns
         X[categorical_cols] = X[categorical_cols].astype(str)
+        
+        # Also ensure columns in encoding_method_mapper are string
+        for col in self.encoding_method_mapper.keys():
+            if col in X.columns and col not in categorical_cols:
+                X[col] = X[col].astype(str)
 
         for col, method in self.encoding_method_mapper.items():
             if method == "label_encoder":
diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index 3609586..d32cb9b 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -1049,7 +1049,12 @@ def _get_preprocessing_steps(self, model_name: str) -> list:
                 (name, step) for name, step in self.feature_engineer.pipeline.steps 
                 if name != 'encoder'
             ]
-            steps.append(('cat_type_converter', CategoricalTypeConverter(list(self.categorical_columns))))
+            # Pass ordinal_encode_map to preserve category ordering for ordinal columns
+            ordinal_map = getattr(self.feature_engineer, 'ordinal_encode_map', None) or {}
+            steps.append(('cat_type_converter', CategoricalTypeConverter(
+                list(self.categorical_columns), 
+                ordinal_encode_map=ordinal_map
+            )))
         else:
             # Standard pipeline with encoder
             steps = list(self.feature_engineer.pipeline.steps)

From 1a543fd9ffde947c31ede7e62a8820f01faa893a Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sun, 11 Jan 2026 23:51:12 +0300
Subject: [PATCH 09/14] Ensure pipeline consistency in save_model()

---
 flexml/structures/supervised_base.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index d32cb9b..915385f 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -938,7 +938,8 @@ def save_model(
         else: # If model is an object, we can't know its name, so we use its class name
             model_name = model.__class__.__name__
 
-        # Handle full training scenario if required
+
+        fitted_preprocessing_pipeline = None
         if full_train:
             already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
             
@@ -948,10 +949,10 @@ def save_model(
                 self.feature_engineer.setup(data=self.data)
                 
                 # Get preprocessing pipeline for this model
-                preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
+                fitted_preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
                 
                 # Fit and transform data through the preprocessing pipeline
-                X_train_final = preprocessing_pipeline.fit_transform(self.X)
+                X_train_final = fitted_preprocessing_pipeline.fit_transform(self.X)
                 y_train = self._encode_target(self.y)
                 
                 # Fit model
@@ -978,8 +979,13 @@ def save_model(
             
             return model
 
-        # Build pipeline with proper handling for native categorical models
-        pipeline = self._get_model_pipeline(model, include_model=True)
+        if fitted_preprocessing_pipeline is not None:
+            # Combine the exact fitted preprocessing steps with the fitted model
+            steps = list(fitted_preprocessing_pipeline.steps) + [('model', model)]
+            pipeline = Pipeline(steps)
+        else:
+            # Model was already trained, get pipeline from feature_engineer
+            pipeline = self._get_model_pipeline(model, include_model=True)
 
         # Save the pipeline
         try:

From 1a7d6d536133950577414220d5f44e470a0f5003 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Mon, 12 Jan 2026 00:02:21 +0300
Subject: [PATCH 10/14] Map unseen categories to NaN

---
 flexml/_feature_engineer.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/flexml/_feature_engineer.py b/flexml/_feature_engineer.py
index b9ef060..975e918 100644
--- a/flexml/_feature_engineer.py
+++ b/flexml/_feature_engineer.py
@@ -56,9 +56,12 @@ def transform(self, X):
         for col in self.categorical_columns:
             if col in X.columns:
                 if col in self.ordinal_encode_map:
-                    # Create ordered categorical with specified order
-                    categories = self.ordinal_encode_map[col]
-                    X[col] = pd.Categorical(X[col].astype(str), categories=categories, ordered=True)
+                    # Handle unseen categories by mapping them to NaN
+                    categories = [str(c) for c in self.ordinal_encode_map[col]]
+                    col_values = X[col].astype(str)
+                    known_mask = col_values.isin(categories)
+                    col_values = col_values.where(known_mask, other=np.nan)
+                    X[col] = pd.Categorical(col_values, categories=categories, ordered=True)
                 else:
                     # Regular unordered categorical
                     X[col] = X[col].astype('category')

From ba6424b79849de964e2fbba882e6fce7ce99741f Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Mon, 12 Jan 2026 00:13:47 +0300
Subject: [PATCH 11/14] Improve categorical feature coverage

---
 flexml/_feature_engineer.py | 14 +++++++++-----
 tests/test_supervised.py    | 20 +++++++++++++++++---
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/flexml/_feature_engineer.py b/flexml/_feature_engineer.py
index 975e918..92df339 100644
--- a/flexml/_feature_engineer.py
+++ b/flexml/_feature_engineer.py
@@ -36,8 +36,9 @@ class CategoricalTypeConverter(BaseEstimator, TransformerMixin):
     Supports ordered categories via ordinal_encode_map.
     """
     def __init__(self, categorical_columns: Optional[List[str]] = None, ordinal_encode_map: Optional[Dict[str, List]] = None):
-        self.categorical_columns = categorical_columns or []
-        self.ordinal_encode_map = ordinal_encode_map or {}
+        # Keep original values for sklearn clone compatibility
+        self.categorical_columns = categorical_columns
+        self.ordinal_encode_map = ordinal_encode_map
 
     def fit(self, X, y=None):
         return self
@@ -53,11 +54,14 @@ def transform(self, X):
             A DataFrame with categorical columns converted to 'category' dtype
         """
         X = X.copy()
-        for col in self.categorical_columns:
+        categorical_cols = self.categorical_columns or []
+        ordinal_map = self.ordinal_encode_map or {}
+        
+        for col in categorical_cols:
             if col in X.columns:
-                if col in self.ordinal_encode_map:
+                if col in ordinal_map:
                     # Handle unseen categories by mapping them to NaN
-                    categories = [str(c) for c in self.ordinal_encode_map[col]]
+                    categories = [str(c) for c in ordinal_map[col]]
                     col_values = X[col].astype(str)
                     known_mask = col_values.isin(categories)
                     col_values = col_values.where(known_mask, other=np.nan)
diff --git a/tests/test_supervised.py b/tests/test_supervised.py
index 1cb787b..3fc4601 100644
--- a/tests/test_supervised.py
+++ b/tests/test_supervised.py
@@ -14,19 +14,33 @@ class TestRegression(unittest.TestCase):
     logger = get_logger(__name__, "TEST")
     logger.setLevel("DEBUG")
 
+    @staticmethod
+    def _add_synthetic_categorical_columns(df):
+        """Add string columns 'category_A' (3 levels) and 'category_B' (4 levels) to df in place and return it"""
+        n_rows = len(df)
+        np.random.seed(42)
+        # NumPy's global RNG was just re-seeded, so the generated values depend only on n_rows
+        df['category_A'] = np.random.choice(['low', 'medium', 'high'], n_rows)
+        df['category_B'] = np.random.choice(['red', 'green', 'blue', 'yellow'], n_rows)
+        return df
+
     test_config = {
         'Regression': {
-            'data': load_diabetes(as_frame=True)['frame'],
+            'data': _add_synthetic_categorical_columns.__func__(load_diabetes(as_frame=True)['frame'].copy()),
             'target_col': 'target',
             'exp_obj': None
         },
         'BinaryClassification': {
-            'data': load_breast_cancer(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'No', 1: 'Yes'})),
+            'data': _add_synthetic_categorical_columns.__func__(
+                load_breast_cancer(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'No', 1: 'Yes'})).copy()
+            ),
             'target_col': 'target',
             'exp_obj': None
         },
         'MulticlassClassification': {
-            'data': load_iris(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'Iris-Setosa', 1: 'Iris-Versicolor', 2: 'Iris-Virginica'})),
+            'data': _add_synthetic_categorical_columns.__func__(
+                load_iris(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'Iris-Setosa', 1: 'Iris-Versicolor', 2: 'Iris-Virginica'})).copy()
+            ),
             'target_col': 'target',
             'exp_obj': None
         }

From 9f9fdb8cf62d5c02a8628e24106bbcbeba9ca751 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sat, 24 Jan 2026 23:08:21 +0300
Subject: [PATCH 12/14] Error fix / shap violin graph plot error due to
 category dtype

---
 flexml/helpers/plot_model_graphs.py  |  4 ++
 flexml/structures/supervised_base.py | 76 +++++++++++++++-------------
 2 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/flexml/helpers/plot_model_graphs.py b/flexml/helpers/plot_model_graphs.py
index 8c54235..333f044 100644
--- a/flexml/helpers/plot_model_graphs.py
+++ b/flexml/helpers/plot_model_graphs.py
@@ -410,6 +410,10 @@ def plot_shap(
         if shap_type == 'shap_summary':
             shap.summary_plot(shap_values, X_test)
         elif shap_type == 'shap_violin':
+            # Violin plot (unlike summary plot) cannot handle 'category' dtype: encode on a copy, not the caller's frame
+            X_test = X_test.copy()
+            for col in X_test.select_dtypes(include=['category']).columns:
+                X_test[col] = X_test[col].cat.codes
             shap.plots.violin(shap_values, X_test)
         else:
             return f"Invalid shap_type: {shap_type}"
diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index 915385f..35d4865 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -727,22 +727,22 @@ def start_experiment(
                     preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
                     
                     # Transform data using model-specific preprocessing
-                    X_train_final = preprocessing_pipeline.fit_transform(X_train_raw)
-                    X_test_final = preprocessing_pipeline.transform(X_test_raw)
+                    X_train_processed = preprocessing_pipeline.fit_transform(X_train_raw)
+                    X_test_processed = preprocessing_pipeline.transform(X_test_raw)
 
                     try:
                         all_metrics = []
                         all_times = []
 
                         t_start = time()
-                        self._fit_model(model, X_train_final, y_train, model_name)
+                        self._fit_model(model, X_train_processed, y_train, model_name)
                         t_end = time()
 
                         time_taken = round(t_end - t_start, 2)
                         if self.__ML_TASK_TYPE == "Classification" and hasattr(model, 'predict_proba'):
-                            y_pred = model.predict_proba(X_test_final)
+                            y_pred = model.predict_proba(X_test_processed)
                         else:
-                            y_pred = model.predict(X_test_final)
+                            y_pred = model.predict(X_test_processed)
 
                         model_perf = evaluate_model_perf(
                             self.__ML_TASK_TYPE,
@@ -952,11 +952,11 @@ def save_model(
                 fitted_preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
                 
                 # Fit and transform data through the preprocessing pipeline
-                X_train_final = fitted_preprocessing_pipeline.fit_transform(self.X)
+                X_train_processed = fitted_preprocessing_pipeline.fit_transform(self.X)
                 y_train = self._encode_target(self.y)
                 
                 # Fit model
-                self._fit_model(model, X_train_final, y_train, model_name)
+                self._fit_model(model, X_train_processed, y_train, model_name)
 
                 # Update leaderboard
                 for model_info in self.__model_training_info:
@@ -1136,24 +1136,28 @@ def _predict_helper(
         preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
         
         # Train model on full data if needed
-        if full_train:
-            already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
-            if not already_trained:
-                self.__logger.info("Training the model using the whole data")
-                
-                # Fit and transform through preprocessing pipeline
-                X_train_final = preprocessing_pipeline.fit_transform(self.X)
-                y_train = self._encode_target(self.y)
-                self._fit_model(model, X_train_final, y_train, model_name)
+        already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
+        
+        if full_train and not already_trained:
+            # Fit the pipeline on full training data for consistent transformations
+            X_train_processed = preprocessing_pipeline.fit_transform(self.X)
+            
+            self.__logger.info("Training the model using the whole data")
+            
+            y_train = self._encode_target(self.y)
+            self._fit_model(model, X_train_processed, y_train, model_name)
 
-                # Update leaderboard
-                for model_info in self.__model_training_info:
-                    for name, info in model_info.items():
-                        if name == model_name:
-                            info["model_stats"]["Full Train"] = True
-                            info["model"] = model
-                            break
-                self.get_best_models()
+            # Update leaderboard
+            for model_info in self.__model_training_info:
+                for name, info in model_info.items():
+                    if name == model_name:
+                        info["model_stats"]["Full Train"] = True
+                        info["model"] = model
+                        break
+            self.get_best_models()
+        else:
+            # Fit only the preprocessing pipeline (model is not retrained). NOTE(review): fits on full self.X even when full_train=False — confirm this matches the data the model was fit on
+            preprocessing_pipeline.fit(self.X)
 
         # Transform test data through the same preprocessing pipeline
         X_test = preprocessing_pipeline.transform(test_data)
@@ -1239,8 +1243,8 @@ def __add_holdout_model_to_stats(self, model: object, model_name: Optional[str]
         preprocessing_pipeline = self._get_model_pipeline(model_copy, include_model=False)
         
         # Transform holdout training data and fit model
-        X_train_final = preprocessing_pipeline.fit_transform(self.X_train_raw)
-        self._fit_model(model_copy, X_train_final, self.y_train, model_name)
+        X_train_processed = preprocessing_pipeline.fit_transform(self.X_train_raw)
+        self._fit_model(model_copy, X_train_processed, self.y_train, model_name)
         
         # Store the fitted preprocessing pipeline with the model for later use
         self._holdout_model_objects[model_name] = {
@@ -1331,33 +1335,33 @@ def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_
             preprocessing_pipeline = holdout_data['preprocessing_pipeline']
 
         # Transform holdout data using the model's preprocessing pipeline
-        X_train_final = preprocessing_pipeline.transform(self.X_train_raw)
-        X_test_final = preprocessing_pipeline.transform(self.X_test_raw)
+        X_train_processed = preprocessing_pipeline.transform(self.X_train_raw)
+        X_test_processed = preprocessing_pipeline.transform(self.X_test_raw)
 
         # If kind expects predictions
         if kind in ["confusion_matrix"]:
-            preds = model.predict(X_test_final)
+            preds = model.predict(X_test_processed)
         elif kind in ["roc_curve", "calibration_curve"]:
-            preds = model.predict_proba(X_test_final)
+            preds = model.predict_proba(X_test_processed)
 
         graph = None
 
         if kind == "feature_importance":
-            if not hasattr(self, 'feature_names'):
-                self.feature_names = list(self.X_train_raw.columns)
-            graph = plot_feature_importance(model, self.feature_names, **kwargs)
+            # Use feature names from transformed data (accounts for encoding)
+            feature_names = list(X_train_processed.columns) if hasattr(X_train_processed, 'columns') else None
+            graph = plot_feature_importance(model, feature_names, **kwargs)
         elif kind == "confusion_matrix":
             graph = plot_confusion_matrix(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif kind == "roc_curve":
             graph = plot_roc_curve(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif kind == "residuals":
-            graph = plot_residuals(model, X_train_final, self.y_train, X_test_final, self.y_test, **kwargs)
+            graph = plot_residuals(model, X_train_processed, self.y_train, X_test_processed, self.y_test, **kwargs)
         elif kind == "prediction_error":
-            graph = plot_prediction_error(model, X_train_final, self.y_train, X_test_final, self.y_test, **kwargs)
+            graph = plot_prediction_error(model, X_train_processed, self.y_train, X_test_processed, self.y_test, **kwargs)
         elif kind == "calibration_curve":
             graph = plot_calibration_curve(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif 'shap' in kind:
-            graph = plot_shap(model, X_test_final, kind, **kwargs)
+            graph = plot_shap(model, X_test_processed, kind, **kwargs)
         else:
             error_msg = f"Invalid plot type: {kind}. Available plot types: {available_plot_types}"
             self.__logger.error(error_msg)

From 6b9d90370e7e448a19a907686e12ed99b76f0907 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sat, 24 Jan 2026 23:36:57 +0300
Subject: [PATCH 13/14] Add categorical feature support to HistGradientBoosting
 models

---
 flexml/config/ml_models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flexml/config/ml_models.py b/flexml/config/ml_models.py
index 752572e..8f37df2 100644
--- a/flexml/config/ml_models.py
+++ b/flexml/config/ml_models.py
@@ -66,7 +66,7 @@ def get_ml_models(
         KNN_REGRESSION = KNeighborsRegressor(n_jobs=n_jobs) 
         BAYESIAN_RIDGE_REGRESSION = BayesianRidge()
         ADA_BOOST_REGRESSION = AdaBoostRegressor(random_state=random_state)
-        HIST_GRADIENT_BOOSTING_REGRESSION = HistGradientBoostingRegressor(random_state=random_state)
+        HIST_GRADIENT_BOOSTING_REGRESSION = HistGradientBoostingRegressor(random_state=random_state, categorical_features="from_dtype")
         GRADIENT_BOOSTING_REGRESSION = GradientBoostingRegressor(random_state=random_state)
         RANDOM_FOREST_REGRESSION = RandomForestRegressor(random_state=random_state, n_jobs=n_jobs)
         EXTRA_TREES_REGRESSION = ExtraTreesRegressor(random_state=random_state, n_jobs=n_jobs)
@@ -318,7 +318,7 @@ def get_ml_models(
 
         # Wide Classification Models
         ADA_BOOST_CLASSIFIER = AdaBoostClassifier(random_state=random_state)
-        HIST_GRADIENT_BOOSTING_CLASSIFIER = HistGradientBoostingClassifier(random_state=random_state)
+        HIST_GRADIENT_BOOSTING_CLASSIFIER = HistGradientBoostingClassifier(random_state=random_state, categorical_features="from_dtype")
         GRADIENT_BOOSTING_CLASSIFIER = GradientBoostingClassifier(random_state=random_state)
         EXTRA_TREES_CLASSIFIER = ExtraTreesClassifier(random_state=random_state, n_jobs=n_jobs)
         QDA_CLASSIFIER = QuadraticDiscriminantAnalysis()

From 81e727a675e3e157393afa11f428b09ce33f3c17 Mon Sep 17 00:00:00 2001
From: Ozgur Aslan <ozguraslank@gmail.com>
Date: Sat, 24 Jan 2026 23:58:27 +0300
Subject: [PATCH 14/14] Add better approach for tree based model detection to
 plot_shap()

---
 flexml/helpers/plot_model_graphs.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/flexml/helpers/plot_model_graphs.py b/flexml/helpers/plot_model_graphs.py
index 333f044..d233225 100644
--- a/flexml/helpers/plot_model_graphs.py
+++ b/flexml/helpers/plot_model_graphs.py
@@ -383,15 +383,8 @@ def plot_shap(
         or an error message if an error occurs during the process.
     """
     try:
-        # Check if model is a tree-based model
-        model_type = str(type(model))
-        
-        tree_based_models = [
-            "RandomForest", "GradientBoosting", "AdaBoost", 
-            "HistGradientBoosting", "DecisionTree", "ExtraTrees",
-            "XGB", "CatBoost", "LGBM"
-        ]
-        is_tree_based = any(model_name in model_type for model_name in tree_based_models)
+        # Tree-based check; HistGradientBoosting models expose no feature_importances_, so match them by class name
+        is_tree_based = hasattr(model, 'feature_importances_') or 'HistGradientBoosting' in type(model).__name__
         
         if is_tree_based:
             explainer = shap.TreeExplainer(model)