diff --git a/flexml/_feature_engineer.py b/flexml/_feature_engineer.py
index f2d8897..92df339 100644
--- a/flexml/_feature_engineer.py
+++ b/flexml/_feature_engineer.py
@@ -27,6 +27,49 @@ def transform(self, X):
             A DataFrame with the specified columns dropped
         """
         return X.drop(columns=self.drop_columns, axis=1, errors='ignore')
+
+
+class CategoricalTypeConverter(BaseEstimator, TransformerMixin):
+    """
+    Convert categorical columns to 'category' dtype for tree-based models that
+    support native categorical features. Columns in ordinal_encode_map become
+    ordered categoricals with the given order; every other column reuses the
+    categories learned in fit(), so the category -> code mapping is identical
+    for training and inference data. Values outside the known categories become NaN.
+    """
+    def __init__(self, categorical_columns: Optional[List[str]] = None, ordinal_encode_map: Optional[Dict[str, List]] = None):
+        # Keep original values for sklearn clone compatibility
+        self.categorical_columns = categorical_columns
+        self.ordinal_encode_map = ordinal_encode_map
+
+    def fit(self, X, y=None):
+        """Record the categories of each non-ordinal categorical column present in X."""
+        ordinal_map = self.ordinal_encode_map or {}
+        self.categories_ = {
+            col: X[col].astype('category').cat.categories
+            for col in (self.categorical_columns or [])
+            if col in X.columns and col not in ordinal_map
+        }
+        return self
+
+    def transform(self, X):
+        """Return a copy of X (pd.DataFrame) with the categorical columns converted."""
+        X = X.copy()
+        ordinal_map = self.ordinal_encode_map or {}
+        learned = getattr(self, 'categories_', {})  # empty when fit() was skipped
+        for col in self.categorical_columns or []:
+            if col not in X.columns:
+                continue
+            if col in ordinal_map:
+                # Compare as strings; pd.Categorical turns unlisted values into NaN
+                categories = [str(c) for c in ordinal_map[col]]
+                X[col] = pd.Categorical(X[col].astype(str), categories=categories, ordered=True)
+            elif col in learned:
+                X[col] = pd.Categorical(X[col], categories=learned[col])
+            else:
+                # No learned categories for this column: infer them from X itself
+                X[col] = X[col].astype('category')
+        return X
     
 
 class ColumnImputer(BaseEstimator, TransformerMixin):
@@ -47,6 +90,8 @@ def fit(self, X, y=None):
         return self
 
     def transform(self, X) -> pd.DataFrame:
+        X = X.copy()  # Avoid modifying original data
+        
         # Categorical columns are converted to string
         categorical_cols = X.select_dtypes(exclude=['number']).columns
         X[categorical_cols] = X[categorical_cols].astype(str)
@@ -104,9 +149,17 @@ def __init__(
         self.ordinal_encoders = {}
 
     def fit(self, X, y=None):
-        # Categorical columns are converted to string
+        X = X.copy()  # Avoid modifying original data
+        
+        # First, convert all non-numeric columns to string (original behavior)
         categorical_cols = X.select_dtypes(exclude=['number']).columns
         X[categorical_cols] = X[categorical_cols].astype(str)
+        
+        # Also ensure columns in encoding_method_mapper are string 
+        # (handles case where column is numeric but needs encoding)
+        for col in self.encoding_method_mapper.keys():
+            if col in X.columns and col not in categorical_cols:
+                X[col] = X[col].astype(str)
 
         for col, method in self.encoding_method_mapper.items():
             if method == "label_encoder":
@@ -133,9 +186,16 @@ def fit(self, X, y=None):
         return self
 
     def transform(self, X) -> pd.DataFrame:
-        # Categorical columns are converted to string
+        X = X.copy()  # Avoid modifying original data
+        
+        # First, convert all non-numeric columns to string (original behavior)
         categorical_cols = X.select_dtypes(exclude=['number']).columns
         X[categorical_cols] = X[categorical_cols].astype(str)
+        
+        # Also ensure columns in encoding_method_mapper are string
+        for col in self.encoding_method_mapper.keys():
+            if col in X.columns and col not in categorical_cols:
+                X[col] = X[col].astype(str)
 
         for col, method in self.encoding_method_mapper.items():
             if method == "label_encoder":
diff --git a/flexml/_model_tuner.py b/flexml/_model_tuner.py
index 6153719..a8bc0cd 100644
--- a/flexml/_model_tuner.py
+++ b/flexml/_model_tuner.py
@@ -198,8 +198,6 @@ def _setup_tuning(
             
             * 'tuned_model_evaluation_metric': The evaluation metric that is used to evaluate the tuned model
         """
-        model_params = None
-        
         if isinstance(model, Pipeline):
             model = model.named_steps['model']
 
diff --git a/flexml/config/__init__.py b/flexml/config/__init__.py
index f56a684..2346f85 100644
--- a/flexml/config/__init__.py
+++ b/flexml/config/__init__.py
@@ -3,6 +3,7 @@
 )
 
 from flexml.config.supervised_config import (
+    NATIVE_CATEGORICAL_MODELS,
     EVALUATION_METRICS,
     TUNING_METRIC_TRANSFORMATIONS,
     CROSS_VALIDATION_METHODS,
diff --git a/flexml/config/ml_models.py b/flexml/config/ml_models.py
index 9e4147e..8f37df2 100644
--- a/flexml/config/ml_models.py
+++ b/flexml/config/ml_models.py
@@ -66,7 +66,7 @@ def get_ml_models(
         KNN_REGRESSION = KNeighborsRegressor(n_jobs=n_jobs) 
         BAYESIAN_RIDGE_REGRESSION = BayesianRidge()
         ADA_BOOST_REGRESSION = AdaBoostRegressor(random_state=random_state)
-        HIST_GRADIENT_BOOSTING_REGRESSION = HistGradientBoostingRegressor(random_state=random_state)
+        HIST_GRADIENT_BOOSTING_REGRESSION = HistGradientBoostingRegressor(random_state=random_state, categorical_features="from_dtype")
         GRADIENT_BOOSTING_REGRESSION = GradientBoostingRegressor(random_state=random_state)
         RANDOM_FOREST_REGRESSION = RandomForestRegressor(random_state=random_state, n_jobs=n_jobs)
         EXTRA_TREES_REGRESSION = ExtraTreesRegressor(random_state=random_state, n_jobs=n_jobs)
@@ -308,8 +308,8 @@ def get_ml_models(
 
         # Quick Classification Models
         LOGISTIC_REGRESSION = LogisticRegression(max_iter=1000, random_state=random_state, n_jobs=n_jobs)
-        XGBOOST_CLASSIFIER = XGBClassifier(objective=xgb_objective, random_state=random_state, n_jobs=n_jobs)
-        LIGHTGBM_CLASSIFIER = LGBMClassifier(verbose=-1, random_state=random_state, n_jobs=n_jobs)
+        XGBOOST_CLASSIFIER = XGBClassifier(enable_categorical=True, objective=xgb_objective, random_state=random_state, n_jobs=n_jobs)
+        LIGHTGBM_CLASSIFIER = LGBMClassifier(enable_categorical=True, verbose=-1, random_state=random_state, n_jobs=n_jobs)
         CATBOOST_CLASSIFIER = CatBoostClassifier(allow_writing_files=False, silent=True, random_seed=random_state, thread_count=n_jobs)
         DECISION_TREE_CLASSIFIER = DecisionTreeClassifier(random_state=random_state)
         RANDOM_FOREST_CLASSIFIER = RandomForestClassifier(random_state=random_state, n_jobs=n_jobs)
@@ -318,7 +318,7 @@ def get_ml_models(
 
         # Wide Classification Models
         ADA_BOOST_CLASSIFIER = AdaBoostClassifier(random_state=random_state)
-        HIST_GRADIENT_BOOSTING_CLASSIFIER = HistGradientBoostingClassifier(random_state=random_state)
+        HIST_GRADIENT_BOOSTING_CLASSIFIER = HistGradientBoostingClassifier(random_state=random_state, categorical_features="from_dtype")
         GRADIENT_BOOSTING_CLASSIFIER = GradientBoostingClassifier(random_state=random_state)
         EXTRA_TREES_CLASSIFIER = ExtraTreesClassifier(random_state=random_state, n_jobs=n_jobs)
         QDA_CLASSIFIER = QuadraticDiscriminantAnalysis()
diff --git a/flexml/config/supervised_config.py b/flexml/config/supervised_config.py
index 9c61dca..25b066e 100644
--- a/flexml/config/supervised_config.py
+++ b/flexml/config/supervised_config.py
@@ -1,3 +1,11 @@
+# Class names (model.__class__.__name__) of estimators that support native categorical features
+NATIVE_CATEGORICAL_MODELS = {
+    'CatBoostRegressor', 'CatBoostClassifier',
+    'LGBMRegressor', 'LGBMClassifier', 
+    'XGBRegressor', 'XGBClassifier',
+    'HistGradientBoostingRegressor', 'HistGradientBoostingClassifier'
+}
+
 # Regression & Classification Evaluation Metrics
 EVALUATION_METRICS = {
     "Regression": {"DEFAULT": "R2",
diff --git a/flexml/helpers/plot_model_graphs.py b/flexml/helpers/plot_model_graphs.py
index 8c54235..d233225 100644
--- a/flexml/helpers/plot_model_graphs.py
+++ b/flexml/helpers/plot_model_graphs.py
@@ -383,15 +383,8 @@ def plot_shap(
         or an error message if an error occurs during the process.
     """
     try:
-        # Check if model is a tree-based model
-        model_type = str(type(model))
-        
-        tree_based_models = [
-            "RandomForest", "GradientBoosting", "AdaBoost", 
-            "HistGradientBoosting", "DecisionTree", "ExtraTrees",
-            "XGB", "CatBoost", "LGBM"
-        ]
-        is_tree_based = any(model_name in model_type for model_name in tree_based_models)
+        # Check if the model is tree-based
+        is_tree_based = hasattr(model, 'feature_importances_')
         
         if is_tree_based:
             explainer = shap.TreeExplainer(model)
@@ -410,6 +403,10 @@ def plot_shap(
         if shap_type == 'shap_summary':
             shap.summary_plot(shap_values, X_test)
         elif shap_type == 'shap_violin':
+            # While shap summary is okay with categorical columns, violin plot is not
+            cat_cols = X_test.select_dtypes(include=['category']).columns
+            for col in cat_cols:
+                X_test[col] = X_test[col].cat.codes
             shap.plots.violin(shap_values, X_test)
         else:
             return f"Invalid shap_type: {shap_type}"
diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py
index 78e7c82..35d4865 100644
--- a/flexml/structures/supervised_base.py
+++ b/flexml/structures/supervised_base.py
@@ -12,6 +12,7 @@
 from flexml.logger import get_logger
 from flexml.config import (
     get_ml_models,
+    NATIVE_CATEGORICAL_MODELS,
     EVALUATION_METRICS,
     CROSS_VALIDATION_METHODS,
     PLOT_TYPES
@@ -34,7 +35,7 @@
 )
 from flexml.structures.custom_score import CustomScore
 from flexml._model_tuner import ModelTuner
-from flexml._feature_engineer import FeatureEngineering
+from flexml._feature_engineer import FeatureEngineering, CategoricalTypeConverter
 
 import warnings
 warnings.filterwarnings("ignore")
@@ -315,20 +316,20 @@ def __prepare_holdout_data(self, test_size: Optional[float] = None):
         )[0]
         train_labels, test_labels = holdout_cv_splits[0], holdout_cv_splits[1]
 
-        train_data = pd.concat([
-            self.X.loc[train_labels], 
-            self.y.loc[train_labels]
-        ], axis=1)
-        test_data = pd.concat([
-            self.X.loc[test_labels],
-            self.y.loc[test_labels]
-        ], axis=1)
-
+        # Setup feature engineer with train data
+        train_data = pd.concat([self.X.loc[train_labels], self.y.loc[train_labels]], axis=1)
         self.feature_engineer.setup(data=train_data)
-
-        self.X_train, self.y_train = self.feature_engineer.fit_transform()
-        self.X_test, self.y_test = self.feature_engineer.transform(test_data=test_data, y_included=True)
-        self.feature_names = list(self.X_train.columns)
+        self.categorical_columns = self.feature_engineer.categorical_columns
+        
+        # Store raw holdout data (use X/y directly instead of concat→drop)
+        self.X_train_raw = self.X.loc[train_labels]
+        self.X_test_raw = self.X.loc[test_labels]
+        self.y_train, self.y_test = self._encode_target(
+            self.y.loc[train_labels], 
+            self.y.loc[test_labels]
+        )
+        
+        self.feature_names = list(self.X_train_raw.columns)
         self.y_class_mapping = self.feature_engineer.y_class_mapping
         
     def __prepare_models(self, experiment_size: str, num_class: int, random_state: Optional[int] = None, n_jobs: Optional[int] = -1):
@@ -380,6 +381,91 @@ def __top_n_models_checker(self, top_n_models: Optional[int]) -> int:
         
         return top_n_models
     
+    def _fit_model(
+        self,
+        model: object,
+        X: pd.DataFrame,
+        y: pd.Series,
+        model_name: Optional[str] = None
+    ):
+        """
+        Train ``model`` on ``(X, y)``, handing the categorical columns to
+        CatBoost estimators first so they are treated as categorical features.
+
+        Parameters
+        ----------
+        model : object
+            Estimator to train; it is fitted in place
+        X : pd.DataFrame
+            Feature data
+        y : pd.Series
+            Target data
+        model_name : Optional[str]
+            Name used for the CatBoost check; defaults to the model's class name
+        """
+        name = model.__class__.__name__ if model_name is None else model_name
+
+        # cat_features is only set on a CatBoost model that is not fitted yet;
+        # an already fitted one is refitted with whatever parameters it has.
+        wants_cat_features = 'CatBoost' in name and getattr(self, 'categorical_columns', None)
+        if wants_cat_features and not model.is_fitted():
+            model.set_params(cat_features=list(self.categorical_columns))
+
+        # Every model, CatBoost or not, is trained through the same call
+        model.fit(X, y)
+    
+    def _encode_target(
+        self,
+        y: pd.Series,
+        y_test: Optional[pd.Series] = None,
+        fit: bool = True
+    ) -> Union[pd.Series, tuple]:
+        """
+        Encode the target of a classification task with the feature
+        engineer's target encoder.
+
+        The inputs are returned untouched when the task is not
+        'Classification' or when the dtype of ``y`` is neither 'object'
+        nor 'category'.
+
+        Parameters
+        ----------
+        y : pd.Series
+            Target to encode
+        y_test : pd.Series, optional
+            Second target, transformed with the encoder after ``y`` is handled
+        fit : bool
+            True fits the encoder on ``y`` while transforming it; False only
+            transforms ``y`` with the encoder as it is
+
+        Returns
+        -------
+        pd.Series or tuple
+            Encoded ``y``, or ``(encoded_y, encoded_y_test)`` when ``y_test``
+            is given. Each result keeps the name and index of its input.
+        """
+        has_test = y_test is not None
+        is_classification = self.__ML_TASK_TYPE == 'Classification'
+
+        # Regression targets and non object/category targets pass through
+        if not (is_classification and y.dtype in ['object', 'category']):
+            return (y, y_test) if has_test else y
+
+        encoder = self.feature_engineer.target_encoder
+
+        def _as_series(values, like):
+            # Wrap the encoder output, keeping the name and index of `like`
+            return pd.Series(values, name=like.name, index=like.index)
+
+        raw = encoder.fit_transform(y) if fit else encoder.transform(y)
+        encoded_y = _as_series(raw, y)
+
+        if not has_test:
+            return encoded_y
+
+        # y_test is always transformed only, never used for fitting
+        return encoded_y, _as_series(encoder.transform(y_test), y_test)
+    
     def __process_experiment_result(self, experiment_stats: dict):
         """
         Processes and aggregates the results of an experiment, calculating average metrics and selecting the best model.
@@ -615,19 +701,17 @@ def start_experiment(
                     train_labels = train_idx
                     test_labels = test_idx
                 
-                train_data = pd.concat([
-                    self.X.loc[train_labels], 
-                    self.y.loc[train_labels]
-                ], axis=1)
-                test_data = pd.concat([
-                    self.X.loc[test_labels],
-                    self.y.loc[test_labels]
-                ], axis=1)
-                
+                # Setup feature engineer with train data
+                train_data = pd.concat([self.X.loc[train_labels], self.y.loc[train_labels]], axis=1)
                 self.feature_engineer.setup(data=train_data)
                 
-                X_train, y_train = self.feature_engineer.fit_transform()
-                X_test, y_test = self.feature_engineer.transform(test_data=test_data, y_included=True)
+                # Use X/y directly instead of concat→drop
+                X_train_raw = self.X.loc[train_labels]
+                X_test_raw = self.X.loc[test_labels]
+                y_train, y_test = self._encode_target(
+                    self.y.loc[train_labels],
+                    self.y.loc[test_labels]
+                )
 
                 for model_idx in range(len(self.__ML_MODELS)):
                     model_info = self.__ML_MODELS[model_idx]
@@ -638,19 +722,27 @@ def start_experiment(
                         continue  # Skip already trained or raised error models
 
                     model = model_info['model']
+
+                    # Get preprocessing pipeline for this specific model
+                    preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
+                    
+                    # Transform data using model-specific preprocessing
+                    X_train_processed = preprocessing_pipeline.fit_transform(X_train_raw)
+                    X_test_processed = preprocessing_pipeline.transform(X_test_raw)
+
                     try:
                         all_metrics = []
                         all_times = []
 
                         t_start = time()
-                        model.fit(X_train, y_train)
+                        self._fit_model(model, X_train_processed, y_train, model_name)
                         t_end = time()
 
                         time_taken = round(t_end - t_start, 2)
                         if self.__ML_TASK_TYPE == "Classification" and hasattr(model, 'predict_proba'):
-                            y_pred = model.predict_proba(X_test)
+                            y_pred = model.predict_proba(X_test_processed)
                         else:
-                            y_pred = model.predict(X_test)
+                            y_pred = model.predict(X_test_processed)
 
                         model_perf = evaluate_model_perf(
                             self.__ML_TASK_TYPE,
@@ -845,32 +937,34 @@ def save_model(
                 raise ValueError(error_msg)
         else: # If model is an object, we can't know its name, so we use its class name
             model_name = model.__class__.__name__
-            
-        # Initialize pipeline steps
-        pipeline_steps = []
 
-        # Initialize and setup feature engineering if needed
-        if not model_only:
-            # Add the feature engineering pipeline directly
-            pipeline_steps.extend(self.feature_engineer.pipeline.steps)
 
-        # Handle full training scenario if required
+        fitted_preprocessing_pipeline = None
         if full_train:
             already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
+            
             if not already_trained:
                 self.__logger.info("Training the model using the whole data")
+                
                 self.feature_engineer.setup(data=self.data)
-                X_train, y_train = self.feature_engineer.fit_transform()
-                model.fit(X_train, y_train)
+                
+                # Get preprocessing pipeline for this model
+                fitted_preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
+                
+                # Fit and transform data through the preprocessing pipeline
+                X_train_processed = fitted_preprocessing_pipeline.fit_transform(self.X)
+                y_train = self._encode_target(self.y)
+                
+                # Fit model
+                self._fit_model(model, X_train_processed, y_train, model_name)
 
-                # find the model in leaderboard and update the full_train to True, and update the model object in there
+                # Update leaderboard
                 for model_info in self.__model_training_info:
                     for name, info in model_info.items():
                         if name == model_name:
                             info["model_stats"]["Full Train"] = True
                             info["model"] = model
                             break
-                # Update leaderboard
                 self.get_best_models()
 
         # If no feature pipeline is included, return the model directly
@@ -885,11 +979,13 @@ def save_model(
             
             return model
 
-        # Add the model to the pipeline
-        pipeline_steps.append(('model', model))
-
-        # Create the pipeline
-        pipeline = Pipeline(pipeline_steps)
+        if fitted_preprocessing_pipeline is not None:
+            # Combine the exact fitted preprocessing steps with the fitted model
+            steps = list(fitted_preprocessing_pipeline.steps) + [('model', model)]
+            pipeline = Pipeline(steps)
+        else:
+            # Model was already trained, get pipeline from feature_engineer
+            pipeline = self._get_model_pipeline(model, include_model=True)
 
         # Save the pipeline
         try:
@@ -928,6 +1024,73 @@ def _check_if_model_is_full_trained(self, model_name: str, model_taken_from_lead
                     return True
         return False
 
+    def _is_native_categorical_model(self, model_name: str) -> bool:
+        """Tell whether `model_name` is a native categorical model and categorical columns exist."""
+        if model_name not in NATIVE_CATEGORICAL_MODELS:
+            return False
+        if not hasattr(self, 'categorical_columns'):
+            return False
+        return len(self.categorical_columns) > 0
+    
+    def _get_preprocessing_steps(self, model_name: str) -> list:
+        """
+        Build the list of preprocessing steps to run before the given model.
+
+        Native categorical models get every feature-engineering step except
+        'encoder', followed by a CategoricalTypeConverter; all other models
+        get the feature-engineering steps unchanged.
+
+        Parameters
+        ----------
+        model_name : str
+            The name of the model
+
+        Returns
+        -------
+        list
+            Preprocessing steps as (name, transformer) tuples
+        """
+        # Standard case: the encoder stays in the pipeline
+        if not self._is_native_categorical_model(model_name):
+            return list(self.feature_engineer.pipeline.steps)
+
+        # Native categorical case: leave the encoder out
+        steps = [
+            (label, transformer)
+            for label, transformer in self.feature_engineer.pipeline.steps
+            if label != 'encoder'
+        ]
+        # The ordinal map lets the converter keep the order of ordinal columns
+        ordinal_map = getattr(self.feature_engineer, 'ordinal_encode_map', None) or {}
+        converter = CategoricalTypeConverter(list(self.categorical_columns), ordinal_encode_map=ordinal_map)
+        steps.append(('cat_type_converter', converter))
+
+        return steps
+    
+    def _get_model_pipeline(self, model, include_model: bool = True) -> Pipeline:
+        """
+        Assemble the sklearn Pipeline that belongs to a model.
+
+        Parameters
+        ----------
+        model : object
+            Model whose class name selects the preprocessing steps
+        include_model : bool, optional
+            Append the model as the final 'model' step (default: True)
+
+        Returns
+        -------
+        Pipeline
+            Preprocessing steps, followed by the model when requested
+        """
+        preprocessing = self._get_preprocessing_steps(model.__class__.__name__)
+
+        # Concatenate into a new list instead of growing the returned one
+        tail = [('model', model)] if include_model else []
+        pipeline_steps = preprocessing + tail
+
+        return Pipeline(pipeline_steps)
+
     def _predict_helper(
         self,
         test_data: pd.DataFrame,
@@ -951,8 +1114,8 @@ def _predict_helper(
             if extra: error_msg += f" Extra: {extra}."
             raise ValueError(error_msg)
         
-        model_taken_from_leaderboard = False # If the model object is from leaderboard, track this
-
+        # Get model from leaderboard or use provided model
+        model_taken_from_leaderboard = False
         if model is None:
             model = self.get_best_models()
             model_name = self.__last_searched_model_name
@@ -965,32 +1128,39 @@ def _predict_helper(
             model_name = model
             model = self.get_model_by_name(model)
             model_taken_from_leaderboard = True
-        else: # If model is an object, we can't know its name, so we use its class name
+        else:
             model_name = model.__class__.__name__
         
-        # Prepare training data if needed
-        if full_train:
-            # Check If model_taken_from_leaderboard is True and Full Train in self.__model_training_info is True, then we don't need to train the model again
-            already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
-            if not already_trained:
-                self.__logger.info("Training the model using the whole data")
-                self.feature_engineer.setup(data=self.data)
-                X_train, y_train = self.feature_engineer.fit_transform()
-                model.fit(X_train, y_train)
-
-                # find the model in leaderboard and update the full_train to True, and update the model object in there
-                for model_info in self.__model_training_info:
-                    for name, info in model_info.items():
-                        if name == model_name:
-                            info["model_stats"]["Full Train"] = True
-                            info["model"] = model
-                            break
-                # Update leaderboard
-                self.get_best_models()
-            X_test = self.feature_engineer.transform(test_data)
+        # Get the preprocessing pipeline for this model (consistent with save_model)
+        self.feature_engineer.setup(data=self.data)
+        preprocessing_pipeline = self._get_model_pipeline(model, include_model=False)
+        
+        # Train model on full data if needed
+        already_trained = self._check_if_model_is_full_trained(model_name, model_taken_from_leaderboard)
+        
+        if full_train and not already_trained:
+            # Fit the pipeline on full training data for consistent transformations
+            X_train_processed = preprocessing_pipeline.fit_transform(self.X)
+            
+            self.__logger.info("Training the model using the whole data")
+            
+            y_train = self._encode_target(self.y)
+            self._fit_model(model, X_train_processed, y_train, model_name)
 
+            # Update leaderboard
+            for model_info in self.__model_training_info:
+                for name, info in model_info.items():
+                    if name == model_name:
+                        info["model_stats"]["Full Train"] = True
+                        info["model"] = model
+                        break
+            self.get_best_models()
         else:
-            X_test = self.feature_engineer.transform(test_data)
+            # Just fit the preprocessing pipeline without retraining the model
+            preprocessing_pipeline.fit(self.X)
+
+        # Transform test data through the same preprocessing pipeline
+        X_test = preprocessing_pipeline.transform(test_data)
 
         return model, X_test
 
@@ -1057,7 +1227,8 @@ def predict_proba(
         model, X_test = self._predict_helper(test_data, model, full_train)
         return model.predict_proba(X_test)
 
-    def __get_holdout_model_from_stats(self, model_name: str) -> object:
+    def __get_holdout_model_from_stats(self, model_name: str) -> Optional[dict]:
+        """Return the entry cached for `model_name` (dict with 'model' and 'preprocessing_pipeline' keys), or None."""
         if self._holdout_model_objects is None or self._holdout_model_objects == {}:
             return None
         return self._holdout_model_objects.get(model_name)
@@ -1067,8 +1238,19 @@ def __add_holdout_model_to_stats(self, model: object, model_name: Optional[str]
             model_name = model.__class__.__name__
 
         model_copy = deepcopy(model)
-        model_copy.fit(self.X_train, self.y_train)
-        self._holdout_model_objects[model_name] = model_copy
+        
+        # Get preprocessing pipeline for this model
+        preprocessing_pipeline = self._get_model_pipeline(model_copy, include_model=False)
+        
+        # Transform holdout training data and fit model
+        X_train_processed = preprocessing_pipeline.fit_transform(self.X_train_raw)
+        self._fit_model(model_copy, X_train_processed, self.y_train, model_name)
+        
+        # Store the fitted preprocessing pipeline with the model for later use
+        self._holdout_model_objects[model_name] = {
+            'model': model_copy,
+            'preprocessing_pipeline': preprocessing_pipeline
+        }
         return model_copy
     
     def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_importance", **kwargs):
@@ -1139,38 +1321,47 @@ def plot(self, model: Optional[Union[str, object]] = None, kind: str = "feature_
         elif isinstance(model, str):
             model_name = model
             model = self.get_model_by_name(model)
-        else: # If model is an object, we can't know its name, so we use its class name
+        else:
             model_name = model.__class__.__name__
         
-        if self.__get_holdout_model_from_stats(model_name) is not None:
-            model = self.__get_holdout_model_from_stats(model_name)
+        # Get or create holdout model with its preprocessing pipeline
+        holdout_data = self.__get_holdout_model_from_stats(model_name)
+        if holdout_data is not None:
+            model = holdout_data['model']
+            preprocessing_pipeline = holdout_data['preprocessing_pipeline']
         else:
             model = self.__add_holdout_model_to_stats(model, model_name)
+            holdout_data = self.__get_holdout_model_from_stats(model_name)
+            preprocessing_pipeline = holdout_data['preprocessing_pipeline']
+
+        # Transform holdout data using the model's preprocessing pipeline
+        X_train_processed = preprocessing_pipeline.transform(self.X_train_raw)
+        X_test_processed = preprocessing_pipeline.transform(self.X_test_raw)
 
-        # If kind expects predictions
+        # If kind expects predictions
         if kind in ["confusion_matrix"]:
-            preds = model.predict(self.X_test)
+            preds = model.predict(X_test_processed)
         elif kind in ["roc_curve", "calibration_curve"]:
-            preds = model.predict_proba(self.X_test)
+            preds = model.predict_proba(X_test_processed)
 
         graph = None
 
         if kind == "feature_importance":
-            if not hasattr(self, 'feature_names'):
-                self.feature_names = list(self.X_train.columns)
-            graph = plot_feature_importance(model, self.feature_names, **kwargs)
+            # Use feature names from transformed data (accounts for encoding)
+            feature_names = list(X_train_processed.columns) if hasattr(X_train_processed, 'columns') else None
+            graph = plot_feature_importance(model, feature_names, **kwargs)
         elif kind == "confusion_matrix":
             graph = plot_confusion_matrix(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif kind == "roc_curve":
             graph = plot_roc_curve(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif kind == "residuals":
-            graph = plot_residuals(model, self.X_train, self.y_train, self.X_test, self.y_test, **kwargs)
+            graph = plot_residuals(model, X_train_processed, self.y_train, X_test_processed, self.y_test, **kwargs)
         elif kind == "prediction_error":
-            graph = plot_prediction_error(model, self.X_train, self.y_train, self.X_test, self.y_test, **kwargs)
+            graph = plot_prediction_error(model, X_train_processed, self.y_train, X_test_processed, self.y_test, **kwargs)
         elif kind == "calibration_curve":
             graph = plot_calibration_curve(self.y_test, preds, self.y_class_mapping, **kwargs)
         elif 'shap' in kind:
-            graph = plot_shap(model, self.X_test, kind, **kwargs)
+            graph = plot_shap(model, X_test_processed, kind, **kwargs)
         else:
             error_msg = f"Invalid plot type: {kind}. Available plot types: {available_plot_types}"
             self.__logger.error(error_msg)
@@ -1589,17 +1780,13 @@ def _show_tuning_report(tuning_report: Optional[dict] = None):
                 logging_to_file=self.logging_to_file
             ))
 
-        # Create the ModelTuner object If It's not created before, avoid creating it everytime tune_model() function is called
+        # Create the ModelTuner object only if it has not been created yet
         if not hasattr(self, 'model_tuner'):
-            if self.__ML_TASK_TYPE == 'Classification' and self.y.dtype in ['object', 'category']:
-                y_encoded = pd.Series(self.feature_engineer.target_encoder.fit_transform(self.y), name=self.target_col)
-                y_encoded.index = self.y.index
-            else:
-                y_encoded = self.y # No need to encode the target for regression or if the target is already encoded
+            y_encoded = self._encode_target(self.y)
             self.model_tuner = ModelTuner(self.__ML_TASK_TYPE, self.X, y_encoded, self.logging_to_file)
 
-        pipeline = self.feature_engineer.pipeline
-        pipeline = Pipeline(steps=pipeline.steps + [('model', model)])
+        # Build pipeline with proper handling for native categorical models
+        pipeline = self._get_model_pipeline(model, include_model=True)
 
         self.__logger.info(f"[PROCESS] Model Tuning process started with '{tuning_method}' method")
         tuning_method = tuning_method.lower()
diff --git a/tests/test_supervised.py b/tests/test_supervised.py
index 992760a..3fc4601 100644
--- a/tests/test_supervised.py
+++ b/tests/test_supervised.py
@@ -14,19 +14,33 @@ class TestRegression(unittest.TestCase):
     logger = get_logger(__name__, "TEST")
     logger.setLevel("DEBUG")
 
+    @staticmethod
+    def _add_synthetic_categorical_columns(df):
+        """Add two synthetic string columns (category_A, category_B) to df in place and return it"""
+        n_rows = len(df)
+        # Local Generator: reproducible draws without reseeding NumPy's process-wide RNG
+        rng = np.random.default_rng(42)
+        df['category_A'] = rng.choice(['low', 'medium', 'high'], n_rows)
+        df['category_B'] = rng.choice(['red', 'green', 'blue', 'yellow'], n_rows)
+        return df
+
     test_config = {
         'Regression': {
-            'data': load_diabetes(as_frame=True)['frame'],
+            'data': _add_synthetic_categorical_columns.__func__(load_diabetes(as_frame=True)['frame'].copy()),
             'target_col': 'target',
             'exp_obj': None
         },
         'BinaryClassification': {
-            'data': load_breast_cancer(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'No', 1: 'Yes'})),
+            'data': _add_synthetic_categorical_columns.__func__(
+                load_breast_cancer(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'No', 1: 'Yes'})).copy()
+            ),
             'target_col': 'target',
             'exp_obj': None
         },
         'MulticlassClassification': {
-            'data': load_iris(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'Iris-Setosa', 1: 'Iris-Versicolor', 2: 'Iris-Virginica'})),
+            'data': _add_synthetic_categorical_columns.__func__(
+                load_iris(as_frame=True)['frame'].assign(target=lambda df: df['target'].map({0: 'Iris-Setosa', 1: 'Iris-Versicolor', 2: 'Iris-Virginica'})).copy()
+            ),
             'target_col': 'target',
             'exp_obj': None
         }
@@ -266,4 +280,48 @@ def test_25_plot_multiclass_classification_shap_summary(self):
 
     def test_26_plot_multiclass_classification_shap_violin(self):
         exp_obj = self.test_config['MulticlassClassification']['exp_obj']
-        exp_obj.plot("RandomForestClassifier", kind="shap_violin")
\ No newline at end of file
+        exp_obj.plot("RandomForestClassifier", kind="shap_violin")
+
+    def test_27_native_categorical_pipeline_consistency(self):
+        """Test that saved pipeline predictions match exp.predict_proba() for native categorical models"""
+        exp_obj = self.test_config['BinaryClassification']['exp_obj']
+        test_data = self.test_config['BinaryClassification'].get('data').drop(columns=['target'])
+
+        # Get predictions via FlexML (this trains with full data)
+        flexml_probs = exp_obj.predict_proba(test_data, model='LGBMClassifier', full_train=True)
+
+        save_path = "test_native_cat_pipeline.pkl"
+        try:
+            # Save pipeline (should use already trained model, no retraining), then reload it
+            exp_obj.save_model(model='LGBMClassifier', save_path=save_path, model_only=False, full_train=True)
+            with open(save_path, 'rb') as f:
+                loaded_pipeline = pickle.load(f)
+            pipeline_probs = loaded_pipeline.predict_proba(test_data)
+            np.testing.assert_array_almost_equal(flexml_probs, pipeline_probs, decimal=5,
+                err_msg="Loaded pipeline predictions don't match FlexML predictions")
+        finally:
+            # Clean up the pickle even when saving, loading or the comparison fails
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+    def test_28_predict_column_mismatch_error(self):
+        """predict() must raise a ValueError mentioning "Missing" when a feature column is absent"""
+        regression_cfg = self.test_config['Regression']
+        experiment = regression_cfg['exp_obj']
+        features = regression_cfg.get('data').drop(columns=['target'])
+
+        # Drop the first feature column to provoke the mismatch
+        incomplete = features.drop(columns=[features.columns[0]])
+        with self.assertRaises(ValueError) as raised:
+            experiment.predict(incomplete, full_train=False)
+
+        self.assertIn("Missing", str(raised.exception))
+
+    def test_29_get_model_by_invalid_name(self):
+        """get_model_by_name() must raise a ValueError containing "not found" for an unknown model"""
+        experiment = self.test_config['Regression']['exp_obj']
+
+        # "NonExistentModel" is the deliberately invalid name under test
+        with self.assertRaises(ValueError) as raised:
+            experiment.get_model_by_name("NonExistentModel")
+        self.assertIn("not found", str(raised.exception))
\ No newline at end of file