huggingface · albertvillanova · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026
diff --git a/trl/experimental/bco/bco_config.py b/trl/experimental/bco/bco_config.py
@@ -72,6 +72,9 @@ class BCOConfig(BaseConfig):
 
     > [!NOTE]
     > These parameters have default values different from [`~transformers.TrainingArguments`]:
+    > - `logging_steps`: Defaults to `10` instead of `500`.
+    > - `gradient_checkpointing`: Defaults to `True` instead of `False`.
+    > - `bf16`: Defaults to `True` (instead of `False`) when `fp16` is not set.
     > - `learning_rate`: Defaults to `5e-7` instead of `5e-5`.
     """
 

diff --git a/trl/experimental/cpo/cpo_config.py b/trl/experimental/cpo/cpo_config.py
@@ -22,6 +22,7 @@
 
 @dataclass
 class CPOConfig(BaseConfig):
+    # docstyle-ignore
     r"""
     Configuration class for the [`experimental.cpo.CPOTrainer`].
 
@@ -81,6 +82,13 @@ class CPOConfig(BaseConfig):
             string.
         dataset_num_proc (`int`, *optional*):
             Number of processes to use for processing the dataset.
+
+    > [!NOTE]
+    > These parameters have default values different from [`~transformers.TrainingArguments`]:
+    > - `logging_steps`: Defaults to `10` instead of `500`.
+    > - `gradient_checkpointing`: Defaults to `True` instead of `False`.
+    > - `bf16`: Defaults to `True` (instead of `False`) when `fp16` is not set.
+    > - `learning_rate`: Defaults to `1e-6` instead of `5e-5`.
     """
 
     _VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"]

diff --git a/trl/experimental/kto/kto_config.py b/trl/experimental/kto/kto_config.py
@@ -22,6 +22,7 @@
 
 @dataclass
 class KTOConfig(BaseConfig):
+    # docstyle-ignore
     r"""
     Configuration class for the [`experimental.kto.KTOTrainer`].
 
@@ -64,6 +65,13 @@ class KTOConfig(BaseConfig):
             Number of processes to use for processing the dataset.
         disable_dropout (`bool`, *optional*, defaults to `True`):
             Whether to disable dropout in the model and reference model.
+
+    > [!NOTE]
+    > These parameters have default values different from [`~transformers.TrainingArguments`]:
+    > - `logging_steps`: Defaults to `10` instead of `500`.
+    > - `gradient_checkpointing`: Defaults to `True` instead of `False`.
+    > - `bf16`: Defaults to `True` (instead of `False`) when `fp16` is not set.
+    > - `learning_rate`: Defaults to `1e-6` instead of `5e-5`.
     """
 
     _VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"]

diff --git a/trl/experimental/online_dpo/online_dpo_config.py b/trl/experimental/online_dpo/online_dpo_config.py
@@ -21,6 +21,7 @@
 
 @dataclass
 class OnlineDPOConfig(BaseConfig):
+    # docstyle-ignore
     r"""
     Configuration class for the [`experimental.online_dpo.OnlineDPOTrainer`].
 
@@ -149,6 +150,13 @@ class may differ from those in [`~transformers.TrainingArguments`].
         model_init_kwargs (`dict[str, Any]`, *optional*):
             Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
             string.
+
+    > [!NOTE]
+    > These parameters have default values different from [`~transformers.TrainingArguments`]:
+    > - `logging_steps`: Defaults to `10` instead of `500`.
+    > - `gradient_checkpointing`: Defaults to `True` instead of `False`.
+    > - `bf16`: Defaults to `True` (instead of `False`) when `fp16` is not set.
+    > - `learning_rate`: Defaults to `5e-7` instead of `5e-5`.
     """
 
     # Parameters whose default values are overridden from TrainingArguments

diff --git a/trl/experimental/orpo/orpo_config.py b/trl/experimental/orpo/orpo_config.py
@@ -22,6 +22,7 @@
 
 @dataclass
 class ORPOConfig(BaseConfig):
+    # docstyle-ignore
     r"""
     Configuration class for the [`experimental.orpo.ORPOTrainer`].
 
@@ -61,6 +62,13 @@ class ORPOConfig(BaseConfig):
             string.
         dataset_num_proc (`int`, *optional*):
             Number of processes to use for processing the dataset.
+
+    > [!NOTE]
+    > These parameters have default values different from [`~transformers.TrainingArguments`]:
+    > - `logging_steps`: Defaults to `10` instead of `500`.
+    > - `gradient_checkpointing`: Defaults to `True` instead of `False`.
+    > - `bf16`: Defaults to `True` (instead of `False`) when `fp16` is not set.
+    > - `learning_rate`: Defaults to `1e-6` instead of `5e-5`.
     """
 
     _VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"]

diff --git a/trl/experimental/ppo/ppo_config.py b/trl/experimental/ppo/ppo_config.py
@@ -114,6 +114,9 @@ class PPOConfig(BaseConfig):
 
     > [!NOTE]
     > These parameters have default values different from [`~transformers.TrainingArguments`]:
+    > - `logging_steps`: Defaults to `10` instead of `500`.
+    > - `gradient_checkpointing`: Defaults to `True` instead of `False`.
+    > - `bf16`: Defaults to `True` (instead of `False`) when `fp16` is not set.
     > - `learning_rate`: Defaults to `3e-6` instead of `5e-5`.
     """
 

diff --git a/trl/experimental/prm/prm_config.py b/trl/experimental/prm/prm_config.py
@@ -19,6 +19,7 @@
 
 @dataclass
 class PRMConfig(BaseConfig):
+    # docstyle-ignore
     r"""
     Configuration class for the [`experimental.prm.PRMTrainer`].
 
@@ -43,6 +44,13 @@ class PRMConfig(BaseConfig):
             Whether to train only on the last step.
         dataset_num_proc (`int`, *optional*):
             Number of processes to use for processing the dataset.
+
+    > [!NOTE]
+    > These parameters have default values different from [`~transformers.TrainingArguments`]:
+    > - `logging_steps`: Defaults to `10` instead of `500`.
+    > - `gradient_checkpointing`: Defaults to `True` instead of `False`.
+    > - `bf16`: Defaults to `True` (instead of `False`) when `fp16` is not set.
+    > - `learning_rate`: Defaults to `1e-5` instead of `5e-5`.
     """
 
     # Parameters whose default values are overridden from TrainingArguments