Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions trl/experimental/bco/bco_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ class BCOConfig(BaseConfig):

> [!NOTE]
> These parameters have default values different from [`~transformers.TrainingArguments`]:
> - `logging_steps`: Defaults to `10` instead of `500`.
> - `gradient_checkpointing`: Defaults to `True` instead of `False`.
> - `bf16`: Defaults to `True` (instead of `False`) if `fp16` is not set.
> - `learning_rate`: Defaults to `5e-7` instead of `5e-5`.
"""

Expand Down
8 changes: 8 additions & 0 deletions trl/experimental/cpo/cpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

@dataclass
class CPOConfig(BaseConfig):
# docstyle-ignore
r"""
Configuration class for the [`experimental.cpo.CPOTrainer`].

Expand Down Expand Up @@ -81,6 +82,13 @@ class CPOConfig(BaseConfig):
string.
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.

> [!NOTE]
> These parameters have default values different from [`~transformers.TrainingArguments`]:
> - `logging_steps`: Defaults to `10` instead of `500`.
> - `gradient_checkpointing`: Defaults to `True` instead of `False`.
> - `bf16`: Defaults to `True` (instead of `False`) if `fp16` is not set.
> - `learning_rate`: Defaults to `1e-6` instead of `5e-5`.
"""

_VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"]
Expand Down
8 changes: 8 additions & 0 deletions trl/experimental/kto/kto_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

@dataclass
class KTOConfig(BaseConfig):
# docstyle-ignore
r"""
Configuration class for the [`experimental.kto.KTOTrainer`].

Expand Down Expand Up @@ -64,6 +65,13 @@ class KTOConfig(BaseConfig):
Number of processes to use for processing the dataset.
disable_dropout (`bool`, *optional*, defaults to `True`):
Whether to disable dropout in the model and reference model.

> [!NOTE]
> These parameters have default values different from [`~transformers.TrainingArguments`]:
> - `logging_steps`: Defaults to `10` instead of `500`.
> - `gradient_checkpointing`: Defaults to `True` instead of `False`.
> - `bf16`: Defaults to `True` (instead of `False`) if `fp16` is not set.
> - `learning_rate`: Defaults to `1e-6` instead of `5e-5`.
"""

_VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"]
Expand Down
8 changes: 8 additions & 0 deletions trl/experimental/online_dpo/online_dpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

@dataclass
class OnlineDPOConfig(BaseConfig):
# docstyle-ignore
r"""
Configuration class for the [`experimental.online_dpo.OnlineDPOTrainer`].

Expand Down Expand Up @@ -149,6 +150,13 @@ class may differ from those in [`~transformers.TrainingArguments`].
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
string.

> [!NOTE]
> These parameters have default values different from [`~transformers.TrainingArguments`]:
> - `logging_steps`: Defaults to `10` instead of `500`.
> - `gradient_checkpointing`: Defaults to `True` instead of `False`.
> - `bf16`: Defaults to `True` (instead of `False`) if `fp16` is not set.
> - `learning_rate`: Defaults to `5e-7` instead of `5e-5`.
"""

# Parameters whose default values are overridden from TrainingArguments
Expand Down
8 changes: 8 additions & 0 deletions trl/experimental/orpo/orpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

@dataclass
class ORPOConfig(BaseConfig):
# docstyle-ignore
r"""
Configuration class for the [`experimental.orpo.ORPOTrainer`].

Expand Down Expand Up @@ -61,6 +62,13 @@ class ORPOConfig(BaseConfig):
string.
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.

> [!NOTE]
> These parameters have default values different from [`~transformers.TrainingArguments`]:
> - `logging_steps`: Defaults to `10` instead of `500`.
> - `gradient_checkpointing`: Defaults to `True` instead of `False`.
> - `bf16`: Defaults to `True` (instead of `False`) if `fp16` is not set.
> - `learning_rate`: Defaults to `1e-6` instead of `5e-5`.
"""

_VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"]
Expand Down
3 changes: 3 additions & 0 deletions trl/experimental/ppo/ppo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ class PPOConfig(BaseConfig):

> [!NOTE]
> These parameters have default values different from [`~transformers.TrainingArguments`]:
> - `logging_steps`: Defaults to `10` instead of `500`.
> - `gradient_checkpointing`: Defaults to `True` instead of `False`.
> - `bf16`: Defaults to `True` (instead of `False`) if `fp16` is not set.
> - `learning_rate`: Defaults to `3e-6` instead of `5e-5`.
"""

Expand Down
8 changes: 8 additions & 0 deletions trl/experimental/prm/prm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

@dataclass
class PRMConfig(BaseConfig):
# docstyle-ignore
r"""
Configuration class for the [`experimental.prm.PRMTrainer`].

Expand All @@ -43,6 +44,13 @@ class PRMConfig(BaseConfig):
Whether to train only on the last step.
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.

> [!NOTE]
> These parameters have default values different from [`~transformers.TrainingArguments`]:
> - `logging_steps`: Defaults to `10` instead of `500`.
> - `gradient_checkpointing`: Defaults to `True` instead of `False`.
> - `bf16`: Defaults to `True` (instead of `False`) if `fp16` is not set.
> - `learning_rate`: Defaults to `1e-5` instead of `5e-5`.
"""

# Parameters whose default values are overridden from TrainingArguments
Expand Down
Loading