From d7a1ce05b20c9f18608306e0d1c67666bbf89746 Mon Sep 17 00:00:00 2001
From: aivanni <4340981+aivanni@users.noreply.github.com>
Date: Mon, 27 Apr 2026 11:13:41 +0300
Subject: [PATCH 1/5] Fixes to lerobot finetuning workload (#498)

* Fixes to lerobot finetuning workload

* pin lerobot default version and extra packages
---
 workloads/robotics-finetune-lerobot/helm/README.md   |  6 +++++-
 .../helm/mount/entrypoint.sh.tpl                     |  9 ++++++---
 .../helm/overrides/pi05-kettle.yaml                  |  2 +-
 .../helm/templates/_helpers.tpl                      |  4 ++--
 workloads/robotics-finetune-lerobot/helm/values.yaml | 13 +++++++------
 5 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/workloads/robotics-finetune-lerobot/helm/README.md b/workloads/robotics-finetune-lerobot/helm/README.md
index 80b256c..3f8a3b0 100644
--- a/workloads/robotics-finetune-lerobot/helm/README.md
+++ b/workloads/robotics-finetune-lerobot/helm/README.md
@@ -11,6 +11,10 @@ For more information, see:
 - [Imitation Learning Tutorial](https://huggingface.co/docs/lerobot/il_robots)
 - [LeRobot GitHub Repository](https://github.com/huggingface/lerobot)
 
+## Compatibility
+
+The workload was tested on an AMD MI300 GPU using the `rocm/pytorch:rocm7.1.1_ubuntu24.04_py3.12_pytorch_release_2.10.0` image.
+
 ## Typical Workflow
 
 The complete robot learning workflow consists of four stages:
@@ -29,7 +33,7 @@ This Helm chart handles the finetuning stage, taking pre-collected demonstration
 
 The training job is configured through the `values.yaml` file. Refer to the `values.yaml` file for the complete list of configuration options.
 
-Some of the important points are related to setting up Weight&Biases (for tracking the progress) and Huggingface tokens (for uploading finetuned checkpoints and potentially downloading pretrained models from gated repos). To set them properly one has to give working values to the `.envVars.WANDB_API_KEY` and `envVars.HF_TOKEN`. By default `WANDB_API_KEY` env variable is set from the `wandb-token` secret with `wandb-token` key and `HF_TOKEN` is is set from the `hf-token` secret with `hf-token` key:
+Some of the important points are setting up Weights & Biases (for tracking training progress) and Hugging Face tokens (for uploading finetuned checkpoints and potentially downloading pretrained models from gated repos). To set these up properly, provide working values for `envVars.WANDB_API_KEY` and `envVars.HF_TOKEN`.
+By default, the `WANDB_API_KEY` environment variable is set from the `wandb-token` secret with the `wandb-token` key, and `HF_TOKEN` is set from the `hf-token` secret with the `hf-token` key:
 
 ```
 envVars:
diff --git a/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl b/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl
index a3dd049..ad2dc8c 100644
--- a/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl
+++ b/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl
@@ -2,12 +2,15 @@
 set -euo pipefail
 
 # Install ffmpeg
-apt update && apt install ffmpeg=7:6.1.1-3ubuntu5 -y
+apt-get update && apt-get install -y --no-install-recommends ffmpeg=7:6.1.1-3ubuntu5 && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # Install lerobot
 cd /workload
-git clone https://github.com/huggingface/lerobot.git
+git clone --depth 1 https://github.com/huggingface/lerobot.git
 cd lerobot
+{{- if .Values.setup.lerobotGitRef }}
+git checkout {{ .Values.setup.lerobotGitRef }}
+{{- end }}
 {{- if .Values.setup.lerobotExtraPackages }}
 pip install -e ".[{{ .Values.setup.lerobotExtraPackages }}]"
 {{- else }}
@@ -23,7 +26,7 @@ lerobot-train \
 {{- end }}
   --output_dir=/workload/outputs \
   --job_name={{ .Values.jobName }} \
-  --policy.repo_id={{ .Values.hfFinetunedModelId }} \
+  --policy.repo_id={{ required "hfFinetunedModelId is required!" .Values.hfFinetunedModelId | quote }} \
   --steps={{ .Values.training.steps }} \
   --save_freq={{ .Values.training.save_freq }} \
   --eval_freq={{ .Values.training.eval_freq }} \
diff --git a/workloads/robotics-finetune-lerobot/helm/overrides/pi05-kettle.yaml b/workloads/robotics-finetune-lerobot/helm/overrides/pi05-kettle.yaml
index 73b56bf..ea42cfb 100644
--- a/workloads/robotics-finetune-lerobot/helm/overrides/pi05-kettle.yaml
+++ b/workloads/robotics-finetune-lerobot/helm/overrides/pi05-kettle.yaml
@@ -5,7 +5,7 @@ hfDatasetId: aivanni/SO-101-KATTLE-v1
 
 setup:
   # These extra packages separated with commas will be installed during lerobot library installation.
-  lerobotExtraPackages: "pi"
+  lerobotExtraPackages: "pi,dataset,training"
 
 # Main policy settings
 policy:
diff --git a/workloads/robotics-finetune-lerobot/helm/templates/_helpers.tpl b/workloads/robotics-finetune-lerobot/helm/templates/_helpers.tpl
index 362e13e..14d717c 100644
--- a/workloads/robotics-finetune-lerobot/helm/templates/_helpers.tpl
+++ b/workloads/robotics-finetune-lerobot/helm/templates/_helpers.tpl
@@ -39,9 +39,9 @@
       storageClassName: {{ .Values.storage.ephemeral.storageClassName }}
   name: ephemeral-storage
 {{- else }}
-- emptyDir: {}
+- emptyDir:
+    sizeLimit: {{ .Values.storage.ephemeral.quantity }}
   name: ephemeral-storage
-  sizeLimit: {{ .Values.storage.ephemeral.quantity }}
 {{- end }}
 - emptyDir:
     medium: Memory
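The `_helpers.tpl` change above matters because `sizeLimit` is a field of `emptyDir` itself, not a sibling key of the volume entry; the old template put it at list level, where Kubernetes ignores it. As a minimal sketch of what the corrected template renders (the `100Gi` quantity is an assumed example value for `storage.ephemeral.quantity`):

```yaml
# Rendered volume entry (illustrative; the size value is an assumption)
- emptyDir:
    sizeLimit: 100Gi
  name: ephemeral-storage
```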
diff --git a/workloads/robotics-finetune-lerobot/helm/values.yaml b/workloads/robotics-finetune-lerobot/helm/values.yaml
index df76491..b2fd491 100644
--- a/workloads/robotics-finetune-lerobot/helm/values.yaml
+++ b/workloads/robotics-finetune-lerobot/helm/values.yaml
@@ -10,7 +10,8 @@
 
 setup:
   # These extra packages e.g. "pi,peft" separated with commas will be installed during lerobot library installation.
-  lerobotExtraPackages: null
+  lerobotExtraPackages: "dataset,training"
+  lerobotGitRef: 05a5223885bcd36064fc1a967620329696595a76
 
 # Main policy settings
 policy:
@@ -20,9 +21,9 @@ policy:
 
 # Main training script args
 training:
-  steps: 300 # How many steps to train for
-  save_freq: 300 # Save a checkpoint every save_freq steps
-  eval_freq: 100 # Evaluate every eval_freq steps
+  steps: 600 # How many steps to train for
+  save_freq: 600 # Save a checkpoint every save_freq steps
+  eval_freq: 200 # Evaluate every eval_freq steps
   batch_size: 8 # Batch size per step
 
 # Any additional args to pass to the training script
@@ -54,9 +55,9 @@
   dshm:
     sizeLimit: 32Gi
 
-# Setting job labels
+# Setting job labels (dictionary or null)
 metadata:
-  labels: {}
+  labels: null
 
 # kaiwo settings (if enabled, use kaiwo CRDs to have kaiwo operator manage the workload)
 kaiwo:

From 5ad34d3f3957c124a90fabe3a622a6939c39bbdc Mon Sep 17 00:00:00 2001
From: Robert Talling
Date: Wed, 29 Apr 2026 11:37:14 +0300
Subject: [PATCH 2/5] Add gemma-3-27b-it finetuning support (#497)

* gemma-3-27b-it-override

* Remove patch, copy image processor, update version

* Restore values
---
 .../models/google-gemma-3-27b-it.yaml | 91 +++++++++++++++++++
 .../helm/templates/_helpers.tpl       |  9 ++
 2 files changed, 100 insertions(+)
 create mode 100644 workloads/llm-finetune-silogen-engine/helm/overrides/models/google-gemma-3-27b-it.yaml

diff --git a/workloads/llm-finetune-silogen-engine/helm/overrides/models/google-gemma-3-27b-it.yaml b/workloads/llm-finetune-silogen-engine/helm/overrides/models/google-gemma-3-27b-it.yaml
new file mode 100644
index 0000000..7837d3e
--- /dev/null
+++ b/workloads/llm-finetune-silogen-engine/helm/overrides/models/google-gemma-3-27b-it.yaml
@@ -0,0 +1,91 @@
+metadata:
+  compatibleAccelerators:
+    # MI300X:
+    - 74a1
+    - 74a9
+    - 74b5
+    - 74bd
+    # MI325X:
+    - 74a5
+    - 74b9
+    # MI350X:
+    - 75a0
+    - 75b0
+    # MI355X:
+    - 75a3
+    - 75b3
+
+# Canonical model name:
+model: "google/gemma-3-27b-it"
+
+# Resources:
+downloadsReservedSize: 80Gi
+checkpointsReservedSize: 160Gi
+finetuningGpus: 1
+memoryPerGpu: 192
+cpuPerGpu: 8
+
+# Runtime configuration:
+distributedType: "auto-deepspeed-stage1"
+
+### Finetuning config section ###
+finetuning_config:
+  method: sft
+  data_conf:
+    training_data:
+      type: CONCATENATION
+    validation_data:
+      type: AUTO_SPLIT
+      ratio: 0.1
+    chat_template_name: "keep-original"
+    missing_pad_token_strategy: "bos-repurpose"
+  training_args:
+    learning_rate: 0.000005
+    max_grad_norm: 1.0
+    weight_decay: 0.000001
+    optim: "adamw_torch"
+    num_train_epochs: 1
+    lr_scheduler_type: cosine
+    warmup_ratio: 0.01
+    logging_strategy: steps
+    logging_steps: 0.01
+    save_strategy: "no"
+    seed: 42
+    bf16: true
+    report_to:
+      - none
+    push_to_hub: false
+    gradient_checkpointing: true
+    gradient_checkpointing_kwargs:
+      use_reentrant: false
+    eval_steps: 0.2
+    eval_strategy: "steps"
+    metric_for_best_model: "loss"
+    greater_is_better: false
+    load_best_model_at_end: false
+  batchsize_conf:
+    max_per_device_train_batch_size: 1
+  peft_conf:
+    peft_type: "LORA"
+    task_type: "CAUSAL_LM"
+    peft_kwargs:
+      r: 64
+      lora_alpha: 16.0
+      lora_dropout: 0.05
+      target_modules:
+        - q_proj
+        - k_proj
+        - v_proj
+        - o_proj
+        - up_proj
+        - down_proj
+        - gate_proj
+  run_conf:
+    model_args:
+      torch_dtype: bfloat16
+      attn_implementation: "flash_attention_2"
+    resume_from_checkpoint: auto
+  sft_args:
+    max_seq_length: 8192
+
+basemodel: hf://google/gemma-3-27b-it
+aimManifest:
+  modelId: "google/gemma-3-27b-it"
+  aimId: "google/gemma-3-27b-it"
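As a usage sketch, the new override file would be consumed at install time like any other model override; the release name, namespace, and chart path below are assumptions, not part of the patch:

```bash
# Hypothetical invocation; adjust release name, namespace, and paths to your cluster.
helm install gemma3-finetune workloads/llm-finetune-silogen-engine/helm \
  --namespace finetuning \
  -f workloads/llm-finetune-silogen-engine/helm/overrides/models/google-gemma-3-27b-it.yaml
```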
diff --git a/workloads/llm-finetune-silogen-engine/helm/templates/_helpers.tpl b/workloads/llm-finetune-silogen-engine/helm/templates/_helpers.tpl
index 8f84e56..1d18aae 100644
--- a/workloads/llm-finetune-silogen-engine/helm/templates/_helpers.tpl
+++ b/workloads/llm-finetune-silogen-engine/helm/templates/_helpers.tpl
@@ -119,6 +119,15 @@ merge_adapter $merge_base ./checkpoints/checkpoint-final-adapter ./checkpoints/c
 echo 'Copying AIMModel manifest to checkpoint directory...'
 cp /configs/aim-model-manifest.yaml /workdir/checkpoints/aim-model-manifest.yaml
 {{- end }}
+# Copy preprocessor config into final checkpoint if present in downloaded base model.
+# This keeps deployment artifacts self-contained without a separate processor job.
+if [ -f /local_resources/basemodel/preprocessor_config.json ]; then
+  mkdir -p /workdir/checkpoints/checkpoint-final
+  cp /local_resources/basemodel/preprocessor_config.json /workdir/checkpoints/checkpoint-final/preprocessor_config.json
+  echo 'Copied preprocessor_config.json to checkpoint-final'
+else
+  echo 'No preprocessor_config.json found in basemodel, skipping copy'
+fi
 {{- if not .Values.debug.skip_checkpoint_upload }}
 # Once more to ensure everything gets uploaded
 echo 'Training done, syncing once more...'

From b09440770ab179c27e15fb4681d181b7a8308afb Mon Sep 17 00:00:00 2001
From: Aku Rouhe
Date: Wed, 29 Apr 2026 11:51:28 +0300
Subject: [PATCH 3/5] Mixtral 1GPU, Llama 3.1 8B It Radeon, Speed and max batch test, image v0.7.2 (#496)

* Silogen engine 0.7.2

  Attempts to reduce gpu count to 1
  Working 1 GPU recipe for Mixtral
  Support longer sequences
  Update to engine v0.7.1
  Add speed and max batch test override
  Need 4096 max len
  Random long sequences of data
  Update engine S3 env just for the modelSource
  Bring in the additional recipes
  Update config docs
  Gemma 3 in separate PR

* Final ROCm 7.2 image

* Fine-tuning instead of finetuning
---
 .../overrides/create-long-random-data.yaml     |  40 +++
 .../helm/config_doc_dpo.md                     | 290 ++++++++--------
 .../helm/config_doc_sft.md                     | 293 ++++++++--------
 .../helm/overrides/data/long-random-data.yaml  |   6 +
 ...ta-llama_llama-3.1-8b-instruct-radeon.yaml  |  91 +++++
 .../mistralai_mixtral-8x7b-instruct-v0.1.yaml  |  28 +-
 .../utilities/speed-and-max-batch-test.yaml    |   6 +
 .../helm/templates/configmap.yaml              |  29 +-
 .../helm/values.schema.json                    | 314 ++++++++++++------
 .../helm/values.yaml                           |   2 +-
 10 files changed, 680 insertions(+), 419 deletions(-)
 create mode 100644 workloads/download-data-to-bucket/helm/overrides/create-long-random-data.yaml
 create mode 100644 workloads/llm-finetune-silogen-engine/helm/overrides/data/long-random-data.yaml
 create mode 100644 workloads/llm-finetune-silogen-engine/helm/overrides/models/meta-llama_llama-3.1-8b-instruct-radeon.yaml
 create mode 100644 workloads/llm-finetune-silogen-engine/helm/overrides/utilities/speed-and-max-batch-test.yaml

diff --git a/workloads/download-data-to-bucket/helm/overrides/create-long-random-data.yaml b/workloads/download-data-to-bucket/helm/overrides/create-long-random-data.yaml
new file mode 100644
index 0000000..cafeafa
--- /dev/null
+++ b/workloads/download-data-to-bucket/helm/overrides/create-long-random-data.yaml
@@ -0,0 +1,40 @@
+
+# Data download and preprocess script:
+dataScript: |
+  #!/usr/bin/env python3
+  """Custom preprocessing script for generating random long sequences for speed tests."""
+  import random
+  import string
+  import os
+  import datasets
+
+  def generate_random_string(length):
+      return ''.join(random.choices(string.ascii_letters + string.digits + " ", k=length))
+
+  rows = [
+      {
+          "messages": [
+              {"role": "system", "content": "This is random content for speed testing and max sequence length (with truncation) validation."},
+              {"role": "user", "content": generate_random_string(8192)},
+              {"role": "assistant", "content": generate_random_string(8192)},
+          ],
+          "data_source": "random_generated",
+          "extra_info": {"split": "test", "index": i, "skip_reason": ""},
+      } for i in range(2048)
+  ]
+  dataset = datasets.Dataset.from_list(rows)
+  dataset.to_json("/downloads/datasets/long-random-data.jsonl")  # Need to save any data files in this specific directory to be uploaded.
+
+# Where the resources should be stored:
+bucketDataDir: default-bucket/datasets/
+bucketStorageHost: http://minio.minio-tenant-default.svc.cluster.local:80
+
+# Bucket credentials from a secret:
+bucketCredentialsSecret:
+  name: minio-credentials
+  accessKeyKey: minio-access-key
+  secretKeyKey: minio-secret-key
+
+# Storage configuration:
+storageClass: mlstorage
+storageQuantity: "128Mi"
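For orientation, each line the script writes to `long-random-data.jsonl` is one chat-format record in the SFT data layout described in the config docs below. A heavily truncated, illustrative row (the random user/assistant payloads here are stand-ins for the 8192-character strings the script actually generates):

```json
{"messages": [{"role": "system", "content": "This is random content for speed testing and max sequence length (with truncation) validation."}, {"role": "user", "content": "kQ3 z..."}, {"role": "assistant", "content": "Tf8 w..."}], "data_source": "random_generated", "extra_info": {"split": "test", "index": 0, "skip_reason": ""}}
```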
diff --git a/workloads/llm-finetune-silogen-engine/helm/config_doc_dpo.md b/workloads/llm-finetune-silogen-engine/helm/config_doc_dpo.md
index 0c8cad7..e630367 100644
--- a/workloads/llm-finetune-silogen-engine/helm/config_doc_dpo.md
+++ b/workloads/llm-finetune-silogen-engine/helm/config_doc_dpo.md
@@ -9,15 +9,15 @@ See the various sub-configs for their options. Additional properties are not all
 
 | Property | Type | Required | Possible values | Default | Description |
 | -------- | ---- | -------- | --------------- | ------- | ----------- |
-| `data_conf` | `object` | ✅ | [ChatTrainValidConfig](#chattrainvalidconfig) | | The data input config |
-| `training_args` | `object` | ✅ | [SilogenDPOConfig](#silogendpoconfig) | | TRL `DPOTrainerArguments` with some restrictions |
-| `batchsize_conf` | `object` | ✅ | [BatchsizeConfig](#batchsizeconfig) | | Batch size configuration |
-| `peft_conf` | `object` | ✅ | [GenericPeftConfig](#genericpeftconfig) and/or [NoPeftConfig](#nopeftconfig) and/or [PretrainedPeftConfig](#pretrainedpeftconfig) | | Adapter configuration |
-| `run_conf` | `object` | ✅ | [RunConfig](#runconfig) | | Model related configuration |
-| `method` | `const` | | `dpo` | `"dpo"` | |
-| `overrides` | `object` | | [Overrides](#overrides) | `{"lr_multiplier": 1.0, "lr_batch_size_scaling": "none"}` | Override options to simplify the config interface |
-| `tracking` | `object` or `null` | | [FinetuningTrackingConfig](#finetuningtrackingconfig) | `null` | MLFlow tracking configuration |
-| `quant_conf` | `object` | | [BnBQuantizationConfig](#bnbquantizationconfig) and/or [NoQuantizationConfig](#noquantizationconfig) | `{"quantization_type": "no-quantization"}` | Quantization configuration |
+| data_conf | `object` | ✅ | [ChatTrainValidConfig](#chattrainvalidconfig) | | The data input config |
+| training_args | `object` | ✅ | [SilogenDPOConfig](#silogendpoconfig) | | TRL DPOTrainerArguments with some restrictions |
+| batchsize_conf | `object` | ✅ | [BatchsizeConfig](#batchsizeconfig) | | Batch size configuration |
+| peft_conf | `object` | ✅ | [GenericPeftConfig](#genericpeftconfig) and/or [NoPeftConfig](#nopeftconfig) and/or [PretrainedPeftConfig](#pretrainedpeftconfig) | | Adapter configuration |
+| run_conf | `object` | ✅ | [RunConfig](#runconfig) | | Model related configuration |
+| method | `const` | | `dpo` | `"dpo"` | |
+| overrides | `object` | | [Overrides](#overrides) | `{"lr_multiplier": 1.0, "lr_batch_size_scaling": "none"}` | Override options to simplify the config interface |
+| tracking | `object` or `null` | | [FinetuningTrackingConfig](#finetuningtrackingconfig) | `null` | MLFlow tracking configuration | +| quant_conf | `object` | | [BnBQuantizationConfig](#bnbquantizationconfig) and/or [NoQuantizationConfig](#noquantizationconfig) | `{"quantization_type": "no-quantization"}` | Quantization configuration | --- @@ -32,10 +32,10 @@ Automatic validation split from the training data | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `type` | `const` | ✅ | `AUTO_SPLIT` | | | -| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | -| `ratio` | `number` | | `number` | `0.2` | Ratio of the training data to use for validation | -| `seed` | `integer` | | `integer` | `1289525893` | Seed for the random number generator for splitting | +| type | `const` | ✅ | `AUTO_SPLIT` | | | +| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | +| ratio | `number` | | number | `0.2` | Ratio of the training data to use for validation | +| seed | `integer` | | integer | `1289525893` | Seed for the random number generator for splitting | ## BatchsizeConfig @@ -51,8 +51,8 @@ This mostly limited by the memory capacity of the device. | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| total_train_batch_size | `integer` | ✅ | `integer` | | The total batch size for the training run | -| max_per_device_train_batch_size | `integer` | ✅ | `integer` | | The maximum training batch size per device | +| total_train_batch_size | `integer` | ✅ | integer | | The total batch size for the training run | +| max_per_device_train_batch_size | `integer` | ✅ | integer | | The maximum training batch size per device | | per_device_eval_batch_size | `integer` or `null` | | integer | `null` | The maximum eval batch size per device, if not given, will use same as training batch size | ## BnBQuantizationConfig @@ -60,23 +60,23 @@ This mostly limited by the memory capacity of the device. 
Bits and Bytes configuration The options are from the BitsAndBytes config, -see: +see: https://huggingface.co/docs/transformers/en/main_classes/quantization#transformers.BitsAndBytesConfig #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `quantization_type` | `const` | | `bits-and-bytes` | `"bits-and-bytes"` | | -| `load_in_8bit` | `boolean` | | `boolean` | `false` | | -| `load_in_4bit` | `boolean` | | `boolean` | `false` | | -| `llm_int8_threshold` | `number` | | `number` | `6.0` | | -| `llm_int8_skip_modules` | `array` or `null` | | `string` | `null` | | -| `llm_int8_enable_fp32_cpu_offload` | `boolean` | | `boolean` | `false` | | -| `llm_int8_has_fp16_weight` | `boolean` | | `boolean` | `false` | | -| `bnb_4bit_compute_dtype` | `string` or `null` | | `string` | `null` | | -| `bnb_4bit_quant_type` | `const` | | `fp4` and/or `nf4` | `"fp4"` | | -| `bnb_4bit_use_double_quant` | `boolean` | | `boolean` | `false` | | -| `bnb_4bit_quant_storage` | `string` or `null` | | `string` | `null` | | +| quantization_type | `const` | | `bits-and-bytes` | `"bits-and-bytes"` | | +| load_in_8bit | `boolean` | | boolean | `false` | | +| load_in_4bit | `boolean` | | boolean | `false` | | +| llm_int8_threshold | `number` | | number | `6.0` | | +| llm_int8_skip_modules | `array` or `null` | | string | `null` | | +| llm_int8_enable_fp32_cpu_offload | `boolean` | | boolean | `false` | | +| llm_int8_has_fp16_weight | `boolean` | | boolean | `false` | | +| bnb_4bit_compute_dtype | `string` or `null` | | string | `null` | | +| bnb_4bit_quant_type | `const` | | `fp4` and/or `nf4` | `"fp4"` | | +| bnb_4bit_use_double_quant | `boolean` | | boolean | `false` | | +| bnb_4bit_quant_storage | `string` or `null` | | string | `null` | | ## ChatTemplateName @@ -99,11 +99,11 @@ Additionally includes chat template and padding configurations, as those are par | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `training_data` | `object` | ✅ | [ConcatenationDataInput](#concatenationdatainput) and/or [WeightedMixDataInput](#weightedmixdatainput) | | | -| `validation_data` | `object` | ✅ | [AutoSplitDataInput](#autosplitdatainput) and/or [ConcatenationDataInput](#concatenationdatainput) and/or [NoneDataInput](#nonedatainput) | | | +| training_data | `object` | ✅ | [ConcatenationDataInput](#concatenationdatainput) and/or [WeightedMixDataInput](#weightedmixdatainput) | | | +| validation_data | `object` | ✅ | [AutoSplitDataInput](#autosplitdatainput) and/or [ConcatenationDataInput](#concatenationdatainput) and/or [NoneDataInput](#nonedatainput) | | | | chat_template_name | `string` | | [ChatTemplateName](#chattemplatename) | `"mistral-with-system"` | | -| `padding_side` | `string` | | string | `"right"` | Padding side, `right` is usually right. | -| `missing_pad_token_strategy` | `string` | | [MissingPadTokenStrategy](#missingpadtokenstrategy) | `"bos-repurpose"` | See the `MissingPadTokenStrategys` for descriptions of the options | +| padding_side | `string` | | string | `"right"` | Padding side, right is usually right. 
| +| missing_pad_token_strategy | `string` | | [MissingPadTokenStrategy](#missingpadtokenstrategy) | `"bos-repurpose"` | See the MissingPadTokenStrategys for descriptions of the options | ## ConcatenationDataInput @@ -111,28 +111,25 @@ A simple list of datasets These are simply concatenated, the same as sampling all with equal weight. -The datasets themselves need to be in the fine-tuning supported JSONL formats. +The datasets themselves need to be in the finetuning supported JSONL formats. For SFT this means lines: -```json {"messages": [{"content": "string", "role": "string"}]} -``` For DPO this means lines of: -```json { "prompt_messages": [{"content": "string", "role": "string"}], "chosen_messages": [{"content": "string", "role": "string"}], "rejected_messages": [{"content": "string", "role": "string"}] } -``` + #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `type` | `const` | ✅ | `CONCATENATION` | | | -| `datasets` | `array` | ✅ | [DatasetDefinition](#datasetdefinition) | | | -| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | +| type | `const` | ✅ | `CONCATENATION` | | | +| datasets | `array` | ✅ | [DatasetDefinition](#datasetdefinition) | | | +| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | ## DatasetDefinition @@ -142,7 +139,7 @@ Define how to load a dataset | Property | Type | Required | Possible values | Description | | -------- | ---- | -------- | --------------- | ----------- | -| `path` | `string` | ✅ | `string` | Local path to a JSONL file in the fine-tuning data format | +| path | `string` | ✅ | string | Local path to a JSONL file in the finetuning data format | ## FinetuningTrackingConfig @@ -152,16 +149,16 @@ Settings that define how run details are logged | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `mlflow_server_uri` | `string` | ✅ | `string` | | MLflow server URI. Can be local path. | -| `experiment_name` | `string` | ✅ | `string` | | Experiment name that is used for MLflow tracking. | -| `hf_mlflow_log_artifacts` | `string` | | `string` | `"False"` | Whether to store model artifacts in MLflow. | +| mlflow_server_uri | `string` | ✅ | string | | MLflow server URI. Can be local path. | +| experiment_name | `string` | ✅ | string | | Experiment name that is used for MLFlow tracking. | +| hf_mlflow_log_artifacts | `string` | | string | `"False"` | Whether to store model artifacts in MLFlow. | ## GenericPeftConfig Config for any new initialized PEFT Adapter -See for the possible kwargs -and for the types. +See https://huggingface.co/docs/peft/tutorial/peft_model_config for the possible kwargs +and https://github.com/huggingface/peft/blob/v0.7.1/src/peft/utils/peft_types.py for the types. 
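For concreteness before the docs' own example: a minimal LoRA adapter sketch in this schema (the `r` and `lora_alpha` numbers below are placeholders for illustration, not tuned recommendations):

```yaml
peft_conf:
  peft_type: "LORA"
  task_type: "CAUSAL_LM"
  peft_kwargs:
    r: 8
    lora_alpha: 16.0
```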
Example: @@ -175,31 +172,31 @@ Example: | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `peft_type` | `string` | ✅ | [PeftType](#pefttype) | | | -| `task_type` | `string` | | [TaskType](#tasktype) | `"CAUSAL_LM"` | | -| `peft_kwargs` | `object` | | `object` | | | +| peft_type | `string` | ✅ | [PeftType](#pefttype) | | | +| task_type | `string` | | [TaskType](#tasktype) | `"CAUSAL_LM"` | | +| peft_kwargs | `object` | | object | | | ## MissingPadTokenStrategy Specifies the available missing pad token strategies. -We've shown in a small set of experiments that repurposing `EOS` can start to hurt performance +We've shown in a small set of experiments that repurposing EOS can start to hurt performance while the other options seem to work equally well. -Repurposing `EOS` is the default in many online sources, but it is actually a bad idea if we want to predict -`EOS`, as all the `pad_token_ids` get ignored in loss computation, and thus the model does not learn to predict +Repurposing EOS is the default in many online sources, but it is actually a bad idea if we want to predict +EOS, as all the pad_token_ids get ignored in loss computation, and thus the model does not learn to predict the end of the text. However, for models that have additional tokens for end of message, end of turn, etc. this is not so dangerous. -Repurposing `BOS` is similar to repurposing `EOS`, but since we do not need to predict `BOS`, this may be more sensible. +Repurposing BOS is similar to repurposing EOS, but since we do not need to predict BOS, this may be more sensible. -Repurposing `UNK` can work with tokenizers that never produce `UNK`s in normal data (e.g. Mistral tokenizers should have +Repurposing UNK can work with tokenizers that never produce UNKs in normal data (e.g. Mistral tokenizers should have a byte fall-back so that everything can be tokenized). -`UNK_CONVERT_TO_EOS` uses a hack where the `unk_token_id` is initially used for padding, but in the collation phase the -input-side `UNK`s (padding) get set to `EOS`, so that the input-side padding looks like `EOS`. On the output-side, the -`UNK`s (padding) still get ignored. NOTE: This will leave the tokenizer's `pad_token_id` set to the `unk_token_id`; so -any subsequent use of the model where padding is involved should somehow explicitly set the `pad_token_id` again. +UNK_CONVERT_TO_EOS uses a hack where the unk_token_id is initially used for padding, but in the collation phase the +input-side UNKs (padding) gets set to EOS, so that the input-side padding looks like EOS. On the output-side, the +UNKs (padding) still gets ignored. NOTE: This will leave the tokenizer's pad_token_id set to the unk_token_id; so +any subsequent use of the model where padding is involved should somehow explicitly set the pad_token_id again. #### Type: `string` @@ -210,53 +207,58 @@ any subsequent use of the model where padding is involved should somehow explici These are passed to AutoModelForCausalLM.from_pretrained See parameter docstrings and help at: - +https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained See below in "Parameters for big model inference" too, it affects training too. 
Also note that this link takes you to the transformers main branch version - be sure to compare with the installed version of transformers (that keeps changing over time, and it is difficult to keep this docstring up to date, so we wanted to link to the latest here). Some important parameters to consider are: -- `device_map` : +- device_map : A map that specifies where each submodule should go. It doesn’t need to be refined to each parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the same device. If we only pass - the device (e.g., `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like 1) on which the model will be allocated, - the device map will map the entire model to this device. Passing `device_map=0` means put the whole model on GPU + the device (e.g., "cpu", "cuda:1", "mps", or a GPU ordinal rank like 1) on which the model will be allocated, + the device map will map the entire model to this device. Passing device_map = 0 means put the whole model on GPU 0. -- `attn_implementation` : - The attention implementation to use in the model (if relevant). Can be any of `"eager"` (manual implementation of - the attention), `"sdpa"` (using `F.scaled_dot_product_attention`), or `"flash_attention_2"` (using - `Dao-AILab/flash-attention`). By default, if available, `SDPA` will be used for `torch>=2.1.1`. The default is +- attn_implementation : + The attention implementation to use in the model (if relevant). Can be any of "eager" (manual implementation of + the attention), "sdpa" (using F.scaled_dot_product_attention), or "flash_attention_2" (using + Dao-AILab/flash-attention). By default, if available, SDPA will be used for torch>=2.1.1. The default is otherwise the manual "eager" implementation. NOTE: - This does not include `quantization_config`. Quantization config is specified separately. + This does not include quantization_config. Quantization config is specified separately. #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `torch_dtype` | `const` | | `auto` | `"auto"` | | -| `device_map` | `object` or `string` or `null` | | object and/or string | `null` | Custom device map so that you can manually override the choices that Hugging Face would make. This can also be a string to specify `"auto"`, `"balanced_low_0"`, or `"sequential"`. | -| `max_memory` | `object` or `null` | | `object` | `null` | | -| `low_cpu_mem_usage` | `boolean` | | `boolean` | `false` | | -| `attn_implementation` | `string` or `null` | | `string` | `null` | Note: this can be set to `"sdpa"`, `"flash_attention_2"`, `"eager"`. | -| `offload_folder` | `string` or `null` | | `string` | `null` | | -| `offload_state_dict` | `boolean` or `null` | | `boolean` | `null` | Default is `True` if offloading (otherwise no effect) | -| `offload_buffers` | `boolean` or `null` | | `boolean` | `null` | | -| `use_cache` | `boolean` | | `boolean` | `true` | Saves generated hidden states to speed up generation, see: This is mutually exclusive with gradient_checkpointing. 
|
-| `cache_dir` | `string` or `null` | | `string` | `null` | |
-| `force_download` | `boolean` | | `boolean` | `false` | |
-| `local_files_only` | `boolean` | | `boolean` | `false` | |
-| `proxies` | `object` or `null` | | `object` | `null` | |
-| `resume_download` | `boolean` | | `boolean` | `false` | |
-| `revision` | `string` | | `string` | `"main"` | |
-| `code_revision` | `string` | | `string` | `"main"` | |
-| `subfolder` | `string` or `null` | | `string` | `null` | |
-| `token` | `string` or `null` | | `string` | `null` | |
-| `use_safetensors` | `boolean` or `null` | | `boolean` | `null` | |
-| `variant` | `string` or `null` | | `string` | `null` | |
-| `trust_remote_code` | `boolean` | | `boolean` | `false` | Warning: if set to `True`, allows execution of downloaded remote code. |
+| silogen_extra_args | `object` | | object | | Don't specify directly - this gathers additional args passed to the model |
+| dtype | `const` or `string` | | `auto` and/or string | `"auto"` | |
+| pretrained_model_name_or_path | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | Can be either:<br>- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.<br>- A path to a *directory* containing model weights saved using `~PreTrainedModel.save_pretrained`.<br>- A path or url to a *tensorflow index checkpoint file*.<br>- A path or url to a model folder containing a *flax checkpoint file* in *.msgpack* format.<br>- `None` if you are both providing the configuration and state dictionary. |
+| config | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | Configuration for the model to use instead of an automatically loaded configuration.<br>Can be either an instance of a class derived from `PretrainedConfig`, or a string/path valid as input to `PretrainedConfig.from_pretrained`. |
+| cache_dir | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | Path to a directory in which a downloaded pretrained model configuration should be cached. |
+| from_tf | `boolean` | | boolean | `false` | Load the model weights from a TensorFlow checkpoint save file. |
+| from_flax | `boolean` | | boolean | `false` | Load the model weights from a Flax checkpoint save file. |
+| ignore_mismatched_sizes | `boolean` | | boolean | `false` | Whether or not to raise an error if some of the weights from the checkpoint do not have the same size as the weights of the model. |
+| force_download | `boolean` | | boolean | `false` | Whether or not to force the (re-)download of the model weights and configuration files. |
+| proxies | `object` or `null` | | object | `null` | A dictionary of proxy servers to use by protocol or endpoint. |
+| output_loading_info | `boolean` | | boolean | `false` | Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. |
+| local_files_only | `boolean` | | boolean | `false` | Whether or not to only look at local files (i.e., do not try to download the model). |
+| token | `boolean` or `string` or `null` | | boolean and/or string | `null` | The token to use as HTTP bearer authorization for remote files. |
+| revision | `string` | | string | `"main"` | The specific model version to use. It can be a branch name, a tag name, or a commit id. |
+| attn_implementation | `string` or `null` | | string | `null` | The attention implementation to use in the model. Can be any of 'eager', 'sdpa', 'flash_attention_2', or 'flash_attention_3'.<br>Accepts HF kernel references in the form: `<namespace>/<repo_name>[@<revision>][:<kernel_name>]` |
+| device_map | `integer` or `object` or `string` or `null` | | integer and/or object and/or string | `null` | A map that specifies where each submodule should go. |
+| max_memory | `object` or `null` | | object | `null` | A dictionary device identifier to maximum memory if using `device_map`. |
+| tp_plan | `string` or `null` | | string | `null` | A torch tensor parallel plan. Currently only accepts 'auto'. |
+| tp_size | `string` or `null` | | string | `null` | A torch tensor parallel degree. If not provided would default to world size. |
+| offload_folder | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | If the `device_map` contains any value 'disk', the folder where we will offload weights. |
+| offload_buffers | `boolean` | | boolean | `false` | Whether or not to offload the buffers with the model parameters. |
+| subfolder | `string` | | string | `""` | In case the relevant files are located inside a subfolder of the model repo on huggingface.co. |
+| variant | `string` or `null` | | string | `null` | If specified load weights from `variant` filename, e.g. `pytorch_model.<variant>.bin`. |
+| use_safetensors | `boolean` or `null` | | boolean | `null` | Whether or not to use `safetensors` checkpoints. |
+| weights_only | `boolean` | | boolean | `true` | Indicates whether unpickler should be restricted to loading only tensors and primitive types. |
+| key_mapping | `object` or `null` | | object | `null` | A potential mapping of the weight names if using a model on the Hub which is compatible to a Transformers architecture, but was not converted accordingly. |

## NoPeftConfig

A trivial config specifying that no peft is used

#### Type: `object`

| Property | Type | Required | Possible values | Description |
| -------- | ---- | -------- | --------------- | ----------- |
-| `peft_type` | `const` | ✅ | `NO_PEFT` | |
+| peft_type | `const` | ✅ | `NO_PEFT` | |

## NoQuantizationConfig

A marker not to use quantization

#### Type: `object`

| Property | Type | Required | Possible values | Default | Description |
| -------- | ---- | -------- | --------------- | ------- | ----------- |
-| `quantization_type` | `const` | | `no-quantization` | `"no-quantization"` | |
+| quantization_type | `const` | | `no-quantization` | `"no-quantization"` | |

## NoneDataInput

A special type for not using data e.g. in validation

#### Type: `object`

| Property | Type | Required | Possible values | Default | Description |
| -------- | ---- | -------- | --------------- | ------- | ----------- |
-| `type` | `const` | ✅ | `NONE` | | |
-| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the `data_type` is automatically set based on the experiment config method. |
+| type | `const` | ✅ | `NONE` | | |
+| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. |

## Overrides

These implement dynamic scaling for the learning rate.
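As a worked example of the scaling rules described in the table below (numbers invented for illustration): with `learning_rate: 0.00001` and a total training batch size of 64, `sqrt` scaling yields 0.00001 × √64 = 0.00008, while `linear` yields 0.00001 × 64 = 0.00064; `lr_multiplier` multiplies the learning rate as well. A sketch in config form:

```yaml
overrides:
  lr_multiplier: 1.0
  lr_batch_size_scaling: sqrt  # effective lr = learning_rate * sqrt(total_train_batch_size)
```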
| Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `lr_multiplier` | `number` | | `number` | `1.0` | Multiplier applied to the learning rate in the `training_args` | -| `lr_batch_size_scaling` | `string` | | `none` `sqrt` `linear` | `"none"` | Scales the learning rate in the `training_args` by a factor derived from the total training batch size. `"none"`: No scaling. `"sqrt"`: Multiplies learning rate by square root of batch size (a classic scaling rule). `"linear"`: Multiplies learning rate by the batch size (a more modern scaling rule). | +| lr_multiplier | `number` | | number | `1.0` | Multiplier applied to the learning rate in the training_args | +| lr_batch_size_scaling | `string` | | `none` `sqrt` `linear` | `"none"` | Scales the learning rate in the training_args by a factor derived from the total training batch size. 'none': No scaling. 'sqrt': Multiplies learning rate by square root of batch size (a classic scaling rule). 'linear': Multiplies learning rate by the batch size (a more modern scaling rule). | ## PeftType Enum class for the different types of adapters in PEFT. Supported PEFT types: -- `PROMPT_TUNING` -- `MULTITASK_PROMPT_TUNING` -- `P_TUNING` -- `PREFIX_TUNING` -- `LORA` -- `ADALORA` -- `BOFT` -- `ADAPTION_PROMPT` -- `IA3` -- `LOHA` -- `LOKR` -- `OFT` -- `XLORA` -- `POLY` -- `LN_TUNING` -- `VERA` -- `FOURIERFT` -- `HRA` -- `BONE` -- `RANDLORA` -- `C3A` +- PROMPT_TUNING +- MULTITASK_PROMPT_TUNING +- P_TUNING +- PREFIX_TUNING +- LORA +- ADALORA +- BOFT +- ADAPTION_PROMPT +- IA3 +- LOHA +- LOKR +- OFT +- XLORA +- POLY +- LN_TUNING +- VERA +- FOURIERFT +- HRA +- BONE +- RANDLORA +- C3A #### Type: `string` @@ -341,8 +343,8 @@ PEFT adapter uses the config and initialisation from a pretrained adapter | Property | Type | Required | Possible values | Description | | -------- | ---- | -------- | --------------- | ----------- | -| `peft_type` | `const` | ✅ | `PRETRAINED_PEFT` | | -| `name_or_path` | `string` | ✅ | `string` | HF ID or path to the pretrained PEFT. | +| peft_type | `const` | ✅ | `PRETRAINED_PEFT` | | +| name_or_path | `string` | ✅ | string | HF ID or path to the pretrained peft. | ## RunConfig @@ -352,23 +354,23 @@ Experiment running configuration | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `model` | `string` | | `string` | `"/local_resources/basemodel"` | Local path to model to be fine-tuned. Normally this should be `/local_resources/basemodel` | -| `model_args` | `object` | | [ModelArguments](#modelarguments) | `{"torch_dtype": "auto", "device_map": "auto", "max_memory": null, "low_cpu_mem_usage": false, "attn_implementation": null, "offload_folder": null, "offload_state_dict": null, "offload_buffers": null, "use_cache": true, "cache_dir": null, "force_download": false, "local_files_only": false, "proxies": null, "resume_download": false, "revision": "main", "code_revision": "main", "subfolder": null, "token": null, "use_safetensors": null, "variant": null, "trust_remote_code": false}` | | -| `tokenizer` | `string` or `null` | | `string` | `null` | Model Hugging Face ID, or path, or None to use the one associated with the model | -| `use_fast_tokenizer` | `boolean` | | `boolean` | `true` | Use the Fast version of the tokenizer. The 'slow' version may be compatible with more features. 
| -| `resume_from_checkpoint` | `boolean` or `string` | | boolean and/or string | `false` | Normally should be set to 'auto' to continue if a checkpoint exists. Can set to `True` to always try to continue, `False` to never try, or a path to load from a specific path. | -| `final_checkpoint_name` | `string` | | `string` | `"checkpoint-final"` | Name of final checkpoint. Should be left as default | -| `determinism` | `string` | | `no` `half` `full` | `"no"` | Set the level of determinism in implementations. Deterministic implementations are not always available, and when they are, they are usually slower than their non-deterministic counterparts. Recommended for debugging only. `"no"`: No determinism. `"half"`: Prefer deterministic implementations. `"full"`: Only fully deterministic implementations, error out on operations that only have non-deterministic implementations. | +| model | `string` | | string | `"/local_resources/basemodel"` | Local path to model to be fine-tuned. Normally this should be /local_resources/basemodel | +| model_args | `object` | | [ModelArguments](#modelarguments) | `{"dtype": "auto", "pretrained_model_name_or_path": null, "config": null, "cache_dir": null, "from_tf": false, "from_flax": false, "ignore_mismatched_sizes": false, "force_download": false, "proxies": null, "output_loading_info": false, "local_files_only": false, "token": null, "revision": "main", "attn_implementation": null, "device_map": "auto", "max_memory": null, "tp_plan": null, "tp_size": null, "offload_folder": null, "offload_buffers": false, "subfolder": "", "variant": null, "use_safetensors": null, "weights_only": true, "key_mapping": null}` | | +| tokenizer | `string` or `null` | | string | `null` | Model HuggingFace ID, or path, or None to use the one associated with the model | +| use_fast_tokenizer | `boolean` | | boolean | `true` | Use the Fast version of the tokenizer. The 'slow' version may be compatible with more features. | +| resume_from_checkpoint | `boolean` or `string` | | boolean and/or string | `false` | Normally should be set to 'auto' to continue if a checkpoint exists. Can set to True to always try to continue, False to never try, or a path to load from a specific path. | +| final_checkpoint_name | `string` | | string | `"checkpoint-final"` | Name of final checkpoint. Should be left as default | +| determinism | `string` | | `no` `half` `full` | `"no"` | Set the level of determinism in implementations. Deterministic implementations are not always available, and when they are, they are usually slower than their non-deterministic counterparts. Recommended for debugging only. 'no': No determinism. 'half': Prefer deterministic implementations. 'full': Only fully deterministic implementations, error out on operations that only have non-deterministic implementations. 
| ## SilogenDPOConfig -Hugging Face TRL `DPOConfig` as `Config` with additional SiloGen conventions +HuggingFace TRL DPOConfig as Config with additional SiloGen conventions The list of training arguments is best available online (the version might not be up-to-date here): - +https://huggingface.co/docs/transformers/v4.57.3/en/main_classes/trainer#transformers.TrainingArguments -Additionally, the `DPOConfig` has arguments specific to DPO training, which can be found here: - +Additionally, the DPOConfig has arguments specific to DPO training, which can be found here: +https://huggingface.co/docs/trl/v0.13.0/en/dpo_trainer#trl.DPOConfig The object does a lot of things besides specifying the training configuration options (e.g. it has computed properties like true training batch size etc.) @@ -378,12 +380,12 @@ has computed properties like true training batch size etc.) Enum class for the different types of tasks supported by PEFT. Overview of the supported task types: -- `SEQ_CLS`: Text classification. -- `SEQ_2_SEQ_LM`: Sequence-to-sequence language modeling. -- `CAUSAL_LM`: Causal language modeling. -- `TOKEN_CLS`: Token classification. -- `QUESTION_ANS`: Question answering. -- `FEATURE_EXTRACTION`: Feature extraction. Provides the hidden states which can be used as embeddings or features +- SEQ_CLS: Text classification. +- SEQ_2_SEQ_LM: Sequence-to-sequence language modeling. +- CAUSAL_LM: Causal language modeling. +- TOKEN_CLS: Token classification. +- QUESTION_ANS: Question answering. +- FEATURE_EXTRACTION: Feature extraction. Provides the hidden states which can be used as embeddings or features for downstream tasks. #### Type: `string` @@ -398,38 +400,34 @@ Define a dataset, with a weight for sampling | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `path` | `string` | ✅ | `string` | | Local path to a JSONL file in the fine-tuning data format | -| `sampling_weight` | `number` | | `number` | `1.0` | | +| path | `string` | ✅ | string | | Local path to a JSONL file in the finetuning data format | +| sampling_weight | `number` | | number | `1.0` | | ## WeightedMixDataInput A list of datasets where each is sampled by a certain weight -These datasets are interleaved based on the sampling weights. The resulting dataset is fully precomputed, up to +These datasets are interleaved based on the sampling weights. The resulting dataset is fully precomputed, upto the point where every single sample in every dataset gets picked. This means that with small sampling weights, it can take a lot of draws to see every sample from a dataset and so the resulting dataset can be very large. -The datasets themselves need to be in the fine-tuning supported JSONL formats. +The datasets themselves need to be in the finetuning supported JSONL formats. 
For SFT this means lines: -```json {"messages": [{"content": "string", "role": "string"}]} -``` -For DPO this means lines of: -```json +For DPO this means lines of: { "prompt_messages": [{"content": "string", "role": "string"}], "chosen_messages": [{"content": "string", "role": "string"}], "rejected_messages": [{"content": "string", "role": "string"}] } -``` #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `type` | `const` | ✅ | `PRECOMPUTE_WEIGHTED_MIX` | | | -| `datasets` | `array` | ✅ | [WeightedDatasetDefinition](#weighteddatasetdefinition) | | | -| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | -| `seed` | `integer` | | `integer` | `19851243` | Seed for the random number generator for interleaving draws | +| type | `const` | ✅ | `PRECOMPUTE_WEIGHTED_MIX` | | | +| datasets | `array` | ✅ | [WeightedDatasetDefinition](#weighteddatasetdefinition) | | | +| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | +| seed | `integer` | | integer | `19851243` | Seed for the random number generator for interleaving draws | diff --git a/workloads/llm-finetune-silogen-engine/helm/config_doc_sft.md b/workloads/llm-finetune-silogen-engine/helm/config_doc_sft.md index bc6b2ed..c90053a 100644 --- a/workloads/llm-finetune-silogen-engine/helm/config_doc_sft.md +++ b/workloads/llm-finetune-silogen-engine/helm/config_doc_sft.md @@ -9,16 +9,16 @@ See the various sub-configs for their options. Additional properties are not all | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `data_conf` | `object` | ✅ | [ChatTrainValidConfig](#chattrainvalidconfig) | | The data input config | -| `training_args` | `object` | ✅ | [SilogenTrainingArguments](#silogentrainingarguments) | | Transformer TrainingArguments with some restrictions | -| `batchsize_conf` | `object` | ✅ | [BatchsizeConfig](#batchsizeconfig) | | Batch size configuration | -| `peft_conf` | `object` | ✅ | [GenericPeftConfig](#genericpeftconfig) and/or [NoPeftConfig](#nopeftconfig) and/or [PretrainedPeftConfig](#pretrainedpeftconfig) | | Adapter configuration | -| `run_conf` | `object` | ✅ | [RunConfig](#runconfig) | | Model related configuration | -| `sft_args` | `object` | ✅ | [SFTArguments](#sftarguments) | | SFT specific arguments | -| `method` | `const` | | `sft` | `"sft"` | | -| `overrides` | `object` | | [Overrides](#overrides) | `{"lr_multiplier": 1.0, "lr_batch_size_scaling": "none"}` | Override options to simplify the config interface | -| `tracking` | `object` or `null` | | [FinetuningTrackingConfig](#finetuningtrackingconfig) | `null` | MLFlow tracking configuration | -| `quant_conf` | `object` | | [BnBQuantizationConfig](#bnbquantizationconfig) and/or [NoQuantizationConfig](#noquantizationconfig) | `{"quantization_type": "no-quantization"}` | Quantization configuration | +| data_conf | `object` | ✅ | [ChatTrainValidConfig](#chattrainvalidconfig) | | The data input config | +| training_args | `object` | ✅ | [SilogenTrainingArguments](#silogentrainingarguments) | | Transformer TrainingArguments with some restrictions | +| batchsize_conf | `object` | ✅ | [BatchsizeConfig](#batchsizeconfig) | | Batch size configuration | +| peft_conf | `object` | 
✅ | [GenericPeftConfig](#genericpeftconfig) and/or [NoPeftConfig](#nopeftconfig) and/or [PretrainedPeftConfig](#pretrainedpeftconfig) | | Adapter configuration | +| run_conf | `object` | ✅ | [RunConfig](#runconfig) | | Model related configuration | +| sft_args | `object` | ✅ | [SFTArguments](#sftarguments) | | SFT specific arguments | +| method | `const` | | `sft` | `"sft"` | | +| overrides | `object` | | [Overrides](#overrides) | `{"lr_multiplier": 1.0, "lr_batch_size_scaling": "none"}` | Override options to simplify the config interface | +| tracking | `object` or `null` | | [FinetuningTrackingConfig](#finetuningtrackingconfig) | `null` | MLFlow tracking configuration | +| quant_conf | `object` | | [BnBQuantizationConfig](#bnbquantizationconfig) and/or [NoQuantizationConfig](#noquantizationconfig) | `{"quantization_type": "no-quantization"}` | Quantization configuration | --- @@ -33,17 +33,17 @@ Automatic validation split from the training data | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `type` | `const` | ✅ | `AUTO_SPLIT` | | | -| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the `data_type` is automatically set based on the experiment config method. | -| `ratio` | `number` | | `number` | `0.2` | Ratio of the training data to use for validation | -| `seed` | `integer` | | `integer` | `1289525893` | Seed for the random number generator for splitting | +| type | `const` | ✅ | `AUTO_SPLIT` | | | +| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | +| ratio | `number` | | number | `0.2` | Ratio of the training data to use for validation | +| seed | `integer` | | integer | `1289525893` | Seed for the random number generator for splitting | ## BatchsizeConfig Config for determining the total batch size Total batch size is the effective batch size for the complete training run. It is equal to -**number of processes** × **per-device batch size** × **accumulation**. +number of processes * per-device batch size * accumulation. The maximum batch size per device is the maximum batch size that can be accommodated on a single device. This mostly limited by the memory capacity of the device. @@ -52,32 +52,32 @@ This mostly limited by the memory capacity of the device. 
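To make the batch-size arithmetic concrete before the property table (a sketch; the numbers are hypothetical, not recommendations): training on 8 GPUs with a per-device maximum of 2, a total batch size of 64 implies gradient accumulation of 64 / (8 × 2) = 4 steps.

```yaml
batchsize_conf:
  total_train_batch_size: 64          # = processes * per-device batch * accumulation
  max_per_device_train_batch_size: 2  # accumulation is derived: 64 / (8 * 2) = 4 on 8 GPUs
```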
| Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `total_train_batch_size` | `integer` | ✅ | `intege`r | | The total batch size for the training run | -| `max_per_device_train_batch_size` | `integer` | ✅ | `integer` | | The maximum training batch size per device | -| `per_device_eval_batch_size` | `integer` or `null` | | `integer` | `null` | The maximum eval batch size per device, if not given, will use same as training batch size | +| total_train_batch_size | `integer` | ✅ | integer | | The total batch size for the training run | +| max_per_device_train_batch_size | `integer` | ✅ | integer | | The maximum training batch size per device | +| per_device_eval_batch_size | `integer` or `null` | | integer | `null` | The maximum eval batch size per device, if not given, will use same as training batch size | ## BnBQuantizationConfig Bits and Bytes configuration The options are from the BitsAndBytes config, -see: +see: https://huggingface.co/docs/transformers/en/main_classes/quantization#transformers.BitsAndBytesConfig #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `quantization_type` | `const` | | `bits-and-bytes` | `"bits-and-bytes"` | | -| `load_in_8bit` | `boolean` | | `boolean` | `false` | | -| `load_in_4bit` | `boolean` | | `boolean` | `false` | | -| `llm_int8_threshold` | `number` | | `number` | `6.0` | | -| `llm_int8_skip_modules` | `array` or `null` | | `string` | `null` | | -| `llm_int8_enable_fp32_cpu_offload` | `boolean` | | `boolean` | `false` | | -| `llm_int8_has_fp16_weight` | `boolean` | | `boolean` | `false` | | -| `bnb_4bit_compute_dtype` | `string` or `null` | | `string` | `null` | | -| `bnb_4bit_quant_type` | `const` | | `fp4` and/or `nf4` | `"fp4"` | | -| `bnb_4bit_use_double_quant` | `boolean` | | `boolean` | `false` | | -| `bnb_4bit_quant_storage` | `string` or `null` | | `string` | `null` | | +| quantization_type | `const` | | `bits-and-bytes` | `"bits-and-bytes"` | | +| load_in_8bit | `boolean` | | boolean | `false` | | +| load_in_4bit | `boolean` | | boolean | `false` | | +| llm_int8_threshold | `number` | | number | `6.0` | | +| llm_int8_skip_modules | `array` or `null` | | string | `null` | | +| llm_int8_enable_fp32_cpu_offload | `boolean` | | boolean | `false` | | +| llm_int8_has_fp16_weight | `boolean` | | boolean | `false` | | +| bnb_4bit_compute_dtype | `string` or `null` | | string | `null` | | +| bnb_4bit_quant_type | `const` | | `fp4` and/or `nf4` | `"fp4"` | | +| bnb_4bit_use_double_quant | `boolean` | | boolean | `false` | | +| bnb_4bit_quant_storage | `string` or `null` | | string | `null` | | ## ChatTemplateName @@ -100,7 +100,7 @@ Additionally includes chat template and padding configurations, as those are par | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `training_data` | `object` | ✅ | [ConcatenationDataInput](#concatenationdatainput) and/or [WeightedMixDataInput](#weightedmixdatainput) | | | +| training_data | `object` | ✅ | [ConcatenationDataInput](#concatenationdatainput) and/or [WeightedMixDataInput](#weightedmixdatainput) | | | | validation_data | `object` | ✅ | [AutoSplitDataInput](#autosplitdatainput) and/or [ConcatenationDataInput](#concatenationdatainput) and/or [NoneDataInput](#nonedatainput) | | | | chat_template_name | 
`string` | | [ChatTemplateName](#chattemplatename) | `"mistral-with-system"` | | | padding_side | `string` | | string | `"right"` | Padding side, right is usually right. | @@ -112,30 +112,25 @@ A simple list of datasets These are simply concatenated, the same as sampling all with equal weight. -The datasets themselves need to be in the fine-tuning supported JSONL formats. +The datasets themselves need to be in the finetuning supported JSONL formats. For SFT this means lines: -```json {"messages": [{"content": "string", "role": "string"}]} -``` For DPO this means lines of: - -```json { "prompt_messages": [{"content": "string", "role": "string"}], "chosen_messages": [{"content": "string", "role": "string"}], "rejected_messages": [{"content": "string", "role": "string"}] } -``` #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `type` | `const` | ✅ | `CONCATENATION` | | | -| `datasets` | `array` | ✅ | [DatasetDefinition](#datasetdefinition) | | | -| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the `data_type` is automatically set based on the experiment config method. | +| type | `const` | ✅ | `CONCATENATION` | | | +| datasets | `array` | ✅ | [DatasetDefinition](#datasetdefinition) | | | +| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | ## DatasetDefinition @@ -145,7 +140,7 @@ Define how to load a dataset | Property | Type | Required | Possible values | Description | | -------- | ---- | -------- | --------------- | ----------- | -| `path` | `string` | ✅ | `string` | Local path to a JSONL file in the fine-tuning data format | +| path | `string` | ✅ | string | Local path to a JSONL file in the finetuning data format | ## FinetuningTrackingConfig @@ -155,16 +150,16 @@ Settings that define how run details are logged | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `mlflow_server_uri` | `string` | ✅ | `string` | | MLflow server URI. Can be local path. | -| `experiment_name` | `string` | ✅ | string | | Experiment name that is used for MLflow tracking. | -| `hf_mlflow_log_artifacts` | `string` | | `string` | `"False"` | Whether to store model artifacts in MLflow. | +| mlflow_server_uri | `string` | ✅ | string | | MLflow server URI. Can be local path. | +| experiment_name | `string` | ✅ | string | | Experiment name that is used for MLFlow tracking. | +| hf_mlflow_log_artifacts | `string` | | string | `"False"` | Whether to store model artifacts in MLFlow. | ## GenericPeftConfig Config for any new initialized PEFT Adapter -See for the possible kwargs -and for the types. +See https://huggingface.co/docs/peft/tutorial/peft_model_config for the possible kwargs +and https://github.com/huggingface/peft/blob/v0.7.1/src/peft/utils/peft_types.py for the types. 
Example:

@@ -178,31 +173,31 @@
| Property | Type | Required | Possible values | Default | Description |
| -------- | ---- | -------- | --------------- | ------- | ----------- |
-| `peft_type` | `string` | ✅ | [PeftType](#pefttype) | | |
-| `task_type` | `string` | | [TaskType](#tasktype) | `"CAUSAL_LM"` | |
-| `peft_kwargs` | `object` | | `object` | | |
+| peft_type | `string` | ✅ | [PeftType](#pefttype) | | |
+| task_type | `string` | | [TaskType](#tasktype) | `"CAUSAL_LM"` | |
+| peft_kwargs | `object` | | object | | |

## MissingPadTokenStrategy

Specifies the available missing pad token strategies.

-We've shown in a small set of experiments that repurposing `EOS` can start to hurt performance
+We've shown in a small set of experiments that repurposing EOS can start to hurt performance
while the other options seem to work equally well.

-Repurposing `EOS` is the default in many online sources, but it is actually a bad idea if we want to predict
-`EOS`, as all the `pad_token_ids` get ignored in loss computation, and thus the model does not learn to predict
+Repurposing EOS is the default in many online sources, but it is actually a bad idea if we want to predict
+EOS, as all the pad_token_ids get ignored in loss computation, and thus the model does not learn to predict
the end of the text. However, for models that have additional tokens for end of message, end of turn, etc. this
is not so dangerous.

-Repurposing `BOS` is similar to repurposing `EOS`, but since we do not need to predict `BOS`, this may be more sensible.
+Repurposing BOS is similar to repurposing EOS, but since we do not need to predict BOS, this may be more sensible.

-Repurposing `UNK` can work with tokenizers that never produce UNKs in normal data (e.g. Mistral tokenizers should have
+Repurposing UNK can work with tokenizers that never produce UNKs in normal data (e.g. Mistral tokenizers should have
a byte fall-back so that everything can be tokenized).

-`UNK_CONVERT_TO_EOS` uses a hack where the `unk_token_id` is initially used for padding, but in the collation phase the
-input-side `UNK`s (padding) get set to `EOS`, so that the input-side padding looks like `EOS`. On the output-side, the
-`UNK`s (padding) still gets ignored. NOTE: This will leave the tokenizer's `pad_token_id` set to the `unk_token_id`; so
-any subsequent use of the model where padding is involved should somehow explicitly set the `pad_token_id again`.
+UNK_CONVERT_TO_EOS uses a hack where the unk_token_id is initially used for padding, but in the collation phase the
+input-side UNKs (padding) get set to EOS, so that the input-side padding looks like EOS. On the output-side, the
+UNKs (padding) still get ignored. NOTE: This will leave the tokenizer's pad_token_id set to the unk_token_id; so
+any subsequent use of the model where padding is involved should somehow explicitly set the pad_token_id again.

#### Type: `string`

@@ -210,56 +205,61 @@ any subsequent use of the model where padding is involved should somehow explici

## ModelArguments

-These are passed to `AutoModelForCausalLM.from_pretrained`
+These are passed to AutoModelForCausalLM.from_pretrained

See parameter docstrings and help at:
-
+https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained
See below in "Parameters for big model inference" too, it affects training too.
Also note that this link takes you to the transformers main branch version - be sure to compare with the installed version of transformers (that keeps changing over time, and it is difficult to keep this docstring up to date, so we wanted to link to the latest here). Some important parameters to consider are: -- `device_map`: +- device_map : A map that specifies where each submodule should go. It doesn’t need to be refined to each parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the same device. If we only pass - the device (e.g., `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like 1) on which the model will be allocated, - the device map will map the entire model to this device. Passing `device_map=0` means put the whole model on GPU + the device (e.g., "cpu", "cuda:1", "mps", or a GPU ordinal rank like 1) on which the model will be allocated, + the device map will map the entire model to this device. Passing device_map = 0 means put the whole model on GPU 0. -- `attn_implementation`: +- attn_implementation : The attention implementation to use in the model (if relevant). Can be any of "eager" (manual implementation of - the attention), `sdpa` (using `F.scaled_dot_product_attentioni`), or `flash_attention_2` (using - `Dao-AILab/flash-attention`). By default, if available, SDPA will be used for `torch>=2.1.1`. The default is + the attention), "sdpa" (using F.scaled_dot_product_attention), or "flash_attention_2" (using + Dao-AILab/flash-attention). By default, if available, SDPA will be used for torch>=2.1.1. The default is otherwise the manual "eager" implementation. NOTE: - This does not include `quantization_config`. Quantization config is specified separately. + This does not include quantization_config. Quantization config is specified separately. #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `torch_dtype` | `const` | | `auto` | `"auto"` | | -| `device_map` | `object` or `string` or `null` | | `object` and/or `string` | `null` | Custom device map so that you can manually override the choices that Hugging Face would make. This can also be a string to specify `"auto"`, `"balanced_low_0"`, or `"sequential"`. | -| `max_memory` | `object` or `null` | | `object` | `null` | | -| `low_cpu_mem_usage` | `boolean` | | `boolean` | `false` | | -| `attn_implementation` | `string` or `null` | | `string` | `null` | Note: this can be set to `"sdpa"`, `"flash_attention_2"`, `"eager"`. | -| `offload_folder` | `string` or `null` | | `string` | `null` | | -| `offload_state_dict` | `boolean` or `null` | | boolean | `null` | Default is `True` if offloading (otherwise no effect) | -| `offload_buffers` | `boolean` or `null` | | `boolean` | `null` | | -| `use_cache` | `boolean` | | `boolean` | `true` | Saves generated hidden states to speed up generation, see: This is mutually exclusive with `gradient_checkpointing`. 
| -| `cache_dir` | `string` or `null` | | `string` | `null` | | -| `force_download` | `boolean` | | `boolean` | `false` | | -| `local_files_only` | `boolean` | | `boolean` | `false` | | -| `proxies` | `object` or `null` | | `object` | `null` | | -| `resume_download` | `boolean` | | `boolean` | `false` | | -| `revision` | `string` | | `string` | `"main"` | | -| `code_revision` | `string` | | `string` | `"main"` | | -| `subfolder` | `string` or `null` | | `string` | `null` | | -| `token` | `string` or `null` | | `string` | `null` | | -| `use_safetensors` | `boolean` or `null` | | `boolean` | `null` | | -| `variant` | `string` or `null` | | `string` | `null` | | -| `trust_remote_code` | `boolean` | | `boolean` | `false` | Warning: if set to `True`, allows execution of downloaded remote code. | +| silogen_extra_args | `object` | | object | | Don't specify directly - this gathers additional args passed to the model | +| dtype | `const` or `string` | | `auto` and/or string | `"auto"` | | +| pretrained_model_name_or_path | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | Can be either:
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
- A path to a *directory* containing model weights saved using `~PreTrainedModel.save_pretrained`.
- A path or url to a *tensorflow index checkpoint file*.
- A path or url to a model folder containing a *flax checkpoint file* in *.msgpack* format.
- `None` if you are both providing the configuration and state dictionary. | +| config | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | Configuration for the model to use instead of an automatically loaded configuration.
Can be either an instance of a class derived from `PretrainedConfig`, or a string/path valid as input to `PretrainedConfig.from_pretrained`. | +| cache_dir | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | Path to a directory in which a downloaded pretrained model configuration should be cached. | +| from_tf | `boolean` | | boolean | `false` | Load the model weights from a TensorFlow checkpoint save file. | +| from_flax | `boolean` | | boolean | `false` | Load the model weights from a Flax checkpoint save file. | +| ignore_mismatched_sizes | `boolean` | | boolean | `false` | Whether or not to raise an error if some of the weights from the checkpoint do not have the same size as the weights of the model. | +| force_download | `boolean` | | boolean | `false` | Whether or not to force the (re-)download of the model weights and configuration files. | +| proxies | `object` or `null` | | object | `null` | A dictionary of proxy servers to use by protocol or endpoint. | +| output_loading_info | `boolean` | | boolean | `false` | Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. | +| local_files_only | `boolean` | | boolean | `false` | Whether or not to only look at local files (i.e., do not try to download the model). | +| token | `boolean` or `string` or `null` | | boolean and/or string | `null` | The token to use as HTTP bearer authorization for remote files. | +| revision | `string` | | string | `"main"` | The specific model version to use. It can be a branch name, a tag name, or a commit id. | +| attn_implementation | `string` or `null` | | string | `null` | The attention implementation to use in the model. Can be any of 'eager', 'sdpa', 'flash_attention_2', or 'flash_attention_3'.
Accepts HF kernel references in the form: <namespace>/<repo_name>[@<revision>][:<kernel_name>] |
+| device_map | `integer` or `object` or `string` or `null` | | integer and/or object and/or string | `null` | A map that specifies where each submodule should go. |
+| max_memory | `object` or `null` | | object | `null` | A dictionary device identifier to maximum memory if using `device_map`. |
+| tp_plan | `string` or `null` | | string | `null` | A torch tensor parallel plan. Currently only accepts 'auto'. |
+| tp_size | `string` or `null` | | string | `null` | A torch tensor parallel degree. If not provided would default to world size. |
+| offload_folder | `string` or `null` | | Format: [`path`](https://json-schema.org/understanding-json-schema/reference/string#built-in-formats) and/or string | `null` | If the `device_map` contains any value 'disk', the folder where we will offload weights. |
+| offload_buffers | `boolean` | | boolean | `false` | Whether or not to offload the buffers with the model parameters. |
+| subfolder | `string` | | string | `""` | In case the relevant files are located inside a subfolder of the model repo on huggingface.co. |
+| variant | `string` or `null` | | string | `null` | If specified load weights from `variant` filename, e.g. pytorch_model.<variant>.bin. |
+| use_safetensors | `boolean` or `null` | | boolean | `null` | Whether or not to use `safetensors` checkpoints. |
+| weights_only | `boolean` | | boolean | `true` | Indicates whether unpickler should be restricted to loading only tensors and primitive types. |
+| key_mapping | `object` or `null` | | object | `null` | A potential mapping of the weight names if using a model on the Hub which is compatible to a Transformers architecture, but was not converted accordingly. |

## NoPeftConfig

@@ -269,7 +269,7 @@ A trivial config specifying that no peft is used

| Property | Type | Required | Possible values | Description |
| -------- | ---- | -------- | --------------- | ----------- |
-| `peft_type` | `const` | ✅ | `NO_PEFT` | |
+| peft_type | `const` | ✅ | `NO_PEFT` | |

## NoQuantizationConfig

@@ -279,7 +279,7 @@ A marker not to use quantization

| Property | Type | Required | Possible values | Default | Description |
| -------- | ---- | -------- | --------------- | ------- | ----------- |
-| `quantization_type` | `const` | | `no-quantization` | `"no-quantization"` | |
+| quantization_type | `const` | | `no-quantization` | `"no-quantization"` | |

## NoneDataInput

@@ -289,8 +289,8 @@ A special type for not using data e.g. in validation

| Property | Type | Required | Possible values | Default | Description |
| -------- | ---- | -------- | --------------- | ------- | ----------- |
-| `type` | `const` | ✅ | `NONE` | | |
-| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the `data_type` is automatically set based on the experiment config method. |
+| type | `const` | ✅ | `NONE` | | |
+| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. |

## Overrides

@@ -302,35 +302,35 @@
These implement dynamic scaling for the learning rate.
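A worked example of the rules in the table below — a sketch, assuming the top-level `overrides` key follows this section's name and that `lr_multiplier` composes multiplicatively with the batch-size factor:

```yaml
overrides:
  lr_multiplier: 2.0
  lr_batch_size_scaling: "sqrt"
# With training_args.learning_rate: 1.0e-5 and total_train_batch_size: 256,
# the effective learning rate would be 1.0e-5 * sqrt(256) * 2.0 = 3.2e-4.
```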
| Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `lr_multiplier` | `number` | | `number` | `1.0` | Multiplier applied to the learning rate in the `training_args` | -| `lr_batch_size_scaling` | `string` | | `none` `sqrt` `linear` | `none` | Scales the learning rate in the §training_args§ by a factor derived from the total training batch size. `"none"`: No scaling. `"sqrt"`: Multiplies learning rate by square root of batch size (a classic scaling rule). `"linear"`: Multiplies learning rate by the batch size (a more modern scaling rule). | +| lr_multiplier | `number` | | number | `1.0` | Multiplier applied to the learning rate in the training_args | +| lr_batch_size_scaling | `string` | | `none` `sqrt` `linear` | `"none"` | Scales the learning rate in the training_args by a factor derived from the total training batch size. 'none': No scaling. 'sqrt': Multiplies learning rate by square root of batch size (a classic scaling rule). 'linear': Multiplies learning rate by the batch size (a more modern scaling rule). | ## PeftType Enum class for the different types of adapters in PEFT. Supported PEFT types: -- `PROMPT_TUNING` -- `MULTITASK_PROMPT_TUNING` -- `P_TUNING` -- `PREFIX_TUNING` -- `LORA` -- `ADALORA` -- `BOFT` -- `ADAPTION_PROMPT` -- `IA3` -- `LOHA` -- `LOKR` -- `OFT` -- `XLORA` -- `POLY` -- `LN_TUNING` -- `VERA` -- `FOURIERFT` -- `HRA` -- `BONE` -- `RANDLORA` -- `C3A` +- PROMPT_TUNING +- MULTITASK_PROMPT_TUNING +- P_TUNING +- PREFIX_TUNING +- LORA +- ADALORA +- BOFT +- ADAPTION_PROMPT +- IA3 +- LOHA +- LOKR +- OFT +- XLORA +- POLY +- LN_TUNING +- VERA +- FOURIERFT +- HRA +- BONE +- RANDLORA +- C3A #### Type: `string` @@ -344,8 +344,8 @@ PEFT adapter uses the config and initialisation from a pretrained adapter | Property | Type | Required | Possible values | Description | | -------- | ---- | -------- | --------------- | ----------- | -| `peft_type` | `const` | ✅ | `PRETRAINED_PEFT` | | -| `name_or_path` | `string` | ✅ | `string` | `HF ID` or `path` to the pretrained PEFT. | +| peft_type | `const` | ✅ | `PRETRAINED_PEFT` | | +| name_or_path | `string` | ✅ | string | HF ID or path to the pretrained peft. | ## RunConfig @@ -355,13 +355,13 @@ Experiment running configuration | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `model` | `string` | | `string` | `"/local_resources/basemodel"` | Local path to model to be fine-tuned. Normally this should be `/local_resources/basemodel` | -| `model_args` | `object` | | [ModelArguments](#modelarguments) | `{"torch_dtype": "auto", "device_map": "auto", "max_memory": null, "low_cpu_mem_usage": false, "attn_implementation": null, "offload_folder": null, "offload_state_dict": null, "offload_buffers": null, "use_cache": true, "cache_dir": null, "force_download": false, "local_files_only": false, "proxies": null, "resume_download": false, "revision": "main", "code_revision": "main", "subfolder": null, "token": null, "use_safetensors": null, "variant": null, "trust_remote_code": false}` | | -| `tokenizer` | `string` or `null` | | `string` | `null` | Model Hugging Face ID, or path, or `None` to use the one associated with the model | -| `use_fast_tokenizer` | `boolean` | | `boolean` | `true` | Use the Fast version of the tokenizer. The 'slow' version may be compatible with more features. 
| -| `resume_from_checkpoint` | `boolean` or `string` | | boolean and/or string | `false` | Normally should be set to `"auto"` to continue if a checkpoint exists. Can set to `True` to always try to continue, `False` to never try, or a path to load from a specific path. | -| `final_checkpoint_name` | `string` | | `string` | `"checkpoint-final"` | Name of final checkpoint. Should be left as default | -| determinism | `string` | | `no` `half` `full` | `"no"` | Set the level of determinism in implementations. Deterministic implementations are not always available, and when they are, they are usually slower than their non-deterministic counterparts. Recommended for debugging only. `"no"`: No determinism. `"half"`: Prefer deterministic implementations. `"full"`: Only fully deterministic implementations, error out on operations that only have non-deterministic implementations. | +| model | `string` | | string | `"/local_resources/basemodel"` | Local path to model to be fine-tuned. Normally this should be /local_resources/basemodel | +| model_args | `object` | | [ModelArguments](#modelarguments) | `{"dtype": "auto", "pretrained_model_name_or_path": null, "config": null, "cache_dir": null, "from_tf": false, "from_flax": false, "ignore_mismatched_sizes": false, "force_download": false, "proxies": null, "output_loading_info": false, "local_files_only": false, "token": null, "revision": "main", "attn_implementation": null, "device_map": "auto", "max_memory": null, "tp_plan": null, "tp_size": null, "offload_folder": null, "offload_buffers": false, "subfolder": "", "variant": null, "use_safetensors": null, "weights_only": true, "key_mapping": null}` | | +| tokenizer | `string` or `null` | | string | `null` | Model HuggingFace ID, or path, or None to use the one associated with the model | +| use_fast_tokenizer | `boolean` | | boolean | `true` | Use the Fast version of the tokenizer. The 'slow' version may be compatible with more features. | +| resume_from_checkpoint | `boolean` or `string` | | boolean and/or string | `false` | Normally should be set to 'auto' to continue if a checkpoint exists. Can set to True to always try to continue, False to never try, or a path to load from a specific path. | +| final_checkpoint_name | `string` | | string | `"checkpoint-final"` | Name of final checkpoint. Should be left as default | +| determinism | `string` | | `no` `half` `full` | `"no"` | Set the level of determinism in implementations. Deterministic implementations are not always available, and when they are, they are usually slower than their non-deterministic counterparts. Recommended for debugging only. 'no': No determinism. 'half': Prefer deterministic implementations. 'full': Only fully deterministic implementations, error out on operations that only have non-deterministic implementations. | ## SFTArguments @@ -371,18 +371,19 @@ Supervised fine-tuning arguments | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `max_seq_length` | `integer` | | `integer` | `2048` | Maximum length input sequence length. Longer sequences will be filtered out. | -| `save_name_if_new_basemodel` | `string` | | `string` | `"checkpoint-new-basemodel"` | If a new base model is saved, it will be saved with this name | -| `train_on_completions_only` | `boolean` | | `boolean` | `false` | Only compute loss on the assistant's turns. | +| max_seq_length | `integer` | | integer | `2048` | Maximum length input sequence length. 
Longer sequences will be filtered or truncated. |
+| length_handling | `string` | | `filter` `truncate` | `"filter"` | How to handle examples that are longer than max_seq_length. 'filter': Filter out these examples from the training set. 'truncate': Truncate these examples to max_seq_length. Note that this might lead to loss of information and worse performance, especially if the important information is at the end of the sequence. |
+| save_name_if_new_basemodel | string | | string | `"checkpoint-new-basemodel"` | If a new basemodel is saved, it will be saved with this name |
+| train_on_completions_only | `boolean` | | boolean | `false` | Only compute loss on the assistant's turns. |

## SilogenTrainingArguments

-Hugging Face `TrainingArguments` as `Config` with additional SiloGen conventions
+HuggingFace TrainingArguments as Config with additional SiloGen conventions

The list of training arguments is best available online (the version might not be up-to-date here):
-
+https://huggingface.co/docs/transformers/v4.57.3/en/main_classes/trainer#transformers.TrainingArguments

-The `TrainingArguments` object does a lot of things besides specifying the training configuration options (e.g. it
+The TrainingArguments object does a lot of things besides specifying the training configuration options (e.g. it
has computed properties like true training batch size etc.)

## TaskType

@@ -390,12 +391,12 @@ has computed properties like true training batch size etc.)

Enum class for the different types of tasks supported by PEFT.

Overview of the supported task types:
-- `SEQ_CLS`: Text classification.
-- `SEQ_2_SEQ_LM`: Sequence-to-sequence language modeling.
-- `CAUSAL_LM`: Causal language modeling.
-- `TOKEN_CLS`: Token classification.
-- `QUESTION_ANS`: Question answering.
-- `FEATURE_EXTRACTION`: Feature extraction. Provides the hidden states which can be used as embeddings or features
+- SEQ_CLS: Text classification.
+- SEQ_2_SEQ_LM: Sequence-to-sequence language modeling.
+- CAUSAL_LM: Causal language modeling.
+- TOKEN_CLS: Token classification.
+- QUESTION_ANS: Question answering.
+- FEATURE_EXTRACTION: Feature extraction. Provides the hidden states which can be used as embeddings or features
for downstream tasks.

#### Type: `string`

@@ -410,38 +411,34 @@ Define a dataset, with a weight for sampling

| Property | Type | Required | Possible values | Default | Description |
| -------- | ---- | -------- | --------------- | ------- | ----------- |
-| `path` | `string` | ✅ | `string` | | Local path to a JSONL file in the fine-tuning data format |
-| `sampling_weight` | `number` | | `number` | `1.0` | |
+| path | `string` | ✅ | string | | Local path to a JSONL file in the finetuning data format |
+| sampling_weight | `number` | | number | `1.0` | |

## WeightedMixDataInput

A list of datasets where each is sampled by a certain weight

-These datasets are interleaved based on the sampling weights. The resulting dataset is fully precomputed, up to
+These datasets are interleaved based on the sampling weights. The resulting dataset is fully precomputed, up to
the point where every single sample in every dataset gets picked. This means that with small sampling weights, it
can take a lot of draws to see every sample from a dataset and so the resulting dataset can be very large.

-The datasets themselves need to be in the fine-tuning supported JSONL formats.
+The datasets themselves need to be in the finetuning supported JSONL formats.
For SFT this means lines: -```json {"messages": [{"content": "string", "role": "string"}]} -``` For DPO this means lines of: -```json { "prompt_messages": [{"content": "string", "role": "string"}], "chosen_messages": [{"content": "string", "role": "string"}], "rejected_messages": [{"content": "string", "role": "string"}] } -``` #### Type: `object` | Property | Type | Required | Possible values | Default | Description | | -------- | ---- | -------- | --------------- | ------- | ----------- | -| `type` | `const` | ✅ | `PRECOMPUTE_WEIGHTED_MIX` | | | -| `datasets` | `array` | ✅ | [WeightedDatasetDefinition](#weighteddatasetdefinition) | | | -| `data_type` | `string` | | `string` | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | -| `seed` | `integer` | | `integer` | `19851243` | Seed for the random number generator for interleaving draws | +| type | `const` | ✅ | `PRECOMPUTE_WEIGHTED_MIX` | | | +| datasets | `array` | ✅ | [WeightedDatasetDefinition](#weighteddatasetdefinition) | | | +| data_type | `string` | | string | `"ChatConversation"` | Generally, the data_type is automatically set based on the experiment config method. | +| seed | `integer` | | integer | `19851243` | Seed for the random number generator for interleaving draws | diff --git a/workloads/llm-finetune-silogen-engine/helm/overrides/data/long-random-data.yaml b/workloads/llm-finetune-silogen-engine/helm/overrides/data/long-random-data.yaml new file mode 100644 index 0000000..f5581ba --- /dev/null +++ b/workloads/llm-finetune-silogen-engine/helm/overrides/data/long-random-data.yaml @@ -0,0 +1,6 @@ +# This works for default path in the corresponding data download override +finetuning_config: + data_conf: + training_data: + datasets: + - path: default-bucket/datasets/long-random-data.jsonl diff --git a/workloads/llm-finetune-silogen-engine/helm/overrides/models/meta-llama_llama-3.1-8b-instruct-radeon.yaml b/workloads/llm-finetune-silogen-engine/helm/overrides/models/meta-llama_llama-3.1-8b-instruct-radeon.yaml new file mode 100644 index 0000000..c55ccfa --- /dev/null +++ b/workloads/llm-finetune-silogen-engine/helm/overrides/models/meta-llama_llama-3.1-8b-instruct-radeon.yaml @@ -0,0 +1,91 @@ +metadata: + compatibleAccelerators: + # AMD Radeon Pro series + - 7551 # AI PRO R9700 / R9700S / R9600D + +finetuningImage: ghcr.io/silogen/rocm-silogen-finetuning-worker:v0.7.2-rocm7.2 +runAs: 1001 + +# Canonical model name: +model: "meta-llama/Llama-3.1-8B-Instruct" +hfDownloadExcludeGlob: "original/*" + +# Resources: +downloadsReservedSize: 64Gi +checkpointsReservedSize: 128Gi +memoryPerGpu: 24 +cpusPerGpu: 2 +finetuningGpus: 1 + +# Runtime configuration: +distributedType: "auto-single-process" +mergeAdapter: true + + +### Finetuning config section ### +finetuning_config: + method: sft + data_conf: + training_data: + type: CONCATENATION + validation_data: + type: AUTO_SPLIT + ratio: 0.1 + chat_template_name: "keep-original" + missing_pad_token_strategy: "bos-repurpose" + training_args: + learning_rate: 0.00005 + max_grad_norm: 7.0 + weight_decay: 0.000001 + optim: "adamw_torch" + num_train_epochs: 1 + lr_scheduler_type: cosine + warmup_ratio: 0.01 + logging_strategy: steps + logging_steps: 0.01 + save_strategy: steps + save_steps: 0.2 + seed: 42 + bf16: true + report_to: + - none + push_to_hub: false + gradient_checkpointing: true + gradient_checkpointing_kwargs: + use_reentrant: true + eval_steps: 0.1 + eval_strategy: "steps" + metric_for_best_model: "loss" + 
greater_is_better: false + load_best_model_at_end: true + batchsize_conf: + max_per_device_train_batch_size: 1 + peft_conf: + peft_type: "LORA" + task_type: "CAUSAL_LM" + peft_kwargs: + r: 64 + lora_alpha: 16.0 + lora_dropout: 0.05 + target_modules: + - q_proj + - k_proj + - v_proj + - o_proj + - up_proj + - down_proj + - gate_proj + - lm_head + - embed_tokens + run_conf: + model_args: + dtype: bfloat16 + attn_implementation: "sdpa" + resume_from_checkpoint: auto + sft_args: + max_seq_length: 8192 + +basemodel: hf://meta-llama/Llama-3.1-8B-Instruct +aimManifest: + modelId: "meta-llama/Llama-3.1-8B-Instruct" + aimId: "meta-llama/Llama-3.1-8B-Instruct" diff --git a/workloads/llm-finetune-silogen-engine/helm/overrides/models/mistralai_mixtral-8x7b-instruct-v0.1.yaml b/workloads/llm-finetune-silogen-engine/helm/overrides/models/mistralai_mixtral-8x7b-instruct-v0.1.yaml index 95103ad..4cd3f9c 100644 --- a/workloads/llm-finetune-silogen-engine/helm/overrides/models/mistralai_mixtral-8x7b-instruct-v0.1.yaml +++ b/workloads/llm-finetune-silogen-engine/helm/overrides/models/mistralai_mixtral-8x7b-instruct-v0.1.yaml @@ -21,7 +21,7 @@ model: "mistralai/Mixtral-8x7B-Instruct-v0.1" # Resources: downloadsReservedSize: 256Gi checkpointsReservedSize: 512Gi -finetuningGpus: 8 +finetuningGpus: 1 memoryPerGpu: 192 cpuPerGpu: 8 @@ -55,9 +55,9 @@ finetuning_config: report_to: - none push_to_hub: false - gradient_checkpointing: true - gradient_checkpointing_kwargs: - use_reentrant: true + gradient_checkpointing: false + #gradient_checkpointing_kwargs: + # use_reentrant: true eval_steps: 0.2 eval_strategy: "steps" metric_for_best_model: "loss" @@ -66,7 +66,23 @@ finetuning_config: batchsize_conf: max_per_device_train_batch_size: 1 peft_conf: - peft_type: "NO_PEFT" + peft_type: "LORA" + task_type: "CAUSAL_LM" + peft_kwargs: + bias: "none" + fan_in_fan_out: false + lora_alpha: 16 + lora_dropout: 0.05 + r: 16 + target_modules: + - "o_proj" + - "v_proj" + - "k_proj" + - "q_proj" + - "w1" + - "gate" + - "w3" + - "w2" run_conf: model_args: torch_dtype: bfloat16 @@ -74,7 +90,7 @@ finetuning_config: attn_implementation: "flash_attention_2" resume_from_checkpoint: auto sft_args: - max_seq_length: 8192 + max_seq_length: 4096 basemodel: hf://mistralai/Mixtral-8x7B-Instruct-v0.1 aimManifest: diff --git a/workloads/llm-finetune-silogen-engine/helm/overrides/utilities/speed-and-max-batch-test.yaml b/workloads/llm-finetune-silogen-engine/helm/overrides/utilities/speed-and-max-batch-test.yaml new file mode 100644 index 0000000..f56ec5f --- /dev/null +++ b/workloads/llm-finetune-silogen-engine/helm/overrides/utilities/speed-and-max-batch-test.yaml @@ -0,0 +1,6 @@ +finetuning_config: + training_args: + include_num_input_tokens_seen: 'non_padding' + include_tokens_per_second: true + sft_args: + length_handling: "truncate" diff --git a/workloads/llm-finetune-silogen-engine/helm/templates/configmap.yaml b/workloads/llm-finetune-silogen-engine/helm/templates/configmap.yaml index d39d255..ea92293 100644 --- a/workloads/llm-finetune-silogen-engine/helm/templates/configmap.yaml +++ b/workloads/llm-finetune-silogen-engine/helm/templates/configmap.yaml @@ -151,6 +151,19 @@ data: modelSources: - modelId: {{ required "aimManifest.modelId is required when aimManifest.enabled is true!" 
.Values.aimManifest.modelId | quote }} sourceUri: "s3://{{ .Values.checkpointsRemote }}/checkpoint-final" + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Values.bucketCredentialsSecret.name | quote }} + key: {{ .Values.bucketCredentialsSecret.accessKeyKey | quote }} + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.bucketCredentialsSecret.name | quote }} + key: {{ .Values.bucketCredentialsSecret.secretKeyKey | quote }} + - name: AWS_ENDPOINT_URL + value: {{ .Values.bucketStorageHost | quote }} {{- if .Values.aimManifest.customTemplates }} customTemplates: {{- toYaml .Values.aimManifest.customTemplates | nindent 8 }} @@ -159,6 +172,7 @@ data: custom: {{- toYaml .Values.aimManifest.custom | nindent 8 }} {{- end }} + {{- if .Values.aimManifest.extraEnv }} env: {{- range .Values.aimManifest.extraEnv }} - name: {{ .name | quote }} @@ -170,17 +184,6 @@ data: {{- else }} value: {{ .value | quote }} {{- end }} - {{ end }} - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: {{ .Values.bucketCredentialsSecret.name | quote }} - key: {{ .Values.bucketCredentialsSecret.accessKeyKey | quote }} - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.bucketCredentialsSecret.name | quote }} - key: {{ .Values.bucketCredentialsSecret.secretKeyKey | quote }} - - name: AWS_ENDPOINT_URL - value: {{ .Values.bucketStorageHost | quote }} + {{- end }} + {{- end }} {{- end }} diff --git a/workloads/llm-finetune-silogen-engine/helm/values.schema.json b/workloads/llm-finetune-silogen-engine/helm/values.schema.json index ef54df7..f9928ec 100644 --- a/workloads/llm-finetune-silogen-engine/helm/values.schema.json +++ b/workloads/llm-finetune-silogen-engine/helm/values.schema.json @@ -762,30 +762,35 @@ "type": "string" }, "ModelArguments": { - "additionalProperties": false, + "additionalProperties": true, "description": "These are passed to AutoModelForCausalLM.from_pretrained\n\nSee parameter docstrings and help at:\nhttps://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained\nSee below in \"Parameters for big model inference\" too, it affects training too. Also note that this link takes you\nto the transformers main branch version - be sure to compare with the installed version of transformers (that keeps\nchanging over time, and it is difficult to keep this docstring up to date, so we wanted to link to the latest here).\n\nSome important parameters to consider are:\n\n- device_map :\n A map that specifies where each submodule should go. It doesn\u2019t need to be refined to each parameter/buffer\n name, once a given module name is inside, every submodule of it will be sent to the same device. If we only pass\n the device (e.g., \"cpu\", \"cuda:1\", \"mps\", or a GPU ordinal rank like 1) on which the model will be allocated,\n the device map will map the entire model to this device. Passing device_map = 0 means put the whole model on GPU\n 0.\n- attn_implementation :\n The attention implementation to use in the model (if relevant). Can be any of \"eager\" (manual implementation of\n the attention), \"sdpa\" (using F.scaled_dot_product_attention), or \"flash_attention_2\" (using\n Dao-AILab/flash-attention). By default, if available, SDPA will be used for torch>=2.1.1. The default is\n otherwise the manual \"eager\" implementation.\n\nNOTE:\n This does not include quantization_config. 
Quantization config is specified separately.", "properties": { - "torch_dtype": { + "silogen_extra_args": { + "additionalProperties": true, + "description": "Don't specify directly - this gathers additional args passed to the model", + "title": "Silogen Extra Args", + "type": "object" + }, + "dtype": { + "anyOf": [ + { + "const": "auto", + "type": "string" + }, + { + "type": "string" + } + ], "default": "auto", - "title": "Torch Dtype", - "type": "string" + "title": "Dtype" }, - "device_map": { + "pretrained_model_name_or_path": { "anyOf": [ { - "additionalProperties": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "string" - } - ] - }, - "type": "object" + "type": "string" }, { + "format": "path", "type": "string" }, { @@ -793,10 +798,68 @@ } ], "default": null, - "description": "Custom device map so that you can manually override the choices that HuggingFace would make. This can also be a string to specify \"auto\", \"balanced_low_0\", or \"sequential\".", - "title": "Device Map" + "description": "Can be either:\n- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.\n- A path to a *directory* containing model weights saved using `~PreTrainedModel.save_pretrained`.\n- A path or url to a *tensorflow index checkpoint file*.\n- A path or url to a model folder containing a *flax checkpoint file* in *.msgpack* format.\n- `None` if you are both providing the configuration and state dictionary.", + "title": "Pretrained Model Name Or Path" }, - "max_memory": { + "config": { + "anyOf": [ + { + "type": "string" + }, + { + "format": "path", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Configuration for the model to use instead of an automatically loaded configuration.\nCan be either an instance of a class derived from `PretrainedConfig`, or a string/path valid as input to `PretrainedConfig.from_pretrained`.", + "title": "Config" + }, + "cache_dir": { + "anyOf": [ + { + "type": "string" + }, + { + "format": "path", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Path to a directory in which a downloaded pretrained model configuration should be cached.", + "title": "Cache Dir" + }, + "from_tf": { + "default": false, + "description": "Load the model weights from a TensorFlow checkpoint save file.", + "title": "From Tf", + "type": "boolean" + }, + "from_flax": { + "default": false, + "description": "Load the model weights from a Flax checkpoint save file.", + "title": "From Flax", + "type": "boolean" + }, + "ignore_mismatched_sizes": { + "default": false, + "description": "Whether or not to raise an error if some of the weights from the checkpoint do not have the same size as the weights of the model.", + "title": "Ignore Mismatched Sizes", + "type": "boolean" + }, + "force_download": { + "default": false, + "description": "Whether or not to force the (re-)download of the model weights and configuration files.", + "title": "Force Download", + "type": "boolean" + }, + "proxies": { "anyOf": [ { "additionalProperties": { @@ -809,27 +872,44 @@ } ], "default": null, - "title": "Max Memory" + "description": "A dictionary of proxy servers to use by protocol or endpoint.", + "title": "Proxies" }, - "low_cpu_mem_usage": { + "output_loading_info": { "default": false, - "title": "Low Cpu Mem Usage", + "description": "Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.", + "title": "Output Loading Info", "type": "boolean" }, 
- "attn_implementation": { + "local_files_only": { + "default": false, + "description": "Whether or not to only look at local files (i.e., do not try to download the model).", + "title": "Local Files Only", + "type": "boolean" + }, + "token": { "anyOf": [ { "type": "string" }, + { + "type": "boolean" + }, { "type": "null" } ], "default": null, - "description": "Note: this can be set to \"sdpa\", \"flash_attention_2\", \"eager\".", - "title": "Attn Implementation" + "description": "The token to use as HTTP bearer authorization for remote files.", + "title": "Token" }, - "offload_folder": { + "revision": { + "default": "main", + "description": "The specific model version to use. It can be a branch name, a tag name, or a commit id.", + "title": "Revision", + "type": "string" + }, + "attn_implementation": { "anyOf": [ { "type": "string" @@ -839,40 +919,53 @@ } ], "default": null, - "title": "Offload Folder" + "description": "The attention implementation to use in the model. Can be any of 'eager', 'sdpa', 'flash_attention_2', or 'flash_attention_3'.\nAccepts HF kernel references in the form: /[@][:]", + "title": "Attn Implementation" }, - "offload_state_dict": { + "device_map": { "anyOf": [ { - "type": "boolean" + "type": "string" + }, + { + "additionalProperties": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + } + ] + }, + "type": "object" + }, + { + "type": "integer" }, { "type": "null" } ], "default": null, - "description": "Default is True if offloading (otherwise no effect)", - "title": "Offload State Dict" + "description": "A map that specifies where each submodule should go.", + "title": "Device Map" }, - "offload_buffers": { + "max_memory": { "anyOf": [ { - "type": "boolean" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Offload Buffers" - }, - "use_cache": { - "default": true, - "description": "Saves generated hidden states to speed up generation, see: https://discuss.huggingface.co/t/what-is-the-purpose-of-use-cache-in-decoder/958 This is mutually exclusive with gradient_checkpointing.", - "title": "Use Cache", - "type": "boolean" + "description": "A dictionary device identifier to maximum memory if using `device_map`.", + "title": "Max Memory" }, - "cache_dir": { + "tp_plan": { "anyOf": [ { "type": "string" @@ -882,61 +975,52 @@ } ], "default": null, - "title": "Cache Dir" + "description": "A torch tensor parallel plan. Currently only accepts 'auto'.", + "title": "Tp Plan" }, - "force_download": { - "default": false, - "title": "Force Download", - "type": "boolean" - }, - "local_files_only": { - "default": false, - "title": "Local Files Only", - "type": "boolean" - }, - "proxies": { + "tp_size": { "anyOf": [ { - "additionalProperties": { - "type": "string" - }, - "type": "object" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Proxies" - }, - "resume_download": { - "default": false, - "title": "Resume Download", - "type": "boolean" - }, - "revision": { - "default": "main", - "title": "Revision", - "type": "string" + "description": "A torch tensor parallel degree. 
If not provided would default to world size.", + "title": "Tp Size" }, - "code_revision": { - "default": "main", - "title": "Code Revision", - "type": "string" - }, - "subfolder": { + "offload_folder": { "anyOf": [ { "type": "string" }, + { + "format": "path", + "type": "string" + }, { "type": "null" } ], "default": null, - "title": "Subfolder" + "description": "If the `device_map` contains any value 'disk', the folder where we will offload weights.", + "title": "Offload Folder" }, - "token": { + "offload_buffers": { + "default": false, + "description": "Whether or not to offload the buffers with the model parameters.", + "title": "Offload Buffers", + "type": "boolean" + }, + "subfolder": { + "default": "", + "description": "In case the relevant files are located inside a subfolder of the model repo on huggingface.co.", + "title": "Subfolder", + "type": "string" + }, + "variant": { "anyOf": [ { "type": "string" @@ -946,7 +1030,8 @@ } ], "default": null, - "title": "Token" + "description": "If specified load weights from `variant` filename, e.g. pytorch_model..bin.", + "title": "Variant" }, "use_safetensors": { "anyOf": [ @@ -958,25 +1043,30 @@ } ], "default": null, + "description": "Whether or not to use `safetensors` checkpoints.", "title": "Use Safetensors" }, - "variant": { + "weights_only": { + "default": true, + "description": "Indicates whether unpickler should be restricted to loading only tensors and primitive types.", + "title": "Weights Only", + "type": "boolean" + }, + "key_mapping": { "anyOf": [ { - "type": "string" + "additionalProperties": { + "type": "string" + }, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Variant" - }, - "trust_remote_code": { - "default": false, - "description": "Warning: if set to True, allows execution of downloaded remote code.", - "title": "Trust Remote Code", - "type": "boolean" + "description": "A potential mapping of the weight names if using a model on the Hub which is compatible to a Transformers architecture, but was not converted accordingly.", + "title": "Key Mapping" } }, "title": "ModelArguments", @@ -1174,27 +1264,31 @@ "model_args": { "$ref": "#/$defs/ModelArguments", "default": { - "torch_dtype": "auto", - "device_map": "auto", - "max_memory": null, - "low_cpu_mem_usage": false, - "attn_implementation": null, - "offload_folder": null, - "offload_state_dict": null, - "offload_buffers": null, - "use_cache": true, + "dtype": "auto", + "pretrained_model_name_or_path": null, + "config": null, "cache_dir": null, + "from_tf": false, + "from_flax": false, + "ignore_mismatched_sizes": false, "force_download": false, - "local_files_only": false, "proxies": null, - "resume_download": false, - "revision": "main", - "code_revision": "main", - "subfolder": null, + "output_loading_info": false, + "local_files_only": false, "token": null, - "use_safetensors": null, + "revision": "main", + "attn_implementation": null, + "device_map": "auto", + "max_memory": null, + "tp_plan": null, + "tp_size": null, + "offload_folder": null, + "offload_buffers": false, + "subfolder": "", "variant": null, - "trust_remote_code": false + "use_safetensors": null, + "weights_only": true, + "key_mapping": null } }, "tokenizer": { @@ -1256,10 +1350,20 @@ "properties": { "max_seq_length": { "default": 2048, - "description": "Maximum length input sequence length. Longer sequences will be filtered out.", + "description": "Maximum length input sequence length. 
Longer sequences will be filtered or truncated.", "title": "Max Seq Length", "type": "integer" }, + "length_handling": { + "default": "filter", + "description": "How to handle examples that are longer than max_seq_length. 'filter': Filter out these examples from the training set. 'truncate': Truncate these examples to max_seq_length. Note that this might lead to loss of information and worse performance, especially if the important information is at the end of the sequence.", + "enum": [ + "filter", + "truncate" + ], + "title": "Length Handling", + "type": "string" + }, "save_name_if_new_basemodel": { "default": "checkpoint-new-basemodel", "description": "If a new basemodel is saved, it will be saved with this name", @@ -2042,7 +2146,7 @@ "type": "string" } ], - "default": "adamw_torch", + "default": "adamw_torch_fused", "title": "Optim" }, "optim_args": { @@ -3386,7 +3490,7 @@ "type": "string" } ], - "default": "adamw_torch", + "default": "adamw_torch_fused", "title": "Optim" }, "optim_args": { diff --git a/workloads/llm-finetune-silogen-engine/helm/values.yaml b/workloads/llm-finetune-silogen-engine/helm/values.yaml index 34fbb2a..2f1f999 100644 --- a/workloads/llm-finetune-silogen-engine/helm/values.yaml +++ b/workloads/llm-finetune-silogen-engine/helm/values.yaml @@ -1,5 +1,5 @@ ### General chart values ### -finetuningImage: ghcr.io/silogen/rocm-silogen-finetuning-worker:v0.7 +finetuningImage: ghcr.io/silogen/rocm-silogen-finetuning-worker:v0.7.2 # kaiwo settings (if enabled, use kaiwo CRDs to have kaiwo operator manage the workload) kaiwo: From 195b1a331d47e7cd9c31f3346b1a12168bbfa4f4 Mon Sep 17 00:00:00 2001 From: Aku Rouhe Date: Wed, 29 Apr 2026 11:53:01 +0300 Subject: [PATCH 4/5] Follow redirects in mc client install (#500) * Follow redirects in mc client install * reverse accidental adds --- docker/axolotl/axoltl-rocm.Dockerfile | 1 + docker/llama-factory/llama-factory-rocm.Dockerfile | 1 + docker/logistics/logistics.Dockerfile | 1 + .../aimtrain-fine-tune-verl/helm/templates/configmap.yaml | 1 + workloads/dev-text2image-comfyui/helm/templates/_entrypoint.tpl | 1 + workloads/llm-evaluation-judge/helm/templates/_helpers.tpl | 1 + workloads/llm-evaluation-metrics/helm/templates/_helpers.tpl | 1 + workloads/llm-finetune-verl/helm/templates/configmap.yaml | 1 + .../llm-inference-megatron-lm/helm/mount/download_files.sh | 1 + .../helm/mount/entrypoint.sh | 2 +- .../helm/mount/entrypoint.sh | 2 +- workloads/llm-inference-sglang/helm/templates/_entrypoint.tpl | 1 + .../llm-inference-vllm-benchmark-mad/helm/mount/entrypoint.sh | 2 +- .../helm/mount/run_benchmark.sh | 2 +- .../helm/templates/_entrypoint.tpl | 1 + workloads/llm-inference-vllm/helm/templates/_entrypoint.tpl | 1 + workloads/rag-embedding-infinity/helm/templates/_entrypoint.tpl | 1 + 17 files changed, 17 insertions(+), 4 deletions(-) diff --git a/docker/axolotl/axoltl-rocm.Dockerfile b/docker/axolotl/axoltl-rocm.Dockerfile index 4d82940..129b866 100644 --- a/docker/axolotl/axoltl-rocm.Dockerfile +++ b/docker/axolotl/axoltl-rocm.Dockerfile @@ -58,6 +58,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Install minio RUN curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc && \ chown -hR ${USER_NAME} /minio-binaries/ && \ diff --git a/docker/llama-factory/llama-factory-rocm.Dockerfile b/docker/llama-factory/llama-factory-rocm.Dockerfile index 0dec1df..42ed216 100644 --- a/docker/llama-factory/llama-factory-rocm.Dockerfile +++ 
b/docker/llama-factory/llama-factory-rocm.Dockerfile @@ -7,6 +7,7 @@ RUN git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git && \ # Install minio RUN curl https://dl.min.io/client/mc/release/linux-amd64/mc \ --create-dirs \ + --location \ -o /minio-binaries/mc && \ chmod +x /minio-binaries/mc ENV PATH="${PATH}:/minio-binaries/:/root/scripts/" diff --git a/docker/logistics/logistics.Dockerfile b/docker/logistics/logistics.Dockerfile index 4f9e7fa..93473a9 100644 --- a/docker/logistics/logistics.Dockerfile +++ b/docker/logistics/logistics.Dockerfile @@ -21,6 +21,7 @@ RUN pip install --no-cache-dir -r requirements.txt # Install minio RUN curl https://dl.min.io/client/mc/release/linux-amd64/mc \ --create-dirs \ + --location \ -o /minio-binaries/mc && \ chmod +x /minio-binaries/mc diff --git a/workloads/aim-fine-tuning/aimtrain-fine-tune-verl/helm/templates/configmap.yaml b/workloads/aim-fine-tuning/aimtrain-fine-tune-verl/helm/templates/configmap.yaml index ace9a8f..1ee83bf 100644 --- a/workloads/aim-fine-tuning/aimtrain-fine-tune-verl/helm/templates/configmap.yaml +++ b/workloads/aim-fine-tuning/aimtrain-fine-tune-verl/helm/templates/configmap.yaml @@ -425,6 +425,7 @@ data: {{- end }} echo "Installing MinIO:" curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/dev-text2image-comfyui/helm/templates/_entrypoint.tpl b/workloads/dev-text2image-comfyui/helm/templates/_entrypoint.tpl index 63deffb..f87465d 100644 --- a/workloads/dev-text2image-comfyui/helm/templates/_entrypoint.tpl +++ b/workloads/dev-text2image-comfyui/helm/templates/_entrypoint.tpl @@ -6,6 +6,7 @@ echo '--------------------------------------------' echo 'Installing minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/llm-evaluation-judge/helm/templates/_helpers.tpl b/workloads/llm-evaluation-judge/helm/templates/_helpers.tpl index 73daf2a..5282e5e 100644 --- a/workloads/llm-evaluation-judge/helm/templates/_helpers.tpl +++ b/workloads/llm-evaluation-judge/helm/templates/_helpers.tpl @@ -10,6 +10,7 @@ echo '--------------------------------------------' echo 'Installing and setting up minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/llm-evaluation-metrics/helm/templates/_helpers.tpl b/workloads/llm-evaluation-metrics/helm/templates/_helpers.tpl index 73daf2a..5282e5e 100644 --- a/workloads/llm-evaluation-metrics/helm/templates/_helpers.tpl +++ b/workloads/llm-evaluation-metrics/helm/templates/_helpers.tpl @@ -10,6 +10,7 @@ echo '--------------------------------------------' echo 'Installing and setting up minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/llm-finetune-verl/helm/templates/configmap.yaml b/workloads/llm-finetune-verl/helm/templates/configmap.yaml index 89e0424..1033e97 100644 --- a/workloads/llm-finetune-verl/helm/templates/configmap.yaml +++ b/workloads/llm-finetune-verl/helm/templates/configmap.yaml @@ -16,6 +16,7 @@ data: echo "Installing MinIO:" curl 
https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/llm-inference-megatron-lm/helm/mount/download_files.sh b/workloads/llm-inference-megatron-lm/helm/mount/download_files.sh index c00101d..66d2729 100644 --- a/workloads/llm-inference-megatron-lm/helm/mount/download_files.sh +++ b/workloads/llm-inference-megatron-lm/helm/mount/download_files.sh @@ -19,6 +19,7 @@ echo '--------------------------------------------' echo 'Installing minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/llm-inference-openai-benchmark-guidellm/helm/mount/entrypoint.sh b/workloads/llm-inference-openai-benchmark-guidellm/helm/mount/entrypoint.sh index fe9d1a7..a26fa63 100644 --- a/workloads/llm-inference-openai-benchmark-guidellm/helm/mount/entrypoint.sh +++ b/workloads/llm-inference-openai-benchmark-guidellm/helm/mount/entrypoint.sh @@ -2,7 +2,7 @@ apt update && apt install -y jq pip install guidellm mkdir -p /workload/output -curl https://dl.min.io/client/mc/release/linux-amd64/mc -o /workload/mc +curl https://dl.min.io/client/mc/release/linux-amd64/mc --location -o /workload/mc chmod +x /workload/mc /workload/mc alias set minio-host ${BUCKET_STORAGE_HOST} ${BUCKET_STORAGE_ACCESS_KEY} ${BUCKET_STORAGE_SECRET_KEY} /workload/mc mirror --watch /workload/output/ minio-host/"${BUCKET_RESULT_PATH}" & diff --git a/workloads/llm-inference-openai-benchmark-rocmblog/helm/mount/entrypoint.sh b/workloads/llm-inference-openai-benchmark-rocmblog/helm/mount/entrypoint.sh index 5c940fc..e321065 100644 --- a/workloads/llm-inference-openai-benchmark-rocmblog/helm/mount/entrypoint.sh +++ b/workloads/llm-inference-openai-benchmark-rocmblog/helm/mount/entrypoint.sh @@ -56,7 +56,7 @@ for req_in_out in "${Req_In_Out[@]}"; do done done -curl https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o /minio-binaries/mc +curl https://dl.min.io/client/mc/release/linux-amd64/mc --location --create-dirs -o /minio-binaries/mc chmod +x /minio-binaries/mc export PATH="${PATH}:/minio-binaries/" diff --git a/workloads/llm-inference-sglang/helm/templates/_entrypoint.tpl b/workloads/llm-inference-sglang/helm/templates/_entrypoint.tpl index 3e7b55f..0ceb257 100644 --- a/workloads/llm-inference-sglang/helm/templates/_entrypoint.tpl +++ b/workloads/llm-inference-sglang/helm/templates/_entrypoint.tpl @@ -6,6 +6,7 @@ echo '--------------------------------------------' echo 'Installing minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/llm-inference-vllm-benchmark-mad/helm/mount/entrypoint.sh b/workloads/llm-inference-vllm-benchmark-mad/helm/mount/entrypoint.sh index ac7c098..69f0b45 100644 --- a/workloads/llm-inference-vllm-benchmark-mad/helm/mount/entrypoint.sh +++ b/workloads/llm-inference-vllm-benchmark-mad/helm/mount/entrypoint.sh @@ -10,7 +10,7 @@ export OUTPATH=$WORKPATH/output/$(date +%Y%m%d%H%M) export NUM_GPUS=$(rocm-smi -a --csv | grep ^card | wc -l) # get minio -curl https://dl.min.io/client/mc/release/linux-amd64/mc -o $WORKPATH/bin/mc +curl https://dl.min.io/client/mc/release/linux-amd64/mc --location -o $WORKPATH/bin/mc chmod +x $WORKPATH/bin/mc mc alias set minio-host 
${BUCKET_STORAGE_HOST} ${BUCKET_STORAGE_ACCESS_KEY} ${BUCKET_STORAGE_SECRET_KEY} diff --git a/workloads/llm-inference-vllm-benchmark-rocmblog/helm/mount/run_benchmark.sh b/workloads/llm-inference-vllm-benchmark-rocmblog/helm/mount/run_benchmark.sh index 81bf005..01cddd6 100644 --- a/workloads/llm-inference-vllm-benchmark-rocmblog/helm/mount/run_benchmark.sh +++ b/workloads/llm-inference-vllm-benchmark-rocmblog/helm/mount/run_benchmark.sh @@ -1,6 +1,6 @@ # Wait for vLLM server (localhost) to be ready mkdir -p /workload/output -curl https://dl.min.io/client/mc/release/linux-amd64/mc -o /workload/mc +curl https://dl.min.io/client/mc/release/linux-amd64/mc --location -o /workload/mc chmod +x /workload/mc /workload/mc alias set minio-host ${BUCKET_STORAGE_HOST} ${BUCKET_STORAGE_ACCESS_KEY} ${BUCKET_STORAGE_SECRET_KEY} /workload/mc mirror --watch /workload/output/ minio-host/"${BUCKET_RESULT_PATH}" & diff --git a/workloads/llm-inference-vllm-benchmark-rocmblog/helm/templates/_entrypoint.tpl b/workloads/llm-inference-vllm-benchmark-rocmblog/helm/templates/_entrypoint.tpl index b815220..9a36727 100644 --- a/workloads/llm-inference-vllm-benchmark-rocmblog/helm/templates/_entrypoint.tpl +++ b/workloads/llm-inference-vllm-benchmark-rocmblog/helm/templates/_entrypoint.tpl @@ -6,6 +6,7 @@ echo '--------------------------------------------' echo 'Installing minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc diff --git a/workloads/llm-inference-vllm/helm/templates/_entrypoint.tpl b/workloads/llm-inference-vllm/helm/templates/_entrypoint.tpl index 9f3f78d..b84d742 100644 --- a/workloads/llm-inference-vllm/helm/templates/_entrypoint.tpl +++ b/workloads/llm-inference-vllm/helm/templates/_entrypoint.tpl @@ -7,6 +7,7 @@ echo 'Installing minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ --create-dirs \ + --location \ -o /minio-binaries/mc chmod +x /minio-binaries/mc export PATH="${PATH}:/minio-binaries/" diff --git a/workloads/rag-embedding-infinity/helm/templates/_entrypoint.tpl b/workloads/rag-embedding-infinity/helm/templates/_entrypoint.tpl index e12fc11..a946b2a 100644 --- a/workloads/rag-embedding-infinity/helm/templates/_entrypoint.tpl +++ b/workloads/rag-embedding-infinity/helm/templates/_entrypoint.tpl @@ -6,6 +6,7 @@ echo '--------------------------------------------' echo 'Installing minio client' echo '--------------------------------------------' curl https://dl.min.io/client/mc/release/linux-amd64/mc \ + --location \ --create-dirs \ -o /minio-binaries/mc chmod +x /minio-binaries/mc From 806c84b814841ccae797a07975320cf15cbf1d74 Mon Sep 17 00:00:00 2001 From: aivanni <4340981+aivanni@users.noreply.github.com> Date: Wed, 29 Apr 2026 11:56:31 +0300 Subject: [PATCH 5/5] Fixes to LeRobot workload (#499) * fixes to lerobot workload * git checkout --- .../robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl | 4 ++-- workloads/robotics-finetune-lerobot/helm/values.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl b/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl index ad2dc8c..9ffa21e 100644 --- a/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl +++ b/workloads/robotics-finetune-lerobot/helm/mount/entrypoint.sh.tpl @@ -6,10 +6,10 @@ apt-get update && apt-get 
install -y --no-install-recommends ffmpeg=7:6.1.1-3ubu # Install lerobot cd /workload -git clone --depth 1 https://github.com/huggingface/lerobot.git +git clone https://github.com/huggingface/lerobot.git cd lerobot {{- if .Values.setup.lerobotGitRef }} -git checkout {{ .Values.setup.lerobotGitRef }} +git checkout {{ .Values.setup.lerobotGitRef | quote }} {{- end }} {{- if .Values.setup.lerobotExtraPackages }} pip install -e ".[{{ .Values.setup.lerobotExtraPackages }}]" diff --git a/workloads/robotics-finetune-lerobot/helm/values.yaml b/workloads/robotics-finetune-lerobot/helm/values.yaml index b2fd491..8587831 100644 --- a/workloads/robotics-finetune-lerobot/helm/values.yaml +++ b/workloads/robotics-finetune-lerobot/helm/values.yaml @@ -11,7 +11,7 @@ hfFinetunedModelId: "" setup: # These extra packages e.g. "pi,peft" separated with commas will be installed during lerobot library installation. lerobotExtraPackages: "dataset,training" - lerobotGitRef: 05a5223885bcd36064fc1a967620329696595a76 + lerobotGitRef: v0.5.1 # Main policy settings policy:
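For reference, a downstream values override pinning the same checkout might look like this (hypothetical file name; per the values.yaml comment the extra packages are comma-separated, and the ref may be a tag, branch, or commit SHA):

```yaml
# my-values.yaml (hypothetical) -- pins the lerobot ref cloned by the entrypoint
setup:
  lerobotGitRef: v0.5.1
  lerobotExtraPackages: "dataset,training"
```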