From 4a84cecd40ce11a8f04895fbae406083e0dccf79 Mon Sep 17 00:00:00 2001
From: 2395959141 <2395959141@qq.com>
Date: Tue, 1 Jul 2025 06:06:24 +0000
Subject: [PATCH 1/4] feat: modify the fine-tuning script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 bitbrain/finetune/run_v3.sh | 72 +++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 bitbrain/finetune/run_v3.sh

diff --git a/bitbrain/finetune/run_v3.sh b/bitbrain/finetune/run_v3.sh
new file mode 100644
index 0000000..4f374fe
--- /dev/null
+++ b/bitbrain/finetune/run_v3.sh
@@ -0,0 +1,72 @@
+# LLaMA-Factory start script
+##########################
+#CUDA_VISIBLE_DEVICES=5,6 python ../LLaMA-Factory/src/train.py \
+export LLaMA_PATH=/home/ytllm/LLaMA-Factory
+OUTPUT_DIR=/home/ytllm/.cache/ckpt/bit-brain-v3.1/sft
+#export CUDA_DEVICE_ORDER=PCI_BUS_ID
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+export NCCL_P2P_LEVEL=NVL
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+#python $LLaMA_PATH/src/train.py \
+# max_steps / num_train_epochs
+# --streaming True \
+# 6-GPU configuration: 1,2,3,4,5,7
+# all data:
+#! Optional mixing strategy: --mix_strategy interleave_over \
+#! --interleave_probs 0.1,0.35,0.2,0.2,0.1,0.05 \
+
+#! Optional custom evaluation datasets: --eval_dataset ceval,cmmlu \
+
+#! --val_size cannot be set together with --eval_dataset
+#! Skip --overwrite_cache for now \
+
+#! --streaming True \
+#! --max_steps 100000 \
+
+#! baai_instruct_70W,baai_instruct_682W,deepctl_200W \
+# deepctl_1120W_zh,deepctl_276W_en,baai_instruct_70W,ultrachat_200k,wanjuan_exam_399W \
+#! --max_samples 2000000 \
+
+
+FORCE_TORCHRUN=1 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node 4 $LLaMA_PATH/src/train.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path /home/ytllm/.cache/modelscope/models/hh2395959141/Bitbrain-0.6b-base \
+    --cutoff_len 2048 \
+    --dataset_dir /home/ytllm/.cache/modelscope/datasets/hh2395959141/Bitbrain-0___6b-sft_data \
+    --dataset chinese_instruct,deepctrl_200k,ultrachat_200k,code_feedback_custom \
+    --overwrite_cache \
+    --packing False \
+    --use_swanlab true \
+    --report_to swanlab \
+    --swanlab_project bit-brain-v3-part2-sft \
+    --run_name sft_bit-brain-v3-part2 \
+    --preprocessing_num_workers 30 \
+    --template qwen \
+    --finetuning_type full \
+    --output_dir ${OUTPUT_DIR}/lr2e-5 \
+    --overwrite_output_dir \
+    --per_device_train_batch_size 4 \
+    --per_device_eval_batch_size 4 \
+    --do_eval \
+    --val_size 100 \
+    --eval_strategy steps \
+    --eval_steps 1000 \
+    --flash_attn fa2 \
+    --gradient_accumulation_steps 16 \
+    --lr_scheduler_type cosine \
+    --warmup_ratio 0.0125 \
+    --max_grad_norm 1.0 \
+    --logging_steps 10 \
+    --save_steps 5000 \
+    --learning_rate 2e-5 \
+    --weight_decay 0.01 \
+    --num_train_epochs 3.0 \
+    --plot_loss \
+    --bf16 \
+    --resume_from_checkpoint /home/ytllm/.cache/modelscope/models/hh2395959141/Bitbrain-0.6b-sft-checkpoint
+
+
+    #--resume_from_checkpoint /DATA/disk2/yuhang/.cache/ckpt/bit-brain/sft_mix_v2/bit-brain-v1-full-sft/checkpoint-500
+
+

From 9dade55ce3e599c0764a2e9abe973fe67053237f Mon Sep 17 00:00:00 2001
From: 2395959141 <2395959141@qq.com>
Date: Tue, 1 Jul 2025 10:17:49 +0000
Subject: [PATCH 2/4] feat: modify the finetune script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 bitbrain/finetune/run.sh | 47 +++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/bitbrain/finetune/run.sh b/bitbrain/finetune/run.sh
index bae7524..eff94ad 100644
--- a/bitbrain/finetune/run.sh
+++ b/bitbrain/finetune/run.sh
@@ -1,53 +1,50 @@
-##########################
 # LLaMA-Factory start script
 ##########################
-#CUDA_VISIBLE_DEVICES=5,6 python ../LLaMA-Factory/src/train.py \
-export LLaMA_PATH=/home/chenyuhang/LLaMA-Factory
+export LLaMA_PATH="local path to LLaMA-Factory"
 OUTPUT_DIR="output path"
-#export CUDA_DEVICE_ORDER=PCI_BUS_ID
-export CUDA_VISIBLE_DEVICES=4,5,6,7
+export CUDA_VISIBLE_DEVICES=0,1,2,3
 export NCCL_P2P_LEVEL=NVL
 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 
-# Optional mixing strategy: --mix_strategy interleave_over \
-# --interleave_probs 0.1,0.35,0.2,0.2,0.1,0.05 \
-
-# Optional custom evaluation datasets: --eval_dataset ceval,cmmlu \
-
 
-FORCE_TORCHRUN=1 CUDA_VISIBLE_DEVICES=4,5,6,7 torchrun --nproc_per_node 4 $LLaMA_PATH/src/train.py \
+FORCE_TORCHRUN=1 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node 4 $LLaMA_PATH/src/train.py \
     --stage sft \
     --do_train \
-    --model_name_or_path "pretrained model path" \
+    --model_name_or_path "model path" \
     --cutoff_len 2048 \
     --dataset_dir "dataset path" \
-    --dataset shared_gpt_format \
+    --dataset chinese_instruct,deepctrl_200k,ultrachat_200k,code_feedback_custom \
     --overwrite_cache \
-    --max_samples 5000000 \
-    --packing True \
+    --enable_liger_kernel True \
+    --packing False \
     --use_swanlab true \
     --report_to swanlab \
-    --run_name sft_bit-brain \
+    --swanlab_project "swanlab project name" \
+    --run_name "run name" \
     --preprocessing_num_workers 30 \
     --template qwen \
     --finetuning_type full \
-    --output_dir ${OUTPUT_DIR}/sft \
+    --output_dir ${OUTPUT_DIR}/"run output subdirectory" \
     --overwrite_output_dir \
-    --per_device_train_batch_size 16 \
+    --per_device_train_batch_size 4 \
     --per_device_eval_batch_size 4 \
     --do_eval \
     --val_size 100 \
     --eval_strategy steps \
     --eval_steps 1000 \
-    --flash_attn sdpa \
-    --gradient_accumulation_steps 4 \
+    --flash_attn fa2 \
+    --gradient_accumulation_steps 16 \
     --lr_scheduler_type cosine \
+    --warmup_ratio 0.0125 \
+    --max_grad_norm 1.0 \
     --logging_steps 10 \
-    --save_steps 500 \
-    --learning_rate 3e-4 \
+    --save_steps 5000 \
+    --learning_rate 2e-5 \
     --weight_decay 0.01 \
-    --num_train_epochs 4.0 \
+    --num_train_epochs 3.0 \
     --plot_loss \
-    --bf16 \
-    --resume_from_checkpoint "checkpoint path to resume from"
+    --bf16
+
+    #--resume_from_checkpoint ${OUTPUT_DIR}/"run output subdirectory/checkpoint-xxxx"
+
 

From fec392d078d8b5ea0296b3d9f278e5846d12df0e Mon Sep 17 00:00:00 2001
From: 2395959141 <2395959141@qq.com>
Date: Tue, 1 Jul 2025 10:26:22 +0000
Subject: [PATCH 3/4] Remove unnecessary file

---
 bitbrain/finetune/run.sh | 50 ----------------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 bitbrain/finetune/run.sh

diff --git a/bitbrain/finetune/run.sh b/bitbrain/finetune/run.sh
deleted file mode 100644
index eff94ad..0000000
--- a/bitbrain/finetune/run.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-# LLaMA-Factory start script
-##########################
-export LLaMA_PATH="local path to LLaMA-Factory"
-OUTPUT_DIR="output path"
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-export NCCL_P2P_LEVEL=NVL
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
-
-
-FORCE_TORCHRUN=1 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node 4 $LLaMA_PATH/src/train.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path "model path" \
-    --cutoff_len 2048 \
-    --dataset_dir "dataset path" \
-    --dataset chinese_instruct,deepctrl_200k,ultrachat_200k,code_feedback_custom \
-    --overwrite_cache \
-    --enable_liger_kernel True \
-    --packing False \
-    --use_swanlab true \
-    --report_to swanlab \
-    --swanlab_project "swanlab project name" \
"swanlab项目名称" \ - --run_name "运行名称" \ - --preprocessing_num_workers 30 \ - --template qwen \ - --finetuning_type full \ - --output_dir ${OUTPUT_DIR}/"本次运行保存子路径" \ - --overwrite_output_dir \ - --per_device_train_batch_size 4 \ - --per_device_eval_batch_size 4 \ - --do_eval \ - --val_size 100 \ - --eval_strategy steps \ - --eval_steps 1000 \ - --flash_attn fa2\ - --gradient_accumulation_steps 16 \ - --lr_scheduler_type cosine \ - --warmup_ratio 0.0125 \ - --max_grad_norm 1.0 \ - --logging_steps 10 \ - --save_steps 5000 \ - --learning_rate 2e-5 \ - --weight_decay 0.01 \ - --num_train_epochs 3.0 \ - --plot_loss \ - --bf16 - - #--resume_from_checkpoint ${OUTPUT_DIR}/"本次运行保存子路径/checkpoint-xxxx" - - From 140448d997bad5723ce66823dee399eae791a77b Mon Sep 17 00:00:00 2001 From: 2395959141 <2395959141@qq.com> Date: Tue, 1 Jul 2025 10:27:34 +0000 Subject: [PATCH 4/4] Remove unnecessary file --- bitbrain/finetune/run.sh | 50 ++++++++++++++++++++++++++ bitbrain/finetune/run_v3.sh | 72 ------------------------------------- 2 files changed, 50 insertions(+), 72 deletions(-) create mode 100644 bitbrain/finetune/run.sh delete mode 100644 bitbrain/finetune/run_v3.sh diff --git a/bitbrain/finetune/run.sh b/bitbrain/finetune/run.sh new file mode 100644 index 0000000..eff94ad --- /dev/null +++ b/bitbrain/finetune/run.sh @@ -0,0 +1,50 @@ +# llama factory start script +########################## +export LLaMA_PATH="llama_factory本地路径" +OUTPUT_DIR="输出路径" +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export NCCL_P2P_LEVEL=NVL +export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True + + +FORCE_TORCHRUN=1 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node 4 $LLaMA_PATH/src/train.py \ + --stage sft \ + --do_train \ + --model_name_or_path "模型路径" \ + --cutoff_len 2048 \ + --dataset_dir "数据集路径" \ + --dataset chinese_instruct,deepctrl_200k,ultrachat_200k,code_feedback_custom \ + --overwrite_cache \ + --enable_liger_kernel True\ + --packing False \ + --use_swanlab true \ + --report_to swanlab \ + --swanlab_project "swanlab项目名称" \ + --run_name "运行名称" \ + --preprocessing_num_workers 30 \ + --template qwen \ + --finetuning_type full \ + --output_dir ${OUTPUT_DIR}/"本次运行保存子路径" \ + --overwrite_output_dir \ + --per_device_train_batch_size 4 \ + --per_device_eval_batch_size 4 \ + --do_eval \ + --val_size 100 \ + --eval_strategy steps \ + --eval_steps 1000 \ + --flash_attn fa2\ + --gradient_accumulation_steps 16 \ + --lr_scheduler_type cosine \ + --warmup_ratio 0.0125 \ + --max_grad_norm 1.0 \ + --logging_steps 10 \ + --save_steps 5000 \ + --learning_rate 2e-5 \ + --weight_decay 0.01 \ + --num_train_epochs 3.0 \ + --plot_loss \ + --bf16 + + #--resume_from_checkpoint ${OUTPUT_DIR}/"本次运行保存子路径/checkpoint-xxxx" + + diff --git a/bitbrain/finetune/run_v3.sh b/bitbrain/finetune/run_v3.sh deleted file mode 100644 index 4f374fe..0000000 --- a/bitbrain/finetune/run_v3.sh +++ /dev/null @@ -1,72 +0,0 @@ -# llama factory start script -########################## -#CUDA_VISIBLE_DEVICES=5,6 python ../LLaMA-Factory/src/train.py \ -export LLaMA_PATH=/home/ytllm/LLaMA-Factory -OUTPUT_DIR=/home/ytllm/.cache/ckpt/bit-brain-v3.1/sft -#export CUDA_DEVICE_ORDER=PCI_BUS_ID -export CUDA_VISIBLE_DEVICES=0,1,2,3 -export NCCL_P2P_LEVEL=NVL -export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True -#python $LLaMA_PATH/src/train.py \ -# max_steps / num_train_epochs -# --streaming True \ -# 6卡配置:1,2,3,4,5,7 -# all data: -#! 可设置混合策略: --mix_strategy interleave_over\ -#! --interleave_probs 0.1,0.35,0.2,0.2,0.1,0.05 \ - -#! 可设置自定义的评估数据集 --eval_dataset ceval,cmmlu \ - -#! 
-#! Skip --overwrite_cache for now \
-
-#! --streaming True \
-#! --max_steps 100000 \
-
-#! baai_instruct_70W,baai_instruct_682W,deepctl_200W \
-# deepctl_1120W_zh,deepctl_276W_en,baai_instruct_70W,ultrachat_200k,wanjuan_exam_399W \
-#! --max_samples 2000000 \
-
-
-FORCE_TORCHRUN=1 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node 4 $LLaMA_PATH/src/train.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path /home/ytllm/.cache/modelscope/models/hh2395959141/Bitbrain-0.6b-base \
-    --cutoff_len 2048 \
-    --dataset_dir /home/ytllm/.cache/modelscope/datasets/hh2395959141/Bitbrain-0___6b-sft_data \
-    --dataset chinese_instruct,deepctrl_200k,ultrachat_200k,code_feedback_custom \
-    --overwrite_cache \
-    --packing False \
-    --use_swanlab true \
-    --report_to swanlab \
-    --swanlab_project bit-brain-v3-part2-sft \
-    --run_name sft_bit-brain-v3-part2 \
-    --preprocessing_num_workers 30 \
-    --template qwen \
-    --finetuning_type full \
-    --output_dir ${OUTPUT_DIR}/lr2e-5 \
-    --overwrite_output_dir \
-    --per_device_train_batch_size 4 \
-    --per_device_eval_batch_size 4 \
-    --do_eval \
-    --val_size 100 \
-    --eval_strategy steps \
-    --eval_steps 1000 \
-    --flash_attn fa2 \
-    --gradient_accumulation_steps 16 \
-    --lr_scheduler_type cosine \
-    --warmup_ratio 0.0125 \
-    --max_grad_norm 1.0 \
-    --logging_steps 10 \
-    --save_steps 5000 \
-    --learning_rate 2e-5 \
-    --weight_decay 0.01 \
-    --num_train_epochs 3.0 \
-    --plot_loss \
-    --bf16 \
-    --resume_from_checkpoint /home/ytllm/.cache/modelscope/models/hh2395959141/Bitbrain-0.6b-sft-checkpoint
-
-
-    #--resume_from_checkpoint /DATA/disk2/yuhang/.cache/ckpt/bit-brain/sft_mix_v2/bit-brain-v1-full-sft/checkpoint-500
-
-