diff --git a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe/README.md b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe/README.md similarity index 95% rename from training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe/README.md rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe/README.md index 271b8552..e98270bf 100644 --- a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe/README.md +++ b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a3u/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe cd $RECIPE_ROOT ``` diff --git a/training/a3ultra/llama3-1-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe/launch_script.sh b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe/launch_script.sh similarity index 100% rename from training/a3ultra/llama3-1-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe/launch_script.sh rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe/launch_script.sh diff --git a/training/a3ultra/llama3-1-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe/sbatch_script.sh b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe/sbatch_script.sh similarity index 100% rename from training/a3ultra/llama3-1-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe/sbatch_script.sh rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs1024/recipe/sbatch_script.sh diff --git a/training/a3ultra/llama3-1-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe/README.md b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe/README.md similarity index 95% rename from training/a3ultra/llama3-1-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe/README.md rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe/README.md index eef7244b..a145f7d2 100644 --- a/training/a3ultra/llama3-1-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe/README.md +++ b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a3u/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS1024/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe cd $RECIPE_ROOT ``` diff --git a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe/launch_script.sh b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe/launch_script.sh similarity index 100% rename from training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe/launch_script.sh rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe/launch_script.sh diff --git a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe/sbatch_script.sh b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe/sbatch_script.sh similarity index 100% rename from training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe/sbatch_script.sh rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs128/recipe/sbatch_script.sh diff --git a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe/README.md b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe/README.md similarity index 95% rename from training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe/README.md rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe/README.md index 58753ca2..5a0e20e1 100644 --- a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe/README.md +++ b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a3u/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS128/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe cd $RECIPE_ROOT ``` diff --git a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe/launch_script.sh b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe/launch_script.sh similarity index 100% rename from training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe/launch_script.sh rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe/launch_script.sh diff --git a/training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe/sbatch_script.sh b/training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe/sbatch_script.sh similarity index 100% rename from training/a3ultra/llama3-70b/megatron-bridge-pretraining-slurm/4node-FP8CS-GBS512/recipe/sbatch_script.sh rename to training/a3ultra/llama3_70b/megatron-bridge-slurm/nemo2511/32gpus-fp8cs-seq8192-gbs512/recipe/sbatch_script.sh diff --git a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe/README.md b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe/README.md similarity index 95% rename from training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe/README.md rename to training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe/README.md index ee14d40c..9c72e7bf 100644 --- a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe/README.md +++ b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a3u/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe cd $RECIPE_ROOT ``` diff --git a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/launch_script.sh b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe/launch_script.sh similarity index 100% rename from training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/launch_script.sh rename to training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe/launch_script.sh diff --git a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/sbatch_script.sh b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe/sbatch_script.sh similarity index 100% rename from training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/sbatch_script.sh rename to training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs128/recipe/sbatch_script.sh diff --git a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/README.md b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe/README.md similarity index 95% rename from training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/README.md rename to training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe/README.md index 0617a4cb..00ac230b 100644 --- a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/README.md +++ b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a3u/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe cd $RECIPE_ROOT ``` diff --git a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe/launch_script.sh b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe/launch_script.sh similarity index 100% rename from training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe/launch_script.sh rename to training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe/launch_script.sh diff --git a/training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe/sbatch_script.sh b/training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe/sbatch_script.sh similarity index 100% rename from training/a3ultra/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS256/recipe/sbatch_script.sh rename to training/a3ultra/llama3_8b/megatron-bridge-slurm/nemo2511/16gpus-fp8cs-seq8192-gbs256/recipe/sbatch_script.sh diff --git a/training/a4/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8CS-GBS1024/README.md b/training/a4/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8cs-gbs1024/recipe/README.md similarity index 100% rename from training/a4/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8CS-GBS1024/README.md rename to training/a4/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8cs-gbs1024/recipe/README.md diff --git a/training/a4/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8CS-GBS1024/submit.slurm b/training/a4/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8cs-gbs1024/recipe/submit.slurm similarity index 100% rename from training/a4/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8CS-GBS1024/submit.slurm rename to training/a4/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8cs-gbs1024/recipe/submit.slurm diff --git a/training/a4/llama3-1-70b/megatron-bridge-pretraining-slurm/8node-fp8-seq8192-gbs2048/README.md b/training/a4/llama3_70b/megatron-bridge-slurm/nemo2509/64gpus-fp8cs-seq8192-gbs2048/recipe/README.md similarity index 100% rename from training/a4/llama3-1-70b/megatron-bridge-pretraining-slurm/8node-fp8-seq8192-gbs2048/README.md rename to training/a4/llama3_70b/megatron-bridge-slurm/nemo2509/64gpus-fp8cs-seq8192-gbs2048/recipe/README.md diff --git a/training/a4/llama3-1-70b/megatron-bridge-pretraining-slurm/8node-fp8-seq8192-gbs2048/submit.slurm b/training/a4/llama3_70b/megatron-bridge-slurm/nemo2509/64gpus-fp8cs-seq8192-gbs2048/recipe/submit.slurm similarity index 100% rename from training/a4/llama3-1-70b/megatron-bridge-pretraining-slurm/8node-fp8-seq8192-gbs2048/submit.slurm rename to training/a4/llama3_70b/megatron-bridge-slurm/nemo2509/64gpus-fp8cs-seq8192-gbs2048/recipe/submit.slurm diff --git a/training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe/README.md b/training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe/README.md similarity index 95% rename from training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe/README.md rename to training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe/README.md index b7cab4bc..0115391f 100644 --- a/training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe/README.md +++ b/training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe/README.md @@ -64,7 +64,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe/launch_script.sh b/training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe/launch_script.sh similarity index 100% rename from training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe/launch_script.sh rename to training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe/launch_script.sh diff --git a/training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe/sbatch_script.sh b/training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe/sbatch_script.sh similarity index 100% rename from training/a4/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS4096/recipe/sbatch_script.sh rename to training/a4/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs4096/recipe/sbatch_script.sh diff --git a/training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8DS-GBS128/README.md b/training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8ds-gbs128/recipe/README.md similarity index 100% rename from training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8DS-GBS128/README.md rename to training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8ds-gbs128/recipe/README.md diff --git a/training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8DS-GBS128/submit.slurm b/training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8ds-gbs128/recipe/submit.slurm similarity index 100% rename from training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/16node-FP8DS-GBS128/submit.slurm rename to training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/128gpus-fp8ds-gbs128/recipe/submit.slurm diff --git a/training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/32node-FP8CS-GBS1024/README.md b/training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/256gpus-fp8cs-gbs1024/recipe/README.md similarity index 100% rename from training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/32node-FP8CS-GBS1024/README.md rename to training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/256gpus-fp8cs-gbs1024/recipe/README.md diff --git a/training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/32node-FP8CS-GBS1024/submit.slurm b/training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/256gpus-fp8cs-gbs1024/recipe/submit.slurm similarity index 100% rename from training/a4x/llama3-1-405b/megatron-bridge-pretraining-slurm/32node-FP8CS-GBS1024/submit.slurm rename to training/a4x/llama31_405b/megatron-bridge-slurm/nemo2509/256gpus-fp8cs-gbs1024/recipe/submit.slurm diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/README.md b/training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe/README.md similarity index 95% rename from training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/README.md rename to training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe/README.md index 1c1a5a0a..ff0be2d4 100644 --- a/training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/README.md +++ b/training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/launch_script.sh b/training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe/launch_script.sh similarity index 100% rename from training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/launch_script.sh rename to training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe/launch_script.sh diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/sbatch_script.sh b/training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe/sbatch_script.sh similarity index 100% rename from training/a4x/llama3-8b/megatron-bridge-pretraining-slurm/2node-FP8CS-GBS128/recipe/sbatch_script.sh rename to training/a4x/llama3_8b/megatron-bridge-slurm/nemo2511/8gpus-fp8cs-seq8192-gbs128/recipe/sbatch_script.sh diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/README.md b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/README.md similarity index 95% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/README.md rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/README.md index e5f17662..26b68edc 100644 --- a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/README.md +++ b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/README.md @@ -61,7 +61,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/custom_setup_experiment.py b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/custom_setup_experiment.py similarity index 100% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/custom_setup_experiment.py rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/custom_setup_experiment.py diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/launch_script.sh b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/launch_script.sh similarity index 100% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/launch_script.sh rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/launch_script.sh diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/sbatch_script.sh b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/sbatch_script.sh similarity index 100% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/sbatch_script.sh rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/128gpus-bf16-seq4096-gbs2048/recipe/sbatch_script.sh diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/README.md b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/README.md similarity index 95% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/README.md rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/README.md index d8c0972e..f82454d4 100644 --- a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/README.md +++ b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/README.md @@ -61,7 +61,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/custom_setup_experiment.py b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/custom_setup_experiment.py similarity index 100% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/32node-BF16-GBS2048/recipe/custom_setup_experiment.py rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/custom_setup_experiment.py diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/launch_script.sh b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/launch_script.sh similarity index 100% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/launch_script.sh rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/launch_script.sh diff --git a/training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/sbatch_script.sh b/training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/sbatch_script.sh similarity index 100% rename from training/a4x/qwen3-235b-a22b/megatron-bridge-pretraining-slurm/16node-BF16-GBS1024/recipe/sbatch_script.sh rename to training/a4x/qwen3_235b_a22b/megatron-bridge-slurm/nemo2511/64gpus-bf16-seq4096-gbs1024/recipe/sbatch_script.sh diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe/README.md b/training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe/README.md similarity index 95% rename from training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe/README.md rename to training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe/README.md index bd687d83..39b60355 100644 --- a/training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe/README.md +++ b/training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe/launch_script.sh b/training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe/launch_script.sh similarity index 100% rename from training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe/launch_script.sh rename to training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe/launch_script.sh diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe/sbatch_script.sh b/training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe/sbatch_script.sh similarity index 100% rename from training/a4x/qwen3-30b/megatron-bridge-pretraining-slurm/2node-FP8MX-GBS1024/recipe/sbatch_script.sh rename to training/a4x/qwen3_30b_a3b/megatron-bridge-slurm/nemo2511/8gpus-fp8mx-seq4096-gbs512/recipe/sbatch_script.sh diff --git a/training/a4x/wan2-1-14b/nemo-pretraining-slurm/8node-BF16-GBS64/recipe/README.md b/training/a4x/wan_14b/nemo-slurm/nemo2511/32gpus-bf16-gbs64/recipe/README.md similarity index 96% rename from training/a4x/wan2-1-14b/nemo-pretraining-slurm/8node-BF16-GBS64/recipe/README.md rename to training/a4x/wan_14b/nemo-slurm/nemo2511/32gpus-bf16-gbs64/recipe/README.md index d33f13df..9ec90027 100644 --- a/training/a4x/wan2-1-14b/nemo-pretraining-slurm/8node-BF16-GBS64/recipe/README.md +++ b/training/a4x/wan_14b/nemo-slurm/nemo2511/32gpus-bf16-gbs64/recipe/README.md @@ -65,7 +65,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/wan2-1-14b/nemo-pretraining-slurm/8node-BF16-GBS64/recipe +export RECIPE_ROOT=$REPO_ROOT/training/a4x/wan_14b/nemo-slurm/nemo2511/32gpus-bf16-gbs64/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/wan2-1-14b/nemo-pretraining-slurm/8node-BF16-GBS64/recipe/wan_14b_benchmark.sh b/training/a4x/wan_14b/nemo-slurm/nemo2511/32gpus-bf16-gbs64/recipe/wan_14b_benchmark.sh similarity index 100% rename from training/a4x/wan2-1-14b/nemo-pretraining-slurm/8node-BF16-GBS64/recipe/wan_14b_benchmark.sh rename to training/a4x/wan_14b/nemo-slurm/nemo2511/32gpus-bf16-gbs64/recipe/wan_14b_benchmark.sh