From 391033b45b7e89cf1dbe8a90e9901bea36597ea9 Mon Sep 17 00:00:00 2001
From: Steve Han
Date: Fri, 24 Apr 2026 07:00:45 -0700
Subject: [PATCH] feat(embed): launch finetune with torchrun for multi-GPU support

Use torch.distributed.run with --nproc_per_node=gpu so training
automatically uses all available GPUs (works correctly with 1 GPU too).
Mirrors the rerank recipe change in 756e4f2.

Signed-off-by: Steve Han
Made-with: Cursor
---
 src/nemotron/cli/commands/embed/finetune.py         | 4 +++-
 src/nemotron/recipes/embed/stage2_finetune/train.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/nemotron/cli/commands/embed/finetune.py b/src/nemotron/cli/commands/embed/finetune.py
index b9e1ca241..427c52c13 100644
--- a/src/nemotron/cli/commands/embed/finetune.py
+++ b/src/nemotron/cli/commands/embed/finetune.py
@@ -115,7 +115,9 @@ def _execute_uv_local(train_path: Path, passthrough: list[str]) -> None:
         uv_cmd,
         "run", "--with", str(repo_root),
         "--project", str(stage_dir),
-        "python", str(script_abs),
+        "python", "-m", "torch.distributed.run",
+        "--nproc_per_node=gpu",
+        str(script_abs),
         "--config", str(train_path),
         *passthrough,
     ]
diff --git a/src/nemotron/recipes/embed/stage2_finetune/train.py b/src/nemotron/recipes/embed/stage2_finetune/train.py
index b2e095d5f..88f1f59ac 100644
--- a/src/nemotron/recipes/embed/stage2_finetune/train.py
+++ b/src/nemotron/recipes/embed/stage2_finetune/train.py
@@ -8,7 +8,7 @@
 # setup = "PyTorch pre-installed. Stage dependencies resolved via UV at runtime."
 #
 # [tool.runspec.run]
-# launch = "direct"
+# launch = "torchrun"
 #
 # [tool.runspec.config]
 # dir = "./config"