From 443e6d1d3a9b27b9132ffa58df118009fc47c213 Mon Sep 17 00:00:00 2001 From: Youngeun Kwon Date: Fri, 6 Feb 2026 10:41:43 -0800 Subject: [PATCH] Reject nccl_ub with Megatron-FSDP when expert parallelism is enabled (unless symmetric registration is disabled) Signed-off-by: Youngeun Kwon --- megatron/training/arguments.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index c057e55597d..a59cd596fd3 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -775,6 +775,10 @@ def validate_args(args, defaults={}): if args.use_megatron_fsdp: args.reuse_grad_buf_for_mxfp8_param_ag = False + if args.nccl_ub and args.use_megatron_fsdp and args.expert_model_parallel_size > 1 \ + and not args.disable_symmetric_registration: + raise NotImplementedError('NCCL userbuffer registration is currently not supported with expert parallelism') + # Parameters dtype. args.params_dtype = torch.float if args.fp16: