ml-explore · angeloskath · Jun 11, 2026 · Jun 9, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/mlx_lm/benchmark.py b/mlx_lm/benchmark.py
@@ -73,6 +73,11 @@ def setup_arg_parser():
         default=0,
         help="Delay between each test in seconds (default: 0)",
     )
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Enable trusting remote code for tokenizer/model loading.",
+    )
     return parser
 
 
@@ -94,14 +99,19 @@ def rprint(*args, **kwargs):
 
     if group.size() > 1:
         model, tokenizer, config = sharded_load(
-            model_path, pipeline_group, tensor_group, return_config=True
+            model_path,
+            pipeline_group,
+            tensor_group,
+            return_config=True,
+            trust_remote_code=args.trust_remote_code,
         )
     else:
         model, tokenizer, config = load(
             model_path,
             return_config=True,
-            tokenizer_config={"trust_remote_code": True},
+            tokenizer_config={"trust_remote_code": args.trust_remote_code},
             model_config={"quantize_activations": args.quantize_activations},
+            trust_remote_code=args.trust_remote_code,
         )
 
     # Empty to avoid early stopping

diff --git a/mlx_lm/cache_prompt.py b/mlx_lm/cache_prompt.py
@@ -85,14 +85,15 @@ def main():
     args = parser.parse_args()
 
     # Building tokenizer_config
-    tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
+    tokenizer_config = {"trust_remote_code": args.trust_remote_code}
     if args.eos_token is not None:
         tokenizer_config["eos_token"] = args.eos_token
 
     model, tokenizer = load(
         args.model,
         adapter_path=args.adapter_path,
         tokenizer_config=tokenizer_config,
+        trust_remote_code=args.trust_remote_code,
     )
 
     args.prompt = sys.stdin.read() if args.prompt == "-" else args.prompt

diff --git a/mlx_lm/chat.py b/mlx_lm/chat.py
@@ -102,14 +102,18 @@ def main():
     if group.size() > 1:
         if args.adapter_path:
             parser.error("Adapters not supported in distributed mode")
-        model, tokenizer = sharded_load(args.model, pipeline_group, tensor_group)
+        model, tokenizer = sharded_load(
+            args.model,
+            pipeline_group,
+            tensor_group,
+            trust_remote_code=args.trust_remote_code,
+        )
     else:
         model, tokenizer = load(
             args.model,
             adapter_path=args.adapter_path,
-            tokenizer_config={
-                "trust_remote_code": True if args.trust_remote_code else None
-            },
+            tokenizer_config={"trust_remote_code": args.trust_remote_code},
+            trust_remote_code=args.trust_remote_code,
         )
 
     with ChatUI(args, rank=rank) as ui:

diff --git a/mlx_lm/convert.py b/mlx_lm/convert.py
@@ -115,6 +115,7 @@ def convert(
         return_config=True,
         tokenizer_config={"trust_remote_code": trust_remote_code},
         lazy=True,
+        trust_remote_code=trust_remote_code,
     )
 
     if isinstance(quant_predicate, str):

diff --git a/mlx_lm/evaluate.py b/mlx_lm/evaluate.py
@@ -83,9 +83,11 @@ def __init__(
         sampler: Optional[Callable[[mx.array], mx.array]] = None,
     ) -> None:
         super().__init__()
-        tokenizer_config = {"trust_remote_code": True if trust_remote_code else None}
+        tokenizer_config = {"trust_remote_code": trust_remote_code}
         self._model, self.tokenizer = load(
-            path_or_hf_repo, tokenizer_config=tokenizer_config
+            path_or_hf_repo,
+            tokenizer_config=tokenizer_config,
+            trust_remote_code=trust_remote_code,
         )
         self._max_tokens = max_tokens
         self._batch_size = batch_size

diff --git a/mlx_lm/fuse.py b/mlx_lm/fuse.py
@@ -54,6 +54,11 @@ def parse_arguments() -> argparse.Namespace:
         default="ggml-model-f16.gguf",
         type=str,
     )
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Enable trusting remote code for tokenizer/model loading.",
+    )
     return parser.parse_args()
 
 
@@ -62,7 +67,10 @@ def main() -> None:
     args = parse_arguments()
 
     model, tokenizer, config = load(
-        args.model, adapter_path=args.adapter_path, return_config=True
+        args.model,
+        adapter_path=args.adapter_path,
+        return_config=True,
+        trust_remote_code=args.trust_remote_code,
     )
 
     fused_linears = [

diff --git a/mlx_lm/generate.py b/mlx_lm/generate.py
@@ -1991,7 +1991,7 @@ def main():
     tokenizer_config = (
         {} if not using_cache else json.loads(metadata["tokenizer_config"])
     )
-    tokenizer_config["trust_remote_code"] = True if args.trust_remote_code else None
+    tokenizer_config["trust_remote_code"] = args.trust_remote_code
 
     model_path = args.model
     if using_cache:
@@ -2010,6 +2010,7 @@ def main():
         adapter_path=args.adapter_path,
         tokenizer_config=tokenizer_config,
         model_config={"quantize_activations": args.quantize_activations},
+        trust_remote_code=args.trust_remote_code,
     )
     for eos_token in args.extra_eos_token:
         tokenizer.add_eos_token(eos_token)

diff --git a/mlx_lm/lora.py b/mlx_lm/lora.py
@@ -77,6 +77,7 @@
     "mask_prompt": False,
     "report_to": None,
     "project_name": None,
+    "trust_remote_code": False,
 }
 
 
@@ -212,6 +213,11 @@ def build_parser():
         help="Project name for logging. Defaults to the name of the root directory.",
     )
     parser.add_argument("--seed", type=int, help="The PRNG seed")
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Enable trusting remote code for tokenizer/model loading.",
+    )
     return parser
 
 
@@ -338,7 +344,11 @@ def run(args, training_callback: TrainingCallback = None):
     )
 
     rprint("Loading pretrained model")
-    model, tokenizer = load(args.model, tokenizer_config={"trust_remote_code": True})
+    model, tokenizer = load(
+        args.model,
+        tokenizer_config={"trust_remote_code": args.trust_remote_code},
+        trust_remote_code=args.trust_remote_code,
+    )
 
     rprint("Loading datasets")
     train_set, valid_set, test_set = load_dataset(args, tokenizer)

diff --git a/mlx_lm/perplexity.py b/mlx_lm/perplexity.py
@@ -144,8 +144,12 @@ def main():
 
     # Load model
     print(f"Loading model from {args.model}...")
-    tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
-    model, tokenizer = load(args.model, tokenizer_config=tokenizer_config)
+    tokenizer_config = {"trust_remote_code": args.trust_remote_code}
+    model, tokenizer = load(
+        args.model,
+        tokenizer_config=tokenizer_config,
+        trust_remote_code=args.trust_remote_code,
+    )
 
     # Count parameters
     total_params = get_total_parameters(model)

diff --git a/mlx_lm/quant/awq.py b/mlx_lm/quant/awq.py
@@ -544,6 +544,11 @@ def main():
     parser.add_argument("--sequence-length", type=int, default=512)
     parser.add_argument("--n-grid", type=int, default=20)
     parser.add_argument("--seed", type=int, default=123)
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Enable trusting remote code for tokenizer/model loading.",
+    )
     args = parser.parse_args()
 
     group = mx.distributed.init()
@@ -554,7 +559,12 @@ def main():
 
     mx.random.seed(args.seed)
 
-    model, tokenizer, config = load(args.model, lazy=True, return_config=True)
+    model, tokenizer, config = load(
+        args.model,
+        lazy=True,
+        return_config=True,
+        trust_remote_code=args.trust_remote_code,
+    )
 
     model_type = config["model_type"]
     if (awq_config := AWQ_MODEL_CONFIGS.get(model_type, None)) is None:

diff --git a/mlx_lm/quant/dwq.py b/mlx_lm/quant/dwq.py
@@ -20,9 +20,9 @@
 from mlx_lm.utils import (
     load,
     load_tokenizer,
-    pipeline_load,
     quantize_model,
     save,
+    sharded_load,
 )
 
 
@@ -300,6 +300,11 @@ def main():
         action="store_true",
         help="Use pipeline parallel instead of data parallel.",
     )
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Enable trusting remote code for tokenizer/model loading.",
+    )
 
     args = parser.parse_args()
 
@@ -332,9 +337,20 @@ def main():
     # Load the base model if we need it
     if not has_targets or args.quantized_model is None:
         if args.pipeline and group.size() > 1:
-            model, _, config = pipeline_load(args.model, return_config=True)
+            model, _, config = sharded_load(
+                args.model,
+                pipeline_group=mx.distributed.init(),
+                tensor_group=None,
+                return_config=True,
+                trust_remote_code=args.trust_remote_code,
+            )
         else:
-            model, _, config = load(args.model, return_config=True, lazy=True)
+            model, _, config = load(
+                args.model,
+                return_config=True,
+                lazy=True,
+                trust_remote_code=args.trust_remote_code,
+            )
     else:
         model = None
 
@@ -370,6 +386,7 @@ def target_fn(batch, idx, split):
             args.quantized_model,
             lazy=True,
             return_config=True,
+            trust_remote_code=args.trust_remote_code,
         )
         if "quantization" not in config:
             raise ValueError("Quantized model must already be quantized.")

diff --git a/mlx_lm/quant/dynamic_quant.py b/mlx_lm/quant/dynamic_quant.py
@@ -182,10 +182,19 @@ def main():
         choices=["float32", "bfloat16"],
         help="What type to use to accumulate the gradients for the sensitivities",
     )
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Enable trusting remote code for tokenizer/model loading.",
+    )
     args = parser.parse_args()
 
     group = mx.distributed.init()
-    model, tokenizer, config = load(args.model, return_config=True)
+    model, tokenizer, config = load(
+        args.model,
+        return_config=True,
+        trust_remote_code=args.trust_remote_code,
+    )
 
     if args.sensitivities is None:
         mx.random.seed(args.seed)

diff --git a/mlx_lm/quant/gptq.py b/mlx_lm/quant/gptq.py
@@ -197,11 +197,21 @@ def main():
         help="Sequence length for the calibration data.",
     )
     parser.add_argument("--seed", type=int, default=123)
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Enable trusting remote code for tokenizer/model loading.",
+    )
     args = parser.parse_args()
 
     mx.random.seed(args.seed)
 
-    model, tokenizer, config = load(args.model, lazy=True, return_config=True)
+    model, tokenizer, config = load(
+        args.model,
+        lazy=True,
+        return_config=True,
+        trust_remote_code=args.trust_remote_code,
+    )
     calibration_data = load_data(tokenizer, args.num_samples, args.sequence_length)
 
     model, config["quantization"] = gptq_quantize(

diff --git a/mlx_lm/server.py b/mlx_lm/server.py
@@ -317,9 +317,7 @@ def __init__(self, cli_args: argparse.Namespace):
         self._draft_model_map["default_model"] = self.cli_args.draft_model
 
         # Build the tokenizer config for later use in load
-        self._tokenizer_config = {
-            "trust_remote_code": True if cli_args.trust_remote_code else None
-        }
+        self._tokenizer_config = {"trust_remote_code": cli_args.trust_remote_code}
         if cli_args.chat_template:
             self._tokenizer_config["chat_template"] = cli_args.chat_template
 
@@ -344,12 +342,14 @@ def _load(self, model_path, adapter_path=None, draft_model_path=None):
                 pipeline_group=self.pipeline_group,
                 tensor_group=self.tensor_group,
                 tokenizer_config=self._tokenizer_config,
+                trust_remote_code=self.cli_args.trust_remote_code,
             )
         else:
             model, tokenizer = load(
                 model_path,
                 adapter_path=adapter_path,
                 tokenizer_config=self._tokenizer_config,
+                trust_remote_code=self.cli_args.trust_remote_code,
             )
 
         # Use the default chat template if needed