Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion mlx_lm/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ def setup_arg_parser():
default=0,
help="Delay between each test in seconds (default: 0)",
)
parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Enable trusting remote code for tokenizer/model loading.",
)
return parser


Expand All @@ -94,14 +99,19 @@ def rprint(*args, **kwargs):

if group.size() > 1:
model, tokenizer, config = sharded_load(
model_path, pipeline_group, tensor_group, return_config=True
model_path,
pipeline_group,
tensor_group,
return_config=True,
trust_remote_code=args.trust_remote_code,
)
else:
model, tokenizer, config = load(
model_path,
return_config=True,
tokenizer_config={"trust_remote_code": True},
model_config={"quantize_activations": args.quantize_activations},
trust_remote_code=args.trust_remote_code,
)

# Empty to avoid early stopping
Expand Down
3 changes: 2 additions & 1 deletion mlx_lm/cache_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,15 @@ def main():
args = parser.parse_args()

# Building tokenizer_config
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
tokenizer_config = {"trust_remote_code": args.trust_remote_code}
if args.eos_token is not None:
tokenizer_config["eos_token"] = args.eos_token

model, tokenizer = load(
args.model,
adapter_path=args.adapter_path,
tokenizer_config=tokenizer_config,
trust_remote_code=args.trust_remote_code,
)

args.prompt = sys.stdin.read() if args.prompt == "-" else args.prompt
Expand Down
12 changes: 8 additions & 4 deletions mlx_lm/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,18 @@ def main():
if group.size() > 1:
if args.adapter_path:
parser.error("Adapters not supported in distributed mode")
model, tokenizer = sharded_load(args.model, pipeline_group, tensor_group)
model, tokenizer = sharded_load(
args.model,
pipeline_group,
tensor_group,
trust_remote_code=args.trust_remote_code,
)
else:
model, tokenizer = load(
args.model,
adapter_path=args.adapter_path,
tokenizer_config={
"trust_remote_code": True if args.trust_remote_code else None
},
tokenizer_config={"trust_remote_code": args.trust_remote_code},
trust_remote_code=args.trust_remote_code,
)

with ChatUI(args, rank=rank) as ui:
Expand Down
1 change: 1 addition & 0 deletions mlx_lm/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def convert(
return_config=True,
tokenizer_config={"trust_remote_code": trust_remote_code},
lazy=True,
trust_remote_code=trust_remote_code,
)

if isinstance(quant_predicate, str):
Expand Down
6 changes: 4 additions & 2 deletions mlx_lm/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,11 @@ def __init__(
sampler: Optional[Callable[[mx.array], mx.array]] = None,
) -> None:
super().__init__()
tokenizer_config = {"trust_remote_code": True if trust_remote_code else None}
tokenizer_config = {"trust_remote_code": trust_remote_code}
self._model, self.tokenizer = load(
path_or_hf_repo, tokenizer_config=tokenizer_config
path_or_hf_repo,
tokenizer_config=tokenizer_config,
trust_remote_code=trust_remote_code,
)
self._max_tokens = max_tokens
self._batch_size = batch_size
Expand Down
10 changes: 9 additions & 1 deletion mlx_lm/fuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ def parse_arguments() -> argparse.Namespace:
default="ggml-model-f16.gguf",
type=str,
)
parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Enable trusting remote code for tokenizer/model loading.",
)
return parser.parse_args()


Expand All @@ -62,7 +67,10 @@ def main() -> None:
args = parse_arguments()

model, tokenizer, config = load(
args.model, adapter_path=args.adapter_path, return_config=True
args.model,
adapter_path=args.adapter_path,
return_config=True,
trust_remote_code=args.trust_remote_code,
)

fused_linears = [
Expand Down
3 changes: 2 additions & 1 deletion mlx_lm/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1991,7 +1991,7 @@ def main():
tokenizer_config = (
{} if not using_cache else json.loads(metadata["tokenizer_config"])
)
tokenizer_config["trust_remote_code"] = True if args.trust_remote_code else None
tokenizer_config["trust_remote_code"] = args.trust_remote_code

model_path = args.model
if using_cache:
Expand All @@ -2010,6 +2010,7 @@ def main():
adapter_path=args.adapter_path,
tokenizer_config=tokenizer_config,
model_config={"quantize_activations": args.quantize_activations},
trust_remote_code=args.trust_remote_code,
)
for eos_token in args.extra_eos_token:
tokenizer.add_eos_token(eos_token)
Expand Down
12 changes: 11 additions & 1 deletion mlx_lm/lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"mask_prompt": False,
"report_to": None,
"project_name": None,
"trust_remote_code": False,
}


Expand Down Expand Up @@ -212,6 +213,11 @@ def build_parser():
help="Project name for logging. Defaults to the name of the root directory.",
)
parser.add_argument("--seed", type=int, help="The PRNG seed")
parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Enable trusting remote code for tokenizer/model loading.",
)
return parser


Expand Down Expand Up @@ -338,7 +344,11 @@ def run(args, training_callback: TrainingCallback = None):
)

rprint("Loading pretrained model")
model, tokenizer = load(args.model, tokenizer_config={"trust_remote_code": True})
model, tokenizer = load(
args.model,
tokenizer_config={"trust_remote_code": args.trust_remote_code},
trust_remote_code=args.trust_remote_code,
)

rprint("Loading datasets")
train_set, valid_set, test_set = load_dataset(args, tokenizer)
Expand Down
8 changes: 6 additions & 2 deletions mlx_lm/perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,12 @@ def main():

# Load model
print(f"Loading model from {args.model}...")
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
model, tokenizer = load(args.model, tokenizer_config=tokenizer_config)
tokenizer_config = {"trust_remote_code": args.trust_remote_code}
model, tokenizer = load(
args.model,
tokenizer_config=tokenizer_config,
trust_remote_code=args.trust_remote_code,
)

# Count parameters
total_params = get_total_parameters(model)
Expand Down
12 changes: 11 additions & 1 deletion mlx_lm/quant/awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,11 @@ def main():
parser.add_argument("--sequence-length", type=int, default=512)
parser.add_argument("--n-grid", type=int, default=20)
parser.add_argument("--seed", type=int, default=123)
parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Enable trusting remote code for tokenizer/model loading.",
)
args = parser.parse_args()

group = mx.distributed.init()
Expand All @@ -554,7 +559,12 @@ def main():

mx.random.seed(args.seed)

model, tokenizer, config = load(args.model, lazy=True, return_config=True)
model, tokenizer, config = load(
args.model,
lazy=True,
return_config=True,
trust_remote_code=args.trust_remote_code,
)

model_type = config["model_type"]
if (awq_config := AWQ_MODEL_CONFIGS.get(model_type, None)) is None:
Expand Down
23 changes: 20 additions & 3 deletions mlx_lm/quant/dwq.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
from mlx_lm.utils import (
load,
load_tokenizer,
pipeline_load,
quantize_model,
save,
sharded_load,
)


Expand Down Expand Up @@ -300,6 +300,11 @@ def main():
action="store_true",
help="Use pipeline parallel instead of data parallel.",
)
parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Enable trusting remote code for tokenizer/model loading.",
)

args = parser.parse_args()

Expand Down Expand Up @@ -332,9 +337,20 @@ def main():
# Load the base model if we need it
if not has_targets or args.quantized_model is None:
if args.pipeline and group.size() > 1:
model, _, config = pipeline_load(args.model, return_config=True)
model, _, config = sharded_load(
args.model,
pipeline_group=mx.distributed.init(),
tensor_group=None,
return_config=True,
trust_remote_code=args.trust_remote_code,
)
else:
model, _, config = load(args.model, return_config=True, lazy=True)
model, _, config = load(
args.model,
return_config=True,
lazy=True,
trust_remote_code=args.trust_remote_code,
)
else:
model = None

Expand Down Expand Up @@ -370,6 +386,7 @@ def target_fn(batch, idx, split):
args.quantized_model,
lazy=True,
return_config=True,
trust_remote_code=args.trust_remote_code,
)
if "quantization" not in config:
raise ValueError("Quantized model must already be quantized.")
Expand Down
11 changes: 10 additions & 1 deletion mlx_lm/quant/dynamic_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,19 @@ def main():
choices=["float32", "bfloat16"],
help="What type to use to accumulate the gradients for the sensitivities",
)
parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Enable trusting remote code for tokenizer/model loading.",
)
args = parser.parse_args()

group = mx.distributed.init()
model, tokenizer, config = load(args.model, return_config=True)
model, tokenizer, config = load(
args.model,
return_config=True,
trust_remote_code=args.trust_remote_code,
)

if args.sensitivities is None:
mx.random.seed(args.seed)
Expand Down
12 changes: 11 additions & 1 deletion mlx_lm/quant/gptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,21 @@ def main():
help="Sequence length for the calibration data.",
)
parser.add_argument("--seed", type=int, default=123)
parser.add_argument(
"--trust-remote-code",
action="store_true",
help="Enable trusting remote code for tokenizer/model loading.",
)
args = parser.parse_args()

mx.random.seed(args.seed)

model, tokenizer, config = load(args.model, lazy=True, return_config=True)
model, tokenizer, config = load(
args.model,
lazy=True,
return_config=True,
trust_remote_code=args.trust_remote_code,
)
calibration_data = load_data(tokenizer, args.num_samples, args.sequence_length)

model, config["quantization"] = gptq_quantize(
Expand Down
6 changes: 3 additions & 3 deletions mlx_lm/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,7 @@ def __init__(self, cli_args: argparse.Namespace):
self._draft_model_map["default_model"] = self.cli_args.draft_model

# Build the tokenizer config for later use in load
self._tokenizer_config = {
"trust_remote_code": True if cli_args.trust_remote_code else None
}
self._tokenizer_config = {"trust_remote_code": cli_args.trust_remote_code}
if cli_args.chat_template:
self._tokenizer_config["chat_template"] = cli_args.chat_template

Expand All @@ -344,12 +342,14 @@ def _load(self, model_path, adapter_path=None, draft_model_path=None):
pipeline_group=self.pipeline_group,
tensor_group=self.tensor_group,
tokenizer_config=self._tokenizer_config,
trust_remote_code=self.cli_args.trust_remote_code,
)
else:
model, tokenizer = load(
model_path,
adapter_path=adapter_path,
tokenizer_config=self._tokenizer_config,
trust_remote_code=self.cli_args.trust_remote_code,
)

# Use the default chat template if needed
Expand Down
Loading
Loading