diff --git a/common/arg.cpp b/common/arg.cpp index 3d0183ed70..74f3c658b9 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2634,6 +2634,31 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.hf_token = value; } ).set_env("HF_TOKEN")); + add_opt(common_arg( + {"-ms", "-msr", "--ms-repo"}, "/[:quant]", + "ModelScope model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n" + "mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n" + "example: user/model:Q4_K_M\n" + "(default: unused)", + [](common_params & params, const std::string & value) { + params.model.hf_repo = value; + params.model.repo_type = LLAMA_REPO_TYPE_MS; + } + ).set_env("LLAMA_ARG_MS_REPO")); + add_opt(common_arg( + {"-msf", "--ms-file"}, "FILE", + "ModelScope model file. If specified, it will override the quant in --ms-repo (default: unused)", + [](common_params & params, const std::string & value) { + params.model.hf_file = value; + } + ).set_env("LLAMA_ARG_MS_FILE")); + add_opt(common_arg( + {"-mst", "--ms-token"}, "TOKEN", + "ModelScope access token (default: value from MS_TOKEN environment variable)", + [](common_params & params, const std::string & value) { + params.hf_token = value; + } + ).set_env("MS_TOKEN")); add_opt(common_arg( {"--context-file"}, "FNAME", "file to load context from (use comma-separated values to specify multiple files)", diff --git a/common/common.cpp b/common/common.cpp index 16f78debd0..a81972c71e 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1381,18 +1381,29 @@ common_init_result_ptr common_init_from_params(common_params & params) { common_init_result::~common_init_result() = default; -std::string get_model_endpoint() { +std::string get_model_endpoint(llama_repo_type type) { const char * model_endpoint_env = getenv("MODEL_ENDPOINT"); // We still respect the use of environment-variable "HF_ENDPOINT" 
for backward-compatibility. const char * hf_endpoint_env = getenv("HF_ENDPOINT"); - const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env; - std::string model_endpoint = "https://huggingface.co/"; - if (endpoint_env) { - model_endpoint = endpoint_env; - if (model_endpoint.back() != '/') { - model_endpoint += '/'; + std::string model_endpoint; + + if (type == LLAMA_REPO_TYPE_MS) { + if (model_endpoint_env && *model_endpoint_env) { + model_endpoint = model_endpoint_env; + } else { + model_endpoint = "https://www.modelscope.cn/"; + } + } else { + const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env; + if (endpoint_env && *endpoint_env) { + model_endpoint = endpoint_env; + } else { + model_endpoint = "https://huggingface.co/"; } } + if (model_endpoint.back() != '/') { + model_endpoint += '/'; + } return model_endpoint; } diff --git a/common/common.h b/common/common.h index 020b6a721f..4ca77960fe 100644 --- a/common/common.h +++ b/common/common.h @@ -14,6 +14,12 @@ #include #include +// Repository type enumeration +enum llama_repo_type { + LLAMA_REPO_TYPE_HF, // Hugging Face + LLAMA_REPO_TYPE_MS, // ModelScope +}; + #if defined(_WIN32) && !defined(_WIN32_WINNT) #define _WIN32_WINNT 0x0A00 #endif @@ -305,6 +311,7 @@ struct common_params_model { std::string hf_file = ""; // HF file // NOLINT std::string docker_repo = ""; // Docker repo // NOLINT std::string name = ""; // in format /[:] (tag is optional) // NOLINT + enum llama_repo_type repo_type = LLAMA_REPO_TYPE_HF; // repository type for model downloads // NOLINT }; struct common_ngram_mod; @@ -467,6 +474,7 @@ struct common_params { std::set model_alias; // model aliases // NOLINT std::set model_tags; // model tags (informational, not used for routing) // NOLINT std::string hf_token = ""; // HF token // NOLINT + llama_repo_type repo_type = LLAMA_REPO_TYPE_HF; // repository type (HF or ModelScope) // NOLINT std::string prompt = ""; // NOLINT std::string 
system_prompt = ""; // NOLINT std::string prompt_file = ""; // store the external prompt file name // NOLINT @@ -859,7 +867,7 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p // clear LoRA adapters from context, then apply new list of adapters void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora); -std::string get_model_endpoint(); +std::string get_model_endpoint(llama_repo_type type = LLAMA_REPO_TYPE_HF); // // Batch utils diff --git a/common/download.cpp b/common/download.cpp index 0e0034e1da..af4d7a6329 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -683,7 +683,7 @@ static hf_plan get_hf_plan(const common_params_model & model, auto [repo, tag] = common_download_split_repo_tag(model.hf_repo); if (!opts.offline) { - all = hf_cache::get_repo_files(repo, opts.bearer_token); + all = hf_cache::get_repo_files(repo, opts.bearer_token, model.repo_type); } if (all.empty()) { all = hf_cache::get_cached_files(repo); diff --git a/common/hf-cache.cpp b/common/hf-cache.cpp index 665c9ff066..8619d72716 100644 --- a/common/hf-cache.cpp +++ b/common/hf-cache.cpp @@ -196,18 +196,28 @@ static void safe_write_file(const fs::path & path, const std::string & data) { } static nl::json api_get(const std::string & url, - const std::string & token) { + const std::string & token, + llama_repo_type type = LLAMA_REPO_TYPE_HF) { auto [cli, parts] = common_http_client(url); + // Unified User-Agent for consistency httplib::Headers headers = { {"User-Agent", "llama-cpp/" + build_info}, {"Accept", "application/json"} }; - if (is_valid_hf_token(token)) { - headers.emplace("Authorization", "Bearer " + token); - } else if (!token.empty()) { - LOG_WRN("%s: invalid token, authentication disabled\n", __func__); + if (!token.empty()) { + if (type == LLAMA_REPO_TYPE_MS) { + // ModelScope: Cookie Auth + headers.emplace("Cookie", "m_session_id=" + token); + } else { + // Hugging Face: Bearer Auth + if (is_valid_hf_token(token)) { + 
headers.emplace("Authorization", "Bearer " + token); + } else { + LOG_WRN("%s: invalid HF token, authentication disabled\n", __func__); + } + } } if (auto res = cli.Get(parts.path, headers)) { @@ -227,11 +237,18 @@ static nl::json api_get(const std::string & url, } static std::string get_repo_commit(const std::string & repo_id, - const std::string & token) { - try { - auto endpoint = get_model_endpoint(); - auto json = api_get(endpoint + "api/models/" + repo_id + "/refs", token); + const std::string & token, + llama_repo_type type = LLAMA_REPO_TYPE_HF) { + + // MS does not support /refs API, default to master + if (type == LLAMA_REPO_TYPE_MS) { + return "master"; + } + // Original Hugging Face logic + try { + std::string endpoint = get_model_endpoint(type); + auto json = api_get(endpoint + "api/models/" + repo_id + "/refs", token, type); if (!json.is_object() || !json.contains("branches") || !json["branches"].is_array()) { LOG_WRN("%s: missing 'branches' for '%s'\n", __func__, repo_id.c_str()); @@ -289,13 +306,14 @@ static std::string get_repo_commit(const std::string & repo_id, } hf_files get_repo_files(const std::string & repo_id, - const std::string & token) { + const std::string & token, + llama_repo_type type) { if (!is_valid_repo_id(repo_id)) { LOG_WRN("%s: invalid repository: %s\n", __func__, repo_id.c_str()); return {}; } - std::string commit = get_repo_commit(repo_id, token); + std::string commit = get_repo_commit(repo_id, token, type); if (commit.empty()) { LOG_WRN("%s: failed to resolve commit for %s\n", __func__, repo_id.c_str()); return {}; @@ -307,63 +325,110 @@ hf_files get_repo_files(const std::string & repo_id, hf_files files; try { - auto endpoint = get_model_endpoint(); - auto json = api_get(endpoint + "api/models/" + repo_id + "/tree/" + commit + "?recursive=true", token); + std::string endpoint = get_model_endpoint(type); + nl::json json; - if (!json.is_array()) { - LOG_WRN("%s: response is not an array for '%s'\n", __func__, repo_id.c_str()); 
- return {}; - } + if (type == LLAMA_REPO_TYPE_MS) { + // --- ModelScope Logic --- + std::string url = endpoint + "api/v1/models/" + repo_id + "/repo/files?Revision=" + commit + "&Recursive=True"; + json = api_get(url, token, type); - for (const auto & item : json) { - if (!item.is_object() || - !item.contains("type") || !item["type"].is_string() || item["type"] != "file" || - !item.contains("path") || !item["path"].is_string()) { - continue; + if (json.contains("Code") && json["Code"] != 200) { + throw std::runtime_error("ModelScope API Error: " + json.value("Message", "Unknown")); } - - hf_file file; - file.repo_id = repo_id; - file.path = item["path"].get(); - - if (!is_valid_subpath(commit_path, file.path)) { - LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str()); - continue; + if (!json.contains("Data") || !json["Data"].contains("Files") || !json["Data"]["Files"].is_array()) { + return {}; } - if (item.contains("lfs") && item["lfs"].is_object()) { - if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) { - file.oid = item["lfs"]["oid"].get(); + for (const auto & item : json["Data"]["Files"]) { + if (!item.contains("Path") || !item["Path"].is_string()) continue; + + hf_file file; + file.repo_id = repo_id; + file.path = item["Path"].get(); + + if (!is_valid_subpath(commit_path, file.path)) { + LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str()); + continue; + } + + if (item.contains("Size") && item["Size"].is_number_unsigned()) { + file.size = item["Size"].get(); } - if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) { - file.size = item["lfs"]["size"].get(); + if (item.contains("Sha256") && item["Sha256"].is_string()) { + file.oid = item["Sha256"].get(); + } else if (item.contains("Revision") && item["Revision"].is_string()) { + file.oid = item["Revision"].get(); } - } else if (item.contains("oid") && item["oid"].is_string()) { - file.oid = item["oid"].get(); - } - if (file.size == 0 && 
item.contains("size") && item["size"].is_number()) { - file.size = item["size"].get(); + + // MS Download URL: /models/{repo}/resolve/{commit}/{path} + file.url = endpoint + "models/" + repo_id + "/resolve/" + commit + "/" + file.path; + file.final_path = (commit_path / file.path).string(); + file.local_path = file.oid.empty() ? file.final_path : (blobs_path / file.oid).string(); + + files.push_back(std::move(file)); } - if (!file.oid.empty() && !is_valid_oid(file.oid)) { - LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str()); - continue; + } else { + // Original Hugging Face Logic + auto json = api_get(endpoint + "api/models/" + repo_id + "/tree/" + commit + "?recursive=true", token, type); + + if (!json.is_array()) { + LOG_WRN("%s: response is not an array for '%s'\n", __func__, repo_id.c_str()); + return {}; } - file.url = endpoint + repo_id + "/resolve/" + commit + "/" + file.path; + for (const auto & item : json) { + if (!item.is_object() || + !item.contains("type") || !item["type"].is_string() || item["type"] != "file" || + !item.contains("path") || !item["path"].is_string()) { + continue; + } - fs::path final_path = commit_path / file.path; - file.final_path = final_path.string(); + hf_file file; + file.repo_id = repo_id; + file.path = item["path"].get(); - if (!file.oid.empty() && !fs::exists(final_path)) { - fs::path local_path = blobs_path / file.oid; - file.local_path = local_path.string(); - } else { - file.local_path = file.final_path; - } + if (!is_valid_subpath(commit_path, file.path)) { + LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str()); + continue; + } - files.push_back(file); + if (item.contains("lfs") && item["lfs"].is_object()) { + if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) { + file.oid = item["lfs"]["oid"].get(); + } + if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) { + file.size = item["lfs"]["size"].get(); + } + } else if (item.contains("oid") && 
item["oid"].is_string()) { + file.oid = item["oid"].get(); + } + if (file.size == 0 && item.contains("size") && item["size"].is_number()) { + file.size = item["size"].get(); + } + + if (!file.oid.empty() && !is_valid_oid(file.oid)) { + LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str()); + continue; + } + + file.url = endpoint + repo_id + "/resolve/" + commit + "/" + file.path; + + fs::path final_path = commit_path / file.path; + file.final_path = final_path.string(); + + if (!file.oid.empty() && !fs::exists(final_path)) { + fs::path local_path = blobs_path / file.oid; + file.local_path = local_path.string(); + } else { + file.local_path = file.final_path; + } + + files.push_back(file); + } } + } catch (const nl::json::exception & e) { LOG_ERR("%s: JSON error: %s\n", __func__, e.what()); } catch (const std::exception & e) { @@ -372,6 +437,13 @@ hf_files get_repo_files(const std::string & repo_id, return files; } + +// Backward-compatible overload defaulting to HF +static hf_files get_repo_files(const std::string & repo_id, + const std::string & token) { + return get_repo_files(repo_id, token, LLAMA_REPO_TYPE_HF); +} + static std::string get_cached_ref(const fs::path & repo_path) { fs::path refs_path = repo_path / "refs"; if (!fs::is_directory(refs_path)) { @@ -720,7 +792,7 @@ void migrate_old_cache_to_hf_cache(const std::string & token, bool offline) { } auto repo_id = owner + "/" + repo; - auto files = get_repo_files(repo_id, token); + auto files = get_repo_files(repo_id, token, LLAMA_REPO_TYPE_HF); if (files.empty()) { LOG_WRN("%s: could not get repo files for %s, skipping\n", __func__, repo_id.c_str()); diff --git a/common/hf-cache.h b/common/hf-cache.h index 9e46f97743..86daa208bf 100644 --- a/common/hf-cache.h +++ b/common/hf-cache.h @@ -5,6 +5,8 @@ // Ref: https://huggingface.co/docs/hub/local-cache.md +#include "common.h" // for llama_repo_type + namespace hf_cache { struct hf_file { @@ -22,7 +24,8 @@ using hf_files = std::vector; // Get files 
from HF API hf_files get_repo_files( const std::string & repo_id, - const std::string & token + const std::string & token, + llama_repo_type type = LLAMA_REPO_TYPE_HF ); hf_files get_cached_files(const std::string & repo_id = {}); diff --git a/tools/cli/README.md b/tools/cli/README.md index de0b780409..6edb0472d5 100644 --- a/tools/cli/README.md +++ b/tools/cli/README.md @@ -89,6 +89,9 @@ | `-hfv, -hfrv, --hf-repo-v /[:quant]` | Hugging Face model repository for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_REPO_V) | | `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_FILE_V) | | `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)
(env: HF_TOKEN) | +| `-ms, -msr, --ms-repo <user>/<model>[:quant]` | ModelScope model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: Qwen/Qwen3-4B-GGUF:Q4_K_M
(default: unused)
(env: LLAMA_ARG_MS_REPO) | +| `-msf, --ms-file FILE` | ModelScope model file. If specified, it will override the quant in --ms-repo (default: unused)
(env: LLAMA_ARG_MS_FILE) | +| `-mst, --ms-token TOKEN` | ModelScope access token (default: value from MS_TOKEN environment variable)
(env: MS_TOKEN) | | `--log-disable` | Log disable | | `--log-file FNAME` | Log to file
(env: LLAMA_LOG_FILE) | | `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal
(env: LLAMA_LOG_COLORS) |