diff --git a/common/arg.cpp b/common/arg.cpp
index 3d0183ed70..74f3c658b9 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2634,6 +2634,31 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.hf_token = value;
         }
     ).set_env("HF_TOKEN"));
+    add_opt(common_arg(
+        {"-ms", "-msr", "--ms-repo"}, "<user>/<model>[:quant]",
+        "ModelScope model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
+        "mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n"
+        "example: user/model:Q4_K_M\n"
+        "(default: unused)",
+        [](common_params & params, const std::string & value) {
+            params.model.hf_repo = value;
+            params.model.repo_type = LLAMA_REPO_TYPE_MS;
+        }
+    ).set_env("LLAMA_ARG_MS_REPO"));
+    add_opt(common_arg(
+        {"-msf", "--ms-file"}, "FILE",
+        "ModelScope model file. If specified, it will override the quant in --ms-repo (default: unused)",
+        [](common_params & params, const std::string & value) {
+            params.model.hf_file = value;
+        }
+    ).set_env("LLAMA_ARG_MS_FILE"));
+    add_opt(common_arg(
+        {"-mst", "--ms-token"}, "TOKEN",
+        "ModelScope access token (default: value from MS_TOKEN environment variable)",
+        [](common_params & params, const std::string & value) {
+            params.hf_token = value;
+        }
+    ).set_env("MS_TOKEN"));
     add_opt(common_arg(
         {"--context-file"}, "FNAME",
         "file to load context from (use comma-separated values to specify multiple files)",
diff --git a/common/common.cpp b/common/common.cpp
index 16f78debd0..a81972c71e 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1381,18 +1381,34 @@ common_init_result_ptr common_init_from_params(common_params & params) {
 
 common_init_result::~common_init_result() = default;
 
-std::string get_model_endpoint() {
+// Resolve the base URL (always '/'-terminated) used for model downloads of the given repository type.
+// MODEL_ENDPOINT overrides the built-in default for both repository types; HF_ENDPOINT is consulted
+// for Hugging Face only. An environment variable that is set but empty is treated as unset.
+std::string get_model_endpoint(llama_repo_type type) {
     const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
     // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
     const char * hf_endpoint_env = getenv("HF_ENDPOINT");
-    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
-    std::string model_endpoint = "https://huggingface.co/";
-    if (endpoint_env) {
-        model_endpoint = endpoint_env;
-        if (model_endpoint.back() != '/') {
-            model_endpoint += '/';
+    std::string model_endpoint;
+
+    if (type == LLAMA_REPO_TYPE_MS) {
+        if (model_endpoint_env && *model_endpoint_env) {
+            model_endpoint = model_endpoint_env;
+        } else {
+            model_endpoint = "https://www.modelscope.cn/";
+        }
+    } else {
+        // an empty MODEL_ENDPOINT must not mask a usable HF_ENDPOINT
+        const char * endpoint_env = (model_endpoint_env && *model_endpoint_env) ? model_endpoint_env : hf_endpoint_env;
+        if (endpoint_env && *endpoint_env) {
+            model_endpoint = endpoint_env;
+        } else {
+            model_endpoint = "https://huggingface.co/";
         }
     }
+    // every branch above assigns a non-empty string, so back() is safe here
+    if (model_endpoint.back() != '/') {
+        model_endpoint += '/';
+    }
     return model_endpoint;
 }
 
diff --git a/common/common.h b/common/common.h
index 020b6a721f..4ca77960fe 100644
--- a/common/common.h
+++ b/common/common.h
@@ -14,6 +14,12 @@
 #include <vector>
 #include <map>
 
+// Repository type enumeration
+enum llama_repo_type {
+    LLAMA_REPO_TYPE_HF,      // Hugging Face
+    LLAMA_REPO_TYPE_MS,      // ModelScope
+};
+
 #if defined(_WIN32) && !defined(_WIN32_WINNT)
 #define _WIN32_WINNT 0x0A00
 #endif
@@ -305,6 +311,7 @@ struct common_params_model {
     std::string hf_file     = ""; // HF file                                                // NOLINT
     std::string docker_repo = ""; // Docker repo                                            // NOLINT
     std::string name        = ""; // in format <user>/<model>[:<tag>] (tag is optional)     // NOLINT
+    enum llama_repo_type repo_type = LLAMA_REPO_TYPE_HF; // repository type for model downloads // NOLINT
 };
 
 struct common_ngram_mod;
@@ -859,7 +866,7 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
 
-std::string                   get_model_endpoint();
+std::string get_model_endpoint(llama_repo_type type = LLAMA_REPO_TYPE_HF);
 
 //
 // Batch utils
diff --git a/common/download.cpp b/common/download.cpp
index 0e0034e1da..af4d7a6329 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -683,7 +683,7 @@ static hf_plan get_hf_plan(const common_params_model  & model,
     auto [repo, tag] = common_download_split_repo_tag(model.hf_repo);
 
     if (!opts.offline) {
-        all = hf_cache::get_repo_files(repo, opts.bearer_token);
+        all = hf_cache::get_repo_files(repo, opts.bearer_token, model.repo_type);
     }
     if (all.empty()) {
         all = hf_cache::get_cached_files(repo);
diff --git a/common/hf-cache.cpp b/common/hf-cache.cpp
index 665c9ff066..8619d72716 100644
--- a/common/hf-cache.cpp
+++ b/common/hf-cache.cpp
@@ -196,18 +196,28 @@ static void safe_write_file(const fs::path & path, const std::string & data) {
 }
 
 static nl::json api_get(const std::string & url,
-                        const std::string & token) {
+                        const std::string & token,
+                        llama_repo_type type = LLAMA_REPO_TYPE_HF) {
     auto [cli, parts] = common_http_client(url);
 
+    // Unified User-Agent for consistency
     httplib::Headers headers = {
         {"User-Agent", "llama-cpp/" + build_info},
         {"Accept", "application/json"}
     };
 
-    if (is_valid_hf_token(token)) {
-        headers.emplace("Authorization", "Bearer " + token);
-    } else if (!token.empty()) {
-        LOG_WRN("%s: invalid token, authentication disabled\n", __func__);
+    if (!token.empty()) {
+        if (type == LLAMA_REPO_TYPE_MS) {
+            // ModelScope: Cookie Auth
+            headers.emplace("Cookie", "m_session_id=" + token);
+        } else {
+            // Hugging Face: Bearer Auth
+            if (is_valid_hf_token(token)) {
+                headers.emplace("Authorization", "Bearer " + token);
+            } else {
+                LOG_WRN("%s: invalid HF token, authentication disabled\n", __func__);
+            }
+        }
     }
 
     if (auto res = cli.Get(parts.path, headers)) {
@@ -227,11 +237,18 @@ static nl::json api_get(const std::string & url,
 }
 
 static std::string get_repo_commit(const std::string & repo_id,
-                                   const std::string & token) {
-    try {
-        auto endpoint = get_model_endpoint();
-        auto json = api_get(endpoint + "api/models/" + repo_id + "/refs", token);
+                                   const std::string & token,
+                                   llama_repo_type type = LLAMA_REPO_TYPE_HF) {
+    
+    // MS does not support /refs API, default to master
+    if (type == LLAMA_REPO_TYPE_MS) {
+        return "master";
+    }
 
+    // Original Hugging Face logic
+    try {
+        std::string endpoint = get_model_endpoint(type);
+        auto json = api_get(endpoint + "api/models/" + repo_id + "/refs", token, type);
         if (!json.is_object() ||
             !json.contains("branches") || !json["branches"].is_array()) {
             LOG_WRN("%s: missing 'branches' for '%s'\n", __func__, repo_id.c_str());
@@ -289,13 +306,14 @@ static std::string get_repo_commit(const std::string & repo_id,
 }
 
 hf_files get_repo_files(const std::string & repo_id,
-                        const std::string & token) {
+                        const std::string & token,
+                        llama_repo_type type) {
     if (!is_valid_repo_id(repo_id)) {
         LOG_WRN("%s: invalid repository: %s\n", __func__, repo_id.c_str());
         return {};
     }
 
-    std::string commit = get_repo_commit(repo_id, token);
+    std::string commit = get_repo_commit(repo_id, token, type);
     if (commit.empty()) {
         LOG_WRN("%s: failed to resolve commit for %s\n", __func__, repo_id.c_str());
         return {};
@@ -307,63 +325,130 @@ hf_files get_repo_files(const std::string & repo_id,
     hf_files files;
 
     try {
-        auto endpoint = get_model_endpoint();
-        auto json = api_get(endpoint + "api/models/" + repo_id + "/tree/" + commit + "?recursive=true", token);
+        std::string endpoint = get_model_endpoint(type);
+        nl::json json;
 
-        if (!json.is_array()) {
-            LOG_WRN("%s: response is not an array for '%s'\n", __func__, repo_id.c_str());
-            return {};
-        }
+        if (type == LLAMA_REPO_TYPE_MS) {
+            // ModelScope: the listing is read from { "Code": ..., "Data": { "Files": [ ... ] } }
+            std::string url = endpoint + "api/v1/models/" + repo_id + "/repo/files?Revision=" + commit + "&Recursive=True";
+            json = api_get(url, token, type);
 
-        for (const auto & item : json) {
-            if (!item.is_object() ||
-                !item.contains("type") || !item["type"].is_string() || item["type"] != "file" ||
-                !item.contains("path") || !item["path"].is_string()) {
-                continue;
+            if (json.contains("Code") && json["Code"] != 200) {
+                throw std::runtime_error("ModelScope API Error: " + json.value("Message", "Unknown"));
             }
-
-            hf_file file;
-            file.repo_id = repo_id;
-            file.path = item["path"].get<std::string>();
-
-            if (!is_valid_subpath(commit_path, file.path)) {
-                LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str());
-                continue;
+            if (!json.contains("Data") || !json["Data"].contains("Files") || !json["Data"]["Files"].is_array()) {
+                LOG_WRN("%s: missing 'Data.Files' array for '%s'\n", __func__, repo_id.c_str());
+                return {};
             }
 
-            if (item.contains("lfs") && item["lfs"].is_object()) {
-                if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) {
-                    file.oid = item["lfs"]["oid"].get<std::string>();
+            for (const auto & item : json["Data"]["Files"]) {
+                if (!item.is_object() || !item.contains("Path") || !item["Path"].is_string()) {
+                    continue;
+                }
+                // NOTE(review): the recursive listing is expected to include directory entries ("tree"); skip them
+                if (item.contains("Type") && item["Type"].is_string() && item["Type"] == "tree") {
+                    continue;
+                }
+
+                hf_file file;
+                file.repo_id = repo_id;
+                file.path = item["Path"].get<std::string>();
+
+                if (!is_valid_subpath(commit_path, file.path)) {
+                    LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str());
+                    continue;
+                }
+
+                if (item.contains("Size") && item["Size"].is_number_unsigned()) {
+                    file.size = item["Size"].get<size_t>();
                 }
-                if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) {
-                    file.size = item["lfs"]["size"].get<size_t>();
+                // Only Sha256 is used as the blob key. NOTE(review): "Revision" looks like a commit id that
+                // several files can share, so it is not used to name blobs - confirm against the MS API.
+                if (item.contains("Sha256") && item["Sha256"].is_string()) {
+                    file.oid = item["Sha256"].get<std::string>();
                 }
-            } else if (item.contains("oid") && item["oid"].is_string()) {
-                file.oid = item["oid"].get<std::string>();
-            }
-            if (file.size == 0 && item.contains("size") && item["size"].is_number()) {
-                file.size = item["size"].get<size_t>();
+
+                // the oid comes from the server and becomes a file name under blobs_path: validate it
+                if (!file.oid.empty() && !is_valid_oid(file.oid)) {
+                    LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str());
+                    continue;
+                }
+
+                // MS Download URL: /models/{repo}/resolve/{commit}/{path}
+                file.url = endpoint + "models/" + repo_id + "/resolve/" + commit + "/" + file.path;
+
+                fs::path final_path = commit_path / file.path;
+                file.final_path = final_path.string();
+
+                // same local path selection as the Hugging Face branch below
+                if (!file.oid.empty() && !fs::exists(final_path)) {
+                    file.local_path = (blobs_path / file.oid).string();
+                } else {
+                    file.local_path = file.final_path;
+                }
+
+                files.push_back(std::move(file));
             }
 
-            if (!file.oid.empty() && !is_valid_oid(file.oid)) {
-                LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str());
-                continue;
+        } else {
+            // Original Hugging Face Logic
+            json = api_get(endpoint + "api/models/" + repo_id + "/tree/" + commit + "?recursive=true", token, type);
+
+            if (!json.is_array()) {
+                LOG_WRN("%s: response is not an array for '%s'\n", __func__, repo_id.c_str());
+                return {};
             }
 
-            file.url = endpoint + repo_id + "/resolve/" + commit + "/" + file.path;
+            for (const auto & item : json) {
+                if (!item.is_object() ||
+                    !item.contains("type") || !item["type"].is_string() || item["type"] != "file" ||
+                    !item.contains("path") || !item["path"].is_string()) {
+                    continue;
+                }
 
-            fs::path final_path = commit_path / file.path;
-            file.final_path = final_path.string();
+                hf_file file;
+                file.repo_id = repo_id;
+                file.path = item["path"].get<std::string>();
 
-            if (!file.oid.empty() && !fs::exists(final_path)) {
-                fs::path local_path = blobs_path / file.oid;
-                file.local_path = local_path.string();
-            } else {
-                file.local_path = file.final_path;
-            }
+                if (!is_valid_subpath(commit_path, file.path)) {
+                    LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str());
+                    continue;
+                }
 
-            files.push_back(file);
+                if (item.contains("lfs") && item["lfs"].is_object()) {
+                    if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) {
+                        file.oid = item["lfs"]["oid"].get<std::string>();
+                    }
+                    if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) {
+                        file.size = item["lfs"]["size"].get<size_t>();
+                    }
+                } else if (item.contains("oid") && item["oid"].is_string()) {
+                    file.oid = item["oid"].get<std::string>();
+                }
+                if (file.size == 0 && item.contains("size") && item["size"].is_number()) {
+                    file.size = item["size"].get<size_t>();
+                }
+
+                if (!file.oid.empty() && !is_valid_oid(file.oid)) {
+                    LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str());
+                    continue;
+                }
+
+                file.url = endpoint + repo_id + "/resolve/" + commit + "/" + file.path;
+
+                fs::path final_path = commit_path / file.path;
+                file.final_path = final_path.string();
+
+                if (!file.oid.empty() && !fs::exists(final_path)) {
+                    fs::path local_path = blobs_path / file.oid;
+                    file.local_path = local_path.string();
+                } else {
+                    file.local_path = file.final_path;
+                }
+
+                files.push_back(file);
+            }
         }
+
     } catch (const nl::json::exception & e) {
         LOG_ERR("%s: JSON error: %s\n", __func__, e.what());
     } catch (const std::exception & e) {
@@ -720,7 +785,7 @@ void migrate_old_cache_to_hf_cache(const std::string & token, bool offline) {
         }
 
         auto repo_id = owner + "/" + repo;
-        auto files = get_repo_files(repo_id, token);
+        auto files = get_repo_files(repo_id, token, LLAMA_REPO_TYPE_HF);
 
         if (files.empty()) {
             LOG_WRN("%s: could not get repo files for %s, skipping\n", __func__, repo_id.c_str());
diff --git a/common/hf-cache.h b/common/hf-cache.h
index 9e46f97743..86daa208bf 100644
--- a/common/hf-cache.h
+++ b/common/hf-cache.h
@@ -5,6 +5,8 @@
 
 // Ref: https://huggingface.co/docs/hub/local-cache.md
 
+#include "common.h" // for llama_repo_type
+
 namespace hf_cache {
 
 struct hf_file {
@@ -22,7 +24,8 @@ using hf_files = std::vector<hf_file>;
 // Get files from HF API
 hf_files get_repo_files(
     const std::string & repo_id,
-    const std::string & token
+    const std::string & token,
+    llama_repo_type type = LLAMA_REPO_TYPE_HF
 );
 
 hf_files get_cached_files(const std::string & repo_id = {});
diff --git a/tools/cli/README.md b/tools/cli/README.md
index de0b780409..6edb0472d5 100644
--- a/tools/cli/README.md
+++ b/tools/cli/README.md
@@ -89,6 +89,9 @@
 | `-hfv, -hfrv, --hf-repo-v <user>/<model>[:quant]` | Hugging Face model repository for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_REPO_V) |
 | `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_FILE_V) |
 | `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)<br/>(env: HF_TOKEN) |
+| `-ms, -msr, --ms-repo <user>/<model>[:quant]` | ModelScope model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.<br/>mmproj is also downloaded automatically if available. to disable, add --no-mmproj<br/>example: user/model:Q4_K_M<br/>(default: unused)<br/>(env: LLAMA_ARG_MS_REPO) |
+| `-msf, --ms-file FILE` | ModelScope model file. If specified, it will override the quant in --ms-repo (default: unused)<br/>(env: LLAMA_ARG_MS_FILE) |
+| `-mst, --ms-token TOKEN` | ModelScope access token (default: value from MS_TOKEN environment variable)<br/>(env: MS_TOKEN) |
 | `--log-disable` | Log disable |
 | `--log-file FNAME` | Log to file<br/>(env: LLAMA_LOG_FILE) |
 | `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')<br/>'auto' enables colors when output is to a terminal<br/>(env: LLAMA_LOG_COLORS) |