From 2a2510827fe2a5956b220774329df227a428bd06 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Wed, 6 May 2026 16:35:35 +0800
Subject: [PATCH 01/17] Add WebGPU recipe for DeepSeek-R1-Distill-Qwen-1.5B

---
 .aitk/configs/checks.json                     |  8 +-
 .aitk/configs/model_list.json                 |  3 +-
 .aitk/docs/guide/ModelList.md                 |  2 +-
 .../requirements/WebGPU/WebGPU_py3.12.13.txt  |  0
 .../aitk/deepseek_webgpu.json                 | 73 +++++++++++++++++++
 .../aitk/deepseek_webgpu.json.config          | 53 ++++++++++++++
 .../aitk/info.yml                             |  3 +
 .../aitk/model_project.config                 |  4 +
 8 files changed, 140 insertions(+), 6 deletions(-)
 create mode 100644 .aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index 22f96301f..ee3a84873 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,5 +1,5 @@
 {
-    "configCheck": 167,
+    "configCheck": 168,
     "copyCheck": 182,
     "extensionCheck": 2,
     "gitignoreCheck": 44,
@@ -8,9 +8,9 @@
     "licenseCheck": 41,
     "modelProjectCheck": 46,
     "oliveCheck": 88,
-    "oliveJsonCheck": 167,
-    "pathCheck": 1423,
+    "oliveJsonCheck": 168,
+    "pathCheck": 1426,
     "requirementsCheck": 37,
     "templateCheck": 3,
-    "venvRequirementsCheck": 17
+    "venvRequirementsCheck": 18
 }
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index 11abf6887..afa6e2615 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -64,7 +64,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md
index 8cc8e7801..034fc5429 100644
--- a/.aitk/docs/guide/ModelList.md
+++ b/.aitk/docs/guide/ModelList.md
@@ -5,7 +5,7 @@
 | Model Name | Supported Runtimes |
 |------------|--------------------|
 | [Deepseek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_npu_config.json) |
-| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [Qualcomm GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_gpu_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json) |
+| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [Qualcomm GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_gpu_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json), [WebGPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json) |
 | [Deepseek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_trtrtx.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_npu_config.json) |
 | [Deepseek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_trtrtx.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_npu_config.json) |
 | [Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [DirectML](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_dml_config.json) |
diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
new file mode 100644
index 000000000..a54f42c81
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
@@ -0,0 +1,73 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "kld_gradient",
+            "bits": 4,
+            "high_bits": 8,
+            "ratio": 0.65,
+            "sym": false,
+            "group_size": 32
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/deepseek",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config
new file mode 100644
index 000000000..3810be3cd
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config
@@ -0,0 +1,53 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "isGPUSuggested": true,
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
index dc61befc4..d211dd33c 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -39,6 +39,9 @@ recipes:
         isGPURequired: true
         runtimeOverwrite:
           executeEp: NvTensorRTRTXExecutionProvider
+    - file: "deepseek_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
index c997fb66b..495bf4127 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "deepseek_qnn_gpu_config.json",
             "templateName": "deepseek_qnn_gpu_config"
+        },
+        {
+            "file": "deepseek_webgpu.json",
+            "templateName": "deepseek_webgpu"
         }
     ],
     "modelInfo": {

From 8f509a6869bc2412d548a145a5ace3d1356572cb Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Wed, 6 May 2026 16:56:27 +0800
Subject: [PATCH 02/17] Add initial WebGPU requirements

---
 .aitk/requirements/WebGPU/WebGPU_py3.12.13.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
index e69de29bb..aa1a81919 100644
--- a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -0,0 +1,5 @@
+olive-ai==0.12.1
+accelerate
+datasets
+onnxruntime-genai
+transformers==4.52.4

From c29ce79274ef3204129505e44f223f70abe4aadd Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Wed, 6 May 2026 17:07:13 +0800
Subject: [PATCH 03/17] add generator

Co-authored-by: Copilot <copilot@github.com>
---
 .aitk/configs/checks.json                     |  2 +-
 .aitk/scripts/project_processor.py            |  3 ++
 .aitk/scripts/sanitize/generator_webgpu.py    | 35 +++++++++++++++
 .../aitk/deepseek_webgpu.json.config          | 44 ++++++++++++++++++-
 .../aitk/info.yml                             |  3 ++
 5 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 .aitk/scripts/sanitize/generator_webgpu.py

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index ee3a84873..9cacddb66 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -9,7 +9,7 @@
     "modelProjectCheck": 46,
     "oliveCheck": 88,
     "oliveJsonCheck": 168,
-    "pathCheck": 1426,
+    "pathCheck": 1428,
     "requirementsCheck": 37,
     "templateCheck": 3,
     "venvRequirementsCheck": 18
diff --git a/.aitk/scripts/project_processor.py b/.aitk/scripts/project_processor.py
index 74c2c66f8..85b8289fe 100644
--- a/.aitk/scripts/project_processor.py
+++ b/.aitk/scripts/project_processor.py
@@ -13,6 +13,7 @@
 from sanitize.generator_intel import generator_intel
 from sanitize.generator_qnn import generator_qnn
 from sanitize.generator_trtrtx import generator_trtrtx
+from sanitize.generator_webgpu import generator_webgpu
 from sanitize.model_info import ModelInfo, ModelList
 from sanitize.project_config import ModelInfoProject, ModelProjectConfig, WorkflowItem
 from sanitize.utils import GlobalVars, isLLM_by_id, open_ex
@@ -183,6 +184,8 @@ def convert_yaml_to_project_config(
             generator_trtrtx(id, recipe, yml_file.parent, modelList)
         elif recipe.get("ep") == EPNames.DmlExecutionProvider.value:
             generator_dml(id, recipe, yml_file.parent, modelList)
+        elif recipe.get("ep") == EPNames.WebGpuExecutionProvider.value:
+            generator_webgpu(id, recipe, yml_file.parent, modelList)
         runtimes = get_runtime(recipe)
         for runtime in runtimes:
             modelSummary.recipes.setdefault(runtime, []).append(file)
diff --git a/.aitk/scripts/sanitize/generator_webgpu.py b/.aitk/scripts/sanitize/generator_webgpu.py
new file mode 100644
index 000000000..80557f841
--- /dev/null
+++ b/.aitk/scripts/sanitize/generator_webgpu.py
@@ -0,0 +1,35 @@
+from pathlib import Path
+
+from .generator_common import create_model_parameter, set_optimization_path
+from .generator_dml import generate_quantization_config
+from .model_info import ModelList
+from .model_parameter import ModelParameter
+from .utils import isLLM_by_id
+
+def generator_webgpu(id: str, recipe, folder: Path, modelList: ModelList):
+    aitk = recipe.get("aitk", {})
+    auto = aitk.get("auto", True)
+    if not auto:
+        return
+
+    isLLM = isLLM_by_id(id)
+    file = recipe.get("file")
+    configFile = folder / file
+
+    if not isLLM:
+        modelParameter = ModelParameter.Read(str(configFile) + ".config")
+        set_optimization_path(modelParameter, str(configFile))
+        modelParameter.writeIfChanged()
+        return
+
+    name = "Convert to WebGPU"
+
+    parameter = create_model_parameter(aitk, name, configFile)
+    parameter.isLLM = isLLM
+
+    quantize = generate_quantization_config(configFile, parameter)
+    if quantize:
+        parameter.sections.append(quantize)
+
+    parameter.writeIfChanged()
+    print(f"\tGenerated WebGPU configuration for {file}")
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config
index 3810be3cd..8c3a740ee 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config
@@ -7,7 +7,6 @@
         "autoGenerated": true,
         "useModelBuilder": "m"
     },
-    "isGPUSuggested": true,
     "runtimeOverwrite": {
         "autoGenerated": true,
         "executeRequirement": "WebGPU/WebGPU_py3.12.13"
@@ -48,6 +47,49 @@
                 ],
                 "readOnly": true
             }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
         }
     ]
 }
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
index d211dd33c..4f025c44a 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -42,6 +42,9 @@ recipes:
     - file: "deepseek_webgpu.json"
       device: gpu
       ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

From 9a9d79ad386bb279fcb551a1f998273552b85997 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Thu, 7 May 2026 11:14:07 +0800
Subject: [PATCH 04/17] Freeze WebGPU requirements to pinned versions

---
 .../requirements/WebGPU/WebGPU_py3.12.13.txt  | 80 ++++++++++++++++++-
 1 file changed, 77 insertions(+), 3 deletions(-)

diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
index aa1a81919..c074a2e6b 100644
--- a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -1,5 +1,79 @@
+--extra-index-url https://download.pytorch.org/whl/cu128
+accelerate==1.13.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.5
+aiosignal==1.4.0
+alembic==1.18.4
+annotated-types==0.7.0
+anyio==4.13.0
+attrs==26.1.0
+certifi==2026.4.22
+charset-normalizer==3.4.7
+colorama==0.4.6
+colorlog==6.10.1
+datasets==4.8.5
+dill==0.4.1
+filelock==3.29.0
+flatbuffers==25.12.19
+frozenlist==1.8.0
+fsspec==2026.2.0
+greenlet==3.5.0
+h11==0.16.0
+hf-xet==1.5.0
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.36.2
+idna==3.13
+importlib-metadata==8.7.1
+jinja2==3.1.6
+lightning-utilities==0.15.3
+mako==1.3.12
+markupsafe==3.0.3
+ml-dtypes==0.5.4
+mpmath==1.3.0
+multidict==6.7.1
+multiprocess==0.70.19
+networkx==3.6.1
+numpy==2.4.4
 olive-ai==0.12.1
-accelerate
-datasets
-onnxruntime-genai
+onnx==1.21.0
+onnx-ir==0.2.1
+onnxruntime-genai==0.13.2
+onnxruntime-webgpu==1.25.1
+onnxscript==0.7.0
+opentelemetry-api==1.41.1
+opentelemetry-sdk==1.41.1
+opentelemetry-semantic-conventions==0.62b1
+optuna==4.8.0
+packaging==26.2
+pandas==3.0.2
+prompt-toolkit==3.0.52
+propcache==0.4.1
+protobuf==7.34.1
+psutil==7.2.2
+pyarrow==24.0.0
+pydantic==2.13.3
+pydantic-core==2.46.3
+python-dateutil==2.9.0.post0
+pyyaml==6.0.3
+questionary==2.1.1
+regex==2026.4.4
+requests==2.33.1
+safetensors==0.7.0
+setuptools==81.0.0
+six==1.17.0
+sqlalchemy==2.0.49
+sympy==1.14.0
+tokenizers==0.21.4
+torch==2.8.0+cu128
+torchmetrics==1.7.1
+tqdm==4.67.3
 transformers==4.52.4
+typing-extensions==4.15.0
+typing-inspection==0.4.2
+tzdata==2026.2
+urllib3==2.6.3
+wcwidth==0.7.0
+xxhash==3.7.0
+yarl==1.23.0
+zipp==3.23.1

From 628a24721a9e23dc22261a7c49662ba05789a03f Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Thu, 7 May 2026 11:32:34 +0800
Subject: [PATCH 05/17] add intel bert

Co-authored-by: Copilot <copilot@github.com>
---
 .aitk/configs/checks.json                     |  6 +--
 .aitk/configs/model_list.json                 |  3 +-
 .aitk/docs/guide/ModelList.md                 |  2 +-
 .../aitk/bert_webgpu.json                     | 35 ++++++++++++++
 .../aitk/bert_webgpu.json.config              | 47 +++++++++++++++++++
 intel-bert-base-uncased-mrpc/aitk/info.yml    |  3 ++
 .../aitk/model_project.config                 |  4 ++
 7 files changed, 95 insertions(+), 5 deletions(-)
 create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
 create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index 9cacddb66..fc2de1dd9 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,5 +1,5 @@
 {
-    "configCheck": 168,
+    "configCheck": 169,
     "copyCheck": 182,
     "extensionCheck": 2,
     "gitignoreCheck": 44,
@@ -8,8 +8,8 @@
     "licenseCheck": 41,
     "modelProjectCheck": 46,
     "oliveCheck": 88,
-    "oliveJsonCheck": 168,
-    "pathCheck": 1428,
+    "oliveJsonCheck": 169,
+    "pathCheck": 1431,
     "requirementsCheck": 37,
     "templateCheck": 3,
     "venvRequirementsCheck": 18
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index afa6e2615..1c7db6d3f 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -140,7 +140,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md
index 034fc5429..f05df07c1 100644
--- a/.aitk/docs/guide/ModelList.md
+++ b/.aitk/docs/guide/ModelList.md
@@ -35,7 +35,7 @@
 | Model Name | Supported Runtimes |
 |------------|--------------------|
 | [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [Qualcomm GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qnn_gpu.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [AMD GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json) |
-| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [Qualcomm GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qnn_gpu.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [AMD GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_migraphx.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json) |
+| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [Qualcomm GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qnn_gpu.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [AMD GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_migraphx.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json), [WebGPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json) |
 | [Chinese Clip Vit Base Patch16](https://huggingface.co/OFA-Sys/chinese-clip-vit-base-patch16) | [Intel CPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json) |
 | [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [Qualcomm GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn_gpu.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [AMD GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json) |
 | [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) |
diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
new file mode 100644
index 000000000..96ede55b4
--- /dev/null
+++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
@@ -0,0 +1,35 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "Intel/bert-base-uncased-mrpc",
+        "task": "text-classification"
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/bert_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml
index db957fe08..fb250bcac 100644
--- a/intel-bert-base-uncased-mrpc/aitk/info.yml
+++ b/intel-bert-base-uncased-mrpc/aitk/info.yml
@@ -29,6 +29,9 @@ recipes:
     - file: "bert_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "bert_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/Intel/bert-base-uncased-mrpc"
diff --git a/intel-bert-base-uncased-mrpc/aitk/model_project.config b/intel-bert-base-uncased-mrpc/aitk/model_project.config
index 5b21e504f..60a31234e 100644
--- a/intel-bert-base-uncased-mrpc/aitk/model_project.config
+++ b/intel-bert-base-uncased-mrpc/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "bert_qnn_gpu.json",
             "templateName": "bert_qnn_gpu"
+        },
+        {
+            "file": "bert_webgpu.json",
+            "templateName": "bert_webgpu"
         }
     ],
     "modelInfo": {

From 87bd35e95f4a17a9d778e9981985ab2fc89914f2 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Thu, 7 May 2026 11:39:36 +0800
Subject: [PATCH 06/17] Install onnxruntime-genai without deps and bump torch pins

---
 .aitk/requirements/WebGPU/WebGPU_py3.12.13.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
index c074a2e6b..3768b67fa 100644
--- a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -38,7 +38,8 @@ numpy==2.4.4
 olive-ai==0.12.1
 onnx==1.21.0
 onnx-ir==0.2.1
-onnxruntime-genai==0.13.2
+# install it separatly with no deps as it will install onnxruntime to overwrite onnxruntime-webgpu
+# uvpip:install onnxruntime-genai==0.13.2 --no-deps;post
 onnxruntime-webgpu==1.25.1
 onnxscript==0.7.0
 opentelemetry-api==1.41.1
@@ -65,8 +66,8 @@ six==1.17.0
 sqlalchemy==2.0.49
 sympy==1.14.0
 tokenizers==0.21.4
-torch==2.8.0+cu128
-torchmetrics==1.7.1
+torch==2.11.0+cu128
+torchmetrics==1.9.0
 tqdm==4.67.3
 transformers==4.52.4
 typing-extensions==4.15.0

From 449bd6227cbcfc91fcc5c820826f3b3568b053c0 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Thu, 7 May 2026 11:54:50 +0800
Subject: [PATCH 07/17] Add WebGPU recipes for BERT multilingual, ViT, ResNet

Co-authored-by: Copilot <copilot@github.com>
---
 .../requirements/WebGPU/WebGPU_py3.12.13.txt  |  2 +
 .../aitk/bert_webgpu.json                     | 35 +++++++++++++
 .../aitk/bert_webgpu.json.config              | 47 +++++++++++++++++
 .../aitk/info.yml                             |  3 ++
 google-vit-base-patch16-224/aitk/info.yml     |  3 ++
 .../aitk/vit_webgpu.json                      | 51 +++++++++++++++++++
 .../aitk/vit_webgpu.json.config               | 47 +++++++++++++++++
 .../aitk/bert_webgpu.json                     |  5 +-
 microsoft-resnet-50/aitk/info.yml             |  3 ++
 microsoft-resnet-50/aitk/resnet_webgpu.json   | 51 +++++++++++++++++++
 .../aitk/resnet_webgpu.json.config            | 47 +++++++++++++++++
 11 files changed, 293 insertions(+), 1 deletion(-)
 create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json
 create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
 create mode 100644 google-vit-base-patch16-224/aitk/vit_webgpu.json
 create mode 100644 google-vit-base-patch16-224/aitk/vit_webgpu.json.config
 create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu.json
 create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu.json.config

diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
index 3768b67fa..3b4b6a6d5 100644
--- a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -40,6 +40,7 @@ onnx==1.21.0
 onnx-ir==0.2.1
 # install it separatly with no deps as it will install onnxruntime to overwrite onnxruntime-webgpu
 # uvpip:install onnxruntime-genai==0.13.2 --no-deps;post
+onnxoptimizer==0.4.2
 onnxruntime-webgpu==1.25.1
 onnxscript==0.7.0
 opentelemetry-api==1.41.1
@@ -65,6 +66,7 @@ setuptools==81.0.0
 six==1.17.0
 sqlalchemy==2.0.49
 sympy==1.14.0
+tabulate==0.10.0
 tokenizers==0.21.4
 torch==2.11.0+cu128
 torchmetrics==1.9.0
diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json
new file mode 100644
index 000000000..04815ae4b
--- /dev/null
+++ b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json
@@ -0,0 +1,35 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "google-bert/bert-base-multilingual-cased",
+        "task": "feature-extraction"
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/bert_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
new file mode 100644
index 000000000..2a8bbc29b
--- /dev/null
+++ b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/google-bert-bert-base-multilingual-cased/aitk/info.yml b/google-bert-bert-base-multilingual-cased/aitk/info.yml
index cdfa911f4..8f546a03a 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/info.yml
+++ b/google-bert-bert-base-multilingual-cased/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "bert-base-multilingual-cased_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "bert_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/google-bert/bert-base-multilingual-cased"
diff --git a/google-vit-base-patch16-224/aitk/info.yml b/google-vit-base-patch16-224/aitk/info.yml
index 5a4186142..c1f21f260 100644
--- a/google-vit-base-patch16-224/aitk/info.yml
+++ b/google-vit-base-patch16-224/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "vit-base-patch16-224_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "vit_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/google/vit-base-patch16-224"
diff --git a/google-vit-base-patch16-224/aitk/vit_webgpu.json b/google-vit-base-patch16-224/aitk/vit_webgpu.json
new file mode 100644
index 000000000..1b9f439e7
--- /dev/null
+++ b/google-vit-base-patch16-224/aitk/vit_webgpu.json
@@ -0,0 +1,51 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "google/vit-base-patch16-224",
+        "task": "image-classification",
+        "io_config": {
+            "input_names": [
+                "pixel_values"
+            ],
+            "input_shapes": [
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ]
+            ],
+            "output_names": [
+                "output"
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/vit_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/google-vit-base-patch16-224/aitk/vit_webgpu.json.config b/google-vit-base-patch16-224/aitk/vit_webgpu.json.config
new file mode 100644
index 000000000..2a8bbc29b
--- /dev/null
+++ b/google-vit-base-patch16-224/aitk/vit_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
index 96ede55b4..f67676762 100644
--- a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
+++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
@@ -2,7 +2,10 @@
     "input_model": {
         "type": "HfModel",
         "model_path": "Intel/bert-base-uncased-mrpc",
-        "task": "text-classification"
+        "task": "text-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        }
     },
     "systems": {
         "local_system": {
diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml
index fe9387cc5..f558f572e 100644
--- a/microsoft-resnet-50/aitk/info.yml
+++ b/microsoft-resnet-50/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "resnet_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "resnet_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/microsoft/resnet-50"
diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json b/microsoft-resnet-50/aitk/resnet_webgpu.json
new file mode 100644
index 000000000..1c44d2f51
--- /dev/null
+++ b/microsoft-resnet-50/aitk/resnet_webgpu.json
@@ -0,0 +1,51 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "microsoft/resnet-50",
+        "task": "image-classification",
+        "io_config": {
+            "input_names": [
+                "pixel_values"
+            ],
+            "input_shapes": [
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ]
+            ],
+            "output_names": [
+                "logits"
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/resnet_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json.config b/microsoft-resnet-50/aitk/resnet_webgpu.json.config
new file mode 100644
index 000000000..2a8bbc29b
--- /dev/null
+++ b/microsoft-resnet-50/aitk/resnet_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}

From c46cb4df8e5c0762c0ce1e77eda49e8df0b63c8e Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Thu, 7 May 2026 12:00:17 +0800
Subject: [PATCH 08/17] Add WebGPU recipe for CLIP models

Co-authored-by: Copilot <copilot@github.com>
---
 .../aitk/_copy.json.config                    | 15 ++++
 .../aitk/info.yml                             |  3 +
 openai-clip-vit-base-patch16/aitk/info.yml    |  3 +
 .../aitk/openai_clip_webgpu.json              | 90 +++++++++++++++++++
 .../aitk/openai_clip_webgpu.json.config       | 47 ++++++++++
 .../aitk/_copy.json.config                    | 15 ++++
 openai-clip-vit-base-patch32/aitk/info.yml    |  3 +
 7 files changed, 176 insertions(+)
 create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json
 create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config

diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
index f6ce51a00..c3c112dba 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
@@ -117,6 +117,21 @@
             "dst": "laion_clip_dml.json.config",
             "replacements": []
         },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json",
+            "dst": "laion_clip_webgpu.json",
+            "replacements": [
+                {
+                    "find": "openai/clip-vit-base-patch16",
+                    "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
+                }
+            ]
+        },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config",
+            "dst": "laion_clip_webgpu.json.config",
+            "replacements": []
+        },
         {
             "src": "laion_clip_dml.json",
             "dst": "laion_clip_migraphx.json",
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml
index ffde088d1..44669b315 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "laion_clip_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "laion_clip_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml
index 633a5133f..141dd7e9b 100644
--- a/openai-clip-vit-base-patch16/aitk/info.yml
+++ b/openai-clip-vit-base-patch16/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "openai_clip_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "openai_clip_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/openai/clip-vit-base-patch16"
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json
new file mode 100644
index 000000000..e0f5adc4e
--- /dev/null
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json
@@ -0,0 +1,90 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "openai/clip-vit-base-patch16",
+        "task": "zero-shot-image-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        },
+        "io_config": {
+            "input_names": [
+                "input_ids",
+                "pixel_values",
+                "attention_mask"
+            ],
+            "input_shapes": [
+                [
+                    10,
+                    77
+                ],
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ],
+                [
+                    10,
+                    77
+                ]
+            ],
+            "input_types": [
+                "int64",
+                "float32",
+                "int64"
+            ],
+            "output_names": [
+                "logits_per_image",
+                "logits_per_text",
+                "text_embeds",
+                "image_embeds"
+            ],
+            "output_shapes": [
+                [
+                    1,
+                    10
+                ],
+                [
+                    10,
+                    1
+                ],
+                [
+                    10,
+                    512
+                ],
+                [
+                    1,
+                    512
+                ]
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/clip_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config
new file mode 100644
index 000000000..2a8bbc29b
--- /dev/null
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config
index c6c72ccee..005a6cb5d 100644
--- a/openai-clip-vit-base-patch32/aitk/_copy.json.config
+++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config
@@ -109,6 +109,21 @@
             "dst": "openai_clip_dml.json.config",
             "replacements": []
         },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json",
+            "dst": "openai_clip_webgpu.json",
+            "replacements": [
+                {
+                    "find": "openai/clip-vit-base-patch16",
+                    "replace": "openai/clip-vit-base-patch32"
+                }
+            ]
+        },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config",
+            "dst": "openai_clip_webgpu.json.config",
+            "replacements": []
+        },
         {
             "src": "openai_clip_dml.json",
             "dst": "openai_clip_migraphx.json",
diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml
index 1f9fa87dc..b7476660f 100644
--- a/openai-clip-vit-base-patch32/aitk/info.yml
+++ b/openai-clip-vit-base-patch32/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "openai_clip_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "openai_clip_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/openai/clip-vit-base-patch32"

From 76f99a7ac96cde004eb6636db92b4fedc9930dfd Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Thu, 7 May 2026 12:04:06 +0800
Subject: [PATCH 09/17] sanitize

---
 .aitk/configs/checks.json                     |  8 +-
 .aitk/configs/model_list.json                 | 18 ++--
 .aitk/docs/guide/ModelList.md                 | 12 +--
 .../aitk/bert_webgpu.json.config              | 94 +++++++++----------
 .../aitk/model_project.config                 |  4 +
 .../aitk/model_project.config                 |  4 +
 .../aitk/vit_webgpu.json.config               | 94 +++++++++----------
 .../aitk/laion_clip_webgpu.json               | 90 ++++++++++++++++++
 .../aitk/laion_clip_webgpu.json.config        | 47 ++++++++++
 .../aitk/model_project.config                 |  4 +
 microsoft-resnet-50/aitk/model_project.config |  4 +
 .../aitk/resnet_webgpu.json.config            | 94 +++++++++----------
 .../aitk/model_project.config                 |  4 +
 .../aitk/openai_clip_webgpu.json.config       | 94 +++++++++----------
 .../aitk/model_project.config                 |  4 +
 .../aitk/openai_clip_webgpu.json              | 90 ++++++++++++++++++
 .../aitk/openai_clip_webgpu.json.config       | 47 ++++++++++
 17 files changed, 508 insertions(+), 204 deletions(-)
 create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json
 create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config
 create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json
 create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index fc2de1dd9..6d2666e0f 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,6 +1,6 @@
 {
-    "configCheck": 169,
-    "copyCheck": 182,
+    "configCheck": 175,
+    "copyCheck": 186,
     "extensionCheck": 2,
     "gitignoreCheck": 44,
     "inferenceModelCheck": 25,
@@ -8,8 +8,8 @@
     "licenseCheck": 41,
     "modelProjectCheck": 46,
     "oliveCheck": 88,
-    "oliveJsonCheck": 169,
-    "pathCheck": 1431,
+    "oliveJsonCheck": 175,
+    "pathCheck": 1449,
     "requirementsCheck": 37,
     "templateCheck": 3,
     "venvRequirementsCheck": 18
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index 1c7db6d3f..7768ed3d9 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -38,7 +38,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "CNN",
             "status": "Ready",
@@ -90,7 +91,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -115,7 +117,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -166,7 +169,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -215,7 +219,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -240,7 +245,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md
index f05df07c1..53fdec086 100644
--- a/.aitk/docs/guide/ModelList.md
+++ b/.aitk/docs/guide/ModelList.md
@@ -34,14 +34,14 @@
 
 | Model Name | Supported Runtimes |
 |------------|--------------------|
-| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [Qualcomm GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qnn_gpu.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [AMD GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json) |
+| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [Qualcomm GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qnn_gpu.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [AMD GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json), [WebGPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json) |
 | [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [Qualcomm GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qnn_gpu.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [AMD GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_migraphx.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json), [WebGPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json) |
 | [Chinese Clip Vit Base Patch16](https://huggingface.co/OFA-Sys/chinese-clip-vit-base-patch16) | [Intel CPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json) |
-| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [Qualcomm GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn_gpu.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [AMD GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json) |
-| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) |
-| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json) |
+| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [Qualcomm GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn_gpu.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [AMD GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json), [WebGPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json) |
+| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json) |
+| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json) |
 | [Clip Vit Large Patch14](https://huggingface.co/openai/clip-vit-large-patch14) | [Qualcomm NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-large-patch14/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-large-patch14/aitk/openai_clip_dml.json) |
-| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [Qualcomm GPU](../../../microsoft-resnet-50/aitk/resnet_qnn_gpu.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json) |
+| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [Qualcomm GPU](../../../microsoft-resnet-50/aitk/resnet_qnn_gpu.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) |
 | [Stable Diffusion V1 5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) | [Qualcomm NPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_qnn_workflow.json), [Intel CPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json), [Intel GPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json) |
-| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [Qualcomm GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qnn_gpu.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json) |
+| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [Qualcomm GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qnn_gpu.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit_webgpu.json) |
 | [Whisper Large V3 Turbo](https://huggingface.co/openai/whisper-large-v3-turbo) | [Qualcomm NPU](../../../openai-whisper-large-v3-turbo/aitk/qnn_workflow.json) |
diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
index 2a8bbc29b..4575e8895 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
+++ b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
@@ -1,47 +1,47 @@
-{
-    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
-    "name": "Convert to WebGPU",
-    "evalRuntime": "WebGPU",
-    "runtimeOverwrite": {
-        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
-    },
-    "runtime": {
-        "autoGenerated": true,
-        "name": "Evaluate on",
-        "type": "enum",
-        "displayNames": [
-            "WebGPU"
-        ],
-        "path": "systems.local_system.accelerators.0.execution_providers.0",
-        "values": [
-            "WebGpuExecutionProvider"
-        ],
-        "readOnly": false
-    },
-    "optimizationPaths": [
-        {
-            "path": "passes.conversion",
-            "name": "fp32"
-        }
-    ],
-    "optimizationDefault": "fp32",
-    "sections": [
-        {
-            "autoGenerated": true,
-            "name": "Convert",
-            "phase": "Conversion",
-            "parameters": [],
-            "toggle": {
-                "autoGenerated": true,
-                "name": "Convert to ONNX format",
-                "type": "bool",
-                "path": "passes.conversion",
-                "actions": [
-                    [],
-                    []
-                ],
-                "readOnly": true
-            }
-        }
-    ]
-}
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/google-bert-bert-base-multilingual-cased/aitk/model_project.config b/google-bert-bert-base-multilingual-cased/aitk/model_project.config
index 0c243b19c..647418708 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/model_project.config
+++ b/google-bert-bert-base-multilingual-cased/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "bert-base-multilingual-cased_qnn_gpu.json",
             "templateName": "bert-base-multilingual-cased_qnn_gpu"
+        },
+        {
+            "file": "bert_webgpu.json",
+            "templateName": "bert_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/google-vit-base-patch16-224/aitk/model_project.config b/google-vit-base-patch16-224/aitk/model_project.config
index a06dd8750..149292581 100644
--- a/google-vit-base-patch16-224/aitk/model_project.config
+++ b/google-vit-base-patch16-224/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "vit-base-patch16-224_qnn_gpu.json",
             "templateName": "vit-base-patch16-224_qnn_gpu"
+        },
+        {
+            "file": "vit_webgpu.json",
+            "templateName": "vit_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/google-vit-base-patch16-224/aitk/vit_webgpu.json.config b/google-vit-base-patch16-224/aitk/vit_webgpu.json.config
index 2a8bbc29b..4575e8895 100644
--- a/google-vit-base-patch16-224/aitk/vit_webgpu.json.config
+++ b/google-vit-base-patch16-224/aitk/vit_webgpu.json.config
@@ -1,47 +1,47 @@
-{
-    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
-    "name": "Convert to WebGPU",
-    "evalRuntime": "WebGPU",
-    "runtimeOverwrite": {
-        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
-    },
-    "runtime": {
-        "autoGenerated": true,
-        "name": "Evaluate on",
-        "type": "enum",
-        "displayNames": [
-            "WebGPU"
-        ],
-        "path": "systems.local_system.accelerators.0.execution_providers.0",
-        "values": [
-            "WebGpuExecutionProvider"
-        ],
-        "readOnly": false
-    },
-    "optimizationPaths": [
-        {
-            "path": "passes.conversion",
-            "name": "fp32"
-        }
-    ],
-    "optimizationDefault": "fp32",
-    "sections": [
-        {
-            "autoGenerated": true,
-            "name": "Convert",
-            "phase": "Conversion",
-            "parameters": [],
-            "toggle": {
-                "autoGenerated": true,
-                "name": "Convert to ONNX format",
-                "type": "bool",
-                "path": "passes.conversion",
-                "actions": [
-                    [],
-                    []
-                ],
-                "readOnly": true
-            }
-        }
-    ]
-}
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json
new file mode 100644
index 000000000..94d4dbae2
--- /dev/null
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json
@@ -0,0 +1,90 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
+        "task": "zero-shot-image-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        },
+        "io_config": {
+            "input_names": [
+                "input_ids",
+                "pixel_values",
+                "attention_mask"
+            ],
+            "input_shapes": [
+                [
+                    10,
+                    77
+                ],
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ],
+                [
+                    10,
+                    77
+                ]
+            ],
+            "input_types": [
+                "int64",
+                "float32",
+                "int64"
+            ],
+            "output_names": [
+                "logits_per_image",
+                "logits_per_text",
+                "text_embeds",
+                "image_embeds"
+            ],
+            "output_shapes": [
+                [
+                    1,
+                    10
+                ],
+                [
+                    10,
+                    1
+                ],
+                [
+                    10,
+                    512
+                ],
+                [
+                    1,
+                    512
+                ]
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/clip_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config
index a9a556885..59a48c533 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "laion_clip_qnn_gpu.json",
             "templateName": "laion_clip_qnn_gpu"
+        },
+        {
+            "file": "laion_clip_webgpu.json",
+            "templateName": "laion_clip_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/microsoft-resnet-50/aitk/model_project.config b/microsoft-resnet-50/aitk/model_project.config
index d11ed84be..90865571d 100644
--- a/microsoft-resnet-50/aitk/model_project.config
+++ b/microsoft-resnet-50/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "resnet_qnn_gpu.json",
             "templateName": "resnet_qnn_gpu"
+        },
+        {
+            "file": "resnet_webgpu.json",
+            "templateName": "resnet_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json.config b/microsoft-resnet-50/aitk/resnet_webgpu.json.config
index 2a8bbc29b..4575e8895 100644
--- a/microsoft-resnet-50/aitk/resnet_webgpu.json.config
+++ b/microsoft-resnet-50/aitk/resnet_webgpu.json.config
@@ -1,47 +1,47 @@
-{
-    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
-    "name": "Convert to WebGPU",
-    "evalRuntime": "WebGPU",
-    "runtimeOverwrite": {
-        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
-    },
-    "runtime": {
-        "autoGenerated": true,
-        "name": "Evaluate on",
-        "type": "enum",
-        "displayNames": [
-            "WebGPU"
-        ],
-        "path": "systems.local_system.accelerators.0.execution_providers.0",
-        "values": [
-            "WebGpuExecutionProvider"
-        ],
-        "readOnly": false
-    },
-    "optimizationPaths": [
-        {
-            "path": "passes.conversion",
-            "name": "fp32"
-        }
-    ],
-    "optimizationDefault": "fp32",
-    "sections": [
-        {
-            "autoGenerated": true,
-            "name": "Convert",
-            "phase": "Conversion",
-            "parameters": [],
-            "toggle": {
-                "autoGenerated": true,
-                "name": "Convert to ONNX format",
-                "type": "bool",
-                "path": "passes.conversion",
-                "actions": [
-                    [],
-                    []
-                ],
-                "readOnly": true
-            }
-        }
-    ]
-}
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/openai-clip-vit-base-patch16/aitk/model_project.config b/openai-clip-vit-base-patch16/aitk/model_project.config
index a15e48590..120402cf4 100644
--- a/openai-clip-vit-base-patch16/aitk/model_project.config
+++ b/openai-clip-vit-base-patch16/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "openai_clip_qnn_gpu.json",
             "templateName": "openai_clip_qnn_gpu"
+        },
+        {
+            "file": "openai_clip_webgpu.json",
+            "templateName": "openai_clip_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config
index 2a8bbc29b..4575e8895 100644
--- a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config
@@ -1,47 +1,47 @@
-{
-    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
-    "name": "Convert to WebGPU",
-    "evalRuntime": "WebGPU",
-    "runtimeOverwrite": {
-        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
-    },
-    "runtime": {
-        "autoGenerated": true,
-        "name": "Evaluate on",
-        "type": "enum",
-        "displayNames": [
-            "WebGPU"
-        ],
-        "path": "systems.local_system.accelerators.0.execution_providers.0",
-        "values": [
-            "WebGpuExecutionProvider"
-        ],
-        "readOnly": false
-    },
-    "optimizationPaths": [
-        {
-            "path": "passes.conversion",
-            "name": "fp32"
-        }
-    ],
-    "optimizationDefault": "fp32",
-    "sections": [
-        {
-            "autoGenerated": true,
-            "name": "Convert",
-            "phase": "Conversion",
-            "parameters": [],
-            "toggle": {
-                "autoGenerated": true,
-                "name": "Convert to ONNX format",
-                "type": "bool",
-                "path": "passes.conversion",
-                "actions": [
-                    [],
-                    []
-                ],
-                "readOnly": true
-            }
-        }
-    ]
-}
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/openai-clip-vit-base-patch32/aitk/model_project.config b/openai-clip-vit-base-patch32/aitk/model_project.config
index 084fa3a91..6226974d8 100644
--- a/openai-clip-vit-base-patch32/aitk/model_project.config
+++ b/openai-clip-vit-base-patch32/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "openai_clip_qnn_gpu.json",
             "templateName": "openai_clip_qnn_gpu"
+        },
+        {
+            "file": "openai_clip_webgpu.json",
+            "templateName": "openai_clip_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json
new file mode 100644
index 000000000..7f8d0bd3f
--- /dev/null
+++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json
@@ -0,0 +1,90 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "openai/clip-vit-base-patch32",
+        "task": "zero-shot-image-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        },
+        "io_config": {
+            "input_names": [
+                "input_ids",
+                "pixel_values",
+                "attention_mask"
+            ],
+            "input_shapes": [
+                [
+                    10,
+                    77
+                ],
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ],
+                [
+                    10,
+                    77
+                ]
+            ],
+            "input_types": [
+                "int64",
+                "float32",
+                "int64"
+            ],
+            "output_names": [
+                "logits_per_image",
+                "logits_per_text",
+                "text_embeds",
+                "image_embeds"
+            ],
+            "output_shapes": [
+                [
+                    1,
+                    10
+                ],
+                [
+                    10,
+                    1
+                ],
+                [
+                    10,
+                    512
+                ],
+                [
+                    1,
+                    512
+                ]
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/clip_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}

From 0b6de73c42bb06a83fcd3f5405baf130472ef121 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Thu, 7 May 2026 14:51:20 +0800
Subject: [PATCH 10/17] cuda 130 works

---
 .aitk/requirements/WebGPU/WebGPU_py3.12.13.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
index 3b4b6a6d5..cf1e762a5 100644
--- a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -1,4 +1,4 @@
---extra-index-url https://download.pytorch.org/whl/cu128
+--extra-index-url https://download.pytorch.org/whl/cu130
 accelerate==1.13.0
 aiohappyeyeballs==2.6.1
 aiohttp==3.13.5
@@ -68,7 +68,7 @@ sqlalchemy==2.0.49
 sympy==1.14.0
 tabulate==0.10.0
 tokenizers==0.21.4
-torch==2.11.0+cu128
+torch==2.11.0+cu130
 torchmetrics==1.9.0
 tqdm==4.67.3
 transformers==4.52.4

From 5af1ceb91438b826e9b6e75c9792377138d31751 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Fri, 8 May 2026 10:11:28 +0800
Subject: [PATCH 11/17] for Qwen3VLForConditionalGeneration

---
 .aitk/requirements/WebGPU/WebGPU_py3.12.13.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
index cf1e762a5..f12a05e18 100644
--- a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -67,11 +67,11 @@ six==1.17.0
 sqlalchemy==2.0.49
 sympy==1.14.0
 tabulate==0.10.0
-tokenizers==0.21.4
+tokenizers==0.22.2
 torch==2.11.0+cu130
 torchmetrics==1.9.0
 tqdm==4.67.3
-transformers==4.52.4
+transformers==4.57.6
 typing-extensions==4.15.0
 typing-inspection==0.4.2
 tzdata==2026.2

From 9b7bc87459d7bef6e01388000cd9334e6b8bbe37 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Fri, 8 May 2026 10:26:33 +0800
Subject: [PATCH 12/17] versions must match exactly: revert tokenizers/transformers bump, use onnxruntime-genai 0.12.2

---
 .aitk/requirements/WebGPU/WebGPU_py3.12.13.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
index f12a05e18..b0ab103ce 100644
--- a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -39,7 +39,7 @@ olive-ai==0.12.1
 onnx==1.21.0
 onnx-ir==0.2.1
 # install it separatly with no deps as it will install onnxruntime to overwrite onnxruntime-webgpu
-# uvpip:install onnxruntime-genai==0.13.2 --no-deps;post
+# uvpip:install onnxruntime-genai==0.12.2 --no-deps;post
 onnxoptimizer==0.4.2
 onnxruntime-webgpu==1.25.1
 onnxscript==0.7.0
@@ -67,11 +67,11 @@ six==1.17.0
 sqlalchemy==2.0.49
 sympy==1.14.0
 tabulate==0.10.0
-tokenizers==0.22.2
+tokenizers==0.21.4
 torch==2.11.0+cu130
 torchmetrics==1.9.0
 tqdm==4.67.3
-transformers==4.57.6
+transformers==4.52.4
 typing-extensions==4.15.0
 typing-inspection==0.4.2
 tzdata==2026.2

From beb508049d0cc140e19f22d53785b7ffb74c3e9b Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Fri, 8 May 2026 11:20:43 +0800
Subject: [PATCH 13/17] add llama

---
 .aitk/configs/checks.json                     |  8 +-
 .aitk/configs/model_list.json                 |  3 +-
 .aitk/docs/guide/ModelList.md                 |  2 +-
 .../aitk/_copy.json.config                    | 14 +++
 .../aitk/info.yml                             |  6 ++
 .../aitk/llama3_2_webgpu.json                 | 73 ++++++++++++++
 .../aitk/llama3_2_webgpu.json.config          | 95 +++++++++++++++++++
 .../aitk/model_project.config                 |  4 +
 8 files changed, 199 insertions(+), 6 deletions(-)
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json.config

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index 6d2666e0f..9f01f55f7 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,6 +1,6 @@
 {
-    "configCheck": 175,
-    "copyCheck": 186,
+    "configCheck": 176,
+    "copyCheck": 187,
     "extensionCheck": 2,
     "gitignoreCheck": 44,
     "inferenceModelCheck": 25,
@@ -8,8 +8,8 @@
     "licenseCheck": 41,
     "modelProjectCheck": 46,
     "oliveCheck": 88,
-    "oliveJsonCheck": 175,
-    "pathCheck": 1449,
+    "oliveJsonCheck": 176,
+    "pathCheck": 1454,
     "requirementsCheck": 37,
     "templateCheck": 3,
     "venvRequirementsCheck": 18
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index 7768ed3d9..06ca4ae3f 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -194,7 +194,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md
index 53fdec086..4bf60e316 100644
--- a/.aitk/docs/guide/ModelList.md
+++ b/.aitk/docs/guide/ModelList.md
@@ -9,7 +9,7 @@
 | [Deepseek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_trtrtx.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_npu_config.json) |
 | [Deepseek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_trtrtx.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_npu_config.json) |
 | [Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [DirectML](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_dml_config.json) |
-| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [Qualcomm GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_gpu_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json) |
+| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [Qualcomm GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_gpu_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json), [WebGPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json) |
 | [Mistral 7B Instruct V0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) | [AMD NPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_trtrtx.json), [Intel CPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_gpu_context_ov_dy.json), [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_gpu_context_ov_dy.json), [Intel NPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_npu_context_ov_dy.json) |
 | [Mistral 7B Instruct V0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) | [Intel CPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json), [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json) |
 | [Phi 3 Mini 128K Instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_qnn.json), [AMD NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_trtrtx.json), [Intel CPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json), [Intel GPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_npu_config.json) |
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
index d539528bc..74763f29e 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
@@ -42,6 +42,20 @@
                 }
             ]
         },
+        {
+            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json",
+            "dst": "llama3_2_webgpu.json",
+            "replacements": [
+                {
+                    "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+                    "replace": "meta-llama/Llama-3.2-1B-Instruct"
+                },
+                {
+                    "find": "model/deepseek",
+                    "replace": "model/llama3_2"
+                }
+            ]
+        },
         {
             "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
             "dst": "README.md",
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
index cf6187b73..aabb016b0 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
@@ -39,6 +39,12 @@ recipes:
         isGPURequired: true
         runtimeOverwrite:
           executeEp: NvTensorRTRTXExecutionProvider
+    - file: "llama3_2_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/meta-llama/Llama-3.2-1B-Instruct"
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
new file mode 100644
index 000000000..67572ad6d
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
@@ -0,0 +1,73 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "meta-llama/Llama-3.2-1B-Instruct",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "kld_gradient",
+            "bits": 4,
+            "high_bits": 8,
+            "ratio": 0.65,
+            "sym": false,
+            "group_size": 32
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/llama3_2",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json.config
new file mode 100644
index 000000000..8c3a740ee
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json.config
@@ -0,0 +1,95 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
index 3887e2f47..90d1e0ba0 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "llama3_2_qnn_gpu_config.json",
             "templateName": "llama3_2_qnn_gpu_config"
+        },
+        {
+            "file": "llama3_2_webgpu.json",
+            "templateName": "llama3_2_webgpu"
         }
     ],
     "modelInfo": {

From a44c252277e34f888197a0d9ba359deb00bba3f2 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Fri, 8 May 2026 12:01:32 +0800
Subject: [PATCH 14/17] add qwen, phi

---
 .aitk/configs/checks.json                     |  8 +-
 .aitk/configs/model_list.json                 |  6 +-
 .aitk/docs/guide/ModelList.md                 |  4 +-
 .../aitk/_copy.json.config                    | 14 +++
 Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml      |  6 ++
 .../aitk/model_project.config                 |  4 +
 .../aitk/qwen2_5_webgpu.json                  | 73 ++++++++++++++
 .../aitk/qwen2_5_webgpu.json.config           | 95 +++++++++++++++++++
 .../aitk/_copy.json.config                    | 14 +++
 microsoft-Phi-3.5-mini-instruct/aitk/info.yml |  6 ++
 .../aitk/model_project.config                 |  4 +
 .../aitk/phi3_5_webgpu.json                   | 73 ++++++++++++++
 .../aitk/phi3_5_webgpu.json.config            | 95 +++++++++++++++++++
 13 files changed, 394 insertions(+), 8 deletions(-)
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json.config
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json.config

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index 9f01f55f7..0e8897cac 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,6 +1,6 @@
 {
-    "configCheck": 176,
-    "copyCheck": 187,
+    "configCheck": 178,
+    "copyCheck": 189,
     "extensionCheck": 2,
     "gitignoreCheck": 44,
     "inferenceModelCheck": 25,
@@ -8,8 +8,8 @@
     "licenseCheck": 41,
     "modelProjectCheck": 46,
     "oliveCheck": 88,
-    "oliveJsonCheck": 176,
-    "pathCheck": 1454,
+    "oliveJsonCheck": 178,
+    "pathCheck": 1464,
     "requirementsCheck": 37,
     "templateCheck": 3,
     "venvRequirementsCheck": 18
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index 06ca4ae3f..9088cc137 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -13,7 +13,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -273,7 +274,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md
index 4bf60e316..fd663b079 100644
--- a/.aitk/docs/guide/ModelList.md
+++ b/.aitk/docs/guide/ModelList.md
@@ -14,14 +14,14 @@
 | [Mistral 7B Instruct V0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) | [Intel CPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json), [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json) |
 | [Phi 3 Mini 128K Instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_qnn.json), [AMD NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_trtrtx.json), [Intel CPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json), [Intel GPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_npu_config.json) |
 | [Phi 3 Mini 4K Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_qnn.json), [AMD NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_trtrtx.json), [Intel CPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_config.json), [Intel GPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_npu_config.json) |
-| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [Qualcomm GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_gpu_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json) |
+| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [Qualcomm GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_gpu_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json), [WebGPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json) |
 | [Phi 4](https://huggingface.co/microsoft/Phi-4) | [NVIDIA TensorRT for RTX](../../../microsoft-Phi-4/aitk/phi4_trtrtx.json), [Intel CPU](../../../microsoft-Phi-4/aitk/phi4_ov_config.json), [Intel GPU](../../../microsoft-Phi-4/aitk/phi4_ov_config.json) |
 | [Phi 4 Mini Instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_qnn.json), [AMD NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json), [Intel CPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_config.json), [Intel GPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_npu_config.json) |
 | [Phi 4 Mini Reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) | [AMD NPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json), [Intel CPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json) |
 | [Phi 4 Reasoning](https://huggingface.co/microsoft/Phi-4-reasoning) | [Intel NPU](../../../microsoft-Phi-4-reasoning/aitk/phi4_ov_config.json) |
 | [Phi 4 Reasoning Plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) | [Intel NPU](../../../microsoft-Phi-4-reasoning-plus/aitk/phi4_ov_config.json) |
 | [Qwen2.5 0.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | [AMD NPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
-| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [Qualcomm GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_gpu_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json) |
+| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [Qualcomm GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_gpu_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json), [WebGPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json) |
 | [Qwen2.5 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct) | [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
 | [Qwen2.5 3B Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) | [Intel CPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
 | [Qwen2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_qnn_config.json), [AMD NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
index 6a6e71a41..e38c1711a 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
@@ -28,6 +28,20 @@
                 }
             ]
         },
+        {
+            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json",
+            "dst": "qwen2_5_webgpu.json",
+            "replacements": [
+                {
+                    "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+                    "replace": "Qwen/Qwen2.5-1.5B-Instruct"
+                },
+                {
+                    "find": "model/deepseek",
+                    "replace": "model/qwen2_5"
+                }
+            ]
+        },
         {
             "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
             "dst": "README.md",
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
index 09794cb70..f8f342e08 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
@@ -39,6 +39,12 @@ recipes:
         isGPURequired: true
         runtimeOverwrite:
           executeEp: NvTensorRTRTXExecutionProvider
+    - file: "qwen2_5_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
index 8b192b6f5..214e98239 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "qwen2_5_qnn_gpu_config.json",
             "templateName": "qwen2_5_qnn_gpu_config"
+        },
+        {
+            "file": "qwen2_5_webgpu.json",
+            "templateName": "qwen2_5_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
new file mode 100644
index 000000000..f574e4b04
--- /dev/null
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
@@ -0,0 +1,73 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "Qwen/Qwen2.5-1.5B-Instruct",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "kld_gradient",
+            "bits": 4,
+            "high_bits": 8,
+            "ratio": 0.65,
+            "sym": false,
+            "group_size": 32
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/qwen2_5",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json.config
new file mode 100644
index 000000000..8c3a740ee
--- /dev/null
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json.config
@@ -0,0 +1,95 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
index cee9caa7b..36e8b8627 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
@@ -14,6 +14,20 @@
                 }
             ]
         },
+        {
+            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json",
+            "dst": "phi3_5_webgpu.json",
+            "replacements": [
+                {
+                    "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+                    "replace": "microsoft/Phi-3.5-mini-instruct"
+                },
+                {
+                    "find": "model/deepseek",
+                    "replace": "model/phi3_5"
+                }
+            ]
+        },
         {
             "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
             "dst": "README.md",
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
index eba2844ca..c30fdee22 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
@@ -39,6 +39,12 @@ recipes:
         isGPURequired: true
         runtimeOverwrite:
           executeEp: NvTensorRTRTXExecutionProvider
+    - file: "phi3_5_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/microsoft/Phi-3.5-mini-instruct"
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
index d5d2fe50b..67ed43574 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "phi3_5_qnn_gpu_config.json",
             "templateName": "phi3_5_qnn_gpu_config"
+        },
+        {
+            "file": "phi3_5_webgpu.json",
+            "templateName": "phi3_5_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
new file mode 100644
index 000000000..f71026229
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
@@ -0,0 +1,73 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "microsoft/Phi-3.5-mini-instruct",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "kld_gradient",
+            "bits": 4,
+            "high_bits": 8,
+            "ratio": 0.65,
+            "sym": false,
+            "group_size": 32
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/phi3_5",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json.config
new file mode 100644
index 000000000..8c3a740ee
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json.config
@@ -0,0 +1,95 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}

From bbb7fbab69b96ca556a5f81da20c17dce201d429 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Sat, 9 May 2026 10:30:25 +0800
Subject: [PATCH 15/17] guide

---
 .aitk/docs/others/FIX_GUIDE.md      | 330 ++++++++++++++++++++++
 .aitk/docs/others/fix_onnx_model.py | 414 ++++++++++++++++++++++++++++
 2 files changed, 744 insertions(+)
 create mode 100644 .aitk/docs/others/FIX_GUIDE.md
 create mode 100644 .aitk/docs/others/fix_onnx_model.py

diff --git a/.aitk/docs/others/FIX_GUIDE.md b/.aitk/docs/others/FIX_GUIDE.md
new file mode 100644
index 000000000..f69f24b17
--- /dev/null
+++ b/.aitk/docs/others/FIX_GUIDE.md
@@ -0,0 +1,330 @@
+# Generic WebGPU ONNX Model QKV Fix Guide
+
+## Problem Description
+
+WebGPU-converted ONNX models (DeepSeek, Llama, and others) that use a combined qkv_proj structure can fail shape inference with a dimension mismatch in specific layers:
+
+```
+Node (/model/layers.X/attn/o_proj/MatMulNBits) Op (MatMulNBits) 
+[ShapeInferenceError] Incompatible dimensions for matrix multiplication
+```
+
+### Root Cause
+
+These layers have a **combined qkv_proj** structure (Q, K, V packed into one output), but the GroupQueryAttention operation was misconfigured:
+
+| Issue | Problem |
+|-------|---------|
+| **Q input** | Receiving full 2048-dim qkv output instead of just Q (1536 dims) |
+| **K input** | Using K from previous layer instead of current layer (256 dims from wrong source) |
+| **V input** | Using V from previous layer instead of current layer (256 dims from wrong source) |
+| **Result** | GroupQueryAttention produces mismatched output → o_proj fails |
+
+### Layer Structure
+
+Different models have this issue in different layers:
+
+| Model | Layers with combined qkv_proj | Total QKV | Q | K | V |
+|-------|-------------------------------|-----------|---|---|---|
+| DeepSeek-R1-Distill-Qwen-1.5B | 0, 6, 8, 12, 25, 26, 27 | 2048 | 1536 | 256 | 256 |
+| Llama-3.2-1B | 2, 5, 6, 8, 10, 13 | 3072 | 2048 | 512 | 512 |
+
+The `fix_onnx_model.py` script detects this information automatically.
+
+## Solution
+
+For each affected layer, extract Q, K, V from the combined qkv_proj using Slice operations:
+
+```
+qkv_proj output (total_qkv dims):
+  [0:q_dim]                → Q dimensions
+  [q_dim:q_dim+k_dim]      → K dimensions  
+  [q_dim+k_dim:total_qkv]  → V dimensions
+
+GroupQueryAttention uses extracted Q, K, V → output matches o_proj expectations
+```
+
+**Example dimensions:**
+- **DeepSeek:** [0:1536] Q, [1536:1792] K, [1792:2048] V
+- **Llama:** [0:2048] Q, [2048:2560] K, [2560:3072] V
+
+## Implementation
+
+### Quick Start (Auto-Detect)
+
+The script automatically detects affected layers and dimensions:
+
+```bash
+# From the model directory
+cd ./model
+
+# Run the fix (auto-detects everything)
+python ../fix_onnx_model.py model.onnx
+
+# Verify the fix
+python ../fix_onnx_model.py model.onnx --verify
+```
+
+### Using Configuration File
+
+For reproducibility or multiple models, create a `config.json`:
+
+```json
+{
+  "layers_to_fix": [0, 6, 8, 12, 25, 26, 27],
+  "q_dim": 1536,
+  "k_dim": 256,
+  "v_dim": 256
+}
+```
+
+Then run:
+```bash
+python fix_onnx_model.py model.onnx --config config.json
+```
+
+### Examples for Common Models
+
+**DeepSeek-R1-Distill-Qwen-1.5B config.json:**
+```json
+{
+  "layers_to_fix": [0, 6, 8, 12, 25, 26, 27],
+  "q_dim": 1536,
+  "k_dim": 256,
+  "v_dim": 256
+}
+```
+
+**Llama-3.2-1B config.json:**
+```json
+{
+  "layers_to_fix": [2, 5, 6, 8, 10, 13],
+  "q_dim": 2048,
+  "k_dim": 512,
+  "v_dim": 512
+}
+```
+
+### Manual Implementation (Advanced)
+
+If you need to integrate this into your own code:
+
+```python
+from fix_onnx_model import fix_webgpu_qkv_model, verify_fix
+
+# Auto-detect (recommended)
+fix_webgpu_qkv_model('model.onnx')
+
+# Or with explicit parameters
+fix_webgpu_qkv_model(
+    'model.onnx',
+    layers_to_fix=[2, 5, 6, 8, 10, 13],  # Llama layers
+    q_dim=2048,
+    k_dim=512,
+    v_dim=512,
+    auto_detect=False  # Use provided values only
+)
+
+# Verify
+verify_fix('model.onnx', verbose=True)
+```
+
+## Key Technical Details
+
+### ONNX Slice Syntax
+
+The `Slice` operator (opset 21) takes inputs in this order:
+```
+Slice(data, starts, ends, [axes], [steps])
+```
+
+- **data:** Input tensor to slice
+- **starts:** Tensor with starting indices
+- **ends:** Tensor with ending indices  
+- **axes:** (optional) Tensor specifying which axes to slice (e.g., [2] for axis 2)
+- **steps:** (optional) Step size for each axis
+
+**Important:** Pass `axes` as an input tensor, NOT as an attribute. `axes` was an attribute only in Slice opset 1; from opset 10 onward it is an input, so attribute-style code written for the old opset fails.
+
+### Data Type Consistency
+
+All new tensors must use the same element type as the qkv_proj output (**FLOAT16** for the models above) to match:
+- Input: `qkv_proj/Add/output_0` (FLOAT16)
+- Output: `GroupQueryAttention/output_0` (FLOAT16)
+- Subsequent layers expect FLOAT16 inputs
+
+### Dimension Breakdown
+
+The exact dimensions depend on your model's architecture:
+
+**DeepSeek-R1-Distill-Qwen-1.5B:**
+- num_heads=12, kv_num_heads=2, head_dim=128
+- Q: 12 × 128 = 1536
+- K: 2 × 128 = 256  
+- V: 2 × 128 = 256
+- Total: 1536 + 256 + 256 = 2048
+
+**Llama-3.2-1B:**
+- num_heads=32, kv_num_heads=8, head_dim=64
+- Q: 32 × 64 = 2048
+- K: 8 × 64 = 512
+- V: 8 × 64 = 512
+- Total: 2048 + 512 + 512 = 3072
+
+To find these for any model:
+```python
+import onnx
+
+model = onnx.load('model.onnx', load_external_data=False)
+for vi in model.graph.value_info:
+    if 'layers.0/attn/qkv_proj' in vi.name and 'output' in vi.name:
+        qkv_dim = vi.type.tensor_type.shape.dim[-1].dim_value
+        print(f"Total QKV dimension: {qkv_dim}")
+        break
+
+for node in model.graph.node:
+    if 'layers.0/attn/o_proj' in node.name:
+        for attr in node.attribute:
+            if attr.name == 'K':
+                print(f"Q dimension (from o_proj K): {attr.i}")
+        break
+```
+
+## Verification
+
+After applying the fix, verify that each affected `GroupQueryAttention` node reads the extracted Q, K, and V tensors:
+
+```python
+import onnx
+
+model = onnx.load('model.onnx', load_external_data=False)
+layers_to_check = [0, 6, 8, 12, 25, 26, 27]  # Or your model's layers
+
+for layer_id in layers_to_check:
+    for node in model.graph.node:
+        if node.name == f'/model/layers.{layer_id}/attn/GroupQueryAttention':
+            print(f"Layer {layer_id}:")
+            print(f"  Q: {node.input[0]}")     # Should be q_proj_extracted
+            print(f"  K: {node.input[1]}")     # Should be k_proj_extracted
+            print(f"  V: {node.input[2]}")     # Should be v_proj_extracted
+            break
+```
+
+Expected pattern for fixed model:
+```
+Layer 0:
+  Q: /model/layers.0/attn/q_proj_extracted/output_0
+  K: /model/layers.0/attn/k_proj_extracted/output_0
+  V: /model/layers.0/attn/v_proj_extracted/output_0
+```
+
+The script's `--verify` flag does this automatically:
+```bash
+python fix_onnx_model.py model.onnx --verify
+```
+
+## Usage Example
+
+### DeepSeek-R1-Distill-Qwen-1.5B
+
+```bash
+cd C:\path\to\deepseek\model
+python fix_onnx_model.py model/model.onnx
+```
+
+### Llama-3.2-1B  
+
+```bash
+cd C:\path\to\llama\model
+python fix_onnx_model.py model/model.onnx
+```
+
+Both commands detect layers and dimensions automatically. After the fix, your inference notebooks should work without shape inference errors:
+
+```python
+import onnxruntime_genai as og
+
+# Model now loads successfully
+model = og.Model('./model')
+tokenizer = og.Tokenizer(model)
+
+# Inference works correctly
+generator = og.Generator(model, params)
+```
+
+## Detecting This Issue
+
+If your WebGPU-converted model fails with shape inference errors, you can check if it has this issue:
+
+```python
+import onnx
+
+model = onnx.load('model.onnx', load_external_data=False)
+
+print("=== Checking for QKV cross-layer references ===")
+affected_layers = []
+
+for i in range(64):
+    gqa_node = None
+    for node in model.graph.node:
+        if node.name == f'/model/layers.{i}/attn/GroupQueryAttention':
+            gqa_node = node
+            break
+    
+    if not gqa_node:
+        continue
+    
+    has_qkv = any(f'layers.{i}/attn' in n.name and 'qkv_proj' in n.name 
+                  for n in model.graph.node)
+    
+    if has_qkv:
+        # Check if K/V come from different layers
+        k_input = gqa_node.input[1]
+        v_input = gqa_node.input[2]
+        
+        if f'layers.{i}' not in k_input or f'layers.{i}' not in v_input:
+            print(f"  ✗ Layer {i}: Cross-layer reference detected")
+            affected_layers.append(i)
+
+if affected_layers:
+    print(f"\nFix required for layers: {affected_layers}")
+else:
+    print("\nNo cross-layer references detected - model may not need fixing")
+```
+
+Typical output for affected models:
+```
+✗ Layer 2: Cross-layer reference detected
+✗ Layer 5: Cross-layer reference detected
+✗ Layer 6: Cross-layer reference detected
+...
+Fix required for layers: [2, 5, 6, 8, 10, 13]
+```
+
+## Troubleshooting
+
+| Error | Solution |
+|-------|----------|
+| `Unrecognized attribute: axes for operator Slice` | Ensure `axes` is passed as an input tensor, not an attribute (automatic in script) |
+| `Type (tensor(float)) does not match expected type (tensor(float16))` | Verify all new tensors use correct data type - script auto-detects this |
+| `Incompatible dimensions for matrix multiplication` | Confirm Slice indices match your model's dimensions (script auto-detects) |
+| Model still fails after fix | Run with `--verify` flag to check all layers were processed correctly |
+| Auto-detection doesn't work | Provide explicit config with `--config` flag |
+
+## Supported Models
+
+This fix has been tested on:
+- ✅ DeepSeek-R1-Distill-Qwen-1.5B
+- ✅ Llama-3.2-1B-Instruct
+- ✅ Other WebGPU-converted models with similar cross-layer QKV issues
+
+When applying this fix to other models, auto-detection should handle most cases. For models with non-standard structures, use the config file approach.
+
+## References
+
+- ONNX Slice operator: https://onnx.ai/onnx/operators/onnx__Slice.html
+- ONNX spec: https://onnx.ai/onnx/
+- DeepSeek-R1 Model: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+- Llama-3.2 Model: https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct
+- WebGPU ONNX Runtime: https://onnxruntime.ai/docs/execution-providers/web-gpu-execution-provider.html
+- ONNX Runtime GenAI: https://github.com/microsoft/onnxruntime-genai
diff --git a/.aitk/docs/others/fix_onnx_model.py b/.aitk/docs/others/fix_onnx_model.py
new file mode 100644
index 000000000..9559e6c9b
--- /dev/null
+++ b/.aitk/docs/others/fix_onnx_model.py
@@ -0,0 +1,414 @@
+"""
+Generic ONNX Model WebGPU Fix for Combined QKV Projection Issues
+
+PROBLEM SUMMARY:
+================
+WebGPU-converted ONNX models with combined qkv_proj structures exhibit a critical 
+architecture mismatch:
+
+1. GroupQueryAttention nodes use K, V projections from PREVIOUS layers instead of 
+   the same layer
+2. GroupQueryAttention Q input receives the full combined qkv_proj output instead 
+   of just the Q portion
+3. This causes dimension mismatch: o_proj expects specific K dimension but receives 
+   mismatched output from GroupQueryAttention
+
+EXAMPLES:
+- DeepSeek-R1-Distill-Qwen-1.5B: qkv_proj=2048, Q=1536, K=256, V=256
+- Llama-3.2-1B: qkv_proj=3072, Q=2048, K=512, V=512
+
+SOLUTION OVERVIEW:
+==================
+For each affected layer, we:
+1. Extract Q from qkv_proj[0:Q_dim]
+2. Extract K from qkv_proj[Q_dim:Q_dim+K_dim]
+3. Extract V from qkv_proj[Q_dim+K_dim:total_dim]
+4. Update GroupQueryAttention to use extracted tensors
+5. Ensure all new tensors match model precision
+6. Use proper ONNX Slice syntax (axes as input, not attribute)
+"""
+
+import onnx
+from onnx import helper
+import sys
+import json
+from pathlib import Path
+
+def auto_detect_layers_and_dims(model_path):
+    """
+    Auto-detect which layers have combined qkv_proj and their dimensions.
+
+    Layer ids are parsed from node names, so models deeper than 64 layers are
+    covered. Q width is the o_proj MatMulNBits `K` attribute; the rest of the
+    qkv_proj output width is split between K and V (K gets the floor half).
+
+    Returns: (layers_to_fix, q_dim, k_dim, v_dim) or (None, None, None, None) if not found
+    """
+    import re  # local import: only this helper needs it
+    not_found = (None, None, None, None)
+    try:
+        model = onnx.load(model_path, load_external_data=False)
+        graph = model.graph
+        # Single pass over the nodes: collect every layer id that owns a qkv_proj node.
+        layer_pattern = re.compile(r'layers\.(\d+)/attn')
+        layer_ids = set()
+        for node in graph.node:
+            hit = layer_pattern.search(node.name)
+            if hit and 'qkv_proj' in node.name:
+                layer_ids.add(int(hit.group(1)))
+        layers_to_fix = sorted(layer_ids)
+
+        # Total qkv width: last dim of the first qkv_proj output with a static size.
+        qkv_dim = None
+        for i in layers_to_fix:
+            for vi in graph.value_info:
+                if f'layers.{i}/attn/qkv_proj' in vi.name and 'output' in vi.name:
+                    dims = vi.type.tensor_type.shape.dim
+                    if dims and dims[-1].dim_value > 0:
+                        qkv_dim = dims[-1].dim_value
+                        break
+            if qkv_dim:
+                break
+
+        # Q width is read from the `K` attribute of the layer's o_proj MatMulNBits node.
+        nodes_by_name = {node.name: node for node in graph.node}
+        q_dim = None
+        for i in layers_to_fix:
+            o_proj = nodes_by_name.get(f'/model/layers.{i}/attn/o_proj/MatMulNBits')
+            for attr in (o_proj.attribute if o_proj is not None else ()):
+                if attr.name == 'K' and attr.type == onnx.AttributeProto.INT:
+                    q_dim = attr.i
+            if q_dim:
+                break
+
+        # Nothing detected, or no width left for K/V once Q is removed: report not found.
+        if not qkv_dim or not q_dim or qkv_dim <= q_dim:
+            return not_found
+        k_dim = (qkv_dim - q_dim) // 2
+        return layers_to_fix, q_dim, k_dim, qkv_dim - q_dim - k_dim
+    except Exception:
+        return not_found
+
+
+def fix_webgpu_qkv_model(model_path, layers_to_fix=None, q_dim=None, k_dim=None, v_dim=None, auto_detect=True):
+    """
+    Generic fix for WebGPU ONNX models with combined qkv_proj dimension mismatch.
+    
+    Parameters:
+    -----------
+    model_path : str
+        Path to the ONNX model file
+    layers_to_fix : list
+        Layer IDs to fix (auto-detected if None)
+    q_dim : int
+        Query dimension (auto-detected if None)
+    k_dim : int
+        Key dimension (auto-detected if None)
+    v_dim : int
+        Value dimension (auto-detected if None)
+    auto_detect : bool
+        If True, auto-detect layers and dimensions (overrides manual params)
+    
+    Returns:
+    --------
+    bool : True if successful, False otherwise
+    """
+    
+    print("=" * 70)
+    print("Generic WebGPU ONNX QKV Model Fixer")
+    print("=" * 70)
+    
+    try:
+        # Load model
+        print(f"\n[1/4] Loading model from {model_path}...")
+        model = onnx.load(model_path, load_external_data=False)
+        graph = model.graph
+        print(f"  ✓ Model loaded successfully")
+        print(f"  - IR Version: {model.ir_version}")
+        print(f"  - Opset: {model.opset_import[0].version if model.opset_import else 'unknown'}")
+        
+        # Auto-detect if enabled
+        if auto_detect:
+            print(f"\n[2/4] Auto-detecting layers and dimensions...")
+            det_layers, det_q, det_k, det_v = auto_detect_layers_and_dims(model_path)
+            if det_layers:
+                layers_to_fix = det_layers
+                q_dim = det_q
+                k_dim = det_k
+                v_dim = det_v
+                print(f"  ✓ Detected layers: {layers_to_fix}")
+                print(f"  ✓ Detected dimensions: Q={q_dim}, K={k_dim}, V={v_dim}")
+        
+        if not layers_to_fix or not q_dim or not k_dim or not v_dim:
+            print(f"  ✗ Failed to detect or specify layers and dimensions")
+            return False
+        
+        total_dim = q_dim + k_dim + v_dim
+        print(f"\n[3/4] Setting up Slice operations...")
+        print(f"  • Total QKV dim: {total_dim} = {q_dim} + {k_dim} + {v_dim}")
+        
+        # Create required constants for Slice operations
+        constants = {
+            'const_0': 0,
+            f'const_{q_dim}': q_dim,
+            f'const_{q_dim + k_dim}': q_dim + k_dim,
+            f'const_{total_dim}': total_dim,
+            'const_axes_2': [2]
+        }
+        
+        # Add constants to graph
+        for const_name, const_value in constants.items():
+            if not any(init.name == const_name for init in graph.initializer):
+                if const_name == 'const_axes_2':
+                    tensor = helper.make_tensor(const_name, onnx.TensorProto.INT64, [1], const_value)
+                else:
+                    tensor = helper.make_tensor(const_name, onnx.TensorProto.INT64, [1], [const_value])
+                graph.initializer.append(tensor)
+        
+        # Fix each layer
+        slices_added = 0
+        for layer_id in layers_to_fix:
+            # Auto-detect qkv_proj output node (could be Add or MatMulNBits)
+            qkv_output = None
+            for node in graph.node:
+                if node.name == f'/model/layers.{layer_id}/attn/qkv_proj/Add':
+                    qkv_output = f'/model/layers.{layer_id}/attn/qkv_proj/Add/output_0'
+                    break
+            
+            if not qkv_output:
+                # Fall back to MatMulNBits if no Add node
+                for node in graph.node:
+                    if node.name == f'/model/layers.{layer_id}/attn/qkv_proj/MatMulNBits':
+                        qkv_output = f'/model/layers.{layer_id}/attn/qkv_proj/MatMulNBits/output_0'
+                        break
+            
+            if not qkv_output:
+                print(f"  ✗ Could not find qkv_proj output for layer {layer_id}")
+                return False
+            
+            # Find data type from qkv_proj output
+            dtype = onnx.TensorProto.FLOAT16
+            for vi in graph.value_info:
+                if f'layers.{layer_id}/attn/qkv_proj' in vi.name and 'output' in vi.name:
+                    dtype = vi.type.tensor_type.elem_type
+                    break
+            
+            # Q extraction: [0:q_dim]
+            slice_q = helper.make_node(
+                'Slice',
+                inputs=[qkv_output, 
+                       'const_0', f'const_{q_dim}', 'const_axes_2'],
+                outputs=[f'/model/layers.{layer_id}/attn/q_proj_extracted/output_0'],
+                name=f'/model/layers.{layer_id}/attn/q_proj_extracted/Slice'
+            )
+            
+            # K extraction: [q_dim:q_dim+k_dim]
+            slice_k = helper.make_node(
+                'Slice',
+                inputs=[qkv_output, 
+                       f'const_{q_dim}', f'const_{q_dim + k_dim}', 'const_axes_2'],
+                outputs=[f'/model/layers.{layer_id}/attn/k_proj_extracted/output_0'],
+                name=f'/model/layers.{layer_id}/attn/k_proj_extracted/Slice'
+            )
+            
+            # V extraction: [q_dim+k_dim:total]
+            slice_v = helper.make_node(
+                'Slice',
+                inputs=[qkv_output, 
+                       f'const_{q_dim + k_dim}', f'const_{total_dim}', 'const_axes_2'],
+                outputs=[f'/model/layers.{layer_id}/attn/v_proj_extracted/output_0'],
+                name=f'/model/layers.{layer_id}/attn/v_proj_extracted/Slice'
+            )
+            
+            graph.node.extend([slice_q, slice_k, slice_v])
+            slices_added += 3
+            
+            # Add value_info for extracted tensors
+            q_info = helper.make_tensor_value_info(
+                f'/model/layers.{layer_id}/attn/q_proj_extracted/output_0',
+                dtype,
+                ['batch_size', 'sequence_length', q_dim]
+            )
+            k_info = helper.make_tensor_value_info(
+                f'/model/layers.{layer_id}/attn/k_proj_extracted/output_0',
+                dtype,
+                ['batch_size', 'sequence_length', k_dim]
+            )
+            v_info = helper.make_tensor_value_info(
+                f'/model/layers.{layer_id}/attn/v_proj_extracted/output_0',
+                dtype,
+                ['batch_size', 'sequence_length', v_dim]
+            )
+            graph.value_info.extend([q_info, k_info, v_info])
+            
+            # Update GroupQueryAttention inputs
+            for node in graph.node:
+                if node.name == f'/model/layers.{layer_id}/attn/GroupQueryAttention':
+                    node.input[0] = f'/model/layers.{layer_id}/attn/q_proj_extracted/output_0'
+                    node.input[1] = f'/model/layers.{layer_id}/attn/k_proj_extracted/output_0'
+                    node.input[2] = f'/model/layers.{layer_id}/attn/v_proj_extracted/output_0'
+                    break
+        
+        print(f"  ✓ Added {slices_added} Slice nodes across {len(layers_to_fix)} layers")
+        print(f"  ✓ Updated {len(layers_to_fix)} GroupQueryAttention nodes")
+        
+        # Save fixed model
+        print(f"\n[4/4] Saving fixed model...")
+        onnx.save(model, model_path)
+        print(f"  ✓ Model saved successfully")
+        
+        print("\n" + "=" * 70)
+        print("FIX COMPLETED SUCCESSFULLY!")
+        print("=" * 70)
+        print("\nSummary of Changes:")
+        print(f"  • Fixed {len(layers_to_fix)} layers: {layers_to_fix}")
+        print(f"  • QKV dimensions: Q={q_dim}, K={k_dim}, V={v_dim}")
+        print(f"  • Added {slices_added} Slice nodes for Q/K/V extraction")
+        print(f"  • Corrected GroupQueryAttention layer cross-references")
+        print(f"  • Ensured precision consistency for all new tensors")
+        print(f"  • Updated Slice syntax for ONNX opset 21 compatibility")
+        
+        return True
+        
+    except Exception as e:
+        print(f"\n❌ ERROR: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def verify_fix(model_path, verbose=False, layers_to_fix=None):
+    """
+    Verify that the fix was applied correctly.
+    
+    Parameters:
+    -----------
+    model_path : str
+        Path to the fixed ONNX model
+    verbose : bool
+        Print detailed information
+    layers_to_fix : list
+        Specific layers to verify (auto-detected if None)
+    
+    Returns:
+    --------
+    bool : True if fix is verified, False otherwise
+    """
+    
+    print("\nVerifying model fix...")
+    
+    try:
+        model = onnx.load(model_path, load_external_data=False)
+        graph = model.graph
+        
+        # Auto-detect layers if not provided
+        if layers_to_fix is None:
+            det_result = auto_detect_layers_and_dims(model_path)
+            if det_result and det_result[0]:
+                layers_to_fix = det_result[0]
+            else:
+                print("  ✗ No layers detected - model may not need fixing or has unknown structure")
+                return False
+        
+        if not layers_to_fix or not isinstance(layers_to_fix, list):
+            print("  ✗ Invalid layers list")
+            return False
+        
+        all_correct = True
+        
+        for layer_id in layers_to_fix:
+            # Check Slice nodes exist
+            slice_nodes = [n for n in graph.node 
+                          if f'layers.{layer_id}' in n.name and 'Slice' in n.name and 'proj_extracted' in n.name]
+            
+            if len(slice_nodes) != 3:
+                print(f"  ✗ Layer {layer_id}: Expected 3 Slice nodes, found {len(slice_nodes)}")
+                all_correct = False
+                continue
+            
+            # Check GroupQueryAttention inputs
+            gqa_node = next((n for n in graph.node 
+                           if n.name == f'/model/layers.{layer_id}/attn/GroupQueryAttention'), None)
+            
+            if not gqa_node:
+                print(f"  ✗ Layer {layer_id}: GroupQueryAttention node not found")
+                all_correct = False
+                continue
+            
+            # Verify inputs point to extracted tensors
+            q_correct = gqa_node.input[0] == f'/model/layers.{layer_id}/attn/q_proj_extracted/output_0'
+            k_correct = gqa_node.input[1] == f'/model/layers.{layer_id}/attn/k_proj_extracted/output_0'
+            v_correct = gqa_node.input[2] == f'/model/layers.{layer_id}/attn/v_proj_extracted/output_0'
+            
+            if q_correct and k_correct and v_correct:
+                if verbose:
+                    print(f"  ✓ Layer {layer_id}: All checks passed")
+            else:
+                print(f"  ✗ Layer {layer_id}: GroupQueryAttention inputs incorrect")
+                all_correct = False
+        
+        if all_correct:
+            print("  ✓ All verifications passed!")
+        
+        return all_correct
+        
+    except Exception as e:
+        print(f"  ✗ Verification failed: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    # Usage:
+    # python fix_onnx_model.py [model_path]                    (auto-detect all)
+    # python fix_onnx_model.py [model_path] --verify            (verify existing fix)
+    # python fix_onnx_model.py [model_path] --config config.json (use config file)
+    
+    model_path = "./model/model.onnx"
+    verify_only = False
+    config_file = None
+    
+    if len(sys.argv) > 1:
+        model_path = sys.argv[1]
+    
+    if "--verify" in sys.argv:
+        verify_only = True
+    
+    if "--config" in sys.argv:
+        idx = sys.argv.index("--config")
+        if idx + 1 < len(sys.argv):
+            config_file = sys.argv[idx + 1]
+    
+    if verify_only:
+        verify_fix(model_path, verbose=True)
+        sys.exit(0)
+    
+    # Load config if provided
+    q_dim = k_dim = v_dim = layers = None
+    if config_file:
+        try:
+            with open(config_file, 'r') as f:
+                config = json.load(f)
+                layers = config.get('layers_to_fix')
+                q_dim = config.get('q_dim')
+                k_dim = config.get('k_dim')
+                v_dim = config.get('v_dim')
+                print(f"Loaded config from {config_file}")
+        except Exception as e:
+            print(f"Warning: Failed to load config: {e}")
+    
+    success = fix_webgpu_qkv_model(
+        model_path,
+        layers_to_fix=layers,
+        q_dim=q_dim,
+        k_dim=k_dim,
+        v_dim=v_dim,
+        auto_detect=True  # Always auto-detect if values not provided
+    )
+    
+    if success:
+        verify_fix(model_path, verbose=True)
+        sys.exit(0)
+    else:
+        sys.exit(1)

From 671c4284ab13baf6d717ddc3c764762b54de06d4 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Wed, 13 May 2026 16:18:03 +0800
Subject: [PATCH 16/17] merge fix

---
 .aitk/configs/checks.json                            | 12 +++++++-----
 .aitk/docs/guide/ModelList.md                        |  2 +-
 Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml             |  2 --
 .../aitk/info.yml                                    |  2 --
 meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml       |  2 --
 microsoft-Phi-3.5-mini-instruct/aitk/info.yml        |  2 --
 6 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index 3bbc46767..b72e0388a 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,6 +1,7 @@
 {
-    "configCheck": 178,
-    "copyCheck": 189,
+    "configCheck": 180,
+    "copyCheck": 190,
+    "executeRuntimeCheck": 115,
     "extensionCheck": 2,
     "gitignoreCheck": 44,
     "inferenceModelCheck": 25,
@@ -8,9 +9,10 @@
     "licenseCheck": 41,
     "modelProjectCheck": 46,
     "oliveCheck": 88,
-    "oliveJsonCheck": 178,
-    "pathCheck": 1464,
+    "oliveJsonCheck": 180,
+    "pathCheck": 1480,
     "requirementsCheck": 37,
     "templateCheck": 3,
-    "venvRequirementsCheck": 18
+    "venvRequirementsCheck": 22,
+    "winmlCopyCheck": 39
 }
diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md
index fd663b079..762fe6bf9 100644
--- a/.aitk/docs/guide/ModelList.md
+++ b/.aitk/docs/guide/ModelList.md
@@ -42,6 +42,6 @@
 | [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json) |
 | [Clip Vit Large Patch14](https://huggingface.co/openai/clip-vit-large-patch14) | [Qualcomm NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-large-patch14/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-large-patch14/aitk/openai_clip_dml.json) |
 | [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [Qualcomm GPU](../../../microsoft-resnet-50/aitk/resnet_qnn_gpu.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) |
-| [Stable Diffusion V1 5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) | [Qualcomm NPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_qnn_workflow.json), [Intel CPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json), [Intel GPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json) |
+| [Stable Diffusion V1 5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) | [Qualcomm NPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_qnn_workflow.json), [Intel CPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json), [Intel GPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json), [Intel NPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_npu_workflow.json) |
 | [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [Qualcomm GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qnn_gpu.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit_webgpu.json) |
 | [Whisper Large V3 Turbo](https://huggingface.co/openai/whisper-large-v3-turbo) | [Qualcomm NPU](../../../openai-whisper-large-v3-turbo/aitk/qnn_workflow.json) |
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
index ca5a0b4d0..65ae5875c 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
@@ -37,8 +37,6 @@ recipes:
       aitk:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
-        runtimeOverwrite:
-          executeEp: NvTensorRTRTXExecutionProvider
         requirements: General/CUDA_py3.12.9
     - file: "qwen2_5_webgpu.json"
       device: gpu
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
index 770210df1..106acc988 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -38,8 +38,6 @@ recipes:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
         requirements: General/CUDA_py3.12.9
-        runtimeOverwrite:
-          executeEp: NvTensorRTRTXExecutionProvider
     - file: "deepseek_webgpu.json"
       device: gpu
       ep: WebGpuExecutionProvider
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
index 0ecef6db7..3a1d18b2f 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
@@ -37,8 +37,6 @@ recipes:
       aitk:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
-        runtimeOverwrite:
-          executeEp: NvTensorRTRTXExecutionProvider
         requirements: General/CUDA_py3.12.9
     - file: "llama3_2_webgpu.json"
       device: gpu
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
index 30a27e79b..29217e9ac 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
@@ -37,8 +37,6 @@ recipes:
       aitk:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
-        runtimeOverwrite:
-          executeEp: NvTensorRTRTXExecutionProvider
         requirements: General/CUDA_py3.12.9
     - file: "phi3_5_webgpu.json"
       device: gpu

From b3490208d5cf333b7ef74a219765564fde950461 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Fri, 15 May 2026 11:46:20 +0800
Subject: [PATCH 17/17] use k_quant_mixed

---
 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json        | 7 +------
 .../aitk/deepseek_webgpu.json                              | 7 +------
 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json | 7 +------
 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json    | 7 +------
 4 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
index f574e4b04..a9d1937d0 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
@@ -22,12 +22,7 @@
     "passes": {
         "s": {
             "type": "SelectiveMixedPrecision",
-            "algorithm": "kld_gradient",
-            "bits": 4,
-            "high_bits": 8,
-            "ratio": 0.65,
-            "sym": false,
-            "group_size": 32
+            "algorithm": "k_quant_mixed"
         },
         "g": {
             "type": "gptq",
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
index a54f42c81..314a606a0 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
@@ -22,12 +22,7 @@
     "passes": {
         "s": {
             "type": "SelectiveMixedPrecision",
-            "algorithm": "kld_gradient",
-            "bits": 4,
-            "high_bits": 8,
-            "ratio": 0.65,
-            "sym": false,
-            "group_size": 32
+            "algorithm": "k_quant_mixed"
         },
         "g": {
             "type": "gptq",
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
index 67572ad6d..8ee4392e5 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
@@ -22,12 +22,7 @@
     "passes": {
         "s": {
             "type": "SelectiveMixedPrecision",
-            "algorithm": "kld_gradient",
-            "bits": 4,
-            "high_bits": 8,
-            "ratio": 0.65,
-            "sym": false,
-            "group_size": 32
+            "algorithm": "k_quant_mixed"
         },
         "g": {
             "type": "gptq",
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
index f71026229..12c617ab4 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
@@ -22,12 +22,7 @@
     "passes": {
         "s": {
             "type": "SelectiveMixedPrecision",
-            "algorithm": "kld_gradient",
-            "bits": 4,
-            "high_bits": 8,
-            "ratio": 0.65,
-            "sym": false,
-            "group_size": 32
+            "algorithm": "k_quant_mixed"
         },
         "g": {
             "type": "gptq",