diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index 156c69154..b72e0388a 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,7 +1,7 @@
 {
-    "configCheck": 169,
-    "copyCheck": 183,
-    "executeRuntimeCheck": 104,
+    "configCheck": 180,
+    "copyCheck": 190,
+    "executeRuntimeCheck": 115,
     "extensionCheck": 2,
     "gitignoreCheck": 44,
     "inferenceModelCheck": 25,
@@ -9,10 +9,10 @@
     "licenseCheck": 41,
     "modelProjectCheck": 46,
     "oliveCheck": 88,
-    "oliveJsonCheck": 169,
-    "pathCheck": 1439,
+    "oliveJsonCheck": 180,
+    "pathCheck": 1480,
     "requirementsCheck": 37,
     "templateCheck": 3,
-    "venvRequirementsCheck": 21,
+    "venvRequirementsCheck": 22,
     "winmlCopyCheck": 39
 }
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index 857bbfe9b..dfb098022 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -13,7 +13,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -38,7 +39,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "CNN",
             "status": "Ready",
@@ -64,7 +66,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -89,7 +92,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -114,7 +118,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -139,7 +144,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -164,7 +170,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -188,7 +195,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -213,7 +221,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -238,7 +247,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
@@ -264,7 +274,8 @@
                 "IntelCPU",
                 "IntelGPU",
                 "IntelNPU",
-                "DML"
+                "DML",
+                "WebGPU"
             ],
             "architecture": "Transformer",
             "status": "Ready",
diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md
index 3fbd653da..762fe6bf9 100644
--- a/.aitk/docs/guide/ModelList.md
+++ b/.aitk/docs/guide/ModelList.md
@@ -5,23 +5,23 @@
 | Model Name | Supported Runtimes |
 |------------|--------------------|
 | [Deepseek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_npu_config.json) |
-| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [Qualcomm GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_gpu_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json) |
+| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [Qualcomm GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_gpu_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json), [WebGPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json) |
 | [Deepseek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_trtrtx.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_npu_config.json) |
 | [Deepseek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_trtrtx.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_npu_config.json) |
 | [Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [DirectML](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_dml_config.json) |
-| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [Qualcomm GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_gpu_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json) |
+| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [Qualcomm GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_gpu_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json), [WebGPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json) |
 | [Mistral 7B Instruct V0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) | [AMD NPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_trtrtx.json), [Intel CPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_gpu_context_ov_dy.json), [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_gpu_context_ov_dy.json), [Intel NPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_npu_context_ov_dy.json) |
 | [Mistral 7B Instruct V0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) | [Intel CPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json), [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json) |
 | [Phi 3 Mini 128K Instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_qnn.json), [AMD NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_trtrtx.json), [Intel CPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json), [Intel GPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_npu_config.json) |
 | [Phi 3 Mini 4K Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_qnn.json), [AMD NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_trtrtx.json), [Intel CPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_config.json), [Intel GPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_npu_config.json) |
-| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [Qualcomm GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_gpu_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json) |
+| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [Qualcomm GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_gpu_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json), [WebGPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json) |
 | [Phi 4](https://huggingface.co/microsoft/Phi-4) | [NVIDIA TensorRT for RTX](../../../microsoft-Phi-4/aitk/phi4_trtrtx.json), [Intel CPU](../../../microsoft-Phi-4/aitk/phi4_ov_config.json), [Intel GPU](../../../microsoft-Phi-4/aitk/phi4_ov_config.json) |
 | [Phi 4 Mini Instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_qnn.json), [AMD NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json), [Intel CPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_config.json), [Intel GPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_npu_config.json) |
 | [Phi 4 Mini Reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) | [AMD NPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json), [Intel CPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json) |
 | [Phi 4 Reasoning](https://huggingface.co/microsoft/Phi-4-reasoning) | [Intel NPU](../../../microsoft-Phi-4-reasoning/aitk/phi4_ov_config.json) |
 | [Phi 4 Reasoning Plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) | [Intel NPU](../../../microsoft-Phi-4-reasoning-plus/aitk/phi4_ov_config.json) |
 | [Qwen2.5 0.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | [AMD NPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
-| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [Qualcomm GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_gpu_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json) |
+| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [Qualcomm GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_gpu_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json), [WebGPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json) |
 | [Qwen2.5 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct) | [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
 | [Qwen2.5 3B Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) | [Intel CPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
 | [Qwen2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_qnn_config.json), [AMD NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
@@ -34,14 +34,14 @@
 
 | Model Name | Supported Runtimes |
 |------------|--------------------|
-| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [Qualcomm GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qnn_gpu.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [AMD GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json) |
-| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [Qualcomm GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qnn_gpu.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [AMD GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_migraphx.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json) |
+| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [Qualcomm GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qnn_gpu.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [AMD GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json), [WebGPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json) |
+| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [Qualcomm GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qnn_gpu.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [AMD GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_migraphx.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json), [WebGPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json) |
 | [Chinese Clip Vit Base Patch16](https://huggingface.co/OFA-Sys/chinese-clip-vit-base-patch16) | [Intel CPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../OFA-Sys-chinese-clip-vit-base-patch16/aitk/openai_clip_ov.json) |
-| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [Qualcomm GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn_gpu.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [AMD GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json) |
-| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) |
-| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json) |
+| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [Qualcomm GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn_gpu.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [AMD GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json), [WebGPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json) |
+| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json) |
+| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [Qualcomm GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn_gpu.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json) |
 | [Clip Vit Large Patch14](https://huggingface.co/openai/clip-vit-large-patch14) | [Qualcomm NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-large-patch14/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-large-patch14/aitk/openai_clip_dml.json) |
-| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [Qualcomm GPU](../../../microsoft-resnet-50/aitk/resnet_qnn_gpu.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json) |
+| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [Qualcomm GPU](../../../microsoft-resnet-50/aitk/resnet_qnn_gpu.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) |
 | [Stable Diffusion V1 5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) | [Qualcomm NPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_qnn_workflow.json), [Intel CPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json), [Intel GPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_workflow.json), [Intel NPU](../../../sd-legacy-stable-diffusion-v1-5/aitk/sd_ov_npu_workflow.json) |
-| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [Qualcomm GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qnn_gpu.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json) |
+| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [Qualcomm GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qnn_gpu.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit_webgpu.json) |
 | [Whisper Large V3 Turbo](https://huggingface.co/openai/whisper-large-v3-turbo) | [Qualcomm NPU](../../../openai-whisper-large-v3-turbo/aitk/qnn_workflow.json) |
diff --git a/.aitk/docs/others/FIX_GUIDE.md b/.aitk/docs/others/FIX_GUIDE.md
new file mode 100644
index 000000000..f69f24b17
--- /dev/null
+++ b/.aitk/docs/others/FIX_GUIDE.md
@@ -0,0 +1,330 @@
+# Generic WebGPU ONNX Model QKV Fix Guide
+
+## Problem Description
+
+WebGPU-converted ONNX models (DeepSeek, Llama, and others) with combined qkv_proj structures fail with a dimension-mismatch error in specific layers:
+
+```
+Node (/model/layers.X/attn/o_proj/MatMulNBits) Op (MatMulNBits) 
+[ShapeInferenceError] Incompatible dimensions for matrix multiplication
+```
+
+### Root Cause
+
+These layers have a **combined qkv_proj** structure (Q, K, V packed into one output), but the GroupQueryAttention operation was misconfigured:
+
+| Issue | Problem |
+|-------|---------|
+| **Q input** | Receiving full 2048-dim qkv output instead of just Q (1536 dims) |
+| **K input** | Using K from previous layer instead of current layer (256 dims from wrong source) |
+| **V input** | Using V from previous layer instead of current layer (256 dims from wrong source) |
+| **Result** | GroupQueryAttention produces mismatched output → o_proj fails |
+
+### Layer Structure
+
+Different models have this issue in different layers:
+
+| Model | Layers with combined qkv_proj | Total QKV | Q | K | V |
+|-------|-------------------------------|-----------|---|---|---|
+| DeepSeek-R1-Distill-Qwen-1.5B | 0, 6, 8, 12, 25, 26, 27 | 2048 | 1536 | 256 | 256 |
+| Llama-3.2-1B | 2, 5, 6, 8, 10, 13 | 3072 | 2048 | 512 | 512 |
+
+The `fix_onnx_model.py` script detects this information automatically.
+
+## Solution
+
+For each affected layer, extract Q, K, V from the combined qkv_proj using Slice operations:
+
+```
+qkv_proj output (total_qkv dims):
+  [0:q_dim]                → Q dimensions
+  [q_dim:q_dim+k_dim]      → K dimensions  
+  [q_dim+k_dim:total_qkv]  → V dimensions
+
+GroupQueryAttention uses extracted Q, K, V → output matches o_proj expectations
+```
+
+**Example dimensions:**
+- **DeepSeek:** [0:1536] Q, [1536:1792] K, [1792:2048] V
+- **Llama:** [0:2048] Q, [2048:2560] K, [2560:3072] V
+
+## Implementation
+
+### Quick Start (Auto-Detect)
+
+The script automatically detects affected layers and dimensions:
+
+```bash
+# From the model directory
+cd ./model
+
+# Run the fix (auto-detects everything)
+python ../fix_onnx_model.py model.onnx
+
+# Verify the fix
+python ../fix_onnx_model.py model.onnx --verify
+```
+
+### Using Configuration File
+
+For reproducibility or multiple models, create a `config.json`:
+
+```json
+{
+  "layers_to_fix": [0, 6, 8, 12, 25, 26, 27],
+  "q_dim": 1536,
+  "k_dim": 256,
+  "v_dim": 256
+}
+```
+
+Then run:
+```bash
+python fix_onnx_model.py model.onnx --config config.json
+```
+
+### Examples for Common Models
+
+**DeepSeek-R1-Distill-Qwen-1.5B config.json:**
+```json
+{
+  "layers_to_fix": [0, 6, 8, 12, 25, 26, 27],
+  "q_dim": 1536,
+  "k_dim": 256,
+  "v_dim": 256
+}
+```
+
+**Llama-3.2-1B config.json:**
+```json
+{
+  "layers_to_fix": [2, 5, 6, 8, 10, 13],
+  "q_dim": 2048,
+  "k_dim": 512,
+  "v_dim": 512
+}
+```
+
+### Manual Implementation (Advanced)
+
+If you need to integrate this into your own code:
+
+```python
+from fix_onnx_model import fix_webgpu_qkv_model, verify_fix
+
+# Auto-detect (recommended)
+fix_webgpu_qkv_model('model.onnx')
+
+# Or with explicit parameters
+fix_webgpu_qkv_model(
+    'model.onnx',
+    layers_to_fix=[2, 5, 6, 8, 10, 13],  # Llama layers
+    q_dim=2048,
+    k_dim=512,
+    v_dim=512,
+    auto_detect=False  # Use provided values only
+)
+
+# Verify
+verify_fix('model.onnx', verbose=True)
+```
+
+## Key Technical Details
+
+### ONNX Slice Syntax
+
+The `Slice` operator (opset 21) takes inputs in this order:
+```
+Slice(data, starts, ends, [axes], [steps])
+```
+
+- **data:** Input tensor to slice
+- **starts:** Tensor with starting indices
+- **ends:** Tensor with ending indices  
+- **axes:** (optional) Tensor specifying which axes to slice (e.g., [2] for axis 2)
+- **steps:** (optional) Step size for each axis
+
+**Important:** Pass `axes` as an input tensor, NOT as an attribute (`starts`, `ends` and `axes` were attributes only in opset 9 and earlier).
+
+### Data Type Consistency
+
+All new data tensors must use the model's precision (**FLOAT16** for the models above; the script reads it from the qkv_proj output) to match:
+- Input: `qkv_proj/Add/output_0` (FLOAT16)
+- Output: `GroupQueryAttention/output_0` (FLOAT16)
+- Subsequent layers expect FLOAT16 inputs
+
+### Dimension Breakdown
+
+The exact dimensions depend on your model's architecture:
+
+**DeepSeek-R1-Distill-Qwen-1.5B:**
+- num_heads=12, kv_num_heads=2, head_dim=128
+- Q: 12 × 128 = 1536
+- K: 2 × 128 = 256  
+- V: 2 × 128 = 256
+- Total: 1536 + 256 + 256 = 2048
+
+**Llama-3.2-1B:**
+- num_heads=32, kv_num_heads=8, head_dim=64
+- Q: 32 × 64 = 2048
+- K: 8 × 64 = 512
+- V: 8 × 64 = 512
+- Total: 2048 + 512 + 512 = 3072
+
+To find these for any model:
+```python
+import onnx
+
+model = onnx.load('model.onnx', load_external_data=False)
+for vi in model.graph.value_info:
+    if 'layers.0/attn/qkv_proj' in vi.name and 'output' in vi.name:
+        qkv_dim = vi.type.tensor_type.shape.dim[-1].dim_value
+        print(f"Total QKV dimension: {qkv_dim}")
+        break
+
+for node in model.graph.node:
+    if 'layers.0/attn/o_proj' in node.name:
+        for attr in node.attribute:
+            if attr.name == 'K':
+                print(f"Q dimension (from o_proj K): {attr.i}")
+        break
+```
+
+## Verification
+
+After applying the fix, verify that each GroupQueryAttention node reads the extracted Q, K and V tensors:
+
+```python
+import onnx
+
+model = onnx.load('model.onnx', load_external_data=False)
+layers_to_check = [0, 6, 8, 12, 25, 26, 27]  # Or your model's layers
+
+for layer_id in layers_to_check:
+    for node in model.graph.node:
+        if node.name == f'/model/layers.{layer_id}/attn/GroupQueryAttention':
+            print(f"Layer {layer_id}:")
+            print(f"  Q: {node.input[0]}")     # Should be q_proj_extracted
+            print(f"  K: {node.input[1]}")     # Should be k_proj_extracted
+            print(f"  V: {node.input[2]}")     # Should be v_proj_extracted
+            break
+```
+
+Expected pattern for fixed model:
+```
+Layer 0:
+  Q: /model/layers.0/attn/q_proj_extracted/output_0
+  K: /model/layers.0/attn/k_proj_extracted/output_0
+  V: /model/layers.0/attn/v_proj_extracted/output_0
+```
+
+The script's `--verify` flag does this automatically:
+```bash
+python fix_onnx_model.py model.onnx --verify
+```
+
+## Usage Example
+
+### DeepSeek-R1-Distill-Qwen-1.5B
+
+```bash
+cd C:\path\to\deepseek\model
+python fix_onnx_model.py model/model.onnx
+```
+
+### Llama-3.2-1B  
+
+```bash
+cd C:\path\to\llama\model
+python fix_onnx_model.py model/model.onnx
+```
+
+Both commands detect layers and dimensions automatically. After the fix, your inference notebooks should work without shape inference errors:
+
+```python
+import onnxruntime_genai as og
+
+# Model now loads successfully
+model = og.Model('./model')
+tokenizer = og.Tokenizer(model)
+params = og.GeneratorParams(model)
+# Inference works correctly
+generator = og.Generator(model, params)
+```
+
+## Detecting This Issue
+
+If your WebGPU-converted model fails with shape inference errors, you can check if it has this issue:
+
+```python
+import onnx
+
+model = onnx.load('model.onnx', load_external_data=False)
+
+print("=== Checking for QKV cross-layer references ===")
+affected_layers = []
+
+for i in range(64):
+    gqa_node = None
+    for node in model.graph.node:
+        if node.name == f'/model/layers.{i}/attn/GroupQueryAttention':
+            gqa_node = node
+            break
+    
+    if not gqa_node:
+        continue
+    
+    has_qkv = any(f'layers.{i}/attn' in n.name and 'qkv_proj' in n.name 
+                  for n in model.graph.node)
+    
+    if has_qkv:
+        # Check if K/V come from different layers
+        k_input = gqa_node.input[1]
+        v_input = gqa_node.input[2]
+        
+        if f'layers.{i}' not in k_input or f'layers.{i}' not in v_input:
+            print(f"  ✗ Layer {i}: Cross-layer reference detected")
+            affected_layers.append(i)
+
+if affected_layers:
+    print(f"\nFix required for layers: {affected_layers}")
+else:
+    print("\nNo cross-layer references detected - model may not need fixing")
+```
+
+Typical output for affected models:
+```
+✗ Layer 2: Cross-layer reference detected
+✗ Layer 5: Cross-layer reference detected
+✗ Layer 6: Cross-layer reference detected
+...
+Fix required for layers: [2, 5, 6, 8, 10, 13]
+```
+
+## Troubleshooting
+
+| Error | Solution |
+|-------|----------|
+| `Unrecognized attribute: axes for operator Slice` | Ensure `axes` is passed as an input tensor, not an attribute (automatic in script) |
+| `Type (tensor(float)) does not match expected type (tensor(float16))` | Verify all new tensors use correct data type - script auto-detects this |
+| `Incompatible dimensions for matrix multiplication` | Confirm Slice indices match your model's dimensions (script auto-detects) |
+| Model still fails after fix | Run with `--verify` flag to check all layers were processed correctly |
+| Auto-detection doesn't work | Provide explicit config with `--config` flag |
+
+## Supported Models
+
+This fix has been tested on:
+- ✅ DeepSeek-R1-Distill-Qwen-1.5B
+- ✅ Llama-3.2-1B-Instruct
+- ✅ Other WebGPU-converted models with similar cross-layer QKV issues
+
+If you test this on other models, please note that auto-detection handles most cases. For models with non-standard structures, use the config file approach.
+
+## References
+
+- ONNX Slice operator: https://onnx.ai/onnx/operators/onnx__Slice.html
+- ONNX spec: https://onnx.ai/onnx/
+- DeepSeek-R1 Model: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+- Llama-3.2 Model: https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct
+- WebGPU ONNX Runtime: https://onnxruntime.ai/docs/execution-providers/web-gpu-execution-provider.html
+- ONNX Runtime GenAI: https://github.com/microsoft/onnxruntime-genai
diff --git a/.aitk/docs/others/fix_onnx_model.py b/.aitk/docs/others/fix_onnx_model.py
new file mode 100644
index 000000000..9559e6c9b
--- /dev/null
+++ b/.aitk/docs/others/fix_onnx_model.py
@@ -0,0 +1,414 @@
+"""
+Generic ONNX Model WebGPU Fix for Combined QKV Projection Issues
+
+PROBLEM SUMMARY:
+================
+WebGPU-converted ONNX models with combined qkv_proj structures exhibit a critical 
+architecture mismatch:
+
+1. GroupQueryAttention nodes use K, V projections from PREVIOUS layers instead of 
+   the same layer
+2. GroupQueryAttention Q input receives the full combined qkv_proj output instead 
+   of just the Q portion
+3. This causes dimension mismatch: o_proj expects specific K dimension but receives 
+   mismatched output from GroupQueryAttention
+
+EXAMPLES:
+- DeepSeek-R1-Distill-Qwen-1.5B: qkv_proj=2048, Q=1536, K=256, V=256
+- Llama-3.2-1B: qkv_proj=3072, Q=2048, K=512, V=512
+
+SOLUTION OVERVIEW:
+==================
+For each affected layer, we:
+1. Extract Q from qkv_proj[0:Q_dim]
+2. Extract K from qkv_proj[Q_dim:Q_dim+K_dim]
+3. Extract V from qkv_proj[Q_dim+K_dim:total_dim]
+4. Update GroupQueryAttention to use extracted tensors
+5. Ensure all new tensors match model precision
+6. Use proper ONNX Slice syntax (axes as input, not attribute)
+"""
+
+import onnx
+from onnx import helper
+import sys
+import json
+from pathlib import Path
+
+def auto_detect_layers_and_dims(model_path, max_layers=64):
+    """
+    Auto-detect which layers have combined qkv_proj and their dimensions.
+    `max_layers` bounds the layer indices scanned (0..max_layers-1); Q is taken from o_proj's K attribute.
+    Returns: (layers_to_fix, q_dim, k_dim, v_dim) or (None, None, None, None) if not found
+    """
+    try:
+        model = onnx.load(model_path, load_external_data=False)
+        graph = model.graph
+        
+        layers_to_fix = []
+        qkv_dim = None
+        
+        # Find layers with qkv_proj
+        for i in range(max_layers):
+            has_qkv = False
+            for node in graph.node:
+                if f'layers.{i}/attn' in node.name and 'qkv_proj' in node.name:
+                    has_qkv = True
+                    if qkv_dim is None:
+                        # Get qkv_proj output dimension
+                        for vi in graph.value_info:
+                            if f'layers.{i}/attn/qkv_proj' in vi.name and 'output' in vi.name:
+                                dims = vi.type.tensor_type.shape.dim
+                                qkv_dim = dims[-1].dim_value
+            
+            if has_qkv:
+                layers_to_fix.append(i)
+        
+        if not layers_to_fix or qkv_dim is None:
+            return None, None, None, None
+        
+        # Get o_proj K dimension to infer Q_dim
+        o_proj_k = None
+        for i in layers_to_fix:
+            for node in graph.node:
+                if node.name == f'/model/layers.{i}/attn/o_proj/MatMulNBits':
+                    for attr in node.attribute:
+                        if attr.type == onnx.AttributeProto.INT and attr.name == 'K':
+                            o_proj_k = attr.i
+                    break
+            if o_proj_k:
+                break
+        # What remains after Q is split between K and V; Q must leave room for both
+        if qkv_dim and o_proj_k and qkv_dim > o_proj_k:
+            q_dim = o_proj_k
+            remaining = qkv_dim - q_dim
+            k_dim = remaining // 2
+            v_dim = remaining - k_dim
+            return layers_to_fix, q_dim, k_dim, v_dim
+        
+        return None, None, None, None
+    except Exception:  # best-effort: any load/parse failure is reported as "not detected"
+        return None, None, None, None
+
+
+def fix_webgpu_qkv_model(model_path, layers_to_fix=None, q_dim=None, k_dim=None, v_dim=None, auto_detect=True):
+    """
+    Generic fix for WebGPU ONNX models with combined qkv_proj dimension mismatch.
+
+    For every layer in `layers_to_fix` three Slice nodes split the combined qkv_proj
+    output into Q/K/V and the layer's GroupQueryAttention node is rewired to them.
+    The Slice nodes are inserted directly before their consumer so the node list
+    stays topologically sorted, layers that are already rewired are skipped (so the
+    script can be re-run safely), and the model file is overwritten in place.
+
+    Parameters:
+    -----------
+    model_path : str
+        Path to the ONNX model file
+    layers_to_fix : list
+        Layer IDs to fix (auto-detected if None)
+    q_dim : int
+        Query dimension (auto-detected if None)
+    k_dim : int
+        Key dimension (auto-detected if None)
+    v_dim : int
+        Value dimension (auto-detected if None)
+    auto_detect : bool
+        If True, auto-detect layers and dimensions (overrides manual params)
+
+    Returns:
+    --------
+    bool : True if successful, False otherwise
+    """
+
+    print("=" * 70)
+    print("Generic WebGPU ONNX QKV Model Fixer")
+    print("=" * 70)
+
+    try:
+        # Load model
+        print(f"\n[1/4] Loading model from {model_path}...")
+        model = onnx.load(model_path, load_external_data=False)
+        graph = model.graph
+        print(f"  ✓ Model loaded successfully")
+        print(f"  - IR Version: {model.ir_version}")
+        print(f"  - Opset: {model.opset_import[0].version if model.opset_import else 'unknown'}")
+
+        # Auto-detect if enabled
+        if auto_detect:
+            print(f"\n[2/4] Auto-detecting layers and dimensions...")
+            det_layers, det_q, det_k, det_v = auto_detect_layers_and_dims(model_path)
+            if det_layers:
+                layers_to_fix = det_layers
+                q_dim = det_q
+                k_dim = det_k
+                v_dim = det_v
+                print(f"  ✓ Detected layers: {layers_to_fix}")
+                print(f"  ✓ Detected dimensions: Q={q_dim}, K={k_dim}, V={v_dim}")
+
+        if not layers_to_fix or not q_dim or not k_dim or not v_dim:
+            print(f"  ✗ Failed to detect or specify layers and dimensions")
+            return False
+
+        total_dim = q_dim + k_dim + v_dim
+        print(f"\n[3/4] Setting up Slice operations...")
+        print(f"  • Total QKV dim: {total_dim} = {q_dim} + {k_dim} + {v_dim}")
+
+        # Create required constants for Slice operations
+        constants = {
+            'const_0': 0,
+            f'const_{q_dim}': q_dim,
+            f'const_{q_dim + k_dim}': q_dim + k_dim,
+            f'const_{total_dim}': total_dim,
+            'const_axes_2': [2]
+        }
+
+        # Add constants to graph
+        # (shared by all layers; an initializer whose name already exists is reused, e.g. on a re-run)
+        for const_name, const_value in constants.items():
+            if not any(init.name == const_name for init in graph.initializer):
+                if const_name == 'const_axes_2':
+                    tensor = helper.make_tensor(const_name, onnx.TensorProto.INT64, [1], const_value)
+                else:
+                    tensor = helper.make_tensor(const_name, onnx.TensorProto.INT64, [1], [const_value])
+                graph.initializer.append(tensor)
+
+        # Fix each layer
+        slices_added = 0
+        fixed_layers = []
+        # Collected once: only Slice nodes are added below, so qkv_proj lookups stay valid
+        node_names = {node.name for node in graph.node}
+        for layer_id in layers_to_fix:
+            attn = f'/model/layers.{layer_id}/attn'
+            q_out, k_out, v_out = (f'{attn}/{part}_proj_extracted/output_0' for part in 'qkv')
+
+            # qkv_proj output comes from the bias Add when present, otherwise from MatMulNBits
+            qkv_output = next((f'{attn}/qkv_proj/{op}/output_0' for op in ('Add', 'MatMulNBits')
+                               if f'{attn}/qkv_proj/{op}' in node_names), None)
+            if not qkv_output:
+                print(f"  ✗ Could not find qkv_proj output for layer {layer_id}")
+                return False
+
+            # Locate the consumer first: without it there is nothing to rewire
+            gqa_idx = next((idx for idx, node in enumerate(graph.node)
+                            if node.name == f'{attn}/GroupQueryAttention'), None)
+            if gqa_idx is None:
+                print(f"  ✗ Could not find GroupQueryAttention node for layer {layer_id}")
+                return False
+            gqa_node = graph.node[gqa_idx]
+            # Re-running on an already fixed model must not add duplicate Slice nodes
+            if list(gqa_node.input[:3]) == [q_out, k_out, v_out]:
+                print(f"  • Layer {layer_id} is already fixed - skipping")
+                continue
+
+            # Find data type from qkv_proj output
+            # (FLOAT16 is the fallback when the output has no value_info entry)
+            dtype = onnx.TensorProto.FLOAT16
+            for vi in graph.value_info:
+                if f'layers.{layer_id}/attn/qkv_proj' in vi.name and 'output' in vi.name:
+                    dtype = vi.type.tensor_type.elem_type
+                    break
+
+            # Q extraction: [0:q_dim]
+            slice_q = helper.make_node(
+                'Slice',
+                inputs=[qkv_output, 'const_0', f'const_{q_dim}', 'const_axes_2'],
+                outputs=[q_out],
+                name=f'{attn}/q_proj_extracted/Slice'
+            )
+
+            # K extraction: [q_dim:q_dim+k_dim]
+            slice_k = helper.make_node(
+                'Slice',
+                inputs=[qkv_output, f'const_{q_dim}', f'const_{q_dim + k_dim}', 'const_axes_2'],
+                outputs=[k_out],
+                name=f'{attn}/k_proj_extracted/Slice'
+            )
+
+            # V extraction: [q_dim+k_dim:total]
+            slice_v = helper.make_node(
+                'Slice',
+                inputs=[qkv_output, f'const_{q_dim + k_dim}', f'const_{total_dim}', 'const_axes_2'],
+                outputs=[v_out],
+                name=f'{attn}/v_proj_extracted/Slice'
+            )
+
+            # Rewire GroupQueryAttention, then insert the Slices right before it so every
+            # producer precedes its consumer (ONNX requires a topologically sorted node list)
+            gqa_node.input[0], gqa_node.input[1], gqa_node.input[2] = q_out, k_out, v_out
+            for offset, slice_node in enumerate((slice_q, slice_k, slice_v)):
+                graph.node.insert(gqa_idx + offset, slice_node)
+            slices_added += 3
+            fixed_layers.append(layer_id)
+
+            # Add value_info for extracted tensors
+            for tensor_name, dim in ((q_out, q_dim), (k_out, k_dim), (v_out, v_dim)):
+                graph.value_info.append(
+                    helper.make_tensor_value_info(
+                        tensor_name, dtype, ['batch_size', 'sequence_length', dim]
+                    )
+                )
+
+        print(f"  ✓ Added {slices_added} Slice nodes across {len(fixed_layers)} layers")
+        print(f"  ✓ Updated {len(fixed_layers)} GroupQueryAttention nodes")
+
+        # Save fixed model
+        # (the input file is overwritten in place)
+        print(f"\n[4/4] Saving fixed model...")
+        onnx.save(model, model_path)
+        print(f"  ✓ Model saved successfully")
+
+        print("\n" + "=" * 70)
+        print("FIX COMPLETED SUCCESSFULLY!")
+        print("=" * 70)
+        print("\nSummary of Changes:")
+        print(f"  • Fixed {len(fixed_layers)} layers: {fixed_layers}")
+        print(f"  • QKV dimensions: Q={q_dim}, K={k_dim}, V={v_dim}")
+        print(f"  • Added {slices_added} Slice nodes for Q/K/V extraction")
+        print(f"  • Corrected GroupQueryAttention layer cross-references")
+        print(f"  • Ensured precision consistency for all new tensors")
+        print(f"  • Updated Slice syntax for ONNX opset 21 compatibility")
+
+        return True
+
+    except Exception as e:
+        print(f"\n❌ ERROR: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def verify_fix(model_path, verbose=False, layers_to_fix=None):
+    """
+    Verify that the fix was applied correctly.
+    Each layer must have its three *_proj_extracted Slice nodes feeding GroupQueryAttention inputs 0-2.
+    Parameters:
+    -----------
+    model_path : str
+        Path to the fixed ONNX model
+    verbose : bool
+        Print detailed information
+    layers_to_fix : list
+        Specific layers to verify (auto-detected if None)
+    
+    Returns:
+    --------
+    bool : True if fix is verified, False otherwise
+    """
+    
+    print("\nVerifying model fix...")
+    
+    try:
+        model = onnx.load(model_path, load_external_data=False)
+        graph = model.graph
+        
+        # Auto-detect layers if not provided
+        if layers_to_fix is None:
+            det_result = auto_detect_layers_and_dims(model_path)
+            if det_result and det_result[0]:
+                layers_to_fix = det_result[0]
+            else:
+                print("  ✗ No layers detected - model may not need fixing or has unknown structure")
+                return False
+        
+        if not layers_to_fix or not isinstance(layers_to_fix, list):
+            print("  ✗ Invalid layers list")
+            return False
+        
+        all_correct = True
+        
+        for layer_id in layers_to_fix:
+            # Check Slice nodes exist ('/attn/' anchors the id so layer 1 does not match layers 10-19)
+            slice_nodes = [n for n in graph.node
+                          if f'layers.{layer_id}/attn/' in n.name and 'Slice' in n.name and 'proj_extracted' in n.name]
+            
+            if len(slice_nodes) != 3:
+                print(f"  ✗ Layer {layer_id}: Expected 3 Slice nodes, found {len(slice_nodes)}")
+                all_correct = False
+                continue
+            
+            # Check GroupQueryAttention inputs
+            gqa_node = next((n for n in graph.node 
+                           if n.name == f'/model/layers.{layer_id}/attn/GroupQueryAttention'), None)
+            
+            if not gqa_node:
+                print(f"  ✗ Layer {layer_id}: GroupQueryAttention node not found")
+                all_correct = False
+                continue
+            
+            # Verify inputs point to extracted tensors
+            q_correct = gqa_node.input[0] == f'/model/layers.{layer_id}/attn/q_proj_extracted/output_0'
+            k_correct = gqa_node.input[1] == f'/model/layers.{layer_id}/attn/k_proj_extracted/output_0'
+            v_correct = gqa_node.input[2] == f'/model/layers.{layer_id}/attn/v_proj_extracted/output_0'
+            
+            if q_correct and k_correct and v_correct:
+                if verbose:
+                    print(f"  ✓ Layer {layer_id}: All checks passed")
+            else:
+                print(f"  ✗ Layer {layer_id}: GroupQueryAttention inputs incorrect")
+                all_correct = False
+        
+        if all_correct:
+            print("  ✓ All verifications passed!")
+        
+        return all_correct
+        
+    except Exception as e:
+        print(f"  ✗ Verification failed: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    # Usage:
+    # python fix_onnx_model.py [model_path]                    (auto-detect all)
+    # python fix_onnx_model.py [model_path] --verify            (verify existing fix)
+    # python fix_onnx_model.py [model_path] --config config.json (use config file)
+    # Exit code: 0 when the fix/verification succeeded, 1 otherwise.
+    model_path = "./model/model.onnx"
+    verify_only = False
+    config_file = None
+    
+    if len(sys.argv) > 1 and not sys.argv[1].startswith("--"):  # a leading flag is not a path
+        model_path = sys.argv[1]
+    
+    if "--verify" in sys.argv:
+        verify_only = True
+    
+    if "--config" in sys.argv:
+        idx = sys.argv.index("--config")
+        if idx + 1 < len(sys.argv):
+            config_file = sys.argv[idx + 1]
+    
+    if verify_only:
+        verified = verify_fix(model_path, verbose=True)
+        sys.exit(0 if verified else 1)
+    
+    # Load config if provided
+    q_dim = k_dim = v_dim = layers = None
+    if config_file:
+        try:
+            with open(config_file, 'r') as f:
+                config = json.load(f)
+                layers = config.get('layers_to_fix')
+                q_dim = config.get('q_dim')
+                k_dim = config.get('k_dim')
+                v_dim = config.get('v_dim')
+                print(f"Loaded config from {config_file}")
+        except Exception as e:
+            print(f"Warning: Failed to load config: {e}")
+    use_config = bool(layers and q_dim and k_dim and v_dim)  # a complete explicit config wins
+    success = fix_webgpu_qkv_model(
+        model_path,
+        layers_to_fix=layers,
+        q_dim=q_dim,
+        k_dim=k_dim,
+        v_dim=v_dim,
+        auto_detect=not use_config  # detection would override the config, so only use it as fallback
+    )
+    
+    if success:
+        # Verify exactly the layers that were fixed and report the outcome through the exit code
+        sys.exit(0 if verify_fix(model_path, verbose=True, layers_to_fix=layers if use_config else None) else 1)
+    else:
+        sys.exit(1)
diff --git a/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
new file mode 100644
index 000000000..b0ab103ce
--- /dev/null
+++ b/.aitk/requirements/WebGPU/WebGPU_py3.12.13.txt
@@ -0,0 +1,82 @@
+--extra-index-url https://download.pytorch.org/whl/cu130
+accelerate==1.13.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.5
+aiosignal==1.4.0
+alembic==1.18.4
+annotated-types==0.7.0
+anyio==4.13.0
+attrs==26.1.0
+certifi==2026.4.22
+charset-normalizer==3.4.7
+colorama==0.4.6
+colorlog==6.10.1
+datasets==4.8.5
+dill==0.4.1
+filelock==3.29.0
+flatbuffers==25.12.19
+frozenlist==1.8.0
+fsspec==2026.2.0
+greenlet==3.5.0
+h11==0.16.0
+hf-xet==1.5.0
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.36.2
+idna==3.13
+importlib-metadata==8.7.1
+jinja2==3.1.6
+lightning-utilities==0.15.3
+mako==1.3.12
+markupsafe==3.0.3
+ml-dtypes==0.5.4
+mpmath==1.3.0
+multidict==6.7.1
+multiprocess==0.70.19
+networkx==3.6.1
+numpy==2.4.4
+olive-ai==0.12.1
+onnx==1.21.0
+onnx-ir==0.2.1
+# install it separately with no deps, otherwise it would install onnxruntime and overwrite onnxruntime-webgpu
+# uvpip:install onnxruntime-genai==0.12.2 --no-deps;post
+onnxoptimizer==0.4.2
+onnxruntime-webgpu==1.25.1
+onnxscript==0.7.0
+opentelemetry-api==1.41.1
+opentelemetry-sdk==1.41.1
+opentelemetry-semantic-conventions==0.62b1
+optuna==4.8.0
+packaging==26.2
+pandas==3.0.2
+prompt-toolkit==3.0.52
+propcache==0.4.1
+protobuf==7.34.1
+psutil==7.2.2
+pyarrow==24.0.0
+pydantic==2.13.3
+pydantic-core==2.46.3
+python-dateutil==2.9.0.post0
+pyyaml==6.0.3
+questionary==2.1.1
+regex==2026.4.4
+requests==2.33.1
+safetensors==0.7.0
+setuptools==81.0.0
+six==1.17.0
+sqlalchemy==2.0.49
+sympy==1.14.0
+tabulate==0.10.0
+tokenizers==0.21.4
+torch==2.11.0+cu130
+torchmetrics==1.9.0
+tqdm==4.67.3
+transformers==4.52.4
+typing-extensions==4.15.0
+typing-inspection==0.4.2
+tzdata==2026.2
+urllib3==2.6.3
+wcwidth==0.7.0
+xxhash==3.7.0
+yarl==1.23.0
+zipp==3.23.1
diff --git a/.aitk/scripts/project_processor.py b/.aitk/scripts/project_processor.py
index 4fb5a09c6..9c5366473 100644
--- a/.aitk/scripts/project_processor.py
+++ b/.aitk/scripts/project_processor.py
@@ -12,6 +12,7 @@
 from sanitize.generator_intel import generator_intel
 from sanitize.generator_qnn import generator_qnn
 from sanitize.generator_trtrtx import generator_trtrtx
+from sanitize.generator_webgpu import generator_webgpu
 from sanitize.model_info import ModelInfo, ModelList
 from sanitize.project_config import ModelInfoProject, ModelProjectConfig, WorkflowItem
 from sanitize.utils import (
@@ -189,6 +190,8 @@ def convert_yaml_to_project_config(
             generator_trtrtx(id, recipe, yml_file.parent, modelList)
         elif recipe.get("ep") == EPNames.DmlExecutionProvider.value:
             generator_dml(id, recipe, yml_file.parent, modelList)
+        elif recipe.get("ep") == EPNames.WebGpuExecutionProvider.value:
+            generator_webgpu(id, recipe, yml_file.parent, modelList)
         runtimes = get_runtime(recipe)
         for runtime in runtimes:
             modelSummary.recipes.setdefault(runtime, []).append(file)
diff --git a/.aitk/scripts/sanitize/generator_webgpu.py b/.aitk/scripts/sanitize/generator_webgpu.py
new file mode 100644
index 000000000..80557f841
--- /dev/null
+++ b/.aitk/scripts/sanitize/generator_webgpu.py
@@ -0,0 +1,35 @@
+from pathlib import Path
+
+from .generator_common import create_model_parameter, set_optimization_path
+from .generator_dml import generate_quantization_config
+from .model_info import ModelList
+from .model_parameter import ModelParameter
+from .utils import isLLM_by_id
+
+def generator_webgpu(id: str, recipe, folder: Path, modelList: ModelList):
+    """Generate or update the AITK model parameters for a WebGPU recipe; no-op when ``aitk.auto`` is false."""
+    aitk = recipe.get("aitk", {})
+    if not aitk.get("auto", True):
+        return
+
+    isLLM = isLLM_by_id(id)
+    file = recipe.get("file")
+    configFile = folder / file
+
+    if isLLM:
+        # LLM recipes get a newly created parameter set plus an optional quantization section.
+        parameter = create_model_parameter(aitk, "Convert to WebGPU", configFile)
+        parameter.isLLM = isLLM
+
+        quantize = generate_quantization_config(configFile, parameter)
+        if quantize:
+            parameter.sections.append(quantize)
+
+        parameter.writeIfChanged()
+        print(f"\tGenerated WebGPU configuration for {file}")
+        return
+
+    # Other recipes read the stored "<recipe>.config" and only get their optimization path set.
+    modelParameter = ModelParameter.Read(str(configFile) + ".config")
+    set_optimization_path(modelParameter, str(configFile))
+    modelParameter.writeIfChanged()
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
index d5d6e0975..cc8121b4d 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
@@ -28,6 +28,20 @@
                 }
             ]
         },
+        {
+            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json",
+            "dst": "qwen2_5_webgpu.json",
+            "replacements": [
+                {
+                    "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+                    "replace": "Qwen/Qwen2.5-1.5B-Instruct"
+                },
+                {
+                    "find": "model/deepseek",
+                    "replace": "model/qwen2_5"
+                }
+            ]
+        },
         {
             "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
             "dst": "README.md",
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
index af6ec72c3..65ae5875c 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
@@ -38,6 +38,12 @@ recipes:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
         requirements: General/CUDA_py3.12.9
+    - file: "qwen2_5_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
index fdd9f88f7..3bad9483e 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "qwen2_5_qnn_gpu_config.json",
             "templateName": "qwen2_5_qnn_gpu_config"
+        },
+        {
+            "file": "qwen2_5_webgpu.json",
+            "templateName": "qwen2_5_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
new file mode 100644
index 000000000..a9d1937d0
--- /dev/null
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json
@@ -0,0 +1,68 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "Qwen/Qwen2.5-1.5B-Instruct",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "k_quant_mixed"
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/qwen2_5",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json.config
new file mode 100644
index 000000000..8c3a740ee
--- /dev/null
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu.json.config
@@ -0,0 +1,95 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
new file mode 100644
index 000000000..314a606a0
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json
@@ -0,0 +1,68 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "k_quant_mixed"
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/deepseek",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config
new file mode 100644
index 000000000..8c3a740ee
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json.config
@@ -0,0 +1,95 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
index 9486b13f9..106acc988 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -38,6 +38,12 @@ recipes:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
         requirements: General/CUDA_py3.12.9
+    - file: "deepseek_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
index 64ea7551e..52e53d2af 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "deepseek_qnn_gpu_config.json",
             "templateName": "deepseek_qnn_gpu_config"
+        },
+        {
+            "file": "deepseek_webgpu.json",
+            "templateName": "deepseek_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json
new file mode 100644
index 000000000..04815ae4b
--- /dev/null
+++ b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json
@@ -0,0 +1,35 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "google-bert/bert-base-multilingual-cased",
+        "task": "feature-extraction"
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/bert_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/google-bert-bert-base-multilingual-cased/aitk/bert_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/google-bert-bert-base-multilingual-cased/aitk/info.yml b/google-bert-bert-base-multilingual-cased/aitk/info.yml
index c3bf4db00..af086bc3c 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/info.yml
+++ b/google-bert-bert-base-multilingual-cased/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "bert-base-multilingual-cased_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "bert_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/google-bert/bert-base-multilingual-cased"
diff --git a/google-bert-bert-base-multilingual-cased/aitk/model_project.config b/google-bert-bert-base-multilingual-cased/aitk/model_project.config
index c7a3fa7da..b7020ec6f 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/model_project.config
+++ b/google-bert-bert-base-multilingual-cased/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "bert-base-multilingual-cased_qnn_gpu.json",
             "templateName": "bert-base-multilingual-cased_qnn_gpu"
+        },
+        {
+            "file": "bert_webgpu.json",
+            "templateName": "bert_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/google-vit-base-patch16-224/aitk/info.yml b/google-vit-base-patch16-224/aitk/info.yml
index 00835ef69..0014c79a4 100644
--- a/google-vit-base-patch16-224/aitk/info.yml
+++ b/google-vit-base-patch16-224/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "vit-base-patch16-224_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "vit_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/google/vit-base-patch16-224"
diff --git a/google-vit-base-patch16-224/aitk/model_project.config b/google-vit-base-patch16-224/aitk/model_project.config
index a13675ac8..c16a34e96 100644
--- a/google-vit-base-patch16-224/aitk/model_project.config
+++ b/google-vit-base-patch16-224/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "vit-base-patch16-224_qnn_gpu.json",
             "templateName": "vit-base-patch16-224_qnn_gpu"
+        },
+        {
+            "file": "vit_webgpu.json",
+            "templateName": "vit_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/google-vit-base-patch16-224/aitk/vit_webgpu.json b/google-vit-base-patch16-224/aitk/vit_webgpu.json
new file mode 100644
index 000000000..1b9f439e7
--- /dev/null
+++ b/google-vit-base-patch16-224/aitk/vit_webgpu.json
@@ -0,0 +1,51 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "google/vit-base-patch16-224",
+        "task": "image-classification",
+        "io_config": {
+            "input_names": [
+                "pixel_values"
+            ],
+            "input_shapes": [
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ]
+            ],
+            "output_names": [
+                "output"
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/vit_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/google-vit-base-patch16-224/aitk/vit_webgpu.json.config b/google-vit-base-patch16-224/aitk/vit_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/google-vit-base-patch16-224/aitk/vit_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
new file mode 100644
index 000000000..f67676762
--- /dev/null
+++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json
@@ -0,0 +1,38 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "Intel/bert-base-uncased-mrpc",
+        "task": "text-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/bert_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml
index f8781cde7..d052c8025 100644
--- a/intel-bert-base-uncased-mrpc/aitk/info.yml
+++ b/intel-bert-base-uncased-mrpc/aitk/info.yml
@@ -29,6 +29,9 @@ recipes:
     - file: "bert_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "bert_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/Intel/bert-base-uncased-mrpc"
diff --git a/intel-bert-base-uncased-mrpc/aitk/model_project.config b/intel-bert-base-uncased-mrpc/aitk/model_project.config
index 72a32db9d..b6b99b857 100644
--- a/intel-bert-base-uncased-mrpc/aitk/model_project.config
+++ b/intel-bert-base-uncased-mrpc/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "bert_qnn_gpu.json",
             "templateName": "bert_qnn_gpu"
+        },
+        {
+            "file": "bert_webgpu.json",
+            "templateName": "bert_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
index f6ce51a00..c3c112dba 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
@@ -117,6 +117,21 @@
             "dst": "laion_clip_dml.json.config",
             "replacements": []
         },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json",
+            "dst": "laion_clip_webgpu.json",
+            "replacements": [
+                {
+                    "find": "openai/clip-vit-base-patch16",
+                    "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
+                }
+            ]
+        },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config",
+            "dst": "laion_clip_webgpu.json.config",
+            "replacements": []
+        },
         {
             "src": "laion_clip_dml.json",
             "dst": "laion_clip_migraphx.json",
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml
index cc0da630e..ad1eca92b 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "laion_clip_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "laion_clip_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json
new file mode 100644
index 000000000..94d4dbae2
--- /dev/null
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json
@@ -0,0 +1,90 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
+        "task": "zero-shot-image-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        },
+        "io_config": {
+            "input_names": [
+                "input_ids",
+                "pixel_values",
+                "attention_mask"
+            ],
+            "input_shapes": [
+                [
+                    10,
+                    77
+                ],
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ],
+                [
+                    10,
+                    77
+                ]
+            ],
+            "input_types": [
+                "int64",
+                "float32",
+                "int64"
+            ],
+            "output_names": [
+                "logits_per_image",
+                "logits_per_text",
+                "text_embeds",
+                "image_embeds"
+            ],
+            "output_shapes": [
+                [
+                    1,
+                    10
+                ],
+                [
+                    10,
+                    1
+                ],
+                [
+                    10,
+                    512
+                ],
+                [
+                    1,
+                    512
+                ]
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/clip_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config
index 2ebfd7066..0188d4c64 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "laion_clip_qnn_gpu.json",
             "templateName": "laion_clip_qnn_gpu"
+        },
+        {
+            "file": "laion_clip_webgpu.json",
+            "templateName": "laion_clip_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
index 607b92270..87a0d6a41 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
@@ -42,6 +42,20 @@
                 }
             ]
         },
+        {
+            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json",
+            "dst": "llama3_2_webgpu.json",
+            "replacements": [
+                {
+                    "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+                    "replace": "meta-llama/Llama-3.2-1B-Instruct"
+                },
+                {
+                    "find": "model/deepseek",
+                    "replace": "model/llama3_2"
+                }
+            ]
+        },
         {
             "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
             "dst": "README.md",
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
index 803ac4a12..3a1d18b2f 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
@@ -38,6 +38,12 @@ recipes:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
         requirements: General/CUDA_py3.12.9
+    - file: "llama3_2_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/meta-llama/Llama-3.2-1B-Instruct"
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
new file mode 100644
index 000000000..8ee4392e5
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json
@@ -0,0 +1,68 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "meta-llama/Llama-3.2-1B-Instruct",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "k_quant_mixed"
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/llama3_2",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json.config
new file mode 100644
index 000000000..8c3a740ee
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu.json.config
@@ -0,0 +1,95 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
index 3df076bbb..f26533b06 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "llama3_2_qnn_gpu_config.json",
             "templateName": "llama3_2_qnn_gpu_config"
+        },
+        {
+            "file": "llama3_2_webgpu.json",
+            "templateName": "llama3_2_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
index d260de070..6f9870282 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
@@ -14,6 +14,20 @@
                 }
             ]
         },
+        {
+            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu.json",
+            "dst": "phi3_5_webgpu.json",
+            "replacements": [
+                {
+                    "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+                    "replace": "microsoft/Phi-3.5-mini-instruct"
+                },
+                {
+                    "find": "model/deepseek",
+                    "replace": "model/phi3_5"
+                }
+            ]
+        },
         {
             "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
             "dst": "README.md",
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
index 1f85b22d8..29217e9ac 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
@@ -38,6 +38,12 @@ recipes:
         oliveFile: "QNN/config_gpu.json"
         isGPURequired: true
         requirements: General/CUDA_py3.12.9
+    - file: "phi3_5_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
+      aitk:
+        requirements: WebGPU/WebGPU_py3.12.13
+        evalRuntime: WebGPU
 aitk:
     modelInfo:
         id: "huggingface/microsoft/Phi-3.5-mini-instruct"
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
index 1162f8288..5bff7a1f1 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "phi3_5_qnn_gpu_config.json",
             "templateName": "phi3_5_qnn_gpu_config"
+        },
+        {
+            "file": "phi3_5_webgpu.json",
+            "templateName": "phi3_5_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
new file mode 100644
index 000000000..12c617ab4
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json
@@ -0,0 +1,68 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "microsoft/Phi-3.5-mini-instruct",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "s": {
+            "type": "SelectiveMixedPrecision",
+            "algorithm": "k_quant_mixed"
+        },
+        "g": {
+            "type": "gptq",
+            "bits": 4,
+            "sym": false,
+            "group_size": 32
+        },
+        "r": {
+            "type": "rtn",
+            "bits": 8,
+            "sym": false,
+            "group_size": 32,
+            "lm_head": true,
+            "embeds": true,
+            "overrides": {
+                "lm_head": {
+                    "bits": 8
+                },
+                "model.embed_tokens": {
+                    "bits": 8
+                }
+            }
+        },
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "int4"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ]
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model/phi3_5",
+    "cache_dir": "cache",
+    "no_artifacts": true,
+    "evaluate_input_model": false
+}
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json.config
new file mode 100644
index 000000000..8c3a740ee
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu.json.config
@@ -0,0 +1,95 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "isLLM": true,
+    "evalRuntime": "WebGPU",
+    "debugInfo": {
+        "autoGenerated": true,
+        "useModelBuilder": "m"
+    },
+    "runtimeOverwrite": {
+        "autoGenerated": true,
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.m.precision"
+        }
+    ],
+    "optimizationDefault": "int4",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        },
+        {
+            "autoGenerated": true,
+            "name": "Optimization",
+            "phase": "Quantization",
+            "parameters": [
+                {
+                    "autoGenerated": true,
+                    "name": "Precision",
+                    "description": "Precision of model",
+                    "type": "enum",
+                    "displayNames": [
+                        "Int4",
+                        "Bf16",
+                        "Fp16",
+                        "Fp32"
+                    ],
+                    "displayType": "RadioGroup",
+                    "path": "passes.m.precision",
+                    "values": [
+                        "int4",
+                        "bf16",
+                        "fp16",
+                        "fp32"
+                    ],
+                    "template": {
+                        "path": "passes.m.precision",
+                        "template": "ModelBuilderPrecision"
+                    }
+                }
+            ],
+            "disableToggleGeneration": true,
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Optimize model",
+                "type": "bool",
+                "path": "passes.m",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml
index 990b55773..f9d53c81f 100644
--- a/microsoft-resnet-50/aitk/info.yml
+++ b/microsoft-resnet-50/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "resnet_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "resnet_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/microsoft/resnet-50"
diff --git a/microsoft-resnet-50/aitk/model_project.config b/microsoft-resnet-50/aitk/model_project.config
index be2778a56..c4c8dfd39 100644
--- a/microsoft-resnet-50/aitk/model_project.config
+++ b/microsoft-resnet-50/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "resnet_qnn_gpu.json",
             "templateName": "resnet_qnn_gpu"
+        },
+        {
+            "file": "resnet_webgpu.json",
+            "templateName": "resnet_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json b/microsoft-resnet-50/aitk/resnet_webgpu.json
new file mode 100644
index 000000000..1c44d2f51
--- /dev/null
+++ b/microsoft-resnet-50/aitk/resnet_webgpu.json
@@ -0,0 +1,51 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "microsoft/resnet-50",
+        "task": "image-classification",
+        "io_config": {
+            "input_names": [
+                "pixel_values"
+            ],
+            "input_shapes": [
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ]
+            ],
+            "output_names": [
+                "logits"
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/resnet_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json.config b/microsoft-resnet-50/aitk/resnet_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/microsoft-resnet-50/aitk/resnet_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml
index 1686cc549..16d11fdbb 100644
--- a/openai-clip-vit-base-patch16/aitk/info.yml
+++ b/openai-clip-vit-base-patch16/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "openai_clip_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "openai_clip_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/openai/clip-vit-base-patch16"
diff --git a/openai-clip-vit-base-patch16/aitk/model_project.config b/openai-clip-vit-base-patch16/aitk/model_project.config
index 0506a0fd7..76699ef5a 100644
--- a/openai-clip-vit-base-patch16/aitk/model_project.config
+++ b/openai-clip-vit-base-patch16/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "openai_clip_qnn_gpu.json",
             "templateName": "openai_clip_qnn_gpu"
+        },
+        {
+            "file": "openai_clip_webgpu.json",
+            "templateName": "openai_clip_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json
new file mode 100644
index 000000000..e0f5adc4e
--- /dev/null
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json
@@ -0,0 +1,90 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "openai/clip-vit-base-patch16",
+        "task": "zero-shot-image-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        },
+        "io_config": {
+            "input_names": [
+                "input_ids",
+                "pixel_values",
+                "attention_mask"
+            ],
+            "input_shapes": [
+                [
+                    10,
+                    77
+                ],
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ],
+                [
+                    10,
+                    77
+                ]
+            ],
+            "input_types": [
+                "int64",
+                "float32",
+                "int64"
+            ],
+            "output_names": [
+                "logits_per_image",
+                "logits_per_text",
+                "text_embeds",
+                "image_embeds"
+            ],
+            "output_shapes": [
+                [
+                    1,
+                    10
+                ],
+                [
+                    10,
+                    1
+                ],
+                [
+                    10,
+                    512
+                ],
+                [
+                    1,
+                    512
+                ]
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/clip_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}
diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config
index c6c72ccee..005a6cb5d 100644
--- a/openai-clip-vit-base-patch32/aitk/_copy.json.config
+++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config
@@ -109,6 +109,21 @@
             "dst": "openai_clip_dml.json.config",
             "replacements": []
         },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json",
+            "dst": "openai_clip_webgpu.json",
+            "replacements": [
+                {
+                    "find": "openai/clip-vit-base-patch16",
+                    "replace": "openai/clip-vit-base-patch32"
+                }
+            ]
+        },
+        {
+            "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config",
+            "dst": "openai_clip_webgpu.json.config",
+            "replacements": []
+        },
         {
             "src": "openai_clip_dml.json",
             "dst": "openai_clip_migraphx.json",
diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml
index 545b1f463..46d92dcdd 100644
--- a/openai-clip-vit-base-patch32/aitk/info.yml
+++ b/openai-clip-vit-base-patch32/aitk/info.yml
@@ -26,6 +26,9 @@ recipes:
     - file: "openai_clip_qnn_gpu.json"
       device: gpu
       ep: QNNExecutionProvider
+    - file: "openai_clip_webgpu.json"
+      device: gpu
+      ep: WebGpuExecutionProvider
 aitk:
     modelInfo:
         id: "huggingface/openai/clip-vit-base-patch32"
diff --git a/openai-clip-vit-base-patch32/aitk/model_project.config b/openai-clip-vit-base-patch32/aitk/model_project.config
index 3932bafd5..95d9e3cc8 100644
--- a/openai-clip-vit-base-patch32/aitk/model_project.config
+++ b/openai-clip-vit-base-patch32/aitk/model_project.config
@@ -27,6 +27,10 @@
         {
             "file": "openai_clip_qnn_gpu.json",
             "templateName": "openai_clip_qnn_gpu"
+        },
+        {
+            "file": "openai_clip_webgpu.json",
+            "templateName": "openai_clip_webgpu"
         }
     ],
     "modelInfo": {
diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json
new file mode 100644
index 000000000..7f8d0bd3f
--- /dev/null
+++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json
@@ -0,0 +1,90 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "openai/clip-vit-base-patch32",
+        "task": "zero-shot-image-classification",
+        "load_kwargs": {
+            "attn_implementation": "eager"
+        },
+        "io_config": {
+            "input_names": [
+                "input_ids",
+                "pixel_values",
+                "attention_mask"
+            ],
+            "input_shapes": [
+                [
+                    10,
+                    77
+                ],
+                [
+                    1,
+                    3,
+                    224,
+                    224
+                ],
+                [
+                    10,
+                    77
+                ]
+            ],
+            "input_types": [
+                "int64",
+                "float32",
+                "int64"
+            ],
+            "output_names": [
+                "logits_per_image",
+                "logits_per_text",
+                "text_embeds",
+                "image_embeds"
+            ],
+            "output_shapes": [
+                [
+                    1,
+                    10
+                ],
+                [
+                    10,
+                    1
+                ],
+                [
+                    10,
+                    512
+                ],
+                [
+                    1,
+                    512
+                ]
+            ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "WebGpuExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true
+        },
+        "peephole": {
+            "type": "OnnxPeepholeOptimizer",
+            "save_as_external_data": true
+        }
+    },
+    "target": "local_system",
+    "cache_dir": "cache",
+    "output_dir": "model/clip_webgpu",
+    "evaluate_input_model": false
+}
diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config
new file mode 100644
index 000000000..4575e8895
--- /dev/null
+++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config
@@ -0,0 +1,47 @@
+{
+    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
+    "name": "Convert to WebGPU",
+    "evalRuntime": "WebGPU",
+    "runtimeOverwrite": {
+        "executeRequirement": "WebGPU/WebGPU_py3.12.13"
+    },
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "WebGPU"
+        ],
+        "path": "systems.local_system.accelerators.0.execution_providers.0",
+        "values": [
+            "WebGpuExecutionProvider"
+        ],
+        "readOnly": false
+    },
+    "optimizationPaths": [
+        {
+            "path": "passes.conversion",
+            "name": "fp32"
+        }
+    ],
+    "optimizationDefault": "fp32",
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.conversion",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}