From 3331414aa583f7c6b1df10301529928c485a26a1 Mon Sep 17 00:00:00 2001 From: hualxie Date: Fri, 25 Jul 2025 15:25:46 +0800 Subject: [PATCH 01/15] add deepseek --- .../LICENSE | 51 +++++ .../README.md | 3 + .../aitk/.gitignore | 5 + .../aitk/README.md | 160 ++++++++++++++ .../aitk/deepseek_dml_config.json | 46 ++++ .../aitk/deepseek_dml_config.json.config | 48 +++++ .../aitk/deepseek_ov_config.json | 56 +++++ .../aitk/deepseek_ov_config.json.config | 153 ++++++++++++++ .../aitk/deepseek_qnn_config.json | 132 ++++++++++++ .../aitk/deepseek_qnn_config.json.config | 197 ++++++++++++++++++ .../aitk/deepseek_vitis_ai_config.json | 134 ++++++++++++ .../aitk/deepseek_vitis_ai_config.json.config | 191 +++++++++++++++++ .../aitk/inference_model.json | 31 +++ .../aitk/inference_sample.ipynb | 131 ++++++++++++ .../aitk/info.yml | 25 +++ .../aitk/model_project.config | 24 +++ .../aitk/requirements.txt | 3 + intel-bert-base-uncased-mrpc/README.md | 5 +- .../aitk/bert_dml.json | 131 ++++++++++++ .../aitk/bert_dml.json.config | 105 ++++++++++ intel-bert-base-uncased-mrpc/aitk/info.yml | 8 +- .../aitk/model_project.config | 28 +++ 22 files changed, 1663 insertions(+), 4 deletions(-) create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/.gitignore create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json create mode 100644 
deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json.config create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_model.json create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_dml.json create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config create mode 100644 intel-bert-base-uncased-mrpc/aitk/model_project.config diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE new file mode 100644 index 00000000..79dde0ac --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE @@ -0,0 +1,51 @@ +--- +title: MIT License +spdx-id: MIT +featured: true +hidden: false + +description: A short and simple permissive license with conditions only requiring preservation of copyright and license notices. Licensed works, modifications, and larger works may be distributed under different terms and without source code. + +how: Create a text file (typically named LICENSE or LICENSE.txt) in the root of your source code and copy the text of the license into the file. Replace [year] with the current year and [fullname] with the name (or names) of the copyright holders. 
+ +using: + Babel: https://github.com/babel/babel/blob/master/LICENSE + .NET: https://github.com/dotnet/runtime/blob/main/LICENSE.TXT + Rails: https://github.com/rails/rails/blob/master/MIT-LICENSE + +permissions: + - commercial-use + - modifications + - distribution + - private-use + +conditions: + - include-copyright + +limitations: + - liability + - warranty + +--- + +MIT License + +Copyright (c) [year] [fullname] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md new file mode 100644 index 00000000..e4f44474 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -0,0 +1,3 @@ +# DeepSeek Optimization + +This folder contains examples of DeepSeek optimization using different workflows. 
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/.gitignore b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md new file mode 100644 index 00000000..8de94d1c --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md @@ -0,0 +1,160 @@ +# DeepSeek-R1-Distill-Qwen-1.5B Model Optimization + +This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) model using **post-training quantization (PTQ)** techniques. The optimization process is divided into three main workflows: + +- QDQ for AMD NPU +- PTQ + AOT for QNN NPU + + This process extends the QDQ flow and compiles the model specifically for **Qualcomm NPUs** +- OpenVINO for Intel NPU + + This process uses OpenVINO-specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` + +## **QDQ Model with 4-bit Weights & 16-bit Activations** + +This workflow produces an ONNX QDQ model that is agnostic to the target hardware and accelerator, making it suitable for general inference. + +### **Optimization Process** + +The model is optimized using **weight-only quantization** and **activation quantization** for efficient deployment. The process includes: + +1. **Weight Rotation ([QuaRot](https://arxiv.org/abs/2404.00456))** + - Reduces outliers from weights and hidden states to enhance quantization efficiency. + +2. **4-bit Per-Channel Symmetric Quantization ([GPTQ](https://arxiv.org/abs/2210.17323))** + - Reduces transformer layer size while preserving accuracy. + +3. 
**ONNX Graph Capture** + - Exports the model to ONNX for further optimization. + +4. **4-bit Block-wise Quantization** + - Applies weight-only quantization to the **embedding layer** and **language modeling head**. + +5. **16-bit Activation Quantization** + - Uses 16-bit activations to balance precision and efficiency. + +The final output is a **QDQ model** with **4-bit weights** and **16-bit activations**. This model also leverages [GroupQueryAttention (GQA)](https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.GroupQueryAttention) for efficient long-context processing and long-sequence generation. + +### **Handling Dynamic and Static Input Shapes** + +NPUs require **precompiled graphs**, meaning the model must use **static input shapes**. However, **text generation** involves two distinct processing stages: + +- **Prefill (Prompt Processing)**: Processes multiple tokens simultaneously. +- **Token Generation (Iteration)**: Processes one token at a time. + +To support both efficiently, we create **two model instances**: +1. **Prefill model**: Optimized for batch processing. +2. **Token generation model**: Optimized for one-token-at-a-time inference. + +## **PTQ + AOT Compilation for Qualcomm NPUs using QNN EP** + +This process extends the [**QDQ Model with 4-bit Weights & 16-bit Activations**](#qdq-model-with-4-bit-weights--16-bit-activations) by compiling it specifically for **Qualcomm NPUs** using the **QNN Execution Provider**. + +### **Resource Optimization Strategy** + +To maximize efficiency while supporting dynamic input handling: + +- **Embedding Layer & Language Model Head** → Executed on CPU (handles dynamic input). +- **Transformer Layers** → Executed on NPU (requires static input shapes). +- **Weight Sharing** → Prefill & token generation models reuse weights to minimize memory usage. + +> ⚠️ **Note:** GQA is an ONNX Runtime *contrib operator* and must be executed on the CPU. 
The model graph is partitioned into **CPU (GQA nodes)** and **NPU (other nodes)** for execution. + +### **Compilation for Qualcomm NPU Deployment** + +Once optimized, the model is compiled for Qualcomm NPUs using **ONNX Runtime QNNExecutionProvider**. The steps include: + +1. **Split the Quantized Model** → Divide into three parts: + - **Embedding Layer** + - **Transformer Layers** + - **Language Model Head** +2. **Set Static Input Shapes**: + - **(1, 64)** for prefill (batch size, sequence length). + - **(1, 1)** for token generation. +3. **Compile using QNNExecutionProvider**: + - Leverages **weight sharing** across the prefill and token generation models. + +### **Usage** + +This workflow is configured using the `deepseek_qnn_config.json` file. It contains all of the quantization and compilation steps. It requires two separate Python environments described below. + +#### A workable version + +- python=3.10 +- CUDA=12.1 +- cudnn=9.2.0 + +#### Quantization Python Environment Setup + +Quantization is resource-intensive and requires GPU acceleration. In an [x64 Python environment with Olive installed](https://github.com/microsoft/Olive/blob/main/examples/README.md#important), install the required packages: + +```bash +# Install common dependencies +pip install -r requirements.txt + +# Install ONNX Runtime GPU packages +pip install "onnxruntime-gpu>=1.21.0" "onnxruntime-genai-cuda>=0.6.0" + +# AutoGPTQ: Install from source (stable package may be slow for weight packing) +# Disable CUDA extension build (not required) +# Linux +export BUILD_CUDA_EXT=0 +# Windows +# set BUILD_CUDA_EXT=0 + +# Install AutoGPTQ from source +pip install --no-build-isolation git+https://github.com/PanQiWei/AutoGPTQ.git +``` + +> ⚠️ Only set up the environment and install the packages. Do not run the `olive run` command at this point. + +#### AOT Compilation Python Environment Setup + +Model compilation using QNN Execution Provider requires a Python environment with onnxruntime-qnn installed. 
In a separate Python environment with Olive installed, install the required packages: + +```bash +# Install ONNX Runtime QNN +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +``` + +Replace `/path/to/qnn/env/bin` in `deepseek_qnn_config.json` with the path to the directory containing your QNN environment's Python executable. This path can be found by running the following command in the environment: + +```bash +# Linux +command -v python +# Windows +# where python +``` + +This command will return the path to the Python executable. Set the parent directory of the executable as the `/path/to/qnn/env/bin` in the config file. + +#### **Run the Quantization + Compilation Config** + +Activate the **Quantization Python Environment** and run the workflow: + +```bash +olive run --config deepseek_qnn_config.json +``` + +Olive will run the AOT compilation step in the **AOT Compilation Python Environment** specified in the config file using a subprocess. All other steps will run in the **Quantization Python Environment** natively. + +✅ Optimized model saved in: `./model` + +> ⚠️ If optimization fails because it runs out of memory, remove `calibration_providers` from the config file. + +> ⚠️ If optimization fails during context binary generation, rerun the command. The process will resume from the last completed step. + +### **Inference** + +The optimized model can be used for inference using ONNX Runtime QNNExecutionProvider and ONNX Runtime GenAI. 
**Inference must be run on a Windows Copilot+ PC with a Qualcomm NPU.** + +#### **Install Required Packages (arm64 Python)** +```bash +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +pip install "onnxruntime-genai>=0.7.0rc2" +``` + +#### **Run Console-Based Chat Interface** +Execute the provided `inference_sample.ipynb` notebook. + +> ⚠️ If got 6033 error, replace `genai_config.json` in `./model` folder diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json new file mode 100644 index 00000000..e0e26360 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json @@ -0,0 +1,46 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "passes": { + "q": { + "type": "AutoAWQQuantizer" + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/deepseek", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config new file mode 100644 index 00000000..5778ef75 --- /dev/null +++ 
b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config @@ -0,0 +1,48 @@ +{ + "name": "Convert to DirectML", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isGPURequired": true, + "executeRuntimeFeatures": [ + "AutoAwq" + ], + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json new file mode 100644 index 00000000..7cd11cf6 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json @@ -0,0 +1,56 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu" + }, + "ov_quant_config": { + "weight_format": "int4", + "group_size": 128, + "dataset": "wikitext2", + "ratio": 1, + "sym": true, + "trust_remote_code": true, + "awq": false, + "scale_estimation": false, + "sensitivity_metric": "weight_quantization_error", + "backup_precision": "int8_asym" + } + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "static": false, + "reuse_cache": 
true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "keep_ov_dynamic_dims": true, + "ov_version": "2025.1", + "reuse_cache": true + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/deepseek" +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config new file mode 100644 index 00000000..d39f9f91 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config @@ -0,0 +1,153 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "deepseek/openvino/DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json", + "isLLM": true, + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ] + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + 
"cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": { + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json new file mode 100644 index 00000000..616a0e74 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json @@ -0,0 +1,132 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "qnn_system": { + "type": "PythonEnvironment", + "python_environment_path": "/path/to/qnn/env/bin", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + 
"load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 4, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + "int4_accuracy_level": 4, + "int4_op_types_to_quantize": [ + "MatMul", + "Gather" + ], + "save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + "precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64 + }, + "cb": { + "type": "EPContextBinaryGenerator", + "provider_options": { + "htp_performance_mode": "burst", + "htp_graph_finalization_optimization_mode": "3", + "soc_model": "60" + }, + "session_options": { + "intra_op_num_threads": 2, + "inter_op_num_threads": 1 + }, + "weight_sharing": true + }, + "cp": { + "type": "ComposeOnnxModels" + } + }, + "target": "qnn_system", + "log_severity_level": 1, + 
"output_dir": "model/deepseek", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json.config new file mode 100644 index 00000000..032429d1 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json.config @@ -0,0 +1,197 @@ +{ + "name": "Convert to Qualcomm NPU", + "oliveFile": "phi3_5/qnn_config.json", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isQNNLLM": true, + "isGPURequired": true, + "runtimeOverwrite": { + "autoGenerated": true, + "pyEnvPath": "systems.qnn_system.python_environment_path", + "executeEp": "CUDAExecutionProvider", + "evaluateUsedInExecute": true + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "pyEnvRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + 
"QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json new file mode 100644 index 00000000..e4e30711 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json @@ -0,0 +1,134 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 1, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + 
"int4_accuracy_level": 4, + "int4_op_types_to_quantize": [ + "MatMul", + "Gather" + ], + "save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + "precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "int4", + "quant_type": "QuaRot" + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64, + "group_session_options": { + "log_id": "onnxruntime-genai", + "provider_options": [ + { + "VitisAI": {} + } + ], + "graph_optimization_level": "ORT_ENABLE_ALL" + } + } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/deepseek", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config new file mode 100644 index 00000000..f6624c83 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config @@ -0,0 +1,191 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": 
"phi3_5/qdq_config_vitis_ai.json", + "isLLM": true, + "evalRuntime": "AMDNPU", + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isGPURequired": true, + "runtimeOverwrite": { + "executeEp": "CUDAExecutionProvider" + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + 
"name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_model.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_model.json new file mode 100644 index 00000000..cf831c1e --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_model.json @@ -0,0 +1,31 @@ +{ + "Name": "DeepSeek-R1-Distill-Qwen-1.5B", + "PromptTemplate": { + "assistant": "{Content}", + "prompt": "<|User|>{Content}<|Assistant|>" + }, + "ParameterSchema": { + "enabled": [ + { + "name": "max_tokens", + "default": 512 + }, + { + "name": "temperature", + "default": 0.6 + }, + { + "name": "top_p", + "default": 0.9 + }, + { + "name": "top_k", + "default": 5 + }, + { + "name": "random_seed", + "default": 5687 + } + ] + } +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb new file mode 100644 index 00000000..67a72436 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = 'Who is Isaac Newton?'\n", + "ExecutionProvider=\"QNNExecutionProvider\"\n", + "model_folder = \"./model\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime_genai as og\n", + "import json\n", + "import time\n", + "from pathlib import Path\n", + "\n", + "def get_session_options(obj):\n", + " if type(obj) is dict:\n", + " for k, v in obj.items():\n", + " if k == \"session_options\":\n", + " yield v\n", + " else:\n", + " for x in get_session_options(v):\n", + " yield x\n", + " elif type(obj) is list:\n", + " for v in obj:\n", + " for x in get_session_options(v):\n", + " yield x\n", + "\n", + "\n", + 
"def remove_provider_options(model_path):\n", + " genai_config_path = Path(model_path) / \"genai_config.json\"\n", + " data = json.loads(genai_config_path.read_text())\n", + " for session_option in get_session_options(data):\n", + " if 'provider_options' in session_option:\n", + " session_option['provider_options'] = [{k: dict() for k in opts.keys()} for opts in session_option['provider_options']]\n", + "\n", + " json.dump(data, genai_config_path.open(\"w\"), indent=4)\n", + "\n", + "if ExecutionProvider == \"QNNExecutionProvider\":\n", + " remove_provider_options(model_folder)\n", + "\n", + "# Load the base model and tokenizer\n", + "model = og.Model(model_folder)\n", + "tokenizer = og.Tokenizer(model)\n", + "tokenizer_stream = tokenizer.create_stream()\n", + "\n", + "# Set the max length to something sensible by default,\n", + "# since otherwise it will be set to the entire context length\n", + "search_options = {}\n", + "search_options[\"max_length\"] = 200\n", + "\n", + "chat_template = \"<|User|>{input}<|Assistant|>\"\n", + "\n", + "# Generate prompt (prompt template + input)\n", + "prompt = f\"{chat_template.format(input=text)}\"\n", + "\n", + "# Encode the prompt using the tokenizer\n", + "input_tokens = tokenizer.encode(prompt)\n", + "\n", + "# Create params and generator\n", + "params = og.GeneratorParams(model)\n", + "params.set_search_options(**search_options)\n", + "generator = og.Generator(model, params)\n", + "\n", + "# Append input tokens to the generator\n", + "generator.append_tokens(input_tokens)\n", + "\n", + "print(\"\")\n", + "print(\"Output: \", end=\"\", flush=True)\n", + "\n", + "token_times = []\n", + "\n", + "# Stream the output\n", + "while not generator.is_done():\n", + " start_time = time.time()\n", + " generator.generate_next_token()\n", + " end_time = time.time()\n", + " \n", + " # Record the time for this token generation\n", + " token_time = end_time - start_time\n", + " token_times.append(token_time)\n", + "\n", + " new_token = 
generator.get_next_tokens()[0]\n", + " print(tokenizer_stream.decode(new_token), end=\"\", flush=True)\n", + "\n", + "print()\n", + "\n", + "# Calculate and display timing statistics\n", + "if token_times:\n", + " total_tokens = len(token_times)\n", + " avg_time = sum(token_times) / total_tokens\n", + " \n", + " print(f\"Total tokens generated: {total_tokens}\")\n", + " print(f\"Average time per token: {avg_time:.4f} seconds\")\n", + " print(f\"Tokens per second: {total_tokens / sum(token_times):.2f}\")\n", + "\n", + "del generator\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml new file mode 100644 index 00000000..8e55a94a --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -0,0 +1,25 @@ +keywords: + aitk +arch: deepseek +recipes: + - file: "deepseek_qnn_config.json" + device: npu + ep: QNNExecutionProvider + - file: "deepseek_vitis_ai_config.json" + device: npu + ep: VitisAIExecutionProvider + - file: "deepseek_ov_config.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "deepseek_dml_config.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/Intel/bert-base-uncased-mrpc" + version: 1 + workflows: + - file: "deepseek_qnn_config.json" + - file: "deepseek_vitis_ai_config.json" + - file: "deepseek_ov_config.json" + - file: "deepseek_dml_config.json" diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config 
b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config new file mode 100644 index 00000000..fa0d2dac --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -0,0 +1,24 @@ +{ + "workflows": [ + { + "file": "deepseek_qnn_config.json", + "templateName": "deepseek_qnn_config" + }, + { + "file": "deepseek_vitis_ai_config.json", + "templateName": "deepseek_vitis_ai_config" + }, + { + "file": "deepseek_ov_config.json", + "templateName": "deepseek_ov_config" + }, + { + "file": "deepseek_dml_config.json", + "templateName": "deepseek_dml_config" + } + ], + "modelInfo": { + "id": "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "version": 1 + } +} \ No newline at end of file diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt new file mode 100644 index 00000000..bca8ca03 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt @@ -0,0 +1,3 @@ +# For a full requirements, see AITK +datasets +optimum diff --git a/intel-bert-base-uncased-mrpc/README.md b/intel-bert-base-uncased-mrpc/README.md index 1513fdb7..96ef34a8 100644 --- a/intel-bert-base-uncased-mrpc/README.md +++ b/intel-bert-base-uncased-mrpc/README.md @@ -1,2 +1,3 @@ -# BERT Optimization -This folder contains examples of BERT optimization using different workflows. \ No newline at end of file +# BERT Optimization + +This folder contains examples of BERT optimization using different workflows. 
diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_dml.json b/intel-bert-base-uncased-mrpc/aitk/bert_dml.json new file mode 100644 index 00000000..28eafa9e --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_dml.json @@ -0,0 +1,131 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Intel/bert-base-uncased-mrpc", + "task": "text-classification", + "load_kwargs": { + "attn_implementation": "eager" + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "glue_mrpc_eval", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "glue", + "subset": "mrpc", + "split": "validation" + }, + "pre_process_data_config": { + "max_length": 128, + "padding": "max_length", + "input_cols": [ + "sentence1", + "sentence2" + ], + "max_samples": 100 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1 + }, + { + "name": "f1_score" + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": 
true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/bert_dml" +} \ No newline at end of file diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config new file mode 100644 index 00000000..a0925b99 --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config @@ -0,0 +1,105 @@ +{ + "name": "Convert to DirectML", + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": { + "path": 
"data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} \ No newline at end of file diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml index 5fc6046d..09fc9a11 100644 --- a/intel-bert-base-uncased-mrpc/aitk/info.yml +++ b/intel-bert-base-uncased-mrpc/aitk/info.yml @@ -10,7 +10,7 @@ device: npu gpu arch: bert -files: +recipes: - file: "bert_qdq_qnn.json" device: npu ep: QNNExecutionProvider @@ -18,10 +18,13 @@ files: device: npu ep: VitisAIExecutionProvider - file: "bert_ov.json" - ep: VitisAIExecutionProvider + ep: OpenVINOExecutionProvider - file: "bert_trtrtx.json" device: gpu ep: NvTensorRTRTXExecutionProvider + - file: "bert_dml.json" + device: gpu + ep: DmlExecutionProvider aitk: modelInfo: id: "huggingface/Intel/bert-base-uncased-mrpc" @@ -31,3 +34,4 @@ aitk: - file: "bert_qdq_amd.json" - file: "bert_ov.json" - file: "bert_trtrtx.json" + - file: "bert_dml.json" diff --git a/intel-bert-base-uncased-mrpc/aitk/model_project.config b/intel-bert-base-uncased-mrpc/aitk/model_project.config new file mode 100644 index 00000000..a3df90e4 --- 
/dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/model_project.config @@ -0,0 +1,28 @@ +{ + "workflows": [ + { + "file": "bert_qdq_qnn.json", + "templateName": "bert_qdq_qnn" + }, + { + "file": "bert_qdq_amd.json", + "templateName": "bert_qdq_amd" + }, + { + "file": "bert_ov.json", + "templateName": "bert_ov" + }, + { + "file": "bert_trtrtx.json", + "templateName": "bert_trtrtx" + }, + { + "file": "bert_dml.json", + "templateName": "bert_dml" + } + ], + "modelInfo": { + "id": "huggingface/Intel/bert-base-uncased-mrpc", + "version": 1 + } +} \ No newline at end of file From 2f2a5d12403474a54f378c3c26f15742a76a42e3 Mon Sep 17 00:00:00 2001 From: hualxie Date: Fri, 25 Jul 2025 16:59:17 +0800 Subject: [PATCH 02/15] add remainings --- Qwen-Qwen2.5-1.5B-Instruct/aitk/.gitignore | 5 + Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md | 160 ++++++++++ .../aitk/_copy.json.config | 144 +++++++++ .../aitk/inference_model.json | 31 ++ .../aitk/inference_sample.ipynb | 131 +++++++++ Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml | 20 ++ .../aitk/model_project.config | 24 ++ .../aitk/qwen2_5_dml_config.json | 46 +++ .../aitk/qwen2_5_dml_config.json.config | 48 +++ .../aitk/qwen2_5_ov_config.json | 56 ++++ .../aitk/qwen2_5_ov_config.json.config | 153 ++++++++++ .../aitk/qwen2_5_qnn_config.json | 132 +++++++++ .../aitk/qwen2_5_qnn_config.json.config | 197 +++++++++++++ .../aitk/qwen2_5_vitis_ai_config.json | 134 +++++++++ .../aitk/qwen2_5_vitis_ai_config.json.config | 191 ++++++++++++ .../aitk/requirements.txt | 2 + .../aitk/info.yml | 7 +- .../aitk/requirements.txt | 3 +- .../aitk/.gitignore | 5 + .../aitk/README.md | 22 ++ .../aitk/_copy.json.config | 18 ++ ...-multilingual-cased_context_ov_static.json | 97 +++++++ ...ingual-cased_context_ov_static.json.config | 182 ++++++++++++ .../bert-base-multilingual-cased_dml.json | 139 +++++++++ ...rt-base-multilingual-cased_dml.json.config | 126 ++++++++ .../bert-base-multilingual-cased_qdq_amd.json | 168 +++++++++++ 
...ase-multilingual-cased_qdq_amd.json.config | 273 ++++++++++++++++++ .../bert-base-multilingual-cased_qdq_qnn.json | 163 +++++++++++ ...ase-multilingual-cased_qdq_qnn.json.config | 273 ++++++++++++++++++ .../bert-base-multilingual-cased_trtrtx.json | 128 ++++++++ ...base-multilingual-cased_trtrtx.json.config | 125 ++++++++ .../aitk/inference_sample.ipynb | 150 ++++++++++ .../aitk/info.yml | 23 ++ .../aitk/model_project.config | 28 ++ .../aitk/requirements.txt | 5 + .../aitk/user_script.py | 83 ++++++ google-vit-base-patch16-224/aitk/.gitignore | 5 + google-vit-base-patch16-224/aitk/README.md | 14 + .../aitk/_copy.json.config | 42 +++ .../aitk/inference_sample.ipynb | 209 ++++++++++++++ google-vit-base-patch16-224/aitk/info.yml | 23 ++ .../aitk/model_project.config | 28 ++ .../aitk/requirements.txt | 5 + .../aitk/vit-base-patch16-224.py | 100 +++++++ .../aitk/vit-base-patch16-224_dml.json | 143 +++++++++ .../aitk/vit-base-patch16-224_dml.json.config | 107 +++++++ ...ase-patch16-224_dml_inference_sample.ipynb | 209 ++++++++++++++ .../aitk/vit-base-patch16-224_qdq_amd.json | 157 ++++++++++ .../vit-base-patch16-224_qdq_amd.json.config | 238 +++++++++++++++ .../aitk/vit-base-patch16-224_qdq_qnn.json | 151 ++++++++++ .../vit-base-patch16-224_qdq_qnn.json.config | 235 +++++++++++++++ .../aitk/vit-base-patch16-224_trtrtx.json | 113 ++++++++ .../vit-base-patch16-224_trtrtx.json.config | 106 +++++++ ...-patch16-224_trtrtx_inference_sample.ipynb | 209 ++++++++++++++ ...it_base_patch16_224_context_ov_static.json | 144 +++++++++ ..._patch16_224_context_ov_static.json.config | 217 ++++++++++++++ .../aitk/requirements.txt | 3 +- .../aitk/.gitignore | 5 + .../aitk/README.md | 48 +++ .../aitk/_copy.json.config | 224 ++++++++++++++ .../aitk/clip_script.py | 151 ++++++++++ .../aitk/info.yml | 26 ++ .../aitk/laion_clip_dml.json | 192 ++++++++++++ .../aitk/laion_clip_dml.json.config | 87 ++++++ .../laion_clip_dml_inference_sample.ipynb | 90 ++++++ .../aitk/laion_clip_ov.json | 125 
++++++++ .../aitk/laion_clip_ov.json.config | 174 +++++++++++ .../aitk/laion_clip_ov.py | 124 ++++++++ .../aitk/laion_clip_ov_inference_sample.ipynb | 84 ++++++ .../aitk/laion_clip_qdq_amd.json | 209 ++++++++++++++ .../aitk/laion_clip_qdq_amd.json.config | 195 +++++++++++++ .../laion_clip_qdq_amd_inference_sample.ipynb | 84 ++++++ .../aitk/laion_clip_text_qnn.json | 193 +++++++++++++ .../aitk/laion_clip_text_qnn.json.config | 235 +++++++++++++++ ...laion_clip_text_qnn_inference_sample.ipynb | 141 +++++++++ .../aitk/laion_clip_trtrtx.json | 173 +++++++++++ .../aitk/laion_clip_trtrtx.json.config | 86 ++++++ .../laion_clip_trtrtx_inference_sample.ipynb | 90 ++++++ .../aitk/laion_clip_vision_qnn.json | 186 ++++++++++++ .../aitk/laion_clip_vision_qnn.json.config | 237 +++++++++++++++ ...ion_clip_vision_qnn_inference_sample.ipynb | 170 +++++++++++ .../aitk/model_project.config | 32 ++ .../aitk/requirements.txt | 7 + .../aitk/user_script.py | 64 ++++ .../aitk/.gitignore | 5 + .../aitk/README.md | 160 ++++++++++ .../aitk/_copy.json.config | 160 ++++++++++ .../aitk/inference_model.json | 31 ++ .../aitk/inference_sample.ipynb | 131 +++++++++ .../aitk/info.yml | 20 ++ .../aitk/llama3_2_dml_config.json | 46 +++ .../aitk/llama3_2_dml_config.json.config | 48 +++ .../aitk/llama3_2_ov_config.json | 56 ++++ .../aitk/llama3_2_ov_config.json.config | 153 ++++++++++ .../aitk/llama3_2_qnn_config.json | 132 +++++++++ .../aitk/llama3_2_qnn_config.json.config | 197 +++++++++++++ .../aitk/llama3_2_vitis_ai_config.json | 134 +++++++++ .../aitk/llama3_2_vitis_ai_config.json.config | 191 ++++++++++++ .../aitk/model_project.config | 24 ++ .../aitk/requirements.txt | 2 + .../aitk/.gitignore | 5 + .../aitk/README.md | 160 ++++++++++ .../aitk/_copy.json.config | 140 +++++++++ .../aitk/inference_model.json | 31 ++ .../aitk/inference_sample.ipynb | 131 +++++++++ microsoft-Phi-3.5-mini-instruct/aitk/info.yml | 20 ++ .../aitk/model_project.config | 24 ++ .../aitk/phi3_5_dml_config.json | 46 +++ 
.../aitk/phi3_5_dml_config.json.config | 48 +++ .../aitk/phi3_5_ov_config.json | 56 ++++ .../aitk/phi3_5_ov_config.json.config | 153 ++++++++++ .../aitk/phi3_5_qnn_config.json | 132 +++++++++ .../aitk/phi3_5_qnn_config.json.config | 197 +++++++++++++ .../aitk/phi3_5_vitis_ai_config.json | 134 +++++++++ .../aitk/phi3_5_vitis_ai_config.json.config | 191 ++++++++++++ .../aitk/requirements.txt | 2 + .../aitk/.gitignore | 5 + microsoft-Phi-4-mini-reasoning/aitk/README.md | 6 + .../aitk/_copy.json.config | 42 +++ .../aitk/inference_model.json | 31 ++ .../aitk/inference_sample.ipynb | 131 +++++++++ microsoft-Phi-4-mini-reasoning/aitk/info.yml | 11 + .../aitk/model_project.config | 12 + .../aitk/phi4_ov_config.json | 55 ++++ .../aitk/phi4_ov_config.json.config | 156 ++++++++++ .../aitk/requirements.txt | 1 + microsoft-resnet-50/aitk/.gitignore | 5 + microsoft-resnet-50/aitk/README.md | 21 ++ microsoft-resnet-50/aitk/_copy.json.config | 28 ++ microsoft-resnet-50/aitk/imagenet.py | 105 +++++++ .../aitk/inference_sample.ipynb | 128 ++++++++ microsoft-resnet-50/aitk/info.yml | 23 ++ microsoft-resnet-50/aitk/model_project.config | 28 ++ microsoft-resnet-50/aitk/requirements.txt | 4 + .../aitk/resnet_context_ov_static.json | 139 +++++++++ .../aitk/resnet_context_ov_static.json.config | 261 +++++++++++++++++ microsoft-resnet-50/aitk/resnet_dml.json | 121 ++++++++ .../aitk/resnet_dml.json.config | 107 +++++++ .../aitk/resnet_dml_inference_sample.ipynb | 121 ++++++++ microsoft-resnet-50/aitk/resnet_qdq_amd.json | 147 ++++++++++ .../aitk/resnet_qdq_amd.json.config | 239 +++++++++++++++ microsoft-resnet-50/aitk/resnet_qdq_qnn.json | 132 +++++++++ .../aitk/resnet_qdq_qnn.json.config | 237 +++++++++++++++ microsoft-resnet-50/aitk/resnet_trtrtx.json | 110 +++++++ .../aitk/resnet_trtrtx.json.config | 106 +++++++ .../aitk/resnet_trtrtx_inference_sample.ipynb | 121 ++++++++ .../aitk/.gitignore | 5 + .../aitk/README.md | 7 + .../aitk/inference_sample.ipynb | 112 +++++++ .../aitk/info.yml | 
11 + .../aitk/mistral-7b-instruct-v0.3-ov.json | 34 +++ .../mistral-7b-instruct-v0.3-ov.json.config | 67 +++++ .../aitk/model_project.config | 12 + openai-clip-vit-base-patch16/aitk/.gitignore | 5 + openai-clip-vit-base-patch16/aitk/README.md | 48 +++ .../aitk/_copy.json.config | 28 ++ .../aitk/clip_script.py | 151 ++++++++++ openai-clip-vit-base-patch16/aitk/info.yml | 26 ++ .../aitk/model_project.config | 32 ++ .../aitk/openai_clip_dml.json | 192 ++++++++++++ .../aitk/openai_clip_dml.json.config | 87 ++++++ .../openai_clip_dml_inference_sample.ipynb | 90 ++++++ .../aitk/openai_clip_ov.json | 125 ++++++++ .../aitk/openai_clip_ov.json.config | 174 +++++++++++ .../aitk/openai_clip_ov.py | 124 ++++++++ .../openai_clip_ov_inference_sample.ipynb | 84 ++++++ .../aitk/openai_clip_qdq_amd.json | 209 ++++++++++++++ .../aitk/openai_clip_qdq_amd.json.config | 195 +++++++++++++ ...openai_clip_qdq_amd_inference_sample.ipynb | 84 ++++++ .../aitk/openai_clip_text_qnn.json | 193 +++++++++++++ .../aitk/openai_clip_text_qnn.json.config | 235 +++++++++++++++ ...penai_clip_text_qnn_inference_sample.ipynb | 141 +++++++++ .../aitk/openai_clip_trtrtx.json | 173 +++++++++++ .../aitk/openai_clip_trtrtx.json.config | 86 ++++++ .../openai_clip_trtrtx_inference_sample.ipynb | 90 ++++++ .../aitk/openai_clip_vision_qnn.json | 186 ++++++++++++ .../aitk/openai_clip_vision_qnn.json.config | 237 +++++++++++++++ ...nai_clip_vision_qnn_inference_sample.ipynb | 170 +++++++++++ .../aitk/requirements.txt | 5 + .../aitk/user_script.py | 64 ++++ openai-clip-vit-base-patch32/aitk/.gitignore | 5 + openai-clip-vit-base-patch32/aitk/README.md | 48 +++ .../aitk/_copy.json.config | 206 +++++++++++++ .../aitk/clip_script.py | 151 ++++++++++ openai-clip-vit-base-patch32/aitk/info.yml | 26 ++ .../aitk/model_project.config | 32 ++ .../aitk/openai_clip_dml.json | 192 ++++++++++++ .../aitk/openai_clip_dml.json.config | 87 ++++++ .../openai_clip_dml_inference_sample.ipynb | 90 ++++++ .../aitk/openai_clip_ov.json | 
125 ++++++++ .../aitk/openai_clip_ov.json.config | 174 +++++++++++ .../aitk/openai_clip_ov.py | 124 ++++++++ .../openai_clip_ov_inference_sample.ipynb | 84 ++++++ .../aitk/openai_clip_qdq_amd.json | 209 ++++++++++++++ .../aitk/openai_clip_qdq_amd.json.config | 195 +++++++++++++ ...openai_clip_qdq_amd_inference_sample.ipynb | 84 ++++++ .../aitk/openai_clip_text_qnn.json | 193 +++++++++++++ .../aitk/openai_clip_text_qnn.json.config | 235 +++++++++++++++ ...penai_clip_text_qnn_inference_sample.ipynb | 141 +++++++++ .../aitk/openai_clip_trtrtx.json | 173 +++++++++++ .../aitk/openai_clip_trtrtx.json.config | 86 ++++++ .../openai_clip_trtrtx_inference_sample.ipynb | 90 ++++++ .../aitk/openai_clip_vision_qnn.json | 186 ++++++++++++ .../aitk/openai_clip_vision_qnn.json.config | 237 +++++++++++++++ ...nai_clip_vision_qnn_inference_sample.ipynb | 170 +++++++++++ .../aitk/requirements.txt | 5 + .../aitk/user_script.py | 64 ++++ 207 files changed, 21861 insertions(+), 8 deletions(-) create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/.gitignore create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_model.json create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json.config create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json.config create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json.config create mode 100644 
Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt create mode 100644 google-bert-bert-base-multilingual-cased/aitk/.gitignore create mode 100644 google-bert-bert-base-multilingual-cased/aitk/README.md create mode 100644 google-bert-bert-base-multilingual-cased/aitk/_copy.json.config create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json.config create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json.config create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json.config create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json.config create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json.config create mode 100644 google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb create mode 100644 google-bert-bert-base-multilingual-cased/aitk/info.yml create mode 100644 google-bert-bert-base-multilingual-cased/aitk/model_project.config create mode 100644 google-bert-bert-base-multilingual-cased/aitk/requirements.txt create mode 100644 google-bert-bert-base-multilingual-cased/aitk/user_script.py create mode 100644 
google-vit-base-patch16-224/aitk/.gitignore create mode 100644 google-vit-base-patch16-224/aitk/README.md create mode 100644 google-vit-base-patch16-224/aitk/_copy.json.config create mode 100644 google-vit-base-patch16-224/aitk/inference_sample.ipynb create mode 100644 google-vit-base-patch16-224/aitk/info.yml create mode 100644 google-vit-base-patch16-224/aitk/model_project.config create mode 100644 google-vit-base-patch16-224/aitk/requirements.txt create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224.py create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json.config create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json.config create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json.config create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json.config create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb create mode 100644 google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json create mode 100644 google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/.gitignore create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/clip_script.py create mode 100644 
laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.py create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/requirements.txt create mode 100644 
laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/user_script.py create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/.gitignore create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/README.md create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/inference_model.json create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/.gitignore create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/README.md create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/inference_model.json create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/info.yml create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/model_project.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json create mode 100644 
microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/.gitignore create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/README.md create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/inference_model.json create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/info.yml create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/model_project.config create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json.config create mode 100644 microsoft-Phi-4-mini-reasoning/aitk/requirements.txt create mode 100644 microsoft-resnet-50/aitk/.gitignore create mode 100644 microsoft-resnet-50/aitk/README.md create mode 100644 microsoft-resnet-50/aitk/_copy.json.config create mode 100644 microsoft-resnet-50/aitk/imagenet.py create mode 100644 microsoft-resnet-50/aitk/inference_sample.ipynb create mode 100644 microsoft-resnet-50/aitk/info.yml create mode 100644 microsoft-resnet-50/aitk/model_project.config create mode 100644 microsoft-resnet-50/aitk/requirements.txt create mode 100644 microsoft-resnet-50/aitk/resnet_context_ov_static.json create mode 100644 microsoft-resnet-50/aitk/resnet_context_ov_static.json.config 
create mode 100644 microsoft-resnet-50/aitk/resnet_dml.json create mode 100644 microsoft-resnet-50/aitk/resnet_dml.json.config create mode 100644 microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb create mode 100644 microsoft-resnet-50/aitk/resnet_qdq_amd.json create mode 100644 microsoft-resnet-50/aitk/resnet_qdq_amd.json.config create mode 100644 microsoft-resnet-50/aitk/resnet_qdq_qnn.json create mode 100644 microsoft-resnet-50/aitk/resnet_qdq_qnn.json.config create mode 100644 microsoft-resnet-50/aitk/resnet_trtrtx.json create mode 100644 microsoft-resnet-50/aitk/resnet_trtrtx.json.config create mode 100644 microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb create mode 100644 mistralai-Mistral-7B-Instruct-v0.3/aitk/.gitignore create mode 100644 mistralai-Mistral-7B-Instruct-v0.3/aitk/README.md create mode 100644 mistralai-Mistral-7B-Instruct-v0.3/aitk/inference_sample.ipynb create mode 100644 mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml create mode 100644 mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json create mode 100644 mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json.config create mode 100644 mistralai-Mistral-7B-Instruct-v0.3/aitk/model_project.config create mode 100644 openai-clip-vit-base-patch16/aitk/.gitignore create mode 100644 openai-clip-vit-base-patch16/aitk/README.md create mode 100644 openai-clip-vit-base-patch16/aitk/_copy.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/clip_script.py create mode 100644 openai-clip-vit-base-patch16/aitk/info.yml create mode 100644 openai-clip-vit-base-patch16/aitk/model_project.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_dml.json create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_dml.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_ov.json create mode 100644 
openai-clip-vit-base-patch16/aitk/openai_clip_ov.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_ov.py create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch16/aitk/requirements.txt create mode 100644 openai-clip-vit-base-patch16/aitk/user_script.py create mode 100644 openai-clip-vit-base-patch32/aitk/.gitignore create mode 100644 openai-clip-vit-base-patch32/aitk/README.md create mode 100644 openai-clip-vit-base-patch32/aitk/_copy.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/clip_script.py create mode 100644 openai-clip-vit-base-patch32/aitk/info.yml create mode 100644 openai-clip-vit-base-patch32/aitk/model_project.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_dml.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_dml.json.config create mode 100644 
openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_ov.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_ov.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_ov.py create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/requirements.txt create mode 100644 openai-clip-vit-base-patch32/aitk/user_script.py diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/.gitignore b/Qwen-Qwen2.5-1.5B-Instruct/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md new file 
mode 100644 index 00000000..bd256350 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md @@ -0,0 +1,160 @@ +# Qwen2.5-1.5B-Instruct Model Optimization + +This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) model using **post-training quantization (PTQ)** techniques. The optimization process is divided into three main workflows: + +- QDQ for AMD NPU +- PTQ + AOT for QNN NPU + + This process extends the QDQ flow by compiling the model specifically for **Qualcomm NPUs** +- OpenVINO for Intel NPU + + This process uses OpenVINO-specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` + +## **QDQ Model with 4-bit Weights & 16-bit Activations** + +This workflow produces an ONNX QDQ model that is agnostic to the target hardware and accelerator, making it suitable for general inference. + +### **Optimization Process** + +The model is optimized using **weight-only quantization** and **activation quantization** for efficient deployment. The process includes: + +1. **Weight Rotation ([QuaRot](https://arxiv.org/abs/2404.00456))** + - Reduces outliers from weights and hidden states to enhance quantization efficiency. + +2. **4-bit Per-Channel Symmetric Quantization ([GPTQ](https://arxiv.org/abs/2210.17323))** + - Reduces transformer layer size while preserving accuracy. + +3. **ONNX Graph Capture** + - Exports the model to ONNX for further optimization. + +4. **4-bit Block-wise Quantization** + - Applies weight-only quantization to the **embedding layer** and **language modeling head**. + +5. **16-bit Activation Quantization** + - Uses 16-bit activations to balance precision and efficiency. + +The final output is a **QDQ model** with **4-bit weights** and **16-bit activations**. 
This model also leverages [GroupQueryAttention (GQA)](https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.GroupQueryAttention) for efficient long-context processing and long-sequence generation. + +### **Handling Dynamic and Static Input Shapes** + +NPUs require **precompiled graphs**, meaning the model must use **static input shapes**. However, **text generation** involves two distinct processing stages: + +- **Prefill (Prompt Processing)**: Processes multiple tokens simultaneously. +- **Token Generation (Iteration)**: Processes one token at a time. + +To support both efficiently, we create **two model instances**: +1. **Prefill model**: Optimized for batch processing. +2. **Token generation model**: Optimized for one-token-at-a-time inference. + +## **PTQ + AOT Compilation for Qualcomm NPUs using QNN EP** + +This process extends the [**QDQ Model with 4-bit Weights & 16-bit Activations**](#qdq-model-with-4-bit-weights--16-bit-activations) by compiling it specifically for **Qualcomm NPUs** using the **QNN Execution Provider**. + +### **Resource Optimization Strategy** + +To maximize efficiency while supporting dynamic input handling: + +- **Embedding Layer & Language Model Head** → Executed on CPU (handles dynamic input). +- **Transformer Layers** → Executed on NPU (requires static input shapes). +- **Weight Sharing** → Prefill & token generation models reuse weights to minimize memory usage. + +> ⚠️ **Note:** GQA is an ONNX Runtime *contrib operator* and must be executed on the CPU. The model graph is partitioned into **CPU (GQA nodes)** and **NPU (other nodes)** for execution. + +### **Compilation for Qualcomm NPU Deployment** + +Once optimized, the model is compiled for Qualcomm NPUs using **ONNX Runtime QNNExecutionProvider**. The steps include: + +1. **Split the Quantized Model** → Divide into three parts: + - **Embedding Layer** + - **Transformer Layers** + - **Language Model Head** +2. 
**Set Static Input Shapes**: + - **(1, 64)** for prefill (batch size, sequence length). + - **(1, 1)** for token generation. +3. **Compile using QNNExecutionProvider**: + - Leverages **weight sharing** across the prefill and token generation models. + +### **Usage** + +This workflow is configured using the `qwen2_5_qnn_config.json` file. It contains all of the quantization and compilation steps. It requires two separate Python environments described below. + +#### A workable version + +- python=3.10 +- CUDA=12.1 +- cudnn=9.2.0 + +#### Quantization Python Environment Setup + +Quantization is resource-intensive and requires GPU acceleration. In an [x64 Python environment with Olive installed](https://github.com/microsoft/Olive/blob/main/examples/README.md#important), install the required packages: + +```bash +# Install common dependencies +pip install -r requirements.txt + +# Install ONNX Runtime GPU packages +pip install "onnxruntime-gpu>=1.21.0" "onnxruntime-genai-cuda>=0.6.0" + +# AutoGPTQ: Install from source (stable package may be slow for weight packing) +# Disable CUDA extension build (not required) +# Linux +export BUILD_CUDA_EXT=0 +# Windows +# set BUILD_CUDA_EXT=0 + +# Install AutoGPTQ from source +pip install --no-build-isolation git+https://github.com/PanQiWei/AutoGPTQ.git +``` + +> ⚠️ Only set up the environment and install the packages. Do not run the `olive run` command at this point. + +#### AOT Compilation Python Environment Setup + +Model compilation using QNN Execution Provider requires a Python environment with onnxruntime-qnn installed. 
In a separate Python environment with Olive installed, install the required packages: + +```bash +# Install ONNX Runtime QNN +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +``` + +Replace `/path/to/qnn/env/bin` in `qwen2_5_qnn_config.json` with the path to the directory containing your QNN environment's Python executable. This path can be found by running the following command in the environment: + +```bash +# Linux +command -v python +# Windows +# where python +``` + +This command will return the path to the Python executable. Use the parent directory of the executable in place of `/path/to/qnn/env/bin` in the config file. + +#### **Run the Quantization + Compilation Config** + +Activate the **Quantization Python Environment** and run the workflow: + +```bash +olive run --config qwen2_5_qnn_config.json +``` + +Olive will run the AOT compilation step in the **AOT Compilation Python Environment** specified in the config file using a subprocess. All other steps will run in the **Quantization Python Environment** natively. + +✅ Optimized model saved in: `./model` + +> ⚠️ If optimization fails because it runs out of memory, remove `calibration_providers` from the config file. + +> ⚠️ If optimization fails during context binary generation, rerun the command. The process will resume from the last completed step. + +### **Inference** + +The optimized model can be used for inference using ONNX Runtime QNNExecutionProvider and ONNX Runtime GenAI. 
**Inference must be run on a Windows Copilot+ PC with a Qualcomm NPU.** + +#### **Install Required Packages (arm64 Python)** +```bash +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +pip install "onnxruntime-genai>=0.7.0rc2" +``` + +#### **Run Console-Based Chat Interface** +Execute the provided `inference_sample.ipynb` notebook. + + diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config new file mode 100644 index 00000000..c28c58db --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config @@ -0,0 +1,144 @@ +{ + "copies": [ + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/model_project.config", + "dst": "model_project.config", + "replacements": [ + { + "find": "deepseek_qnn_config", + "replace": "qwen2_5_qnn_config" + }, + { + "find": "deepseek_vitis_ai_config", + "replace": "qwen2_5_vitis_ai_config" + }, + { + "find": "deepseek_ov_config", + "replace": "qwen2_5_ov_config" + }, + { + "find": "deepseek_dml_config", + "replace": "qwen2_5_dml_config" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json", + "dst": "qwen2_5_qnn_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/qwen2_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json.config", + "dst": "qwen2_5_qnn_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json", + "dst": "qwen2_5_vitis_ai_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + 
"replace": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/qwen2_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json.config", + "dst": "qwen2_5_vitis_ai_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json", + "dst": "qwen2_5_ov_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/qwen2_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "dst": "qwen2_5_ov_config.json.config", + "replacements": [ + { + "find": "deepseek/openvino/DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json", + "replace": "qwen2_5/openvino/Qwen2.5-1.5B-instruct_context_ov_dynamic_sym_bkp_int8_sym_r1.json" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json", + "dst": "qwen2_5_dml_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/qwen2_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json.config", + "dst": "qwen2_5_dml_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/README.md", + "dst": "README.md", + "replacements": [ + { + "find": "# DeepSeek-R1-Distill-Qwen-1.5B Model Optimization", + "replace": "# Qwen2.5-1.5B-Instruct Model Optimization" + }, + { + "find": "[DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B)", + "replace": "[Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct)" + }, + { + "find": "> 
⚠️ If got 6033 error, replace `genai_config.json` in `./model` folder", + "replace": "" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/requirements.txt", + "dst": "requirements.txt", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "dst": "inference_sample.ipynb", + "replacements": [ + { + "find": "<|User|>{input}<|Assistant|>", + "replace": "<|im_start|>user\\\\n{input}<|im_end|>\\\\n<|im_start|>assistant\\\\n" + } + ] + } + ] +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_model.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_model.json new file mode 100644 index 00000000..7a3bb4a0 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_model.json @@ -0,0 +1,31 @@ +{ + "Name": "Qwen2.5-1.5B-Instruct", + "PromptTemplate": { + "assistant": "{Content}", + "prompt": "<|im_start|>user\n{Content}<|im_end|>\n<|im_start|>assistant\n" + }, + "ParameterSchema": { + "enabled": [ + { + "name": "max_tokens", + "default": 512 + }, + { + "name": "temperature", + "default": 0.6 + }, + { + "name": "top_p", + "default": 0.95 + }, + { + "name": "top_k", + "default": 5 + }, + { + "name": "random_seed", + "default": 3328 + } + ] + } +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb new file mode 100644 index 00000000..7757249e --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = 'Who is Isaac Newton?'\n", + "ExecutionProvider=\"QNNExecutionProvider\"\n", + "model_folder = \"./model\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime_genai as og\n", + "import json\n", + "import time\n", + "from pathlib import Path\n", + "\n", + "def 
get_session_options(obj):\n", + " if type(obj) is dict:\n", + " for k, v in obj.items():\n", + " if k == \"session_options\":\n", + " yield v\n", + " else:\n", + " for x in get_session_options(v):\n", + " yield x\n", + " elif type(obj) is list:\n", + " for v in obj:\n", + " for x in get_session_options(v):\n", + " yield x\n", + "\n", + "\n", + "def remove_provider_options(model_path):\n", + " genai_config_path = Path(model_path) / \"genai_config.json\"\n", + " data = json.loads(genai_config_path.read_text())\n", + " for session_option in get_session_options(data):\n", + " if 'provider_options' in session_option:\n", + " session_option['provider_options'] = [{k: dict() for k in opts.keys()} for opts in session_option['provider_options']]\n", + "\n", + " json.dump(data, genai_config_path.open(\"w\"), indent=4)\n", + "\n", + "if ExecutionProvider == \"QNNExecutionProvider\":\n", + " remove_provider_options(model_folder)\n", + "\n", + "# Load the base model and tokenizer\n", + "model = og.Model(model_folder)\n", + "tokenizer = og.Tokenizer(model)\n", + "tokenizer_stream = tokenizer.create_stream()\n", + "\n", + "# Set the max length to something sensible by default,\n", + "# since otherwise it will be set to the entire context length\n", + "search_options = {}\n", + "search_options[\"max_length\"] = 200\n", + "\n", + "chat_template = \"<|im_start|>user\\n{input}<|im_end|>\\n<|im_start|>assistant\\n\"\n", + "\n", + "# Generate prompt (prompt template + input)\n", + "prompt = f\"{chat_template.format(input=text)}\"\n", + "\n", + "# Encode the prompt using the tokenizer\n", + "input_tokens = tokenizer.encode(prompt)\n", + "\n", + "# Create params and generator\n", + "params = og.GeneratorParams(model)\n", + "params.set_search_options(**search_options)\n", + "generator = og.Generator(model, params)\n", + "\n", + "# Append input tokens to the generator\n", + "generator.append_tokens(input_tokens)\n", + "\n", + "print(\"\")\n", + "print(\"Output: \", end=\"\", 
flush=True)\n", + "\n", + "token_times = []\n", + "\n", + "# Stream the output\n", + "while not generator.is_done():\n", + " start_time = time.time()\n", + " generator.generate_next_token()\n", + " end_time = time.time()\n", + " \n", + " # Record the time for this token generation\n", + " token_time = end_time - start_time\n", + " token_times.append(token_time)\n", + "\n", + " new_token = generator.get_next_tokens()[0]\n", + " print(tokenizer_stream.decode(new_token), end=\"\", flush=True)\n", + "\n", + "print()\n", + "\n", + "# Calculate and display timing statistics\n", + "if token_times:\n", + " total_tokens = len(token_times)\n", + " avg_time = sum(token_times) / total_tokens\n", + " \n", + " print(f\"Total tokens generated: {total_tokens}\")\n", + " print(f\"Average time per token: {avg_time:.4f} seconds\")\n", + " print(f\"Tokens per second: {total_tokens / sum(token_times):.2f}\")\n", + "\n", + "del generator\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml new file mode 100644 index 00000000..b5c32c66 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -0,0 +1,20 @@ +keywords: + aitk +arch: deepseek +recipes: + - file: "qwen2_5_qnn_config.json" + device: npu + ep: QNNExecutionProvider + - file: "qwen2_5_vitis_ai_config.json" + device: npu + ep: VitisAIExecutionProvider + - file: "qwen2_5_ov_config.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "qwen2_5_dml_config.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: 
"huggingface/Qwen/Qwen2.5-1.5B-Instruct" + version: 1 diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config new file mode 100644 index 00000000..68672843 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config @@ -0,0 +1,24 @@ +{ + "workflows": [ + { + "file": "qwen2_5_qnn_config.json", + "templateName": "qwen2_5_qnn_config" + }, + { + "file": "qwen2_5_vitis_ai_config.json", + "templateName": "qwen2_5_vitis_ai_config" + }, + { + "file": "qwen2_5_ov_config.json", + "templateName": "qwen2_5_ov_config" + }, + { + "file": "qwen2_5_dml_config.json", + "templateName": "qwen2_5_dml_config" + } + ], + "modelInfo": { + "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", + "version": 1 + } +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json new file mode 100644 index 00000000..4e7b0265 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json @@ -0,0 +1,46 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "passes": { + "q": { + "type": "AutoAWQQuantizer" + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json.config new file mode 100644 index 00000000..5778ef75 --- /dev/null +++ 
b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json.config @@ -0,0 +1,48 @@ +{ + "name": "Convert to DirectML", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isGPURequired": true, + "executeRuntimeFeatures": [ + "AutoAwq" + ], + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json new file mode 100644 index 00000000..1d55d610 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json @@ -0,0 +1,56 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu" + }, + "ov_quant_config": { + "weight_format": "int4", + "group_size": 128, + "dataset": "wikitext2", + "ratio": 1, + "sym": true, + "trust_remote_code": true, + "awq": false, + "scale_estimation": false, + "sensitivity_metric": "weight_quantization_error", + "backup_precision": "int8_asym" + } + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "static": false, + "reuse_cache": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + 
"target_device": "npu", + "keep_ov_dynamic_dims": true, + "ov_version": "2025.1", + "reuse_cache": true + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/qwen2_5" +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json.config new file mode 100644 index 00000000..b95b828a --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json.config @@ -0,0 +1,153 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "qwen2_5/openvino/Qwen2.5-1.5B-instruct_context_ov_dynamic_sym_bkp_int8_sym_r1.json", + "isLLM": true, + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ] + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": 
"cpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": { + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json new file mode 100644 index 00000000..d84eb1fa --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json @@ -0,0 +1,132 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "qnn_system": { + "type": "PythonEnvironment", + "python_environment_path": "/path/to/qnn/env/bin", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + 
"max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 4, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + "int4_accuracy_level": 4, + "int4_op_types_to_quantize": [ + "MatMul", + "Gather" + ], + "save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + "precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64 + }, + "cb": { + "type": "EPContextBinaryGenerator", + "provider_options": { + "htp_performance_mode": "burst", + "htp_graph_finalization_optimization_mode": "3", + "soc_model": "60" + }, + "session_options": { + "intra_op_num_threads": 2, + "inter_op_num_threads": 1 + }, + "weight_sharing": true + }, + "cp": { + "type": "ComposeOnnxModels" + } + }, + "target": "qnn_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json.config 
b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json.config new file mode 100644 index 00000000..032429d1 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json.config @@ -0,0 +1,197 @@ +{ + "name": "Convert to Qualcomm NPU", + "oliveFile": "phi3_5/qnn_config.json", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isQNNLLM": true, + "isGPURequired": true, + "runtimeOverwrite": { + "autoGenerated": true, + "pyEnvPath": "systems.qnn_system.python_environment_path", + "executeEp": "CUDAExecutionProvider", + "evaluateUsedInExecute": true + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "pyEnvRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + 
"QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json new file mode 100644 index 00000000..d49375ec --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json @@ -0,0 +1,134 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 1, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + "int4_accuracy_level": 4, + "int4_op_types_to_quantize": [ + "MatMul", + 
"Gather" + ], + "save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + "precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "int4", + "quant_type": "QuaRot" + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64, + "group_session_options": { + "log_id": "onnxruntime-genai", + "provider_options": [ + { + "VitisAI": {} + } + ], + "graph_optimization_level": "ORT_ENABLE_ALL" + } + } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config new file mode 100644 index 00000000..f6624c83 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -0,0 +1,191 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "phi3_5/qdq_config_vitis_ai.json", + "isLLM": true, + "evalRuntime": "AMDNPU", + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" 
+ }, + "isGPURequired": true, + "runtimeOverwrite": { + "executeEp": "CUDAExecutionProvider" + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + 
"name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt new file mode 100644 index 00000000..03275c3e --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt @@ -0,0 +1,2 @@ +datasets +optimum diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index 8e55a94a..7c05c28d 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -16,10 +16,5 @@ recipes: ep: DmlExecutionProvider aitk: modelInfo: - id: "huggingface/Intel/bert-base-uncased-mrpc" + id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" version: 1 - workflows: - - file: "deepseek_qnn_config.json" - - file: "deepseek_vitis_ai_config.json" - - file: "deepseek_ov_config.json" - - file: "deepseek_dml_config.json" diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt index bca8ca03..7af84714 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt @@ -1,3 +1,4 @@ -# For a full requirements, see AITK +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK datasets optimum diff --git a/google-bert-bert-base-multilingual-cased/aitk/.gitignore b/google-bert-bert-base-multilingual-cased/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/google-bert-bert-base-multilingual-cased/aitk/README.md 
b/google-bert-bert-base-multilingual-cased/aitk/README.md new file mode 100644 index 00000000..e00d9063 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/README.md @@ -0,0 +1,22 @@ +# BERT Optimization + +This folder contains examples of BERT optimization using different workflows. + +- QDQ for Qualcomm NPU / AMD NPU +- OpenVINO for Intel NPU + +## BERT Quantization QDQ + +This workflow quantizes the model. It performs the pipeline: +- *HF Model -> ONNX Model -> Quantized ONNX Model* + +Config file: `bert-base-multilingual-cased_qdq.json` + +### Latency / Throughput + +| Model Version | Latency (ms/sample) | Throughput (token per second)| Dataset | +|-----------------------|----------------------|------------------------------|---------------| +| PyTorch FP32 | 1162 | 0.81 | facebook/xnli | +| ONNX INT8 (QDQ) | 590 | 1.75 | facebook/xnli | + +*Note: Latency can vary significantly depending on the hardware and system environment. The values provided here are for reference only and may not reflect performance on all devices.* diff --git a/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config b/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config new file mode 100644 index 00000000..ff27826d --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config @@ -0,0 +1,18 @@ +{ + "copies": [ + { + "src": "bert-base-multilingual-cased_qdq_amd.json.config", + "dst": "bert-base-multilingual-cased_qdq_qnn.json.config", + "replacements": [ + { + "find": "bert/google_bert_qdq_vitis_ai.json", + "replace": "bert/google_bert_qdq.json" + }, + { + "find": "Convert to AMD NPU", + "replace": "Convert to Qualcomm NPU" + } + ] + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json new file mode 100644 index 00000000..ba5d70d8 --- /dev/null +++ 
b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json @@ -0,0 +1,97 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "fill-mask" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "bert_base_multilingual_cased_dataset", + "data_name": "wikipedia", + "split": "train", + "max_samples": 300 + }, + "dataloader_config": { + "batch_size": 1, + "drop_last": true + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "sub_types": [ + { "name": "avg", "priority": 1, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }, + { "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } } + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu", + "task": "feature-extraction" + } + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "input_shapes": [ + [ + 1, + 128 + ], + [ + 1, + 128 + ], + [ + 1, + 128 + ] + ], + "static": true + }, + "ov_quantize": { + "type": "OpenVINOQuantization", + "target_device": "npu", + "data_config": "quantize_data_config", + "model_type": "TRANSFORMER", + "user_script": "user_script.py", + "transform_fn": "custom_transform_func" + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "ov_version": "2025.1" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/bert-base-multilingual-cased_context_ov_static" +} diff --git 
a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json.config new file mode 100644 index 00000000..801c3bc6 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json.config @@ -0,0 +1,182 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "bert/openvino/bert_base_multilingual_cased/bert-base-multilingual-cased_context_ov_static.json", + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "cpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "gpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "npu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": 
true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikipedia" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikipedia" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.max_samples", + "template": { + "path": "data_configs[0].load_dataset_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json new file mode 100644 index 00000000..72e4e129 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json @@ -0,0 +1,139 @@ 
+{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "feature-extraction" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 1, + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + 
"enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/google_bert", + "evaluate_input_model": false +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json.config new file mode 100644 index 00000000..ca319b9d --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json.config @@ -0,0 +1,126 @@ +{ + "name": "Convert to DirectML", + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": 
"EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "EvaluationDatasetSubset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json new file mode 100644 index 00000000..7e5e9c73 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json @@ -0,0 +1,168 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "feature-extraction" + }, + "systems": { + "qnn_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "VitisAIExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantization_data_config", + 
"type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 1, + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "bert", + "opt_level": 1, + "optimization_options": { + "enable_gelu": true, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "dynamic_shape_to_fixed": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "sequence_length" + ], + "dim_value": [ + 1, + 128 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue" + } + ] 
+ }, + "OnnxQuantization": { + "type": "OnnxStaticQuantization", + "data_config": "quantization_data_config", + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "uint8", + "quant_type": "OnnxStaticQuantization" + } + }, + "host": "qnn_system", + "target": "qnn_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/google_bert", + "evaluate_input_model": false +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json.config new file mode 100644 index 00000000..19476bf7 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json.config @@ -0,0 +1,273 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "bert/google_bert_qdq_vitis_ai.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD NPU", + "CPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "VitisAIExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + 
"type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.OnnxQuantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[1].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "EvaluationDatasetSubset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + 
}, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json new file mode 100644 index 00000000..da4c6d4f --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json @@ -0,0 +1,163 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "feature-extraction" + }, + "systems": { + "qnn_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantization_data_config", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": 
"latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 1, + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "to_fixed_shape": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "sequence_length" + ], + "dim_value": [ + 1, + 128 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue", + "replacement": -100.0 + }, + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 1, + "optimization_options": { + "enable_gelu": true, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "OnnxQuantization": { + "type": "OnnxStaticQuantization", + "data_config": "quantization_data_config", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "host": "qnn_system", + "target": "qnn_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/google_bert", + "evaluate_input_model": false +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json.config new file mode 100644 index 00000000..45b6868c --- /dev/null +++ 
b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json.config @@ -0,0 +1,273 @@ +{ + "name": "Convert to Qualcomm NPU", + "oliveFile": "bert/google_bert_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.OnnxQuantization", + 
"actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[1].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "EvaluationDatasetSubset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json 
b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json new file mode 100644 index 00000000..5994f683 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json @@ -0,0 +1,128 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "feature-extraction" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "NvTensorRTRTXExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "xnli", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "data_config": "xnli", + "sub_types": [ + { + "name": "avg", + "priority": 1, + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "xnli", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + }, + "dynamic_shape_to_fixed": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "sequence_length" + ], + "dim_value": [ + 1, + 128 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "save_as_external_data": true, + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue" + } + ] + }, + 
"session_params_tuning": { + "type": "OrtSessionParamsTuning", + "io_bind": false, + "data_config": "xnli" + } + }, + "host": "local_system", + "target": "local_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/google_bert_trtrtx", + "log_severity_level": 0, + "evaluate_input_model": false +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json.config new file mode 100644 index 00000000..90a60833 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json.config @@ -0,0 +1,125 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "bert/google_bert_trtrtx.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": 
"data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "EvaluationDatasetSubset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb b/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb new file mode 100644 index 00000000..0ff50f9d --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/openvino_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "inputs = \"This is an example sentence.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "from transformers import AutoModel, AutoTokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def mean_pooling(model_output, attention_mask):\n", + " token_embeddings = torch.tensor(model_output[0])\n", + " input_mask_expanded = attention_mask.unsqueeze(-1).expand_as(token_embeddings).float()\n", + " return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained('google-bert/bert-base-multilingual-cased')\n", + "encoded_input = tokenizer(\n", + " inputs,\n", + " padding=\"max_length\",\n", + " max_length=128,\n", + " truncation=True,\n", + " add_special_tokens=True,\n", + " return_tensors=\"pt\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "input_ids = encoded_input[\"input_ids\"]\n", + "attention_mask = encoded_input[\"attention_mask\"]\n", + "token_type_ids =
encoded_input[\"token_type_ids\"]\n", + "inputs = {\n", + " \"input_ids\": input_ids.long().cpu().numpy(),\n", + " \"attention_mask\": attention_mask.long().cpu().numpy(),\n", + " \"token_type_ids\": token_type_ids.long().cpu().numpy()\n", + "}\n", + "\n", + "outputs = session.run(None, inputs)\n", + "embeds_1 = mean_pooling(outputs, encoded_input['attention_mask'])\n", + "embeds_1 = F.normalize(embeds_1, p=2, dim=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get text embedding from original model, as ground truth.\n", + "model = AutoModel.from_pretrained('google-bert/bert-base-multilingual-cased').eval()\n", + "with torch.no_grad():\n", + " outputs = model(**encoded_input)\n", + " embeds_2 = mean_pooling(outputs, encoded_input['attention_mask'])\n", + " embeds_2 = F.normalize(embeds_2, p=2, dim=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "similarity = F.cosine_similarity(embeds_1, embeds_2).item()\n", + "print(\"Similarity: \", similarity)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/info.yml b/google-bert-bert-base-multilingual-cased/aitk/info.yml new file mode 100644 index 00000000..c5771102 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/info.yml @@ -0,0 +1,23 @@ +keywords: + aitk +arch: bert +recipes: + - file: "bert-base-multilingual-cased_qdq_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "bert-base-multilingual-cased_qdq_amd.json" +
device: npu + ep: VitisAIExecutionProvider + - file: "bert-base-multilingual-cased_context_ov_static.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "bert-base-multilingual-cased_trtrtx.json" + device: gpu + ep: NvTensorRTRTXExecutionProvider + - file: "bert-base-multilingual-cased_dml.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/google-bert/bert-base-multilingual-cased" + version: 1 diff --git a/google-bert-bert-base-multilingual-cased/aitk/model_project.config b/google-bert-bert-base-multilingual-cased/aitk/model_project.config new file mode 100644 index 00000000..41846e12 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/model_project.config @@ -0,0 +1,28 @@ +{ + "workflows": [ + { + "file": "bert-base-multilingual-cased_qdq_qnn.json", + "templateName": "bert-base-multilingual-cased_qdq_qnn" + }, + { + "file": "bert-base-multilingual-cased_qdq_amd.json", + "templateName": "bert-base-multilingual-cased_qdq_amd" + }, + { + "file": "bert-base-multilingual-cased_context_ov_static.json", + "templateName": "bert-base-multilingual-cased_context_ov_static" + }, + { + "file": "bert-base-multilingual-cased_trtrtx.json", + "templateName": "bert-base-multilingual-cased_trtrtx" + }, + { + "file": "bert-base-multilingual-cased_dml.json", + "templateName": "bert-base-multilingual-cased_dml" + } + ], + "modelInfo": { + "id": "huggingface/google-bert/bert-base-multilingual-cased", + "version": 1 + } +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/requirements.txt b/google-bert-bert-base-multilingual-cased/aitk/requirements.txt new file mode 100644 index 00000000..b02be515 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/requirements.txt @@ -0,0 +1,5 @@ +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK +olive-ai +datasets +optimum diff --git a/google-bert-bert-base-multilingual-cased/aitk/user_script.py 
b/google-bert-bert-base-multilingual-cased/aitk/user_script.py new file mode 100644 index 00000000..f7442c2f --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/user_script.py @@ -0,0 +1,83 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Intel Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import datasets +import numpy as np +import torch +from transformers import BertTokenizer + +from olive.data.registry import Registry + +# ------------------------------------------------------------------------- +# Common Dataset +# ------------------------------------------------------------------------- + +seed = 0 +# seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html +# do not set random seed and np.random.seed for aml test, since it will cause aml job name conflict +torch.manual_seed(seed) +# the following are needed only for GPU +torch.cuda.manual_seed(seed) +torch.backends.cudnn.deterministic = True +torch.backends.cudnn.benchmark = False + +# set max sequence length +MAX_SEQ_LENGTH = 128 + +# define the tokenizer +tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-multilingual-cased") +VOCAB_SIZE = len(tokenizer) + +# set default input +default_input = torch.ones(1, MAX_SEQ_LENGTH, dtype=torch.int64) + +# define model inputs +model_inputs = { + "input_ids": default_input, + "attention_mask": default_input, + "token_type_ids": default_input, +} + +# capture input names +INPUT_NAMES = list(model_inputs) + + +@Registry.register_dataset() +def bert_base_multilingual_cased_dataset(data_name, split, max_samples): + # load the raw wikipedia dataset for tuning. Load only the first `max_samples` examples for speed.
+ raw_dataset = datasets.load_dataset(data_name, "20220301.en", split=f"{split}[:{max_samples}]", trust_remote_code=True) + + def _preprocess_fn(examples): + return tokenizer( + examples["text"], + padding="max_length", + max_length=MAX_SEQ_LENGTH, + truncation=True, + ) + + # preprocess the dataset + return raw_dataset.map(_preprocess_fn, batched=True, batch_size=1) + + +def custom_transform_func(data_item): + return { + name: np.asarray([np.array([g.flatten() for g in data_item[name]]).flatten()], dtype=np.int64) + for name in INPUT_NAMES + } + + +def custom_example_func(): + vocab_size = VOCAB_SIZE + batch_size = 1 + sequence_length = MAX_SEQ_LENGTH + + input_ids = torch.randint(0, vocab_size, (batch_size, sequence_length)) + + # Reuse the all-ones default input as attention_mask (every position is treated as a real token) + attention_mask = default_input + + # Reuse the all-ones default input as token_type_ids (not random) + token_type_ids = default_input + + return [input_ids, attention_mask, token_type_ids] diff --git a/google-vit-base-patch16-224/aitk/.gitignore b/google-vit-base-patch16-224/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/google-vit-base-patch16-224/aitk/README.md b/google-vit-base-patch16-224/aitk/README.md new file mode 100644 index 00000000..f6d32027 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/README.md @@ -0,0 +1,14 @@ +# Vision Transformer (ViT) Optimization + +This folder contains examples of ViT optimization using different workflows. + +- QDQ for Qualcomm NPU / AMD NPU +- OpenVINO for Intel NPU + +## Optimization Workflows + +### ViT optimization with QDQ + +This example performs ViT optimization in one workflow.
It performs the optimization pipeline: + +- *Huggingface Model -> Onnx Model -> Quantized Onnx Model* diff --git a/google-vit-base-patch16-224/aitk/_copy.json.config b/google-vit-base-patch16-224/aitk/_copy.json.config new file mode 100644 index 00000000..6a948f91 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/_copy.json.config @@ -0,0 +1,42 @@ +{ + "copies": [ + { + "src": "vit-base-patch16-224_qdq_amd.json.config", + "dst": "vit-base-patch16-224_qdq_qnn.json.config", + "replacements": [ + { + "find": "vit/vit_qdq_vitis_ai.json", + "replace": "vit/vit_qdq.json" + }, + { + "find": "Convert to AMD NPU", + "replace": "Convert to Qualcomm NPU" + } + ] + }, + { + "src": "inference_sample.ipynb", + "dst": "vit-base-patch16-224_dml_inference_sample.ipynb", + "replacements": [ + { + "find": "QNNExecutionProvider", + "replace": "DmlExecutionProvider" + }, + { + "find": "input_name: image", + "replace": "input_name: image.astype(np.float16)" + } + ] + }, + { + "src": "vit-base-patch16-224_dml_inference_sample.ipynb", + "dst": "vit-base-patch16-224_trtrtx_inference_sample.ipynb", + "replacements": [ + { + "find": "DmlExecutionProvider", + "replace": "NvTensorRTRTXExecutionProvider" + } + ] + } + ] +} diff --git a/google-vit-base-patch16-224/aitk/inference_sample.ipynb b/google-vit-base-patch16-224/aitk/inference_sample.ipynb new file mode 100644 index 00000000..650f381d --- /dev/null +++ b/google-vit-base-patch16-224/aitk/inference_sample.ipynb @@ -0,0 +1,209 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", 
+ "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " \"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", 
+ " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + "\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " with torch.no_grad():\n", + " for example in selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = 
session.get_inputs()[0].name\n", + " output_name = session.get_outputs()[0].name\n", + "\n", + " for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python-WCR-win32-x64-3.12.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/google-vit-base-patch16-224/aitk/info.yml b/google-vit-base-patch16-224/aitk/info.yml new file mode 100644 index 00000000..26289b59 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/info.yml @@ -0,0 +1,23 @@ +keywords: + aitk +arch: vit +recipes: + - file: "vit-base-patch16-224_qdq_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "vit-base-patch16-224_qdq_amd.json" + device: npu + ep: VitisAIExecutionProvider + - file: "vit_base_patch16_224_context_ov_static.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "vit-base-patch16-224_trtrtx.json" + device: gpu + ep: NvTensorRTRTXExecutionProvider + - file: "vit-base-patch16-224_dml.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/google/vit-base-patch16-224" + version: 1 diff --git a/google-vit-base-patch16-224/aitk/model_project.config b/google-vit-base-patch16-224/aitk/model_project.config new file mode 100644 index 00000000..7ec62cd3 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/model_project.config @@ -0,0 +1,28 @@ +{ + "workflows": [ + { + "file": "vit-base-patch16-224_qdq_qnn.json", + "templateName": "vit-base-patch16-224_qdq_qnn" + }, + { + "file": "vit-base-patch16-224_qdq_amd.json", + "templateName": "vit-base-patch16-224_qdq_amd" + }, + { + "file": "vit_base_patch16_224_context_ov_static.json", + "templateName": "vit_base_patch16_224_context_ov_static" + }, + { + "file": "vit-base-patch16-224_trtrtx.json", + "templateName": "vit-base-patch16-224_trtrtx" + }, + { + "file": "vit-base-patch16-224_dml.json", + "templateName": "vit-base-patch16-224_dml" + } + ], + "modelInfo": { + "id": "huggingface/google/vit-base-patch16-224", + "version": 1 + } +} diff --git a/google-vit-base-patch16-224/aitk/requirements.txt b/google-vit-base-patch16-224/aitk/requirements.txt new file mode 100644 index 00000000..8992d27f --- 
/dev/null +++ b/google-vit-base-patch16-224/aitk/requirements.txt @@ -0,0 +1,5 @@ +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK +olive-ai +datasets +torchvision diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224.py b/google-vit-base-patch16-224/aitk/vit-base-patch16-224.py new file mode 100644 index 00000000..92751ca1 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224.py @@ -0,0 +1,100 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +from logging import getLogger +from pathlib import Path + +import numpy as np +import torchvision.transforms as transforms +import transformers +from torch import from_numpy +from torch.utils.data import Dataset + +from olive.data.registry import Registry + +logger = getLogger(__name__) + +def get_imagenet_label_map(): + import json + cache_file = Path(f"./cache/data/imagenet_class_index.json") + if not cache_file.exists(): + import requests + imagenet_class_index_url = ( + "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json" + ) + response = requests.get(imagenet_class_index_url) + response.raise_for_status() # Ensure the request was successful + content = response.json() + cache_file.parent.resolve().mkdir(parents=True, exist_ok=True) + with open(cache_file, "w") as f: + json.dump(content, f) + else: + with open(cache_file) as f: + content = json.loads(f.read()) + + return {v[0]: int(k) for k, v in content.items()} + +def adapt_label_for_mini_imagenet(labels: list, label_names: list): + label_map = get_imagenet_label_map() + return [label_map[label_names[x]] for x in labels] + +class ImagenetDataset(Dataset): + def __init__(self, data): + self.images = from_numpy(data["images"]) + 
self.labels = from_numpy(data["labels"]) + + def __len__(self): + return min(len(self.images), len(self.labels)) + + def __getitem__(self, idx): + return {"pixel_values": self.images[idx]}, self.labels[idx] + + +@Registry.register_post_process() +def dataset_post_process(output): + return ( + output.logits.argmax(axis=1) + if isinstance(output, transformers.modeling_outputs.ModelOutput) + else output.argmax(axis=1) + ) + +from transformers import AutoImageProcessor +processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True) + +@Registry.register_pre_process() +def dataset_pre_process(output_data, **kwargs): + shuffle = kwargs.get("shuffle", True) + if shuffle: + seed = kwargs.get("seed", 42) + output_data = output_data.shuffle(seed=seed) + cache_key = kwargs.get("cache_key") + size = kwargs.get("size", 256) + cache_file = None + if cache_key: + cache_file = Path(f"./cache/data/{cache_key}_{output_data.info.dataset_name}_{size}.npz") + if cache_file.exists(): + with np.load(Path(cache_file)) as data: + return ImagenetDataset(data) + + labels = [] + images = [] + for i, sample in enumerate(output_data): + if i >= size: + break + image = sample["image"] + label = sample["label"] + image = image.convert("RGB") + image = processor(image)["pixel_values"][0] + images.append(image) + labels.append(label) + + if(output_data.info.dataset_name == "mini-imagenet"): + labels = adapt_label_for_mini_imagenet(labels, output_data.features["label"].names) + result_data = ImagenetDataset({"images": np.array(images), "labels": np.array(labels)}) + + if cache_file: + cache_file.parent.resolve().mkdir(parents=True, exist_ok=True) + np.savez(cache_file, images=np.array(images), labels=np.array(labels)) + + return result_data diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json new file mode 100644 index 00000000..14b49d34 --- /dev/null +++ 
b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json @@ -0,0 +1,143 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "output" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1000 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + 
"enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/vit" +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json.config b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json.config new file mode 100644 index 00000000..7216c02e --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json.config @@ -0,0 +1,107 @@ +{ + "name": "Convert to DirectML", + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", 
+ "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb new file mode 100644 index 00000000..369bdc7b --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb @@ -0,0 +1,209 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"DmlExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", + "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + 
"from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " \"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", + " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + 
"\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " with torch.no_grad():\n", + " for example in selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = session.get_inputs()[0].name\n", + " output_name = session.get_outputs()[0].name\n", + "\n", + " 
for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image.astype(np.float16)})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python-WCR-win32-x64-3.12.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + 
"nbformat": 4, + "nbformat_minor": 4 +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json new file mode 100644 index 00000000..4e519509 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json @@ -0,0 +1,157 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "output" + ] + } + }, + "systems": { + "qnn_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "VitisAIExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantization_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 256, + "cache_key": "imagedata_quantization" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + 
"num_classes": 1000 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "use_dynamo_exporter": false + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "vit", + "opt_level": 1, + "optimization_options": { + "enable_gelu": true, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "OnnxQuantization": { + "type": "OnnxQuantization", + "data_config": "quantization_data_config", + "activation_type": "uint16", + "precision": "uint8", + "calibrate_method": "MinMax", + "quant_preprocess": true, + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "uint8", + "quant_type": "OnnxStaticQuantization" + } + }, + "host": "qnn_system", + "target": "qnn_system", + "evaluator": "common_evaluator", + "output_dir": "model/vit", + "evaluate_input_model": false, + "cache_dir": "cache" +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json.config b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json.config new file mode 100644 index 00000000..e217a030 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json.config @@ -0,0 +1,238 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "vit/vit_qdq_vitis_ai.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD NPU", + "CPU" + ], + "path": 
"systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "VitisAIExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.OnnxQuantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "use_dynamo_exporter": false + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + 
"name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.size", + "template": { + "path": "data_configs[1].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json new file mode 100644 index 00000000..b6048eec --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json @@ -0,0 +1,151 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "output" + ] + } + }, + "systems": { + "qnn_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": 
"quantization_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 256, + "cache_key": "imagedata_quantization" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1000 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 1, + "optimization_options": { + "enable_gelu": true, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "OnnxQuantization": { + "type": 
"OnnxQuantization", + "data_config": "quantization_data_config", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "host": "qnn_system", + "target": "qnn_system", + "evaluator": "common_evaluator", + "output_dir": "model/vit", + "evaluate_input_model": false, + "cache_dir": "cache" +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json.config b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json.config new file mode 100644 index 00000000..1a1e2951 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json.config @@ -0,0 +1,235 @@ +{ + "name": "Convert to Qualcomm NPU", + "oliveFile": "vit/vit_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": 
"data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.OnnxQuantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.size", + "template": { + "path": "data_configs[1].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json new file mode 100644 index 00000000..dd5972c5 --- /dev/null +++ 
b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json @@ -0,0 +1,113 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "NvTensorRTRTXExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 256, + "cache_key": "imagedata_quantization" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "quantize_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "quantize_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "use_dynamo_exporter": false + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + }, + "session_params_tuning": { + "type": "OrtSessionParamsTuning", + "io_bind": false, + "data_config": "quantize_data_config" + } + }, + "host": "local_system", + "target": "local_system", + "evaluator": 
"common_evaluator", + "output_dir": "model/vit-base-patch16-224", + "cache_dir": "cache", + "evaluate_input_model": false +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json.config b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json.config new file mode 100644 index 00000000..2d9d01c2 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json.config @@ -0,0 +1,106 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "vit/vit_trtrtx.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + 
"type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..b74e7976 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb @@ -0,0 +1,209 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", + "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " 
transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " \"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", + " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + "\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = 
[]\n", + " with torch.no_grad():\n", + " for example in selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = session.get_inputs()[0].name\n", + " output_name = session.get_outputs()[0].name\n", + "\n", + " for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image.astype(np.float16)})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return 
accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # ONNX model produced by the TensorRT for RTX workflow\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python-WCR-win32-x64-3.12.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json b/google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json new file mode 100644 index 00000000..2deef015 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json @@ -0,0 +1,144 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": 
"npu", + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantization_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 256, + "cache_key": "imagedata_quantization" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1000 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "ov_convert": { + "type": "OpenVINOConversion", + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_model": "vit_base_patch16_224", + "compress_to_fp16": true + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "static": true + }, + "ov_quantize": { + "type": "OpenVINOQuantization", + "target_device": "npu", + "data_config": "quantization_data_config", + "model_type": "TRANSFORMER", + "extra_configs": [ + { + 
"advanced_quantization_parameters": { + "smooth_quant_alpha": 0.6 + } + } + ] + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "ov_version": "2025.1" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/vit_base_patch16_224_ov_static" +} diff --git a/google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json.config b/google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json.config new file mode 100644 index 00000000..0186b350 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json.config @@ -0,0 +1,217 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "vit/openvino/vit_base_patch16_224_context_ov_static.json", + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOConversion": "ov_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.ov_quantize.target_device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + 
"parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.ov_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.ov_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + 
"DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.size", + "template": { + "path": "data_configs[1].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/intel-bert-base-uncased-mrpc/aitk/requirements.txt b/intel-bert-base-uncased-mrpc/aitk/requirements.txt index 0ce2fda0..bad441ca 100644 --- a/intel-bert-base-uncased-mrpc/aitk/requirements.txt +++ b/intel-bert-base-uncased-mrpc/aitk/requirements.txt @@ -1,3 +1,4 @@ -# For a full requirements, see AITK +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK olive-ai optimum diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/.gitignore b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md new file mode 100644 index 00000000..6ae6ffb0 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md @@ -0,0 +1,48 @@ +# Laion Clip optimization + +This folder contains examples of Laion Clip optimization using different workflows. 
+ +- Text and vision model QDQ for Qualcomm NPU +- QDQ for AMD NPU +- OpenVINO for Intel NPU + +## Laion Clip text optimization with QDQ for Qualcomm NPU + +This example performs Laion Clip optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +### Evaluation result + +The quantization uses 256 samples from the train split of the imagenet-1k dataset and the evaluation uses 256 samples from the test split of the imagenet-1k dataset. + + +| Activation Type  | Weight Type  | Size  | Latency ms (avg)  | +| --------------------- | ----------------- | ---------- | ---------------------- | +| QUInt16 | QUInt8 | 100 | 6.53724 | + +## Laion Clip vision optimization with QDQ for Qualcomm NPU + +This example performs Laion Clip optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +### Evaluation result + +The quantization uses 256 samples from the train split of the imagenet-1k dataset and the evaluation uses 256 samples from the test split of the imagenet-1k dataset. + + +| Activation Type  | Weight Type  | Size  | Latency ms (avg)  | +| --------------------- | ----------------- | ---------- | ---------------------- | +| QUInt16 | QUInt8 | 100 | 20.13231 | + + +## Laion Clip optimization with QDQ for AMD NPU + +This example performs Laion Clip optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +## Laion Clip optimization with OpenVINO + +This example performs Laion Clip optimization with OpenVINO in one workflow for Intel NPU. 
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config new file mode 100644 index 00000000..4629da4e --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config @@ -0,0 +1,224 @@ +{ + "copies": [ + { + "src": "../../../openai/clip-vit-base-patch16/1/model_project.config", + "dst": "model_project.config", + "replacements": [ + { + "find": "openai_clip", + "replace": "laion_clip" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_text_qnn_inference_sample.ipynb", + "dst": "laion_clip_text_qnn_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_text_qnn.json", + "dst": "laion_clip_text_qnn.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_text_qnn.json.config", + "dst": "laion_clip_text_qnn.json.config", + "replacements": [ + { + "find": "clip/qdq/openai_clip_text_b16_qdq.json", + "replace": "clip/qdq/laion_clip_text_b32_qdq.json" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_vision_qnn_inference_sample.ipynb", + "dst": "laion_clip_vision_qnn_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_vision_qnn.json", + "dst": "laion_clip_vision_qnn.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_vision_qnn.json.config", + "dst": "laion_clip_vision_qnn.json.config", + 
"replacements": [ + { + "find": "clip/qdq/openai_clip_vision_b16_qdq.json", + "replace": "clip/qdq/laion_clip_vision_b32_qdq.json" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov_inference_sample.ipynb", + "dst": "laion_clip_ov_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov.json", + "dst": "laion_clip_ov.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + }, + { + "find": "openai_clip", + "replace": "laion_clip" + }, + { + "find": "\"device\": \"npu\"\n", + "replace": "\"device\": \"npu\", \"library\": \"transformers\"\n" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov.json.config", + "dst": "laion_clip_ov.json.config", + "replacements": [ + { + "find": "clip/openvino/clip_vit_base_patch16_context_ov_static.json", + "replace": "clip/openvino/clip_vit_b32_laion2b_s34B_b79k_context_ov_static.json" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_qdq_amd_inference_sample.ipynb", + "dst": "laion_clip_qdq_amd_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_qdq_amd.json", + "dst": "laion_clip_qdq_amd.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_qdq_amd.json.config", + "dst": "laion_clip_qdq_amd.json.config", + "replacements": [ + { + "find": "clip/openai_clip-vit-base-patch16_ptq_qdq_vitis_ai.json", + "replace": "clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qdq_vitis_ai.json" + } + ] + }, + { + 
"src": "../../../openai/clip-vit-base-patch16/1/openai_clip_trtrtx.json", + "dst": "laion_clip_trtrtx.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_trtrtx.json.config", + "dst": "laion_clip_trtrtx.json.config", + "replacements": [ + { + "find": "clip/openai_clip-vit-base-patch16_trtrtx.json", + "replace": "clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_trtrtx.json" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_trtrtx_inference_sample.ipynb", + "dst": "laion_clip_trtrtx_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_dml.json", + "dst": "laion_clip_dml.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_dml.json.config", + "dst": "laion_clip_dml.json.config", + "replacements": [ + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_dml_inference_sample.ipynb", + "dst": "laion_clip_dml_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/clip_script.py", + "dst": "clip_script.py" + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/user_script.py", + "dst": "user_script.py" + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov.py", + "dst": "laion_clip_ov.py" + }, + { + "src": "../../../openai/clip-vit-base-patch16/1/README.md", + "dst": "README.md", + "replacements": [ + { + "find": "Openai", + "replace": "Laion" + } + ] + }, + { + "src": 
"../../../openai/clip-vit-base-patch16/1/requirements.txt", + "dst": "requirements.txt" + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/clip_script.py b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/clip_script.py new file mode 100644 index 00000000..6f775697 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/clip_script.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from collections import OrderedDict +from itertools import chain + +import torch +from transformers import ( + AutoProcessor, + CLIPTextModelWithProjection, + CLIPVisionModelWithProjection, +) + +from olive.data.component.dataset import BaseDataset +from olive.data.registry import Registry + +HF_MODEL_SUBFOLDER_MAPPING = { + "sentence-transformers/clip-ViT-B-32": "0_CLIPModel", +} + + +def load_image_encoder(model_name): + return CLIPVisionModelWithProjection.from_pretrained( + model_name, + subfolder=HF_MODEL_SUBFOLDER_MAPPING.get(model_name, ""), + ).eval() + + +def load_text_encoder(model_name): + if model_name == "sentence-transformers/clip-ViT-B-32-multilingual-v1": + from sbert_clip_script import SDistilBertTextEncoder + + return SDistilBertTextEncoder(model_name).eval() + + return CLIPTextModelWithProjection.from_pretrained( + model_name, + subfolder=HF_MODEL_SUBFOLDER_MAPPING.get(model_name, ""), + ).eval() + + +def hfdataset_pre_process_for_clip( + dataset, + processor, + torch_model=None, + image_col: str | None = None, + caption_col: str | None = None, + label_col: str = "label", + max_samples: int | None = None, + max_length: int = 77, + batch_size: int = 32, +): + def generate_inputs(sample, indices): + captions = sample.get(caption_col, None) + images = sample.get(image_col, None) + + kwargs = { + "padding": "max_length", + "max_length": max_length, + "truncation": True, + "add_special_tokens": True, + "return_tensors": "pt", + } + if images: + kwargs["images"] = [img.convert("RGB") for img in images] + if captions: + kwargs["text"] = 
list(chain([x[0] for x in captions])) + + encoded_input = processor(**kwargs) + + return { + **encoded_input, + label_col: torch_model(**encoded_input)[0] if torch_model else sample.get(label_col, indices), + } + + if max_samples is not None and max_samples < len(dataset): + dataset = dataset.select(range(max_samples)) + + tokenized_datasets = dataset.map( + generate_inputs, + batched=True, + batch_size=batch_size, + with_indices=True, + remove_columns=dataset.column_names, + desc="Processing dataset", + ) + tokenized_datasets.set_format("torch", output_all_columns=True) + + return tokenized_datasets + + +@Registry.register_pre_process() +def pre_process_dataset( + dataset, + model_name: str, + generate_ground_truth: bool = False, + image_col: str | None = None, + caption_col: str | None = None, + label_col: str = "label", + max_samples: int | None = None, + max_length: int = 77, + **kwargs, +): + if image_col is None and caption_col is None: + raise ValueError("Either image_col or caption_col must be provided.") + + if generate_ground_truth: + if image_col and caption_col: + raise ValueError("Can not generate two types of embedding at the same time.") + + torch_model = load_image_encoder(model_name) if image_col else load_text_encoder(model_name) + else: + torch_model = None + + processor = AutoProcessor.from_pretrained(model_name) + dataset = hfdataset_pre_process_for_clip( + dataset, + processor, + torch_model=torch_model, + image_col=image_col, + caption_col=caption_col, + label_col=label_col, + max_length=max_length, + max_samples=max_samples, + ) + return BaseDataset(dataset, label_col) + + +@Registry.register_post_process() +def embed_post_process(output): + """Post-processing for CLIP output.""" + match output: + case dict() | OrderedDict() as out: + if "embeds" in out: + return out["embeds"] + elif "text_embeds" in out: + return out["text_embeds"] + elif "image_embeds" in out: + return out["image_embeds"] + case torch.Tensor(): + return 
output.argmax(dim=-1) + raise ValueError(f"Unsupported output type: {type(output)}") + + +def eval_similarity_degrad(output, targets, batch_size=1024): + import torch.nn.functional as F + + preds = output.preds + scores = [ + F.cosine_similarity(preds[i : i + batch_size], targets[i : i + batch_size]) + for i in range(0, preds.size(0), batch_size) + ] + return {"percentage": f"{100.0 - torch.mean(torch.cat(scores)) * 100.0:.2f}"} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml new file mode 100644 index 00000000..337d5a41 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml @@ -0,0 +1,26 @@ +keywords: + aitk +arch: clip +recipes: + - file: "laion_clip_text_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "laion_clip_vision_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "laion_clip_qdq_amd.json" + device: npu + ep: VitisAIExecutionProvider + - file: "laion_clip_ov.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "laion_clip_trtrtx.json" + device: gpu + ep: NvTensorRTRTXExecutionProvider + - file: "laion_clip_dml.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + version: 1 diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json new file mode 100644 index 00000000..27847826 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json @@ -0,0 +1,192 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + 
"int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + 
"enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json.config new file mode 100644 index 00000000..ed09dcf4 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json.config @@ -0,0 +1,87 @@ +{ + "name": "Convert to DirectML", + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": 
"data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb new file mode 100644 index 00000000..c33db85d --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a 
cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json 
b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json new file mode 100644 index 00000000..b1c07ec0 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json @@ -0,0 +1,125 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "user_script": "laion_clip_ov.py", + "load_dataset_config": { + "type": "conceptual_captions_dataset", + "data_name": "google-research-datasets/conceptual_captions", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + }, + "dataloader_config": { + "batch_size": 1, + "drop_last": true + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 10, + "end": 20 + }, + "dataloader_config": { "type": "no_auto_batch_dataloader" }, + "post_process_data_config": { "type": "clip_post_process" } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } } + ] + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { "name": "avg", "priority": 2, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }, + { "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } } + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu", "library": "transformers" + } + }, + "ov_quantize": { + "type": 
"OpenVINOQuantization", + "target_device": "npu", + "data_config": "quantize_data_config", + "model_type": "TRANSFORMER", + "user_script": "laion_clip_ov.py", + "transform_fn": "custom_transform_func", + "extra_configs": [ + { + "advanced_quantization_parameters": { + "smooth_quant_alpha": 0.6 + } + } + ] + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "static": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "ov_version": "2025.1" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip_vit_base_patch16_context_ov_static" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json.config new file mode 100644 index 00000000..fb471c3b --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json.config @@ -0,0 +1,174 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "clip/openvino/clip_vit_b32_laion2b_s34B_b79k_context_ov_static.json", + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", 
+ "path": "passes.ov_quantize.target_device", + "value": "cpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "gpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "npu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "google-research-datasets/conceptual_captions" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "google-research-datasets/conceptual_captions" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + 
"name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.py b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.py new file mode 100644 index 00000000..d1971b50 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.py @@ -0,0 +1,124 @@ +from io import BytesIO + +import requests +import torch +from datasets import load_dataset +from PIL import Image +from requests.packages.urllib3.exceptions import InsecureRequestWarning +from tqdm import tqdm +from transformers import CLIPModel, CLIPProcessor + +from olive.data.registry import Registry + +requests.packages.urllib3.disable_warnings(InsecureRequestWarning) + +# ------------------------------------------------------------------------- +# Common Dataset +# ------------------------------------------------------------------------- + +seed = 0 +# seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html +# do not set random seed and np.random.seed for aml test, since it will cause aml job name conflict +torch.manual_seed(seed) +# the following are needed only for GPU +torch.cuda.manual_seed(seed) +torch.backends.cudnn.deterministic = True +torch.backends.cudnn.benchmark = False + + +def check_text_data(data): + """Check if the given data is text-based.""" + if isinstance(data, str): + return True + if isinstance(data, list): + return all(isinstance(x, str) for x in data) + return False + + +def get_pil_from_url(url): + """Download and convert an image from a URL to a PIL Image object.""" + response = requests.get(url, verify=True, timeout=20) + image = Image.open(BytesIO(response.content)) + return image.convert("RGB") + + +def wrap_collate_fn(processor, max_length): + def collate_fn(example, image_column="image_url", text_column="caption"): + """Preprocess an example 
by loading and transforming image and text data. + + Check if the text data in the example is valid by calling the `check_text_data` function. + Download the image specified by the URL in the image_column by calling the `get_pil_from_url` function. + If there is any error during the download process, return None. + Return the preprocessed inputs with transformed image and text data. + """ + if len(example) != 1: + raise ValueError(f"Expected 'example' to have exactly one element, but got {len(example)}.") + example = example[0] + + if not check_text_data(example[text_column]): + raise ValueError("Text data is not valid") + + url = example[image_column] + try: + image = get_pil_from_url(url) + w, h = image.size + if h == 1 or w == 1: + return None + except Exception: + return None + + inputs = processor(text=example[text_column], images=[image], return_tensors="pt", padding=True) + if inputs["input_ids"].shape[1] > max_length: + return None + return inputs + + return collate_fn + + +def prepare_calibration_data(dataloader, init_steps): + """Prepare calibration data from a dataloader for a specified number of initialization steps. + + Iterate over the dataloader, fetching batches and storing the relevant data. 
+ """ + data = [] + with tqdm(total=init_steps) as pbar: + for batch in dataloader: + if len(data) == init_steps: + break + if batch: + pbar.update(1) + with torch.no_grad(): + data.append( + { + "input_ids": batch["input_ids"].to("cpu"), + "pixel_values": batch["pixel_values"].to("cpu"), + "attention_mask": batch["attention_mask"].to("cpu"), + } + ) + return data + + +@Registry.register_dataset() +def conceptual_captions_dataset(data_name,opt_init_steps=200, max_train_samples=1000, **kwargs): + """Prepare a vision-text dataset for quantization.""" + dataset = load_dataset(data_name, trust_remote_code=True) + model_path = kwargs.get("model_path") + if not model_path: + raise ValueError( + "The 'model_path' parameter is required in data_configs.load_dataset_config but was not provided." + ) + model = CLIPModel.from_pretrained(model_path) + processor = CLIPProcessor.from_pretrained(model_path) + max_length = model.config.text_config.max_position_embeddings + train_dataset = dataset["train"].shuffle(seed=seed) + collate_fn = wrap_collate_fn(processor, max_length) + dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) + return prepare_calibration_data(dataloader, opt_init_steps) + + +def custom_transform_func(data_item): + np_inputs = {} + for inp in data_item: + # Drop the first dimension using slicing + np_inputs[inp] = data_item[inp].numpy()[0, ...] 
+ return np_inputs diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb new file mode 100644 index 00000000..df300a10 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/openvino_model_quant_st.onnx\"\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " 
session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json new file mode 100644 index 00000000..173e2962 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json @@ -0,0 +1,209 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "VitisAIExecutionProvider" + ] + } 
+ ] + } + }, + "data_configs": [ + { + "name": "quant_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 1, + "optimization_options": { + "enable_gelu": true, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + 
}, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue" + }, + { + "surgeon": "PowReduceSumPowDiv2LpNorm" + } + ] + }, + "quantization": { + "type": "OnnxStaticQuantization", + "quant_preprocess": true, + "data_config": "quant_data_config", + "activation_type": "uint16", + "precision": "uint8", + "calibrate_method": "MinMax", + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "uint8", + "quant_type": "OnnxStaticQuantization" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip_vit_base_patch16" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json.config new file mode 100644 index 00000000..a85c1762 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json.config @@ -0,0 +1,195 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qdq_vitis_ai.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD NPU", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "VitisAIExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation 
Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": 
"passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].load_dataset_config.end", + "template": { + "path": "data_configs[1].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb new file mode 100644 index 00000000..b5dd1398 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"VitisAIExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import 
numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json new file mode 100644 index 00000000..47cfd022 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json @@ -0,0 +1,193 @@ +{ + "input_model": { + "type": "PytorchModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "generative": false, + "io_config": { + "input_names": [ + "input_ids", + "attention_mask" + ], + "input_shapes": [ + [ + 1, + 77 + ], + [ + 1, + 77 + ] + ], + "input_types": [ + "int32", + "int32" + ], + "output_names": [ + "embeds", + "last_hidden_state" + ] + }, + "model_loader": "load_text_encoder", + "model_script": "clip_script.py" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "host": "host_system", + "target": "host_system", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "log_to_file": false, + "data_configs": [ + { + "name": "calib_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "nlphuji/flickr30k", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "caption_col": "caption", + "max_length": 77, + "max_samples": 12 + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + }, + { + "name": "eval_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "nlphuji/flickr30k", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "generate_ground_truth": true, + "caption_col": "caption", + "max_length": 77, + "max_samples": 100 + }, + "post_process_data_config": { + "type": "embed_post_process" + }, + "dataloader_config": { + "batch_size": 1 + }, + 
"user_script": "clip_script.py" + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "degrad", + "type": "custom", + "data_config": "eval_data", + "sub_types": [ + { + "name": "percentage", + "priority": 1, + "higher_is_better": false + } + ], + "user_config": { + "user_script": "clip_script.py", + "metric_func": "eval_similarity_degrad" + } + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { + "name": "avg", + "priority": 2, + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + }, + { + "name": "p90", + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + }, + "to_fixed_shape": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "sequence_length" + ], + "dim_value": [ + 1, + 77 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue", + "replacement": -100.0 + }, + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 1, + "optimization_options": { + "enable_gelu": false, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "quantization": { + "type": "OnnxStaticQuantization", + "data_config": "calib_data", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "cache_dir": "cache", + "output_dir": "model/clip_text" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json.config new file mode 100644 
index 00000000..d312cc7a --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn.json.config @@ -0,0 +1,235 @@ +{ + "name": "Convert Text Model to Qualcomm NPU", + "oliveFile": "clip/qdq/laion_clip_text_b32_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.host_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "test" + ], + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + 
"type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "test" + ], + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb new file mode 100644 index 00000000..aab3b532 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb @@ -0,0 +1,141 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43751a72", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + 
"from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "text_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_text_embedding(text):\n", + " inputs = processor(\n", + " text=text,\n", + " padding=\"max_length\",\n", + " max_length=77,#text_model.sequence_length,\n", + " truncation=True,\n", + " add_special_tokens=True,\n", + " return_tensors=\"np\",\n", + " )\n", + " output = text_model.run(None, {\n", + " \"input_ids\": inputs[\"input_ids\"].astype(np.int32),\n", + " \"attention_mask\": inputs[\"attention_mask\"].astype(np.int32),\n", + " })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", +
"# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because for static quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_text_embedding(source)\n", + " scores = []\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_text_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of sentence {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(\"a photo containing two cats\", [\"a photo of tshirt\", \"a photo of two cats\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json new file mode 100644 index 00000000..f5c79241 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json @@ -0,0 +1,173 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [
+ 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "NvTensorRTRTXExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quant_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 10, + "end": 20 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + 
"onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + }, + "session_params_tuning": { + "type": "OrtSessionParamsTuning", + "io_bind": false, + "data_config": "quant_data_config" + } + }, + "host": "local_system", + "target": "local_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/clip-vit-base-patch16", + "evaluate_input_model": false +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json.config new file mode 100644 index 00000000..55b6e418 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json.config @@ -0,0 +1,86 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_trtrtx.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": 
"data_configs[1].load_dataset_config.end", + "template": { + "path": "data_configs[1].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..c4c32324 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, 
device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json new file mode 100644 index 00000000..20f32514 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json @@ -0,0 +1,186 @@ +{ + "input_model": { + "type": "PytorchModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "generative": 
false, + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "embeds" + ] + }, + "model_loader": "load_image_encoder", + "model_script": "clip_script.py" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "host": "host_system", + "target": "host_system", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "log_to_file": false, + "data_configs": [ + { + "name": "calib_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "image_col": "image", + "max_samples": 12 + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + }, + { + "name": "eval_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "generate_ground_truth": true, + "image_col": "image", + "max_samples": 100 + }, + "post_process_data_config": { + "type": "embed_post_process" + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "degrad", + "type": "custom", + "data_config": "eval_data", + "sub_types": [ + { + "name": "percentage", + "priority": 1, + "higher_is_better": false + } + ], + "user_config": { + "user_script": "clip_script.py", + "metric_func": "eval_similarity_degrad", + "metric_func_kwargs": { + "batch_size": 32 + } + } + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { + "name": "avg", + "priority": 2, + "metric_config": { + 
"warmup_num": 20, + "repeat_test_num": 100 + } + }, + { + "name": "p90", + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + }, + "to_fixed_shape": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "num_channels", + "height", + "width" + ], + "dim_value": [ + 1, + 3, + 224, + 224 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 1, + "optimization_options": { + "enable_gelu": false, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "quantization": { + "type": "OnnxStaticQuantization", + "data_config": "calib_data", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "cache_dir": "cache", + "output_dir": "model/clip_vision" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json.config new file mode 100644 index 00000000..6308658b --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json.config @@ -0,0 +1,237 @@ +{ + "name": "Convert Vision Model to Qualcomm NPU", + "oliveFile": "clip/qdq/laion_clip_vision_b32_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.host_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": 
false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation 
Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb new file mode 100644 index 00000000..3fc7a253 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3c18a7d6", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "vision_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_image_embedding(image):\n", + " inputs = processor(images=image, return_tensors=\"np\")\n", + " output = vision_model.run(None, { \"pixel_values\": inputs[\"pixel_values\"] })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because, for static 
quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_image_embedding(source)\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_image_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of image {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16868fbd-e447-4866-af7d-eb6e49975bcc", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "\n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07076b9a", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000208833.jpg\"\n", + "image1 = Image.open(requests.get(url, stream=True).raw)\n", + "image1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c10de7cd", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000125690.jpg\"\n", + "image2 = Image.open(requests.get(url, stream=True).raw)\n", + "image2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(image, [image1, image2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } 
+ }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config new file mode 100644 index 00000000..add4c18d --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config @@ -0,0 +1,32 @@ +{ + "workflows": [ + { + "file": "laion_clip_text_qnn.json", + "templateName": "laion_clip_text_qnn" + }, + { + "file": "laion_clip_vision_qnn.json", + "templateName": "laion_clip_vision_qnn" + }, + { + "file": "laion_clip_qdq_amd.json", + "templateName": "laion_clip_qdq_amd" + }, + { + "file": "laion_clip_ov.json", + "templateName": "laion_clip_ov" + }, + { + "file": "laion_clip_trtrtx.json", + "templateName": "laion_clip_trtrtx" + }, + { + "file": "laion_clip_dml.json", + "templateName": "laion_clip_dml" + } + ], + "modelInfo": { + "id": "huggingface/laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "version": 1 + } +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/requirements.txt b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/requirements.txt new file mode 100644 index 00000000..163d793e --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/requirements.txt @@ -0,0 +1,7 @@ +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK +olive-ai +cachetools==5.5.0 +nltk>=3.9.1 +accelerate>=1.4.0 +pillow>=10.0.1 diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/user_script.py b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/user_script.py new file mode 100644 index 00000000..2d0051f0 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/user_script.py @@ -0,0 +1,64 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# --------------------------------------------------------------------------
+import numpy as np
+import torch
+from datasets import load_dataset
+from torch.utils.data import Dataset
+from transformers import CLIPProcessor
+
+from olive.data.registry import Registry
+
+
+class CLIPDataset(Dataset):  # each sample pairs one image with the captions of 10 consecutive rows
+    def __init__(
+        self,
+        model_name,  # checkpoint name handed to CLIPProcessor.from_pretrained
+        dataset_name,  # dataset name handed to datasets.load_dataset
+        start=0,  # first row of the "test" split to serve
+        end=500,  # one past the last row to serve
+        image_size=(224, 224),  # every image is resized to this before processing
+    ):
+        assert 0 <= start < end
+        self.start = start
+        self.end = end
+        self.model_name = model_name
+        self.dataset_name = dataset_name
+        self.processor = CLIPProcessor.from_pretrained(self.model_name)
+        self.length = self.end - self.start  # number of samples reported by __len__
+        self.image_size = image_size
+        self.dataset = load_dataset(self.dataset_name, split=f"test[{self.start}:{self.end + 10}]")  # begin at `start` so idx 0 is row `start`; 10 extra rows keep the last caption window in range
+
+    def __len__(self):
+        return self.length
+
+    def __getitem__(self, idx):  # returns (model input dict, int32 target tensor)
+        text_inputs = self.processor(
+            text=[" ".join(item) for item in self.dataset[idx : idx + 10]["caption"]],  # presumably each caption entry is a list of strings - TODO confirm
+            return_tensors="np",
+            padding="max_length",
+            truncation=True,
+        )
+
+        image_input = self.processor(images=self.dataset[idx]["image"].resize(self.image_size), return_tensors="np")
+        model_inputs = [
+            {
+                "input_ids": text_inputs["input_ids"].astype(np.int64),
+                "pixel_values": image_input["pixel_values"],
+                "attention_mask": text_inputs["attention_mask"].astype(np.int64),
+            }
+        ]
+
+        target = torch.Tensor([0]).to(torch.int32)  # the image's own caption is the first entry of the window, hence label 0
+        return model_inputs[0], target
+
+
+@Registry.register_dataset()
+def clip_dataset(**kwargs):  # registry entry point; forwards all keyword arguments to CLIPDataset
+    return CLIPDataset(**kwargs)
+
+
+@Registry.register_post_process()
+def clip_post_process(output):  # index of the highest-scoring caption for each image
+    return output["logits_per_image"].argmax(axis=-1)
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/.gitignore b/meta-llama-Llama-3.2-1B-Instruct/aitk/.gitignore
new file mode 100644
index 00000000..48c03882
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/.gitignore
@@ -0,0 +1,5 @@
+__pycache__
+/cache
+/history/*/*
+!/history/*/history.config
+!/history/*/olive_config.json
diff --git
a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md new file mode 100644 index 00000000..1355c54f --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md @@ -0,0 +1,160 @@ +# Llama-3.2-1B-Instruct Model Optimization + +This repository demonstrates the optimization of the [Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) model using **post-training quantization (PTQ)** techniques. The optimization process is divided into three main workflows: + +- QDQ for AMD NPU +- PTQ + AOT for QNN NPU + + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** +- OpenVINO for Intel NPU + + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` + +## **QDQ Model with 4-bit Weights & 16-bit Activations** + +This workflow produces an ONNX QDQ model that is agnostic to the target hardware and accelerator, making it suitable for general inference. + +### **Optimization Process** + +The model is optimized using **weight-only quantization** and **activation quantization** for efficient deployment. The process includes: + +1. **Weight Rotation ([QuaRot](https://arxiv.org/abs/2404.00456))** + - Reduces outliers from weights and hidden states to enhance quantization efficiency. + +2. **4-bit Per-Channel Symmetric Quantization ([GPTQ](https://arxiv.org/abs/2210.17323))** + - Reduces transformer layer size while preserving accuracy. + +3. **ONNX Graph Capture** + - Exports the model to ONNX for further optimization. + +4. **4-bit Block-wise Quantization** + - Applies weight-only quantization to the **embedding layer** and **language modeling head**. + +5. **16-bit Activation Quantization** + - Uses 16-bit activations to balance precision and efficiency. + +The final output is a **QDQ model** with **4-bit weights** and **16-bit activations**. 
This model also leverages [GroupQueryAttention (GQA)](https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.GroupQueryAttention) for efficient long-context processing and long-sequence generation. + +### **Handling Dynamic and Static Input Shapes** + +NPUs require **precompiled graphs**, meaning the model must use **static input shapes**. However, **text generation** involves two distinct processing stages: + +- **Prefill (Prompt Processing)**: Processes multiple tokens simultaneously. +- **Token Generation (Iteration)**: Processes one token at a time. + +To support both efficiently, we create **two model instances**: +1. **Prefill model**: Optimized for batch processing. +2. **Token generation model**: Optimized for one-token-at-a-time inference. + +## **PTQ + AOT Compilation for Qualcomm NPUs using QNN EP** + +This process extends the [**QDQ Model with 4-bit Weights & 16-bit Activations**](#qdq-model-with-4-bit-weights--16-bit-activations) by compiling it specifically for **Qualcomm NPUs** using the **QNN Execution Provider**. + +### **Resource Optimization Strategy** + +To maximize efficiency while supporting dynamic input handling: + +- **Embedding Layer & Language Model Head** → Executed on CPU (handles dynamic input). +- **Transformer Layers** → Executed on NPU (requires static input shapes). +- **Weight Sharing** → Prefill & token generation models reuse weights to minimize memory usage. + +> ⚠️ **Note:** GQA is an ONNX Runtime *contrib operator* and must be executed on the CPU. The model graph is partitioned into **CPU (GQA nodes)** and **NPU (other nodes)** for execution. + +### **Compilation for Qualcomm NPU Deployment** + +Once optimized, the model is compiled for Qualcomm NPUs using **ONNX Runtime QNNExecutionProvider**. The steps include: + +1. **Split the Quantized Model** → Divide into three parts: + - **Embedding Layer** + - **Transformer Layers** + - **Language Model Head** +2. 
**Set Static Input Shapes**: + - **(1, 64)** for prefill (batch size, sequence length). + - **(1, 1)** for token generation. +3. **Compile using QNNExecutionProvider**: + - Leverages **weight sharing** across the prefill and token generation models. + +### **Usage** + +This workflow is configured using the `qnn_config.json` file. It contains all of the quantization and compilation steps. It requires two separate Python environments described below. + +#### A workable version + +- python=3.10 +- CUDA=12.1 +- cudnn=9.2.0 + +#### Quantization Python Environment Setup + +Quantization is resource-intensive and requires GPU acceleration. In an [x64 Python environment with Olive installed](https://github.com/microsoft/Olive/blob/main/examples/README.md#important), install the required packages: + +```bash +# Install common dependencies +pip install -r requirements.txt + +# Install ONNX Runtime GPU packages +pip install "onnxruntime-gpu>=1.21.0" "onnxruntime-genai-cuda>=0.6.0" + +# AutoGPTQ: Install from source (stable package may be slow for weight packing) +# Disable CUDA extension build (not required) +# Linux +export BUILD_CUDA_EXT=0 +# Windows +# set BUILD_CUDA_EXT=0 + +# Install AutoGPTQ from source +pip install --no-build-isolation git+https://github.com/PanQiWei/AutoGPTQ.git +``` + +> ⚠️ Only set up the environment and install the packages. Do not run the `olive run` command at this point. + +#### AOT Compilation Python Environment Setup + +Model compilation using QNN Execution Provider requires a Python environment with onnxruntime-qnn installed. 
In a separate Python environment with Olive installed, install the required packages: + +```bash +# Install ONNX Runtime QNN +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +``` + +Replace `/path/to/qnn/env/bin` in `qnn_config.json` with the path to the directory containing your QNN environment's Python executable. This path can be found by running the following command in the environment: + +```bash +# Linux +command -v python +# Windows +# where python +``` + +This command will return the path to the Python executable. Set the parent directory of the executable as the `/path/to/qnn/env/bin` in the config file. + +#### **Run the Quantization + Compilation Config** + +Activate the **Quantization Python Environment** and run the workflow: + +```bash +olive run --config qnn_config.json +``` + +Olive will run the AOT compilation step in the **AOT Compilation Python Environment** specified in the config file using a subprocess. All other steps will run in the **Quantization Python Environment** natively. + +✅ Optimized model saved in: `./model` + +> ⚠️ If optimization fails due to out of memory, please remove `calibration_providers` in config file. + +> ⚠️ If optimization fails during context binary generation, rerun the command. The process will resume from the last completed step. + +### **Inference** + +The optimized model can be used for inference using ONNX Runtime QNNExecutionProvider and ONNX Runtime GenAI. 
**Inference must be run on a Windows Copilot+ PC with a Qualcomm NPU.** + +#### **Install Required Packages (arm64 Python)** +```bash +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +pip install "onnxruntime-genai>=0.7.0rc2" +``` + +#### **Run Console-Based Chat Interface** +Execute the provided `inference_sample.ipynb` notebook. + + diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config new file mode 100644 index 00000000..b6457585 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config @@ -0,0 +1,160 @@ +{ + "copies": [ + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/model_project.config", + "dst": "model_project.config", + "replacements": [ + { + "find": "deepseek_qnn_config", + "replace": "llama3_2_qnn_config" + }, + { + "find": "deepseek_vitis_ai_config", + "replace": "llama3_2_vitis_ai_config" + }, + { + "find": "deepseek_ov_config", + "replace": "llama3_2_ov_config" + }, + { + "find": "deepseek_dml_config", + "replace": "llama3_2_dml_config" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json", + "dst": "llama3_2_qnn_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/llama3_2" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json.config", + "dst": "llama3_2_qnn_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json", + "dst": "llama3_2_vitis_ai_config.json", + "replacements": [ + { + "find": 
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/llama3_2" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json.config", + "dst": "llama3_2_vitis_ai_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json", + "dst": "llama3_2_ov_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/llama3_2" + }, + { + "find": "\"awq\": false", + "replace": "\"awq\": true" + }, + { + "find": "\"scale_estimation\": false", + "replace": "\"scale_estimation\": true" + }, + { + "find": "\"sensitivity_metric\": \"weight_quantization_error\",", + "replace": "" + }, + { + "find": "\"backup_precision\": \"int8_asym\"", + "replace": "\"backup_precision\": \"int8_sym\"" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "dst": "llama3_2_ov_config.json.config", + "replacements": [ + { + "find": "deepseek/openvino/DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json", + "replace": "llama3/openvino/Llama-3.2-1B-Instruct_context_ov_dynamic_sym_bkp_int8_sym.json" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json", + "dst": "llama3_2_dml_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/llama3_2" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json.config", + "dst": "llama3_2_dml_config.json.config", + "replacements": [ + ] + }, + { + "src": 
"../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/README.md", + "dst": "README.md", + "replacements": [ + { + "find": "# DeepSeek-R1-Distill-Qwen-1.5B Model Optimization", + "replace": "# Llama-3.2-1B-Instruct Model Optimization" + }, + { + "find": "[DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B)", + "replace": "[Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)" + }, + { + "find": "> ⚠️ If got 6033 error, replace `genai_config.json` in `./model` folder", + "replace": "" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/requirements.txt", + "dst": "requirements.txt", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "dst": "inference_sample.ipynb", + "replacements": [ + { + "find": "<|User|>{input}<|Assistant|>", + "replace": "<|start_header_id|>user<|end_header_id|>\\\\n{input}<|start_header_id|>assistant<|end_header_id|>\\\\n" + } + ] + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_model.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_model.json new file mode 100644 index 00000000..5ec359e2 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_model.json @@ -0,0 +1,31 @@ +{ + "Name": "Llama-3.2-1B-Instruct", + "PromptTemplate": { + "assistant": "{Content}", + "prompt":"<|start_header_id|>user<|end_header_id|>\n{Content}<|start_header_id|>assistant<|end_header_id|>\n" + }, + "ParameterSchema": { + "enabled": [ + { + "name": "max_tokens", + "default": 512 + }, + { + "name": "temperature", + "default": 0.6 + }, + { + "name": "top_p", + "default": 0.9 + }, + { + "name": "top_k", + "default": 5 + }, + { + "name": "random_seed", + "default": 42 + } + ] + } +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb new file mode 100644 index 00000000..77a3070b 
--- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = 'Who is Isaac Newton?'\n", + "ExecutionProvider=\"QNNExecutionProvider\"\n", + "model_folder = \"./model\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime_genai as og\n", + "import json\n", + "import time\n", + "from pathlib import Path\n", + "\n", + "def get_session_options(obj):\n", + " if type(obj) is dict:\n", + " for k, v in obj.items():\n", + " if k == \"session_options\":\n", + " yield v\n", + " else:\n", + " for x in get_session_options(v):\n", + " yield x\n", + " elif type(obj) is list:\n", + " for v in obj:\n", + " for x in get_session_options(v):\n", + " yield x\n", + "\n", + "\n", + "def remove_provider_options(model_path):\n", + " genai_config_path = Path(model_path) / \"genai_config.json\"\n", + " data = json.loads(genai_config_path.read_text())\n", + " for session_option in get_session_options(data):\n", + " if 'provider_options' in session_option:\n", + " session_option['provider_options'] = [{k: dict() for k in opts.keys()} for opts in session_option['provider_options']]\n", + "\n", + " json.dump(data, genai_config_path.open(\"w\"), indent=4)\n", + "\n", + "if ExecutionProvider == \"QNNExecutionProvider\":\n", + " remove_provider_options(model_folder)\n", + "\n", + "# Load the base model and tokenizer\n", + "model = og.Model(model_folder)\n", + "tokenizer = og.Tokenizer(model)\n", + "tokenizer_stream = tokenizer.create_stream()\n", + "\n", + "# Set the max length to something sensible by default,\n", + "# since otherwise it will be set to the entire context length\n", + "search_options = {}\n", + "search_options[\"max_length\"] = 200\n", + "\n", + "chat_template = 
\"<|start_header_id|>user<|end_header_id|>\\n{input}<|start_header_id|>assistant<|end_header_id|>\\n\"\n", + "\n", + "# Generate prompt (prompt template + input)\n", + "prompt = f\"{chat_template.format(input=text)}\"\n", + "\n", + "# Encode the prompt using the tokenizer\n", + "input_tokens = tokenizer.encode(prompt)\n", + "\n", + "# Create params and generator\n", + "params = og.GeneratorParams(model)\n", + "params.set_search_options(**search_options)\n", + "generator = og.Generator(model, params)\n", + "\n", + "# Append input tokens to the generator\n", + "generator.append_tokens(input_tokens)\n", + "\n", + "print(\"\")\n", + "print(\"Output: \", end=\"\", flush=True)\n", + "\n", + "token_times = []\n", + "\n", + "# Stream the output\n", + "while not generator.is_done():\n", + " start_time = time.time()\n", + " generator.generate_next_token()\n", + " end_time = time.time()\n", + " \n", + " # Record the time for this token generation\n", + " token_time = end_time - start_time\n", + " token_times.append(token_time)\n", + "\n", + " new_token = generator.get_next_tokens()[0]\n", + " print(tokenizer_stream.decode(new_token), end=\"\", flush=True)\n", + "\n", + "print()\n", + "\n", + "# Calculate and display timing statistics\n", + "if token_times:\n", + " total_tokens = len(token_times)\n", + " avg_time = sum(token_times) / total_tokens\n", + " \n", + " print(f\"Total tokens generated: {total_tokens}\")\n", + " print(f\"Average time per token: {avg_time:.4f} seconds\")\n", + " print(f\"Tokens per second: {total_tokens / sum(token_times):.2f}\")\n", + "\n", + "del generator\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + 
"nbformat": 4, + "nbformat_minor": 2 +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml new file mode 100644 index 00000000..59e77800 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -0,0 +1,20 @@ +keywords: + aitk +arch: llama +recipes: + - file: "llama3_2_qnn_config.json" + device: npu + ep: QNNExecutionProvider + - file: "llama3_2_vitis_ai_config.json" + device: npu + ep: VitisAIExecutionProvider + - file: "llama3_2_ov_config.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "llama3_2_dml_config.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/meta-llama/Llama-3.2-1B-Instruct" + version: 1 diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json new file mode 100644 index 00000000..6965e946 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json @@ -0,0 +1,46 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "passes": { + "q": { + "type": "AutoAWQQuantizer" + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/llama3_2", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json.config new file mode 100644 index 00000000..5778ef75 --- /dev/null +++ 
b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json.config @@ -0,0 +1,48 @@ +{ + "name": "Convert to DirectML", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isGPURequired": true, + "executeRuntimeFeatures": [ + "AutoAwq" + ], + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json new file mode 100644 index 00000000..73cd8c82 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json @@ -0,0 +1,56 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu" + }, + "ov_quant_config": { + "weight_format": "int4", + "group_size": 128, + "dataset": "wikitext2", + "ratio": 1, + "sym": true, + "trust_remote_code": true, + "awq": true, + "scale_estimation": true, + + "backup_precision": "int8_sym" + } + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "static": false, + "reuse_cache": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + 
"keep_ov_dynamic_dims": true, + "ov_version": "2025.1", + "reuse_cache": true + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/llama3_2" +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json.config new file mode 100644 index 00000000..d594204f --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json.config @@ -0,0 +1,153 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "llama3/openvino/Llama-3.2-1B-Instruct_context_ov_dynamic_sym_bkp_int8_sym.json", + "isLLM": true, + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ] + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } 
+ ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": { + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json new file mode 100644 index 00000000..4a699670 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json @@ -0,0 +1,132 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "qnn_system": { + "type": "PythonEnvironment", + "python_environment_path": "/path/to/qnn/env/bin", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + 
"add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 4, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + "int4_accuracy_level": 4, + "int4_op_types_to_quantize": [ + "MatMul", + "Gather" + ], + "save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + "precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64 + }, + "cb": { + "type": "EPContextBinaryGenerator", + "provider_options": { + "htp_performance_mode": "burst", + "htp_graph_finalization_optimization_mode": "3", + "soc_model": "60" + }, + "session_options": { + "intra_op_num_threads": 2, + "inter_op_num_threads": 1 + }, + "weight_sharing": true + }, + "cp": { + "type": "ComposeOnnxModels" + } + }, + "target": "qnn_system", + "log_severity_level": 1, + "output_dir": "model/llama3_2", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git 
a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json.config new file mode 100644 index 00000000..032429d1 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json.config @@ -0,0 +1,197 @@ +{ + "name": "Convert to Qualcomm NPU", + "oliveFile": "phi3_5/qnn_config.json", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isQNNLLM": true, + "isGPURequired": true, + "runtimeOverwrite": { + "autoGenerated": true, + "pyEnvPath": "systems.qnn_system.python_environment_path", + "executeEp": "CUDAExecutionProvider", + "evaluateUsedInExecute": true + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "pyEnvRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + 
"QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json new file mode 100644 index 00000000..97a54b26 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json @@ -0,0 +1,134 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 1, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + "int4_accuracy_level": 4, + 
"int4_op_types_to_quantize": [ + "MatMul", + "Gather" + ], + "save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + "precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "int4", + "quant_type": "QuaRot" + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64, + "group_session_options": { + "log_id": "onnxruntime-genai", + "provider_options": [ + { + "VitisAI": {} + } + ], + "graph_optimization_level": "ORT_ENABLE_ALL" + } + } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/llama3_2", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config new file mode 100644 index 00000000..f6624c83 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config @@ -0,0 +1,191 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "phi3_5/qdq_config_vitis_ai.json", + "isLLM": true, + "evalRuntime": "AMDNPU", 
+ "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isGPURequired": true, + "runtimeOverwrite": { + "executeEp": "CUDAExecutionProvider" + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + 
"name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config new file mode 100644 index 00000000..f5a73299 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config @@ -0,0 +1,24 @@ +{ + "workflows": [ + { + "file": "llama3_2_qnn_config.json", + "templateName": "llama3_2_qnn_config" + }, + { + "file": "llama3_2_vitis_ai_config.json", + "templateName": "llama3_2_vitis_ai_config" + }, + { + "file": "llama3_2_ov_config.json", + "templateName": "llama3_2_ov_config" + }, + { + "file": "llama3_2_dml_config.json", + "templateName": "llama3_2_dml_config" + } + ], + "modelInfo": { + "id": "huggingface/meta-llama/Llama-3.2-1B-Instruct", + "version": 1 + } +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt new file mode 100644 index 00000000..03275c3e --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt @@ -0,0 +1,2 @@ +datasets +optimum diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/.gitignore b/microsoft-Phi-3.5-mini-instruct/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/README.md b/microsoft-Phi-3.5-mini-instruct/aitk/README.md new file mode 100644 index 00000000..b8df9630 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/README.md @@ -0,0 +1,160 @@ +# Phi-3.5 Model Optimization + +This repository demonstrates the optimization of the [Microsoft Phi-3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) model using **post-training quantization (PTQ)** techniques. 
The optimization process is divided into three main workflows: + +- QDQ for AMD NPU +- PTQ + AOT for QNN NPU + + This process extends the QDQ flow and compiles the model specifically for **Qualcomm NPUs** +- OpenVINO for Intel NPU + + This process uses OpenVINO-specific passes such as `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` + +## **QDQ Model with 4-bit Weights & 16-bit Activations** + +This workflow produces an ONNX QDQ model that is agnostic to the target hardware and accelerator, making it suitable for general inference. + +### **Optimization Process** + +The model is optimized using **weight-only quantization** and **activation quantization** for efficient deployment. The process includes: + +1. **Weight Rotation ([QuaRot](https://arxiv.org/abs/2404.00456))** + - Reduces outliers from weights and hidden states to enhance quantization efficiency. + +2. **4-bit Per-Channel Symmetric Quantization ([GPTQ](https://arxiv.org/abs/2210.17323))** + - Reduces transformer layer size while preserving accuracy. + +3. **ONNX Graph Capture** + - Exports the model to ONNX for further optimization. + +4. **4-bit Block-wise Quantization** + - Applies weight-only quantization to the **embedding layer** and **language modeling head**. + +5. **16-bit Activation Quantization** + - Uses 16-bit activations to balance precision and efficiency. + +The final output is a **QDQ model** with **4-bit weights** and **16-bit activations**. This model also leverages [GroupQueryAttention (GQA)](https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.GroupQueryAttention) for efficient long-context processing and long-sequence generation. + +### **Handling Dynamic and Static Input Shapes** + +NPUs require **precompiled graphs**, meaning the model must use **static input shapes**. However, **text generation** involves two distinct processing stages: + +- **Prefill (Prompt Processing)**: Processes multiple tokens simultaneously.
+- **Token Generation (Iteration)**: Processes one token at a time. + +To support both efficiently, we create **two model instances**: +1. **Prefill model**: Optimized for batch processing. +2. **Token generation model**: Optimized for one-token-at-a-time inference. + +## **PTQ + AOT Compilation for Qualcomm NPUs using QNN EP** + +This process extends the [**QDQ Model with 4-bit Weights & 16-bit Activations**](#qdq-model-with-4-bit-weights--16-bit-activations) by compiling it specifically for **Qualcomm NPUs** using the **QNN Execution Provider**. + +### **Resource Optimization Strategy** + +To maximize efficiency while supporting dynamic input handling: + +- **Embedding Layer & Language Model Head** → Executed on CPU (handles dynamic input). +- **Transformer Layers** → Executed on NPU (requires static input shapes). +- **Weight Sharing** → Prefill & token generation models reuse weights to minimize memory usage. + +> ⚠️ **Note:** GQA is an ONNX Runtime *contrib operator* and must be executed on the CPU. The model graph is partitioned into **CPU (GQA nodes)** and **NPU (other nodes)** for execution. + +### **Compilation for Qualcomm NPU Deployment** + +Once optimized, the model is compiled for Qualcomm NPUs using **ONNX Runtime QNNExecutionProvider**. The steps include: + +1. **Split the Quantized Model** → Divide into three parts: + - **Embedding Layer** + - **Transformer Layers** + - **Language Model Head** +2. **Set Static Input Shapes**: + - **(1, 64)** for prefill (batch size, sequence length). + - **(1, 1)** for token generation. +3. **Compile using QNNExecutionProvider**: + - Leverages **weight sharing** across the prefill and token generation models. + +### **Usage** + +This workflow is configured using the `qnn_config.json` file. It contains all of the quantization and compilation steps. It requires two separate Python environments described below. 
+ +#### A workable version + +- python=3.10 +- CUDA=12.1 +- cudnn=9.2.0 + +#### Quantization Python Environment Setup + +Quantization is resource-intensive and requires GPU acceleration. In an [x64 Python environment with Olive installed](https://github.com/microsoft/Olive/blob/main/examples/README.md#important), install the required packages: + +```bash +# Install common dependencies +pip install -r requirements.txt + +# Install ONNX Runtime GPU packages +pip install "onnxruntime-gpu>=1.21.0" "onnxruntime-genai-cuda>=0.6.0" + +# AutoGPTQ: Install from source (stable package may be slow for weight packing) +# Disable CUDA extension build (not required) +# Linux +export BUILD_CUDA_EXT=0 +# Windows +# set BUILD_CUDA_EXT=0 + +# Install AutoGPTQ from source +pip install --no-build-isolation git+https://github.com/PanQiWei/AutoGPTQ.git +``` + +> ⚠️ Only set up the environment and install the packages. Do not run the `olive run` command at this point. + +#### AOT Compilation Python Environment Setup + +Model compilation using QNN Execution Provider requires a Python environment with onnxruntime-qnn installed. In a separate Python environment with Olive installed, install the required packages: + +```bash +# Install ONNX Runtime QNN +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +``` + +Replace `/path/to/qnn/env/bin` in `qnn_config.json` with the path to the directory containing your QNN environment's Python executable. This path can be found by running the following command in the environment: + +```bash +# Linux +command -v python +# Windows +# where python +``` + +This command will return the path to the Python executable. Set the parent directory of the executable as the `/path/to/qnn/env/bin` in the config file. 
+ +#### **Run the Quantization + Compilation Config** + +Activate the **Quantization Python Environment** and run the workflow: + +```bash +olive run --config qnn_config.json +``` + +Olive will run the AOT compilation step in the **AOT Compilation Python Environment** specified in the config file using a subprocess. All other steps will run in the **Quantization Python Environment** natively. + +✅ Optimized model saved in: `./model` + +> ⚠️ If optimization fails because it runs out of memory, remove `calibration_providers` from the config file. + +> ⚠️ If optimization fails during context binary generation, rerun the command. The process will resume from the last completed step. + +### **Inference** + +The optimized model can be used for inference using ONNX Runtime QNNExecutionProvider and ONNX Runtime GenAI. **Inference must be run on a Windows Copilot+ PC with a Qualcomm NPU.** + +#### **Install Required Packages (arm64 Python)** +```bash +pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt +pip install -U --pre --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple onnxruntime-qnn --no-deps +pip install "onnxruntime-genai>=0.7.0rc2" +``` + +#### **Run Console-Based Chat Interface** +Execute the provided `inference_sample.ipynb` notebook.
+ +> ⚠️ If you get a 6033 error, replace `genai_config.json` in the `./model` folder diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config new file mode 100644 index 00000000..cfda4ffc --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config @@ -0,0 +1,140 @@ +{ + "copies": [ + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/model_project.config", + "dst": "model_project.config", + "replacements": [ + { + "find": "deepseek_qnn_config", + "replace": "phi3_5_qnn_config" + }, + { + "find": "deepseek_vitis_ai_config", + "replace": "phi3_5_vitis_ai_config" + }, + { + "find": "deepseek_ov_config", + "replace": "phi3_5_ov_config" + }, + { + "find": "deepseek_dml_config", + "replace": "phi3_5_dml_config" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json", + "dst": "phi3_5_qnn_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "microsoft/Phi-3.5-mini-instruct" + }, + { + "find": "model/deepseek", + "replace": "model/phi3_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json.config", + "dst": "phi3_5_qnn_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json", + "dst": "phi3_5_vitis_ai_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "microsoft/Phi-3.5-mini-instruct" + }, + { + "find": "model/deepseek", + "replace": "model/phi3_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json.config", + "dst": "phi3_5_vitis_ai_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json", + "dst": "phi3_5_ov_config.json", + "replacements": [ + { +
"find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "microsoft/Phi-3.5-mini-instruct" + }, + { + "find": "model/deepseek", + "replace": "model/phi3_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "dst": "phi3_5_ov_config.json.config", + "replacements": [ + { + "find": "deepseek/openvino/DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json", + "replace": "phi3_5/openvino/Phi-3.5-mini-instruct_context_ov_dynamic_sym_gs128_bkp_int8_sym.json" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json", + "dst": "phi3_5_dml_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "microsoft/Phi-3.5-mini-instruct" + }, + { + "find": "model/deepseek", + "replace": "model/phi3_5" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json.config", + "dst": "phi3_5_dml_config.json.config", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/README.md", + "dst": "README.md", + "replacements": [ + { + "find": "# DeepSeek-R1-Distill-Qwen-1.5B Model Optimization", + "replace": "# Phi-3.5 Model Optimization" + }, + { + "find": "[DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B)", + "replace": "[Microsoft Phi-3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)" + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/requirements.txt", + "dst": "requirements.txt", + "replacements": [ + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "dst": "inference_sample.ipynb", + "replacements": [ + { + "find": "<|User|>{input}<|Assistant|>", + "replace": "<|user|>\\\\n{input} <|end|>\\\\n<|assistant|>" + } + ] + } + ] +} diff --git 
a/microsoft-Phi-3.5-mini-instruct/aitk/inference_model.json b/microsoft-Phi-3.5-mini-instruct/aitk/inference_model.json new file mode 100644 index 00000000..319c2d42 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/inference_model.json @@ -0,0 +1,31 @@ +{ + "Name": "Phi-3.5-mini-instruct-onnx", + "PromptTemplate": { + "assistant": "{Content}", + "prompt":"<|user|>\n{Content} <|end|>\n<|assistant|>" + }, + "ParameterSchema": { + "enabled": [ + { + "name": "max_tokens", + "default": 512 + }, + { + "name": "temperature", + "default": 0.6 + }, + { + "name": "top_p", + "default": 0.9 + }, + { + "name": "top_k", + "default": 5 + }, + { + "name": "random_seed", + "default": 57894 + } + ] + } +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb b/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb new file mode 100644 index 00000000..a47cdc58 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = 'Who is Isaac Newton?'\n", + "ExecutionProvider=\"QNNExecutionProvider\"\n", + "model_folder = \"./model\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime_genai as og\n", + "import json\n", + "import time\n", + "from pathlib import Path\n", + "\n", + "def get_session_options(obj):\n", + " if type(obj) is dict:\n", + " for k, v in obj.items():\n", + " if k == \"session_options\":\n", + " yield v\n", + " else:\n", + " for x in get_session_options(v):\n", + " yield x\n", + " elif type(obj) is list:\n", + " for v in obj:\n", + " for x in get_session_options(v):\n", + " yield x\n", + "\n", + "\n", + "def remove_provider_options(model_path):\n", + " genai_config_path = Path(model_path) / \"genai_config.json\"\n", + " data = json.loads(genai_config_path.read_text())\n", + " for session_option in 
get_session_options(data):\n", + " if 'provider_options' in session_option:\n", + " session_option['provider_options'] = [{k: dict() for k in opts.keys()} for opts in session_option['provider_options']]\n", + "\n", + " json.dump(data, genai_config_path.open(\"w\"), indent=4)\n", + "\n", + "if ExecutionProvider == \"QNNExecutionProvider\":\n", + " remove_provider_options(model_folder)\n", + "\n", + "# Load the base model and tokenizer\n", + "model = og.Model(model_folder)\n", + "tokenizer = og.Tokenizer(model)\n", + "tokenizer_stream = tokenizer.create_stream()\n", + "\n", + "# Set the max length to something sensible by default,\n", + "# since otherwise it will be set to the entire context length\n", + "search_options = {}\n", + "search_options[\"max_length\"] = 200\n", + "\n", + "chat_template = \"<|user|>\\n{input} <|end|>\\n<|assistant|>\"\n", + "\n", + "# Generate prompt (prompt template + input)\n", + "prompt = f\"{chat_template.format(input=text)}\"\n", + "\n", + "# Encode the prompt using the tokenizer\n", + "input_tokens = tokenizer.encode(prompt)\n", + "\n", + "# Create params and generator\n", + "params = og.GeneratorParams(model)\n", + "params.set_search_options(**search_options)\n", + "generator = og.Generator(model, params)\n", + "\n", + "# Append input tokens to the generator\n", + "generator.append_tokens(input_tokens)\n", + "\n", + "print(\"\")\n", + "print(\"Output: \", end=\"\", flush=True)\n", + "\n", + "token_times = []\n", + "\n", + "# Stream the output\n", + "while not generator.is_done():\n", + " start_time = time.time()\n", + " generator.generate_next_token()\n", + " end_time = time.time()\n", + " \n", + " # Record the time for this token generation\n", + " token_time = end_time - start_time\n", + " token_times.append(token_time)\n", + "\n", + " new_token = generator.get_next_tokens()[0]\n", + " print(tokenizer_stream.decode(new_token), end=\"\", flush=True)\n", + "\n", + "print()\n", + "\n", + "# Calculate and display timing statistics\n", 
+ "if token_times:\n", + " total_tokens = len(token_times)\n", + " avg_time = sum(token_times) / total_tokens\n", + " \n", + " print(f\"Total tokens generated: {total_tokens}\")\n", + " print(f\"Average time per token: {avg_time:.4f} seconds\")\n", + " print(f\"Tokens per second: {total_tokens / sum(token_times):.2f}\")\n", + "\n", + "del generator\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml new file mode 100644 index 00000000..68a6970d --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -0,0 +1,20 @@ +keywords: + aitk +arch: llama +recipes: + - file: "phi3_5_qnn_config.json" + device: npu + ep: QNNExecutionProvider + - file: "phi3_5_vitis_ai_config.json" + device: npu + ep: VitisAIExecutionProvider + - file: "phi3_5_ov_config.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "phi3_5_dml_config.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/microsoft/Phi-3.5-mini-instruct" + version: 1 diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config new file mode 100644 index 00000000..a5f764fe --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config @@ -0,0 +1,24 @@ +{ + "workflows": [ + { + "file": "phi3_5_qnn_config.json", + "templateName": "phi3_5_qnn_config" + }, + { + "file": "phi3_5_vitis_ai_config.json", + "templateName": "phi3_5_vitis_ai_config" + }, + { + "file": "phi3_5_ov_config.json", + "templateName": 
"phi3_5_ov_config" + }, + { + "file": "phi3_5_dml_config.json", + "templateName": "phi3_5_dml_config" + } + ], + "modelInfo": { + "id": "huggingface/microsoft/Phi-3.5-mini-instruct", + "version": 1 + } +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json new file mode 100644 index 00000000..9e401bf4 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json @@ -0,0 +1,46 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "passes": { + "q": { + "type": "AutoAWQQuantizer" + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/phi3_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json.config new file mode 100644 index 00000000..5778ef75 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json.config @@ -0,0 +1,48 @@ +{ + "name": "Convert to DirectML", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isGPURequired": true, + "executeRuntimeFeatures": [ + "AutoAwq" + ], + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + 
"values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json new file mode 100644 index 00000000..904638ab --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json @@ -0,0 +1,56 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu" + }, + "ov_quant_config": { + "weight_format": "int4", + "group_size": 128, + "dataset": "wikitext2", + "ratio": 1, + "sym": true, + "trust_remote_code": true, + "awq": false, + "scale_estimation": false, + "sensitivity_metric": "weight_quantization_error", + "backup_precision": "int8_asym" + } + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "static": false, + "reuse_cache": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "keep_ov_dynamic_dims": true, + "ov_version": "2025.1", + "reuse_cache": true + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/phi3_5" +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json.config new file mode 100644 index 00000000..768b5505 --- /dev/null +++ 
b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json.config @@ -0,0 +1,153 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "phi3_5/openvino/Phi-3.5-mini-instruct_context_ov_dynamic_sym_gs128_bkp_int8_sym.json", + "isLLM": true, + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ] + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + 
[], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": { + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json new file mode 100644 index 00000000..1e8648d4 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json @@ -0,0 +1,132 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "qnn_system": { + "type": "PythonEnvironment", + "python_environment_path": "/path/to/qnn/env/bin", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 4, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + "int4_accuracy_level": 4, + "int4_op_types_to_quantize": [ + "MatMul", + "Gather" + ], + 
"save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + "precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64 + }, + "cb": { + "type": "EPContextBinaryGenerator", + "provider_options": { + "htp_performance_mode": "burst", + "htp_graph_finalization_optimization_mode": "3", + "soc_model": "60" + }, + "session_options": { + "intra_op_num_threads": 2, + "inter_op_num_threads": 1 + }, + "weight_sharing": true + }, + "cp": { + "type": "ComposeOnnxModels" + } + }, + "target": "qnn_system", + "log_severity_level": 1, + "output_dir": "model/phi3_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json.config new file mode 100644 index 00000000..032429d1 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json.config @@ -0,0 +1,197 @@ +{ + "name": "Convert to Qualcomm NPU", + "oliveFile": "phi3_5/qnn_config.json", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isQNNLLM": true, + "isGPURequired": true, + "runtimeOverwrite": { + 
"autoGenerated": true, + "pyEnvPath": "systems.qnn_system.python_environment_path", + "executeEp": "CUDAExecutionProvider", + "evaluateUsedInExecute": true + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "pyEnvRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + 
"name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json new file mode 100644 index 00000000..889e4d82 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json @@ -0,0 +1,134 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "q": { + "type": "QuaRot" + }, + "g": { + "type": "GptqQuantizer", + "sym": true, + "group_size": -1 + }, + "cs": { + "type": "CaptureSplitInfo", + "num_splits": 1, + "unique_embeds_lm_head_splits": true + }, + "mb": { + "type": "ModelBuilder", + "precision": "int4", + "int4_block_size": 32, + "int4_accuracy_level": 4, + "int4_op_types_to_quantize": [ + "MatMul", + "Gather" + ], + "save_as_external_data": true + }, + "mq": { + "type": "MatMulNBitsToQDQ", + "use_int4": true, + "add_zero_point": true, + "nodes_to_exclude": [ + "/lm_head/MatMul_Q4" + ], + "save_as_external_data": true + }, + "gs": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "RemoveRopeMultiCache" + }, + { + "surgeon": "AttentionMaskToSequenceLengths" + }, + { + "surgeon": "SimplifiedLayerNormToL2Norm" + } + ], + "save_as_external_data": true + }, + "sq": { + "type": "OnnxStaticQuantization", + "data_config": "wikitext2_train", + "activation_type": "uint16", + 
"precision": "uint8", + "calibration_providers": [ + "CUDAExecutionProvider" + ], + "quant_preprocess": true, + "op_types_to_exclude": [ + "GatherBlockQuantized", + "GroupQueryAttention", + "MatMulNBits" + ], + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "int4", + "quant_type": "QuaRot" + }, + "sp": { + "type": "SplitModel" + }, + "st": { + "type": "StaticLLM", + "batch_size": 1, + "context_length": 64, + "group_session_options": { + "log_id": "onnxruntime-genai", + "provider_options": [ + { + "VitisAI": {} + } + ], + "graph_optimization_level": "ORT_ENABLE_ALL" + } + } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/phi3_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config new file mode 100644 index 00000000..f6624c83 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config @@ -0,0 +1,191 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "phi3_5/qdq_config_vitis_ai.json", + "isLLM": true, + "evalRuntime": "AMDNPU", + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "isGPURequired": true, + "runtimeOverwrite": { + "executeEp": "CUDAExecutionProvider" + }, + "executeRuntimeFeatures": [ + "AutoGptq" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + 
"name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.sq.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.sq.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "wikitext" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + 
"wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "wikitext-103-raw-v1", + "wikitext-103-v1", + "wikitext-2-raw-v1", + "wikitext-2-v1" + ], + "template": "QuantizationDatasetSubset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt new file mode 100644 index 00000000..03275c3e --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt @@ -0,0 +1,2 @@ +datasets +optimum diff --git a/microsoft-Phi-4-mini-reasoning/aitk/.gitignore b/microsoft-Phi-4-mini-reasoning/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/microsoft-Phi-4-mini-reasoning/aitk/README.md b/microsoft-Phi-4-mini-reasoning/aitk/README.md new file mode 100644 index 00000000..52c59381 --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/README.md @@ -0,0 +1,6 @@ +# Phi-4 Model Optimization + +This repository demonstrates the optimization of the 
[Microsoft Phi-4 Mini Reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) model using **post-training quantization (PTQ)** techniques. The optimization process is divided into these main workflows: + +- OpenVINO for Intel NPU + + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` diff --git a/microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config b/microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config new file mode 100644 index 00000000..1b769d18 --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config @@ -0,0 +1,42 @@ +{ + "copies": [ + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "dst": "phi4_ov_config.json.config", + "replacements": [ + { + "find": "deepseek/openvino/DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json", + "replace": "phi4/openvino/phi_4_mini_reasoning/Phi-4-mini-reasoning_context_ov_dynamic_sym_gs128_bkp_int8_sym.json" + }, + { + "find": "\"addCpu\": false,", + "replace": "\"executeRuntimeFeatures\": [\"Nightly\"],\"addCpu\": false," + } + ] + }, + { + "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "dst": "inference_sample.ipynb", + "replacements": [ + { + "find": "<|User|>{input}<|Assistant|>", + "replace": "<|user|>\\\\n{input} <|end|>\\\\n<|assistant|>" + }, + { + "find": "ExecutionProvider=\\\"QNNExecutionProvider\\\"", + "replace": "ExecutionProvider=\\\"OpenVINOExecutionProvider\\\"" + } + ] + }, + { + "src": "../../Phi-3.5-mini-instruct/1/inference_model.json", + "dst": "inference_model.json", + "replacements": [ + { + "find": "Phi-3.5-mini-instruct-onnx", + "replace": "Phi-4-mini-reasoning-onnx" + } + ] + } + ] +} diff --git a/microsoft-Phi-4-mini-reasoning/aitk/inference_model.json b/microsoft-Phi-4-mini-reasoning/aitk/inference_model.json new file mode 100644 index 00000000..c86373cf --- /dev/null +++ 
b/microsoft-Phi-4-mini-reasoning/aitk/inference_model.json @@ -0,0 +1,31 @@ +{ + "Name": "Phi-4-mini-reasoning-onnx", + "PromptTemplate": { + "assistant": "{Content}", + "prompt":"<|user|>\n{Content} <|end|>\n<|assistant|>" + }, + "ParameterSchema": { + "enabled": [ + { + "name": "max_tokens", + "default": 512 + }, + { + "name": "temperature", + "default": 0.6 + }, + { + "name": "top_p", + "default": 0.9 + }, + { + "name": "top_k", + "default": 5 + }, + { + "name": "random_seed", + "default": 57894 + } + ] + } +} diff --git a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb new file mode 100644 index 00000000..70e1b959 --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = 'Who is Isaac Newton?'\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"\n", + "model_folder = \"./model\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime_genai as og\n", + "import json\n", + "import time\n", + "from pathlib import Path\n", + "\n", + "def get_session_options(obj):\n", + " if type(obj) is dict:\n", + " for k, v in obj.items():\n", + " if k == \"session_options\":\n", + " yield v\n", + " else:\n", + " for x in get_session_options(v):\n", + " yield x\n", + " elif type(obj) is list:\n", + " for v in obj:\n", + " for x in get_session_options(v):\n", + " yield x\n", + "\n", + "\n", + "def remove_provider_options(model_path):\n", + " genai_config_path = Path(model_path) / \"genai_config.json\"\n", + " data = json.loads(genai_config_path.read_text())\n", + " for session_option in get_session_options(data):\n", + " if 'provider_options' in session_option:\n", + " session_option['provider_options'] = [{k: dict() for k in opts.keys()} for opts in 
session_option['provider_options']]\n", + "\n", + " json.dump(data, genai_config_path.open(\"w\"), indent=4)\n", + "\n", + "if ExecutionProvider == \"QNNExecutionProvider\":\n", + " remove_provider_options(model_folder)\n", + "\n", + "# Load the base model and tokenizer\n", + "model = og.Model(model_folder)\n", + "tokenizer = og.Tokenizer(model)\n", + "tokenizer_stream = tokenizer.create_stream()\n", + "\n", + "# Set the max length to something sensible by default,\n", + "# since otherwise it will be set to the entire context length\n", + "search_options = {}\n", + "search_options[\"max_length\"] = 200\n", + "\n", + "chat_template = \"<|user|>\\n{input} <|end|>\\n<|assistant|>\"\n", + "\n", + "# Generate prompt (prompt template + input)\n", + "prompt = f\"{chat_template.format(input=text)}\"\n", + "\n", + "# Encode the prompt using the tokenizer\n", + "input_tokens = tokenizer.encode(prompt)\n", + "\n", + "# Create params and generator\n", + "params = og.GeneratorParams(model)\n", + "params.set_search_options(**search_options)\n", + "generator = og.Generator(model, params)\n", + "\n", + "# Append input tokens to the generator\n", + "generator.append_tokens(input_tokens)\n", + "\n", + "print(\"\")\n", + "print(\"Output: \", end=\"\", flush=True)\n", + "\n", + "token_times = []\n", + "\n", + "# Stream the output\n", + "while not generator.is_done():\n", + " start_time = time.time()\n", + " generator.generate_next_token()\n", + " end_time = time.time()\n", + " \n", + " # Record the time for this token generation\n", + " token_time = end_time - start_time\n", + " token_times.append(token_time)\n", + "\n", + " new_token = generator.get_next_tokens()[0]\n", + " print(tokenizer_stream.decode(new_token), end=\"\", flush=True)\n", + "\n", + "print()\n", + "\n", + "# Calculate and display timing statistics\n", + "if token_times:\n", + " total_tokens = len(token_times)\n", + " avg_time = sum(token_times) / total_tokens\n", + " \n", + " print(f\"Total tokens generated: 
{total_tokens}\")\n", + " print(f\"Average time per token: {avg_time:.4f} seconds\")\n", + " print(f\"Tokens per second: {total_tokens / sum(token_times):.2f}\")\n", + "\n", + "del generator\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-Phi-4-mini-reasoning/aitk/info.yml b/microsoft-Phi-4-mini-reasoning/aitk/info.yml new file mode 100644 index 00000000..07948e46 --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/info.yml @@ -0,0 +1,11 @@ +keywords: + aitk +arch: phi +recipes: + - file: "phi4_ov_config.json" + device: npu + ep: OpenVINOExecutionProvider +aitk: + modelInfo: + id: "huggingface/microsoft/Phi-4-mini-reasoning" + version: 1 diff --git a/microsoft-Phi-4-mini-reasoning/aitk/model_project.config b/microsoft-Phi-4-mini-reasoning/aitk/model_project.config new file mode 100644 index 00000000..13c6e9ab --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/model_project.config @@ -0,0 +1,12 @@ +{ + "workflows": [ + { + "file": "phi4_ov_config.json", + "templateName": "phi4_ov_config" + } + ], + "modelInfo": { + "id": "huggingface/microsoft/Phi-4-mini-reasoning", + "version": 1 + } +} diff --git a/microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json b/microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json new file mode 100644 index 00000000..578fc1db --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json @@ -0,0 +1,55 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-4-mini-reasoning" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + 
"OpenVINOExecutionProvider" + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu" + }, + "ov_quant_config": { + "weight_format": "int4", + "group_size": 128, + "dataset": "wikitext2", + "ratio": 1, + "awq": true, + "scale_estimation": true, + "sym": true, + "trust_remote_code": true, + "backup_precision": "int8_sym" + } + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "static": false, + "reuse_cache": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "keep_ov_dynamic_dims": true, + "ov_version": "2025.1", + "reuse_cache": true + } + }, + "search_strategy": false, + "host": "local_system", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/Phi-4-mini-reasoning_context_ov_dynamic_sym_gs128_bkp_int8_sym", + "target": "local_system" +} diff --git a/microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json.config b/microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json.config new file mode 100644 index 00000000..0b15f17c --- /dev/null +++ b/microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json.config @@ -0,0 +1,156 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "phi4/openvino/phi_4_mini_reasoning/Phi-4-mini-reasoning_context_ov_dynamic_sym_gs128_bkp_int8_sym.json", + "isLLM": true, + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "executeRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": 
"delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ] + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": { + "path": "passes.optimum_convert.ov_quant_config.dataset", + "values": [ + "wikitext2" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/microsoft-Phi-4-mini-reasoning/aitk/requirements.txt b/microsoft-Phi-4-mini-reasoning/aitk/requirements.txt new file mode 100644 index 00000000..4a41a3ef --- /dev/null 
+++ b/microsoft-Phi-4-mini-reasoning/aitk/requirements.txt @@ -0,0 +1 @@ +olive-ai diff --git a/microsoft-resnet-50/aitk/.gitignore b/microsoft-resnet-50/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/microsoft-resnet-50/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/microsoft-resnet-50/aitk/README.md b/microsoft-resnet-50/aitk/README.md new file mode 100644 index 00000000..d4d440e6 --- /dev/null +++ b/microsoft-resnet-50/aitk/README.md @@ -0,0 +1,21 @@ +# ResNet optimization + +This folder contains examples of ResNet optimization using different workflows. + +- QDQ for Qualcomm NPU / AMD NPU +- OpenVINO for Intel NPU + +## QDQ for Qualcomm NPU / AMD NPU + +This workflow performs ResNet optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +## Evaluation result + +The quantization uses 256 samples from the train split of the imagenet-1k dataset and the evaluation uses 256 samples from the test split of the imagenet-1k dataset. 
+ +| Activation Type  | Weight Type  | Size  | Accuracy  | Latency (avg)  | +| --------------------- | ----------------- | ---------- | -------------- | ------------------- | +| float32 | float32 | 97.3 MB | - | - | +| QUInt16 | QUInt8 | 24.5MB | 0.78515625 | 2.53724 ms | diff --git a/microsoft-resnet-50/aitk/_copy.json.config b/microsoft-resnet-50/aitk/_copy.json.config new file mode 100644 index 00000000..953a59db --- /dev/null +++ b/microsoft-resnet-50/aitk/_copy.json.config @@ -0,0 +1,28 @@ +{ + "copies": [ + { + "src": "resnet_qdq_amd.json.config", + "dst": "resnet_qdq_qnn.json.config", + "replacements": [ + { + "find": "resnet/resnet_ptq_qdq_vitis_ai.json", + "replace": "resnet/resnet_ptq_qdq.json" + }, + { + "find": "Convert to AMD NPU", + "replace": "Convert to Qualcomm NPU" + } + ] + }, + { + "src": "resnet_trtrtx_inference_sample.ipynb", + "dst": "resnet_dml_inference_sample.ipynb", + "replacements": [ + { + "find": "NvTensorRTRTXExecutionProvider", + "replace": "DmlExecutionProvider" + } + ] + } + ] +} \ No newline at end of file diff --git a/microsoft-resnet-50/aitk/imagenet.py b/microsoft-resnet-50/aitk/imagenet.py new file mode 100644 index 00000000..41aa142e --- /dev/null +++ b/microsoft-resnet-50/aitk/imagenet.py @@ -0,0 +1,105 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +from logging import getLogger +from pathlib import Path + +import numpy as np +import torchvision.transforms as transforms +import transformers +from torch import from_numpy, permute +from torch.utils.data import Dataset + +from olive.data.registry import Registry + +logger = getLogger(__name__) + +def get_imagenet_label_map(): + import json + cache_file = Path(f"./cache/data/imagenet_class_index.json") + if not cache_file.exists(): + import requests + imagenet_class_index_url = ( + "https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json" + ) + response = requests.get(imagenet_class_index_url) + response.raise_for_status() # Ensure the request was successful + content = response.json() + cache_file.parent.resolve().mkdir(parents=True, exist_ok=True) + with open(cache_file, "w") as f: + json.dump(content, f) + else: + with open(cache_file) as f: + content = json.loads(f.read()) + + return {v[0]: int(k) for k, v in content.items()} + +def adapt_label_for_mini_imagenet(labels: list, label_names: list): + label_map = get_imagenet_label_map() + return [label_map[label_names[x]] for x in labels] + +class ImagenetDataset(Dataset): + def __init__(self, data): + self.images = from_numpy(data["images"]) + self.labels = from_numpy(data["labels"]) + + def __len__(self): + return min(len(self.images), len(self.labels)) + + def __getitem__(self, idx): + return {"pixel_values": self.images[idx]}, self.labels[idx] + + +@Registry.register_post_process() +def dataset_post_process(output): + return ( + output.logits.argmax(axis=1) + if isinstance(output, transformers.modeling_outputs.ModelOutput) + else output.argmax(axis=1) + ) + + +from transformers import AutoImageProcessor +processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50", use_fast=True) + +@Registry.register_pre_process() +def dataset_pre_process(output_data, **kwargs): + shuffle = 
kwargs.get("shuffle", True) + if shuffle: + seed = kwargs.get("seed", 42) + output_data = output_data.shuffle(seed=seed) + cache_key = kwargs.get("cache_key") + size = kwargs.get("size", 256) + transpose = kwargs.get("transpose", False) + cache_file = None + if cache_key: + suffix = "nhwc" if transpose else "nchw" + cache_file = Path(f"./cache/data/{cache_key}_{output_data.info.dataset_name}_{size}_{suffix}.npz") + if cache_file.exists(): + with np.load(Path(cache_file)) as data: + return ImagenetDataset(data) + + labels = [] + images = [] + for i, sample in enumerate(output_data): + if i >= size: + break + image = sample["image"] + label = sample["label"] + image = image.convert("RGB") + image = processor(image)["pixel_values"][0] + if transpose: + image = permute(image, (1, 2, 0)) + images.append(image) + labels.append(label) + + if(output_data.info.dataset_name == "mini-imagenet"): + labels = adapt_label_for_mini_imagenet(labels, output_data.features["label"].names) + result_data = ImagenetDataset({"images": np.array(images), "labels": np.array(labels)}) + + if cache_file: + cache_file.parent.resolve().mkdir(parents=True, exist_ok=True) + np.savez(cache_file, images=np.array(images), labels=np.array(labels)) + + return result_data \ No newline at end of file diff --git a/microsoft-resnet-50/aitk/inference_sample.ipynb b/microsoft-resnet-50/aitk/inference_sample.ipynb new file mode 100644 index 00000000..e84dc7cf --- /dev/null +++ b/microsoft-resnet-50/aitk/inference_sample.ipynb @@ -0,0 +1,128 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"\n", + "transpose = False\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"\n", + "elif ExecutionProvider == \"VitisAIExecutionProvider\":\n", + " transpose = True" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Current provider:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + 
"input_tensor = preprocess(image)\n", + "if transpose:\n", + " input_tensor = input_tensor.permute(1, 2, 0)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: input_batch.numpy()}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights = ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet50 v2...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml new file mode 100644 index 00000000..feadf0e4 --- /dev/null +++ b/microsoft-resnet-50/aitk/info.yml @@ -0,0 +1,23 @@ +keywords: + aitk +arch: resnet +recipes: + - file: "resnet_qdq_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "resnet_qdq_amd.json" + device: npu + ep: VitisAIExecutionProvider + - file: "resnet_context_ov_static.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "resnet_trtrtx.json" + device: gpu + ep: NvTensorRTRTXExecutionProvider + - file: 
"resnet_dml.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/microsoft/resnet-50" + version: 1 diff --git a/microsoft-resnet-50/aitk/model_project.config b/microsoft-resnet-50/aitk/model_project.config new file mode 100644 index 00000000..2a944b44 --- /dev/null +++ b/microsoft-resnet-50/aitk/model_project.config @@ -0,0 +1,28 @@ +{ + "workflows": [ + { + "file": "resnet_qdq_qnn.json", + "templateName": "resnet_qdq_qnn" + }, + { + "file": "resnet_qdq_amd.json", + "templateName": "resnet_qdq_amd" + }, + { + "file": "resnet_context_ov_static.json", + "templateName": "resnet_context_ov_static" + }, + { + "file": "resnet_trtrtx.json", + "templateName": "resnet_trtrtx" + }, + { + "file": "resnet_dml.json", + "templateName": "resnet_dml" + } + ], + "modelInfo": { + "id": "huggingface/microsoft/resnet-50", + "version": 1 + } +} diff --git a/microsoft-resnet-50/aitk/requirements.txt b/microsoft-resnet-50/aitk/requirements.txt new file mode 100644 index 00000000..4598395d --- /dev/null +++ b/microsoft-resnet-50/aitk/requirements.txt @@ -0,0 +1,4 @@ +olive-ai +torchvision +pillow +requests diff --git a/microsoft-resnet-50/aitk/resnet_context_ov_static.json b/microsoft-resnet-50/aitk/resnet_context_ov_static.json new file mode 100644 index 00000000..30ebfd55 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_context_ov_static.json @@ -0,0 +1,139 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + 
"type": "dataset_pre_process", + "size": 256, + "cache_key": "imagedata_quantization" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "ov_convert": { + "type": "OpenVINOConversion", + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "compress_to_fp16": true, + "static": true + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "static": true, + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "reuse_cache": true + }, + "ov_quantize": { + "type": "OpenVINOQuantization", + "target_device": "npu", + "data_config": "quantize_data_config", + "reuse_cache": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "ov_version": "2025.1", + "reuse_cache": true + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/resnet_context_ov_static" +} diff --git a/microsoft-resnet-50/aitk/resnet_context_ov_static.json.config 
b/microsoft-resnet-50/aitk/resnet_context_ov_static.json.config new file mode 100644 index 00000000..aa4fccdb --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_context_ov_static.json.config @@ -0,0 +1,261 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "resnet/openvino/resnet_context_ov_static.json", + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOConversion": "ov_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.ov_quantize.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.ov_quantize.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ], + [ + { + "type": "delete", + "path": "passes.io_update.reuse_cache" + }, + { + "type": "delete", + "path": "passes.ov_quantize.reuse_cache" + }, + { + "type": "delete", + "path": "passes.encapsulation.reuse_cache" + } + ] + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.ov_quantize.target_device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] 
+ }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.ov_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.ov_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + 
}, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.size", + "template": { + "path": "data_configs[1].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_dml.json b/microsoft-resnet-50/aitk/resnet_dml.json new file mode 100644 index 00000000..95e52a9e --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_dml.json @@ -0,0 +1,121 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + 
"cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_dml", + "evaluate_input_model": false +} diff --git a/microsoft-resnet-50/aitk/resnet_dml.json.config b/microsoft-resnet-50/aitk/resnet_dml.json.config new file mode 100644 index 00000000..7216c02e --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_dml.json.config @@ -0,0 +1,107 @@ +{ + "name": "Convert to DirectML", + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, 
+ { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb new file mode 100644 index 00000000..5acecca5 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = 
requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "import numpy as np\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Current provider:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "input_tensor = preprocess(image)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: 
input_batch.numpy().astype(np.float16)}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights = ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet50 v2...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-resnet-50/aitk/resnet_qdq_amd.json b/microsoft-resnet-50/aitk/resnet_qdq_amd.json new file mode 100644 index 00000000..ea681095 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_qdq_amd.json @@ -0,0 +1,147 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "qnn_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "VitisAIExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + 
"load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 64, + "cache_key": "imagedata_quantization", + "transpose": true + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation", + "transpose": true + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + }, + "transpose_input": { + "type": "InputNCHWtoNHWC" + }, + "OnnxQuantization": { + "type": "OnnxQuantization", + "data_config": "quantize_data_config", + "activation_type": "uint8", + "precision": "uint8", + "calibrate_method": "MinMax", + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint8", + "weight_type": "uint8", + "quant_type": 
"OnnxStaticQuantization" + } + }, + "host": "qnn_system", + "target": "qnn_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_ptq_qnn", + "evaluate_input_model": false +} diff --git a/microsoft-resnet-50/aitk/resnet_qdq_amd.json.config b/microsoft-resnet-50/aitk/resnet_qdq_amd.json.config new file mode 100644 index 00000000..eabd4ce7 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_qdq_amd.json.config @@ -0,0 +1,239 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "resnet/resnet_ptq_qdq_vitis_ai.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD NPU", + "CPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "VitisAIExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.OnnxQuantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + 
"parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.size", + "template": { + "path": "data_configs[1].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_qdq_qnn.json b/microsoft-resnet-50/aitk/resnet_qdq_qnn.json new file mode 100644 index 00000000..2a8b9c16 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_qdq_qnn.json @@ -0,0 +1,132 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "qnn_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "type": "HuggingfaceContainer", + "user_script": 
"imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 64, + "cache_key": "imagedata_quantization" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "use_dynamo_exporter": false + }, + "OnnxQuantization": { + "type": "OnnxQuantization", + "data_config": "quantize_data_config", + "activation_type": "uint16", + "precision": "uint8", + "calibrate_method": "MinMax", + "quant_preprocess": true, + "prepare_qnn_config": true, + "save_as_external_data": true + } + }, + "host": "qnn_system", + "target": "qnn_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_ptq_qnn", + "evaluate_input_model": false +} diff --git a/microsoft-resnet-50/aitk/resnet_qdq_qnn.json.config 
b/microsoft-resnet-50/aitk/resnet_qdq_qnn.json.config new file mode 100644 index 00000000..9dde5538 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_qdq_qnn.json.config @@ -0,0 +1,237 @@ +{ + "name": "Convert to Qualcomm NPU", + "oliveFile": "resnet/resnet_ptq_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.qnn_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.OnnxQuantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.OnnxQuantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.OnnxQuantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "use_dynamo_exporter": false + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation 
Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.size", + "template": { + "path": "data_configs[1].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_trtrtx.json b/microsoft-resnet-50/aitk/resnet_trtrtx.json new file mode 100644 index 00000000..ed10f746 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_trtrtx.json @@ -0,0 +1,110 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "NvTensorRTRTXExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + 
"load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 256, + "cache_key": "imagenet" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "data_config", + "sub_types": [ + { + "name": "avg" + } + ] + } + ] + } + }, + "passes": { + "onnx_conversion": { + "type": "OnnxConversion", + "target_opset": 13, + "save_as_external_data": true + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + }, + "session_params_tuning": { + "type": "OrtSessionParamsTuning", + "io_bind": false, + "data_config": "data_config" + } + }, + "host": "local_system", + "target": "local_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_trtrtx", + "evaluate_input_model": false +} diff --git a/microsoft-resnet-50/aitk/resnet_trtrtx.json.config b/microsoft-resnet-50/aitk/resnet_trtrtx.json.config new file mode 100644 index 00000000..838f3301 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_trtrtx.json.config @@ -0,0 +1,106 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "resnet/resnet_trtrtx.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + 
"name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.onnx_conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..75f15bc3 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + 
"ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "import numpy as np\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Current provider:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " 
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "input_tensor = preprocess(image)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: input_batch.numpy().astype(np.float16)}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights = ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet50 v2...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/.gitignore b/mistralai-Mistral-7B-Instruct-v0.3/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/README.md b/mistralai-Mistral-7B-Instruct-v0.3/aitk/README.md new file mode 100644 index 00000000..d6c8ba9a --- /dev/null +++ 
b/mistralai-Mistral-7B-Instruct-v0.3/aitk/README.md @@ -0,0 +1,7 @@ +# Mistral-7B-Instruct-v0.3 Optimization + +This repository demonstrates the optimization of the [Mistral 7B Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) model. + +The optimization process is divided into these main workflows: +- OpenVINO for Intel GPU + + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/inference_sample.ipynb b/mistralai-Mistral-7B-Instruct-v0.3/aitk/inference_sample.ipynb new file mode 100644 index 00000000..cb939cad --- /dev/null +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/inference_sample.ipynb @@ -0,0 +1,112 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = 'Who is Isaac Newton?'\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"\n", + "model_folder = \"./model\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime_genai as og\n", + "import json\n", + "from pathlib import Path\n", + "\n", + "def get_session_options(obj):\n", + " if type(obj) is dict:\n", + " for k, v in obj.items():\n", + " if k == \"session_options\":\n", + " yield v\n", + " else:\n", + " for x in get_session_options(v):\n", + " yield x\n", + " elif type(obj) is list:\n", + " for v in obj:\n", + " for x in get_session_options(v):\n", + " yield x\n", + "\n", + "\n", + "def remove_provider_options(model_path):\n", + " genai_config_path = Path(model_path) / \"genai_config.json\"\n", + " data = json.loads(genai_config_path.read_text())\n", + " for session_option in get_session_options(data):\n", + " if 'provider_options' in session_option:\n", + " session_option['provider_options'] = [{k: dict() for k in opts.keys()} for opts in session_option['provider_options']]\n", + "\n", + " 
json.dump(data, genai_config_path.open(\"w\"), indent=4)\n", + "\n", + "if ExecutionProvider == \"QNNExecutionProvider\":\n", + " remove_provider_options(model_folder)\n", + "\n", + "# Load the base model and tokenizer\n", + "model = og.Model(model_folder)\n", + "tokenizer = og.Tokenizer(model)\n", + "tokenizer_stream = tokenizer.create_stream()\n", + "\n", + "# Set the max length to something sensible by default,\n", + "# since otherwise it will be set to the entire context length\n", + "search_options = {}\n", + "search_options[\"max_length\"] = 200\n", + "\n", + "chat_template = \"<|im_start|>user\\n{input}<|im_end|>\\n<|im_start|>assistant\\n\"\n", + "\n", + "# Generate prompt (prompt template + input)\n", + "prompt = f\"{chat_template.format(input=text)}\"\n", + "\n", + "# Encode the prompt using the tokenizer\n", + "input_tokens = tokenizer.encode(prompt)\n", + "\n", + "# Create params and generator\n", + "params = og.GeneratorParams(model)\n", + "params.set_search_options(**search_options)\n", + "generator = og.Generator(model, params)\n", + "\n", + "# Append input tokens to the generator\n", + "generator.append_tokens(input_tokens)\n", + "\n", + "print(\"\")\n", + "print(\"Output: \", end=\"\", flush=True)\n", + "# Stream the output\n", + "while not generator.is_done():\n", + " generator.generate_next_token()\n", + "\n", + " new_token = generator.get_next_tokens()[0]\n", + " print(tokenizer_stream.decode(new_token), end=\"\", flush=True)\n", + "\n", + "print()\n", + "\n", + "del generator\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml b/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml new file mode 100644 index 00000000..a59bb6cd --- /dev/null +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml @@ -0,0 +1,11 @@ +keywords: + aitk +arch: mistral +recipes: + - file: "mistral-7b-instruct-v0.3-ov.json" + device: gpu + ep: OpenVINOExecutionProvider +aitk: + modelInfo: + id: "huggingface/mistralai/Mistral-7B-Instruct-v0.3" + version: 1 diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json b/mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json new file mode 100644 index 00000000..06b14d17 --- /dev/null +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json @@ -0,0 +1,34 @@ +{ + "input_model": { "type": "HfModel", "model_path": "mistralai/Mistral-7B-Instruct-v0.3" }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "execution_providers": [ "OpenVINOExecutionProvider" ] } ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { "device": "gpu" }, + "ov_quant_config": { + "task": "text-generation-with-past", + "weight_format": "int4", + "group_size": 128, + "ratio": 0.8 + } + }, + "io_update": { "type": "OpenVINOIoUpdate", "static": false }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "gpu", + "keep_ov_dynamic_dims": true, + "ov_version": "2025.1" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluate_input_model": false, + "output_dir": "model/mistralai" +} diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json.config b/mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json.config new file mode 100644 index 00000000..800869a2 --- /dev/null +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json.config @@ -0,0 +1,67 @@ +{
+ "name": "Convert to Intel GPU", + "oliveFile": "mistral/openvino/Mistral-7B-Instruct-v0.3-gpu-context-ov-dy.json", + "isLLM": true, + "isIntel": true, + "intelRuntimeValues": [ + "gpu" + ], + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel GPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "gpu" + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel GPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "gpu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/model_project.config b/mistralai-Mistral-7B-Instruct-v0.3/aitk/model_project.config new file mode 100644 index 00000000..40434dc0 --- /dev/null +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/model_project.config @@ -0,0 +1,12 @@ +{ + "workflows": [ + { + "file": "mistral-7b-instruct-v0.3-ov.json", + "templateName": "mistral-7b-instruct-v0.3-ov" + } + ], + "modelInfo": { + "id": "huggingface/mistralai/Mistral-7B-Instruct-v0.3", + "version": 1 + } +} diff --git a/openai-clip-vit-base-patch16/aitk/.gitignore b/openai-clip-vit-base-patch16/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config 
+!/history/*/olive_config.json diff --git a/openai-clip-vit-base-patch16/aitk/README.md b/openai-clip-vit-base-patch16/aitk/README.md new file mode 100644 index 00000000..35dfb8fe --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/README.md @@ -0,0 +1,48 @@ +# Openai Clip optimization + +This folder contains examples of Openai Clip optimization using different workflows. + +- Text and vision model QDQ for Qualcomm NPU +- QDQ for AMD NPU +- OpenVINO for Intel NPU + +## Openai Clip text optimization with QDQ for Qualcomm NPU + +This example performs Openai Clip optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +### Evaluation result + +The quantization uses 256 samples from the train split of the imagenet-1k dataset and the evaluation uses 256 samples from the test split of the imagenet-1k dataset. + + +| Activation Type | Weight Type | Size | Latency ms (avg) | +| --------------------- | ----------------- | ---------- | ---------------------- | +| QUInt16 | QUInt8 | 100 | 6.53724 | + +## Openai Clip vision optimization with QDQ for Qualcomm NPU + +This example performs Openai Clip optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +### Evaluation result + +The quantization uses 256 samples from the train split of the imagenet-1k dataset and the evaluation uses 256 samples from the test split of the imagenet-1k dataset. + + +| Activation Type | Weight Type | Size | Latency ms (avg) | +| --------------------- | ----------------- | ---------- | ---------------------- | +| QUInt16 | QUInt8 | 100 | 20.13231 | + + +## Openai Clip optimization with QDQ for AMD NPU + +This example performs Openai Clip optimization with QDQ in one workflow.
It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +## Openai Clip optimization with OpenVINO + +This example performs Openai Clip optimization with OpenVINO in one workflow for Intel NPU. diff --git a/openai-clip-vit-base-patch16/aitk/_copy.json.config b/openai-clip-vit-base-patch16/aitk/_copy.json.config new file mode 100644 index 00000000..abd20714 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/_copy.json.config @@ -0,0 +1,28 @@ +{ + "copies": [ + { + "src": "openai_clip_ov_inference_sample.ipynb", + "dst": "openai_clip_qdq_amd_inference_sample.ipynb", + "replacements": [ + { + "find": "OpenVINOExecutionProvider", + "replace": "VitisAIExecutionProvider" + }, + { + "find": "./model/openvino_model_quant_st.onnx", + "replace": "./model/model.onnx" + } + ] + }, + { + "src": "openai_clip_trtrtx_inference_sample.ipynb", + "dst": "openai_clip_dml_inference_sample.ipynb", + "replacements": [ + { + "find": "NvTensorRTRTXExecutionProvider", + "replace": "DmlExecutionProvider" + } + ] + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/clip_script.py b/openai-clip-vit-base-patch16/aitk/clip_script.py new file mode 100644 index 00000000..6f775697 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/clip_script.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from collections import OrderedDict +from itertools import chain + +import torch +from transformers import ( + AutoProcessor, + CLIPTextModelWithProjection, + CLIPVisionModelWithProjection, +) + +from olive.data.component.dataset import BaseDataset +from olive.data.registry import Registry + +HF_MODEL_SUBFOLDER_MAPPING = { + "sentence-transformers/clip-ViT-B-32": "0_CLIPModel", +} + + +def load_image_encoder(model_name): + return CLIPVisionModelWithProjection.from_pretrained( + model_name, + subfolder=HF_MODEL_SUBFOLDER_MAPPING.get(model_name, ""), + ).eval() + + +def load_text_encoder(model_name): + if model_name == 
"sentence-transformers/clip-ViT-B-32-multilingual-v1": + from sbert_clip_script import SDistilBertTextEncoder + + return SDistilBertTextEncoder(model_name).eval() + + return CLIPTextModelWithProjection.from_pretrained( + model_name, + subfolder=HF_MODEL_SUBFOLDER_MAPPING.get(model_name, ""), + ).eval() + + +def hfdataset_pre_process_for_clip( + dataset, + processor, + torch_model=None, + image_col: str | None = None, + caption_col: str | None = None, + label_col: str = "label", + max_samples: int | None = None, + max_length: int = 77, + batch_size: int = 32, +): + def generate_inputs(sample, indices): + captions = sample.get(caption_col, None) + images = sample.get(image_col, None) + + kwargs = { + "padding": "max_length", + "max_length": max_length, + "truncation": True, + "add_special_tokens": True, + "return_tensors": "pt", + } + if images: + kwargs["images"] = [img.convert("RGB") for img in images] + if captions: + kwargs["text"] = list(chain([x[0] for x in captions])) + + encoded_input = processor(**kwargs) + + return { + **encoded_input, + label_col: torch_model(**encoded_input)[0] if torch_model else sample.get(label_col, indices), + } + + if max_samples is not None and max_samples < len(dataset): + dataset = dataset.select(range(max_samples)) + + tokenized_datasets = dataset.map( + generate_inputs, + batched=True, + batch_size=batch_size, + with_indices=True, + remove_columns=dataset.column_names, + desc="Processing dataset", + ) + tokenized_datasets.set_format("torch", output_all_columns=True) + + return tokenized_datasets + + +@Registry.register_pre_process() +def pre_process_dataset( + dataset, + model_name: str, + generate_ground_truth: bool = False, + image_col: str | None = None, + caption_col: str | None = None, + label_col: str = "label", + max_samples: int | None = None, + max_length: int = 77, + **kwargs, +): + if image_col is None and caption_col is None: + raise ValueError("Either image_col or caption_col must be provided.") + + if 
generate_ground_truth: + if image_col and caption_col: + raise ValueError("Can not generate two types of embedding at the same time.") + + torch_model = load_image_encoder(model_name) if image_col else load_text_encoder(model_name) + else: + torch_model = None + + processor = AutoProcessor.from_pretrained(model_name) + dataset = hfdataset_pre_process_for_clip( + dataset, + processor, + torch_model=torch_model, + image_col=image_col, + caption_col=caption_col, + label_col=label_col, + max_length=max_length, + max_samples=max_samples, + ) + return BaseDataset(dataset, label_col) + + +@Registry.register_post_process() +def embed_post_process(output): + """Post-processing for CLIP output.""" + match output: + case dict() | OrderedDict() as out: + if "embeds" in out: + return out["embeds"] + elif "text_embeds" in out: + return out["text_embeds"] + elif "image_embeds" in out: + return out["image_embeds"] + case torch.Tensor(): + return output.argmax(dim=-1) + raise ValueError(f"Unsupported output type: {type(output)}") + + +def eval_similarity_degrad(output, targets, batch_size=1024): + import torch.nn.functional as F + + preds = output.preds + scores = [ + F.cosine_similarity(preds[i : i + batch_size], targets[i : i + batch_size]) + for i in range(0, preds.size(0), batch_size) + ] + return {"percentage": f"{100.0 - torch.mean(torch.cat(scores)) * 100.0:.2f}"} diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml new file mode 100644 index 00000000..9773cb6b --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/info.yml @@ -0,0 +1,26 @@ +keywords: + aitk +arch: clip +recipes: + - file: "openai_clip_text_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "openai_clip_vision_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "openai_clip_qdq_amd.json" + device: npu + ep: VitisAIExecutionProvider + - file: "openai_clip_ov.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "openai_clip_trtrtx.json" 
+ device: gpu + ep: NvTensorRTRTXExecutionProvider + - file: "openai_clip_dml.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/openai/clip-vit-base-patch16" + version: 1 diff --git a/openai-clip-vit-base-patch16/aitk/model_project.config b/openai-clip-vit-base-patch16/aitk/model_project.config new file mode 100644 index 00000000..c2d569bd --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/model_project.config @@ -0,0 +1,32 @@ +{ + "workflows": [ + { + "file": "openai_clip_text_qnn.json", + "templateName": "openai_clip_text_qnn" + }, + { + "file": "openai_clip_vision_qnn.json", + "templateName": "openai_clip_vision_qnn" + }, + { + "file": "openai_clip_qdq_amd.json", + "templateName": "openai_clip_qdq_amd" + }, + { + "file": "openai_clip_ov.json", + "templateName": "openai_clip_ov" + }, + { + "file": "openai_clip_trtrtx.json", + "templateName": "openai_clip_trtrtx" + }, + { + "file": "openai_clip_dml.json", + "templateName": "openai_clip_dml" + } + ], + "modelInfo": { + "id": "huggingface/openai/clip-vit-base-patch16", + "version": 1 + } +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_dml.json b/openai-clip-vit-base-patch16/aitk/openai_clip_dml.json new file mode 100644 index 00000000..ee99adaa --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_dml.json @@ -0,0 +1,192 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch16", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + 
"execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + 
"enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_dml.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_dml.json.config new file mode 100644 index 00000000..ed09dcf4 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_dml.json.config @@ -0,0 +1,87 @@ +{ + "name": "Convert to DirectML", + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": 
"Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb new file mode 100644 index 00000000..19f4bc70 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, 
device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov.json b/openai-clip-vit-base-patch16/aitk/openai_clip_ov.json new file mode 100644 index 00000000..e368f2fb --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov.json @@ -0,0 +1,125 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch16" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": 
[ + { + "device": "npu", + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "user_script": "openai_clip_ov.py", + "load_dataset_config": { + "type": "conceptual_captions_dataset", + "data_name": "google-research-datasets/conceptual_captions", + "model_path": "openai/clip-vit-base-patch16" + }, + "dataloader_config": { + "batch_size": 1, + "drop_last": true + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": "nlphuji/flickr30k", + "start": 10, + "end": 20 + }, + "dataloader_config": { "type": "no_auto_batch_dataloader" }, + "post_process_data_config": { "type": "clip_post_process" } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } } + ] + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { "name": "avg", "priority": 2, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }, + { "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } } + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu" + } + }, + "ov_quantize": { + "type": "OpenVINOQuantization", + "target_device": "npu", + "data_config": "quantize_data_config", + "model_type": "TRANSFORMER", + "user_script": "openai_clip_ov.py", + "transform_fn": "custom_transform_func", + "extra_configs": [ + { + "advanced_quantization_parameters": { + "smooth_quant_alpha": 0.6 + } + } + ] + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] 
+ ], + "static": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "ov_version": "2025.1" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip_vit_base_patch16_context_ov_static" +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_ov.json.config new file mode 100644 index 00000000..67325a6e --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov.json.config @@ -0,0 +1,174 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": "clip/openvino/clip_vit_base_patch16_context_ov_static.json", + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "cpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "gpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "npu" + }, + { + "type": "update", + 
"path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "google-research-datasets/conceptual_captions" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "google-research-datasets/conceptual_captions" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov.py b/openai-clip-vit-base-patch16/aitk/openai_clip_ov.py new file mode 100644 index 00000000..d1971b50 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov.py @@ -0,0 +1,124 @@ +from io import BytesIO + +import requests 
+import torch +from datasets import load_dataset +from PIL import Image +from requests.packages.urllib3.exceptions import InsecureRequestWarning +from tqdm import tqdm +from transformers import CLIPModel, CLIPProcessor + +from olive.data.registry import Registry + +requests.packages.urllib3.disable_warnings(InsecureRequestWarning) + +# ------------------------------------------------------------------------- +# Common Dataset +# ------------------------------------------------------------------------- + +seed = 0 +# seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html +# do not set random seed and np.random.seed for aml test, since it will cause aml job name conflict +torch.manual_seed(seed) +# the following are needed only for GPU +torch.cuda.manual_seed(seed) +torch.backends.cudnn.deterministic = True +torch.backends.cudnn.benchmark = False + + +def check_text_data(data): + """Check if the given data is text-based.""" + if isinstance(data, str): + return True + if isinstance(data, list): + return all(isinstance(x, str) for x in data) + return False + + +def get_pil_from_url(url): + """Download and convert an image from a URL to a PIL Image object.""" + response = requests.get(url, verify=True, timeout=20) + image = Image.open(BytesIO(response.content)) + return image.convert("RGB") + + +def wrap_collate_fn(processor, max_length): + def collate_fn(example, image_column="image_url", text_column="caption"): + """Preprocess an example by loading and transforming image and text data. + + Check if the text data in the example is valid by calling the `check_text_data` function. + Download the image specified by the URL in the image_column by calling the `get_pil_from_url` function. + If there is any error during the download process, return None. + Return the preprocessed inputs with transformed image and text data. 
+ """ + if len(example) != 1: + raise ValueError(f"Expected 'example' to have exactly one element, but got {len(example)}.") + example = example[0] + + if not check_text_data(example[text_column]): + raise ValueError("Text data is not valid") + + url = example[image_column] + try: + image = get_pil_from_url(url) + w, h = image.size + if h == 1 or w == 1: + return None + except Exception: + return None + + inputs = processor(text=example[text_column], images=[image], return_tensors="pt", padding=True) + if inputs["input_ids"].shape[1] > max_length: + return None + return inputs + + return collate_fn + + +def prepare_calibration_data(dataloader, init_steps): + """Prepare calibration data from a dataloader for a specified number of initialization steps. + + Iterate over the dataloader, fetching batches and storing the relevant data. + """ + data = [] + with tqdm(total=init_steps) as pbar: + for batch in dataloader: + if len(data) == init_steps: + break + if batch: + pbar.update(1) + with torch.no_grad(): + data.append( + { + "input_ids": batch["input_ids"].to("cpu"), + "pixel_values": batch["pixel_values"].to("cpu"), + "attention_mask": batch["attention_mask"].to("cpu"), + } + ) + return data + + +@Registry.register_dataset() +def conceptual_captions_dataset(data_name,opt_init_steps=200, max_train_samples=1000, **kwargs): + """Prepare a vision-text dataset for quantization.""" + dataset = load_dataset(data_name, trust_remote_code=True) + model_path = kwargs.get("model_path") + if not model_path: + raise ValueError( + "The 'model_path' parameter is required in data_configs.load_dataset_config but was not provided." 
+ ) + model = CLIPModel.from_pretrained(model_path) + processor = CLIPProcessor.from_pretrained(model_path) + max_length = model.config.text_config.max_position_embeddings + train_dataset = dataset["train"].shuffle(seed=seed) + collate_fn = wrap_collate_fn(processor, max_length) + dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) + return prepare_calibration_data(dataloader, opt_init_steps) + + +def custom_transform_func(data_item): + np_inputs = {} + for inp in data_item: + # Drop the first dimension using slicing + np_inputs[inp] = data_item[inp].numpy()[0, ...] + return np_inputs diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb new file mode 100644 index 00000000..18a7aa58 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/openvino_model_quant_st.onnx\"\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a 
photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json new file mode 100644 index 00000000..25b4782c --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json @@ -0,0 +1,209 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch16", + "task": "zero-shot-image-classification", + "load_kwargs": { + 
"attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "VitisAIExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quant_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + 
}, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 1, + "optimization_options": { + "enable_gelu": true, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue" + }, + { + "surgeon": "PowReduceSumPowDiv2LpNorm" + } + ] + }, + "quantization": { + "type": "OnnxStaticQuantization", + "quant_preprocess": true, + "data_config": "quant_data_config", + "activation_type": "uint16", + "precision": "uint8", + "calibrate_method": "MinMax", + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "uint8", + "quant_type": "OnnxStaticQuantization" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip_vit_base_patch16" +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json.config new file mode 100644 index 00000000..e86474b6 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json.config @@ -0,0 +1,195 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "clip/openai_clip-vit-base-patch16_ptq_qdq_vitis_ai.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD NPU", + "CPU" + ], + "path": 
"systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "VitisAIExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].load_dataset_config.end", + "template": { + "path": 
"data_configs[1].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb new file mode 100644 index 00000000..a4cb3eb3 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"VitisAIExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, 
ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json new file mode 100644 index 00000000..f1821df2 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json @@ -0,0 +1,193 @@ +{ + "input_model": { + "type": "PytorchModel", + "model_path": "openai/clip-vit-base-patch16", + "generative": false, + "io_config": { + "input_names": [ + "input_ids", + "attention_mask" + ], + "input_shapes": [ + [ + 1, + 77 + ], + [ + 1, + 77 + ] + ], + "input_types": [ + "int32", + "int32" + ], + "output_names": [ + "embeds", + "last_hidden_state" + ] + }, + "model_loader": "load_text_encoder", + "model_script": "clip_script.py" + }, + "systems": { + "host_system": { + "type": 
"LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "host": "host_system", + "target": "host_system", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "log_to_file": false, + "data_configs": [ + { + "name": "calib_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "nlphuji/flickr30k", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch16", + "caption_col": "caption", + "max_length": 77, + "max_samples": 12 + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + }, + { + "name": "eval_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "nlphuji/flickr30k", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch16", + "generate_ground_truth": true, + "caption_col": "caption", + "max_length": 77, + "max_samples": 100 + }, + "post_process_data_config": { + "type": "embed_post_process" + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "degrad", + "type": "custom", + "data_config": "eval_data", + "sub_types": [ + { + "name": "percentage", + "priority": 1, + "higher_is_better": false + } + ], + "user_config": { + "user_script": "clip_script.py", + "metric_func": "eval_similarity_degrad" + } + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { + "name": "avg", + "priority": 2, + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + }, + { + "name": "p90", + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + 
"save_as_external_data": true + }, + "to_fixed_shape": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "sequence_length" + ], + "dim_value": [ + 1, + 77 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue", + "replacement": -100.0 + }, + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 1, + "optimization_options": { + "enable_gelu": false, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "quantization": { + "type": "OnnxStaticQuantization", + "data_config": "calib_data", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "cache_dir": "cache", + "output_dir": "model/clip_text" +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json.config new file mode 100644 index 00000000..0904f12d --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json.config @@ -0,0 +1,235 @@ +{ + "name": "Convert Text Model to Qualcomm NPU", + "oliveFile": "clip/qdq/openai_clip_text_b16_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.host_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + 
} + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "test" + ], + 
"template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "test" + ], + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git 
a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb new file mode 100644 index 00000000..c571836e --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -0,0 +1,141 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43751a72", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "text_model = 
ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_text_embedding(text):\n", + " inputs = processor(\n", + " text=text,\n", + " padding=\"max_length\",\n", + " max_length=77,#text_model.sequence_length,\n", + " truncation=True,\n", + " add_special_tokens=True,\n", + " return_tensors=\"np\",\n", + " )\n", + " output = text_model.run(None, {\n", + " \"input_ids\": inputs[\"input_ids\"].astype(np.int32),\n", + " \"attention_mask\": inputs[\"attention_mask\"].astype(np.int32),\n", + " })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because, due to static quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_text_embedding(source)\n", + " scores = []\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_text_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of sentence {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(\"a photo containing two cats\", [\"a photo of tshirt\", \"a photo of two cats\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json new file mode 100644 index 00000000..0d8f7581 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json @@ -0,0 +1,173 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch16", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "NvTensorRTRTXExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quant_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": "nlphuji/flickr30k", + "start": 10, + "end": 20 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + 
"type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + }, + "session_params_tuning": { + "type": "OrtSessionParamsTuning", + "io_bind": false, + "data_config": "quant_data_config" + } + }, + "host": "local_system", + "target": "local_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/clip-vit-base-patch16", + "evaluate_input_model": false +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json.config new file mode 100644 index 00000000..c61c1395 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json.config @@ -0,0 +1,86 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "clip/openai_clip-vit-base-patch16_trtrtx.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + 
"name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].load_dataset_config.end", + "template": { + "path": "data_configs[1].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..a3c6f084 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + 
"import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json new file mode 100644 index 00000000..b58a975f --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json @@ -0,0 +1,186 @@ +{ + "input_model": { + "type": "PytorchModel", + "model_path": "openai/clip-vit-base-patch16", + "generative": false, + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "embeds" + ] + }, + "model_loader": "load_image_encoder", + "model_script": "clip_script.py" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "host": "host_system", + "target": "host_system", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "log_to_file": false, + "data_configs": [ + { + "name": "calib_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch16", + "image_col": "image", + "max_samples": 12 + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + }, + { + "name": "eval_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch16", + "generate_ground_truth": true, + "image_col": "image", + "max_samples": 100 + }, + 
"post_process_data_config": { + "type": "embed_post_process" + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "degrad", + "type": "custom", + "data_config": "eval_data", + "sub_types": [ + { + "name": "percentage", + "priority": 1, + "higher_is_better": false + } + ], + "user_config": { + "user_script": "clip_script.py", + "metric_func": "eval_similarity_degrad", + "metric_func_kwargs": { + "batch_size": 32 + } + } + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { + "name": "avg", + "priority": 2, + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + }, + { + "name": "p90", + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + }, + "to_fixed_shape": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "num_channels", + "height", + "width" + ], + "dim_value": [ + 1, + 3, + 224, + 224 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 1, + "optimization_options": { + "enable_gelu": false, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "quantization": { + "type": "OnnxStaticQuantization", + "data_config": "calib_data", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "cache_dir": "cache", + "output_dir": "model/clip_vision" +} diff --git 
a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json.config new file mode 100644 index 00000000..61ec81c9 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json.config @@ -0,0 +1,237 @@ +{ + "name": "Convert Vision Model to Qualcomm NPU", + "oliveFile": "clip/qdq/openai_clip_vision_b16_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.host_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation 
Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb new file mode 100644 index 00000000..a8b98672 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3c18a7d6", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "vision_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_image_embedding(image):\n", + " inputs = processor(images=image, return_tensors=\"np\")\n", + " output = vision_model.run(None, { \"pixel_values\": inputs[\"pixel_values\"] })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because, for static quantization, we fixed the batch size to
1\n", + "def ask(source, targets):\n", + " source_emb = get_image_embedding(source)\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_image_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of image {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16868fbd-e447-4866-af7d-eb6e49975bcc", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "\n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07076b9a", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000208833.jpg\"\n", + "image1 = Image.open(requests.get(url, stream=True).raw)\n", + "image1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c10de7cd", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000125690.jpg\"\n", + "image2 = Image.open(requests.get(url, stream=True).raw)\n", + "image2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(image, [image1, image2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 
5 +} diff --git a/openai-clip-vit-base-patch16/aitk/requirements.txt b/openai-clip-vit-base-patch16/aitk/requirements.txt new file mode 100644 index 00000000..0cddd58d --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/requirements.txt @@ -0,0 +1,5 @@ +olive-ai +cachetools==5.5.0 +nltk>=3.9.1 +accelerate>=1.4.0 +pillow>=10.0.1 diff --git a/openai-clip-vit-base-patch16/aitk/user_script.py b/openai-clip-vit-base-patch16/aitk/user_script.py new file mode 100644 index 00000000..2d0051f0 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/user_script.py @@ -0,0 +1,64 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import numpy as np +import torch +from datasets import load_dataset +from torch.utils.data import Dataset +from transformers import CLIPProcessor + +from olive.data.registry import Registry + + +class CLIPDataset(Dataset): + def __init__( + self, + model_name, + dataset_name, + start=0, + end=500, + image_size=(224, 224), + ): + assert 0 <= start < end + self.start = start + self.end = end + self.model_name = model_name + self.dataset_name = dataset_name + self.processor = CLIPProcessor.from_pretrained(self.model_name) + self.length = self.end - self.start + self.image_size = image_size + self.dataset = load_dataset(self.dataset_name, split=f"test[{0}:{self.end + 10}]") + + def __len__(self): + return self.length + + def __getitem__(self, idx): + text_inputs = self.processor( + text=[" ".join(item) for item in self.dataset[idx : idx + 10]["caption"]], + return_tensors="np", + padding="max_length", + truncation=True, + ) + + image_input = self.processor(images=self.dataset[idx]["image"].resize(self.image_size), return_tensors="np") + model_inputs = [ + { + "input_ids": text_inputs["input_ids"].astype(np.int64), + "pixel_values": 
image_input["pixel_values"], + "attention_mask": text_inputs["attention_mask"].astype(np.int64), + } + ] + + target = torch.Tensor([0]).to(torch.int32) + return model_inputs[0], target + + +@Registry.register_dataset() +def clip_dataset(**kwargs): + return CLIPDataset(**kwargs) + + +@Registry.register_post_process() +def clip_post_process(output): + return output["logits_per_image"].argmax(axis=-1) diff --git a/openai-clip-vit-base-patch32/aitk/.gitignore b/openai-clip-vit-base-patch32/aitk/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/openai-clip-vit-base-patch32/aitk/README.md b/openai-clip-vit-base-patch32/aitk/README.md new file mode 100644 index 00000000..35dfb8fe --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/README.md @@ -0,0 +1,48 @@ +# Openai Clip optimization + +This folder contains examples of Openai Clip optimization using different workflows. + +- Text and vision model QDQ for Qualcomm NPU +- QDQ for AMD NPU +- OpenVINO for Intel NPU + +## Openai Clip text optimization with QDQ for Qualcomm NPU + +This example performs Openai Clip optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +### Evaluation result + +The quantization uses 256 samples from train split of imagenet-1k dataset and the evaluations uses 256 samples from test split of imagenet-1k dataset. + + +| Activation Type  | Weight Type  | Size  | Latency ms (avg)  | +| --------------------- | ----------------- | ---------- | ---------------------- | +| QUInt16 | QUInt8 | 100 | 6.53724 | + +## Openai Clip vision optimization with QDQ for Qualcomm NPU + +This example performs Openai Clip optimization with QDQ in one workflow. 
It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +### Evaluation result + +The quantization uses 256 samples from train split of imagenet-1k dataset and the evaluations uses 256 samples from test split of imagenet-1k dataset. + + +| Activation Type  | Weight Type  | Size  | Latency ms (avg)  | +| --------------------- | ----------------- | ---------- | ---------------------- | +| QUInt16 | QUInt8 | 100 | 20.13231 | + + +## Openai Clip optimization with QDQ for AMD NPU + +This example performs Openai Clip optimization with QDQ in one workflow. It performs the optimization pipeline: + +- *PyTorch Model -> Onnx Model -> Quantized Onnx Model* + +## Openai Clip optimization with OpenVINO + +This example performs Openai Clip optimization with OpenVINO in one workflow for Intel NPU. diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config new file mode 100644 index 00000000..16b1d573 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config @@ -0,0 +1,206 @@ +{ + "copies": [ + { + "src": "../../clip-vit-base-patch16/1/model_project.config", + "dst": "model_project.config", + "replacements": [ + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_text_qnn_inference_sample.ipynb", + "dst": "openai_clip_text_qnn_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_text_qnn.json", + "dst": "openai_clip_text_qnn.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_text_qnn.json.config", + "dst": "openai_clip_text_qnn.json.config", + "replacements": [ + { + "find": "clip/qdq/openai_clip_text_b16_qdq.json", + "replace": "clip/qdq/openai_clip_text_b32_qdq.json" + } 
+ ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_vision_qnn_inference_sample.ipynb", + "dst": "openai_clip_vision_qnn_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_vision_qnn.json", + "dst": "openai_clip_vision_qnn.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_vision_qnn.json.config", + "dst": "openai_clip_vision_qnn.json.config", + "replacements": [ + { + "find": "clip/qdq/openai_clip_vision_b16_qdq.json", + "replace": "clip/qdq/openai_clip_vision_b32_qdq.json" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_ov_inference_sample.ipynb", + "dst": "openai_clip_ov_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_ov.json", + "dst": "openai_clip_ov.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_ov.json.config", + "dst": "openai_clip_ov.json.config", + "replacements": [ + { + "find": "clip/openvino/clip_vit_base_patch16_context_ov_static.json", + "replace": "clip/openvino/clip_vit_base_patch32_context_ov_static.json" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_qdq_amd_inference_sample.ipynb", + "dst": "openai_clip_qdq_amd_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_qdq_amd.json", + "dst": "openai_clip_qdq_amd.json", + "replacements": [ + { + "find": 
"openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_qdq_amd.json.config", + "dst": "openai_clip_qdq_amd.json.config", + "replacements": [ + { + "find": "clip/openai_clip-vit-base-patch16_ptq_qdq_vitis_ai.json", + "replace": "clip/openai_clip-vit-base-patch32_ptq_qdq_vitis_ai.json" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_trtrtx.json", + "dst": "openai_clip_trtrtx.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_trtrtx.json.config", + "dst": "openai_clip_trtrtx.json.config", + "replacements": [ + { + "find": "clip/openai_clip-vit-base-patch16_trtrtx.json", + "replace": "clip/openai_clip-vit-base-patch32_trtrtx.json" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_trtrtx_inference_sample.ipynb", + "dst": "openai_clip_trtrtx_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_dml.json", + "dst": "openai_clip_dml.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_dml.json.config", + "dst": "openai_clip_dml.json.config", + "replacements": [ + ] + }, + { + "src": "../../clip-vit-base-patch16/1/openai_clip_dml_inference_sample.ipynb", + "dst": "openai_clip_dml_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../clip-vit-base-patch16/1/clip_script.py", + "dst": "clip_script.py" + }, + { + "src": "../../clip-vit-base-patch16/1/user_script.py", + "dst": "user_script.py" + }, + { + "src": 
"../../clip-vit-base-patch16/1/openai_clip_ov.py", + "dst": "openai_clip_ov.py" + }, + { + "src": "../../clip-vit-base-patch16/1/README.md", + "dst": "README.md" + }, + { + "src": "../../clip-vit-base-patch16/1/requirements.txt", + "dst": "requirements.txt" + } + ] +} \ No newline at end of file diff --git a/openai-clip-vit-base-patch32/aitk/clip_script.py b/openai-clip-vit-base-patch32/aitk/clip_script.py new file mode 100644 index 00000000..6f775697 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/clip_script.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from collections import OrderedDict +from itertools import chain + +import torch +from transformers import ( + AutoProcessor, + CLIPTextModelWithProjection, + CLIPVisionModelWithProjection, +) + +from olive.data.component.dataset import BaseDataset +from olive.data.registry import Registry + +HF_MODEL_SUBFOLDER_MAPPING = { + "sentence-transformers/clip-ViT-B-32": "0_CLIPModel", +} + + +def load_image_encoder(model_name): + return CLIPVisionModelWithProjection.from_pretrained( + model_name, + subfolder=HF_MODEL_SUBFOLDER_MAPPING.get(model_name, ""), + ).eval() + + +def load_text_encoder(model_name): + if model_name == "sentence-transformers/clip-ViT-B-32-multilingual-v1": + from sbert_clip_script import SDistilBertTextEncoder + + return SDistilBertTextEncoder(model_name).eval() + + return CLIPTextModelWithProjection.from_pretrained( + model_name, + subfolder=HF_MODEL_SUBFOLDER_MAPPING.get(model_name, ""), + ).eval() + + +def hfdataset_pre_process_for_clip( + dataset, + processor, + torch_model=None, + image_col: str | None = None, + caption_col: str | None = None, + label_col: str = "label", + max_samples: int | None = None, + max_length: int = 77, + batch_size: int = 32, +): + def generate_inputs(sample, indices): + captions = sample.get(caption_col, None) + images = sample.get(image_col, None) + + kwargs = { + "padding": "max_length", + "max_length": max_length, + "truncation": True, + 
"add_special_tokens": True, + "return_tensors": "pt", + } + if images: + kwargs["images"] = [img.convert("RGB") for img in images] + if captions: + kwargs["text"] = list(chain([x[0] for x in captions])) + + encoded_input = processor(**kwargs) + + return { + **encoded_input, + label_col: torch_model(**encoded_input)[0] if torch_model else sample.get(label_col, indices), + } + + if max_samples is not None and max_samples < len(dataset): + dataset = dataset.select(range(max_samples)) + + tokenized_datasets = dataset.map( + generate_inputs, + batched=True, + batch_size=batch_size, + with_indices=True, + remove_columns=dataset.column_names, + desc="Processing dataset", + ) + tokenized_datasets.set_format("torch", output_all_columns=True) + + return tokenized_datasets + + +@Registry.register_pre_process() +def pre_process_dataset( + dataset, + model_name: str, + generate_ground_truth: bool = False, + image_col: str | None = None, + caption_col: str | None = None, + label_col: str = "label", + max_samples: int | None = None, + max_length: int = 77, + **kwargs, +): + if image_col is None and caption_col is None: + raise ValueError("Either image_col or caption_col must be provided.") + + if generate_ground_truth: + if image_col and caption_col: + raise ValueError("Can not generate two types of embedding at the same time.") + + torch_model = load_image_encoder(model_name) if image_col else load_text_encoder(model_name) + else: + torch_model = None + + processor = AutoProcessor.from_pretrained(model_name) + dataset = hfdataset_pre_process_for_clip( + dataset, + processor, + torch_model=torch_model, + image_col=image_col, + caption_col=caption_col, + label_col=label_col, + max_length=max_length, + max_samples=max_samples, + ) + return BaseDataset(dataset, label_col) + + +@Registry.register_post_process() +def embed_post_process(output): + """Post-processing for CLIP output.""" + match output: + case dict() | OrderedDict() as out: + if "embeds" in out: + return out["embeds"] + 
elif "text_embeds" in out: + return out["text_embeds"] + elif "image_embeds" in out: + return out["image_embeds"] + case torch.Tensor(): + return output.argmax(dim=-1) + raise ValueError(f"Unsupported output type: {type(output)}") + + +def eval_similarity_degrad(output, targets, batch_size=1024): + import torch.nn.functional as F + + preds = output.preds + scores = [ + F.cosine_similarity(preds[i : i + batch_size], targets[i : i + batch_size]) + for i in range(0, preds.size(0), batch_size) + ] + return {"percentage": f"{100.0 - torch.mean(torch.cat(scores)) * 100.0:.2f}"} diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml new file mode 100644 index 00000000..d908ab0d --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/info.yml @@ -0,0 +1,26 @@ +keywords: + aitk +arch: clip +recipes: + - file: "openai_clip_text_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "openai_clip_vision_qnn.json" + device: npu + ep: QNNExecutionProvider + - file: "openai_clip_qdq_amd.json" + device: npu + ep: VitisAIExecutionProvider + - file: "openai_clip_ov.json" + device: npu + ep: OpenVINOExecutionProvider + - file: "openai_clip_trtrtx.json" + device: gpu + ep: NvTensorRTRTXExecutionProvider + - file: "openai_clip_dml.json" + device: gpu + ep: DmlExecutionProvider +aitk: + modelInfo: + id: "huggingface/openai/clip-vit-base-patch32" + version: 1 diff --git a/openai-clip-vit-base-patch32/aitk/model_project.config b/openai-clip-vit-base-patch32/aitk/model_project.config new file mode 100644 index 00000000..4f2dd495 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/model_project.config @@ -0,0 +1,32 @@ +{ + "workflows": [ + { + "file": "openai_clip_text_qnn.json", + "templateName": "openai_clip_text_qnn" + }, + { + "file": "openai_clip_vision_qnn.json", + "templateName": "openai_clip_vision_qnn" + }, + { + "file": "openai_clip_qdq_amd.json", + "templateName": "openai_clip_qdq_amd" + }, + { + "file": "openai_clip_ov.json", + 
"templateName": "openai_clip_ov" + }, + { + "file": "openai_clip_trtrtx.json", + "templateName": "openai_clip_trtrtx" + }, + { + "file": "openai_clip_dml.json", + "templateName": "openai_clip_dml" + } + ], + "modelInfo": { + "id": "huggingface/openai/clip-vit-base-patch32", + "version": 1 + } +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_dml.json b/openai-clip-vit-base-patch32/aitk/openai_clip_dml.json new file mode 100644 index 00000000..aa1c716d --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_dml.json @@ -0,0 +1,192 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch32", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "DmlExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": 
"metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_dml.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_dml.json.config new file mode 100644 index 00000000..ed09dcf4 --- /dev/null 
+++ b/openai-clip-vit-base-patch32/aitk/openai_clip_dml.json.config @@ -0,0 +1,87 @@ +{ + "name": "Convert to DirectML", + "evaluationRuntimeFeatures": [ + "Nightly" + ], + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "DirectML" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "DmlExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb new file mode 100644 index 00000000..db21746c --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = 
session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov.json b/openai-clip-vit-base-patch32/aitk/openai_clip_ov.json new file mode 100644 index 00000000..de22a30c --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov.json @@ -0,0 +1,125 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch32" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "OpenVINOExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "user_script": "openai_clip_ov.py", + "load_dataset_config": { + "type": "conceptual_captions_dataset", + "data_name": "google-research-datasets/conceptual_captions", + "model_path": "openai/clip-vit-base-patch32" + }, + "dataloader_config": { + "batch_size": 1, + "drop_last": true + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 10, + "end": 20 + }, + "dataloader_config": { "type": "no_auto_batch_dataloader" }, + 
"post_process_data_config": { "type": "clip_post_process" } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } } + ] + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { "name": "avg", "priority": 2, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }, + { "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } } + ] + } + ] + } + }, + "passes": { + "optimum_convert": { + "type": "OpenVINOOptimumConversion", + "extra_args": { + "device": "npu" + } + }, + "ov_quantize": { + "type": "OpenVINOQuantization", + "target_device": "npu", + "data_config": "quantize_data_config", + "model_type": "TRANSFORMER", + "user_script": "openai_clip_ov.py", + "transform_fn": "custom_transform_func", + "extra_configs": [ + { + "advanced_quantization_parameters": { + "smooth_quant_alpha": 0.6 + } + } + ] + }, + "io_update": { + "type": "OpenVINOIoUpdate", + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "static": true + }, + "encapsulation": { + "type": "OpenVINOEncapsulation", + "target_device": "npu", + "ov_version": "2025.1" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip_vit_base_patch16_context_ov_static" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_ov.json.config new file mode 100644 index 00000000..25acd717 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov.json.config @@ -0,0 +1,174 @@ +{ + "name": "Convert to Intel CPU/NPU/GPU", + "oliveFile": 
"clip/openvino/clip_vit_base_patch32_context_ov_static.json", + "isIntel": true, + "debugInfo": { + "autoGenerated": true, + "useOpenVINOOptimumConversion": "optimum_convert" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "systems.local_system.accelerators.0.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "readOnly": false + }, + "runtimeInConversion": { + "autoGenerated": true, + "name": "Convert/Quantize to", + "type": "enum", + "displayNames": [ + "Intel CPU", + "Intel GPU", + "Intel NPU" + ], + "path": "passes.optimum_convert.extra_args.device", + "values": [ + "cpu", + "gpu", + "npu" + ], + "actions": [ + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "cpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "cpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "gpu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "gpu" + } + ], + [ + { + "type": "update", + "path": "passes.ov_quantize.target_device", + "value": "npu" + }, + { + "type": "update", + "path": "passes.encapsulation.target_device", + "value": "npu" + } + ] + ] + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "google-research-datasets/conceptual_captions" + ], + "template": { + "path": 
"data_configs[0].load_dataset_config.data_name", + "values": [ + "google-research-datasets/conceptual_captions" + ], + "template": "QuantizationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.optimum_convert", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov.py b/openai-clip-vit-base-patch32/aitk/openai_clip_ov.py new file mode 100644 index 00000000..d1971b50 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov.py @@ -0,0 +1,124 @@ +from io import BytesIO + +import requests +import torch +from datasets import load_dataset +from PIL import Image +from requests.packages.urllib3.exceptions import InsecureRequestWarning +from tqdm import tqdm +from transformers import CLIPModel, CLIPProcessor + +from olive.data.registry import Registry + +requests.packages.urllib3.disable_warnings(InsecureRequestWarning) + +# ------------------------------------------------------------------------- +# Common Dataset +# ------------------------------------------------------------------------- + +seed = 0 +# seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html +# do not set random seed and np.random.seed for aml test, since it will cause aml job 
name conflict
+torch.manual_seed(seed)
+# the following are needed only for GPU
+torch.cuda.manual_seed(seed)
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+
+
+def check_text_data(data):
+    """Return True when ``data`` is a ``str`` or a ``list`` containing only ``str`` items.
+
+    Note that an empty list is accepted, because ``all()`` of an empty iterable is True.
+    """
+    if isinstance(data, str):
+        return True
+    if isinstance(data, list):
+        return all(isinstance(x, str) for x in data)
+    return False
+
+
+def get_pil_from_url(url):
+    """Download the image at ``url`` and return it as an RGB PIL Image.
+
+    Raises:
+        requests.RequestException: on connection problems, on timeout (20 s), or when
+            the server answers with an HTTP error status.
+        Exception: whatever ``Image.open`` raises if the payload is not a readable image.
+    """
+    response = requests.get(url, verify=True, timeout=20)
+    # Fail fast on 4xx/5xx instead of handing an HTML error page to the image decoder.
+    # The only caller in this file (collate_fn) already treats any exception as "skip sample".
+    response.raise_for_status()
+    image = Image.open(BytesIO(response.content))
+    return image.convert("RGB")
+
+
+def wrap_collate_fn(processor, max_length):
+    """Build a DataLoader ``collate_fn`` bound to ``processor`` and ``max_length``.
+
+    The returned function yields the processor output for one sample, or ``None`` when the
+    sample must be skipped (image download failed, degenerate image, or text too long).
+    """
+
+    def collate_fn(example, image_column="image_url", text_column="caption"):
+        """Preprocess an example by loading and transforming image and text data.
+
+        Check if the text data in the example is valid by calling the `check_text_data` function.
+        Download the image specified by the URL in the image_column by calling the `get_pil_from_url` function.
+        If there is any error during the download process, return None.
+        Return the preprocessed inputs with transformed image and text data.
+        """
+        # The function is written for batch_size=1: the batch is a list holding one record.
+        if len(example) != 1:
+            raise ValueError(f"Expected 'example' to have exactly one element, but got {len(example)}.")
+        example = example[0]
+
+        if not check_text_data(example[text_column]):
+            raise ValueError("Text data is not valid")
+
+        url = example[image_column]
+        try:
+            image = get_pil_from_url(url)
+            w, h = image.size
+            # Images that are one pixel wide or tall are rejected.
+            if h == 1 or w == 1:
+                return None
+        except Exception:
+            # Deliberate best effort: any download/decoding failure just drops the sample.
+            return None
+
+        inputs = processor(text=example[text_column], images=[image], return_tensors="pt", padding=True)
+        # Drop samples whose tokenized text is longer than max_length.
+        if inputs["input_ids"].shape[1] > max_length:
+            return None
+        return inputs
+
+    return collate_fn
+
+
+def prepare_calibration_data(dataloader, init_steps):
+    """Prepare calibration data from a dataloader for a specified number of initialization steps.
+
+    Iterate over the dataloader, fetching batches and storing the relevant data.
+    Falsy batches (e.g. ``None`` returned by the collate function for a rejected sample)
+    are skipped and do not count towards ``init_steps``.
+
+    Returns:
+        A list of dicts with the keys ``input_ids``, ``pixel_values`` and ``attention_mask``;
+        each value is the corresponding batch tensor moved to the CPU.
+    """
+    data = []
+    if init_steps == 0:
+        # Nothing requested: do not pull (and therefore download) a batch just to discard it.
+        return data
+    with tqdm(total=init_steps) as pbar:
+        for batch in dataloader:
+            if not batch:
+                continue
+            pbar.update(1)
+            with torch.no_grad():
+                data.append(
+                    {
+                        "input_ids": batch["input_ids"].to("cpu"),
+                        "pixel_values": batch["pixel_values"].to("cpu"),
+                        "attention_mask": batch["attention_mask"].to("cpu"),
+                    }
+                )
+            # Stop right after the last needed sample; checking at the top of the next
+            # iteration would make the dataloader fetch one more batch for nothing.
+            if len(data) == init_steps:
+                break
+    return data
+
+
+@Registry.register_dataset()
+def conceptual_captions_dataset(data_name, opt_init_steps=200, max_train_samples=1000, **kwargs):
+    """Prepare a vision-text dataset for quantization.
+
+    Args:
+        data_name: dataset identifier passed to ``datasets.load_dataset``.
+        opt_init_steps: number of calibration samples to collect.
+        max_train_samples: accepted for interface compatibility; currently not used.
+        **kwargs: must contain ``model_path``, the CLIP checkpoint used for the processor
+            and for the maximum text length.
+
+    Returns:
+        The list produced by ``prepare_calibration_data``.
+
+    Raises:
+        ValueError: if ``model_path`` is missing or empty.
+    """
+    dataset = load_dataset(data_name, trust_remote_code=True)
+    model_path = kwargs.get("model_path")
+    if not model_path:
+        raise ValueError(
+            "The 'model_path' parameter is required in data_configs.load_dataset_config but was not provided."
+        )
+    # NOTE(review): the model is only used to read max_position_embeddings from its config;
+    # loading just the config would avoid materializing the weights - confirm and simplify.
+    model = CLIPModel.from_pretrained(model_path)
+    processor = CLIPProcessor.from_pretrained(model_path)
+    max_length = model.config.text_config.max_position_embeddings
+    train_dataset = dataset["train"].shuffle(seed=seed)
+    collate_fn = wrap_collate_fn(processor, max_length)
+    # batch_size=1 is required by collate_fn, which rejects batches of any other size.
+    dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1)
+    return prepare_calibration_data(dataloader, opt_init_steps)
+
+
+def custom_transform_func(data_item):
+    """Convert every tensor in ``data_item`` to a NumPy array with its first dimension dropped."""
+    np_inputs = {}
+    for inp in data_item:
+        # Drop the first dimension using slicing
+        np_inputs[inp] = data_item[inp].numpy()[0, ...]
+ return np_inputs diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb new file mode 100644 index 00000000..ef626f4c --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/openvino_model_quant_st.onnx\"\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " 
session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json new file mode 100644 index 00000000..2283c80c --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json @@ -0,0 +1,209 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch32", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "npu", + "execution_providers": [ + "VitisAIExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ 
+ { + "name": "quant_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 1, + "optimization_options": { + "enable_gelu": true, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "surgery": { + "type": "GraphSurgeries", + 
"surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue" + }, + { + "surgeon": "PowReduceSumPowDiv2LpNorm" + } + ] + }, + "quantization": { + "type": "OnnxStaticQuantization", + "quant_preprocess": true, + "data_config": "quant_data_config", + "activation_type": "uint16", + "precision": "uint8", + "calibrate_method": "MinMax", + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint16", + "weight_type": "uint8", + "quant_type": "OnnxStaticQuantization" + } + }, + "search_strategy": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip_vit_base_patch16" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json.config new file mode 100644 index 00000000..0e95bfd2 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json.config @@ -0,0 +1,195 @@ +{ + "name": "Convert to AMD NPU", + "oliveFile": "clip/openai_clip-vit-base-patch32_ptq_qdq_vitis_ai.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD NPU", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "VitisAIExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type 
of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, 
+ "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].load_dataset_config.end", + "template": { + "path": "data_configs[1].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb new file mode 100644 index 00000000..95bfb0a4 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"VitisAIExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = 
CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json 
b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json new file mode 100644 index 00000000..469e1cfe --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json @@ -0,0 +1,193 @@ +{ + "input_model": { + "type": "PytorchModel", + "model_path": "openai/clip-vit-base-patch32", + "generative": false, + "io_config": { + "input_names": [ + "input_ids", + "attention_mask" + ], + "input_shapes": [ + [ + 1, + 77 + ], + [ + 1, + 77 + ] + ], + "input_types": [ + "int32", + "int32" + ], + "output_names": [ + "embeds", + "last_hidden_state" + ] + }, + "model_loader": "load_text_encoder", + "model_script": "clip_script.py" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "host": "host_system", + "target": "host_system", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "log_to_file": false, + "data_configs": [ + { + "name": "calib_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "nlphuji/flickr30k", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch32", + "caption_col": "caption", + "max_length": 77, + "max_samples": 12 + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + }, + { + "name": "eval_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "nlphuji/flickr30k", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch32", + "generate_ground_truth": true, + "caption_col": "caption", + "max_length": 77, + "max_samples": 100 + }, + "post_process_data_config": { + "type": "embed_post_process" + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "degrad", + 
"type": "custom", + "data_config": "eval_data", + "sub_types": [ + { + "name": "percentage", + "priority": 1, + "higher_is_better": false + } + ], + "user_config": { + "user_script": "clip_script.py", + "metric_func": "eval_similarity_degrad" + } + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { + "name": "avg", + "priority": 2, + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + }, + { + "name": "p90", + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + }, + "to_fixed_shape": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "sequence_length" + ], + "dim_value": [ + 1, + 77 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "ReplaceAttentionMaskValue", + "replacement": -100.0 + }, + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 1, + "optimization_options": { + "enable_gelu": false, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "quantization": { + "type": "OnnxStaticQuantization", + "data_config": "calib_data", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "cache_dir": "cache", + "output_dir": "model/clip_text" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json.config new file mode 100644 index 00000000..5d7b93e7 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn.json.config @@ -0,0 +1,235 @@ +{ + "name": 
"Convert Text Model to Qualcomm NPU", + "oliveFile": "clip/qdq/openai_clip_text_b32_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.host_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "test" + ], + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + 
"type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "test" + ], + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb new file mode 100644 index 00000000..d5cfcda2 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -0,0 +1,141 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43751a72", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers 
import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "text_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_text_embedding(text):\n", + " inputs = processor(\n", + " text=text,\n", + " padding=\"max_length\",\n", + " max_length=77,#text_model.sequence_length,\n", + " truncation=True,\n", + " add_special_tokens=True,\n", + " return_tensors=\"np\",\n", + " )\n", + " output = text_model.run(None, {\n", + " \"input_ids\": inputs[\"input_ids\"].astype(np.int32),\n", + " \"attention_mask\": inputs[\"attention_mask\"].astype(np.int32),\n", + " })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and
calculate the similarity score for each target\n", + "# We need to process one by one because, due to static quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_text_embedding(source)\n", + " scores = []\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_text_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of sentence {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(\"a photo containing two cats\", [\"a photo of tshirt\", \"a photo of two cats\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json new file mode 100644 index 00000000..f6cd9515 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json @@ -0,0 +1,173 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch32", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", +
"float32", + "int64" + ], + "output_names": [ + "logits_per_image" + ], + "output_shapes": [ + [ + 1, + 2 + ] + ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "NvTensorRTRTXExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quant_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + } + }, + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 10, + "end": 20 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + 
"save_as_external_data": true + }, + "session_params_tuning": { + "type": "OrtSessionParamsTuning", + "io_bind": false, + "data_config": "quant_data_config" + } + }, + "host": "local_system", + "target": "local_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/clip-vit-base-patch16", + "evaluate_input_model": false +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json.config new file mode 100644 index 00000000..6b98187a --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json.config @@ -0,0 +1,86 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "clip/openai_clip-vit-base-patch32_trtrtx.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX", + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].load_dataset_config.end", + "template": { + "path": "data_configs[1].load_dataset_config.end", + "template": 
"EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..ee2b42fd --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and 
ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json new file mode 100644 index 00000000..a12522a0 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json @@ -0,0 +1,186 @@ +{ + "input_model": { + "type": "PytorchModel", + "model_path": "openai/clip-vit-base-patch32", + "generative": false, + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "embeds" + ] + }, + "model_loader": 
"load_image_encoder", + "model_script": "clip_script.py" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "QNNExecutionProvider" + ] + } + ] + } + }, + "host": "host_system", + "target": "host_system", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "log_to_file": false, + "data_configs": [ + { + "name": "calib_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch32", + "image_col": "image", + "max_samples": 12 + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + }, + { + "name": "eval_data", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "test" + }, + "pre_process_data_config": { + "type": "pre_process_dataset", + "model_name": "openai/clip-vit-base-patch32", + "generate_ground_truth": true, + "image_col": "image", + "max_samples": 100 + }, + "post_process_data_config": { + "type": "embed_post_process" + }, + "dataloader_config": { + "batch_size": 1 + }, + "user_script": "clip_script.py" + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "degrad", + "type": "custom", + "data_config": "eval_data", + "sub_types": [ + { + "name": "percentage", + "priority": 1, + "higher_is_better": false + } + ], + "user_config": { + "user_script": "clip_script.py", + "metric_func": "eval_similarity_degrad", + "metric_func_kwargs": { + "batch_size": 32 + } + } + }, + { + "name": "latency", + "type": "latency", + "sub_types": [ + { + "name": "avg", + "priority": 2, + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + }, + { + "name": "p90", + "metric_config": { + "warmup_num": 20, + "repeat_test_num": 100 + } + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": 
"OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + }, + "to_fixed_shape": { + "type": "DynamicToFixedShape", + "dim_param": [ + "batch_size", + "num_channels", + "height", + "width" + ], + "dim_value": [ + 1, + 3, + 224, + 224 + ] + }, + "surgery": { + "type": "GraphSurgeries", + "surgeries": [ + { + "surgeon": "MatMulAddToGemm" + } + ] + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 1, + "optimization_options": { + "enable_gelu": false, + "enable_bias_gelu": false, + "enable_layer_norm": true, + "enable_skip_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_attention": false + }, + "save_as_external_data": true + }, + "quantization": { + "type": "OnnxStaticQuantization", + "data_config": "calib_data", + "quant_preprocess": true, + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "cache_dir": "cache", + "output_dir": "model/clip_vision" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json.config new file mode 100644 index 00000000..66db16db --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json.config @@ -0,0 +1,237 @@ +{ + "name": "Convert Vision Model to Qualcomm NPU", + "oliveFile": "clip/qdq/openai_clip_vision_b32_qdq.json", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "Qualcomm NPU", + "CPU" + ], + "path": "systems.host_system.accelerators.0.execution_providers.0", + "values": [ + "QNNExecutionProvider", + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": 
"passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.activation_type", + "template": "ActivationType" + } + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ], + "template": { + "path": "passes.quantization.precision", + "template": "WeightType" + } + }, + { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": "QuantizationDataset" + } + }, + { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + 
"train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "QuantizationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "dynamic": true, + "use_dynamo_exporter": false, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[1].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[1].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[1].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[1].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + 
"actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb new file mode 100644 index 00000000..816976a0 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3c18a7d6", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + 
"add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "vision_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_image_embedding(image):\n", + " inputs = processor(images=image, return_tensors=\"np\")\n", + " output = vision_model.run(None, { \"pixel_values\": inputs[\"pixel_values\"] })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because, due to static quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_image_embedding(source)\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_image_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of image {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16868fbd-e447-4866-af7d-eb6e49975bcc", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "\n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07076b9a", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000208833.jpg\"\n", + "image1 = Image.open(requests.get(url, stream=True).raw)\n", + "image1" + ] + }, + { +
"cell_type": "code", + "execution_count": null, + "id": "c10de7cd", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000125690.jpg\"\n", + "image2 = Image.open(requests.get(url, stream=True).raw)\n", + "image2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(image, [image1, image2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/requirements.txt b/openai-clip-vit-base-patch32/aitk/requirements.txt new file mode 100644 index 00000000..0cddd58d --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/requirements.txt @@ -0,0 +1,5 @@ +olive-ai +cachetools==5.5.0 +nltk>=3.9.1 +accelerate>=1.4.0 +pillow>=10.0.1 diff --git a/openai-clip-vit-base-patch32/aitk/user_script.py b/openai-clip-vit-base-patch32/aitk/user_script.py new file mode 100644 index 00000000..2d0051f0 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/user_script.py @@ -0,0 +1,64 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import numpy as np +import torch +from datasets import load_dataset +from torch.utils.data import Dataset +from transformers import CLIPProcessor + +from olive.data.registry import Registry + + +class CLIPDataset(Dataset): + def __init__( + self, + model_name, + dataset_name, + start=0, + end=500, + image_size=(224, 224), + ): + assert 0 <= start < end + self.start = start + self.end = end + self.model_name = model_name + self.dataset_name = dataset_name + self.processor = CLIPProcessor.from_pretrained(self.model_name) + self.length = self.end - self.start + self.image_size = image_size + self.dataset = load_dataset(self.dataset_name, split=f"test[{0}:{self.end + 10}]") + + def __len__(self): + return self.length + + def __getitem__(self, idx): + text_inputs = self.processor( + text=[" ".join(item) for item in self.dataset[idx : idx + 10]["caption"]], + return_tensors="np", + padding="max_length", + truncation=True, + ) + + image_input = self.processor(images=self.dataset[idx]["image"].resize(self.image_size), return_tensors="np") + model_inputs = [ + { + "input_ids": text_inputs["input_ids"].astype(np.int64), + "pixel_values": image_input["pixel_values"], + "attention_mask": text_inputs["attention_mask"].astype(np.int64), + } + ] + + target = torch.Tensor([0]).to(torch.int32) + return model_inputs[0], target + + +@Registry.register_dataset() +def clip_dataset(**kwargs): + return CLIPDataset(**kwargs) + + +@Registry.register_post_process() +def clip_post_process(output): + return output["logits_per_image"].argmax(axis=-1) From 9390c49ccc9a25acad5825663beb58747284658b Mon Sep 17 00:00:00 2001 From: hualxie Date: Fri, 25 Jul 2025 17:20:00 +0800 Subject: [PATCH 03/15] fixes --- Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml | 2 +- .../LICENSE | 51 ------------------- .../README.md | 3 -- .../aitk/model_project.config | 2 +- .../aitk/model_project.config | 2 +- 
microsoft-Phi-3.5-mini-instruct/aitk/info.yml | 2 +- 6 files changed, 4 insertions(+), 58 deletions(-) delete mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE delete mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index b5c32c66..8e284e83 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -1,6 +1,6 @@ keywords: aitk -arch: deepseek +arch: qwen2 recipes: - file: "qwen2_5_qnn_config.json" device: npu diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE deleted file mode 100644 index 79dde0ac..00000000 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/LICENSE +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: MIT License -spdx-id: MIT -featured: true -hidden: false - -description: A short and simple permissive license with conditions only requiring preservation of copyright and license notices. Licensed works, modifications, and larger works may be distributed under different terms and without source code. - -how: Create a text file (typically named LICENSE or LICENSE.txt) in the root of your source code and copy the text of the license into the file. Replace [year] with the current year and [fullname] with the name (or names) of the copyright holders. 
- -using: - Babel: https://github.com/babel/babel/blob/master/LICENSE - .NET: https://github.com/dotnet/runtime/blob/main/LICENSE.TXT - Rails: https://github.com/rails/rails/blob/master/MIT-LICENSE - -permissions: - - commercial-use - - modifications - - distribution - - private-use - -conditions: - - include-copyright - -limitations: - - liability - - warranty - ---- - -MIT License - -Copyright (c) [year] [fullname] - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md deleted file mode 100644 index e4f44474..00000000 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# DeepSeek Optimization - -This folder contains examples of DeepSeek optimization using different workflows. 
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config index fa0d2dac..dab152a5 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -21,4 +21,4 @@ "id": "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "version": 1 } -} \ No newline at end of file +} diff --git a/intel-bert-base-uncased-mrpc/aitk/model_project.config b/intel-bert-base-uncased-mrpc/aitk/model_project.config index a3df90e4..ca302634 100644 --- a/intel-bert-base-uncased-mrpc/aitk/model_project.config +++ b/intel-bert-base-uncased-mrpc/aitk/model_project.config @@ -25,4 +25,4 @@ "id": "huggingface/Intel/bert-base-uncased-mrpc", "version": 1 } -} \ No newline at end of file +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index 68a6970d..d0332445 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -1,6 +1,6 @@ keywords: aitk -arch: llama +arch: phi recipes: - file: "phi3_5_qnn_config.json" device: npu From bcf96dc01a6fff871a335827d920f665b72cb1a3 Mon Sep 17 00:00:00 2001 From: hualxie Date: Mon, 28 Jul 2025 10:08:37 +0800 Subject: [PATCH 04/15] add names --- intel-bert-base-uncased-mrpc/LICENSE | 237 ------------------ .../aitk/info.yml | 2 + openai-clip-vit-base-patch16/aitk/info.yml | 2 + openai-clip-vit-base-patch32/aitk/info.yml | 2 + 4 files changed, 6 insertions(+), 237 deletions(-) delete mode 100644 intel-bert-base-uncased-mrpc/LICENSE diff --git a/intel-bert-base-uncased-mrpc/LICENSE b/intel-bert-base-uncased-mrpc/LICENSE deleted file mode 100644 index 9b2c5698..00000000 --- a/intel-bert-base-uncased-mrpc/LICENSE +++ /dev/null @@ -1,237 +0,0 @@ ---- -title: Apache License 2.0 -spdx-id: Apache-2.0 -redirect_from: /licenses/apache/ -featured: true -hidden: false - 
-description: A permissive license whose main conditions require preservation of copyright and license notices. Contributors provide an express grant of patent rights. Licensed works, modifications, and larger works may be distributed under different terms and without source code. - -how: Create a text file (typically named LICENSE or LICENSE.txt) in the root of your source code and copy the text of the license into the file. - -note: The Apache Software Foundation recommends taking the additional step of adding a boilerplate notice to the header of each source file. You can find the notice in the appendix at the very end of the license text. - -using: - Kubernetes: https://github.com/kubernetes/kubernetes/blob/master/LICENSE - PDF.js: https://github.com/mozilla/pdf.js/blob/master/LICENSE - Swift: https://github.com/apple/swift/blob/main/LICENSE.txt - -permissions: - - commercial-use - - modifications - - distribution - - patent-use - - private-use - -conditions: - - include-copyright - - document-changes - -limitations: - - trademark-use - - liability - - warranty - ---- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml index 337d5a41..4ed75a0c 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml @@ -5,9 +5,11 @@ recipes: - file: "laion_clip_text_qnn.json" device: npu ep: QNNExecutionProvider + name: "Convert Text Model to Qualcomm NPU" - file: "laion_clip_vision_qnn.json" device: npu ep: QNNExecutionProvider + name: "Convert Vision Model to Qualcomm NPU" - file: "laion_clip_qdq_amd.json" device: npu ep: VitisAIExecutionProvider diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml index 9773cb6b..a7a3ec1d 100644 --- a/openai-clip-vit-base-patch16/aitk/info.yml +++ b/openai-clip-vit-base-patch16/aitk/info.yml @@ -5,9 +5,11 @@ recipes: - file: "openai_clip_text_qnn.json" device: npu ep: QNNExecutionProvider + name: "Convert Text Model to Qualcomm NPU" - file: "openai_clip_vision_qnn.json" device: npu ep: QNNExecutionProvider + name: "Convert Vision Model to Qualcomm NPU" - file: "openai_clip_qdq_amd.json" device: npu ep: VitisAIExecutionProvider diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml index d908ab0d..515d5076 100644 --- a/openai-clip-vit-base-patch32/aitk/info.yml +++ b/openai-clip-vit-base-patch32/aitk/info.yml 
@@ -5,9 +5,11 @@ recipes: - file: "openai_clip_text_qnn.json" device: npu ep: QNNExecutionProvider + name: "Convert Text Model to Qualcomm NPU" - file: "openai_clip_vision_qnn.json" device: npu ep: QNNExecutionProvider + name: "Convert Vision Model to Qualcomm NPU" - file: "openai_clip_qdq_amd.json" device: npu ep: VitisAIExecutionProvider From 12eb15cb01f8660d140de87612bff8c7fda66fb1 Mon Sep 17 00:00:00 2001 From: hualxie Date: Mon, 28 Jul 2025 15:00:27 +0800 Subject: [PATCH 05/15] add all sanitize --- .aitk/configs/checks.json | 11 + .aitk/configs/gitignore.md | 5 + .aitk/configs/model_list.json | 279 ++++++++ .aitk/configs/parameter_template.json | 142 +++++ .../extensions/llm_evaluator/README.md | 3 + .../extensions/llm_evaluator/evaluator.json | 33 + .../llm_evaluator/evaluator.json.config | 66 ++ .../llm_evaluator/model_project.config | 12 + .../templates/empty/.gitignore | 5 + .../templates/empty/README.md | 33 + .../templates/empty/_copy.json.config | 10 + .../templates/empty/inference_sample.ipynb | 23 + .../templates/empty/model_project.config | 15 + .../templates/empty/requirements.txt | 1 + .../templates/empty/sample.custom.config | 124 ++++ .../templates/empty/sample.json | 110 ++++ .../templates/empty/sample.json.config | 124 ++++ .aitk/scripts/auto_formatter.py | 212 +++++++ .aitk/scripts/model_lab/__init__.py | 25 + .aitk/scripts/project_scanner.py | 0 .aitk/scripts/requirements.txt | 3 + .aitk/scripts/sanitize.py | 50 ++ .aitk/scripts/sanitize/README.md | 111 ++++ .aitk/scripts/sanitize/__init__.py | 3 + .aitk/scripts/sanitize/base.py | 31 + .aitk/scripts/sanitize/constants.py | 145 +++++ .aitk/scripts/sanitize/copy_config.py | 77 +++ .aitk/scripts/sanitize/file_validation.py | 201 ++++++ .aitk/scripts/sanitize/main.py | 199 ++++++ .aitk/scripts/sanitize/model_info.py | 93 +++ .aitk/scripts/sanitize/model_parameter.py | 600 ++++++++++++++++++ .aitk/scripts/sanitize/parameters.py | 266 ++++++++ .aitk/scripts/sanitize/project_config.py | 77 +++ 
.aitk/scripts/sanitize/utils.py | 188 ++++++ 34 files changed, 3277 insertions(+) create mode 100644 .aitk/configs/checks.json create mode 100644 .aitk/configs/gitignore.md create mode 100644 .aitk/configs/model_list.json create mode 100644 .aitk/configs/parameter_template.json create mode 100644 .aitk/non_model_projects/extensions/llm_evaluator/README.md create mode 100644 .aitk/non_model_projects/extensions/llm_evaluator/evaluator.json create mode 100644 .aitk/non_model_projects/extensions/llm_evaluator/evaluator.json.config create mode 100644 .aitk/non_model_projects/extensions/llm_evaluator/model_project.config create mode 100644 .aitk/non_model_projects/templates/empty/.gitignore create mode 100644 .aitk/non_model_projects/templates/empty/README.md create mode 100644 .aitk/non_model_projects/templates/empty/_copy.json.config create mode 100644 .aitk/non_model_projects/templates/empty/inference_sample.ipynb create mode 100644 .aitk/non_model_projects/templates/empty/model_project.config create mode 100644 .aitk/non_model_projects/templates/empty/requirements.txt create mode 100644 .aitk/non_model_projects/templates/empty/sample.custom.config create mode 100644 .aitk/non_model_projects/templates/empty/sample.json create mode 100644 .aitk/non_model_projects/templates/empty/sample.json.config create mode 100644 .aitk/scripts/auto_formatter.py create mode 100644 .aitk/scripts/model_lab/__init__.py create mode 100644 .aitk/scripts/project_scanner.py create mode 100644 .aitk/scripts/requirements.txt create mode 100644 .aitk/scripts/sanitize.py create mode 100644 .aitk/scripts/sanitize/README.md create mode 100644 .aitk/scripts/sanitize/__init__.py create mode 100644 .aitk/scripts/sanitize/base.py create mode 100644 .aitk/scripts/sanitize/constants.py create mode 100644 .aitk/scripts/sanitize/copy_config.py create mode 100644 .aitk/scripts/sanitize/file_validation.py create mode 100644 .aitk/scripts/sanitize/main.py create mode 100644 
.aitk/scripts/sanitize/model_info.py create mode 100644 .aitk/scripts/sanitize/model_parameter.py create mode 100644 .aitk/scripts/sanitize/parameters.py create mode 100644 .aitk/scripts/sanitize/project_config.py create mode 100644 .aitk/scripts/sanitize/utils.py diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json new file mode 100644 index 00000000..100deb54 --- /dev/null +++ b/.aitk/configs/checks.json @@ -0,0 +1,11 @@ +{ + "configCheck": 58, + "extensionCheck": 1, + "gitignoreCheck": 14, + "inferenceModelCheck": 6, + "ipynbCheck": 33, + "modelProjectCheck": 15, + "oliveCheck": 0, + "oliveJsonCheck": 58, + "pathCheck": 652 +} diff --git a/.aitk/configs/gitignore.md b/.aitk/configs/gitignore.md new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/.aitk/configs/gitignore.md @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json new file mode 100644 index 00000000..69de61d4 --- /dev/null +++ b/.aitk/configs/model_list.json @@ -0,0 +1,279 @@ +{ + "models": [ + { + "displayName": "Intel/bert-base-uncased-mrpc", + "icon": "intel", + "modelLink": "https://huggingface.co/Intel/bert-base-uncased-mrpc", + "id": "huggingface/Intel/bert-base-uncased-mrpc", + "runtimes": [ + "QNN", + "AMDNPU", + "NvidiaTRTRTX", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "google-bert/bert-base-multilingual-cased", + "icon": "gemini", + "modelLink": "https://huggingface.co/google-bert/bert-base-multilingual-cased", + "id": "huggingface/google-bert/bert-base-multilingual-cased", + "runtimes": [ + "QNN", + "AMDNPU", + "NvidiaTRTRTX", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "openai/clip-vit-base-patch32", + "icon": "OpenAI", + "modelLink": 
"https://huggingface.co/openai/clip-vit-base-patch32", + "id": "huggingface/openai/clip-vit-base-patch32", + "runtimes": [ + "QNN", + "AMDNPU", + "NvidiaTRTRTX", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "openai/clip-vit-base-patch16", + "icon": "OpenAI", + "modelLink": "https://huggingface.co/openai/clip-vit-base-patch16", + "id": "huggingface/openai/clip-vit-base-patch16", + "runtimes": [ + "QNN", + "AMDNPU", + "NvidiaTRTRTX", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "icon": "laion", + "modelLink": "https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "id": "huggingface/laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "runtimes": [ + "QNN", + "AMDNPU", + "NvidiaTRTRTX", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "Microsoft/ResNet-50", + "icon": "ms", + "modelLink": "https://huggingface.co/microsoft/resnet-50", + "id": "huggingface/microsoft/resnet-50", + "runtimes": [ + "QNN", + "AMDNPU", + "NvidiaTRTRTX", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "CNN", + "status": "Ready", + "version": 1 + }, + { + "displayName": "google/vit-base-patch16-224", + "icon": "gemini", + "modelLink": "https://huggingface.co/google/vit-base-patch16-224", + "id": "huggingface/google/vit-base-patch16-224", + "runtimes": [ + "QNN", + "AMDNPU", + "NvidiaTRTRTX", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "icon": "DeepSeek", + "modelLink": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "id": "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "runtimes": [ + 
"QNN", + "AMDNPU", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "microsoft/Phi-3.5-mini-instruct", + "icon": "ms", + "modelLink": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct", + "id": "huggingface/microsoft/Phi-3.5-mini-instruct", + "runtimes": [ + "QNN", + "AMDNPU", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "microsoft/Phi-4-mini-reasoning", + "icon": "ms", + "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-reasoning", + "id": "huggingface/microsoft/Phi-4-mini-reasoning", + "runtimes": [ + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "Qwen/Qwen2.5-1.5B-Instruct", + "icon": "qwen", + "modelLink": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct", + "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", + "runtimes": [ + "QNN", + "AMDNPU", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "meta-llama/Llama-3.2-1B-Instruct", + "icon": "meta", + "modelLink": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct", + "id": "huggingface/meta-llama/Llama-3.2-1B-Instruct", + "runtimes": [ + "QNN", + "AMDNPU", + "IntelCPU", + "IntelGPU", + "IntelNPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + }, + { + "displayName": "mistralai/Mistral-7B-Instruct-v0.3", + "icon": "mistralai", + "modelLink": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3", + "id": "huggingface/mistralai/Mistral-7B-Instruct-v0.3", + "runtimes": [ + "IntelGPU" + ], + "architecture": "Transformer", + "status": "Ready", + "version": 1 + } + ], + "template_models": [ + { + "displayName": "Model Project Template", + "discription": "Customize this to create your own model 
project", + "icon": "HuggingFace", + "modelLink": "https://huggingface.co/", + "id": "huggingface/empty", + "runtimes": [ + "CPU" + ], + "architecture": "Others", + "status": "Ready", + "version": 1 + }, + { + "displayName": "LLM Evaluator Template", + "icon": "HuggingFace", + "modelLink": "https://huggingface.co/", + "id": "extension/llm_evaluator", + "runtimes": [ + "CPU" + ], + "architecture": "Others", + "status": "Hide", + "version": 1, + "extension": true + } + ], + "HFDatasets": { + "imagenet-1k": "https://huggingface.co/datasets/ILSVRC/imagenet-1k", + "wikitext": "https://huggingface.co/datasets/Salesforce/wikitext", + "wikitext2": "https://huggingface.co/datasets/mindchain/wikitext2", + "facebook/xnli": "https://huggingface.co/datasets/facebook/xnli", + "glue": "https://huggingface.co/datasets/nyu-mll/glue", + "nlphuji/flickr30k": "https://huggingface.co/datasets/nlphuji/flickr30k", + "timm/mini-imagenet": "https://huggingface.co/datasets/timm/mini-imagenet", + "wikipedia": "https://huggingface.co/datasets/wikimedia/wikipedia", + "google-research-datasets/conceptual_captions": "https://huggingface.co/datasets/google-research-datasets/conceptual_captions" + }, + "LoginRequiredDatasets": [ + "imagenet-1k" + ], + "LoginRequiredModelIds": [ + "huggingface/meta-llama/Llama-3.2-1B-Instruct", + "huggingface/mistralai/Mistral-7B-Instruct-v0.3" + ], + "DatasetSplit": { + "nlphuji/flickr30k": [ + "test" + ], + "imagenet-1k": [ + "train", + "validation", + "test" + ], + "timm/mini-imagenet": [ + "train", + "validation", + "test" + ] + }, + "DatasetSubset": { + "nlphuji/flickr30k": [], + "imagenet-1k": [], + "timm/mini-imagenet": [] + } +} diff --git a/.aitk/configs/parameter_template.json b/.aitk/configs/parameter_template.json new file mode 100644 index 00000000..a04677b1 --- /dev/null +++ b/.aitk/configs/parameter_template.json @@ -0,0 +1,142 @@ +{ + "ActivationType": { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization 
data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ] + }, + "WeightType": { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8", + "UInt8", + "Int16", + "UInt16" + ], + "displayType": "RadioGroup", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ] + }, + "ActivationTypeIntel": { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "description": "Quantization data type of activation. ‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8" + ], + "displayType": "RadioGroup", + "values": [ + "int8" + ] + }, + "WeightTypeIntel": { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "description": "Data type for quantizing weights. 
‘Int8’ for signed 8-bit integer, ‘UInt8’ for unsigned 8-bit integer etc.", + "descriptionLink": "https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html", + "type": "enum", + "displayNames": [ + "Int8" + ], + "displayType": "RadioGroup", + "values": [ + "int8" + ] + }, + "EvaluationDataset": { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum" + }, + "EvaluationDatasetSize": { + "name": "Evaluation Dataset Size", + "type": "int" + }, + "EvaluationDatasetSplit": { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "values": [ + "train", + "validation", + "test" + ] + }, + "EvaluationDatasetSubset": { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum" + }, + "QuantizationDataset": { + "name": "Quantization Dataset", + "tags": [ + "QuantizationDataset" + ], + "type": "enum" + }, + "QuantizationDatasetSize": { + "name": "Quantization Dataset Size", + "type": "int" + }, + "QuantizationDatasetSplit": { + "name": "Quantization Dataset Split", + "tags": [ + "QuantizationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "values": [ + "train", + "validation", + "test" + ] + }, + "QuantizationDatasetSubset": { + "name": "Quantization Dataset Subset", + "tags": [ + "QuantizationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum" + } +} diff --git a/.aitk/non_model_projects/extensions/llm_evaluator/README.md b/.aitk/non_model_projects/extensions/llm_evaluator/README.md new file mode 100644 index 00000000..2c24451b --- /dev/null +++ b/.aitk/non_model_projects/extensions/llm_evaluator/README.md @@ -0,0 +1,3 @@ +The evaluator.json provided here is kept minimal so that it passes the sanitize checks. + +The main purpose is to provide the UX.
diff --git a/.aitk/non_model_projects/extensions/llm_evaluator/evaluator.json b/.aitk/non_model_projects/extensions/llm_evaluator/evaluator.json new file mode 100644 index 00000000..c2f1c5fe --- /dev/null +++ b/.aitk/non_model_projects/extensions/llm_evaluator/evaluator.json @@ -0,0 +1,33 @@ +{ + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + } + }, + "evaluators": { + "modelLab_llm_evaluator": { + "enabled": true, + "prompt_length": 16, + "generation_length": 64 + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "save_as_external_data": true + } + }, + "host": "local_system", + "target": "local_system", + "evaluator": "modelLab_llm_evaluator", + "evaluate_input_model": false, + "output_dir": "model/nan", + "cache_dir": "cache" +} \ No newline at end of file diff --git a/.aitk/non_model_projects/extensions/llm_evaluator/evaluator.json.config b/.aitk/non_model_projects/extensions/llm_evaluator/evaluator.json.config new file mode 100644 index 00000000..b2a23869 --- /dev/null +++ b/.aitk/non_model_projects/extensions/llm_evaluator/evaluator.json.config @@ -0,0 +1,66 @@ +{ + "name": "LLM Evaluator", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Prompt Length", + "type": "enum", + "path": "evaluators.modelLab_llm_evaluator.prompt_length", + "values": [ + 16, + 64, 
+ 256 + ], + "fallbackValue": 16 + }, + { + "name": "Evaluation Generation Length", + "type": "int", + "path": "evaluators.modelLab_llm_evaluator.generation_length", + "fallbackValue": 64 + } + ], + "disableToggleGeneration": true, + "toggle": { + "name": "Evaluate model performance", + "description": "Use a built-in evaluator instead of Olive evaluator", + "type": "bool", + "path": "evaluators.modelLab_llm_evaluator.enabled", + "fallbackValue": true + } + } + ] +} diff --git a/.aitk/non_model_projects/extensions/llm_evaluator/model_project.config b/.aitk/non_model_projects/extensions/llm_evaluator/model_project.config new file mode 100644 index 00000000..6394dbf9 --- /dev/null +++ b/.aitk/non_model_projects/extensions/llm_evaluator/model_project.config @@ -0,0 +1,12 @@ +{ + "workflows": [ + { + "file": "evaluator.json", + "templateName": "evaluator" + } + ], + "modelInfo": { + "id": "extension/llm_evaluator", + "version": 1 + } +} diff --git a/.aitk/non_model_projects/templates/empty/.gitignore b/.aitk/non_model_projects/templates/empty/.gitignore new file mode 100644 index 00000000..48c03882 --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +/cache +/history/*/* +!/history/*/history.config +!/history/*/olive_config.json diff --git a/.aitk/non_model_projects/templates/empty/README.md b/.aitk/non_model_projects/templates/empty/README.md new file mode 100644 index 00000000..78a217d1 --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/README.md @@ -0,0 +1,33 @@ +# Sample Guide + +## Update sample.json + +To make the sample work, you need to fill in the following properties + +- MODEL_PATH: like Intel/bert-base-uncased-mrpc +- MODEL_TASK: like text-classification +- DS_NAME: like glue +- DS_SUBSET: like mrpc +- DS_SPLIT: like validation +- DATA_COLS: like [ "sentence1", "sentence2" ] +- FIXED_PARAMS: like [ "batch_size", "sequence_length" ] +- FIXED_VALUES: like [ 1, 128 ] + +You could also adjust other 
parameters to suit your needs: + + - "execution_providers": [ "CPUExecutionProvider" ]: Can be changed to other providers like QNNExecutionProvider. You need to run it on the matched device +- "max_length": 128 / "batch_size": 1: For static quantization, the input size should be fixed. Adjust these to match `FIXED_VALUES` +- "max_samples": 100: The number of samples used. + +## Update model_project.config (optional) + +Update `displayName` and `modelLink` to the one you used. + +## Update sample.custom.config (optional) + +This file is used to render the Run UX. +You could remove or add parameters to match your `sample.json`. Path update may be needed. + +## Update inference_sample.ipynb (optional) + +Write your own code to test the output model. diff --git a/.aitk/non_model_projects/templates/empty/_copy.json.config b/.aitk/non_model_projects/templates/empty/_copy.json.config new file mode 100644 index 00000000..0d86275f --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/_copy.json.config @@ -0,0 +1,10 @@ +{ + "copies": [ + { + "src": "sample.json.config", + "dst": "sample.custom.config", + "replacements": [ + ] + } + ] +} \ No newline at end of file diff --git a/.aitk/non_model_projects/templates/empty/inference_sample.ipynb b/.aitk/non_model_projects/templates/empty/inference_sample.ipynb new file mode 100644 index 00000000..bc33b256 --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/inference_sample.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"CPUExecutionProvider\"\n", + "# Write your code to load the ONNX model and perform inference here.\n", + "import onnxruntime as ort" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/.aitk/non_model_projects/templates/empty/model_project.config
b/.aitk/non_model_projects/templates/empty/model_project.config new file mode 100644 index 00000000..7c06e32e --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/model_project.config @@ -0,0 +1,15 @@ +{ + "workflows": [ + { + "displayName": "Conversion Sample", + "file": "sample.json", + "templateName": "sample" + } + ], + "modelInfo": { + "id": "huggingface/empty", + "version": 1, + "displayName": "Model Project Template", + "modelLink": "https://huggingface.co/" + } +} diff --git a/.aitk/non_model_projects/templates/empty/requirements.txt b/.aitk/non_model_projects/templates/empty/requirements.txt new file mode 100644 index 00000000..ddf3e1d4 --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/requirements.txt @@ -0,0 +1 @@ +olive-ai diff --git a/.aitk/non_model_projects/templates/empty/sample.custom.config b/.aitk/non_model_projects/templates/empty/sample.custom.config new file mode 100644 index 00000000..6bc9a705 --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/sample.custom.config @@ -0,0 +1,124 @@ +{ + "name": "Conversion Sample", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "type": "enum", + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ] + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + 
"type": "enum", + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ] + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples" + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples" + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/.aitk/non_model_projects/templates/empty/sample.json b/.aitk/non_model_projects/templates/empty/sample.json new file mode 100644 index 00000000..52ddfb23 --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/sample.json @@ -0,0 +1,110 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "MODEL_PATH", + "task": "MODEL_TASK" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "quantization", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "DS_NAME", + "subset": "DS_SUBSET", + "split": "DS_SPLIT" + }, + "pre_process_data_config": { + "max_length": 128, + "padding": "max_length", + "input_cols": "DATA_COLS", + "max_samples": 100 + }, + "dataloader_config": { + "batch_size": 1 + } + }, + { + "name": "evaluation", + "type": "HuggingfaceContainer", + 
"load_dataset_config": { + "data_name": "DS_NAME", + "subset": "DS_SUBSET", + "split": "DS_SPLIT" + }, + "pre_process_data_config": { + "max_length": 128, + "padding": "max_length", + "input_cols": "DATA_COLS", + "max_samples": 100 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1 + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "dynamic_shape_to_fixed": { + "type": "DynamicToFixedShape", + "dim_param": "FIXED_PARAMS", + "dim_value": "FIXED_VALUES" + }, + "quantization": { + "type": "OnnxStaticQuantization", + "data_config": "quantization", + "activation_type": "uint16", + "precision": "uint8", + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "local_system", + "target": "local_system", + "cache_dir": "cache", + "output_dir": "model/output" +} \ No newline at end of file diff --git a/.aitk/non_model_projects/templates/empty/sample.json.config b/.aitk/non_model_projects/templates/empty/sample.json.config new file mode 100644 index 00000000..6bc9a705 --- /dev/null +++ b/.aitk/non_model_projects/templates/empty/sample.json.config @@ -0,0 +1,124 @@ +{ + "name": "Conversion Sample", + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "CPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "CPUExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + 
"autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Quantize", + "phase": "Quantization", + "parameters": [ + { + "name": "Activation Type", + "tags": [ + "ActivationType" + ], + "type": "enum", + "displayType": "RadioGroup", + "path": "passes.quantization.activation_type", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ] + }, + { + "name": "Weight Type", + "tags": [ + "WeightType" + ], + "type": "enum", + "displayType": "RadioGroup", + "path": "passes.quantization.precision", + "values": [ + "int8", + "uint8", + "int16", + "uint16" + ] + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples" + } + ], + "toggle": { + "autoGenerated": true, + "name": "Quantize model", + "type": "bool", + "path": "passes.quantization", + "actions": [ + [], + [ + { + "type": "update", + "path": "passes", + "value": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + } + } + } + ] + ] + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[1].pre_process_data_config.max_samples" + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/.aitk/scripts/auto_formatter.py b/.aitk/scripts/auto_formatter.py new file mode 100644 index 00000000..425d3ba6 --- /dev/null +++ b/.aitk/scripts/auto_formatter.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Auto-formatter module for Python scripts. 
+Provides comprehensive formatting capabilities including: +- Removing unused imports using autoflake +- Sorting and organizing imports using isort +- Formatting code using black with 120 character line length +- Checking that all imports are at the top of files +""" + +import subprocess +import sys +from pathlib import Path + +from sanitize.utils import printError, printInfo, printTip, printWarning + + +def install_formatter_tools(): + """ + Install required formatting tools if not available. + """ + tools = [ + ("black", "black"), + ("isort", "isort"), + ("autoflake", "autoflake"), # Added autoflake for removing unused imports + ] + + for tool_name, package_name in tools: + try: + subprocess.run([tool_name, "--version"], check=True, capture_output=True) + except (subprocess.CalledProcessError, FileNotFoundError): + printInfo(f"Installing {package_name} formatter...") + try: + subprocess.run([sys.executable, "-m", "pip", "install", package_name], check=True) + except subprocess.CalledProcessError as e: + printError(f"Failed to install {package_name}: {e}") + return False + return True + + +def check_imports_at_top(file_path): + """ + Check if all imports are at the top of the file (after docstring and comments). + Returns True if imports are properly placed, False otherwise. 
+ """ + try: + with open(file_path, "r", encoding="utf-8") as f: + lines = f.readlines() + + # Skip shebang, encoding declarations, and docstrings + in_docstring = False + docstring_quotes = None + non_import_code_found = False + + for i, line in enumerate(lines): + stripped = line.strip() + + # Skip empty lines and comments + if not stripped or stripped.startswith("#"): + continue + + # Handle docstrings + if not in_docstring and (stripped.startswith('"""') or stripped.startswith("'''")): + docstring_quotes = stripped[:3] + if stripped.count(docstring_quotes) >= 2: + # Single line docstring + continue + else: + in_docstring = True + continue + elif in_docstring and docstring_quotes and docstring_quotes in stripped: + in_docstring = False + continue + elif in_docstring: + continue + + # Check for imports and from statements (including multi-line imports) + if ( + stripped.startswith("import ") + or stripped.startswith("from ") + or (not non_import_code_found and (stripped.endswith(",") or stripped.startswith(")"))) + ): + if non_import_code_found: + printWarning(f"Import found after non-import code in {file_path}:{i+1}") + return False + else: + # Non-import code found (but ignore special variables and sys.path modifications) + if ( + stripped + and not stripped.startswith("__") + and not stripped.startswith("sys.path") + and not any(special in stripped for special in ["__all__", "__version__", "__author__"]) + ): + non_import_code_found = True + + return True + except Exception as e: + printError(f"Error checking imports in {file_path}: {e}") + return True # Don't fail the entire process + + +def auto_format_scripts(target_dir=None): + """ + Auto-format all Python scripts in the target directory with comprehensive formatting: + - Remove unused imports using autoflake + - Sort and organize imports using isort + - Format code using black with 120 character line length + - Check that all imports are at the top of files + + Args: + target_dir: Path to the directory to 
format. If None, uses the scripts directory. + """ + if target_dir is None: + target_dir = Path(__file__).parent + else: + target_dir = Path(target_dir) + + printTip(f"Auto-formatting Python scripts in {target_dir}...") + + # Find all Python files in the target directory + python_files = [] + for py_file in target_dir.rglob("*.py"): + if py_file.is_file(): + python_files.append(str(py_file)) + + if not python_files: + printInfo("No Python files found to format.") + return + + # Install required tools + if not install_formatter_tools(): + printError("Failed to install required formatting tools.") + return + + # Step 1: Remove unused imports with autoflake + printInfo("Step 1: Removing unused imports...") + try: + autoflake_cmd = [ + "autoflake", + "--in-place", # Modify files in place + "--remove-all-unused-imports", # Remove all unused imports + "--remove-unused-variables", # Remove unused variables + "--remove-duplicate-keys", # Remove duplicate keys in dictionaries + "--ignore-init-module-imports", # Don't remove imports in __init__.py files + ] + python_files + + result = subprocess.run(autoflake_cmd, capture_output=True, text=True) + if result.returncode == 0: + printInfo(f"Successfully removed unused imports from {len(python_files)} files.") + if result.stdout: + printInfo(result.stdout) + else: + printError(f"Autoflake failed: {result.stderr}") + except Exception as e: + printError(f"Error during unused import removal: {e}") + + # Step 2: Sort imports with isort + printInfo("Step 2: Sorting and organizing imports...") + try: + isort_cmd = [ + "isort", + "--line-length", + "120", + "--multi-line", + "3", + "--trailing-comma", + "--force-grid-wrap", + "0", + "--combine-as", + "--use-parentheses", + ] + python_files + + result = subprocess.run(isort_cmd, capture_output=True, text=True) + if result.returncode == 0: + printInfo(f"Successfully sorted imports in {len(python_files)} files.") + else: + printError(f"Import sorting failed: {result.stderr}") + except 
Exception as e: + printError(f"Error during import sorting: {e}") + + # Step 3: Check import placement + printInfo("Step 3: Checking import placement...") + for py_file in python_files: + check_imports_at_top(py_file) + + # Step 4: Format with black + printInfo("Step 4: Formatting code with black...") + try: + black_cmd = ["black", "--line-length", "120"] + python_files + result = subprocess.run(black_cmd, capture_output=True, text=True) + + if result.returncode == 0: + printInfo(f"Successfully formatted {len(python_files)} Python files with 120 character line length.") + if result.stdout: + printInfo(result.stdout) + else: + printError(f"Black formatting failed: {result.stderr}") + except Exception as e: + printError(f"Error during black formatting: {e}") + + printInfo("Auto-formatting completed!") + + # Clear sanitize modules from cache + modules_to_clear = [name for name in sys.modules.keys() if name.startswith("sanitize")] + for module_name in modules_to_clear: + if module_name in sys.modules: + del sys.modules[module_name] + + +if __name__ == "__main__": + # Allow running this module directly for testing + auto_format_scripts() diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py new file mode 100644 index 00000000..4b18ecf4 --- /dev/null +++ b/.aitk/scripts/model_lab/__init__.py @@ -0,0 +1,25 @@ +from enum import Enum + + +class RuntimeEnum(Enum): + CPU = "CPU" + QNN = "QNN" + IntelAny = "IntelAny" + IntelCPU = "IntelCPU" + IntelNPU = "IntelNPU" + IntelGPU = "IntelGPU" + AMDNPU = "AMDNPU" + NvidiaGPU = "NvidiaGPU" + NvidiaTRTRTX = "NvidiaTRTRTX" + DML = "DML" + WCR = "WCR" + WCR_CUDA = "WCR_CUDA" + # Inference + QNN_LLLM = "QNN_LLM" + + +class RuntimeFeatureEnum(Enum): + AutoGptq = "AutoGptq" + AutoAwq = "AutoAwq" + Nightly = "Nightly" + Genai = "Genai" diff --git a/.aitk/scripts/project_scanner.py b/.aitk/scripts/project_scanner.py new file mode 100644 index 00000000..e69de29b diff --git a/.aitk/scripts/requirements.txt 
b/.aitk/scripts/requirements.txt new file mode 100644 index 00000000..303c2104 --- /dev/null +++ b/.aitk/scripts/requirements.txt @@ -0,0 +1,3 @@ +pydash +pydantic +deepdiff diff --git a/.aitk/scripts/sanitize.py b/.aitk/scripts/sanitize.py new file mode 100644 index 00000000..751d8b95 --- /dev/null +++ b/.aitk/scripts/sanitize.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +""" +Sanitize script - new modular version +This script maintains compatibility with the original sanitize.py while using the new modular structure. +Auto-formats all Python scripts in the scripts directory on every run. +""" + +# Import main directly without going through __init__.py +import sys +from pathlib import Path + +from auto_formatter import auto_format_scripts +from sanitize.main import main +from sanitize.utils import GlobalVars + +# Get the absolute path to the project root (parent of scripts) +project_root = Path(__file__).parent.parent +scripts_dir = Path(__file__).parent + +# Add both the project root and scripts directory to Python path +sys.path.insert(0, str(project_root)) +sys.path.insert(0, str(scripts_dir)) + + +# Import the main function from the new sanitize.main module +# Import here to avoid circular imports after formatting +def run_main(): + original_path = sys.path.copy() + + try: + # Make sure the project root and scripts dir are in the path + if str(project_root) not in sys.path: + sys.path.insert(0, str(project_root)) + if str(scripts_dir) not in sys.path: + sys.path.insert(0, str(scripts_dir)) + + main() + finally: + # Restore original sys.path + sys.path = original_path + + +if __name__ == "__main__": + # Check if verbose mode is requested + if "-v" in sys.argv or "--verbose" in sys.argv: + GlobalVars.verbose = True + + # Auto-format scripts before running sanitize + auto_format_scripts() + run_main() diff --git a/.aitk/scripts/sanitize/README.md b/.aitk/scripts/sanitize/README.md new file mode 100644 index 00000000..99a54f6a --- /dev/null +++ 
b/.aitk/scripts/sanitize/README.md @@ -0,0 +1,111 @@ +# Sanitize Module Refactoring + +This directory contains the refactored sanitize functionality, split into multiple modules for better maintainability and organization. + +## File Structure + +``` +sanitize/ +├── __init__.py # Package initialization and exports +├── constants.py # All constants, enums, and static values +├── utils.py # Utility functions and GlobalVars +├── base.py # Base model classes +├── model_info.py # ModelInfo and ModelList classes +├── parameters.py # Parameter-related classes +├── project_config.py # Project configuration classes +├── model_parameter.py # ModelParameter and related classes +├── copy_config.py # Copy configuration functionality +└── file_validation.py # File validation functions +``` + +## Main Files + +- `sanitize.py` - Main entry point (maintains compatibility) +- `sanitize_main.py` - Contains the main logic function +- `sanitize_old.py` - Backup of the original monolithic file + +## Module Descriptions + +### constants.py +Contains all enums and constant values: +- `IconEnum`, `ArchitectureEnum`, `ModelStatusEnum` +- `ParameterTypeEnum`, `ParameterDisplayTypeEnum`, etc. 
+- `EPNames`, `OlivePassNames`, `OlivePropertyNames` +- Path and import constants + +### utils.py +Utility functions and shared state: +- `GlobalVars` - Global state management with direct initialization + - All runtime mappings are initialized at module import time + - `epToName` - EP to display name mappings (from constants) + - `runtimeToEp` - Runtime enum to EP string mappings +- `printProcess`, `printInfo`, `printError`, `printWarning` - Logging functions +- `open_ex` - File I/O context manager +- `checkPath` - Path validation utility + +#### Initialization Design + +### base.py +Base classes for all model classes: +- `BaseModelClass` - Base class with file I/O capabilities + +### model_info.py +Model information management: +- `ModelInfo` - Individual model information +- `ModelList` - Collection of models with validation + +### parameters.py +Parameter configuration: +- `Parameter` - Parameter definition and validation +- `ParameterCheck`, `ParameterAction` - Parameter validation and actions +- `readCheckParameterTemplate` - Template reading function + +### project_config.py +Project configuration management: +- `WorkflowItem` - Individual workflow configuration +- `ModelInfoProject` - Project-specific model info +- `ModelProjectConfig` - Complete project configuration + +### model_parameter.py +Model parameter configuration: +- `ModelParameter` - Main model parameter class +- `Section` - Configuration sections +- `RuntimeOverwrite`, `DebugInfo`, `ADMNPUConfig` - Supporting classes + +### copy_config.py +File copy and replacement functionality: +- `Copy`, `Replacement` - Copy operations +- `CopyConfig` - Copy configuration management + +### file_validation.py +File validation and checking: +- `check_case` - Path case validation +- `process_gitignore` - Git ignore file processing +- `readCheckOliveConfig` - Olive configuration validation +- `readCheckIpynb` - Jupyter notebook validation + +## Usage + +The new structure maintains full compatibility with the 
original script. You can still run: + +```bash +python sanitize.py +python sanitize.py -v +python sanitize.py --olive /path/to/olive/repo +``` + +## Benefits of Refactoring + +1. **Maintainability**: Each module has a single responsibility +2. **Testability**: Individual modules can be tested in isolation +3. **Reusability**: Components can be imported and used separately +4. **Readability**: Smaller, focused files are easier to understand +5. **Type Safety**: Better type hints and error checking +6. **Extensibility**: New functionality can be added without modifying existing code + +## Migration Notes + +- All functionality from the original `sanitize.py` is preserved +- Import statements may need to be updated if you were importing specific classes +- The main entry point remains the same for backward compatibility +- Error handling and logging behavior is unchanged diff --git a/.aitk/scripts/sanitize/__init__.py b/.aitk/scripts/sanitize/__init__.py new file mode 100644 index 00000000..8b10466e --- /dev/null +++ b/.aitk/scripts/sanitize/__init__.py @@ -0,0 +1,3 @@ +""" +Sanitize module for Windows AI Studio model configurations +""" diff --git a/.aitk/scripts/sanitize/base.py b/.aitk/scripts/sanitize/base.py new file mode 100644 index 00000000..a5371c4c --- /dev/null +++ b/.aitk/scripts/sanitize/base.py @@ -0,0 +1,31 @@ +""" +Base model classes +""" + +from typing import Optional + +from pydantic import BaseModel + +from .utils import open_ex + + +class BaseModelClass(BaseModel): + """Base class for all model classes with file I/O capabilities""" + + _file: Optional[str] = None + _fileContent: Optional[str] = None + + def writeIfChanged(self): + newContent = self.model_dump_json(indent=4, exclude_none=True) + if self._file: + BaseModelClass.writeJsonIfChanged(newContent, self._file, self._fileContent) + + @classmethod + def writeJsonIfChanged(cls, newContent: str, filePath: str, fileContent: str | None): + newContent += "\n" + if newContent != fileContent: + with 
open_ex(filePath, "w") as file: + file.write(newContent) + + class Config: + arbitrary_types_allowed = True diff --git a/.aitk/scripts/sanitize/constants.py b/.aitk/scripts/sanitize/constants.py new file mode 100644 index 00000000..ae764619 --- /dev/null +++ b/.aitk/scripts/sanitize/constants.py @@ -0,0 +1,145 @@ +""" +Constants and Enums for the sanitize module +""" + +from enum import Enum + + +class IconEnum(Enum): + Intel = "intel" + Gemini = "gemini" + OpenAI = "OpenAI" + Microsoft = "ms" + Meta = "meta" + CompVis = "compvis" + BAAI = "baai" + tiiuae = "tiiuae" + EleutherAI = "eleutherai" + openlm = "openlm" + DeepSeek = "DeepSeek" + laion = "laion" + qwen = "qwen" + mistralai = "mistralai" + HuggingFace = "HuggingFace" + + +class ArchitectureEnum(Enum): + Transformer = "Transformer" + CNN = "CNN" + Diffusion = "Diffusion" + Others = "Others" + + +class ParameterTypeEnum(Enum): + Enum = "enum" + Int = "int" + Bool = "bool" + String = "str" + + +class ParameterDisplayTypeEnum(Enum): + Dropdown = "Dropdown" + RadioGroup = "RadioGroup" + + +class ParameterCheckTypeEnum(Enum): + Exist = "exist" + NotExist = "notExist" + + +class ParameterActionTypeEnum(Enum): + # Update and Insert are both upsert in runtime. 
Separate them for validation + Update = "update" + Insert = "insert" + Delete = "delete" + + +class ParameterTagEnum(Enum): + QuantizationDataset = "QuantizationDataset" + QuantizationDatasetSubset = "QuantizationDatasetSubset" + QuantizationDatasetSplit = "QuantizationDatasetSplit" + EvaluationDataset = "EvaluationDataset" + EvaluationDatasetSubset = "EvaluationDatasetSubset" + EvaluationDatasetSplit = "EvaluationDatasetSplit" + DependsOnDataset = "DependsOnDataset" + ActivationType = "ActivationType" + WeightType = "WeightType" + + +class PhaseTypeEnum(Enum): + Conversion = "Conversion" + Quantization = "Quantization" + Evaluation = "Evaluation" + + +class ReplaceTypeEnum(Enum): + String = "string" + Path = "path" + PathAdd = "pathAdd" + + +class EPNames(Enum): + CPUExecutionProvider = "CPUExecutionProvider" + CUDAExecutionProvider = "CUDAExecutionProvider" + QNNExecutionProvider = "QNNExecutionProvider" + OpenVINOExecutionProvider = "OpenVINOExecutionProvider" + VitisAIExecutionProvider = "VitisAIExecutionProvider" + NvTensorRTRTXExecutionProvider = "NvTensorRTRTXExecutionProvider" + DmlExecutionProvider = "DmlExecutionProvider" + + +class OliveDeviceTypes(Enum): + Any = "any" + CPU = "cpu" + GPU = "gpu" + NPU = "npu" + + +# Pass name is case insensitive, so we use lower case for all pass names +class OlivePassNames: + OnnxConversion = "onnxconversion" + OnnxQuantization = "onnxquantization" + OnnxStaticQuantization = "onnxstaticquantization" + OnnxDynamicQuantization = "onnxdynamicquantization" + ModelBuilder = "modelbuilder" + OpenVINOConversion = "openvinoconversion" + OpenVINOOptimumConversion = "openvinooptimumconversion" + OpenVINOQuantization = "openvinoquantization" + OpenVINOEncapsulation = "openvinoencapsulation" + OrtTransformersOptimization = "orttransformersoptimization" + + +class OlivePropertyNames: + Engine = "engine" + Passes = "passes" + Evaluator = "evaluator" + Evaluators = "evaluators" + Type = "type" + ExternalData = "save_as_external_data" 
+ Systems = "systems" + Accelerators = "accelerators" + Device = "device" + TargetDevice = "target_device" + ExecutionProviders = "execution_providers" + DataConfigs = "data_configs" + Target = "target" + CacheDir = "cache_dir" + OutputDir = "output_dir" + PythonEnvironmentPath = "python_environment_path" + EvaluateInputModel = "evaluate_input_model" + Metrics = "metrics" + UserConfig = "user_config" + CleanCache = "clean_cache" + ExtraArgs = "extra_args" + + +# Path constants +outputModelRelativePath = r"\\\"./model/model.onnx\\\"" +outputModelIntelNPURelativePath = ( + r"\\\"./model/(ov_model_st_quant|openvino_model_quant_st|openvino_model_st_quant).onnx\\\"" +) +outputModelModelBuilderPath = r"\\\"./model\\\"" + +# Import constants +importOnnxruntime = r"import onnxruntime as ort" +importOnnxgenairuntime = r"import onnxruntime_genai as og" diff --git a/.aitk/scripts/sanitize/copy_config.py b/.aitk/scripts/sanitize/copy_config.py new file mode 100644 index 00000000..0937c27a --- /dev/null +++ b/.aitk/scripts/sanitize/copy_config.py @@ -0,0 +1,77 @@ +""" +Copy configuration classes +""" + +import json +import os +import shutil +from typing import Any, List, Union + +import pydash +from pydantic import BaseModel + +from .constants import ReplaceTypeEnum +from .utils import open_ex, printError, printInfo + + +class Replacement(BaseModel): + find: str + replace: Union[str, Any] + type: ReplaceTypeEnum = ReplaceTypeEnum.String + + +class Copy(BaseModel): + src: str + dst: str + replacements: List[Replacement] = [] + + +class CopyConfig(BaseModel): + copies: List[Copy] = [] + + def process(self, modelVerDir: str): + if not self.copies: + return + for copy in self.copies: + src = os.path.join(modelVerDir, copy.src) + dst = os.path.join(modelVerDir, copy.dst) + if not os.path.exists(src): + printError(f"{src} does not exist") + continue + shutil.copy(src, dst) + if copy.replacements: + stringReplacements = [repl for repl in copy.replacements if repl.type == 
ReplaceTypeEnum.String] + if stringReplacements: + with open_ex(dst, "r") as file: + content = file.read() + for replacement in stringReplacements: + printInfo(replacement.find) + if replacement.find not in content: + printError(f"Not in dst file {dst}: {replacement.find}") + continue + content = content.replace(replacement.find, replacement.replace) + with open_ex(dst, "w") as file: + file.write(content) + pathReplacements = [ + repl + for repl in copy.replacements + if repl.type == ReplaceTypeEnum.Path or repl.type == ReplaceTypeEnum.PathAdd + ] + if pathReplacements: + with open_ex(dst, "r") as file: + jsonObj = json.load(file) + for replacement in pathReplacements: + printInfo(replacement.find) + target = pydash.get(jsonObj, replacement.find) + if ( + replacement.type == ReplaceTypeEnum.Path + and target is None + or replacement.type == ReplaceTypeEnum.PathAdd + and target + ): + printError(f"Not match type in dst json {dst}: {replacement.find}") + continue + pydash.set_(jsonObj, replacement.find, replacement.replace) + with open_ex(dst, "w") as file: + json.dump(jsonObj, file, indent=4) + file.write("\n") diff --git a/.aitk/scripts/sanitize/file_validation.py b/.aitk/scripts/sanitize/file_validation.py new file mode 100644 index 00000000..d54e16ad --- /dev/null +++ b/.aitk/scripts/sanitize/file_validation.py @@ -0,0 +1,201 @@ +""" +File validation functions +""" + +from __future__ import annotations + +import json +import os +import re +import shutil +from pathlib import Path + +from .constants import ( + EPNames, + OlivePassNames, + OlivePropertyNames, + importOnnxgenairuntime, + importOnnxruntime, + outputModelIntelNPURelativePath, + outputModelModelBuilderPath, + outputModelRelativePath, +) +from .model_parameter import ModelParameter +from .utils import GlobalVars, open_ex, printError, printProcess, printWarning + + +def check_case(path: Path) -> bool: + path = Path(path) + try: + abs_path = path.resolve(strict=False) + except Exception: + return False + + 
if str(path) != str(abs_path): + printError(f"Path case mismatch: {path} vs {abs_path}") + return False + return True + + +def process_gitignore(modelVerDir: str, configDir: str): + gitignoreFile = os.path.join(modelVerDir, ".gitignore") + GlobalVars.gitignoreCheck.append(gitignoreFile) + templateFile = os.path.join(configDir, "gitignore.md") + if not os.path.exists(gitignoreFile): + printWarning(f"{gitignoreFile} not exists. Copy the template one") + shutil.copy(templateFile, gitignoreFile) + else: + # Ensure each non-empty line in template is present in the .gitignore file (exact match) + with open_ex(gitignoreFile, "r") as file: + gitignoreLines = [line.strip() for line in file if line.strip()] + with open_ex(templateFile, "r") as file: + templateLines = [line.strip() for line in file if line.strip()] + missing = [line for line in templateLines if line not in gitignoreLines] + for line in missing: + printError(f"{gitignoreFile} does not have line '{line}'") + + +def checkSystem(oliveJsonFile: str, system): + accelerators = system[OlivePropertyNames.Accelerators] + if len(accelerators) != 1: + printError(f"{oliveJsonFile} should have only one accelerator") + return False + eps = accelerators[0][OlivePropertyNames.ExecutionProviders] + if len(eps) != 1: + printError(f"{oliveJsonFile} should have only one execution provider") + return False + if eps[0] not in [ep.value for ep in EPNames]: # compare by value; a str-in-Enum test raises TypeError before Python 3.12 + printError(f"{oliveJsonFile} has wrong execution provider {eps[0]}") + return False + return True + + +def readCheckOliveConfig(oliveJsonFile: str): + """ + This will set phases to modelParameter + """ + GlobalVars.oliveJsonCheck.append(oliveJsonFile) + + printProcess(oliveJsonFile) + with open_ex(oliveJsonFile, "r") as file: + oliveJson = json.load(file) + # check if engine is in oliveJson + if OlivePropertyNames.Engine in oliveJson: + printError(f"{oliveJsonFile} has engine.
Should place in the root instead") + return + if OlivePropertyNames.Evaluator in oliveJson and not isinstance(oliveJson[OlivePropertyNames.Evaluator], str): + printError(f"{oliveJsonFile} evaluator property should be str") + return + + jsonUpdated = False + + # TODO check host + # check target + if OlivePropertyNames.Target not in oliveJson: + printError(f"{oliveJsonFile} should have target") + return + target = oliveJson[OlivePropertyNames.Target] + if OlivePropertyNames.Systems not in oliveJson or target not in oliveJson[OlivePropertyNames.Systems]: + printError(f"{oliveJsonFile} should have {target} system") + return + if not checkSystem(oliveJsonFile, oliveJson[OlivePropertyNames.Systems][target]): + return + + # cache / output / evaluate_input_model + if OlivePropertyNames.CleanCache in oliveJson and oliveJson[OlivePropertyNames.CleanCache]: + oliveJson.pop(OlivePropertyNames.CleanCache) + jsonUpdated = True + + if OlivePropertyNames.CacheDir not in oliveJson or oliveJson[OlivePropertyNames.CacheDir] != "cache": + oliveJson[OlivePropertyNames.CacheDir] = "cache" + jsonUpdated = True + + if OlivePropertyNames.OutputDir not in oliveJson or not str(oliveJson[OlivePropertyNames.OutputDir]).startswith( + "model/" + ): + printError(f"{oliveJsonFile} should have use model/XXX as {OlivePropertyNames.OutputDir}") + + if OlivePropertyNames.EvaluateInputModel not in oliveJson or oliveJson[OlivePropertyNames.EvaluateInputModel]: + oliveJson[OlivePropertyNames.EvaluateInputModel] = False + jsonUpdated = True + + # update save_as_external_data + supportedPasses = [ + v + for k, v in oliveJson[OlivePropertyNames.Passes].items() + if v[OlivePropertyNames.Type].lower() + in [ + OlivePassNames.OnnxConversion, + OlivePassNames.OnnxQuantization, + OlivePassNames.OnnxStaticQuantization, + OlivePassNames.OnnxDynamicQuantization, + OlivePassNames.OrtTransformersOptimization, + ] + ] + for conversionPass in supportedPasses: + if OlivePropertyNames.ExternalData not in conversionPass 
def readCheckIpynb(ipynbFile: str, modelItems: dict[str, ModelParameter]):
    """
    Check an inference notebook against the workflows that use it.

    For every workflow the raw notebook text must contain the expected model
    path and runtime import; both are passed to re.search, so they are treated
    as regular expressions. The execution providers of all workflows are then
    reduced to one target provider, and the notebook must contain the text
    ExecutionProvider=\\"<target>\\" exactly once.

    Args:
        ipynbFile: path of the notebook to check.
        modelItems: workflow file name -> its ModelParameter.

    Returns:
        True when the notebook file exists, False otherwise. The result says
        nothing about validity; problems are reported through printError.
    """
    if not os.path.exists(ipynbFile):
        return False
    GlobalVars.ipynbCheck.append(ipynbFile)

    with open_ex(ipynbFile, "r") as file:
        ipynbContent: str = file.read()

    allRuntimes: list[str] = []

    def addRuntime(epName: str):
        # allRuntimes stores plain strings, so de-duplicate on the string value.
        # The previous code tested the Enum member itself against this list.
        if epName not in allRuntimes:
            allRuntimes.append(epName)

    for name, modelParameter in modelItems.items():
        testPath = outputModelRelativePath
        importStr = importOnnxruntime
        if modelParameter.isLLM:
            testPath = outputModelModelBuilderPath
            importStr = importOnnxgenairuntime
        elif modelParameter.runtime.values and modelParameter.isIntel:
            testPath = outputModelIntelNPURelativePath
        for item in [testPath, importStr]:
            if not re.search(item, ipynbContent):
                printError(f"{ipynbFile} does not have '{item}' for {name}, please use it as input")

        if modelParameter.evalRuntime:
            addRuntime(GlobalVars.RuntimeToEPName[modelParameter.evalRuntime].value)
        elif modelParameter.isIntel:
            addRuntime(EPNames.OpenVINOExecutionProvider.value)
        elif modelParameter.runtime.values:
            for runtime in modelParameter.runtime.values:
                addRuntime(runtime)

    targetEP = None
    if len(allRuntimes) == 2 and EPNames.CPUExecutionProvider.value in allRuntimes:
        # One real provider plus the CPU fallback: the real provider is the target.
        allRuntimes.remove(EPNames.CPUExecutionProvider.value)
        targetEP = allRuntimes[0]
    elif len(allRuntimes) == 1:
        targetEP = allRuntimes[0]
    elif len(allRuntimes) > 1:
        # TODO we use QNN as default because currently we only replace this
        if EPNames.QNNExecutionProvider.value in allRuntimes:
            targetEP = EPNames.QNNExecutionProvider.value
        elif EPNames.CPUExecutionProvider.value in allRuntimes:
            targetEP = EPNames.CPUExecutionProvider.value
        else:
            targetEP = allRuntimes[0]

    if targetEP:
        # The notebook is read as raw text, where the quotes appear as \" .
        targetStr = f'ExecutionProvider=\\"{targetEP}\\"'
        if ipynbContent.count(targetStr) != 1:
            printError(f"{ipynbFile} should have 1 {targetStr}")
    else:
        printError(f"{ipynbFile} has no runtime for it!")
    return True
sanitize/main.py -> scripts/ -> model_lab_configs/ + configDir = str(Path(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))).resolve(strict=False)) + + # get model list + modelList = ModelList.Read(configDir) + # check parameter template + parameterTemplate = readCheckParameterTemplate(os.path.join(configDir, "parameter_template.json")) + + # check each model + for model in modelList.allModels(): + modelDir = shouldCheckModel(configDir, model) + if modelDir: + if not check_case(Path(modelDir)): + printError( + f"Model folder does not exist, or check if case matches between model.id {model.id} and model folder." + ) + + # get all versions + allVersions = [int(name) for name in os.listdir(modelDir) if os.path.isdir(os.path.join(modelDir, name))] + allVersions.sort() + model.version = allVersions[-1] + # check if version is continuous + if allVersions[0] != 1 or allVersions[-1] != len(allVersions): + printError(f"{modelDir} has wrong versions {allVersions}") + + # process each version + for version in allVersions: + # deep copy model for version usage + modelInVersion = copy.deepcopy(model) + modelInVersion.version = version + modelVerDir = os.path.join(modelDir, str(version)) + + # process copy + copyConfigFile = os.path.join(modelVerDir, "_copy.json.config") + if os.path.exists(copyConfigFile): + with open_ex(copyConfigFile, "r") as file: + copyConfigContent = file.read() + copyConfig = CopyConfig.model_validate_json(copyConfigContent, strict=True) + copyConfig.process(modelVerDir) + + # get model space config + modelSpaceConfig = ModelProjectConfig.Read(os.path.join(modelVerDir, "model_project.config")) + modelSpaceConfig.modelInfo.version = int(os.path.basename(modelVerDir)) + + # check md + mdFile = os.path.join(modelVerDir, "README.md") + if not os.path.exists(mdFile): + printError(f"{mdFile} not exists") + + # check requirement.txt + if not model.extension: + requirementFile = os.path.join(modelVerDir, "requirements.txt") + if not 
os.path.exists(requirementFile): + printWarning(f"{requirementFile} not exists.") + + # copy .gitignore + if not model.extension: + process_gitignore(modelVerDir, configDir) + + # check ipynb & parameter + sharedIpynbFile = os.path.join(modelVerDir, "inference_sample.ipynb") + hasSharedIpynb = os.path.exists(sharedIpynbFile) + workflowsAgainstShared: dict[str, ModelParameter] = {} + + if modelSpaceConfig.modelInfo: + modelSpaceConfig.modelInfo.id = modelInVersion.id + else: + modelSpaceConfig.modelInfo = ModelInfoProject(id=modelInVersion.id) + + hasLLM = False + for _, modelItem in enumerate(modelSpaceConfig.workflows): + # set template + fileName = os.path.basename(modelItem.file)[:-5] + modelItem.templateName = fileName + + # read parameter + modelParameter = ModelParameter.Read(os.path.join(modelVerDir, f"{modelItem.file}.config")) + + # check olive json + oliveJsonFile = os.path.join(modelVerDir, modelItem.file) + oliveJson = readCheckOliveConfig(oliveJsonFile) + if not oliveJson: + printError(f"{oliveJsonFile} not exists or is not a valid olive json file") + continue + + # check parameter + modelParameter.Check(parameterTemplate, oliveJson, modelList) + if modelParameter.isIntel: + tmpDevices = modelParameter.getIntelDevices() + # Remove items containing "intel" (case-insensitive) from runtime values + filteredValues = [v for v in model.runtimes if "intel" not in v.lower()] + # Add Intel runtime values + intelRuntimes = [ + GlobalVars.GetRuntimeRPC(EPNames.OpenVINOExecutionProvider, device) for device in tmpDevices + ] + filteredValues.extend([runtime.value for runtime in intelRuntimes]) + model.runtimes = filteredValues + + hasLLM = hasLLM or modelParameter.isLLM + + # check ipynb + if not model.extension: + # although filename and templateName are same here, use fileName to align with Skylight implementation + ipynbFile = os.path.join(modelVerDir, f"{fileName}_inference_sample.ipynb") + hasSpecialIpynb = readCheckIpynb(ipynbFile, {modelItem.file: 
modelParameter}) + if not hasSpecialIpynb: + if not hasSharedIpynb: + printError(f"{ipynbFile} nor {sharedIpynbFile} not exists.") + else: + workflowsAgainstShared[modelItem.file] = modelParameter + + if not model.extension: + readCheckIpynb(sharedIpynbFile, workflowsAgainstShared) + + if model.extension: + GlobalVars.extensionCheck += 1 + + modelSpaceConfig.Check(modelInVersion) + + if hasLLM: + # check inference_model.json + inferenceModelFile = os.path.join(modelVerDir, "inference_model.json") + GlobalVars.inferenceModelCheck.append(inferenceModelFile) + if not os.path.exists(inferenceModelFile): + printWarning(f"{inferenceModelFile} not exists.") + + modelList.Check() + + if GlobalVars.olivePath: + printWarning(f"Total {GlobalVars.oliveCheck} config files checked against olive json files") + + GlobalVars.Check(configDir) + + result = subprocess.run( + ["git", "status", "--porcelain"], + cwd=configDir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + if len(GlobalVars.errorList) == 0: + # If the output is not empty, there are uncommitted changes + if bool(result.stdout.strip()): + printError("Please commit changes!") + + for filename, lineno, msg in GlobalVars.errorList: + # Red text, with file and line number, clickable in terminal + print(f"\033[31mERROR: {filename}:{lineno}: {msg}\033[0m") + + +if __name__ == "__main__": + main() diff --git a/.aitk/scripts/sanitize/model_info.py b/.aitk/scripts/sanitize/model_info.py new file mode 100644 index 00000000..5d9c3921 --- /dev/null +++ b/.aitk/scripts/sanitize/model_info.py @@ -0,0 +1,93 @@ +""" +Model information and model list classes +""" + +from __future__ import annotations + +import os +from typing import Dict, List, Optional + +from pydantic import BaseModel + +from .base import BaseModelClass +from .constants import ArchitectureEnum, IconEnum +from .utils import open_ex, printError, printProcess + +# This file is import by others +# To avoid circular import issues, we should 
class ModelInfo(BaseModel):
    # One model entry as parsed from the model list json.
    displayName: str
    # The field name is the key read from the json, so the spelling must stay.
    discription: Optional[str] = None
    icon: IconEnum
    modelLink: str
    id: str
    runtimes: List[str]  # Changed to List[str] to avoid forward reference issues
    architecture: ArchitectureEnum
    # -1 until a real version number has been assigned.
    version: int = -1
    extension: Optional[bool] = None

    def Check(self):
        """Return True when all mandatory fields are non-empty and version is positive."""
        mandatory = (self.displayName, self.modelLink, self.id, self.runtimes)
        if not all(mandatory):
            return False
        return self.version > 0
CheckDataset(self, datasetKeys, name: str): + for key in datasetKeys: + if key not in self.HFDatasets: + printError(f"{self._file} {name} {key} not in HFDatasets") + + def CheckModel(self, modelIds, name: str): + tmpAllModelIds = {model.id for model in self.models} + for key in modelIds: + if key not in tmpAllModelIds: + printError(f"{self._file} {name} {key} not in ModelInfos") diff --git a/.aitk/scripts/sanitize/model_parameter.py b/.aitk/scripts/sanitize/model_parameter.py new file mode 100644 index 00000000..f55a1c9a --- /dev/null +++ b/.aitk/scripts/sanitize/model_parameter.py @@ -0,0 +1,600 @@ +""" +Model parameter configuration classes +""" + +from __future__ import annotations + +import json +import os +import re +from typing import Any, Dict, Iterator, List, Optional + +from deepdiff import DeepDiff +from model_lab import RuntimeEnum, RuntimeFeatureEnum +from pydantic import BaseModel + +from .base import BaseModelClass +from .constants import ( + EPNames, + OliveDeviceTypes, + OlivePassNames, + OlivePropertyNames, + ParameterActionTypeEnum, + ParameterTagEnum, + ParameterTypeEnum, + PhaseTypeEnum, +) +from .model_info import ModelList +from .parameters import Parameter, ParameterAction +from .utils import GlobalVars, checkPath, get_target_system, open_ex, printError, printProcess, printWarning + + +class RuntimeOverwrite(BaseModel): + # This tag is only used for the case that when we edit the json, we know the property is auto generated by sanitize.py so no need to care about it + autoGenerated: Optional[bool] = None + pyEnvPath: Optional[str] = None + executeEp: Optional[EPNames] = None + # This is usually used for EP binary generation + evaluateUsedInExecute: Optional[bool] = None + + def Check(self, oliveJson: Any): + return self.pyEnvPath and checkPath(self.pyEnvPath, oliveJson) + + +class Section(BaseModel): + # This tag is only used for the case that when we edit the json, we know the property is auto generated by sanitize.py so no need to care 
about it + autoGenerated: Optional[bool] = None + name: str + phase: PhaseTypeEnum + description: Optional[str] = None + parameters: List[Parameter] + disableToggleGeneration: Optional[bool] = None + toggle: Optional[Parameter] = None + + @staticmethod + def datasetPathPattern(path: str): + return re.fullmatch(r"data_configs\[(0|[1-9]\d{0,2})\]\.load_dataset_config\.data_name", path) + + def Check( + self, + templates: Dict[str, Parameter], + _file: str, + sectionId: int, + oliveJson: Any, + modelList: ModelList, + ): + if not self.name: + return False + # if not self.description: + # return False + # TODO add place holder for General? + if not self.parameters and self.phase != PhaseTypeEnum.Conversion: + printWarning(f"self.parameters is empty for {self.phase}.") + + for i, parameter in enumerate(self.parameters): + if parameter.template: + template = parameter.template + if not isinstance(template, Parameter): + printError(f"{_file} section {sectionId} parameter {i} has wrong template") + continue + if template.template not in templates: + printError(f"{_file} section {sectionId} parameter {i} has wrong template") + continue + parameter.clearValue() + parameter.applyTemplate(template) + parameter.applyTemplate(templates[str(template.template)]) + if not parameter.Check(False, oliveJson, modelList): + printError(f"{_file} section {sectionId} parameter {i} has error") + + # TODO move tag check into Parameter + if parameter.path and Section.datasetPathPattern(parameter.path): + if self.phase == PhaseTypeEnum.Quantization: + if not parameter.tags or ParameterTagEnum.QuantizationDataset not in parameter.tags: + printError(f"{_file} section {sectionId} parameter {i} should have QuantizationDataset tag") + elif self.phase == PhaseTypeEnum.Evaluation: + if not parameter.tags or ParameterTagEnum.EvaluationDataset not in parameter.tags: + printError(f"{_file} section {sectionId} parameter {i} should have EvaluationDataset tag") + if parameter.values: + missing_keys = [key 
def setupUseX(self, oliveJson: Any):
    """
    Record which special conversion pass the olive json uses.

    Looks up the ModelBuilder, OpenVINOConversion and OpenVINOOptimumConversion
    passes by their lower-cased type and stores the key of each pass found (or
    None) in the matching use* attribute.

    Args:
        oliveJson: the parsed olive json; its passes section is read.

    Returns:
        True when at most one of the three passes is present. False, after
        reporting through printError, when more than one is present.
    """

    def getPass(passType: str):
        # Key of the first pass whose lower-cased type equals passType, else None.
        return next(
            (
                k
                for k, v in oliveJson[OlivePropertyNames.Passes].items()
                if v[OlivePropertyNames.Type].lower() == passType
            ),
            None,
        )

    self.useModelBuilder = getPass(OlivePassNames.ModelBuilder)
    self.useOpenVINOConversion = getPass(OlivePassNames.OpenVINOConversion)
    self.useOpenVINOOptimumConversion = getPass(OlivePassNames.OpenVINOOptimumConversion)

    found = [
        passKey
        for passKey in (
            self.useModelBuilder,
            self.useOpenVINOConversion,
            self.useOpenVINOOptimumConversion,
        )
        if passKey
    ]
    if len(found) > 1:
        # Name all three exclusive pass kinds and the colliding pass keys, so the
        # message is accurate whichever pair (or triple) is present.
        printError(
            "should use at most one of ModelBuilder, OpenVINOConversion and "
            f"OpenVINOOptimumConversion, but found passes: {', '.join(found)}"
        )
        return False
    return True
def getIntelDevices(self) -> Iterator[OliveDeviceTypes]:
    """
    Yield the device types this config targets on Intel, in enum order.

    OliveDeviceTypes.Any is never yielded. When intelRuntimeValues is set and
    non-empty, only the devices listed there are yielded.
    """
    for device in OliveDeviceTypes:
        if device != OliveDeviceTypes.Any and (
            not self.intelRuntimeValues or device in self.intelRuntimeValues
        ):
            yield device
!= False and currentRuntimeRPC != RuntimeEnum.CPU: + runtimeValues.append(GlobalVars.RuntimeToEPName[RuntimeEnum.CPU].value) + runtimeDisplayNames.append(GlobalVars.RuntimeToDisplayName[RuntimeEnum.CPU]) + if runtimeActions is not None: + runtimeActions.append([]) + + self.runtime = Parameter( + autoGenerated=True, + name="Evaluate on", + type=ParameterTypeEnum.Enum, + values=runtimeValues, + displayNames=runtimeDisplayNames, + path=f"{OlivePropertyNames.Systems}.{syskey}.{OlivePropertyNames.Accelerators}.0.{OlivePropertyNames.ExecutionProviders}.0", + readOnly=False, + ) + if currentEp == EPNames.OpenVINOExecutionProvider.value: + self.runtime.path = ( + f"{OlivePropertyNames.Systems}.{syskey}.{OlivePropertyNames.Accelerators}.0.{OlivePropertyNames.Device}" + ) + self.runtime.values = [] + self.runtime.displayNames = [] + for tmpDevice in self.getIntelDevices(): + tmpRuntimeRPC = GlobalVars.GetRuntimeRPC(EPNames.OpenVINOExecutionProvider, tmpDevice) + self.runtime.values.append(GlobalVars.RuntimeToOliveDeviceType[tmpRuntimeRPC].value) + self.runtime.displayNames.append(GlobalVars.RuntimeToDisplayName[tmpRuntimeRPC]) + + self.runtime.actions = runtimeActions + self.TryToRemoveReuseCacheInRuntimeAction(oliveJson) + if not self.runtime.Check(False, oliveJson, modelList): + printError(f"{self._file} runtime has error") + + # Add runtime overwrite + if self.isQNNLLM: + if not system[OlivePropertyNames.Type] == "PythonEnvironment": + printError(f"{self._file}'s olive json does not use PythonEnvironment") + self.runtimeOverwrite = RuntimeOverwrite( + autoGenerated=True, + pyEnvPath=f"{OlivePropertyNames.Systems}.{syskey}.{OlivePropertyNames.PythonEnvironmentPath}", + executeEp=EPNames.CUDAExecutionProvider, + evaluateUsedInExecute=True, + ) + if self.runtimeOverwrite and not self.runtimeOverwrite.Check(oliveJson): + printError(f"{self._file} runtime overwrite has error") + self.executeRuntimeFeatures = [RuntimeFeatureEnum.AutoGptq] + self.pyEnvRuntimeFeatures = 
[RuntimeFeatureEnum.Nightly] + + for tmpDevice, section in enumerate(self.sections): + # Add conversion toggle + if section.phase == PhaseTypeEnum.Conversion: + if not section.disableToggleGeneration: + conversion = None + if self.debugInfo: + conversion = self.debugInfo.getUseX() + if not conversion: + conversion = [ + k + for k, v in oliveJson[OlivePropertyNames.Passes].items() + if v[OlivePropertyNames.Type].lower() == OlivePassNames.OnnxConversion + ][0] + conversionPath = f"{OlivePropertyNames.Passes}.{conversion}" + section.toggle = Parameter( + autoGenerated=True, + name="Convert to ONNX format", + type=ParameterTypeEnum.Bool, + path=conversionPath, + actions=[[], []], + readOnly=True, + ) + + # Add quantization toggle + elif section.phase == PhaseTypeEnum.Quantization: + if not section.disableToggleGeneration: + toggleReadOnly = None + actions = [] + quantize = None + if self.debugInfo: + quantize = self.debugInfo.getUseX() + if quantize: + toggleReadOnly = True + else: + quantize = [ + k + for k, v in oliveJson[OlivePropertyNames.Passes].items() + if v[OlivePropertyNames.Type].lower() + in [ + OlivePassNames.OnnxQuantization, + OlivePassNames.OnnxStaticQuantization, + OlivePassNames.OnnxDynamicQuantization, + ] + ][0] + conversion = [ + (k, v) + for k, v in oliveJson[OlivePropertyNames.Passes].items() + if v[OlivePropertyNames.Type].lower() == OlivePassNames.OnnxConversion + ][0] + actions = [ + ParameterAction( + path=f"{OlivePropertyNames.Passes}", + type=ParameterActionTypeEnum.Update, + value={conversion[0]: conversion[1]}, + ) + ] + quantizePath = f"{OlivePropertyNames.Passes}.{quantize}" + section.toggle = Parameter( + autoGenerated=True, + name="Quantize model", + type=ParameterTypeEnum.Bool, + path=quantizePath, + readOnly=toggleReadOnly, + actions=[[], actions], + ) + + # Add evaluation toggle + elif section.phase == PhaseTypeEnum.Evaluation: + if not section.disableToggleGeneration: + action = ParameterAction( + path=OlivePropertyNames.Evaluator, 
def TryToRemoveReuseCacheInRuntimeAction(self, oliveJson: Any):
    """
    Attach reuse_cache delete actions to every runtime choice.

    For each pass in the olive json that has a reuse_cache field, one Delete
    action on that field is appended to the action list of every runtime value.
    Missing action lists are created first. Nothing is changed when no pass
    has reuse_cache. An error is reported when the runtime has no values.
    """
    runtime = self.runtime
    if not runtime.values:
        printError(f"{self._file} runtime values is empty, cannot remove reuse_cache")
        return

    # Paths of all reuse_cache fields, in the order the passes are declared.
    passes = oliveJson[OlivePropertyNames.Passes] if OlivePropertyNames.Passes in oliveJson else {}
    cachePaths = [
        f"{OlivePropertyNames.Passes}.{passName}.reuse_cache"
        for passName, passBody in passes.items()
        if "reuse_cache" in passBody
    ]
    if not cachePaths:
        return None

    if runtime.actions is None:
        runtime.actions = []
    for index in range(len(runtime.values)):
        # Make sure the runtime value at this index has an action list.
        if index >= len(runtime.actions):
            runtime.actions.append([])
        runtime.actions[index].extend(
            ParameterAction(
                path=cachePath,
                type=ParameterActionTypeEnum.Delete,
            )
            for cachePath in cachePaths
        )
    return None
getOpenVINOPass(passType: str): + return next( + ( + (k, v) + for k, v in oliveJson[OlivePropertyNames.Passes].items() + if v[OlivePropertyNames.Type].lower() == passType + ), + None, + ) + + openVINOOptimumConversion = getOpenVINOPass(OlivePassNames.OpenVINOOptimumConversion) + openVINOQuantization = getOpenVINOPass(OlivePassNames.OpenVINOQuantization) + openVINOEncapsulation = getOpenVINOPass(OlivePassNames.OpenVINOEncapsulation) + + def addRuntimeInConversion(runtime: Parameter, path: str, values: List[Any]): + if not runtime.path: + runtime.path = path + runtime.values = values + runtime.displayNames = [ + GlobalVars.RuntimeToDisplayName[GlobalVars.GetRuntimeRPC(EPNames.OpenVINOExecutionProvider, e)] + for e in values + ] + else: + if runtime.actions is None: + runtime.actions = [] + for i in range(len(values)): + if i >= len(runtime.actions): + runtime.actions.append([]) + runtime.actions[i].append( + ParameterAction( + path=path, + type=ParameterActionTypeEnum.Update, + value=values[i], + ) + ) + + if openVINOOptimumConversion or openVINOQuantization or openVINOEncapsulation: + self.runtimeInConversion = Parameter( + autoGenerated=True, name="Convert/Quantize to", type=ParameterTypeEnum.Enum + ) + if openVINOOptimumConversion: + addRuntimeInConversion( + self.runtimeInConversion, + f"{OlivePropertyNames.Passes}.{openVINOOptimumConversion[0]}.{OlivePropertyNames.ExtraArgs}.{OlivePropertyNames.Device}", + # TODO support any after olive release + [e.value for e in self.getIntelDevices()], + ) + if openVINOQuantization: + addRuntimeInConversion( + self.runtimeInConversion, + f"{OlivePropertyNames.Passes}.{openVINOQuantization[0]}.{OlivePropertyNames.TargetDevice}", + # TODO support any after olive release + [e.value for e in self.getIntelDevices()], + ) + if openVINOEncapsulation: + addRuntimeInConversion( + self.runtimeInConversion, + f"{OlivePropertyNames.Passes}.{openVINOEncapsulation[0]}.{OlivePropertyNames.TargetDevice}", + # TODO support any after olive 
def checkPhase(self, oliveJson: Any):
    """
    Check that the section phases form one of the accepted sequences.

    Accepted: Conversion alone, Conversion followed by Quantization or by
    Evaluation, or Conversion, Quantization, Evaluation. Anything else is
    reported as an error. When both Quantization and Evaluation are present, a
    warning is issued unless the olive json has exactly two data configs.
    """
    conversion = PhaseTypeEnum.Conversion
    quantization = PhaseTypeEnum.Quantization
    evaluation = PhaseTypeEnum.Evaluation

    allPhases = [section.phase for section in self.sections]
    acceptedOrders = (
        [conversion],
        [conversion, quantization],
        [conversion, evaluation],
        [conversion, quantization, evaluation],
    )
    if allPhases not in acceptedOrders:
        printError(f"{self._file} has wrong phases {allPhases}")

    # The data configs are only read when both phases are present.
    hasQuantAndEval = evaluation in allPhases and quantization in allPhases
    if hasQuantAndEval and len(oliveJson[OlivePropertyNames.DataConfigs]) != 2:
        printWarning(f"{self._file}'s olive json should have two data configs for evaluation")
newRemoveds.append(removed) + if newRemoveds: + diff["dictionary_item_removed"] = newRemoveds + + changeds: dict[str, Any] = diff.pop("values_changed", {}) + newChangeds = {} + for changed in changeds: + if changed.endswith("['data_config']") or changed.endswith("['user_script']"): + # Data config name or *.py could be different + pass + else: + newChangeds[changed] = changeds[changed] + if newChangeds: + diff["values_changed"] = newChangeds + + if diff: + # Check out branch hualxie/example_align for alignments + printError(f"different from {self.oliveFile}\r\n{diff}") + GlobalVars.oliveCheck += 1 + + def checkDebugInfo(self, oliveJson: Any): + self.debugInfo = DebugInfo() + if not self.debugInfo.setupUseX(oliveJson): + return False + return True diff --git a/.aitk/scripts/sanitize/parameters.py b/.aitk/scripts/sanitize/parameters.py new file mode 100644 index 00000000..1b6be493 --- /dev/null +++ b/.aitk/scripts/sanitize/parameters.py @@ -0,0 +1,266 @@ +""" +Parameter classes for model configuration +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Union + +import pydash +from pydantic import BaseModel, TypeAdapter + +from .base import BaseModelClass +from .constants import ( + ParameterActionTypeEnum, + ParameterCheckTypeEnum, + ParameterDisplayTypeEnum, + ParameterTagEnum, + ParameterTypeEnum, +) +from .model_info import ModelList +from .utils import checkPath, open_ex, printError, printProcess, printWarning + + +class ParameterCheck(BaseModel): + type: Optional[ParameterCheckTypeEnum] = None + path: Optional[str] = None + + def check(self, oliveJson: Any): + if not self.type: + return False + if not self.path: + return False + if not checkPath(self.path, oliveJson): + return False + return True + + +class ParameterAction(BaseModel): + type: Optional[ParameterActionTypeEnum] = None + path: Optional[str] = None + value: Optional[Union[str, int, bool, float, Any]] = None + + def check(self, oliveJson: Any): + if not 
self.type: + return False + if not self.path: + return False + if self.type in [ParameterActionTypeEnum.Insert, ParameterActionTypeEnum.Update] and not self.value: + return False + pathExist = checkPath(self.path, oliveJson, False) + if self.type in [ParameterActionTypeEnum.Delete, ParameterActionTypeEnum.Update] and not pathExist: + return False + if self.type in [ParameterActionTypeEnum.Insert] and pathExist: + return False + return True + + +class Parameter(BaseModel): + """ + REMEMEBER to update clearValue and applyTemplate if new fields are added + for enum type and bool type, either path + values or checks + actions + + path: path to the parameter in olive json + values: possible values for the parameter. + path and values are used to determine the status of the parameter + checks: advanced method to get default value for enum or bool + actions: actions to be performed on the parameter in template(original) olive json. + if actions is empty, the parameter is upserted by path = selected value + + """ + + # This tag is only used for the case that when we edit the json, we know the property is auto generated by sanitize.py so no need to care about it + autoGenerated: Optional[bool] = None + name: Optional[str] = None + tags: Optional[List[ParameterTagEnum]] = None + description: Optional[str] = None + descriptionLink: Optional[str] = None + type: Optional[ParameterTypeEnum] = None + displayNames: Optional[List[str]] = None + displayType: Optional[ParameterDisplayTypeEnum] = None + path: Optional[str] = None + values: Optional[List[Any]] = None + # TODO update to expression + selectors: Optional[List[ParameterCheck]] = None + actions: Optional[List[List[ParameterAction]]] = None + readOnly: Optional[bool] = None + customize: Optional[bool] = None + # When the path does not exist, we will use this value as the default value + # defaultValue is already used in Skylight, so do not use it + fallbackValue: Optional[Any] = None + # Template can be: + # 1. 
A Parameter object (with its own template field as str) + # 2. A string (template name) + # When template is a Parameter object, template.template should be str + # always put template in the end + template: Optional[Union[Parameter, str]] = None + + def Check( + self, + isTemplate: bool, + oliveJson: Any = None, + modelList: Optional[ModelList] = None, + ): + if isTemplate: + if self.template: + return False + return True + + if not self.name: + return False + if not self.description: + if self.descriptionLink: + printError("Description link should not be used without description") + return False + if not self.type: + return False + if self.type != ParameterTypeEnum.Bool and self.type != ParameterTypeEnum.Enum: + if not self.path: + return False + elif not checkPath(self.path, oliveJson): + return False + elif self.values or self.selectors or self.actions or self.displayNames or self.customize: + printError("Redundant fields") + return False + else: + expectedLength = 2 + lenValues = len(self.values) if self.values else 0 + lenChecks = len(self.selectors) if self.selectors else 0 + lenActions = len(self.actions) if self.actions else 0 + if self.type == ParameterTypeEnum.Enum: + expectedLength = max(lenValues, lenChecks) + if expectedLength == 0: + printError("Enum should have values or checks") + return False + + # Display names + if self.type == ParameterTypeEnum.Enum and self.selectors and not self.displayNames: + printError("Display names should be used with checks") + return False + + if self.displayNames and len(self.displayNames) != expectedLength: + printError(f"Display names has wrong length {expectedLength}") + return False + + # Display type + if self.type == ParameterTypeEnum.Enum: + if not ( + not self.displayType + or self.displayType == ParameterDisplayTypeEnum.Dropdown + or self.displayType == ParameterDisplayTypeEnum.RadioGroup + ): + printError("Display type should be Dropdown or RadioGroup") + return False + + # customize + if self.customize == 
True: + if not (self.type == ParameterTypeEnum.Enum and self.values and not self.selectors): + printError("Wrong customize prerequisites!") + return False + + # path: bool + # path + actions: bool + # path + values: enum + # path + values + actions: bool, enum + # checks + actions: bool, enum + if ( + self.type == ParameterTypeEnum.Bool + and self.path + and not self.values + and not self.selectors + and not self.actions + ): + pass + elif ( + self.type == ParameterTypeEnum.Bool + and self.path + and not self.values + and not self.selectors + and lenActions == expectedLength + ): + pass + elif ( + self.type == ParameterTypeEnum.Enum + and self.path + and lenValues == expectedLength + and not self.selectors + and not self.actions + ): + pass + elif self.path and lenValues == expectedLength and not self.selectors and lenActions == expectedLength: + pass + elif not self.path and not self.values and lenChecks == expectedLength and lenActions == expectedLength: + pass + else: + printError(f"Invalid combination. Check comment") + return False + if self.path: + if not checkPath(self.path, oliveJson): + return False + # TODO more checks + if self.values: + value = pydash.get(oliveJson, self.path) + if self.tags and ( + ParameterTagEnum.EvaluationDataset in self.tags + or ParameterTagEnum.QuantizationDataset in self.tags + ): + if value != self.values[0]: + printError(f"Value {value} not the first in values for {self.path}") + return False + for i in range(len(self.values) - 1): + value_in_list = self.values[i + 1] + if modelList and value_in_list not in modelList.DatasetSplit: + printError(f"Value {value_in_list} not in DatasetSplit for {self.path}") + return False + if modelList and value_in_list not in modelList.DatasetSubset: + # No error for this, just warning + printWarning( + f"Value {value_in_list} not in DatasetSubset for {self.path}. 
Could be acceptable if it doesn't have subset" + ) + elif value and value not in self.values: + printError(f"Value {value} not in values for {self.path}") + return False + + if self.selectors: + for i, check in enumerate(self.selectors): + if not check.check(oliveJson): + printError(f"Check {i} has error") + return False + + if self.actions: + for i, actions in enumerate(self.actions): + for j, action in enumerate(actions): + if not action.check(oliveJson): + printError(f"Action {i} {j} has error") + return False + return True + + def clearValue(self): + """ + Clear everything except template + """ + for attr in vars(self): + if attr != "template": + setattr(self, attr, None) + + def applyTemplate(self, template: Parameter): + """ + Apply everything except template + """ + for attr, value in vars(template).items(): + if not getattr(self, attr) and attr != "template": + setattr(self, attr, value) + + +def readCheckParameterTemplate(filePath: str): + printProcess(filePath) + with open_ex(filePath, "r") as file: + fileContent = file.read() + adapter = TypeAdapter(Dict[str, Parameter]) + parameters: Dict[str, Parameter] = adapter.validate_json(fileContent, strict=True) + for key, parameter in parameters.items(): + if not parameter.Check(True): + printError(f"{filePath} parameter {key} has error") + newContent = adapter.dump_json(parameters, indent=4, exclude_none=True).decode("utf-8") + BaseModelClass.writeJsonIfChanged(newContent, filePath, fileContent) + return parameters diff --git a/.aitk/scripts/sanitize/project_config.py b/.aitk/scripts/sanitize/project_config.py new file mode 100644 index 00000000..f39c9704 --- /dev/null +++ b/.aitk/scripts/sanitize/project_config.py @@ -0,0 +1,77 @@ +""" +Model project configuration classes +""" + +from typing import List, Optional + +from pydantic import BaseModel + +from .base import BaseModelClass +from .constants import IconEnum +from .model_info import ModelInfo +from .utils import GlobalVars, open_ex, printError, 
printProcess + + +class WorkflowItem(BaseModel): + displayName: Optional[str] = None + file: str + templateName: str + # DO NOT ADD ANYTHING ELSE HERE + # We should add it to the *.json.config + + def Check(self): + if not self.file: + return False + if "\\" in self.file: + printError("Please use / instead of \\") + return False + if not self.templateName: + return False + return True + + +class ModelInfoProject(BaseModel): + id: str + version: int = -1 + displayName: Optional[str] = None + icon: Optional[IconEnum] = None + modelLink: Optional[str] = None + + def Check(self, modelInfo: ModelInfo): + if not self.id: + return False + if self.displayName and self.displayName != modelInfo.displayName: + return False + if self.icon and self.icon != modelInfo.icon: + return False + if self.modelLink and self.modelLink != modelInfo.modelLink: + return False + return True + + +class ModelProjectConfig(BaseModelClass): + workflows: List[WorkflowItem] + modelInfo: ModelInfoProject + + @staticmethod + def Read(modelSpaceConfigFile: str): + printProcess(modelSpaceConfigFile) + with open_ex(modelSpaceConfigFile, "r") as file: + modelSpaceConfigContent = file.read() + modelSpaceConfig = ModelProjectConfig.model_validate_json(modelSpaceConfigContent, strict=True) + modelSpaceConfig._file = modelSpaceConfigFile + modelSpaceConfig._fileContent = modelSpaceConfigContent + return modelSpaceConfig + + # after template is set + def Check(self, modelInfo: ModelInfo): + GlobalVars.modelProjectCheck.append(self._file) + + for i, model in enumerate(self.workflows): + if not model.Check(): + printError(f"{self._file} model {i} has error") + + if not self.modelInfo.Check(modelInfo): + printError(f"{self._file} modelInfo has error") + + self.writeIfChanged() diff --git a/.aitk/scripts/sanitize/utils.py b/.aitk/scripts/sanitize/utils.py new file mode 100644 index 00000000..b3406219 --- /dev/null +++ b/.aitk/scripts/sanitize/utils.py @@ -0,0 +1,188 @@ +""" +Utility functions for the sanitize 
module +""" + +import inspect +import json +import os +from contextlib import contextmanager +from typing import Any + +import pydash +from model_lab import RuntimeEnum + +from .constants import EPNames, OliveDeviceTypes, OlivePropertyNames + + +class GlobalVars: + errorList = [] + verbose = False + # Initialize checks + pathCheck = 0 + configCheck = [] + oliveJsonCheck = [] + ipynbCheck = [] + gitignoreCheck = [] + modelProjectCheck = [] + extensionCheck = 0 + # Should align with number of LLM models + inferenceModelCheck = [] + + olivePath = None + oliveCheck = 0 + RuntimeToEPName = { + RuntimeEnum.CPU: EPNames.CPUExecutionProvider, + RuntimeEnum.QNN: EPNames.QNNExecutionProvider, + RuntimeEnum.IntelAny: EPNames.OpenVINOExecutionProvider, + RuntimeEnum.IntelCPU: EPNames.OpenVINOExecutionProvider, + RuntimeEnum.IntelNPU: EPNames.OpenVINOExecutionProvider, + RuntimeEnum.IntelGPU: EPNames.OpenVINOExecutionProvider, + RuntimeEnum.AMDNPU: EPNames.VitisAIExecutionProvider, + RuntimeEnum.NvidiaGPU: EPNames.CUDAExecutionProvider, + RuntimeEnum.NvidiaTRTRTX: EPNames.NvTensorRTRTXExecutionProvider, + RuntimeEnum.DML: EPNames.DmlExecutionProvider, + } + RuntimeToOliveDeviceType = { + RuntimeEnum.CPU: OliveDeviceTypes.CPU, + RuntimeEnum.QNN: OliveDeviceTypes.NPU, + RuntimeEnum.IntelAny: OliveDeviceTypes.Any, + RuntimeEnum.IntelCPU: OliveDeviceTypes.CPU, + RuntimeEnum.IntelNPU: OliveDeviceTypes.NPU, + RuntimeEnum.IntelGPU: OliveDeviceTypes.GPU, + RuntimeEnum.AMDNPU: OliveDeviceTypes.NPU, + RuntimeEnum.NvidiaGPU: OliveDeviceTypes.GPU, + RuntimeEnum.DML: OliveDeviceTypes.GPU, + } + RuntimeToDisplayName = { + RuntimeEnum.CPU: "CPU", + RuntimeEnum.QNN: "Qualcomm NPU", + RuntimeEnum.IntelAny: "Intel Any", + RuntimeEnum.IntelCPU: "Intel CPU", + RuntimeEnum.IntelNPU: "Intel NPU", + RuntimeEnum.IntelGPU: "Intel GPU", + RuntimeEnum.AMDNPU: "AMD NPU", + RuntimeEnum.NvidiaGPU: "NVIDIA GPU", + RuntimeEnum.NvidiaTRTRTX: "NVIDIA TensorRT for RTX", + RuntimeEnum.DML: "DirectML", + } + + 
@classmethod + def Check(cls, configDir: str): + if len(cls.configCheck) != len(cls.oliveJsonCheck): + printError(f"Config check {len(cls.configCheck)} does not match olive json check {len(cls.oliveJsonCheck)}") + if len(cls.gitignoreCheck) != len(cls.modelProjectCheck) - cls.extensionCheck: + printError( + f"Gitignore check {len(cls.gitignoreCheck)} does not match model project check {len(cls.modelProjectCheck)} - {cls.extensionCheck}" + ) + # We add this test to make sure the sanity check is working: i.e. paths are checked and files are checked + with open_ex(os.path.join(configDir, "checks.json"), "w") as file: + # get class properties and dump all ends with Check + properties = [attr for attr in dir(cls) if attr.endswith("Check") and attr != "Check"] + # save len if list else save the value + properties = { + prop: len(getattr(cls, prop)) if isinstance(getattr(cls, prop), list) else getattr(cls, prop) + for prop in properties + } + json.dump(properties, file, indent=4) + file.write("\n") + + @classmethod + def GetRuntimeRPC(cls, epName: EPNames, oliveDeviceType: OliveDeviceTypes) -> RuntimeEnum: + # Accept epName as either Enum or string, convert to Enum if needed + if not isinstance(epName, EPNames): + epName = EPNames(epName) + # Accept oliveDeviceType as either Enum or string, convert to Enum if needed + if not isinstance(oliveDeviceType, OliveDeviceTypes): + oliveDeviceType = OliveDeviceTypes(oliveDeviceType) + + matching_runtimes = [runtime for runtime, ep in cls.RuntimeToEPName.items() if ep == epName] + if not matching_runtimes: + raise ValueError(f"No runtime found for EPName: {epName}") + if len(matching_runtimes) == 1: + return matching_runtimes[0] + # If multiple runtimes match, filter by oliveDeviceType + for runtime in matching_runtimes: + if cls.RuntimeToOliveDeviceType[runtime] == oliveDeviceType: + return runtime + raise ValueError(f"No matching runtime found for EPName: {epName} and OliveDeviceType: {oliveDeviceType}") + + +def 
printProcess(msg: str): + if GlobalVars.verbose: + print(f"Process {msg}") + + +def printInfo(msg: str): + if GlobalVars.verbose: + print(msg) + + +def printTip(msg: str): + """Print important information with special color formatting (cyan)""" + frame = inspect.currentframe() + if frame and frame.f_back: + frame = frame.f_back + filename = os.path.relpath(frame.f_code.co_filename) + lineno = frame.f_lineno + else: + filename = "unknown" + lineno = 0 + # Cyan text with file and line number, clickable in terminal + print(f"\033[36mTip: {filename}:{lineno}: {msg}\033[0m") + + +def printError(msg: str): + frame = inspect.currentframe() + if frame and frame.f_back: + frame = frame.f_back + filename = os.path.relpath(frame.f_code.co_filename) + lineno = frame.f_lineno + else: + filename = "unknown" + lineno = 0 + # print all errors in the end + GlobalVars.errorList.append((filename, lineno, msg)) + + +def printWarning(msg: str): + frame = inspect.currentframe() + if frame and frame.f_back: + frame = frame.f_back + filename = os.path.relpath(frame.f_code.co_filename) + lineno = frame.f_lineno + else: + filename = "unknown" + lineno = 0 + # Yellow text, with file and line number, clickable in terminal + print(f"\033[33mWARNING: {filename}:{lineno}: {msg}\033[0m") + + +@contextmanager +def open_ex(file_path, mode): + # Note: The `newline` parameter has no effect when reading a file. 
+ file = open(file_path, mode, encoding="utf-8", newline="\n") + try: + yield file + finally: + file.close() + + +def get_target_system(oliveJson: Any): + syskey = oliveJson[OlivePropertyNames.Target] + sysValue = oliveJson[OlivePropertyNames.Systems][syskey] + return syskey, sysValue + + +def checkPath(path: str, oliveJson: Any, printOnNotExist: bool = True): + printInfo(path) + GlobalVars.pathCheck += 1 + if pydash.get(oliveJson, path) is None: + syskey, system = get_target_system(oliveJson) + currentEp = system[OlivePropertyNames.Accelerators][0][OlivePropertyNames.ExecutionProviders][0] + # TODO some ov recipes do not have device but we set it in config + if path == f"systems.{syskey}.accelerators.0.device" and currentEp == EPNames.OpenVINOExecutionProvider.value: + return True + if printOnNotExist: + printError(f"Not in olive json: {path}") + return False + return True From 8cd735870c38ebc6f86d7b8da6963a6984ac935f Mon Sep 17 00:00:00 2001 From: hualxie Date: Mon, 28 Jul 2025 16:07:15 +0800 Subject: [PATCH 06/15] add project_processor --- .aitk/scripts/project_processor.py | 113 +++++++++++++++++++++++++++ .aitk/scripts/project_scanner.py | 0 .aitk/scripts/sanitize/main.py | 16 +--- .aitk/scripts/sanitize/model_info.py | 4 +- microsoft-resnet-50/aitk/info.yml | 1 + 5 files changed, 121 insertions(+), 13 deletions(-) create mode 100644 .aitk/scripts/project_processor.py delete mode 100644 .aitk/scripts/project_scanner.py diff --git a/.aitk/scripts/project_processor.py b/.aitk/scripts/project_processor.py new file mode 100644 index 00000000..ec01dbc8 --- /dev/null +++ b/.aitk/scripts/project_processor.py @@ -0,0 +1,113 @@ +from pathlib import Path + +import yaml +from .sanitize.model_info import ModelList, ModelInfo +from .sanitize.constants import IconEnum, ArchitectureEnum, EPNames, OliveDeviceTypes +from.sanitize.utils import GlobalVars +from .sanitize.project_config import ModelInfoProject, WorkflowItem, ModelProjectConfig +from .model_lab import RuntimeEnum 
+ +org_to_icon = { + "Intel": IconEnum.Intel, + "google-bert": IconEnum.Gemini, + "openai": IconEnum.OpenAI, +} + + +def get_runtime(recipe: dict) -> RuntimeEnum: + ep = EPNames(recipe.get("ep")) + device = OliveDeviceTypes(recipe.get("device")) + return GlobalVars.GetRuntimeRPC(ep, device) + + +def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict) -> ModelInfo: + """ + Convert a YAML object to a ModelInfo instance. + """ + id = yaml_object.get("id") + version = yaml_object.get("version", 1) + if not id: + raise ValueError(f"Model ID is required in {yml_file}") + if not isinstance(version, int) or version <= 0: + raise ValueError(f"Model version must be a positive integer in {yml_file}") + id_segs = id.split("/") + + + display_name = yaml_object.get("displayName", "/".join(id_segs[1:])) + icon = yaml_object.get("icon", org_to_icon.get(id_segs[1])) + if icon is str: + icon = IconEnum(icon) + model_link = yaml_object.get("modelLink", "/".join(["https://huggingface.co"] + id_segs[1:])) + architecture = yaml_object.get("architecture", ArchitectureEnum.Transformer) + if architecture is str: + architecture = ArchitectureEnum(architecture) + recipes = yaml_object.get("recipes", []) + runtimes = set() + for recipe in recipes: + runtimes.add(get_runtime(recipe)) + runtimes = [r for r in RuntimeEnum if r in runtimes] + relative_path = str(yml_file.relative_to(root_dir)) + model_info = ModelInfo( + displayName=display_name, + icon=icon, + modelLink=model_link, + id=id, + runtimes=runtimes, + architecture=architecture, + version=version, + relativePath=relative_path, + ) + return model_info + + +def convert_yaml_to_project_config(yml_file: Path, yaml_object: dict) -> ModelProjectConfig: + recipes = yaml_object.get("recipes", []) + items = [] + for recipe in recipes: + file = recipe.get("file") + items.append( + WorkflowItem( + file=file, + templateName=file[:-5] if file and file.endswith(".json") else file, + ) + ) + result = ModelProjectConfig( + 
workflows=items, + modelInfo=ModelInfoProject( + id=yaml_object.get("id", ""), + ), + ) + result._file = str(yml_file.parent / "model_project.config") + result.writeIfChanged() + return result + + +def project_processor(): + target_dir = Path(__file__).parent.parent + + modelList = ModelList.Read(str(target_dir / "configs" / "model_list.json")) + modelList.models.clear() + + for yml_file in target_dir.rglob("*.yml"): + # read yml file as yaml object + with yml_file.open("r", encoding="utf-8") as file: + try: + yaml_content = file.read() + yaml_object = yaml.safe_load(yaml_content) + except yaml.YAMLError as e: + print(f"Error reading {yml_file}: {e}") + continue + keywords = yaml_object.get("keywords", []) + if not keywords or "aitk" not in keywords: + print(f"aitk keyword not found in {yml_file}") + continue + modelList.models.append( + convert_yaml_to_model_info(target_dir, yml_file, yaml_object) + ) + convert_yaml_to_project_config(yml_file, yaml_object) + + modelList.writeIfChanged() + + +if __name__ == "__main__": + project_processor() diff --git a/.aitk/scripts/project_scanner.py b/.aitk/scripts/project_scanner.py deleted file mode 100644 index e69de29b..00000000 diff --git a/.aitk/scripts/sanitize/main.py b/.aitk/scripts/sanitize/main.py index 37099073..63d94fc5 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -10,7 +10,7 @@ import subprocess from pathlib import Path -from .constants import EPNames, ModelStatusEnum +from .constants import EPNames from .copy_config import CopyConfig from .file_validation import check_case, process_gitignore, readCheckIpynb, readCheckOliveConfig from .model_info import ModelInfo, ModelList @@ -20,14 +20,6 @@ from .utils import GlobalVars, open_ex, printError, printWarning -def shouldCheckModel(configDir: str, model: ModelInfo) -> str | None: - modelDir = os.path.join(configDir, model.id) - # If we have folder, we also check it - if model.status == ModelStatusEnum.Ready or 
os.path.exists(modelDir): - return modelDir - return None - - def main(): argparser = argparse.ArgumentParser(description="Check model lab configs") argparser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode") @@ -54,7 +46,7 @@ def main(): # check each model for model in modelList.allModels(): - modelDir = shouldCheckModel(configDir, model) + modelDir = os.path.join(configDir, model.id) if modelDir: if not check_case(Path(modelDir)): printError( @@ -134,12 +126,12 @@ def main(): if modelParameter.isIntel: tmpDevices = modelParameter.getIntelDevices() # Remove items containing "intel" (case-insensitive) from runtime values - filteredValues = [v for v in model.runtimes if "intel" not in v.lower()] + filteredValues = [v for v in model.runtimes if "intel" not in v.value.lower()] # Add Intel runtime values intelRuntimes = [ GlobalVars.GetRuntimeRPC(EPNames.OpenVINOExecutionProvider, device) for device in tmpDevices ] - filteredValues.extend([runtime.value for runtime in intelRuntimes]) + filteredValues.extend([runtime for runtime in intelRuntimes]) model.runtimes = filteredValues hasLLM = hasLLM or modelParameter.isLLM diff --git a/.aitk/scripts/sanitize/model_info.py b/.aitk/scripts/sanitize/model_info.py index 5d9c3921..0581b363 100644 --- a/.aitk/scripts/sanitize/model_info.py +++ b/.aitk/scripts/sanitize/model_info.py @@ -12,6 +12,7 @@ from .base import BaseModelClass from .constants import ArchitectureEnum, IconEnum from .utils import open_ex, printError, printProcess +from ..model_lab import RuntimeEnum # This file is import by others # To avoid circular import issues, we should carefully manage imports @@ -23,10 +24,11 @@ class ModelInfo(BaseModel): icon: IconEnum modelLink: str id: str - runtimes: List[str] # Changed to List[str] to avoid forward reference issues + runtimes: List[RuntimeEnum] architecture: ArchitectureEnum version: int = -1 extension: Optional[bool] = None + relativePath: Optional[str] = None def Check(self): if not 
self.displayName: diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml index feadf0e4..02697370 100644 --- a/microsoft-resnet-50/aitk/info.yml +++ b/microsoft-resnet-50/aitk/info.yml @@ -21,3 +21,4 @@ aitk: modelInfo: id: "huggingface/microsoft/resnet-50" version: 1 + architecture: CNN From aa943f391ceab44b618450d46c8aef353f7f23d8 Mon Sep 17 00:00:00 2001 From: hualxie Date: Mon, 28 Jul 2025 16:12:09 +0800 Subject: [PATCH 07/15] debug --- .aitk/scripts/project_processor.py | 17 +++++++---------- .aitk/scripts/sanitize.py | 4 +++- .aitk/scripts/sanitize/main.py | 4 ++-- .aitk/scripts/sanitize/model_info.py | 6 ++---- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/.aitk/scripts/project_processor.py b/.aitk/scripts/project_processor.py index ec01dbc8..f28f93c7 100644 --- a/.aitk/scripts/project_processor.py +++ b/.aitk/scripts/project_processor.py @@ -1,11 +1,11 @@ from pathlib import Path import yaml -from .sanitize.model_info import ModelList, ModelInfo -from .sanitize.constants import IconEnum, ArchitectureEnum, EPNames, OliveDeviceTypes -from.sanitize.utils import GlobalVars -from .sanitize.project_config import ModelInfoProject, WorkflowItem, ModelProjectConfig -from .model_lab import RuntimeEnum +from model_lab import RuntimeEnum +from sanitize.constants import ArchitectureEnum, EPNames, IconEnum, OliveDeviceTypes +from sanitize.model_info import ModelInfo, ModelList +from sanitize.project_config import ModelInfoProject, ModelProjectConfig, WorkflowItem +from sanitize.utils import GlobalVars org_to_icon = { "Intel": IconEnum.Intel, @@ -32,7 +32,6 @@ def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict raise ValueError(f"Model version must be a positive integer in {yml_file}") id_segs = id.split("/") - display_name = yaml_object.get("displayName", "/".join(id_segs[1:])) icon = yaml_object.get("icon", org_to_icon.get(id_segs[1])) if icon is str: @@ -101,13 +100,11 @@ def 
project_processor(): if not keywords or "aitk" not in keywords: print(f"aitk keyword not found in {yml_file}") continue - modelList.models.append( - convert_yaml_to_model_info(target_dir, yml_file, yaml_object) - ) + modelList.models.append(convert_yaml_to_model_info(target_dir, yml_file, yaml_object)) convert_yaml_to_project_config(yml_file, yaml_object) modelList.writeIfChanged() - + if __name__ == "__main__": project_processor() diff --git a/.aitk/scripts/sanitize.py b/.aitk/scripts/sanitize.py index 751d8b95..643fac75 100644 --- a/.aitk/scripts/sanitize.py +++ b/.aitk/scripts/sanitize.py @@ -10,6 +10,7 @@ from pathlib import Path from auto_formatter import auto_format_scripts +from project_processor import project_processor from sanitize.main import main from sanitize.utils import GlobalVars @@ -47,4 +48,5 @@ def run_main(): # Auto-format scripts before running sanitize auto_format_scripts() - run_main() + project_processor() + # run_main() diff --git a/.aitk/scripts/sanitize/main.py b/.aitk/scripts/sanitize/main.py index 63d94fc5..dadbdfd2 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -13,7 +13,7 @@ from .constants import EPNames from .copy_config import CopyConfig from .file_validation import check_case, process_gitignore, readCheckIpynb, readCheckOliveConfig -from .model_info import ModelInfo, ModelList +from .model_info import ModelList from .model_parameter import ModelParameter from .parameters import readCheckParameterTemplate from .project_config import ModelInfoProject, ModelProjectConfig @@ -40,7 +40,7 @@ def main(): configDir = str(Path(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))).resolve(strict=False)) # get model list - modelList = ModelList.Read(configDir) + modelList = ModelList.Read(os.path.join(configDir, "model_list.json")) # check parameter template parameterTemplate = readCheckParameterTemplate(os.path.join(configDir, "parameter_template.json")) diff --git 
a/.aitk/scripts/sanitize/model_info.py b/.aitk/scripts/sanitize/model_info.py index 0581b363..4fe8ce99 100644 --- a/.aitk/scripts/sanitize/model_info.py +++ b/.aitk/scripts/sanitize/model_info.py @@ -4,15 +4,14 @@ from __future__ import annotations -import os from typing import Dict, List, Optional +from model_lab import RuntimeEnum from pydantic import BaseModel from .base import BaseModelClass from .constants import ArchitectureEnum, IconEnum from .utils import open_ex, printError, printProcess -from ..model_lab import RuntimeEnum # This file is import by others # To avoid circular import issues, we should carefully manage imports @@ -58,8 +57,7 @@ class ModelList(BaseModelClass): DatasetSubset: Dict[str, List[str]] @staticmethod - def Read(scriptFolder: str): - modelListFile = os.path.join(scriptFolder, "model_list.json") + def Read(modelListFile: str): printProcess(modelListFile) with open_ex(modelListFile, "r") as file: modelListContent = file.read() From 44e8c6864bda2c2f1ef37792f5ff011f33066517 Mon Sep 17 00:00:00 2001 From: hualxie Date: Mon, 28 Jul 2025 16:36:14 +0800 Subject: [PATCH 08/15] fix --- .aitk/scripts/project_processor.py | 40 +++++++++++++------ Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml | 5 ++- .../aitk/info.yml | 5 ++- .../aitk/info.yml | 5 ++- google-vit-base-patch16-224/aitk/info.yml | 5 ++- intel-bert-base-uncased-mrpc/aitk/info.yml | 13 ++---- .../aitk/info.yml | 5 ++- .../aitk/info.yml | 5 ++- microsoft-Phi-3.5-mini-instruct/aitk/info.yml | 5 ++- microsoft-resnet-50/aitk/info.yml | 6 ++- openai-clip-vit-base-patch16/aitk/info.yml | 5 ++- openai-clip-vit-base-patch32/aitk/info.yml | 5 ++- 12 files changed, 72 insertions(+), 32 deletions(-) diff --git a/.aitk/scripts/project_processor.py b/.aitk/scripts/project_processor.py index f28f93c7..627275d9 100644 --- a/.aitk/scripts/project_processor.py +++ b/.aitk/scripts/project_processor.py @@ -11,6 +11,13 @@ "Intel": IconEnum.Intel, "google-bert": IconEnum.Gemini, "openai": IconEnum.OpenAI, + 
"laion": IconEnum.laion, + "microsoft": IconEnum.Microsoft, + "google": IconEnum.Gemini, + "deepseek-ai": IconEnum.DeepSeek, + "Qwen": IconEnum.qwen, + "meta-llama": IconEnum.Meta, + "mistralai": IconEnum.mistralai, } @@ -24,20 +31,22 @@ def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict """ Convert a YAML object to a ModelInfo instance. """ - id = yaml_object.get("id") - version = yaml_object.get("version", 1) + aitk = yaml_object.get("aitk", {}) + modelInfo = aitk.get("modelInfo", {}) + id = modelInfo.get("id") + version = modelInfo.get("version", 1) if not id: raise ValueError(f"Model ID is required in {yml_file}") if not isinstance(version, int) or version <= 0: raise ValueError(f"Model version must be a positive integer in {yml_file}") id_segs = id.split("/") - display_name = yaml_object.get("displayName", "/".join(id_segs[1:])) - icon = yaml_object.get("icon", org_to_icon.get(id_segs[1])) + display_name = modelInfo.get("displayName", "/".join(id_segs[1:])) + icon = modelInfo.get("icon", org_to_icon.get(id_segs[1])) if icon is str: icon = IconEnum(icon) - model_link = yaml_object.get("modelLink", "/".join(["https://huggingface.co"] + id_segs[1:])) - architecture = yaml_object.get("architecture", ArchitectureEnum.Transformer) + model_link = modelInfo.get("modelLink", "/".join(["https://huggingface.co"] + id_segs[1:])) + architecture = modelInfo.get("architecture", ArchitectureEnum.Transformer) if architecture is str: architecture = ArchitectureEnum(architecture) recipes = yaml_object.get("recipes", []) @@ -70,10 +79,15 @@ def convert_yaml_to_project_config(yml_file: Path, yaml_object: dict) -> ModelPr templateName=file[:-5] if file and file.endswith(".json") else file, ) ) + aitk = yaml_object.get("aitk", {}) + modelInfo = aitk.get("modelInfo", {}) + id = modelInfo.get("id") + version = modelInfo.get("version", 1) result = ModelProjectConfig( workflows=items, modelInfo=ModelInfoProject( - id=yaml_object.get("id", ""), + id=id, + 
version=version, ), ) result._file = str(yml_file.parent / "model_project.config") @@ -82,12 +96,12 @@ def convert_yaml_to_project_config(yml_file: Path, yaml_object: dict) -> ModelPr def project_processor(): - target_dir = Path(__file__).parent.parent + target_dir = Path(__file__).parent.parent.parent - modelList = ModelList.Read(str(target_dir / "configs" / "model_list.json")) + modelList = ModelList.Read(str(target_dir / ".aitk" / "configs" / "model_list.json")) modelList.models.clear() - for yml_file in target_dir.rglob("*.yml"): + for yml_file in target_dir.rglob("info.yml"): # read yml file as yaml object with yml_file.open("r", encoding="utf-8") as file: try: @@ -96,9 +110,9 @@ def project_processor(): except yaml.YAMLError as e: print(f"Error reading {yml_file}: {e}") continue - keywords = yaml_object.get("keywords", []) - if not keywords or "aitk" not in keywords: - print(f"aitk keyword not found in {yml_file}") + aitk = yaml_object.get("aitk", []) + if not aitk: + print(f"aitk not found in {yml_file}") continue modelList.models.append(convert_yaml_to_model_info(target_dir, yml_file, yaml_object)) convert_yaml_to_project_config(yml_file, yaml_object) diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index 8e284e83..d7b55fcf 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -9,7 +9,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "qwen2_5_ov_config.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "qwen2_5_dml_config.json" device: gpu diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index 7c05c28d..9f79ef32 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -9,7 +9,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: 
"deepseek_ov_config.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "deepseek_dml_config.json" device: gpu diff --git a/google-bert-bert-base-multilingual-cased/aitk/info.yml b/google-bert-bert-base-multilingual-cased/aitk/info.yml index c5771102..b87cb62d 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/info.yml +++ b/google-bert-bert-base-multilingual-cased/aitk/info.yml @@ -9,7 +9,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "bert-base-multilingual-cased_context_ov_static.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "bert-base-multilingual-cased_trtrtx.json" device: gpu diff --git a/google-vit-base-patch16-224/aitk/info.yml b/google-vit-base-patch16-224/aitk/info.yml index 26289b59..cdc2474e 100644 --- a/google-vit-base-patch16-224/aitk/info.yml +++ b/google-vit-base-patch16-224/aitk/info.yml @@ -9,7 +9,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "vit_base_patch16_224_context_ov_static.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "vit-base-patch16-224_trtrtx.json" device: gpu diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml index 77efd2a2..6655a388 100644 --- a/intel-bert-base-uncased-mrpc/aitk/info.yml +++ b/intel-bert-base-uncased-mrpc/aitk/info.yml @@ -1,14 +1,5 @@ keywords: aitk -ep: - OpenVINOExecutionProvider - VitisAIExecutionProvider - QNNExecutionProvider - NvTensorRTRTXExecutionProvider -device: - cpu - npu - gpu arch: bert recipes: - file: "bert_qdq_qnn.json" @@ -20,6 +11,10 @@ recipes: ep: VitisAIExecutionProvider - name: intel-bert-base-uncased-mrpc (ov) file: "bert_ov.json" + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "bert_trtrtx.json" device: gpu diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml index 
4ed75a0c..5bed4ab9 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml @@ -14,7 +14,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "laion_clip_ov.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "laion_clip_trtrtx.json" device: gpu diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml index 59e77800..16ebc30d 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -9,7 +9,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "llama3_2_ov_config.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "llama3_2_dml_config.json" device: gpu diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index d0332445..2801977a 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -9,7 +9,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "phi3_5_ov_config.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "phi3_5_dml_config.json" device: gpu diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml index 02697370..038d7eb6 100644 --- a/microsoft-resnet-50/aitk/info.yml +++ b/microsoft-resnet-50/aitk/info.yml @@ -9,7 +9,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "resnet_context_ov_static.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "resnet_trtrtx.json" device: gpu @@ -22,3 +25,4 @@ aitk: id: "huggingface/microsoft/resnet-50" version: 1 architecture: CNN + displayName: "Microsoft/ResNet-50" diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml index a7a3ec1d..ff4aaa43 100644 --- 
a/openai-clip-vit-base-patch16/aitk/info.yml +++ b/openai-clip-vit-base-patch16/aitk/info.yml @@ -14,7 +14,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "openai_clip_ov.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "openai_clip_trtrtx.json" device: gpu diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml index 515d5076..ba11d2cb 100644 --- a/openai-clip-vit-base-patch32/aitk/info.yml +++ b/openai-clip-vit-base-patch32/aitk/info.yml @@ -14,7 +14,10 @@ recipes: device: npu ep: VitisAIExecutionProvider - file: "openai_clip_ov.json" - device: npu + devices: + - npu + - cpu + - gpu ep: OpenVINOExecutionProvider - file: "openai_clip_trtrtx.json" device: gpu From 8852a1f5bec38f7bac2fac8dccb0c314bfb61d1a Mon Sep 17 00:00:00 2001 From: hualxie Date: Mon, 28 Jul 2025 16:49:32 +0800 Subject: [PATCH 09/15] add project_processor --- .aitk/configs/model_list.json | 195 +++++++++--------- .aitk/scripts/model_lab/__init__.py | 8 +- .aitk/scripts/project_processor.py | 14 +- .../aitk/info.yml | 2 +- 4 files changed, 115 insertions(+), 104 deletions(-) diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 69de61d4..b2439505 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -1,21 +1,21 @@ { "models": [ { - "displayName": "Intel/bert-base-uncased-mrpc", - "icon": "intel", - "modelLink": "https://huggingface.co/Intel/bert-base-uncased-mrpc", - "id": "huggingface/Intel/bert-base-uncased-mrpc", + "displayName": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "icon": "DeepSeek", + "modelLink": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "id": "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "runtimes": [ "QNN", "AMDNPU", - "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + 
"relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B\\aitk\\info.yml" }, { "displayName": "google-bert/bert-base-multilingual-cased", @@ -28,45 +28,48 @@ "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "google-bert-bert-base-multilingual-cased\\aitk\\info.yml" }, { - "displayName": "openai/clip-vit-base-patch32", - "icon": "OpenAI", - "modelLink": "https://huggingface.co/openai/clip-vit-base-patch32", - "id": "huggingface/openai/clip-vit-base-patch32", + "displayName": "google/vit-base-patch16-224", + "icon": "gemini", + "modelLink": "https://huggingface.co/google/vit-base-patch16-224", + "id": "huggingface/google/vit-base-patch16-224", "runtimes": [ "QNN", "AMDNPU", "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "google-vit-base-patch16-224\\aitk\\info.yml" }, { - "displayName": "openai/clip-vit-base-patch16", - "icon": "OpenAI", - "modelLink": "https://huggingface.co/openai/clip-vit-base-patch16", - "id": "huggingface/openai/clip-vit-base-patch16", + "displayName": "Intel/bert-base-uncased-mrpc", + "icon": "intel", + "modelLink": "https://huggingface.co/Intel/bert-base-uncased-mrpc", + "id": "huggingface/Intel/bert-base-uncased-mrpc", "runtimes": [ "QNN", "AMDNPU", "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "intel-bert-base-uncased-mrpc\\aitk\\info.yml" }, { "displayName": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", @@ -79,135 +82,141 @@ "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "laion-CLIP-ViT-B-32-laion2B-s34B-b79K\\aitk\\info.yml" 
}, { - "displayName": "Microsoft/ResNet-50", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/resnet-50", - "id": "huggingface/microsoft/resnet-50", + "displayName": "meta-llama/Llama-3.2-1B-Instruct", + "icon": "meta", + "modelLink": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct", + "id": "huggingface/meta-llama/Llama-3.2-1B-Instruct", "runtimes": [ "QNN", "AMDNPU", - "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], - "architecture": "CNN", - "status": "Ready", - "version": 1 + "architecture": "Transformer", + "version": 1, + "relativePath": "meta-llama-Llama-3.2-1B-Instruct\\aitk\\info.yml" }, { - "displayName": "google/vit-base-patch16-224", - "icon": "gemini", - "modelLink": "https://huggingface.co/google/vit-base-patch16-224", - "id": "huggingface/google/vit-base-patch16-224", + "displayName": "microsoft/Phi-3.5-mini-instruct", + "icon": "ms", + "modelLink": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct", + "id": "huggingface/microsoft/Phi-3.5-mini-instruct", "runtimes": [ "QNN", "AMDNPU", - "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "microsoft-Phi-3.5-mini-instruct\\aitk\\info.yml" }, { - "displayName": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "icon": "DeepSeek", - "modelLink": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "id": "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "displayName": "microsoft/Phi-4-mini-reasoning", + "icon": "ms", + "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-reasoning", + "id": "huggingface/microsoft/Phi-4-mini-reasoning", "runtimes": [ - "QNN", - "AMDNPU", - "IntelCPU", - "IntelGPU", "IntelNPU" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "microsoft-Phi-4-mini-reasoning\\aitk\\info.yml" }, { - "displayName": 
"microsoft/Phi-3.5-mini-instruct", + "displayName": "Microsoft/ResNet-50", "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct", - "id": "huggingface/microsoft/Phi-3.5-mini-instruct", + "modelLink": "https://huggingface.co/microsoft/resnet-50", + "id": "huggingface/microsoft/resnet-50", "runtimes": [ "QNN", "AMDNPU", + "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], - "architecture": "Transformer", - "status": "Ready", - "version": 1 + "architecture": "CNN", + "version": 1, + "relativePath": "microsoft-resnet-50\\aitk\\info.yml" }, { - "displayName": "microsoft/Phi-4-mini-reasoning", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-reasoning", - "id": "huggingface/microsoft/Phi-4-mini-reasoning", + "displayName": "mistralai/Mistral-7B-Instruct-v0.3", + "icon": "mistralai", + "modelLink": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3", + "id": "huggingface/mistralai/Mistral-7B-Instruct-v0.3", "runtimes": [ - "IntelCPU", - "IntelGPU", - "IntelNPU" + "IntelGPU" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "mistralai-Mistral-7B-Instruct-v0.3\\aitk\\info.yml" }, { - "displayName": "Qwen/Qwen2.5-1.5B-Instruct", - "icon": "qwen", - "modelLink": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct", - "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", + "displayName": "openai/clip-vit-base-patch16", + "icon": "OpenAI", + "modelLink": "https://huggingface.co/openai/clip-vit-base-patch16", + "id": "huggingface/openai/clip-vit-base-patch16", "runtimes": [ "QNN", "AMDNPU", + "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "openai-clip-vit-base-patch16\\aitk\\info.yml" }, { - "displayName": "meta-llama/Llama-3.2-1B-Instruct", - "icon": "meta", - "modelLink": 
"https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct", - "id": "huggingface/meta-llama/Llama-3.2-1B-Instruct", + "displayName": "openai/clip-vit-base-patch32", + "icon": "OpenAI", + "modelLink": "https://huggingface.co/openai/clip-vit-base-patch32", + "id": "huggingface/openai/clip-vit-base-patch32", "runtimes": [ "QNN", "AMDNPU", + "NvidiaTRTRTX", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "openai-clip-vit-base-patch32\\aitk\\info.yml" }, { - "displayName": "mistralai/Mistral-7B-Instruct-v0.3", - "icon": "mistralai", - "modelLink": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3", - "id": "huggingface/mistralai/Mistral-7B-Instruct-v0.3", + "displayName": "Qwen/Qwen2.5-1.5B-Instruct", + "icon": "qwen", + "modelLink": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct", + "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", "runtimes": [ - "IntelGPU" + "QNN", + "AMDNPU", + "IntelCPU", + "IntelGPU", + "IntelNPU", + "DML" ], "architecture": "Transformer", - "status": "Ready", - "version": 1 + "version": 1, + "relativePath": "Qwen-Qwen2.5-1.5B-Instruct\\aitk\\info.yml" } ], "template_models": [ @@ -221,7 +230,6 @@ "CPU" ], "architecture": "Others", - "status": "Ready", "version": 1 }, { @@ -233,7 +241,6 @@ "CPU" ], "architecture": "Others", - "status": "Hide", "version": 1, "extension": true } diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py index 4b18ecf4..a0aecd09 100644 --- a/.aitk/scripts/model_lab/__init__.py +++ b/.aitk/scripts/model_lab/__init__.py @@ -4,14 +4,14 @@ class RuntimeEnum(Enum): CPU = "CPU" QNN = "QNN" + AMDNPU = "AMDNPU" + NvidiaTRTRTX = "NvidiaTRTRTX" IntelAny = "IntelAny" IntelCPU = "IntelCPU" - IntelNPU = "IntelNPU" IntelGPU = "IntelGPU" - AMDNPU = "AMDNPU" - NvidiaGPU = "NvidiaGPU" - NvidiaTRTRTX = "NvidiaTRTRTX" + IntelNPU = "IntelNPU" DML = "DML" + NvidiaGPU = "NvidiaGPU" WCR = 
"WCR" WCR_CUDA = "WCR_CUDA" # Inference diff --git a/.aitk/scripts/project_processor.py b/.aitk/scripts/project_processor.py index 627275d9..26f876ba 100644 --- a/.aitk/scripts/project_processor.py +++ b/.aitk/scripts/project_processor.py @@ -21,10 +21,12 @@ } -def get_runtime(recipe: dict) -> RuntimeEnum: - ep = EPNames(recipe.get("ep")) - device = OliveDeviceTypes(recipe.get("device")) - return GlobalVars.GetRuntimeRPC(ep, device) +def get_runtime(recipe: dict): + eps = recipe.get("eps", [recipe.get("ep")]) + devices = recipe.get("devices", [recipe.get("device")]) + for ep in eps: + for device in devices: + yield GlobalVars.GetRuntimeRPC(ep, device) def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict) -> ModelInfo: @@ -52,7 +54,7 @@ def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict recipes = yaml_object.get("recipes", []) runtimes = set() for recipe in recipes: - runtimes.add(get_runtime(recipe)) + runtimes.update(get_runtime(recipe)) runtimes = [r for r in RuntimeEnum if r in runtimes] relative_path = str(yml_file.relative_to(root_dir)) model_info = ModelInfo( @@ -114,9 +116,11 @@ def project_processor(): if not aitk: print(f"aitk not found in {yml_file}") continue + print(f"Process aitk for {yml_file}") modelList.models.append(convert_yaml_to_model_info(target_dir, yml_file, yaml_object)) convert_yaml_to_project_config(yml_file, yaml_object) + modelList.models.sort(key=lambda x: (x.displayName.lower())) modelList.writeIfChanged() diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml b/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml index a59bb6cd..b8708f72 100644 --- a/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml @@ -2,7 +2,7 @@ keywords: aitk arch: mistral recipes: - - file: "mistral-7b-instruct-v0.3-ov.json.json" + - file: "mistral-7b-instruct-v0.3-ov.json" device: gpu ep: OpenVINOExecutionProvider aitk: From 
a6402465a2de757d6f7a41c505d67b83b0837860 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 29 Jul 2025 08:22:36 +0800 Subject: [PATCH 10/15] update name --- laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml | 4 ++-- openai-clip-vit-base-patch16/aitk/info.yml | 4 ++-- openai-clip-vit-base-patch32/aitk/info.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml index 4ed75a0c..70edec7c 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml @@ -5,11 +5,11 @@ recipes: - file: "laion_clip_text_qnn.json" device: npu ep: QNNExecutionProvider - name: "Convert Text Model to Qualcomm NPU" + name: "laion-CLIP-ViT-B-32-laion2B-s34B-b79K (Text)" - file: "laion_clip_vision_qnn.json" device: npu ep: QNNExecutionProvider - name: "Convert Vision Model to Qualcomm NPU" + name: "laion-CLIP-ViT-B-32-laion2B-s34B-b79K (Vision)" - file: "laion_clip_qdq_amd.json" device: npu ep: VitisAIExecutionProvider diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml index a7a3ec1d..fd842b36 100644 --- a/openai-clip-vit-base-patch16/aitk/info.yml +++ b/openai-clip-vit-base-patch16/aitk/info.yml @@ -5,11 +5,11 @@ recipes: - file: "openai_clip_text_qnn.json" device: npu ep: QNNExecutionProvider - name: "Convert Text Model to Qualcomm NPU" + name: "openai-clip-vit-base-patch16 (Text)" - file: "openai_clip_vision_qnn.json" device: npu ep: QNNExecutionProvider - name: "Convert Vision Model to Qualcomm NPU" + name: "openai-clip-vit-base-patch16 (Vision)" - file: "openai_clip_qdq_amd.json" device: npu ep: VitisAIExecutionProvider diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml index 515d5076..c0691592 100644 --- a/openai-clip-vit-base-patch32/aitk/info.yml +++ b/openai-clip-vit-base-patch32/aitk/info.yml @@ -5,11 
+5,11 @@ recipes: - file: "openai_clip_text_qnn.json" device: npu ep: QNNExecutionProvider - name: "Convert Text Model to Qualcomm NPU" + name: "openai-clip-vit-base-patch32 (Text)" - file: "openai_clip_vision_qnn.json" device: npu ep: QNNExecutionProvider - name: "Convert Vision Model to Qualcomm NPU" + name: "openai-clip-vit-base-patch32 (Vision)" - file: "openai_clip_qdq_amd.json" device: npu ep: VitisAIExecutionProvider From d8a2688c4bdad45a4cd640b5b46b632268bf2d99 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 29 Jul 2025 08:29:56 +0800 Subject: [PATCH 11/15] fix --- .../aitk/laion_clip_text_qnn_inference_sample.ipynb | 2 +- .../aitk/laion_clip_vision_qnn_inference_sample.ipynb | 2 +- mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml | 2 +- .../aitk/openai_clip_text_qnn_inference_sample.ipynb | 2 +- .../aitk/openai_clip_vision_qnn_inference_sample.ipynb | 2 +- .../aitk/openai_clip_text_qnn_inference_sample.ipynb | 2 +- .../aitk/openai_clip_vision_qnn_inference_sample.ipynb | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb index aab3b532..293b9b1f 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb @@ -63,7 +63,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "text_model = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb index 3fc7a253..02cfa10a 100644 
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb @@ -64,7 +64,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "vision_model = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", diff --git a/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml b/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml index a59bb6cd..b8708f72 100644 --- a/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml +++ b/mistralai-Mistral-7B-Instruct-v0.3/aitk/info.yml @@ -2,7 +2,7 @@ keywords: aitk arch: mistral recipes: - - file: "mistral-7b-instruct-v0.3-ov.json.json" + - file: "mistral-7b-instruct-v0.3-ov.json" device: gpu ep: OpenVINOExecutionProvider aitk: diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb index c571836e..9f0a36b2 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -63,7 +63,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "text_model = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb index a8b98672..f3609ed0 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb +++ 
b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -64,7 +64,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "vision_model = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb index d5cfcda2..0a120030 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -63,7 +63,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "text_model = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb index 816976a0..518a97c7 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -64,7 +64,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "vision_model = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", From 85af2af8db56f90641993e28e59c100822bb7250 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 29 Jul 2025 08:31:17 +0800 Subject: [PATCH 12/15] typoe --- 
microsoft-resnet-50/aitk/inference_sample.ipynb | 2 +- microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb | 2 +- microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/microsoft-resnet-50/aitk/inference_sample.ipynb b/microsoft-resnet-50/aitk/inference_sample.ipynb index e84dc7cf..c167ae59 100644 --- a/microsoft-resnet-50/aitk/inference_sample.ipynb +++ b/microsoft-resnet-50/aitk/inference_sample.ipynb @@ -61,7 +61,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "session = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", diff --git a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb index 5acecca5..489618e6 100644 --- a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb +++ b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb @@ -56,7 +56,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "session = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", " sess_options=session_options,\n", ")\n", "\n", diff --git a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb index 75f15bc3..25eebee1 100644 --- a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb +++ b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb @@ -56,7 +56,7 @@ "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", "\n", "session = ort.InferenceSession(\n", - " onnx_model_path, # a model wirh QNN EPContext nodes\n", + " onnx_model_path, # a model with QNN EPContext 
nodes\n", " sess_options=session_options,\n", ")\n", "\n", From 36420bfc950021f161e4eb8a2e3c9095a8f4e62f Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 29 Jul 2025 14:28:14 +0800 Subject: [PATCH 13/15] add back status --- .aitk/configs/model_list.json | 26 +++++++++++++------------- .aitk/scripts/project_processor.py | 2 +- .aitk/scripts/sanitize/constants.py | 6 ++++++ .aitk/scripts/sanitize/main.py | 16 ++++++++++++---- .aitk/scripts/sanitize/model_info.py | 24 +++++++++++++++++++++--- 5 files changed, 53 insertions(+), 21 deletions(-) diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index b2439505..f8ad47a1 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -15,7 +15,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B\\aitk\\info.yml" + "relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B\\aitk" }, { "displayName": "google-bert/bert-base-multilingual-cased", @@ -33,7 +33,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "google-bert-bert-base-multilingual-cased\\aitk\\info.yml" + "relativePath": "google-bert-bert-base-multilingual-cased\\aitk" }, { "displayName": "google/vit-base-patch16-224", @@ -51,7 +51,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "google-vit-base-patch16-224\\aitk\\info.yml" + "relativePath": "google-vit-base-patch16-224\\aitk" }, { "displayName": "Intel/bert-base-uncased-mrpc", @@ -69,7 +69,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "intel-bert-base-uncased-mrpc\\aitk\\info.yml" + "relativePath": "intel-bert-base-uncased-mrpc\\aitk" }, { "displayName": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", @@ -87,7 +87,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "laion-CLIP-ViT-B-32-laion2B-s34B-b79K\\aitk\\info.yml" + "relativePath": "laion-CLIP-ViT-B-32-laion2B-s34B-b79K\\aitk" }, { "displayName": 
"meta-llama/Llama-3.2-1B-Instruct", @@ -104,7 +104,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "meta-llama-Llama-3.2-1B-Instruct\\aitk\\info.yml" + "relativePath": "meta-llama-Llama-3.2-1B-Instruct\\aitk" }, { "displayName": "microsoft/Phi-3.5-mini-instruct", @@ -121,7 +121,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "microsoft-Phi-3.5-mini-instruct\\aitk\\info.yml" + "relativePath": "microsoft-Phi-3.5-mini-instruct\\aitk" }, { "displayName": "microsoft/Phi-4-mini-reasoning", @@ -133,7 +133,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "microsoft-Phi-4-mini-reasoning\\aitk\\info.yml" + "relativePath": "microsoft-Phi-4-mini-reasoning\\aitk" }, { "displayName": "Microsoft/ResNet-50", @@ -151,7 +151,7 @@ ], "architecture": "CNN", "version": 1, - "relativePath": "microsoft-resnet-50\\aitk\\info.yml" + "relativePath": "microsoft-resnet-50\\aitk" }, { "displayName": "mistralai/Mistral-7B-Instruct-v0.3", @@ -163,7 +163,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "mistralai-Mistral-7B-Instruct-v0.3\\aitk\\info.yml" + "relativePath": "mistralai-Mistral-7B-Instruct-v0.3\\aitk" }, { "displayName": "openai/clip-vit-base-patch16", @@ -181,7 +181,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "openai-clip-vit-base-patch16\\aitk\\info.yml" + "relativePath": "openai-clip-vit-base-patch16\\aitk" }, { "displayName": "openai/clip-vit-base-patch32", @@ -199,7 +199,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "openai-clip-vit-base-patch32\\aitk\\info.yml" + "relativePath": "openai-clip-vit-base-patch32\\aitk" }, { "displayName": "Qwen/Qwen2.5-1.5B-Instruct", @@ -216,7 +216,7 @@ ], "architecture": "Transformer", "version": 1, - "relativePath": "Qwen-Qwen2.5-1.5B-Instruct\\aitk\\info.yml" + "relativePath": "Qwen-Qwen2.5-1.5B-Instruct\\aitk" } ], "template_models": [ diff --git a/.aitk/scripts/project_processor.py 
b/.aitk/scripts/project_processor.py index 26f876ba..b4e1ef68 100644 --- a/.aitk/scripts/project_processor.py +++ b/.aitk/scripts/project_processor.py @@ -56,7 +56,7 @@ def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict for recipe in recipes: runtimes.update(get_runtime(recipe)) runtimes = [r for r in RuntimeEnum if r in runtimes] - relative_path = str(yml_file.relative_to(root_dir)) + relative_path = str(yml_file.parent.relative_to(root_dir)) model_info = ModelInfo( displayName=display_name, icon=icon, diff --git a/.aitk/scripts/sanitize/constants.py b/.aitk/scripts/sanitize/constants.py index ae764619..8c46194e 100644 --- a/.aitk/scripts/sanitize/constants.py +++ b/.aitk/scripts/sanitize/constants.py @@ -30,6 +30,12 @@ class ArchitectureEnum(Enum): Others = "Others" +class ModelStatusEnum(Enum): + Ready = "Ready" + Coming = "Coming" + Hide = "Hide" + + class ParameterTypeEnum(Enum): Enum = "enum" Int = "int" diff --git a/.aitk/scripts/sanitize/main.py b/.aitk/scripts/sanitize/main.py index dadbdfd2..b9977242 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -10,16 +10,24 @@ import subprocess from pathlib import Path -from .constants import EPNames +from .constants import EPNames, ModelStatusEnum from .copy_config import CopyConfig from .file_validation import check_case, process_gitignore, readCheckIpynb, readCheckOliveConfig -from .model_info import ModelList +from .model_info import ModelInfo, ModelList from .model_parameter import ModelParameter from .parameters import readCheckParameterTemplate from .project_config import ModelInfoProject, ModelProjectConfig from .utils import GlobalVars, open_ex, printError, printWarning +def shouldCheckModel(configDir: str, model: ModelInfo) -> str | None: + modelDir = os.path.join(configDir, model.id) + # If we have folder, we also check it + if model.status == ModelStatusEnum.Ready or os.path.exists(modelDir): + return modelDir + return None + + def main(): 
argparser = argparse.ArgumentParser(description="Check model lab configs") argparser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode") @@ -40,13 +48,13 @@ def main(): configDir = str(Path(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))).resolve(strict=False)) # get model list - modelList = ModelList.Read(os.path.join(configDir, "model_list.json")) + modelList = ModelList.Read(configDir) # check parameter template parameterTemplate = readCheckParameterTemplate(os.path.join(configDir, "parameter_template.json")) # check each model for model in modelList.allModels(): - modelDir = os.path.join(configDir, model.id) + modelDir = shouldCheckModel(configDir, model) if modelDir: if not check_case(Path(modelDir)): printError( diff --git a/.aitk/scripts/sanitize/model_info.py b/.aitk/scripts/sanitize/model_info.py index 4fe8ce99..616e25f1 100644 --- a/.aitk/scripts/sanitize/model_info.py +++ b/.aitk/scripts/sanitize/model_info.py @@ -4,13 +4,15 @@ from __future__ import annotations +import os +import re from typing import Dict, List, Optional from model_lab import RuntimeEnum from pydantic import BaseModel from .base import BaseModelClass -from .constants import ArchitectureEnum, IconEnum +from .constants import ArchitectureEnum, IconEnum, ModelStatusEnum from .utils import open_ex, printError, printProcess # This file is import by others @@ -23,25 +25,38 @@ class ModelInfo(BaseModel): icon: IconEnum modelLink: str id: str + groupId: Optional[str] = None + groupItemName: Optional[str] = None runtimes: List[RuntimeEnum] architecture: ArchitectureEnum + status: ModelStatusEnum = ModelStatusEnum.Hide version: int = -1 extension: Optional[bool] = None relativePath: Optional[str] = None def Check(self): + if self.status == ModelStatusEnum.Hide: + return True if not self.displayName: return False if not self.modelLink: return False - if not self.id: + if not self.id and self.status == ModelStatusEnum.Ready: return False if not self.runtimes: 
return False - if self.version <= 0: + if self.version <= 0 and self.status == ModelStatusEnum.Ready: return False return True + def GetSortKey(self): + lowerName = self.displayName.lower() + match = re.search(r"-(\d+(?:\.\d+)?)b", lowerName) + if match: + return (lowerName.replace(match.group(0), "-0b", 1), float(match.group(1))) + else: + return (lowerName, 0) + class ModelList(BaseModelClass): models: List[ModelInfo] @@ -71,6 +86,9 @@ def allModels(self): # Check after set version def Check(self): + self.models.sort(key=lambda x: x.GetSortKey()) + # TODO template models order needs manually set + # self.template_models.sort(key=lambda x: x.displayName.lower()) for i, model in enumerate(self.allModels()): if not model.Check(): printError(f"{self._file} model {i} has error") From 7a10f8ba56294f96df974d913607563fb44c4ed1 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 29 Jul 2025 15:01:46 +0800 Subject: [PATCH 14/15] update copy --- .aitk/configs/model_list.json | 67 ++++++++++++------- .aitk/scripts/project_processor.py | 22 +++--- .aitk/scripts/sanitize.py | 2 +- .aitk/scripts/sanitize/main.py | 23 +++---- .aitk/scripts/sanitize/model_info.py | 3 +- .../aitk/_copy.json.config | 24 +++---- .../aitk/requirements.txt | 2 + .../aitk/bert_dml.json.config | 2 +- .../aitk/bert_qdq_amd.json | 3 +- .../aitk/bert_qdq_qnn.json | 3 +- .../aitk/_copy.json.config | 48 ++++++------- .../aitk/_copy.json.config | 24 +++---- .../aitk/requirements.txt | 2 + .../aitk/_copy.json.config | 24 +++---- .../aitk/requirements.txt | 2 + .../aitk/_copy.json.config | 6 +- .../aitk/requirements.txt | 2 + .../aitk/_copy.json.config | 48 ++++++------- .../aitk/requirements.txt | 2 + 19 files changed, 166 insertions(+), 143 deletions(-) diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index f8ad47a1..7ea47386 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -8,12 +8,13 @@ "runtimes": [ "QNN", "AMDNPU", + "DML", "IntelCPU", "IntelGPU", - 
"IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B\\aitk" }, @@ -26,12 +27,13 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "google-bert-bert-base-multilingual-cased\\aitk" }, @@ -44,12 +46,13 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "google-vit-base-patch16-224\\aitk" }, @@ -62,12 +65,13 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "intel-bert-base-uncased-mrpc\\aitk" }, @@ -80,12 +84,13 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "laion-CLIP-ViT-B-32-laion2B-s34B-b79K\\aitk" }, @@ -97,12 +102,13 @@ "runtimes": [ "QNN", "AMDNPU", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "meta-llama-Llama-3.2-1B-Instruct\\aitk" }, @@ -114,12 +120,13 @@ "runtimes": [ "QNN", "AMDNPU", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "microsoft-Phi-3.5-mini-instruct\\aitk" }, @@ -129,9 +136,12 @@ "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-reasoning", "id": "huggingface/microsoft/Phi-4-mini-reasoning", "runtimes": [ + "IntelCPU", + "IntelGPU", "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "microsoft-Phi-4-mini-reasoning\\aitk" }, @@ 
-144,12 +154,13 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "CNN", + "status": "Ready", "version": 1, "relativePath": "microsoft-resnet-50\\aitk" }, @@ -162,6 +173,7 @@ "IntelGPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "mistralai-Mistral-7B-Instruct-v0.3\\aitk" }, @@ -174,12 +186,13 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "openai-clip-vit-base-patch16\\aitk" }, @@ -192,12 +205,13 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "openai-clip-vit-base-patch32\\aitk" }, @@ -209,12 +223,13 @@ "runtimes": [ "QNN", "AMDNPU", + "DML", "IntelCPU", "IntelGPU", - "IntelNPU", - "DML" + "IntelNPU" ], "architecture": "Transformer", + "status": "Ready", "version": 1, "relativePath": "Qwen-Qwen2.5-1.5B-Instruct\\aitk" } @@ -230,7 +245,9 @@ "CPU" ], "architecture": "Others", - "version": 1 + "status": "Ready", + "version": 1, + "relativePath": ".aitk\\non_model_projects\\templates\\empty" }, { "displayName": "LLM Evaluator Template", @@ -241,8 +258,10 @@ "CPU" ], "architecture": "Others", + "status": "Hide", "version": 1, - "extension": true + "extension": true, + "relativePath": ".aitk\\non_model_projects\\extensions\\llm_evaluator" } ], "HFDatasets": { diff --git a/.aitk/scripts/project_processor.py b/.aitk/scripts/project_processor.py index b4e1ef68..adcc2918 100644 --- a/.aitk/scripts/project_processor.py +++ b/.aitk/scripts/project_processor.py @@ -2,7 +2,7 @@ import yaml from model_lab import RuntimeEnum -from sanitize.constants import ArchitectureEnum, EPNames, IconEnum, OliveDeviceTypes +from sanitize.constants import ArchitectureEnum, IconEnum, ModelStatusEnum from 
sanitize.model_info import ModelInfo, ModelList from sanitize.project_config import ModelInfoProject, ModelProjectConfig, WorkflowItem from sanitize.utils import GlobalVars @@ -44,13 +44,10 @@ def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict id_segs = id.split("/") display_name = modelInfo.get("displayName", "/".join(id_segs[1:])) - icon = modelInfo.get("icon", org_to_icon.get(id_segs[1])) - if icon is str: - icon = IconEnum(icon) + icon = IconEnum(modelInfo.get("icon", org_to_icon.get(id_segs[1]))) model_link = modelInfo.get("modelLink", "/".join(["https://huggingface.co"] + id_segs[1:])) - architecture = modelInfo.get("architecture", ArchitectureEnum.Transformer) - if architecture is str: - architecture = ArchitectureEnum(architecture) + architecture = ArchitectureEnum(modelInfo.get("architecture", ArchitectureEnum.Transformer)) + status = ModelStatusEnum(modelInfo.get("status", ModelStatusEnum.Ready)) recipes = yaml_object.get("recipes", []) runtimes = set() for recipe in recipes: @@ -64,6 +61,7 @@ def convert_yaml_to_model_info(root_dir: Path, yml_file: Path, yaml_object: dict id=id, runtimes=runtimes, architecture=architecture, + status=status, version=version, relativePath=relative_path, ) @@ -98,12 +96,12 @@ def convert_yaml_to_project_config(yml_file: Path, yaml_object: dict) -> ModelPr def project_processor(): - target_dir = Path(__file__).parent.parent.parent + root_dir = Path(__file__).parent.parent.parent - modelList = ModelList.Read(str(target_dir / ".aitk" / "configs" / "model_list.json")) + modelList = ModelList.Read(str(root_dir / ".aitk" / "configs")) modelList.models.clear() - for yml_file in target_dir.rglob("info.yml"): + for yml_file in root_dir.rglob("info.yml"): # read yml file as yaml object with yml_file.open("r", encoding="utf-8") as file: try: @@ -117,10 +115,10 @@ def project_processor(): print(f"aitk not found in {yml_file}") continue print(f"Process aitk for {yml_file}") - 
modelList.models.append(convert_yaml_to_model_info(target_dir, yml_file, yaml_object)) + modelList.models.append(convert_yaml_to_model_info(root_dir, yml_file, yaml_object)) convert_yaml_to_project_config(yml_file, yaml_object) - modelList.models.sort(key=lambda x: (x.displayName.lower())) + modelList.models.sort(key=lambda x: (x.GetSortKey())) modelList.writeIfChanged() diff --git a/.aitk/scripts/sanitize.py b/.aitk/scripts/sanitize.py index 643fac75..4e778ae9 100644 --- a/.aitk/scripts/sanitize.py +++ b/.aitk/scripts/sanitize.py @@ -49,4 +49,4 @@ def run_main(): # Auto-format scripts before running sanitize auto_format_scripts() project_processor() - # run_main() + run_main() diff --git a/.aitk/scripts/sanitize/main.py b/.aitk/scripts/sanitize/main.py index b9977242..4f654d15 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -20,8 +20,8 @@ from .utils import GlobalVars, open_ex, printError, printWarning -def shouldCheckModel(configDir: str, model: ModelInfo) -> str | None: - modelDir = os.path.join(configDir, model.id) +def shouldCheckModel(rootDir: str, configDir: str, model: ModelInfo) -> str | None: + modelDir = os.path.join(rootDir, model.relativePath) if model.relativePath else os.path.join(configDir, model.id) # If we have folder, we also check it if model.status == ModelStatusEnum.Ready or os.path.exists(modelDir): return modelDir @@ -43,9 +43,8 @@ def main(): GlobalVars.olivePath = args.olive # need to resolve due to d:\ vs D:\ - # Now main.py is in sanitize/ folder, so we need to go up three levels: - # sanitize/main.py -> scripts/ -> model_lab_configs/ - configDir = str(Path(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))).resolve(strict=False)) + rootDir = Path(__file__).parent.parent.parent.parent.resolve(strict=False) + configDir = str((rootDir / ".aitk" / "configs").resolve(strict=False)) # get model list modelList = ModelList.Read(configDir) @@ -54,7 +53,7 @@ def main(): # check each model for model in 
modelList.allModels(): - modelDir = shouldCheckModel(configDir, model) + modelDir = shouldCheckModel(str(rootDir), configDir, model) if modelDir: if not check_case(Path(modelDir)): printError( @@ -62,19 +61,13 @@ def main(): ) # get all versions - allVersions = [int(name) for name in os.listdir(modelDir) if os.path.isdir(os.path.join(modelDir, name))] - allVersions.sort() - model.version = allVersions[-1] - # check if version is continuous - if allVersions[0] != 1 or allVersions[-1] != len(allVersions): - printError(f"{modelDir} has wrong versions {allVersions}") - + allVersions = [model.version] # process each version for version in allVersions: # deep copy model for version usage modelInVersion = copy.deepcopy(model) modelInVersion.version = version - modelVerDir = os.path.join(modelDir, str(version)) + modelVerDir = modelDir if model.relativePath else os.path.join(modelDir, str(version)) # process copy copyConfigFile = os.path.join(modelVerDir, "_copy.json.config") @@ -86,7 +79,7 @@ def main(): # get model space config modelSpaceConfig = ModelProjectConfig.Read(os.path.join(modelVerDir, "model_project.config")) - modelSpaceConfig.modelInfo.version = int(os.path.basename(modelVerDir)) + modelSpaceConfig.modelInfo.version = version # check md mdFile = os.path.join(modelVerDir, "README.md") diff --git a/.aitk/scripts/sanitize/model_info.py b/.aitk/scripts/sanitize/model_info.py index 616e25f1..37ca8c56 100644 --- a/.aitk/scripts/sanitize/model_info.py +++ b/.aitk/scripts/sanitize/model_info.py @@ -72,7 +72,8 @@ class ModelList(BaseModelClass): DatasetSubset: Dict[str, List[str]] @staticmethod - def Read(modelListFile: str): + def Read(scriptFolder: str): + modelListFile = os.path.join(scriptFolder, "model_list.json") printProcess(modelListFile) with open_ex(modelListFile, "r") as file: modelListContent = file.read() diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config index c28c58db..f6beabd9 100644 --- 
a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config @@ -1,7 +1,7 @@ { "copies": [ { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/model_project.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config", "dst": "model_project.config", "replacements": [ { @@ -23,7 +23,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json", "dst": "qwen2_5_qnn_config.json", "replacements": [ { @@ -37,13 +37,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json.config", "dst": "qwen2_5_qnn_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json", "dst": "qwen2_5_vitis_ai_config.json", "replacements": [ { @@ -57,13 +57,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config", "dst": "qwen2_5_vitis_ai_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json", "dst": "qwen2_5_ov_config.json", "replacements": [ { @@ -77,7 +77,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config", "dst": "qwen2_5_ov_config.json.config", "replacements": [ { @@ -87,7 +87,7 @@ ] }, { - 
"src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "qwen2_5_dml_config.json", "replacements": [ { @@ -101,13 +101,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", "dst": "qwen2_5_dml_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/README.md", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md", "dst": "README.md", "replacements": [ { @@ -125,13 +125,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/requirements.txt", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt", "dst": "requirements.txt", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb", "dst": "inference_sample.ipynb", "replacements": [ { diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt index 03275c3e..7af84714 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt @@ -1,2 +1,4 @@ +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK datasets optimum diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config index a0925b99..071b7baa 100644 --- a/intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config +++ b/intel-bert-base-uncased-mrpc/aitk/bert_dml.json.config @@ -102,4 +102,4 @@ } } ] -} \ No newline at end of file +} diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json 
b/intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json index 9d7da9c2..5920fe67 100644 --- a/intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json +++ b/intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json @@ -130,7 +130,8 @@ "enable_skip_layer_norm": false, "enable_bias_skip_layer_norm": false, "enable_attention": false - } + }, + "save_as_external_data": true }, "quantization": { "type": "OnnxStaticQuantization", diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json b/intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json index 93f82722..2c7370be 100644 --- a/intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json +++ b/intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json @@ -134,7 +134,8 @@ "enable_skip_layer_norm": false, "enable_bias_skip_layer_norm": false, "enable_attention": false - } + }, + "save_as_external_data": true }, "quantization": { "type": "OnnxStaticQuantization", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config index 4629da4e..b88d2cda 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config @@ -1,7 +1,7 @@ { "copies": [ { - "src": "../../../openai/clip-vit-base-patch16/1/model_project.config", + "src": "../../openai-clip-vit-base-patch16/aitk/model_project.config", "dst": "model_project.config", "replacements": [ { @@ -11,7 +11,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_text_qnn_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb", "dst": "laion_clip_text_qnn_inference_sample.ipynb", "replacements": [ { @@ -21,7 +21,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_text_qnn.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json", "dst": "laion_clip_text_qnn.json", "replacements": [ { @@ -31,7 +31,7 @@ ] }, { - "src": 
"../../../openai/clip-vit-base-patch16/1/openai_clip_text_qnn.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json.config", "dst": "laion_clip_text_qnn.json.config", "replacements": [ { @@ -41,7 +41,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_vision_qnn_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb", "dst": "laion_clip_vision_qnn_inference_sample.ipynb", "replacements": [ { @@ -51,7 +51,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_vision_qnn.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json", "dst": "laion_clip_vision_qnn.json", "replacements": [ { @@ -61,7 +61,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_vision_qnn.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json.config", "dst": "laion_clip_vision_qnn.json.config", "replacements": [ { @@ -71,7 +71,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb", "dst": "laion_clip_ov_inference_sample.ipynb", "replacements": [ { @@ -81,7 +81,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json", "dst": "laion_clip_ov.json", "replacements": [ { @@ -99,7 +99,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json.config", "dst": "laion_clip_ov.json.config", "replacements": [ { @@ -109,7 +109,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_qdq_amd_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb", "dst": 
"laion_clip_qdq_amd_inference_sample.ipynb", "replacements": [ { @@ -119,7 +119,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_qdq_amd.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json", "dst": "laion_clip_qdq_amd.json", "replacements": [ { @@ -129,7 +129,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_qdq_amd.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json.config", "dst": "laion_clip_qdq_amd.json.config", "replacements": [ { @@ -139,7 +139,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_trtrtx.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json", "dst": "laion_clip_trtrtx.json", "replacements": [ { @@ -149,7 +149,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_trtrtx.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json.config", "dst": "laion_clip_trtrtx.json.config", "replacements": [ { @@ -159,7 +159,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_trtrtx_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb", "dst": "laion_clip_trtrtx_inference_sample.ipynb", "replacements": [ { @@ -169,7 +169,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_dml.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json", "dst": "laion_clip_dml.json", "replacements": [ { @@ -179,13 +179,13 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_dml.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json.config", "dst": "laion_clip_dml.json.config", "replacements": [ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_dml_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb", "dst": 
"laion_clip_dml_inference_sample.ipynb", "replacements": [ { @@ -195,19 +195,19 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/clip_script.py", + "src": "../../openai-clip-vit-base-patch16/aitk/clip_script.py", "dst": "clip_script.py" }, { - "src": "../../../openai/clip-vit-base-patch16/1/user_script.py", + "src": "../../openai-clip-vit-base-patch16/aitk/user_script.py", "dst": "user_script.py" }, { - "src": "../../../openai/clip-vit-base-patch16/1/openai_clip_ov.py", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.py", "dst": "laion_clip_ov.py" }, { - "src": "../../../openai/clip-vit-base-patch16/1/README.md", + "src": "../../openai-clip-vit-base-patch16/aitk/README.md", "dst": "README.md", "replacements": [ { @@ -217,7 +217,7 @@ ] }, { - "src": "../../../openai/clip-vit-base-patch16/1/requirements.txt", + "src": "../../openai-clip-vit-base-patch16/aitk/requirements.txt", "dst": "requirements.txt" } ] diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config index b6457585..36887866 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config @@ -1,7 +1,7 @@ { "copies": [ { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/model_project.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config", "dst": "model_project.config", "replacements": [ { @@ -23,7 +23,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json", "dst": "llama3_2_qnn_config.json", "replacements": [ { @@ -37,13 +37,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json.config", "dst": 
"llama3_2_qnn_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json", "dst": "llama3_2_vitis_ai_config.json", "replacements": [ { @@ -57,13 +57,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config", "dst": "llama3_2_vitis_ai_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json", "dst": "llama3_2_ov_config.json", "replacements": [ { @@ -93,7 +93,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config", "dst": "llama3_2_ov_config.json.config", "replacements": [ { @@ -103,7 +103,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "llama3_2_dml_config.json", "replacements": [ { @@ -117,13 +117,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", "dst": "llama3_2_dml_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/README.md", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md", "dst": "README.md", "replacements": [ { @@ -141,13 +141,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/requirements.txt", + "src": 
"../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt", "dst": "requirements.txt", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb", "dst": "inference_sample.ipynb", "replacements": [ { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt index 03275c3e..7af84714 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt @@ -1,2 +1,4 @@ +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK datasets optimum diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config index cfda4ffc..09ba5c76 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config @@ -1,7 +1,7 @@ { "copies": [ { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/model_project.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config", "dst": "model_project.config", "replacements": [ { @@ -23,7 +23,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json", "dst": "phi3_5_qnn_config.json", "replacements": [ { @@ -37,13 +37,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_qnn_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json.config", "dst": "phi3_5_qnn_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json", + "src": 
"../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json", "dst": "phi3_5_vitis_ai_config.json", "replacements": [ { @@ -57,13 +57,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_vitis_ai_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config", "dst": "phi3_5_vitis_ai_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json", "dst": "phi3_5_ov_config.json", "replacements": [ { @@ -77,7 +77,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config", "dst": "phi3_5_ov_config.json.config", "replacements": [ { @@ -87,7 +87,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "phi3_5_dml_config.json", "replacements": [ { @@ -101,13 +101,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_dml_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", "dst": "phi3_5_dml_config.json.config", "replacements": [ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/README.md", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md", "dst": "README.md", "replacements": [ { @@ -121,13 +121,13 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/requirements.txt", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt", "dst": "requirements.txt", "replacements": [ ] }, { - "src": 
"../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb", "dst": "inference_sample.ipynb", "replacements": [ { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt index 03275c3e..7af84714 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt +++ b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt @@ -1,2 +1,4 @@ +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK datasets optimum diff --git a/microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config b/microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config index 1b769d18..5baff691 100644 --- a/microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config +++ b/microsoft-Phi-4-mini-reasoning/aitk/_copy.json.config @@ -1,7 +1,7 @@ { "copies": [ { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/deepseek_ov_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json.config", "dst": "phi4_ov_config.json.config", "replacements": [ { @@ -15,7 +15,7 @@ ] }, { - "src": "../../../deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/1/inference_sample.ipynb", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb", "dst": "inference_sample.ipynb", "replacements": [ { @@ -29,7 +29,7 @@ ] }, { - "src": "../../Phi-3.5-mini-instruct/1/inference_model.json", + "src": "../../microsoft-Phi-3.5-mini-instruct/aitk/inference_model.json", "dst": "inference_model.json", "replacements": [ { diff --git a/openai-clip-vit-base-patch16/aitk/requirements.txt b/openai-clip-vit-base-patch16/aitk/requirements.txt index 0cddd58d..163d793e 100644 --- a/openai-clip-vit-base-patch16/aitk/requirements.txt +++ b/openai-clip-vit-base-patch16/aitk/requirements.txt @@ -1,3 +1,5 @@ +# This file will be installed together with AITK runtime 
requirements +# For the full requirements, see AITK olive-ai cachetools==5.5.0 nltk>=3.9.1 diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config index 16b1d573..1725324a 100644 --- a/openai-clip-vit-base-patch32/aitk/_copy.json.config +++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config @@ -1,13 +1,13 @@ { "copies": [ { - "src": "../../clip-vit-base-patch16/1/model_project.config", + "src": "../../openai-clip-vit-base-patch16/aitk/model_project.config", "dst": "model_project.config", "replacements": [ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_text_qnn_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb", "dst": "openai_clip_text_qnn_inference_sample.ipynb", "replacements": [ { @@ -17,7 +17,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_text_qnn.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json", "dst": "openai_clip_text_qnn.json", "replacements": [ { @@ -27,7 +27,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_text_qnn.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn.json.config", "dst": "openai_clip_text_qnn.json.config", "replacements": [ { @@ -37,7 +37,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_vision_qnn_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb", "dst": "openai_clip_vision_qnn_inference_sample.ipynb", "replacements": [ { @@ -47,7 +47,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_vision_qnn.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json", "dst": "openai_clip_vision_qnn.json", "replacements": [ { @@ -57,7 +57,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_vision_qnn.json.config", + "src": 
"../../openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json.config", "dst": "openai_clip_vision_qnn.json.config", "replacements": [ { @@ -67,7 +67,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_ov_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb", "dst": "openai_clip_ov_inference_sample.ipynb", "replacements": [ { @@ -77,7 +77,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_ov.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json", "dst": "openai_clip_ov.json", "replacements": [ { @@ -87,7 +87,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_ov.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json.config", "dst": "openai_clip_ov.json.config", "replacements": [ { @@ -97,7 +97,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_qdq_amd_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb", "dst": "openai_clip_qdq_amd_inference_sample.ipynb", "replacements": [ { @@ -107,7 +107,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_qdq_amd.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json", "dst": "openai_clip_qdq_amd.json", "replacements": [ { @@ -117,7 +117,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_qdq_amd.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json.config", "dst": "openai_clip_qdq_amd.json.config", "replacements": [ { @@ -127,7 +127,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_trtrtx.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json", "dst": "openai_clip_trtrtx.json", "replacements": [ { @@ -137,7 +137,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_trtrtx.json.config", + "src": 
"../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json.config", "dst": "openai_clip_trtrtx.json.config", "replacements": [ { @@ -147,7 +147,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_trtrtx_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb", "dst": "openai_clip_trtrtx_inference_sample.ipynb", "replacements": [ { @@ -157,7 +157,7 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_dml.json", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json", "dst": "openai_clip_dml.json", "replacements": [ { @@ -167,13 +167,13 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_dml.json.config", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json.config", "dst": "openai_clip_dml.json.config", "replacements": [ ] }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_dml_inference_sample.ipynb", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb", "dst": "openai_clip_dml_inference_sample.ipynb", "replacements": [ { @@ -183,23 +183,23 @@ ] }, { - "src": "../../clip-vit-base-patch16/1/clip_script.py", + "src": "../../openai-clip-vit-base-patch16/aitk/clip_script.py", "dst": "clip_script.py" }, { - "src": "../../clip-vit-base-patch16/1/user_script.py", + "src": "../../openai-clip-vit-base-patch16/aitk/user_script.py", "dst": "user_script.py" }, { - "src": "../../clip-vit-base-patch16/1/openai_clip_ov.py", + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.py", "dst": "openai_clip_ov.py" }, { - "src": "../../clip-vit-base-patch16/1/README.md", + "src": "../../openai-clip-vit-base-patch16/aitk/README.md", "dst": "README.md" }, { - "src": "../../clip-vit-base-patch16/1/requirements.txt", + "src": "../../openai-clip-vit-base-patch16/aitk/requirements.txt", "dst": "requirements.txt" } ] diff --git a/openai-clip-vit-base-patch32/aitk/requirements.txt 
b/openai-clip-vit-base-patch32/aitk/requirements.txt index 0cddd58d..163d793e 100644 --- a/openai-clip-vit-base-patch32/aitk/requirements.txt +++ b/openai-clip-vit-base-patch32/aitk/requirements.txt @@ -1,3 +1,5 @@ +# This file will be installed together with AITK runtime requirements +# For the full requirements, see AITK olive-ai cachetools==5.5.0 nltk>=3.9.1 From fd1013d8ba9755f651578c419e6706efd9c60127 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 29 Jul 2025 17:17:34 +0800 Subject: [PATCH 15/15] move --- .../NvTensorRtRtx/Qwen2.5-1.5B-Instruct_nvmo_int4_awq.json | 0 .../NvTensorRtRtx/README.md | 0 .../NvTensorRtRtx/info.yml | 0 .../NvTensorRtRtx/requirements-nvmo-awq.txt | 0 .../DeepSeek-R1-Distill-Qwen-1.5B_fp16_model_builder.json | 0 .../NvTensorRtRtx/README.md | 0 .../NvTensorRtRtx/info.yml | 0 .../NvTensorRtRtx/Llama-3.2-1B-Instruct_nvmo_int4_awq.json | 0 .../NvTensorRtRtx/README.md | 0 .../NvTensorRtRtx/info.yml | 0 .../NvTensorRtRtx/requirements-nvmo-awq.txt | 0 .../NvTensorRtRtx/Phi-3.5-mini-instruct_nvmo_int4_awq.json | 0 .../NvTensorRtRtx/README.md | 0 .../NvTensorRtRtx/info.yml | 0 .../NvTensorRtRtx/requirements-nvmo-awq.txt | 0 15 files changed, 0 insertions(+), 0 deletions(-) rename {Qwen2.5-1.5B-Instruct => Qwen-Qwen2.5-1.5B-Instruct}/NvTensorRtRtx/Qwen2.5-1.5B-Instruct_nvmo_int4_awq.json (100%) rename {Qwen2.5-1.5B-Instruct => Qwen-Qwen2.5-1.5B-Instruct}/NvTensorRtRtx/README.md (100%) rename {Qwen2.5-1.5B-Instruct => Qwen-Qwen2.5-1.5B-Instruct}/NvTensorRtRtx/info.yml (100%) rename {Llama-3.2-1B-Instruct => Qwen-Qwen2.5-1.5B-Instruct}/NvTensorRtRtx/requirements-nvmo-awq.txt (100%) rename {DeepSeek-R1-Distill-Qwen-1.5B => deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B}/NvTensorRtRtx/DeepSeek-R1-Distill-Qwen-1.5B_fp16_model_builder.json (100%) rename {DeepSeek-R1-Distill-Qwen-1.5B => deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B}/NvTensorRtRtx/README.md (100%) rename {DeepSeek-R1-Distill-Qwen-1.5B => 
deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B}/NvTensorRtRtx/info.yml (100%) rename {Llama-3.2-1B-Instruct => meta-llama-Llama-3.2-1B-Instruct}/NvTensorRtRtx/Llama-3.2-1B-Instruct_nvmo_int4_awq.json (100%) rename {Llama-3.2-1B-Instruct => meta-llama-Llama-3.2-1B-Instruct}/NvTensorRtRtx/README.md (100%) rename {Llama-3.2-1B-Instruct => meta-llama-Llama-3.2-1B-Instruct}/NvTensorRtRtx/info.yml (100%) rename {Phi-3.5-mini-instruct => meta-llama-Llama-3.2-1B-Instruct}/NvTensorRtRtx/requirements-nvmo-awq.txt (100%) rename {Phi-3.5-mini-instruct => microsoft-Phi-3.5-mini-instruct}/NvTensorRtRtx/Phi-3.5-mini-instruct_nvmo_int4_awq.json (100%) rename {Phi-3.5-mini-instruct => microsoft-Phi-3.5-mini-instruct}/NvTensorRtRtx/README.md (100%) rename {Phi-3.5-mini-instruct => microsoft-Phi-3.5-mini-instruct}/NvTensorRtRtx/info.yml (100%) rename {Qwen2.5-1.5B-Instruct => microsoft-Phi-3.5-mini-instruct}/NvTensorRtRtx/requirements-nvmo-awq.txt (100%) diff --git a/Qwen2.5-1.5B-Instruct/NvTensorRtRtx/Qwen2.5-1.5B-Instruct_nvmo_int4_awq.json b/Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/Qwen2.5-1.5B-Instruct_nvmo_int4_awq.json similarity index 100% rename from Qwen2.5-1.5B-Instruct/NvTensorRtRtx/Qwen2.5-1.5B-Instruct_nvmo_int4_awq.json rename to Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/Qwen2.5-1.5B-Instruct_nvmo_int4_awq.json diff --git a/Qwen2.5-1.5B-Instruct/NvTensorRtRtx/README.md b/Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/README.md similarity index 100% rename from Qwen2.5-1.5B-Instruct/NvTensorRtRtx/README.md rename to Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/README.md diff --git a/Qwen2.5-1.5B-Instruct/NvTensorRtRtx/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/info.yml similarity index 100% rename from Qwen2.5-1.5B-Instruct/NvTensorRtRtx/info.yml rename to Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/info.yml diff --git a/Llama-3.2-1B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt b/Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt similarity index 100% rename 
from Llama-3.2-1B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt rename to Qwen-Qwen2.5-1.5B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt diff --git a/DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/DeepSeek-R1-Distill-Qwen-1.5B_fp16_model_builder.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/DeepSeek-R1-Distill-Qwen-1.5B_fp16_model_builder.json similarity index 100% rename from DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/DeepSeek-R1-Distill-Qwen-1.5B_fp16_model_builder.json rename to deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/DeepSeek-R1-Distill-Qwen-1.5B_fp16_model_builder.json diff --git a/DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/README.md similarity index 100% rename from DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/README.md rename to deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/README.md diff --git a/DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/info.yml similarity index 100% rename from DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/info.yml rename to deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/NvTensorRtRtx/info.yml diff --git a/Llama-3.2-1B-Instruct/NvTensorRtRtx/Llama-3.2-1B-Instruct_nvmo_int4_awq.json b/meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/Llama-3.2-1B-Instruct_nvmo_int4_awq.json similarity index 100% rename from Llama-3.2-1B-Instruct/NvTensorRtRtx/Llama-3.2-1B-Instruct_nvmo_int4_awq.json rename to meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/Llama-3.2-1B-Instruct_nvmo_int4_awq.json diff --git a/Llama-3.2-1B-Instruct/NvTensorRtRtx/README.md b/meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/README.md similarity index 100% rename from Llama-3.2-1B-Instruct/NvTensorRtRtx/README.md rename to meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/README.md diff --git a/Llama-3.2-1B-Instruct/NvTensorRtRtx/info.yml b/meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/info.yml similarity index 
100% rename from Llama-3.2-1B-Instruct/NvTensorRtRtx/info.yml rename to meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/info.yml diff --git a/Phi-3.5-mini-instruct/NvTensorRtRtx/requirements-nvmo-awq.txt b/meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt similarity index 100% rename from Phi-3.5-mini-instruct/NvTensorRtRtx/requirements-nvmo-awq.txt rename to meta-llama-Llama-3.2-1B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt diff --git a/Phi-3.5-mini-instruct/NvTensorRtRtx/Phi-3.5-mini-instruct_nvmo_int4_awq.json b/microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/Phi-3.5-mini-instruct_nvmo_int4_awq.json similarity index 100% rename from Phi-3.5-mini-instruct/NvTensorRtRtx/Phi-3.5-mini-instruct_nvmo_int4_awq.json rename to microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/Phi-3.5-mini-instruct_nvmo_int4_awq.json diff --git a/Phi-3.5-mini-instruct/NvTensorRtRtx/README.md b/microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/README.md similarity index 100% rename from Phi-3.5-mini-instruct/NvTensorRtRtx/README.md rename to microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/README.md diff --git a/Phi-3.5-mini-instruct/NvTensorRtRtx/info.yml b/microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/info.yml similarity index 100% rename from Phi-3.5-mini-instruct/NvTensorRtRtx/info.yml rename to microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/info.yml diff --git a/Qwen2.5-1.5B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt b/microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/requirements-nvmo-awq.txt similarity index 100% rename from Qwen2.5-1.5B-Instruct/NvTensorRtRtx/requirements-nvmo-awq.txt rename to microsoft-Phi-3.5-mini-instruct/NvTensorRtRtx/requirements-nvmo-awq.txt