diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index 1453436d..1eb736ea 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,6 +1,6 @@ { "configCheck": 171, - "copyCheck": 182, + "copyCheck": 181, "executeRuntimeCheck": 104, "extensionCheck": 2, "gitignoreCheck": 44, @@ -13,6 +13,6 @@ "pathCheck": 1455, "requirementsCheck": 37, "templateCheck": 3, - "venvRequirementsCheck": 22, + "venvRequirementsCheck": 23, "winmlCopyCheck": 38 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index aefac9ab..d76f3f6c 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -18,7 +18,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "microsoft-Phi-3.5-mini-instruct/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -69,7 +69,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -193,7 +193,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "meta-llama-Llama-3.2-1B-Instruct/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -269,7 +269,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "Qwen-Qwen2.5-1.5B-Instruct/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -520,7 +520,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "meta-llama-Llama-3.1-8B-Instruct/aitk", - "version": 5, + "version": 6, "p0": false, "pipeline_tags": [ "text-generation" @@ -1047,7 +1047,8 @@ "bfloat16": "a:bf16" }, "QuarkWeightType": { - "w_uint4_per_group_asym": "w:int4" + "w_uint4_per_group_asym": "w:uint4", + "uint4_wo_128": "w:uint4" } } } diff --git a/.aitk/requirements/AMD/Quark_py3.12.13.txt b/.aitk/requirements/AMD/Quark_py3.12.13.txt new file mode 100644 index 00000000..9e2b80b8 --- /dev/null +++ b/.aitk/requirements/AMD/Quark_py3.12.13.txt @@ -0,0 +1,107 @@ +--extra-index-url=https://download.pytorch.org/whl/cu128 +--extra-index-url=https://pypi.amd.com/olive/1.7.1-5D/simple +--extra-index-url=https://pypi.amd.com/simple +accelerate==1.13.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.13.5 +aiosignal==1.4.0 +alembic==1.18.4 +amd-quark==0.11 +annotated-doc==0.0.4 +annotated-types==0.7.0 +anyio==4.13.0 +attrs==26.1.0 +certifi==2026.4.22 +charset-normalizer==3.4.7 +click==8.4.0 +colorama==0.4.6 +colorlog==6.10.1 +datasets==4.8.5 +dill==0.4.1 +evaluate==0.4.6 +filelock==3.29.0 +flatbuffers==25.12.19 +frozenlist==1.8.0 +fsspec==2026.2.0 +greenlet==3.5.0 +h11==0.16.0 +hf-xet==1.5.0 +httpcore==1.0.9 +httpx==0.28.1 +huggingface-hub==0.36.2 +idna==3.15 +importlib-metadata==8.7.1 +jinja2==3.1.6 +joblib==1.5.3 +lightning-utilities==0.15.3 +mako==1.3.12 +markdown-it-py==4.2.0 +markupsafe==3.0.3 +mdurl==0.1.2 +ml-dtypes==0.5.4 +model-generate==1.7.1 +mpmath==1.3.0 +multidict==6.7.1 +multiprocess==0.70.19 +narwhals==2.21.2 +networkx==3.6.1 +ninja==1.13.0 +nltk==3.9.4 +numpy==1.26.4 +olive-ai==0.12.1 +onnx==1.18.0 +onnx-ir==0.2.1 +onnx-tool==1.0.1 +onnxruntime==1.26.0 +onnxruntime-genai==0.13.2 +onnxscript==0.7.0 +onnxsim==0.6.3 +onnxslim==0.1.93 +opentelemetry-api==1.41.1 +opentelemetry-sdk==1.41.1 +opentelemetry-semantic-conventions==0.62b1 +optimum==2.1.0 +optuna==4.8.0 +packaging==26.2 +pandas==3.0.3 +plotly==6.7.0 +prompt-toolkit==3.0.52 +propcache==0.5.2 +protobuf==7.34.1 +psutil==7.2.2 +pyarrow==24.0.0 +pydantic==2.13.4 +pydantic-core==2.46.4 +pygments==2.20.0 +python-dateutil==2.9.0.post0 +pyyaml==6.0.3 +questionary==2.1.1 +regex==2026.5.9 +requests==2.34.2 +rich==15.0.0 +ryzenai-dynamic-dispatch==1.7.1 +ryzenai-onnx-utils==1.7.1 +safetensors==0.7.0 +scipy==1.17.1 +sentencepiece==0.2.1 +setuptools==81.0.0 +shellingham==1.5.4 +six==1.17.0 +sqlalchemy==2.0.49 +sympy==1.14.0 +tabulate==0.10.0 +tokenizers==0.22.2 +torch==2.7.1+cu128 +torchmetrics==1.9.0 +tqdm==4.67.3 +transformers==4.57.6 +typer==0.25.1 +typing-extensions==4.15.0 +typing-inspection==0.4.2 +tzdata==2026.2 +urllib3==2.7.0 +wcwidth==0.7.0 +xxhash==3.7.0 +yarl==1.23.0 +zipp==4.1.0 +zstandard==0.25.0 diff --git a/.aitk/requirements/AMD/sitecustomize.py b/.aitk/requirements/AMD/sitecustomize.py new file mode 100644 index 00000000..d73ffde7 --- /dev/null +++ b/.aitk/requirements/AMD/sitecustomize.py @@ -0,0 +1,7 @@ +try: + import pyarrow # noqa + import pyarrow.dataset # noqa + import pyarrow.compute # noqa +except Exception: + pass + diff --git a/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 1c98763a..1a95be2a 100644 --- a/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index af6ec72c..caf4dafb 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Qwen2.5-1.5B-Instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "qwen2_5_ov_gpu_config.json" devices: @@ -41,7 +41,7 @@ recipes: aitk: modelInfo: id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" - version: 7 + version: 8 groupId: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" groupItemName: "1.5B" p0: true diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config index fdd9f88f..ea852ef7 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", - "version": 7 + "version": 8 } } diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json index bbd0de14..5ea92228 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 5a64cbcb..a608a451 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index cba12f35..3ff25517 100644 --- a/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 1a31c0a5..27e12328 100644 --- a/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 6cbffe8a..90a5871c 100644 --- a/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index cb398a3e..6d610732 100644 --- a/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json index ff6dffe6..d7f7d190 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json @@ -20,36 +20,19 @@ ], "passes": { "qq": { - "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "type": "QuarkQuantization", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config index feada2a9..2070828c 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index 9486b13f..5400e5bd 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/DeepSeek-R1-Distill-Qwen-1.5B_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "deepseek_ov_gpu_config.json" devices: @@ -41,7 +41,7 @@ recipes: aitk: modelInfo: id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" - version: 7 + version: 8 groupId: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" groupItemName: "1.5B" p0: true diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config index 64ea7551..fd3da91f 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "version": 7 + "version": 8 } } diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config index 968721c0..c5ab868d 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config index 9c433fa1..c78d3191 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config @@ -1,19 +1,5 @@ { "copies": [ - { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json", - "dst": "llama3_1_vitis_ai_config.json", - "replacements": [ - { - "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "replace": "meta-llama/Llama-3.1-8B-Instruct" - }, - { - "find": "model/deepseek", - "replace": "model/llama3_1" - } - ] - }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "llama3_1_dml_config.json", diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml b/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml index 76ede4ce..cb134ffd 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Llama-3.1-8B-Instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU isGPUSuggested: true - file: "llama3_1_ov_config.json" @@ -35,5 +35,5 @@ recipes: aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.1-8B-Instruct" - version: 5 + version: 6 p0: false diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json index 2cbfe5bf..aa140b92 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config index ed2ced33..a7dec835 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config index 8ab8dedc..b1e84476 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config @@ -27,6 +27,6 @@ ], "modelInfo": { "id": "huggingface/meta-llama/Llama-3.1-8B-Instruct", - "version": 5 + "version": 6 } } diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml index 803ac4a1..b9f8aff5 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Llama-3.2-1B-Instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "llama3_2_ov_config.json" devices: @@ -41,5 +41,5 @@ recipes: aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.2-1B-Instruct" - version: 7 + version: 8 p0: true diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json index 38dceba2..11cf511a 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config index fa4b549b..762473e3 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config index 3df076bb..769ec19e 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/meta-llama/Llama-3.2-1B-Instruct", - "version": 7 + "version": 8 } } diff --git a/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config b/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config index 8ca32fde..f78b6ae1 100644 --- a/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config +++ b/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config b/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config index 5c6254fb..ba219335 100644 --- a/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config +++ b/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index 1f85b22d..e7fd3e32 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Phi-3.5-mini-instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "phi3_5_ov_gpu_config.json" devices: @@ -41,5 +41,5 @@ recipes: aitk: modelInfo: id: "huggingface/microsoft/Phi-3.5-mini-instruct" - version: 7 + version: 8 p0: true diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config index 1162f828..409edc01 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/microsoft/Phi-3.5-mini-instruct", - "version": 7 + "version": 8 } } diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json index 741a8c02..edf53d12 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.qkv_proj" ], - "inp": "self_attn.qkv_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.qkv_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_up_proj" ], - "inp": "mlp.gate_up_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.gate_up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config index c3cede35..2ca77543 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config b/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config index 4c7b42a2..8e901993 100644 --- a/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config +++ b/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config b/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config index 547ccda6..62e83bd2 100644 --- a/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config +++ b/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config b/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config index bf876512..544070a3 100644 --- a/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config +++ b/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true,