We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 43ee3f9 commit aebf42e Copy full SHA for aebf42e
1 file changed
examples/models/llama/export_llama_lib.py
@@ -1171,6 +1171,8 @@ def _get_xnnpack_partitioners(llm_config: LlmConfig) -> Optional[List[Partitione
1171
"""Get XNNPACK partitioners for multimethod_lora export."""
1172
partitioners = []
1173
1174
+ # Order matters here: dynamic quantization should be applied first when
1175
+ # both xnnpack and xnnpack_extended_ops are enabled.
1176
if llm_config.backend.xnnpack.enabled:
1177
partitioners.append(
1178
get_xnnpack_partitioner(dynamic_quant_only_partitioner=True)
0 commit comments