From 96816a244b8d64576b4f58a766b50ae4d155d41e Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Tue, 20 May 2025 17:36:28 -0700
Subject: [PATCH] Add new export LLM config

[ghstack-poisoned]
---
Review notes (this section is ignored when the patch is applied):
- Resolved the merge-conflict markers that the last hunk was adding to
  llm_config.py, keeping the incoming side (KVCacheConfig); the HEAD side
  was empty. Hunk header and diffstat are updated to match.
- local_global_attention defaults to None, so it is annotated
  Optional[List[int]] like the other None-defaulted fields.
- NOTE(review): the post-image blob id on the "index" line predates these
  fixes; regenerate the patch if an exact id is required.

 examples/models/llama/config/llm_config.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/examples/models/llama/config/llm_config.py b/examples/models/llama/config/llm_config.py
index c668d77c51d..c80c6be6eaf 100644
--- a/examples/models/llama/config/llm_config.py
+++ b/examples/models/llama/config/llm_config.py
@@ -23,7 +23,6 @@ class BaseConfig:
     These are specific to the specific model, e.g. whether it’s Qwen3 0.6B or Phi-4-mini.
     for each of these different models, you can expect each of these fields to change.
     """
-
     model_class: str = "llama"
     params: Optional[str] = None
     checkpoint: Optional[str] = None
@@ -41,7 +40,6 @@ class ModelConfig:
     optimizations / actual configurations. The same ModelConfig can be applied to
     different models.
     """
-
     dtype_override: str = "fp32"
     enable_dynamic_shape: bool = True
     use_shared_embedding: bool = False
@@ -68,6 +66,14 @@ class ExportConfig:
 
 
 @dataclass
+class KVCacheConfig:
+    use_kv_cache: Optional[bool] = None
+    quantize_kv_cache: Optional[bool] = None
+    local_global_attention: Optional[List[int]] = None
+    # ...potentially more in the future such as cache eviction strategy
+
+
+@dataclass
 class DebugConfig:
     profile_memory: bool = False
     profile_path: Optional[str] = None