From 96816a244b8d64576b4f58a766b50ae4d155d41e Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Tue, 20 May 2025 17:36:28 -0700
Subject: [PATCH] Add new export LLM config

[ghstack-poisoned]
---
 examples/models/llama/config/llm_config.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/examples/models/llama/config/llm_config.py b/examples/models/llama/config/llm_config.py
index c668d77c51d..c80c6be6eaf 100644
--- a/examples/models/llama/config/llm_config.py
+++ b/examples/models/llama/config/llm_config.py
@@ -23,7 +23,6 @@ class BaseConfig:
     These are specific to the specific model, e.g. whether it’s Qwen3 0.6B or Phi-4-mini.
     for each of these different models, you can expect each of these fields to change.
     """
-
     model_class: str = "llama"
     params: Optional[str] = None
     checkpoint: Optional[str] = None
@@ -41,7 +40,6 @@ class ModelConfig:
     optimizations / actual configurations. The same ModelConfig can be applied
     to different models.
     """
-
     dtype_override: str = "fp32"
     enable_dynamic_shape: bool = True
     use_shared_embedding: bool = False
@@ -68,6 +66,20 @@ class ExportConfig:
 
 
 @dataclass
+class KVCacheConfig:
+    """
+    Key-value (KV) cache settings.
+    Every field defaults to None.
+    """
+    use_kv_cache: Optional[bool] = None
+    quantize_kv_cache: Optional[bool] = None
+    # NOTE(review): the meaning of the list elements is not defined in this
+    # file -- confirm against the code that consumes this config.
+    local_global_attention: Optional[List[int]] = None
+    # ...potentially more in the future such as cache eviction strategy
+
+
+@dataclass
 class DebugConfig:
     profile_memory: bool = False
     profile_path: Optional[str] = None