From 72b2ef9627480db706fb46b68ac9bb37f895906e Mon Sep 17 00:00:00 2001
From: Will Burford <will@lmstudio.ai>
Date: Fri, 22 May 2026 18:09:26 -0400
Subject: [PATCH 1/3] Add physical batch size subtitle and clarify eval batch size help text

---
 en/config.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/en/config.json b/en/config.json
index ebbccdc6..046432b8 100644
--- a/en/config.json
+++ b/en/config.json
@@ -148,8 +148,9 @@
   "llm.load.numCpuExpertLayersRatio/info": "Specifies the number of layers to force the expert into CPU. Leaves attention layers on GPU, saving VRAM while keeping inference fairly fast.",
 
   "llm.load.llama.evalBatchSize/title": "Evaluation Batch Size",
-  "llm.load.llama.evalBatchSize/subTitle": "Number of input tokens to process at a time. Increasing this increases performance at the cost of memory usage",
+  "llm.load.llama.evalBatchSize/subTitle": "Logical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage",
   "llm.load.llama.evalBatchSize/info": "Sets the number of examples processed together in one batch during evaluation, affecting speed and memory usage",
+  "llm.load.llama.physicalBatchSize/subTitle": "Physical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage",
   "llm.load.llama.ropeFrequencyBase/title": "RoPE Frequency Base",
   "llm.load.llama.ropeFrequencyBase/subTitle": "Custom base frequency for rotary positional embeddings (RoPE). Increasing this may enable better performance at high context lengths",
   "llm.load.llama.ropeFrequencyBase/info": "[Advanced] Adjusts the base frequency for Rotary Positional Encoding, affecting how positional information is embedded",

From 275fdf0829a30073bf4b92641f1037f634df70b7 Mon Sep 17 00:00:00 2001
From: Will Burford <will@lmstudio.ai>
Date: Fri, 22 May 2026 18:17:37 -0400
Subject: [PATCH 2/3] Reword llama eval and physical batch size help copy

---
 en/config.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/en/config.json b/en/config.json
index 046432b8..9b2dff34 100644
--- a/en/config.json
+++ b/en/config.json
@@ -148,9 +148,9 @@
   "llm.load.numCpuExpertLayersRatio/info": "Specifies the number of layers to force the expert into CPU. Leaves attention layers on GPU, saving VRAM while keeping inference fairly fast.",
 
   "llm.load.llama.evalBatchSize/title": "Evaluation Batch Size",
-  "llm.load.llama.evalBatchSize/subTitle": "Logical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage",
-  "llm.load.llama.evalBatchSize/info": "Sets the number of examples processed together in one batch during evaluation, affecting speed and memory usage",
-  "llm.load.llama.physicalBatchSize/subTitle": "Physical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage",
+  "llm.load.llama.evalBatchSize/subTitle": "Maximum number of prompt tokens to schedule during prompt processing. Larger values may improve performance, but can use more memory",
+  "llm.load.llama.evalBatchSize/info": "Sets the maximum number of prompt tokens to schedule during prompt processing",
+  "llm.load.llama.physicalBatchSize/subTitle": "Maximum number of prompt tokens to process in one chunk. Larger values may improve performance, but can use more memory",
   "llm.load.llama.ropeFrequencyBase/title": "RoPE Frequency Base",
   "llm.load.llama.ropeFrequencyBase/subTitle": "Custom base frequency for rotary positional embeddings (RoPE). Increasing this may enable better performance at high context lengths",
   "llm.load.llama.ropeFrequencyBase/info": "[Advanced] Adjusts the base frequency for Rotary Positional Encoding, affecting how positional information is embedded",

From d233f8ebbd6ce1ecdaf262ebb003dacd43622c22 Mon Sep 17 00:00:00 2001
From: Will Burford <will@lmstudio.ai>
Date: Fri, 22 May 2026 18:23:02 -0400
Subject: [PATCH 3/3] Remove unused eval batch size info copy

---
 en/config.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/en/config.json b/en/config.json
index 9b2dff34..609c0365 100644
--- a/en/config.json
+++ b/en/config.json
@@ -149,7 +149,6 @@
 
   "llm.load.llama.evalBatchSize/title": "Evaluation Batch Size",
   "llm.load.llama.evalBatchSize/subTitle": "Maximum number of prompt tokens to schedule during prompt processing. Larger values may improve performance, but can use more memory",
-  "llm.load.llama.evalBatchSize/info": "Sets the maximum number of prompt tokens to schedule during prompt processing",
   "llm.load.llama.physicalBatchSize/subTitle": "Maximum number of prompt tokens to process in one chunk. Larger values may improve performance, but can use more memory",
   "llm.load.llama.ropeFrequencyBase/title": "RoPE Frequency Base",
   "llm.load.llama.ropeFrequencyBase/subTitle": "Custom base frequency for rotary positional embeddings (RoPE). Increasing this may enable better performance at high context lengths",