From 72b2ef9627480db706fb46b68ac9bb37f895906e Mon Sep 17 00:00:00 2001 From: Will Burford Date: Fri, 22 May 2026 18:09:26 -0400 Subject: [PATCH 1/3] Update llama batch size help text --- en/config.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/en/config.json b/en/config.json index ebbccdc6..046432b8 100644 --- a/en/config.json +++ b/en/config.json @@ -148,8 +148,9 @@ "llm.load.numCpuExpertLayersRatio/info": "Specifies the number of layers to force the expert into CPU. Leaves attention layers on GPU, saving VRAM while keeping inference fairly fast.", "llm.load.llama.evalBatchSize/title": "Evaluation Batch Size", - "llm.load.llama.evalBatchSize/subTitle": "Number of input tokens to process at a time. Increasing this increases performance at the cost of memory usage", + "llm.load.llama.evalBatchSize/subTitle": "Logical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage", "llm.load.llama.evalBatchSize/info": "Sets the number of examples processed together in one batch during evaluation, affecting speed and memory usage", + "llm.load.llama.physicalBatchSize/subTitle": "Physical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage", "llm.load.llama.ropeFrequencyBase/title": "RoPE Frequency Base", "llm.load.llama.ropeFrequencyBase/subTitle": "Custom base frequency for rotary positional embeddings (RoPE). Increasing this may enable better performance at high context lengths", "llm.load.llama.ropeFrequencyBase/info": "[Advanced] Adjusts the base frequency for Rotary Positional Encoding, affecting how positional information is embedded", From 275fdf0829a30073bf4b92641f1037f634df70b7 Mon Sep 17 00:00:00 2001 From: Will Burford Date: Fri, 22 May 2026 18:17:37 -0400 Subject: [PATCH 2/3] Update llama batch size help copy --- en/config.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/en/config.json b/en/config.json index 046432b8..9b2dff34 100644 --- a/en/config.json +++ b/en/config.json @@ -148,9 +148,9 @@ "llm.load.numCpuExpertLayersRatio/info": "Specifies the number of layers to force the expert into CPU. Leaves attention layers on GPU, saving VRAM while keeping inference fairly fast.", "llm.load.llama.evalBatchSize/title": "Evaluation Batch Size", - "llm.load.llama.evalBatchSize/subTitle": "Logical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage", - "llm.load.llama.evalBatchSize/info": "Sets the number of examples processed together in one batch during evaluation, affecting speed and memory usage", - "llm.load.llama.physicalBatchSize/subTitle": "Physical number of prompt tokens to process at a time. Increasing this increases performance at the cost of memory usage", + "llm.load.llama.evalBatchSize/subTitle": "Maximum number of prompt tokens to schedule during prompt processing. Larger values may improve performance, but can use more memory", + "llm.load.llama.evalBatchSize/info": "Sets the maximum number of prompt tokens to schedule during prompt processing", + "llm.load.llama.physicalBatchSize/subTitle": "Maximum number of prompt tokens to process in one chunk. Larger values may improve performance, but can use more memory", "llm.load.llama.ropeFrequencyBase/title": "RoPE Frequency Base", "llm.load.llama.ropeFrequencyBase/subTitle": "Custom base frequency for rotary positional embeddings (RoPE). Increasing this may enable better performance at high context lengths", "llm.load.llama.ropeFrequencyBase/info": "[Advanced] Adjusts the base frequency for Rotary Positional Encoding, affecting how positional information is embedded", From d233f8ebbd6ce1ecdaf262ebb003dacd43622c22 Mon Sep 17 00:00:00 2001 From: Will Burford Date: Fri, 22 May 2026 18:23:02 -0400 Subject: [PATCH 3/3] Remove unused eval batch size info copy --- en/config.json | 1 - 1 file changed, 1 deletion(-) diff --git a/en/config.json b/en/config.json index 9b2dff34..609c0365 100644 --- a/en/config.json +++ b/en/config.json @@ -149,7 +149,6 @@ "llm.load.llama.evalBatchSize/title": "Evaluation Batch Size", "llm.load.llama.evalBatchSize/subTitle": "Maximum number of prompt tokens to schedule during prompt processing. Larger values may improve performance, but can use more memory", - "llm.load.llama.evalBatchSize/info": "Sets the maximum number of prompt tokens to schedule during prompt processing", "llm.load.llama.physicalBatchSize/subTitle": "Maximum number of prompt tokens to process in one chunk. Larger values may improve performance, but can use more memory", "llm.load.llama.ropeFrequencyBase/title": "RoPE Frequency Base", "llm.load.llama.ropeFrequencyBase/subTitle": "Custom base frequency for rotary positional embeddings (RoPE). Increasing this may enable better performance at high context lengths",