Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 23 additions & 14 deletions src/subcommands/load.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,27 @@ import { terminalSize } from "@lmstudio/lms-isomorphic";
import {
type EstimatedResourcesUsage,
type LLMLoadModelConfig,
type ModelInfo,
type LMStudioClient,
type ModelInfo,
} from "@lmstudio/sdk";
import chalk from "chalk";
import fuzzy from "fuzzy";
import { getCliPref } from "../cliPref.js";
import { addCreateClientOptions, createClient, type CreateClientArgs } from "../createClient.js";
import { type DeviceNameResolver, createDeviceNameResolver } from "../deviceNameLookup.js";
import { formatElapsedTime } from "../formatElapsedTime.js";
import { createDeviceNameResolver, type DeviceNameResolver } from "../deviceNameLookup.js";
import { formatSizeBytes1024 } from "../formatBytes.js";
import { formatElapsedTime } from "../formatElapsedTime.js";
import { fuzzyHighlightOptions, searchTheme } from "../inquirerTheme.js";
import { addLogLevelOptions, createLogger, type LogLevelArgs } from "../logLevel.js";
import { runPromptWithExitHandling } from "../prompt.js";
import { Spinner } from "../Spinner.js";
import { createRefinedNumberParser } from "../types/refinedNumber.js";
import { fuzzyHighlightOptions, searchTheme } from "../inquirerTheme.js";

const gpuOptionParser = (str: string): number => {
const gpuOptionParser = (str: string): number | "auto" => {
str = str.trim().toLowerCase();
if (str === "auto") {
return "auto";
}
if (str === "off") {
return 0;
} else if (str === "max") {
Expand All @@ -47,7 +50,7 @@ type LoadCommandOptions = OptionValues &
CreateClientArgs &
LogLevelArgs & {
ttl?: number;
gpu?: number;
gpu?: number | "auto";
contextLength?: number;
parallel?: number;
exact?: boolean;
Expand Down Expand Up @@ -88,9 +91,9 @@ const loadCommand = new Command<[], LoadCommandOptions>()
new Option(
"--gpu <offload-ratio>",
text`
GPU offload ratio. Valid values: "off" (disable GPU), "max" (full offload), or a number
between 0 and 1 (e.g., "0.5" for 50% offload). By default, LM Studio automatically
determines the optimal offload ratio.
GPU offload. Valid values: "auto" (decide automatically), "off" (disable GPU), "max" (full
offload), or a number between 0 and 1 (e.g., "0.5" for 50% offload).
Defaults to "auto".
`,
).argParser(gpuOptionParser),
)
Expand Down Expand Up @@ -180,9 +183,11 @@ loadCommand.action(async (modelKeyArg, options: LoadCommandOptions) => {
maxParallelPredictions,
};
if (gpu !== undefined) {
loadConfig.gpu = {
ratio: gpu,
};
if (gpu === "auto") {
loadConfig.gpu = { fit: true };
} else {
loadConfig.gpu = { fit: false, ratio: gpu };
}
}
let modelKey = modelKeyArg;
const logger = createLogger(options);
Expand Down Expand Up @@ -556,7 +561,7 @@ async function loadModel({
function printEstimatedResourceUsage(
model: ModelInfo,
contextLength: number | undefined,
gpuOffloadRatio: number | undefined,
gpuOffloadRatio: number | "auto" | undefined,
estimate: EstimatedResourcesUsage,
logger: SimpleLogger,
) {
Expand All @@ -566,7 +571,11 @@ function printEstimatedResourceUsage(
logger.info(`Context Length: ${contextLength.toLocaleString()}`);
}
if (gpuOffloadRatio !== undefined) {
logger.info(`GPU Offload: ${gpuOffloadRatio * 100}%`);
if (gpuOffloadRatio === "auto") {
logger.info(`GPU Offload: Auto`);
} else {
logger.info(`GPU Offload: ${gpuOffloadRatio * 100}%`);
}
}
logger.info(
`Estimated GPU Memory: ${colorFunc(formatSizeBytes1024(estimate.memory.totalVramBytes))}`,
Expand Down
Loading