diff --git a/ios/OffgridMobile/Info.plist b/ios/OffgridMobile/Info.plist
index 3e6cdd82..414d96e3 100644
--- a/ios/OffgridMobile/Info.plist
+++ b/ios/OffgridMobile/Info.plist
@@ -31,8 +31,6 @@
NSAllowsLocalNetworking
- NSLocalNetworkUsageDescription
- Off Grid scans your local network to automatically discover LLM servers such as Ollama and LM Studio.
NSBonjourServices
_http._tcp
@@ -43,6 +41,8 @@
This app needs access to your camera to take photos and attach them to conversations.
NSFaceIDUsageDescription
This app may use Face ID to protect access to your stored credentials.
+ NSLocalNetworkUsageDescription
+ Off Grid scans your local network to automatically discover LLM servers such as Ollama and LM Studio.
NSMicrophoneUsageDescription
This app needs access to your microphone for voice-to-text transcription using Whisper.
NSPhotoLibraryAddUsageDescription
diff --git a/ios/Podfile.lock b/ios/Podfile.lock
index 8646eef3..2674daf8 100644
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@@ -10,7 +10,7 @@ PODS:
- hermes-engine (0.14.0):
- hermes-engine/Pre-built (= 0.14.0)
- hermes-engine/Pre-built (0.14.0)
- - llama-rn (0.11.4):
+ - llama-rn (0.12.0-rc.5):
- boost
- DoubleConversion
- fast_float
@@ -40,15 +40,15 @@ PODS:
- ReactCommon/turbomodule/core
- SocketRocket
- Yoga
- - lottie-ios (4.6.0)
- - lottie-react-native (7.3.6):
+ - lottie-ios (4.5.0)
+ - lottie-react-native (7.3.5):
- boost
- DoubleConversion
- fast_float
- fmt
- glog
- hermes-engine
- - lottie-ios (= 4.6.0)
+ - lottie-ios (= 4.5.0)
- RCT-Folly
- RCT-Folly/Fabric
- RCTRequired
@@ -2094,7 +2094,7 @@ PODS:
- ReactCommon/turbomodule/core
- SocketRocket
- Yoga
- - react-native-safe-area-context (5.7.0):
+ - react-native-safe-area-context (5.6.2):
- boost
- DoubleConversion
- fast_float
@@ -2112,8 +2112,8 @@ PODS:
- React-graphics
- React-ImageManager
- React-jsi
- - react-native-safe-area-context/common (= 5.7.0)
- - react-native-safe-area-context/fabric (= 5.7.0)
+ - react-native-safe-area-context/common (= 5.6.2)
+ - react-native-safe-area-context/fabric (= 5.6.2)
- React-NativeModulesApple
- React-RCTFabric
- React-renderercss
@@ -2124,7 +2124,7 @@ PODS:
- ReactCommon/turbomodule/core
- SocketRocket
- Yoga
- - react-native-safe-area-context/common (5.7.0):
+ - react-native-safe-area-context/common (5.6.2):
- boost
- DoubleConversion
- fast_float
@@ -2152,7 +2152,7 @@ PODS:
- ReactCommon/turbomodule/core
- SocketRocket
- Yoga
- - react-native-safe-area-context/fabric (5.7.0):
+ - react-native-safe-area-context/fabric (5.6.2):
- boost
- DoubleConversion
- fast_float
@@ -2825,7 +2825,7 @@ PODS:
- ReactCommon/turbomodule/core
- SocketRocket
- Yoga
- - RNDeviceInfo (15.0.2):
+ - RNDeviceInfo (15.0.1):
- React-Core
- RNFS (2.20.0):
- React-Core
@@ -2913,7 +2913,7 @@ PODS:
- ReactCommon/turbomodule/core
- SocketRocket
- Yoga
- - RNReanimated (4.2.2):
+ - RNReanimated (4.2.1):
- boost
- DoubleConversion
- fast_float
@@ -2940,11 +2940,11 @@ PODS:
- ReactCodegen
- ReactCommon/turbomodule/bridging
- ReactCommon/turbomodule/core
- - RNReanimated/reanimated (= 4.2.2)
+ - RNReanimated/reanimated (= 4.2.1)
- RNWorklets
- SocketRocket
- Yoga
- - RNReanimated/reanimated (4.2.2):
+ - RNReanimated/reanimated (4.2.1):
- boost
- DoubleConversion
- fast_float
@@ -2971,11 +2971,11 @@ PODS:
- ReactCodegen
- ReactCommon/turbomodule/bridging
- ReactCommon/turbomodule/core
- - RNReanimated/reanimated/apple (= 4.2.2)
+ - RNReanimated/reanimated/apple (= 4.2.1)
- RNWorklets
- SocketRocket
- Yoga
- - RNReanimated/reanimated/apple (4.2.2):
+ - RNReanimated/reanimated/apple (4.2.1):
- boost
- DoubleConversion
- fast_float
@@ -3005,7 +3005,7 @@ PODS:
- RNWorklets
- SocketRocket
- Yoga
- - RNScreens (4.24.0):
+ - RNScreens (4.20.0):
- boost
- DoubleConversion
- fast_float
@@ -3032,10 +3032,10 @@ PODS:
- ReactCodegen
- ReactCommon/turbomodule/bridging
- ReactCommon/turbomodule/core
- - RNScreens/common (= 4.24.0)
+ - RNScreens/common (= 4.20.0)
- SocketRocket
- Yoga
- - RNScreens/common (4.24.0):
+ - RNScreens/common (4.20.0):
- boost
- DoubleConversion
- fast_float
@@ -3149,7 +3149,7 @@ PODS:
- ReactCommon/turbomodule/core
- SocketRocket
- Yoga
- - RNWorklets (0.7.4):
+ - RNWorklets (0.7.3):
- boost
- DoubleConversion
- fast_float
@@ -3176,10 +3176,10 @@ PODS:
- ReactCodegen
- ReactCommon/turbomodule/bridging
- ReactCommon/turbomodule/core
- - RNWorklets/worklets (= 0.7.4)
+ - RNWorklets/worklets (= 0.7.3)
- SocketRocket
- Yoga
- - RNWorklets/worklets (0.7.4):
+ - RNWorklets/worklets (0.7.3):
- boost
- DoubleConversion
- fast_float
@@ -3206,10 +3206,10 @@ PODS:
- ReactCodegen
- ReactCommon/turbomodule/bridging
- ReactCommon/turbomodule/core
- - RNWorklets/worklets/apple (= 0.7.4)
+ - RNWorklets/worklets/apple (= 0.7.3)
- SocketRocket
- Yoga
- - RNWorklets/worklets/apple (0.7.4):
+ - RNWorklets/worklets/apple (0.7.3):
- boost
- DoubleConversion
- fast_float
@@ -3603,10 +3603,10 @@ SPEC CHECKSUMS:
FBLazyVector: 309703e71d3f2f1ed7dc7889d58309c9d77a95a4
fmt: a40bb5bd0294ea969aaaba240a927bd33d878cdd
glog: 5683914934d5b6e4240e497e0f4a3b42d1854183
- hermes-engine: 3f74bbb07573d284e764cee0131ae769e16c53b8
- llama-rn: f673d63fdd04ce96793c60ba03be52804d94f49a
- lottie-ios: 8f959969761e9c45d70353667d00af0e5b9cadb3
- lottie-react-native: 983fd0489530e8d40f173de7f04e2f88b9317a15
+ hermes-engine: 8c6be38f94b3bf8b864981980e64e55f08e467ec
+ llama-rn: 3ae5a64b3d08ff41f9e62b214ba5004e475b9561
+ lottie-ios: a881093fab623c467d3bce374367755c272bdd59
+ lottie-react-native: 691b8363e8c591fb78a78254ff2517258891456b
op-sqlite: bafff369cecaee4fe65c89eec47deaba26f2db95
RCT-Folly: 846fda9475e61ec7bcbf8a3fe81edfcaeb090669
RCTDeprecation: a41bbdd9af30bf2e5715796b313e44ec43eefff1
@@ -3648,7 +3648,7 @@ SPEC CHECKSUMS:
react-native-document-picker: dc2d83366e47e89e7c51e8a41eab99c1d54e941c
react-native-document-viewer: 8c6ed07e7e27352743fa98e8dd6d288ad925b884
react-native-image-picker: 0314366753615115fa55c3cc937ac44cb7e75702
- react-native-safe-area-context: befb5404eb8a16fdc07fa2bebab3568ecabcbb8a
+ react-native-safe-area-context: c00143b4823773bba23f2f19f85663ae89ceb460
react-native-slider: 34064ca1a6864d7b263e44dd76a2d794e8d26744
react-native-voice: 908a0eba96c8c3d643e4f98b7232c6557d0a6f9c
React-NativeModulesApple: a2c3d2cbec893956a5b3e4060322db2984fff75b
@@ -3685,16 +3685,16 @@ SPEC CHECKSUMS:
ReactCodegen: 3d48510bcef445f6403c0004047d4d9cbb915435
ReactCommon: ac934cb340aee91282ecd6f273a26d24d4c55cae
RNCAsyncStorage: 29f0230e1a25f36c20b05f65e2eb8958d6526e82
- RNDeviceInfo: 4c852998208b60dc192ae3529e5867817719ad1e
+ RNDeviceInfo: 36d7f232bfe7c9b5c494cb7793230424ed32c388
RNFS: 89de7d7f4c0f6bafa05343c578f61118c8282ed8
RNGestureHandler: cd4be101cfa17ea6bbd438710caa02e286a84381
RNKeychain: a2c134ab796272c3d605e035ab727591000b30f3
RNReactNativeHapticFeedback: be4f1b4bf0398c30b59b76ed92ecb0a2ff3a69c6
- RNReanimated: 18324d3313d6477e1d12836c20c3ee30afb5de30
- RNScreens: 7f643ee0fd1407dc5085c7795460bd93da113b8f
+ RNReanimated: 292cd58688552a22b3fc1cefcfbc49b336dfed68
+ RNScreens: 714e10b6b554f7dc7ad9f78dcf36dc8e3fc73415
RNSVG: 595abfa0f9ac26d56afcaaedf4e37a00f54cab71
RNVectorIcons: 791f13226ec4a3fd13062eda9e892159f0981fae
- RNWorklets: a3184955a41f2be46898a937e2821469c8c8da42
+ RNWorklets: 944dddd0eef13006b658e653abbb3ee8365c3809
RNZipArchive: 4304f5100eab004eeb7349adc51997b3a28deb76
SocketRocket: d4aabe649be1e368d1318fdf28a022d714d65748
SSZipArchive: c69881e8ac5521f0e622291387add5f60f30f3c4
diff --git a/package-lock.json b/package-lock.json
index f44f33ad..da069b35 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -25,7 +25,7 @@
"@shopify/flash-list": "^2.2.2",
"@testing-library/react-native": "^13.3.3",
"@types/react-native-vector-icons": "^6.4.18",
- "llama.rn": "^0.11.2",
+ "llama.rn": "^0.12.0-rc.5",
"lottie-react-native": "^7.3.5",
"moti": "^0.30.0",
"patch-package": "^8.0.1",
@@ -10048,9 +10048,9 @@
}
},
"node_modules/llama.rn": {
- "version": "0.11.2",
- "resolved": "https://registry.npmjs.org/llama.rn/-/llama.rn-0.11.2.tgz",
- "integrity": "sha512-LpZ1r9iLi9YdrbbpED4gB7QNhStYghdKbr1QX85UEoqRKeQhfi6yBiIwSRANLa0xMjp6yDIL4p+y3RjgyNG3GQ==",
+ "version": "0.12.0-rc.5",
+ "resolved": "https://registry.npmjs.org/llama.rn/-/llama.rn-0.12.0-rc.5.tgz",
+ "integrity": "sha512-92UDVtroH4hMWekgGyjxtAM4/K5NizO4kEPnhGOloXpuH67H5GWH3sZsT617afJwVwyvLTX8WtoY/M1Ke9wjNw==",
"license": "MIT",
"engines": {
"node": ">= 16.0.0"
@@ -14156,7 +14156,7 @@
"version": "5.9.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
- "dev": true,
+ "devOptional": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
diff --git a/package.json b/package.json
index 66e24464..6830de61 100644
--- a/package.json
+++ b/package.json
@@ -36,7 +36,7 @@
"@shopify/flash-list": "^2.2.2",
"@testing-library/react-native": "^13.3.3",
"@types/react-native-vector-icons": "^6.4.18",
- "llama.rn": "^0.11.2",
+ "llama.rn": "^0.12.0-rc.5",
"lottie-react-native": "^7.3.5",
"moti": "^0.30.0",
"patch-package": "^8.0.1",
diff --git a/src/components/ChatMessage/utils.ts b/src/components/ChatMessage/utils.ts
index cebd4e0a..12acaf74 100644
--- a/src/components/ChatMessage/utils.ts
+++ b/src/components/ChatMessage/utils.ts
@@ -4,12 +4,36 @@ import type { ParsedContent } from './types';
/**
* Parse content that may contain thinking/reasoning sections.
- * Handles two formats:
- * 1. HLSL.. HLSL tags (used by llama models with thinking enabled)
- * 2. <|channel|>analysis<|message|>...<|channel|>final<|message|> (used by Qwen and similar models)
+ * Handles three formats:
+ * 1. <think>...</think> tags (DeepSeek-style, used by llama models with thinking enabled)
+ * 2. <|channel>thought\n...<channel|> (Gemma 4)
+ * 3. <|channel|>analysis<|message|>...<|channel|>final<|message|> (Qwen and similar models)
*/
export function parseThinkingContent(content: string): ParsedContent {
- // First, check for channel-based thinking format
+ // Gemma 4 thinking format: <|channel>thought\n[thinking][response]
+ // Note asymmetric tags: <|channel> opens (with channel name 'thought'), <channel|> closes.
+ const gemmaOpenMatch = content.match(/<\|channel>thought\n/i);
+ const gemmaCloseMatch = content.match(//i);
+
+ if (gemmaOpenMatch) {
+ const thinkStart = gemmaOpenMatch.index! + gemmaOpenMatch[0].length;
+ if (gemmaCloseMatch && gemmaCloseMatch.index! >= thinkStart) {
+ const thinkEnd = gemmaCloseMatch.index!;
+ return {
+ thinking: content.slice(thinkStart, thinkEnd).trim(),
+ response: content.slice(thinkEnd + gemmaCloseMatch[0].length).trim(),
+ isThinkingComplete: true,
+ };
+ }
+ // Still streaming — thinking not yet closed
+ return {
+ thinking: content.slice(thinkStart).trim(),
+ response: '',
+ isThinkingComplete: false,
+ };
+ }
+
+ // Check for channel-based thinking format
// Format: <|channel|>analysis<|message|>[thinking content]<|channel|>final<|message|>[response]
const channelAnalysisMatch = content.match(/<\|channel\|>analysis<\|message\|>/i);
const channelFinalMatch = content.match(/<\|channel\|>final<\|message\|>/i);
diff --git a/src/components/ModelCard.tsx b/src/components/ModelCard.tsx
index 6c8145fc..9d332ba0 100644
--- a/src/components/ModelCard.tsx
+++ b/src/components/ModelCard.tsx
@@ -138,7 +138,7 @@ export const ModelCard: React.FC = ({
disabled={!onPress}
testID={testID}
>
-
+
{compact ? (
= Dispatch>;
const FALLBACK_RECENT_MESSAGE_COUNT = 2;
@@ -224,6 +223,17 @@ async function injectRagContext(projectId: string | undefined, query: string, pr
}
return prompt;
}
+/**
+ * Prepends Gemma 4's `<|think|>` control token to a system prompt when thinking should be active.
+ *
+ * Gemma 4 requires <|think|> at the start of the system prompt to activate thinking mode.
+ * For E2B/E4B variants (the mobile-sized models), omitting this token fully disables thinking.
+ *
+ * @param prompt - The system prompt to send to the model.
+ * @param isRemote - When true the prompt is returned unchanged; the token is only applied on the local path.
+ * @returns `<|think|>\n` followed by `prompt` when `isRemote` is false and both
+ * `llmService.isGemma4Model()` and `llmService.isThinkingEnabled()` return true; otherwise `prompt` as-is.
+ */
+function applyGemma4ThinkToken(prompt: string, isRemote: boolean): string {
+ if (!isRemote && llmService.isGemma4Model() && llmService.isThinkingEnabled()) {
+ // The token sits on its own line ahead of the existing prompt text.
+ return `<|think|>\n${prompt}`;
+ }
+ return prompt;
+}
+
function resolveToolsAndPrompt(deps: GenerationDeps, conversation: any): { enabledTools: string[]; rawPrompt: string } {
const project = conversation?.projectId ? useProjectStore.getState().getProject(conversation.projectId) : null;
const { activeServerId, activeRemoteTextModelId } = useRemoteServerStore.getState();
@@ -253,11 +263,12 @@ export async function startGenerationFn(deps: GenerationDeps, call: StartGenerat
const conversation = useChatStore.getState().conversations.find(c => c.id === targetConversationId);
const { enabledTools, rawPrompt } = resolveToolsAndPrompt(deps, conversation);
const basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt);
- // Remote models use native tool_choice: 'auto' — skip heuristic gate and always pass enabled tools
const isRemote = !!useRemoteServerStore.getState().activeRemoteTextModelId;
- const heuristicMatch = shouldUseToolsForMessage(messageText, enabledTools);
- const activeTools = (isRemote || heuristicMatch) ? enabledTools : [];
- const systemPrompt = (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt;
+ const activeTools = enabledTools;
+ const systemPrompt = applyGemma4ThinkToken(
+ (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt,
+ isRemote,
+ );
logger.log(`[ChatGen][DEBUG] isRemote=${isRemote}, tools=[${activeTools.join(', ')}], path=${activeTools.length > 0 ? 'withTools' : 'generate'}`);
const messagesForContext = buildMessagesForContext(targetConversationId, messageText, systemPrompt);
await prepareContext(setDebugInfo, systemPrompt, messagesForContext);
@@ -332,9 +343,12 @@ export async function regenerateResponseFn(deps: GenerationDeps, call: Regenerat
.map(m => m.id === userMessage.id ? { ...m, content: messageText } : m);
const { enabledTools, rawPrompt } = resolveToolsAndPrompt(deps, conversation);
const isRemote = !!useRemoteServerStore.getState().activeRemoteTextModelId;
- const activeTools = (isRemote || shouldUseToolsForMessage(messageText, enabledTools)) ? enabledTools : [];
+ const activeTools = enabledTools;
const basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt);
- const systemPrompt = (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt;
+ const systemPrompt = applyGemma4ThinkToken(
+ (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt,
+ isRemote,
+ );
const { prefix, filtered } = applyCompactionPrefix(conversation, systemPrompt, messagesUpToUser);
try {
await generateWithCompactionRetry({ id: targetConversationId, prompt: systemPrompt, messages: [...prefix, ...filtered] }, activeTools, conversation?.projectId);
diff --git a/src/screens/ModelsScreen/importHelpers.ts b/src/screens/ModelsScreen/importHelpers.ts
index 651417e4..7667b318 100644
--- a/src/screens/ModelsScreen/importHelpers.ts
+++ b/src/screens/ModelsScreen/importHelpers.ts
@@ -1,5 +1,7 @@
+import RNFS from 'react-native-fs';
+import { Alert } from 'react-native';
import { modelManager } from '../../services';
-import { showAlert, hideAlert, AlertState } from '../../components/CustomAlert';
+import { showAlert, AlertState } from '../../components/CustomAlert';
import { DownloadedModel } from '../../types';
export type GgufFileRef = { uri: string; name: string; size: number };
@@ -43,13 +45,21 @@ export async function importGgufFiles(
deps: GgufImportDeps,
): Promise {
const { setAlertState, setImportProgress, addDownloadedModel } = deps;
+ console.log('[IMPORT] importGgufFiles called with', files.length, 'file(s)');
if (files.length === 1) {
+ const resolvedFileName = files[0].name ?? 'unknown';
+ console.log('[IMPORT] Single file import. uri:', files[0].uri, '| name:', JSON.stringify(files[0].name), '| resolvedFileName:', resolvedFileName, '| size:', files[0].size);
const model = await modelManager.importLocalModel({
sourceUri: files[0].uri,
- fileName: files[0].name ?? 'unknown',
- onProgress: p => setImportProgress(p),
+ fileName: resolvedFileName,
+ sourceSize: files[0].size,
+ onProgress: p => {
+ console.log(`[IMPORT] Progress: ${(p.fraction * 100).toFixed(1)}% — ${p.fileName}`);
+ setImportProgress(p);
+ },
});
+ console.log('[IMPORT] Single file import complete. model.name:', model.name, '| model.id:', model.id);
addDownloadedModel(model);
setAlertState(showAlert('Success', `${model.name} imported successfully!`));
return;
@@ -57,30 +67,70 @@ export async function importGgufFiles(
const file1: GgufFileRef = { uri: files[0].uri, name: files[0].name ?? '', size: files[0].size ?? 0 };
const file2: GgufFileRef = { uri: files[1].uri, name: files[1].name ?? '', size: files[1].size ?? 0 };
+ console.log('[IMPORT] Two-file (vision) import.');
+ console.log('[IMPORT] file1 — name:', JSON.stringify(file1.name), '| size:', file1.size, '| uri:', file1.uri);
+ console.log('[IMPORT] file2 — name:', JSON.stringify(file2.name), '| size:', file2.size, '| uri:', file2.uri);
+ console.log('[IMPORT] isMmProj(file1.name):', isMmProj(file1.name), '| isMmProj(file2.name):', isMmProj(file2.name));
+
+ // Check if files exist RIGHT AFTER picker returns — before any dialog
+ const file1ExistsBefore = await RNFS.exists(file1.uri.replace('file://', ''));
+ const file2ExistsBefore = await RNFS.exists(file2.uri.replace('file://', ''));
+ console.log('[IMPORT] FILE EXISTS CHECK (before dialog) — file1:', file1ExistsBefore, '| file2:', file2ExistsBefore);
+ console.log('[IMPORT] file1 decoded path:', decodeURIComponent(file1.uri.replace('file://', '')));
+ console.log('[IMPORT] file2 decoded path:', decodeURIComponent(file2.uri.replace('file://', '')));
+
const { mainFile, mmProjFile } = classifyGgufPair(file1, file2);
+ console.log('[IMPORT] Classification — mainFile:', mainFile.name, '| mmProjFile:', mmProjFile.name);
+
+ const dialogOpenTime = Date.now();
+ console.log('[IMPORT] Showing confirmation dialog at t=0ms');
const confirmed = await new Promise(resolve => {
- setAlertState(
- showAlert(
- 'Import Vision Model?',
- `Main model: ${mainFile.name}\nProjector: ${mmProjFile.name}\n\nIf these look wrong, cancel and rename your files.`,
- [
- { text: 'Cancel', style: 'cancel', onPress: () => { setAlertState(hideAlert()); resolve(false); } },
- { text: 'Import', onPress: () => { setAlertState(hideAlert()); resolve(true); } },
- ],
- ),
+ Alert.alert(
+ 'Import Vision Model?',
+ `Main model: ${mainFile.name}\nProjector: ${mmProjFile.name}\n\nIf these look wrong, cancel and rename your files.`,
+ [
+ { text: 'Cancel', style: 'cancel', onPress: () => resolve(false) },
+ { text: 'Import', onPress: () => resolve(true) },
+ ],
+ { cancelable: false },
);
});
- if (!confirmed) return;
+ const dialogDurationMs = Date.now() - dialogOpenTime;
+ console.log('[IMPORT] Dialog closed after', dialogDurationMs, 'ms. confirmed:', confirmed);
+
+ if (!confirmed) {
+ console.log('[IMPORT] User cancelled vision model import confirmation.');
+ return;
+ }
+
+ // Check if files STILL exist after dialog was dismissed — key check
+ const mainExistsAfter = await RNFS.exists(decodeURIComponent(mainFile.uri.replace('file://', '')));
+ const mmProjExistsAfter = await RNFS.exists(decodeURIComponent(mmProjFile.uri.replace('file://', '')));
+ console.log('[IMPORT] FILE EXISTS CHECK (after dialog, before copy) — mainFile:', mainExistsAfter, '| mmProjFile:', mmProjExistsAfter);
+ console.log('[IMPORT] mainFile path:', decodeURIComponent(mainFile.uri.replace('file://', '')));
+ console.log('[IMPORT] mmProjFile path:', decodeURIComponent(mmProjFile.uri.replace('file://', '')));
+
+ if (!mainExistsAfter || !mmProjExistsAfter) {
+ console.log('[IMPORT] ⚠️ FILES GONE after dialog! iOS deleted temp inbox files during the', dialogDurationMs, 'ms dialog wait.');
+ console.log('[IMPORT] This confirms the temp file eviction bug. Need keepLocalCopy() before dialog.');
+ }
+ console.log('[IMPORT] Vision import confirmed. Starting importLocalModel...');
const model = await modelManager.importLocalModel({
sourceUri: mainFile.uri,
fileName: mainFile.name,
- onProgress: p => setImportProgress(p),
+ sourceSize: mainFile.size,
+ onProgress: p => {
+ console.log(`[IMPORT] Vision progress: ${(p.fraction * 100).toFixed(1)}% — ${p.fileName}`);
+ setImportProgress(p);
+ },
mmProjSourceUri: mmProjFile.uri,
mmProjFileName: mmProjFile.name,
+ mmProjSourceSize: mmProjFile.size,
});
+ console.log('[IMPORT] Vision import complete. model.name:', model.name, '| isVisionModel:', model.isVisionModel, '| mmProjPath:', model.mmProjPath);
addDownloadedModel(model);
setAlertState(showAlert('Success', `${model.name} imported with vision projector!`));
}
diff --git a/src/screens/ModelsScreen/useModelsScreen.ts b/src/screens/ModelsScreen/useModelsScreen.ts
index 5f5570c8..c2d0b250 100644
--- a/src/screens/ModelsScreen/useModelsScreen.ts
+++ b/src/screens/ModelsScreen/useModelsScreen.ts
@@ -120,44 +120,81 @@ export function useModelsScreen() {
importImageModelZip(sourceUri, fileName, { addDownloadedImageModel, activeImageModelId, setActiveImageModelId, setImportProgress, setAlertState });
const handleImportLocalModel = async () => {
- if (isImporting) return;
- setIsImporting(true);
+ console.log('[IMPORT] handleImportLocalModel called. isImporting =', isImporting);
+ if (isImporting) {
+ console.log('[IMPORT] BLOCKED — isImporting is true, returning early. This is the silent-exit bug.');
+ return;
+ }
try {
+ console.log('[IMPORT] Opening file picker...');
const result = await pick({ type: [types.allFiles], allowMultiSelection: true });
- if (!result || result.length === 0) return;
+ console.log('[IMPORT] Picker returned. File count:', result?.length ?? 0);
+ setIsImporting(true);
+ console.log('[IMPORT] setIsImporting(true) called AFTER picker returns');
+ result?.forEach((f, i) => {
+ console.log(`[IMPORT] File[${i}] name=${JSON.stringify(f.name)} uri=${f.uri} size=${f.size} type=${f.type} nativeType=${f.nativeType} error=${f.error}`);
+ });
+
+ if (!result || result.length === 0) {
+ console.log('[IMPORT] Empty result from picker, returning.');
+ return;
+ }
- const allGguf = result.every(f => (f.name ?? '').toLowerCase().endsWith('.gguf'));
- const singleZip = result.length === 1 && (result[0].name ?? '').toLowerCase().endsWith('.zip');
+ // Resolve filename: use picker name if available, fall back to last path segment of URI
+ const resolvedFiles = result.map(f => ({
+ ...f,
+ name: f.name?.trim() || decodeURIComponent(f.uri.split('/').pop() ?? '') || 'unknown',
+ }));
+ resolvedFiles.forEach((f, i) => {
+ console.log(`[IMPORT] File[${i}] resolvedName=${JSON.stringify(f.name)} (original=${JSON.stringify(result[i].name)})`);
+ });
+
+ const allGguf = resolvedFiles.every(f => f.name.toLowerCase().endsWith('.gguf'));
+ const singleZip = resolvedFiles.length === 1 && resolvedFiles[0].name.toLowerCase().endsWith('.zip');
+ console.log('[IMPORT] Validation — allGguf:', allGguf, '| singleZip:', singleZip);
+ resolvedFiles.forEach((f, i) => {
+ console.log(`[IMPORT] File[${i}] nameForCheck=${JSON.stringify(f.name)} endsWithGguf=${f.name.toLowerCase().endsWith('.gguf')}`);
+ });
if (!allGguf && !singleZip) {
+ console.log('[IMPORT] VALIDATION FAILED — showing Invalid File alert');
setAlertState(showAlert(
'Invalid File',
- result.length > 1
+ resolvedFiles.length > 1
? 'When selecting multiple files, all must be .gguf files (main model + mmproj projector).'
: 'Supported formats: .gguf (text models) and .zip (image models).',
));
return;
}
- if (result.length > 2) {
+ if (resolvedFiles.length > 2) {
+ console.log('[IMPORT] Too many files selected:', resolvedFiles.length);
setAlertState(showAlert('Too Many Files', 'Select 1 file (text/zip) or 2 .gguf files (vision model + mmproj projector).'));
return;
}
- const firstUri = result[0].uri;
- const firstFileName = result[0].name ?? 'unknown';
+ const firstUri = resolvedFiles[0].uri;
+ const firstFileName = resolvedFiles[0].name;
+ console.log('[IMPORT] firstUri:', firstUri, '| firstFileName:', firstFileName);
setImportProgress({ fraction: 0, fileName: firstFileName });
if (singleZip) {
+ console.log('[IMPORT] Single ZIP detected, routing to handleImportImageModelZip');
await handleImportImageModelZip(firstUri, firstFileName);
return;
}
- await importGgufFiles(result.slice(0, 2), { setAlertState, setImportProgress, addDownloadedModel });
+ console.log('[IMPORT] Routing to importGgufFiles with', resolvedFiles.slice(0, 2).length, 'file(s)');
+ await importGgufFiles(resolvedFiles.slice(0, 2), { setAlertState, setImportProgress, addDownloadedModel });
} catch (error: unknown) {
- if (isErrorWithCode(error) && error.code === errorCodes.OPERATION_CANCELED) return;
+ if (isErrorWithCode(error) && error.code === errorCodes.OPERATION_CANCELED) {
+ console.log('[IMPORT] User cancelled the picker.');
+ return;
+ }
+ console.log('[IMPORT] ERROR caught:', getErrorMessage(error), error);
setAlertState(showAlert('Import Failed', getErrorMessage(error)));
} finally {
+ console.log('[IMPORT] finally block — resetting isImporting and progress');
setIsImporting(false);
setImportProgress(null);
}
diff --git a/src/services/llm.ts b/src/services/llm.ts
index 55ab97dc..1fdcf145 100644
--- a/src/services/llm.ts
+++ b/src/services/llm.ts
@@ -150,14 +150,22 @@ class LLMService {
supportsToolCalling(): boolean { return this.toolCallingSupported; }
supportsThinking(): boolean { return this.thinkingSupported; }
isThinkingEnabled(): boolean { return this.thinkingSupported && useAppStore.getState().settings.thinkingEnabled; }
+ /**
+ * Reports whether the current model path looks like a Gemma 4 model.
+ * Detection is a case-insensitive substring match for `gemma-4` or `gemma4` on `currentModelPath`;
+ * returns false when no path is set.
+ * NOTE(review): the match runs on the whole path, so a directory name containing `gemma-4` also counts — confirm this is intended.
+ */
+ isGemma4Model(): boolean {
+ const path = this.currentModelPath?.toLowerCase() ?? '';
+ return path.includes('gemma-4') || path.includes('gemma4');
+ }
/** Disable ctx_shift on Android when GPU layers are active — the OpenCL backend SIGSEGVs on the ggml set op used by KV cache shifting. */
private shouldDisableCtxShift(): boolean { return Platform.OS === 'android' && this.activeGpuLayers > 0; }
+ /**
+ * Sets `toolCallingSupported` from the jinja chat-template capabilities exposed by the current context.
+ * The flag becomes true when any of `defaultCaps.toolCalls`, `toolUse`, or `toolUseCaps.toolCalls` is truthy,
+ * and false when there is no context or when reading the capabilities throws.
+ */
private detectToolCallingSupport(): void {
if (!this.context) { this.toolCallingSupported = false; return; }
try {
const jinja = (this.context as any)?.model?.chatTemplates?.jinja;
+ // Diagnostic logging: dump the raw capability object and each field that feeds the decision below.
+ logger.log('[LLM][TOOLS] Full jinja caps:', JSON.stringify(jinja));
+ logger.log('[LLM][TOOLS] defaultCaps?.toolCalls:', jinja?.defaultCaps?.toolCalls);
+ logger.log('[LLM][TOOLS] toolUse:', jinja?.toolUse);
+ logger.log('[LLM][TOOLS] toolUseCaps?.toolCalls:', jinja?.toolUseCaps?.toolCalls);
this.toolCallingSupported = !!(jinja?.defaultCaps?.toolCalls || jinja?.toolUse || jinja?.toolUseCaps?.toolCalls);
- logger.log('[LLM] Tool calling supported:', this.toolCallingSupported);
+ logger.log('[LLM][TOOLS] toolCallingSupported =', this.toolCallingSupported);
} catch (e) { logger.warn('[LLM] Error detecting tool calling support:', e); this.toolCallingSupported = false; }
}
private detectThinkingSupport(): void {
@@ -196,11 +204,13 @@ class LLMService {
const startTime = Date.now();
let firstTokenMs = 0, tokenCount = 0, firstReceived = false;
let fullContent = '', fullReasoningContent = '', streamedContentSoFar = '', streamedReasoningSoFar = '';
- const completionParams = { messages: oaiMessages, ...buildCompletionParams(settings, { disableCtxShift: this.shouldDisableCtxShift() }), ...buildThinkingCompletionParams(this.isThinkingEnabled()) };
+ const completionParams = { messages: oaiMessages, ...buildCompletionParams(settings, { disableCtxShift: this.shouldDisableCtxShift() }), ...buildThinkingCompletionParams(this.isThinkingEnabled(), this.isGemma4Model()) };
+ logger.log(`[LLM][THINKING] thinkingSupported=${this.thinkingSupported}, thinkingEnabled=${useAppStore.getState().settings.thinkingEnabled}, isThinkingEnabled=${this.isThinkingEnabled()}, enable_thinking=${(completionParams as any).enable_thinking}, reasoning_format=${(completionParams as any).reasoning_format}`);
const completionResult = await safeCompletion(ctx, () => ctx.completion(completionParams, (data: any) => {
if (!this.isGenerating || !data.token) return;
- if (!firstReceived) { firstReceived = true; firstTokenMs = Date.now() - startTime; }
+ if (!firstReceived) { firstReceived = true; firstTokenMs = Date.now() - startTime; logger.log(`[LLM][THINKING] First token raw data — token: ${JSON.stringify(data.token)}, content: ${JSON.stringify(data.content)}, reasoning_content: ${JSON.stringify(data.reasoning_content)}`); }
tokenCount++;
+ if (data.reasoning_content) logger.log(`[LLM][THINKING] reasoning_content chunk received: ${JSON.stringify(data.reasoning_content)}`);
const content = getStreamingDelta(data.content ?? (!data.reasoning_content ? data.token : undefined), streamedContentSoFar);
const reasoningContent = getStreamingDelta(data.reasoning_content || undefined, streamedReasoningSoFar);
if (data.content) streamedContentSoFar = data.content;
@@ -214,6 +224,7 @@ class LLMService {
this.performanceStats = recordGenerationStats(startTime, firstTokenMs, tokenCount);
if (completionResult?.context_full) { logger.log('[LLM] Context full detected — signalling for compaction'); throw new Error('Context is full'); }
const result = { content: cr?.content || cr?.text || fullContent, reasoningContent: cr?.reasoning_content || fullReasoningContent };
+ logger.log(`[LLM][THINKING] Final result — hasContent=${!!result.content}, hasReasoningContent=${!!result.reasoningContent}, reasoningLength=${result.reasoningContent?.length ?? 0}, fullReasoningFromStream=${fullReasoningContent.length}`);
onComplete?.(result);
return result.content;
})();
@@ -224,6 +235,7 @@ class LLMService {
const work = generateWithToolsImpl({
context: this.context, isGenerating: this.isGenerating,
isThinkingEnabled: this.isThinkingEnabled(),
+ isGemma4Model: this.isGemma4Model(),
disableCtxShift: this.shouldDisableCtxShift(),
manageContextWindow: (msgs, extra?) => this.manageContextWindow(msgs, extra),
convertToOAIMessages: (msgs) => this.convertToOAIMessages(msgs),
diff --git a/src/services/llmHelpers.ts b/src/services/llmHelpers.ts
index ba8971e5..571ec24e 100644
--- a/src/services/llmHelpers.ts
+++ b/src/services/llmHelpers.ts
@@ -148,7 +148,12 @@ export async function initContextWithFallback(
logger.error(`[LLM] Attempt 3/3 failed (CPU, ctx=2048): ${finalMsg}`);
logger.error(`[LLM] All 3 init attempts failed for model: ${modelPath}`);
logger.error(`[LLM] Error chain — GPU: "${gpuMsg}" | CPU: "${cpuMsg}" | min-ctx: "${finalMsg}"`);
- throw new Error(`Failed to load model even at minimum context (2048). This may indicate insufficient memory, a corrupted model file, or an unsupported model format. (${finalMsg})`);
+ const errorParts = [
+ gpuMsg && gpuMsg !== finalMsg ? `GPU: ${gpuMsg}` : null,
+ cpuMsg && cpuMsg !== finalMsg ? `CPU: ${cpuMsg}` : null,
+ `min-ctx: ${finalMsg}`,
+ ].filter(Boolean).join(' | ');
+ throw new Error(`Failed to load model even at minimum context (2048). This may indicate insufficient memory, a corrupted model file, or an unsupported model format.\n\nError chain: ${errorParts}`);
}
}
}
@@ -184,8 +189,10 @@ export function supportsNativeThinking(context: LlamaContext | null): boolean {
return false;
}
}
-export function buildThinkingCompletionParams(enableThinking: boolean): { enable_thinking: boolean; reasoning_format: 'none' | 'deepseek' } {
- return { enable_thinking: enableThinking, reasoning_format: enableThinking ? 'deepseek' : 'none' };
+/**
+ * Builds the thinking-related completion parameters.
+ *
+ * @param enableThinking - Forwarded unchanged as `enable_thinking`.
+ * @param isGemma4 - When true, `reasoning_format` is always `'none'`, whatever `enableThinking` is. Defaults to false.
+ * @returns An object with `enable_thinking` and `reasoning_format`; the latter is `'deepseek'` only when
+ * thinking is enabled and the model is not Gemma 4.
+ */
+export function buildThinkingCompletionParams(enableThinking: boolean, isGemma4: boolean = false): { enable_thinking: boolean; reasoning_format: 'none' | 'deepseek' } {
+ // Gemma 4 uses its own <|channel>thought\n...<channel|> format — not DeepSeek's <think> tags.
+ // Set reasoning_format:'none' so llama.rn doesn't try to strip DeepSeek tags; we parse it ourselves.
+ return { enable_thinking: enableThinking, reasoning_format: (enableThinking && !isGemma4) ? 'deepseek' : 'none' };
}
export function getStreamingDelta(nextValue: string | undefined, previousValue: string): string | undefined {
if (!nextValue) return undefined;
diff --git a/src/services/llmToolGeneration.ts b/src/services/llmToolGeneration.ts
index 242f337d..a057f201 100644
--- a/src/services/llmToolGeneration.ts
+++ b/src/services/llmToolGeneration.ts
@@ -13,6 +13,68 @@ import logger from '../utils/logger';
type ToolStreamCallback = (data: StreamToken) => void;
type ToolCompleteCallback = (fullResponse: string) => void;
+/**
+ * Suppresses Gemma 4's native tool call tokens from the visible text stream.
+ * Gemma 4 wraps tool calls in <|tool_call>...<tool_call|> — llama.rn parses
+ * the structured call fine, but the raw tokens still flow through data.token.
+ * This filter buffers the stream and drops everything inside those tags.
+ * A trailing partial tag prefix (e.g. a final '<') stays buffered: there is no end-of-stream flush.
+ */
+class ToolCallTokenFilter {
+  private inBlock = false;
+  private buffer = '';
+
+  /** Feeds one streamed token and returns the text that is safe to display (may be ''). */
+  process(token: string): string {
+    this.buffer += token;
+    return this.flush();
+  }
+
+  /** Drains the buffer: emits text outside tool-call blocks, discards text inside them. */
+  private flush(): string {
+    const openTag = '<|tool_call>';
+    const closeTag = '<tool_call|>'; // NOTE(review): mirror of openTag — confirm against the Gemma 4 chat template
+    let output = '';
+
+    while (this.buffer.length > 0) {
+      if (this.inBlock) {
+        const closeIdx = this.buffer.indexOf(closeTag);
+        if (closeIdx === -1) {
+          // Partial close tag may be at the end — hold it in the buffer, drop the rest
+          const partial = this.partialSuffix(this.buffer, closeTag);
+          this.buffer = this.buffer.slice(this.buffer.length - partial);
+          break;
+        }
+        // Drop everything up to and including the close tag
+        this.buffer = this.buffer.slice(closeIdx + closeTag.length);
+        this.inBlock = false;
+      } else {
+        const openIdx = this.buffer.indexOf(openTag);
+        if (openIdx === -1) {
+          // Emit all but a possible partial open tag at the end, which stays buffered
+          const partial = this.partialSuffix(this.buffer, openTag);
+          output += this.buffer.slice(0, this.buffer.length - partial);
+          this.buffer = this.buffer.slice(this.buffer.length - partial);
+          break;
+        }
+        output += this.buffer.slice(0, openIdx);
+        this.buffer = this.buffer.slice(openIdx + openTag.length);
+        this.inBlock = true;
+      }
+    }
+
+    return output;
+  }
+
+  /** Length of the longest proper prefix of `tag` that `text` ends with (0 if none). */
+  private partialSuffix(text: string, tag: string): number {
+    for (let len = Math.min(tag.length - 1, text.length); len > 0; len--) {
+      if (text.endsWith(tag.slice(0, len))) return len;
+    }
+    return 0;
+  }
+}
+
function parseToolCall(tc: any): ToolCall {
const fn = tc.function || {};
let args = fn.arguments || {};
@@ -26,6 +88,7 @@ export interface ToolGenerationDeps {
context: any;
isGenerating: boolean;
isThinkingEnabled: boolean;
+ isGemma4Model: boolean;
disableCtxShift: boolean;
manageContextWindow: (messages: Message[], extraReserve?: number) => Promise;
convertToOAIMessages: (messages: Message[]) => any[];
@@ -57,13 +120,15 @@ export async function generateWithToolsImpl(
let fullResponse = '';
let firstReceived = false;
const collectedToolCalls: ToolCall[] = [];
+ // Gemma 4 emits <|tool_call>...<tool_call|> tokens in the stream; filter them out.
+ const toolCallFilter = deps.isGemma4Model ? new ToolCallTokenFilter() : null;
const completionParams = {
messages: oaiMessages,
...buildCompletionParams(settings, { disableCtxShift: deps.disableCtxShift }),
tools: options.tools,
tool_choice: 'auto',
- ...buildThinkingCompletionParams(deps.isThinkingEnabled),
+ ...buildThinkingCompletionParams(deps.isThinkingEnabled, deps.isGemma4Model),
};
logger.log('[LLM-Tools] === INPUT ===');
logger.log(JSON.stringify(completionParams, null, 2));
@@ -77,8 +142,9 @@ export async function generateWithToolsImpl(
if (!data.token) return;
if (!firstReceived) { firstReceived = true; firstTokenMs = Date.now() - startTime; }
tokenCount++;
- fullResponse += data.token;
- options.onStream?.({ content: data.token });
+ const visibleToken = toolCallFilter ? toolCallFilter.process(data.token) : data.token;
+ fullResponse += visibleToken;
+ if (visibleToken) options.onStream?.({ content: visibleToken });
}), 'generateWithTools');
logger.log('[LLM-Tools] === OUTPUT ===');
logger.log(JSON.stringify(completionResult, null, 2));
diff --git a/src/services/modelManager/scan.ts b/src/services/modelManager/scan.ts
index d3d26db7..68acda54 100644
--- a/src/services/modelManager/scan.ts
+++ b/src/services/modelManager/scan.ts
@@ -196,53 +196,96 @@ export interface ImportLocalModelOpts {
sourceUri: string;
fileName: string;
modelsDir: string;
+ sourceSize?: number | null;
onProgress?: (progress: { fraction: number; fileName: string }) => void;
mmProjSourceUri?: string;
mmProjFileName?: string;
+ mmProjSourceSize?: number | null;
}
-async function resolveAndroidUri(uri: string, cacheFileName: string): Promise<{ resolved: string; tempPath: string | null }> {
- if (Platform.OS !== 'android' || !uri.startsWith('content://')) {
- return { resolved: uri, tempPath: null };
+function resolveUri(uri: string): string {
+ // Android content:// URIs go to RNFS.copyFile unchanged; any other URI (e.g. iOS file://) is percent-decoded (%20 → space) so RNFS can find the file on disk.
+ let decoded = uri; // fallback result when the URI cannot be percent-decoded
+ if (uri.startsWith('content://')) {
+ console.log('[IMPORT][scan] resolveUri — Android content:// URI, using as-is');
+ console.log('[IMPORT][scan] uri:', uri);
+ return uri;
}
- const tempPath = `${RNFS.CachesDirectoryPath}/${Date.now()}_${cacheFileName}`;
- await RNFS.copyFile(uri, tempPath);
- return { resolved: tempPath, tempPath };
+ try { decoded = decodeURIComponent(uri); } catch { /* URIError on a malformed %-escape: keep the raw URI instead of throwing */ }
+ console.log('[IMPORT][scan] resolveUri — file URI, decoded');
+ console.log('[IMPORT][scan] original:', uri);
+ console.log('[IMPORT][scan] decoded: ', decoded);
+ return decoded;
}
export async function importLocalModel(opts: ImportLocalModelOpts): Promise { // NOSONAR
- const { sourceUri, fileName, modelsDir, onProgress, mmProjSourceUri, mmProjFileName } = opts;
+ const { sourceUri, fileName, modelsDir, sourceSize, onProgress, mmProjSourceUri, mmProjFileName, mmProjSourceSize } = opts;
+ const importStart = Date.now();
+ const elapsed = () => `+${Date.now() - importStart}ms`;
+
+ console.log('[IMPORT][scan] ── importLocalModel START ──────────────────');
+ console.log('[IMPORT][scan] Platform.OS:', Platform.OS);
+ console.log('[IMPORT][scan] fileName:', fileName);
+ console.log('[IMPORT][scan] sourceUri:', sourceUri);
+ console.log('[IMPORT][scan] sourceSize:', sourceSize ?? 'unknown');
+ console.log('[IMPORT][scan] mmProjFileName:', mmProjFileName ?? 'none');
+ console.log('[IMPORT][scan] mmProjSourceUri:', mmProjSourceUri ?? 'none');
+ console.log('[IMPORT][scan] mmProjSourceSize:', mmProjSourceSize ?? 'unknown');
+ console.log('[IMPORT][scan] modelsDir:', modelsDir);
+
+ // Heartbeat — logs every 3s so we can see exactly where it gets stuck
+ let heartbeatStep = 'init';
+ const heartbeat = setInterval(() => {
+ console.log(`[IMPORT][scan] ⏱ HEARTBEAT — still running at ${elapsed()}, current step: ${heartbeatStep}`);
+ }, 3000);
if (!fileName.toLowerCase().endsWith('.gguf')) {
+ clearInterval(heartbeat);
+ console.log('[IMPORT][scan] ERROR — fileName does not end with .gguf:', fileName);
throw new Error('Only .gguf files can be imported');
}
- const { resolved: resolvedSource, tempPath: tempCachePath } = await resolveAndroidUri(sourceUri, fileName);
- const { resolved: resolvedMmProjSource, tempPath: tempMmProjCachePath } = mmProjSourceUri && mmProjFileName
- ? await resolveAndroidUri(mmProjSourceUri, mmProjFileName)
- : { resolved: mmProjSourceUri, tempPath: null };
+ heartbeatStep = 'resolving URIs';
+ const resolvedSource = resolveUri(sourceUri);
+ const resolvedMmProjSource = mmProjSourceUri ? resolveUri(mmProjSourceUri) : undefined;
+ console.log(`[IMPORT][scan] ${elapsed()} resolvedSource:`, resolvedSource);
+ if (mmProjFileName) {
+ console.log(`[IMPORT][scan] ${elapsed()} resolvedMmProjSource:`, resolvedMmProjSource ?? 'none');
+ }
try {
+ heartbeatStep = 'checking dest paths';
const destPath = `${modelsDir}/${fileName}`;
- if (await RNFS.exists(destPath)) throw new Error(`A model file named "${fileName}" already exists`);
+ console.log(`[IMPORT][scan] ${elapsed()} destPath:`, destPath);
+ const destExists = await RNFS.exists(destPath);
+ console.log(`[IMPORT][scan] ${elapsed()} dest already exists:`, destExists);
+ if (destExists) throw new Error(`A model file named "${fileName}" already exists`);
if (mmProjFileName && await RNFS.exists(`${modelsDir}/${mmProjFileName}`)) {
throw new Error(`A file named "${mmProjFileName}" already exists`);
}
// Copy main model: progress 0→0.5 when mmproj present, 0→1 otherwise
const mainProgressScale = mmProjFileName ? 0.5 : 1;
+ heartbeatStep = 'copying main model';
+ console.log(`[IMPORT][scan] ${elapsed()} Starting main model copy. sourceSize: ${sourceSize ?? 'unknown'} mainProgressScale: ${mainProgressScale}`);
+ console.log(`[IMPORT][scan] ${elapsed()} copy FROM:`, resolvedSource);
+ console.log(`[IMPORT][scan] ${elapsed()} copy TO: `, destPath);
+ const mainCopyStart = Date.now();
await copyFileWithProgress(
resolvedSource,
destPath,
+ sourceSize ?? null,
onProgress ? (fraction) => onProgress({ fraction: fraction * mainProgressScale, fileName }) : undefined,
);
+ console.log(`[IMPORT][scan] ${elapsed()} Main model copy complete in ${Date.now() - mainCopyStart}ms`);
const quantMatch = fileName.match(/[_-](Q\d+[_\w]*|f16|f32)/i);
const quantization = quantMatch ? quantMatch[1].toUpperCase() : 'Unknown';
const modelName = fileName.replace(/\.gguf$/i, '').replace(/[_-]Q\d+.*/i, '');
const destStat = await RNFS.stat(destPath);
const fileSize = parseSizeInt(destStat.size);
+ console.log('[IMPORT][scan] modelName:', modelName, '| quantization:', quantization, '| fileSize:', fileSize, 'bytes');
const pseudoFile: ModelFile = { name: fileName, size: fileSize, quantization, downloadUrl: '' };
const model = await buildDownloadedModel({ modelId: 'local_import', file: pseudoFile, resolvedLocalPath: destPath });
@@ -257,36 +300,61 @@ export async function importLocalModel(opts: ImportLocalModelOpts): Promise onProgress({ fraction: 0.5 + fraction * 0.5, fileName: mmProjFileName })
: undefined,
);
+ console.log(`[IMPORT][scan] ${elapsed()} mmproj copy complete in ${Date.now() - mmProjCopyStart}ms`);
const mmProjStat = await RNFS.stat(mmProjDestPath);
builtModel.mmProjPath = mmProjDestPath;
builtModel.mmProjFileName = mmProjFileName;
builtModel.mmProjFileSize = parseSizeInt(mmProjStat.size);
builtModel.isVisionModel = true;
+ console.log(`[IMPORT][scan] ${elapsed()} mmproj linked. mmProjFileSize:`, builtModel.mmProjFileSize, 'bytes');
}
+ heartbeatStep = 'persisting metadata';
+ console.log(`[IMPORT][scan] ${elapsed()} Persisting model metadata...`);
await persistDownloadedModel(builtModel, modelsDir);
+ console.log(`[IMPORT][scan] ${elapsed()} ── importLocalModel COMPLETE. id: ${builtModel.id} ──`);
return builtModel;
+ } catch (e) {
+ console.log(`[IMPORT][scan] ${elapsed()} ❌ importLocalModel ERROR:`, e);
+ throw e;
} finally {
- if (tempCachePath) await RNFS.unlink(tempCachePath).catch(() => {});
- if (tempMmProjCachePath) await RNFS.unlink(tempMmProjCachePath).catch(() => {});
+ clearInterval(heartbeat);
}
}
async function copyFileWithProgress(
source: string,
dest: string,
+ knownTotalBytes: number | null,
onProgress?: (fraction: number) => void,
): Promise {
- const sourceStat = await RNFS.stat(source);
- const totalBytes = parseSizeInt(sourceStat.size);
+ const copyStart = Date.now();
+ const copyElapsed = () => `+${Date.now() - copyStart}ms`;
+ const totalBytes = knownTotalBytes ?? 0;
+ const totalMB = totalBytes > 0 ? (totalBytes / 1024 / 1024).toFixed(1) : '?';
+
+ console.log(`[IMPORT][scan] copyFileWithProgress START — knownTotalBytes: ${knownTotalBytes ?? 'unknown'} (${totalMB} MB)`);
+ console.log('[IMPORT][scan] FROM:', source);
+ console.log('[IMPORT][scan] TO: ', dest);
+ if (!knownTotalBytes) {
+ console.log('[IMPORT][scan] No known size — progress will be indeterminate');
+ }
+
let polling = true;
+ let lastWritten = 0;
const pollInterval = setInterval(async () => {
if (!polling) return;
@@ -295,21 +363,37 @@ async function copyFileWithProgress(
if (exists) {
const stat = await RNFS.stat(dest);
const written = parseSizeInt(stat.size);
- onProgress?.(totalBytes > 0 ? Math.min(written / totalBytes, 0.99) : 0);
+ const writtenMB = (written / 1024 / 1024).toFixed(1);
+ const delta = written - lastWritten;
+ const speedMBs = ((delta / 1024 / 1024) / 0.5).toFixed(1);
+ lastWritten = written;
+ if (totalBytes > 0) {
+ const pct = Math.min(written / totalBytes, 0.99);
+ console.log(`[IMPORT][scan] ${copyElapsed()} copy poll — ${writtenMB}/${totalMB} MB (${(pct * 100).toFixed(1)}%) speed: ${speedMBs} MB/s`);
+ onProgress?.(pct);
+ } else {
+ console.log(`[IMPORT][scan] ${copyElapsed()} copy poll — ${writtenMB} MB written (size unknown) speed: ${speedMBs} MB/s`);
+ // No fraction available — don't call onProgress so UI stays indeterminate
+ }
+ } else {
+ console.log(`[IMPORT][scan] ${copyElapsed()} copy poll — dest not created yet`);
}
- } catch {
- // File may not exist yet, ignore
+ } catch (e) {
+ console.log(`[IMPORT][scan] ${copyElapsed()} copy poll error:`, e);
}
}, 500);
try {
+ console.log(`[IMPORT][scan] ${copyElapsed()} calling RNFS.copyFile...`);
await RNFS.copyFile(source, dest);
polling = false;
clearInterval(pollInterval);
+ console.log(`[IMPORT][scan] ${copyElapsed()} RNFS.copyFile resolved — 100% done`);
onProgress?.(1);
} catch (error) {
polling = false;
clearInterval(pollInterval);
+ console.log(`[IMPORT][scan] ${copyElapsed()} RNFS.copyFile FAILED:`, error);
await RNFS.unlink(dest).catch(() => {});
throw error;
}
diff --git a/src/utils/messageContent.ts b/src/utils/messageContent.ts
index d23e77cb..52f9d92d 100644
--- a/src/utils/messageContent.ts
+++ b/src/utils/messageContent.ts
@@ -5,16 +5,29 @@ const CONTROL_TOKEN_PATTERNS: RegExp[] = [
/<\|eot_id\|>/gi,
/<\/s>/gi,
/<tool_call>[\s\S]*?<\/tool_call>\s*/g,
+ // Gemma 4 native tool call format: <|tool_call>...<tool_call|>
+ // The streaming filter in llmToolGeneration suppresses these live;
+ // this catches any that slip through into stored message content.
+ /<\|tool_call>[\s\S]*?<tool_call\|>\s*/g,
+ // Gemma 4 string-delimiter token that may appear outside a tool block
+ /<\|">/g,
];
// Patterns for channel-based thinking format (used by some models like Qwen)
const CHANNEL_ANALYSIS_START = /<\|channel\|>analysis<\|message\|>/gi;
const CHANNEL_FINAL_START = /<\|channel\|>final<\|message\|>/gi;
+// Gemma 4 thinking tags: <|channel>thought\n...<channel|>
+// These are stripped as a safety net; parseThinkingContent handles them before display.
+const GEMMA4_THINK_OPEN = /<\|channel>thought\n/gi;
+const GEMMA4_THINK_CLOSE = /<channel\|>/gi;
+
export function stripControlTokens(content: string): string {
let result = CONTROL_TOKEN_PATTERNS.reduce((acc, pattern) => acc.replace(pattern, ''), content);
// Remove channel markers but preserve the content after them
result = result.replace(CHANNEL_ANALYSIS_START, '');
result = result.replace(CHANNEL_FINAL_START, '');
+ result = result.replace(GEMMA4_THINK_OPEN, ''); // remove only the Gemma 4 thought-channel opener; the text after it is kept
+ result = result.replace(GEMMA4_THINK_CLOSE, ''); // remove the Gemma 4 thinking close marker
return result;
}
\ No newline at end of file