diff --git a/ios/OffgridMobile/Info.plist b/ios/OffgridMobile/Info.plist
index 3e6cdd82..414d96e3 100644
--- a/ios/OffgridMobile/Info.plist
+++ b/ios/OffgridMobile/Info.plist
@@ -31,8 +31,6 @@
 		<key>NSAllowsLocalNetworking</key>
 		<true/>
 	</dict>
-	<key>NSLocalNetworkUsageDescription</key>
-	<string>Off Grid scans your local network to automatically discover LLM servers such as Ollama and LM Studio.</string>
 	<key>NSBonjourServices</key>
 	<array>
 		<string>_http._tcp</string>
@@ -43,6 +41,8 @@
 	<string>This app needs access to your camera to take photos and attach them to conversations.</string>
 	<key>NSFaceIDUsageDescription</key>
 	<string>This app may use Face ID to protect access to your stored credentials.</string>
+	<key>NSLocalNetworkUsageDescription</key>
+	<string>Off Grid scans your local network to automatically discover LLM servers such as Ollama and LM Studio.</string>
 	<key>NSMicrophoneUsageDescription</key>
 	<string>This app needs access to your microphone for voice-to-text transcription using Whisper.</string>
 	<key>NSPhotoLibraryAddUsageDescription</key>
diff --git a/ios/Podfile.lock b/ios/Podfile.lock
index 8646eef3..2674daf8 100644
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@@ -10,7 +10,7 @@ PODS:
   - hermes-engine (0.14.0):
     - hermes-engine/Pre-built (= 0.14.0)
   - hermes-engine/Pre-built (0.14.0)
-  - llama-rn (0.11.4):
+  - llama-rn (0.12.0-rc.5):
     - boost
     - DoubleConversion
     - fast_float
@@ -40,15 +40,15 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - lottie-ios (4.6.0)
-  - lottie-react-native (7.3.6):
+  - lottie-ios (4.5.0)
+  - lottie-react-native (7.3.5):
     - boost
     - DoubleConversion
     - fast_float
     - fmt
     - glog
     - hermes-engine
-    - lottie-ios (= 4.6.0)
+    - lottie-ios (= 4.5.0)
     - RCT-Folly
     - RCT-Folly/Fabric
     - RCTRequired
@@ -2094,7 +2094,7 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - react-native-safe-area-context (5.7.0):
+  - react-native-safe-area-context (5.6.2):
     - boost
     - DoubleConversion
     - fast_float
@@ -2112,8 +2112,8 @@ PODS:
     - React-graphics
     - React-ImageManager
     - React-jsi
-    - react-native-safe-area-context/common (= 5.7.0)
-    - react-native-safe-area-context/fabric (= 5.7.0)
+    - react-native-safe-area-context/common (= 5.6.2)
+    - react-native-safe-area-context/fabric (= 5.6.2)
     - React-NativeModulesApple
     - React-RCTFabric
     - React-renderercss
@@ -2124,7 +2124,7 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - react-native-safe-area-context/common (5.7.0):
+  - react-native-safe-area-context/common (5.6.2):
     - boost
     - DoubleConversion
     - fast_float
@@ -2152,7 +2152,7 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - react-native-safe-area-context/fabric (5.7.0):
+  - react-native-safe-area-context/fabric (5.6.2):
     - boost
     - DoubleConversion
     - fast_float
@@ -2825,7 +2825,7 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - RNDeviceInfo (15.0.2):
+  - RNDeviceInfo (15.0.1):
     - React-Core
   - RNFS (2.20.0):
     - React-Core
@@ -2913,7 +2913,7 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - RNReanimated (4.2.2):
+  - RNReanimated (4.2.1):
     - boost
     - DoubleConversion
     - fast_float
@@ -2940,11 +2940,11 @@ PODS:
     - ReactCodegen
     - ReactCommon/turbomodule/bridging
     - ReactCommon/turbomodule/core
-    - RNReanimated/reanimated (= 4.2.2)
+    - RNReanimated/reanimated (= 4.2.1)
     - RNWorklets
     - SocketRocket
     - Yoga
-  - RNReanimated/reanimated (4.2.2):
+  - RNReanimated/reanimated (4.2.1):
     - boost
     - DoubleConversion
     - fast_float
@@ -2971,11 +2971,11 @@ PODS:
     - ReactCodegen
     - ReactCommon/turbomodule/bridging
     - ReactCommon/turbomodule/core
-    - RNReanimated/reanimated/apple (= 4.2.2)
+    - RNReanimated/reanimated/apple (= 4.2.1)
     - RNWorklets
     - SocketRocket
     - Yoga
-  - RNReanimated/reanimated/apple (4.2.2):
+  - RNReanimated/reanimated/apple (4.2.1):
     - boost
     - DoubleConversion
     - fast_float
@@ -3005,7 +3005,7 @@ PODS:
     - RNWorklets
     - SocketRocket
     - Yoga
-  - RNScreens (4.24.0):
+  - RNScreens (4.20.0):
     - boost
     - DoubleConversion
     - fast_float
@@ -3032,10 +3032,10 @@ PODS:
     - ReactCodegen
     - ReactCommon/turbomodule/bridging
     - ReactCommon/turbomodule/core
-    - RNScreens/common (= 4.24.0)
+    - RNScreens/common (= 4.20.0)
     - SocketRocket
     - Yoga
-  - RNScreens/common (4.24.0):
+  - RNScreens/common (4.20.0):
     - boost
     - DoubleConversion
     - fast_float
@@ -3149,7 +3149,7 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - RNWorklets (0.7.4):
+  - RNWorklets (0.7.3):
     - boost
     - DoubleConversion
     - fast_float
@@ -3176,10 +3176,10 @@ PODS:
     - ReactCodegen
     - ReactCommon/turbomodule/bridging
     - ReactCommon/turbomodule/core
-    - RNWorklets/worklets (= 0.7.4)
+    - RNWorklets/worklets (= 0.7.3)
     - SocketRocket
     - Yoga
-  - RNWorklets/worklets (0.7.4):
+  - RNWorklets/worklets (0.7.3):
     - boost
     - DoubleConversion
     - fast_float
@@ -3206,10 +3206,10 @@ PODS:
     - ReactCodegen
     - ReactCommon/turbomodule/bridging
     - ReactCommon/turbomodule/core
-    - RNWorklets/worklets/apple (= 0.7.4)
+    - RNWorklets/worklets/apple (= 0.7.3)
     - SocketRocket
     - Yoga
-  - RNWorklets/worklets/apple (0.7.4):
+  - RNWorklets/worklets/apple (0.7.3):
     - boost
     - DoubleConversion
     - fast_float
@@ -3603,10 +3603,10 @@ SPEC CHECKSUMS:
   FBLazyVector: 309703e71d3f2f1ed7dc7889d58309c9d77a95a4
   fmt: a40bb5bd0294ea969aaaba240a927bd33d878cdd
   glog: 5683914934d5b6e4240e497e0f4a3b42d1854183
-  hermes-engine: 3f74bbb07573d284e764cee0131ae769e16c53b8
-  llama-rn: f673d63fdd04ce96793c60ba03be52804d94f49a
-  lottie-ios: 8f959969761e9c45d70353667d00af0e5b9cadb3
-  lottie-react-native: 983fd0489530e8d40f173de7f04e2f88b9317a15
+  hermes-engine: 8c6be38f94b3bf8b864981980e64e55f08e467ec
+  llama-rn: 3ae5a64b3d08ff41f9e62b214ba5004e475b9561
+  lottie-ios: a881093fab623c467d3bce374367755c272bdd59
+  lottie-react-native: 691b8363e8c591fb78a78254ff2517258891456b
   op-sqlite: bafff369cecaee4fe65c89eec47deaba26f2db95
   RCT-Folly: 846fda9475e61ec7bcbf8a3fe81edfcaeb090669
   RCTDeprecation: a41bbdd9af30bf2e5715796b313e44ec43eefff1
@@ -3648,7 +3648,7 @@ SPEC CHECKSUMS:
   react-native-document-picker: dc2d83366e47e89e7c51e8a41eab99c1d54e941c
   react-native-document-viewer: 8c6ed07e7e27352743fa98e8dd6d288ad925b884
   react-native-image-picker: 0314366753615115fa55c3cc937ac44cb7e75702
-  react-native-safe-area-context: befb5404eb8a16fdc07fa2bebab3568ecabcbb8a
+  react-native-safe-area-context: c00143b4823773bba23f2f19f85663ae89ceb460
   react-native-slider: 34064ca1a6864d7b263e44dd76a2d794e8d26744
   react-native-voice: 908a0eba96c8c3d643e4f98b7232c6557d0a6f9c
   React-NativeModulesApple: a2c3d2cbec893956a5b3e4060322db2984fff75b
@@ -3685,16 +3685,16 @@ SPEC CHECKSUMS:
   ReactCodegen: 3d48510bcef445f6403c0004047d4d9cbb915435
   ReactCommon: ac934cb340aee91282ecd6f273a26d24d4c55cae
   RNCAsyncStorage: 29f0230e1a25f36c20b05f65e2eb8958d6526e82
-  RNDeviceInfo: 4c852998208b60dc192ae3529e5867817719ad1e
+  RNDeviceInfo: 36d7f232bfe7c9b5c494cb7793230424ed32c388
   RNFS: 89de7d7f4c0f6bafa05343c578f61118c8282ed8
   RNGestureHandler: cd4be101cfa17ea6bbd438710caa02e286a84381
   RNKeychain: a2c134ab796272c3d605e035ab727591000b30f3
   RNReactNativeHapticFeedback: be4f1b4bf0398c30b59b76ed92ecb0a2ff3a69c6
-  RNReanimated: 18324d3313d6477e1d12836c20c3ee30afb5de30
-  RNScreens: 7f643ee0fd1407dc5085c7795460bd93da113b8f
+  RNReanimated: 292cd58688552a22b3fc1cefcfbc49b336dfed68
+  RNScreens: 714e10b6b554f7dc7ad9f78dcf36dc8e3fc73415
   RNSVG: 595abfa0f9ac26d56afcaaedf4e37a00f54cab71
   RNVectorIcons: 791f13226ec4a3fd13062eda9e892159f0981fae
-  RNWorklets: a3184955a41f2be46898a937e2821469c8c8da42
+  RNWorklets: 944dddd0eef13006b658e653abbb3ee8365c3809
   RNZipArchive: 4304f5100eab004eeb7349adc51997b3a28deb76
   SocketRocket: d4aabe649be1e368d1318fdf28a022d714d65748
   SSZipArchive: c69881e8ac5521f0e622291387add5f60f30f3c4
diff --git a/package-lock.json b/package-lock.json
index f44f33ad..da069b35 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -25,7 +25,7 @@
         "@shopify/flash-list": "^2.2.2",
         "@testing-library/react-native": "^13.3.3",
         "@types/react-native-vector-icons": "^6.4.18",
-        "llama.rn": "^0.11.2",
+        "llama.rn": "^0.12.0-rc.5",
         "lottie-react-native": "^7.3.5",
         "moti": "^0.30.0",
         "patch-package": "^8.0.1",
@@ -10048,9 +10048,9 @@
       }
     },
     "node_modules/llama.rn": {
-      "version": "0.11.2",
-      "resolved": "https://registry.npmjs.org/llama.rn/-/llama.rn-0.11.2.tgz",
-      "integrity": "sha512-LpZ1r9iLi9YdrbbpED4gB7QNhStYghdKbr1QX85UEoqRKeQhfi6yBiIwSRANLa0xMjp6yDIL4p+y3RjgyNG3GQ==",
+      "version": "0.12.0-rc.5",
+      "resolved": "https://registry.npmjs.org/llama.rn/-/llama.rn-0.12.0-rc.5.tgz",
+      "integrity": "sha512-92UDVtroH4hMWekgGyjxtAM4/K5NizO4kEPnhGOloXpuH67H5GWH3sZsT617afJwVwyvLTX8WtoY/M1Ke9wjNw==",
       "license": "MIT",
       "engines": {
         "node": ">= 16.0.0"
@@ -14156,7 +14156,7 @@
       "version": "5.9.3",
       "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
-      "dev": true,
+      "devOptional": true,
       "license": "Apache-2.0",
       "bin": {
         "tsc": "bin/tsc",
diff --git a/package.json b/package.json
index 66e24464..6830de61 100644
--- a/package.json
+++ b/package.json
@@ -36,7 +36,7 @@
     "@shopify/flash-list": "^2.2.2",
     "@testing-library/react-native": "^13.3.3",
     "@types/react-native-vector-icons": "^6.4.18",
-    "llama.rn": "^0.11.2",
+    "llama.rn": "^0.12.0-rc.5",
     "lottie-react-native": "^7.3.5",
     "moti": "^0.30.0",
     "patch-package": "^8.0.1",
diff --git a/src/components/ChatMessage/utils.ts b/src/components/ChatMessage/utils.ts
index cebd4e0a..12acaf74 100644
--- a/src/components/ChatMessage/utils.ts
+++ b/src/components/ChatMessage/utils.ts
@@ -4,12 +4,36 @@ import type { ParsedContent } from './types';
 
 /**
  * Parse content that may contain thinking/reasoning sections.
- * Handles two formats:
- * 1. HLSL.. HLSL tags (used by llama models with thinking enabled)
- * 2. <|channel|>analysis<|message|>...<|channel|>final<|message|> (used by Qwen and similar models)
+ * Handles three formats:
+ * 1. <think>...</think> tags (DeepSeek-style, used by llama models with thinking enabled)
+ * 2. <|channel>thought\n...<channel|> (Gemma 4)
+ * 3. <|channel|>analysis<|message|>...<|channel|>final<|message|> (Qwen and similar models)
  */
 export function parseThinkingContent(content: string): ParsedContent {
-  // First, check for channel-based thinking format
+  // Gemma 4 thinking format: <|channel>thought\n[thinking]<channel|>[response]
+  // Note asymmetric tags: <|channel> opens (with channel name 'thought'), <channel|> closes.
+  const gemmaOpenMatch = content.match(/<\|channel>thought\n/i);
+  const gemmaCloseMatch = content.match(/<channel\|>/i);
+
+  if (gemmaOpenMatch) {
+    const thinkStart = gemmaOpenMatch.index! + gemmaOpenMatch[0].length;
+    if (gemmaCloseMatch && gemmaCloseMatch.index! >= thinkStart) {
+      const thinkEnd = gemmaCloseMatch.index!;
+      return {
+        thinking: content.slice(thinkStart, thinkEnd).trim(),
+        response: content.slice(thinkEnd + gemmaCloseMatch[0].length).trim(),
+        isThinkingComplete: true,
+      };
+    }
+    // Still streaming — thinking not yet closed
+    return {
+      thinking: content.slice(thinkStart).trim(),
+      response: '',
+      isThinkingComplete: false,
+    };
+  }
+
+  // Check for channel-based thinking format
   // Format: <|channel|>analysis<|message|>[thinking content]<|channel|>final<|message|>[response]
   const channelAnalysisMatch = content.match(/<\|channel\|>analysis<\|message\|>/i);
   const channelFinalMatch = content.match(/<\|channel\|>final<\|message\|>/i);
diff --git a/src/components/ModelCard.tsx b/src/components/ModelCard.tsx
index 6c8145fc..9d332ba0 100644
--- a/src/components/ModelCard.tsx
+++ b/src/components/ModelCard.tsx
@@ -138,7 +138,7 @@ export const ModelCard: React.FC<ModelCardProps> = ({
       disabled={!onPress}
       testID={testID}
     >
-      <View style={styles.cardRow}>
+<View style={styles.cardRow}>
         <View style={styles.cardContent}>
           {compact ? (
             <CompactModelCardContent
diff --git a/src/constants/models.ts b/src/constants/models.ts
index ed3e6461..fd3e2336 100644
--- a/src/constants/models.ts
+++ b/src/constants/models.ts
@@ -11,9 +11,30 @@ export const MODEL_RECOMMENDATIONS = {
   ],
 };
 
-// Curated list of recommended models for mobile (updated Feb 2026)
+// Curated list of recommended models for mobile (updated Apr 2026)
 // All IDs use official org repos where available, ggml-org (HuggingFace official) as fallback
 export const RECOMMENDED_MODELS = [
+  // --- NEW: Gemma 4 (Apr 2026) ---
+  {
+    id: 'ggml-org/gemma-4-E2B-it-GGUF',
+    name: 'Gemma 4 E2B',
+    params: 2,
+    description: 'Google\'s latest with thinking mode, MoE architecture',
+    minRam: 4,
+    type: 'text' as const,
+    org: 'google',
+    isNew: true,
+  },
+  {
+    id: 'ggml-org/gemma-4-E4B-it-GGUF',
+    name: 'Gemma 4 E4B',
+    params: 4,
+    description: 'Google\'s latest, stronger reasoning + vision',
+    minRam: 6,
+    type: 'vision' as const,
+    org: 'google',
+    isNew: true,
+  },
   // --- Text: Ultra-light (3 GB+) ---
   {
     id: 'unsloth/Qwen3.5-0.8B-GGUF',
diff --git a/src/screens/ChatScreen/useChatGenerationActions.ts b/src/screens/ChatScreen/useChatGenerationActions.ts
index 08955a46..1c44813a 100644
--- a/src/screens/ChatScreen/useChatGenerationActions.ts
+++ b/src/screens/ChatScreen/useChatGenerationActions.ts
@@ -23,7 +23,6 @@ import { embeddingService } from '../../services/rag/embedding';
 import { useChatStore, useProjectStore, useRemoteServerStore } from '../../stores';
 import { Message, MediaAttachment, Project, DownloadedModel, RemoteModel, ModelLoadingStrategy, CacheType } from '../../types';
 import logger from '../../utils/logger';
-import { shouldUseToolsForMessage } from './toolUsage';
 type SetState<T> = Dispatch<SetStateAction<T>>;
 const FALLBACK_RECENT_MESSAGE_COUNT = 2;
 
@@ -224,6 +223,17 @@ async function injectRagContext(projectId: string | undefined, query: string, pr
   }
   return prompt;
 }
+/**
+ * Prepends Gemma 4's `<|think|>` marker to the system prompt; the model needs it there to think.
+ * On the mobile-sized E2B/E4B variants, leaving the marker out turns thinking off entirely.
+ * Remote models, non-Gemma-4 models, and disabled thinking all get the prompt back unchanged.
+ */
+function applyGemma4ThinkToken(prompt: string, isRemote: boolean): string {
+  if (isRemote) return prompt;
+  const needsThinkToken = llmService.isGemma4Model() && llmService.isThinkingEnabled();
+  return needsThinkToken ? `<|think|>\n${prompt}` : prompt;
+}
+
 function resolveToolsAndPrompt(deps: GenerationDeps, conversation: any): { enabledTools: string[]; rawPrompt: string } {
   const project = conversation?.projectId ? useProjectStore.getState().getProject(conversation.projectId) : null;
   const { activeServerId, activeRemoteTextModelId } = useRemoteServerStore.getState();
@@ -253,11 +263,12 @@ export async function startGenerationFn(deps: GenerationDeps, call: StartGenerat
   const conversation = useChatStore.getState().conversations.find(c => c.id === targetConversationId);
   const { enabledTools, rawPrompt } = resolveToolsAndPrompt(deps, conversation);
   const basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt);
-  // Remote models use native tool_choice: 'auto' — skip heuristic gate and always pass enabled tools
   const isRemote = !!useRemoteServerStore.getState().activeRemoteTextModelId;
-  const heuristicMatch = shouldUseToolsForMessage(messageText, enabledTools);
-  const activeTools = (isRemote || heuristicMatch) ? enabledTools : [];
-  const systemPrompt = (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt;
+  const activeTools = enabledTools;
+  const systemPrompt = applyGemma4ThinkToken(
+    (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt,
+    isRemote,
+  );
   logger.log(`[ChatGen][DEBUG] isRemote=${isRemote}, tools=[${activeTools.join(', ')}], path=${activeTools.length > 0 ? 'withTools' : 'generate'}`);
   const messagesForContext = buildMessagesForContext(targetConversationId, messageText, systemPrompt);
   await prepareContext(setDebugInfo, systemPrompt, messagesForContext);
@@ -332,9 +343,12 @@ export async function regenerateResponseFn(deps: GenerationDeps, call: Regenerat
     .map(m => m.id === userMessage.id ? { ...m, content: messageText } : m);
   const { enabledTools, rawPrompt } = resolveToolsAndPrompt(deps, conversation);
   const isRemote = !!useRemoteServerStore.getState().activeRemoteTextModelId;
-  const activeTools = (isRemote || shouldUseToolsForMessage(messageText, enabledTools)) ? enabledTools : [];
+  const activeTools = enabledTools;
   const basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt);
-  const systemPrompt = (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt;
+  const systemPrompt = applyGemma4ThinkToken(
+    (!isRemote && activeTools.length > 0) ? `${basePrompt}${buildToolSystemPromptHint(activeTools)}` : basePrompt,
+    isRemote,
+  );
   const { prefix, filtered } = applyCompactionPrefix(conversation, systemPrompt, messagesUpToUser);
   try {
     await generateWithCompactionRetry({ id: targetConversationId, prompt: systemPrompt, messages: [...prefix, ...filtered] }, activeTools, conversation?.projectId);
diff --git a/src/screens/ModelsScreen/importHelpers.ts b/src/screens/ModelsScreen/importHelpers.ts
index 651417e4..7667b318 100644
--- a/src/screens/ModelsScreen/importHelpers.ts
+++ b/src/screens/ModelsScreen/importHelpers.ts
@@ -1,5 +1,7 @@
+import RNFS from 'react-native-fs';
+import { Alert } from 'react-native';
 import { modelManager } from '../../services';
-import { showAlert, hideAlert, AlertState } from '../../components/CustomAlert';
+import { showAlert, AlertState } from '../../components/CustomAlert';
 import { DownloadedModel } from '../../types';
 
 export type GgufFileRef = { uri: string; name: string; size: number };
@@ -43,13 +45,21 @@ export async function importGgufFiles(
   deps: GgufImportDeps,
 ): Promise<void> {
   const { setAlertState, setImportProgress, addDownloadedModel } = deps;
+  console.log('[IMPORT] importGgufFiles called with', files.length, 'file(s)');
 
   if (files.length === 1) {
+    const resolvedFileName = files[0].name ?? 'unknown';
+    console.log('[IMPORT] Single file import. uri:', files[0].uri, '| name:', JSON.stringify(files[0].name), '| resolvedFileName:', resolvedFileName, '| size:', files[0].size);
     const model = await modelManager.importLocalModel({
       sourceUri: files[0].uri,
-      fileName: files[0].name ?? 'unknown',
-      onProgress: p => setImportProgress(p),
+      fileName: resolvedFileName,
+      sourceSize: files[0].size,
+      onProgress: p => {
+        console.log(`[IMPORT] Progress: ${(p.fraction * 100).toFixed(1)}% — ${p.fileName}`);
+        setImportProgress(p);
+      },
     });
+    console.log('[IMPORT] Single file import complete. model.name:', model.name, '| model.id:', model.id);
     addDownloadedModel(model);
     setAlertState(showAlert('Success', `${model.name} imported successfully!`));
     return;
@@ -57,30 +67,70 @@ export async function importGgufFiles(
 
   const file1: GgufFileRef = { uri: files[0].uri, name: files[0].name ?? '', size: files[0].size ?? 0 };
   const file2: GgufFileRef = { uri: files[1].uri, name: files[1].name ?? '', size: files[1].size ?? 0 };
+  console.log('[IMPORT] Two-file (vision) import.');
+  console.log('[IMPORT] file1 — name:', JSON.stringify(file1.name), '| size:', file1.size, '| uri:', file1.uri);
+  console.log('[IMPORT] file2 — name:', JSON.stringify(file2.name), '| size:', file2.size, '| uri:', file2.uri);
+  console.log('[IMPORT] isMmProj(file1.name):', isMmProj(file1.name), '| isMmProj(file2.name):', isMmProj(file2.name));
+
+  // Check if files exist RIGHT AFTER picker returns — before any dialog
+  const file1ExistsBefore = await RNFS.exists(file1.uri.replace('file://', ''));
+  const file2ExistsBefore = await RNFS.exists(file2.uri.replace('file://', ''));
+  console.log('[IMPORT] FILE EXISTS CHECK (before dialog) — file1:', file1ExistsBefore, '| file2:', file2ExistsBefore);
+  console.log('[IMPORT] file1 decoded path:', decodeURIComponent(file1.uri.replace('file://', '')));
+  console.log('[IMPORT] file2 decoded path:', decodeURIComponent(file2.uri.replace('file://', '')));
+
   const { mainFile, mmProjFile } = classifyGgufPair(file1, file2);
+  console.log('[IMPORT] Classification — mainFile:', mainFile.name, '| mmProjFile:', mmProjFile.name);
+
+  const dialogOpenTime = Date.now();
+  console.log('[IMPORT] Showing confirmation dialog at t=0ms');
 
   const confirmed = await new Promise<boolean>(resolve => {
-    setAlertState(
-      showAlert(
-        'Import Vision Model?',
-        `Main model:  ${mainFile.name}\nProjector:    ${mmProjFile.name}\n\nIf these look wrong, cancel and rename your files.`,
-        [
-          { text: 'Cancel', style: 'cancel', onPress: () => { setAlertState(hideAlert()); resolve(false); } },
-          { text: 'Import', onPress: () => { setAlertState(hideAlert()); resolve(true); } },
-        ],
-      ),
+    Alert.alert(
+      'Import Vision Model?',
+      `Main model:  ${mainFile.name}\nProjector:    ${mmProjFile.name}\n\nIf these look wrong, cancel and rename your files.`,
+      [
+        { text: 'Cancel', style: 'cancel', onPress: () => resolve(false) },
+        { text: 'Import', onPress: () => resolve(true) },
+      ],
+      { cancelable: false },
     );
   });
 
-  if (!confirmed) return;
+  const dialogDurationMs = Date.now() - dialogOpenTime;
+  console.log('[IMPORT] Dialog closed after', dialogDurationMs, 'ms. confirmed:', confirmed);
+
+  if (!confirmed) {
+    console.log('[IMPORT] User cancelled vision model import confirmation.');
+    return;
+  }
+
+  // Check if files STILL exist after dialog was dismissed — key check
+  const mainExistsAfter = await RNFS.exists(decodeURIComponent(mainFile.uri.replace('file://', '')));
+  const mmProjExistsAfter = await RNFS.exists(decodeURIComponent(mmProjFile.uri.replace('file://', '')));
+  console.log('[IMPORT] FILE EXISTS CHECK (after dialog, before copy) — mainFile:', mainExistsAfter, '| mmProjFile:', mmProjExistsAfter);
+  console.log('[IMPORT] mainFile path:', decodeURIComponent(mainFile.uri.replace('file://', '')));
+  console.log('[IMPORT] mmProjFile path:', decodeURIComponent(mmProjFile.uri.replace('file://', '')));
+
+  if (!mainExistsAfter || !mmProjExistsAfter) {
+    console.log('[IMPORT] ⚠️ FILES GONE after dialog! iOS deleted temp inbox files during the', dialogDurationMs, 'ms dialog wait.');
+    console.log('[IMPORT] This confirms the temp file eviction bug. Need keepLocalCopy() before dialog.');
+  }
 
+  console.log('[IMPORT] Vision import confirmed. Starting importLocalModel...');
   const model = await modelManager.importLocalModel({
     sourceUri: mainFile.uri,
     fileName: mainFile.name,
-    onProgress: p => setImportProgress(p),
+    sourceSize: mainFile.size,
+    onProgress: p => {
+      console.log(`[IMPORT] Vision progress: ${(p.fraction * 100).toFixed(1)}% — ${p.fileName}`);
+      setImportProgress(p);
+    },
     mmProjSourceUri: mmProjFile.uri,
     mmProjFileName: mmProjFile.name,
+    mmProjSourceSize: mmProjFile.size,
   });
+  console.log('[IMPORT] Vision import complete. model.name:', model.name, '| isVisionModel:', model.isVisionModel, '| mmProjPath:', model.mmProjPath);
   addDownloadedModel(model);
   setAlertState(showAlert('Success', `${model.name} imported with vision projector!`));
 }
diff --git a/src/screens/ModelsScreen/useModelsScreen.ts b/src/screens/ModelsScreen/useModelsScreen.ts
index 5f5570c8..c2d0b250 100644
--- a/src/screens/ModelsScreen/useModelsScreen.ts
+++ b/src/screens/ModelsScreen/useModelsScreen.ts
@@ -120,44 +120,81 @@ export function useModelsScreen() {
     importImageModelZip(sourceUri, fileName, { addDownloadedImageModel, activeImageModelId, setActiveImageModelId, setImportProgress, setAlertState });
 
   const handleImportLocalModel = async () => {
-    if (isImporting) return;
-    setIsImporting(true);
+    console.log('[IMPORT] handleImportLocalModel called. isImporting =', isImporting);
+    if (isImporting) {
+      console.log('[IMPORT] BLOCKED — isImporting is true, returning early. This is the silent-exit bug.');
+      return;
+    }
     try {
+      console.log('[IMPORT] Opening file picker...');
       const result = await pick({ type: [types.allFiles], allowMultiSelection: true });
-      if (!result || result.length === 0) return;
+      console.log('[IMPORT] Picker returned. File count:', result?.length ?? 0);
+      setIsImporting(true);
+      console.log('[IMPORT] setIsImporting(true) called AFTER picker returns');
+      result?.forEach((f, i) => {
+        console.log(`[IMPORT] File[${i}] name=${JSON.stringify(f.name)} uri=${f.uri} size=${f.size} type=${f.type} nativeType=${f.nativeType} error=${f.error}`);
+      });
+
+      if (!result || result.length === 0) {
+        console.log('[IMPORT] Empty result from picker, returning.');
+        return;
+      }
 
-      const allGguf = result.every(f => (f.name ?? '').toLowerCase().endsWith('.gguf'));
-      const singleZip = result.length === 1 && (result[0].name ?? '').toLowerCase().endsWith('.zip');
+      // Resolve filename: use picker name if available, fall back to last path segment of URI
+      const resolvedFiles = result.map(f => ({
+        ...f,
+        name: f.name?.trim() || decodeURIComponent(f.uri.split('/').pop() ?? '') || 'unknown',
+      }));
+      resolvedFiles.forEach((f, i) => {
+        console.log(`[IMPORT] File[${i}] resolvedName=${JSON.stringify(f.name)} (original=${JSON.stringify(result[i].name)})`);
+      });
+
+      const allGguf = resolvedFiles.every(f => f.name.toLowerCase().endsWith('.gguf'));
+      const singleZip = resolvedFiles.length === 1 && resolvedFiles[0].name.toLowerCase().endsWith('.zip');
+      console.log('[IMPORT] Validation — allGguf:', allGguf, '| singleZip:', singleZip);
+      resolvedFiles.forEach((f, i) => {
+        console.log(`[IMPORT] File[${i}] nameForCheck=${JSON.stringify(f.name)} endsWithGguf=${f.name.toLowerCase().endsWith('.gguf')}`);
+      });
 
       if (!allGguf && !singleZip) {
+        console.log('[IMPORT] VALIDATION FAILED — showing Invalid File alert');
         setAlertState(showAlert(
           'Invalid File',
-          result.length > 1
+          resolvedFiles.length > 1
             ? 'When selecting multiple files, all must be .gguf files (main model + mmproj projector).'
             : 'Supported formats: .gguf (text models) and .zip (image models).',
         ));
         return;
       }
 
-      if (result.length > 2) {
+      if (resolvedFiles.length > 2) {
+        console.log('[IMPORT] Too many files selected:', resolvedFiles.length);
         setAlertState(showAlert('Too Many Files', 'Select 1 file (text/zip) or 2 .gguf files (vision model + mmproj projector).'));
         return;
       }
 
-      const firstUri = result[0].uri;
-      const firstFileName = result[0].name ?? 'unknown';
+      const firstUri = resolvedFiles[0].uri;
+      const firstFileName = resolvedFiles[0].name;
+      console.log('[IMPORT] firstUri:', firstUri, '| firstFileName:', firstFileName);
       setImportProgress({ fraction: 0, fileName: firstFileName });
 
       if (singleZip) {
+        console.log('[IMPORT] Single ZIP detected, routing to handleImportImageModelZip');
         await handleImportImageModelZip(firstUri, firstFileName);
         return;
       }
 
-      await importGgufFiles(result.slice(0, 2), { setAlertState, setImportProgress, addDownloadedModel });
+      console.log('[IMPORT] Routing to importGgufFiles with', resolvedFiles.slice(0, 2).length, 'file(s)');
+      await importGgufFiles(resolvedFiles.slice(0, 2), { setAlertState, setImportProgress, addDownloadedModel });
     } catch (error: unknown) {
-      if (isErrorWithCode(error) && error.code === errorCodes.OPERATION_CANCELED) return;
+      if (isErrorWithCode(error) && error.code === errorCodes.OPERATION_CANCELED) {
+        console.log('[IMPORT] User cancelled the picker.');
+        return;
+      }
+      console.log('[IMPORT] ERROR caught:', getErrorMessage(error), error);
       setAlertState(showAlert('Import Failed', getErrorMessage(error)));
     } finally {
+      console.log('[IMPORT] finally block — resetting isImporting and progress');
       setIsImporting(false);
       setImportProgress(null);
     }
diff --git a/src/services/llm.ts b/src/services/llm.ts
index 55ab97dc..1fdcf145 100644
--- a/src/services/llm.ts
+++ b/src/services/llm.ts
@@ -150,14 +150,22 @@ class LLMService {
   supportsToolCalling(): boolean { return this.toolCallingSupported; }
   supportsThinking(): boolean { return this.thinkingSupported; }
   isThinkingEnabled(): boolean { return this.thinkingSupported && useAppStore.getState().settings.thinkingEnabled; }
+  /** True when the loaded model path names Gemma 4; a size suffix right after the 4 (e.g. "medgemma-4b") is not a match. */
+  isGemma4Model(): boolean {
+    return /gemma-?4(?!\d*(?:\.\d+)?b)/i.test(this.currentModelPath ?? '');
+  }
   /** Disable ctx_shift on Android when GPU layers are active — the OpenCL backend SIGSEGVs on the ggml set op used by KV cache shifting. */
   private shouldDisableCtxShift(): boolean { return Platform.OS === 'android' && this.activeGpuLayers > 0; }
   private detectToolCallingSupport(): void {
     if (!this.context) { this.toolCallingSupported = false; return; }
     try {
       const jinja = (this.context as any)?.model?.chatTemplates?.jinja;
+      logger.log('[LLM][TOOLS] Full jinja caps:', JSON.stringify(jinja));
+      logger.log('[LLM][TOOLS] defaultCaps?.toolCalls:', jinja?.defaultCaps?.toolCalls);
+      logger.log('[LLM][TOOLS] toolUse:', jinja?.toolUse);
+      logger.log('[LLM][TOOLS] toolUseCaps?.toolCalls:', jinja?.toolUseCaps?.toolCalls);
       this.toolCallingSupported = !!(jinja?.defaultCaps?.toolCalls || jinja?.toolUse || jinja?.toolUseCaps?.toolCalls);
-      logger.log('[LLM] Tool calling supported:', this.toolCallingSupported);
+      logger.log('[LLM][TOOLS] toolCallingSupported =', this.toolCallingSupported);
     } catch (e) { logger.warn('[LLM] Error detecting tool calling support:', e); this.toolCallingSupported = false; }
   }
   private detectThinkingSupport(): void {
@@ -196,11 +204,13 @@ class LLMService {
       const startTime = Date.now();
       let firstTokenMs = 0, tokenCount = 0, firstReceived = false;
       let fullContent = '', fullReasoningContent = '', streamedContentSoFar = '', streamedReasoningSoFar = '';
-      const completionParams = { messages: oaiMessages, ...buildCompletionParams(settings, { disableCtxShift: this.shouldDisableCtxShift() }), ...buildThinkingCompletionParams(this.isThinkingEnabled()) };
+      const completionParams = { messages: oaiMessages, ...buildCompletionParams(settings, { disableCtxShift: this.shouldDisableCtxShift() }), ...buildThinkingCompletionParams(this.isThinkingEnabled(), this.isGemma4Model()) };
+      logger.log(`[LLM][THINKING] thinkingSupported=${this.thinkingSupported}, thinkingEnabled=${useAppStore.getState().settings.thinkingEnabled}, isThinkingEnabled=${this.isThinkingEnabled()}, enable_thinking=${(completionParams as any).enable_thinking}, reasoning_format=${(completionParams as any).reasoning_format}`);
       const completionResult = await safeCompletion(ctx, () => ctx.completion(completionParams, (data: any) => {
         if (!this.isGenerating || !data.token) return;
-        if (!firstReceived) { firstReceived = true; firstTokenMs = Date.now() - startTime; }
+        if (!firstReceived) { firstReceived = true; firstTokenMs = Date.now() - startTime; logger.log(`[LLM][THINKING] First token raw data — token: ${JSON.stringify(data.token)}, content: ${JSON.stringify(data.content)}, reasoning_content: ${JSON.stringify(data.reasoning_content)}`); }
         tokenCount++;
+        if (data.reasoning_content) logger.log(`[LLM][THINKING] reasoning_content chunk received: ${JSON.stringify(data.reasoning_content)}`);
         const content = getStreamingDelta(data.content ?? (!data.reasoning_content ? data.token : undefined), streamedContentSoFar);
         const reasoningContent = getStreamingDelta(data.reasoning_content || undefined, streamedReasoningSoFar);
         if (data.content) streamedContentSoFar = data.content;
@@ -214,6 +224,7 @@ class LLMService {
       this.performanceStats = recordGenerationStats(startTime, firstTokenMs, tokenCount);
       if (completionResult?.context_full) { logger.log('[LLM] Context full detected — signalling for compaction'); throw new Error('Context is full'); }
       const result = { content: cr?.content || cr?.text || fullContent, reasoningContent: cr?.reasoning_content || fullReasoningContent };
+      logger.log(`[LLM][THINKING] Final result — hasContent=${!!result.content}, hasReasoningContent=${!!result.reasoningContent}, reasoningLength=${result.reasoningContent?.length ?? 0}, fullReasoningFromStream=${fullReasoningContent.length}`);
       onComplete?.(result);
       return result.content;
     })();
@@ -224,6 +235,7 @@ class LLMService {
     const work = generateWithToolsImpl({
       context: this.context, isGenerating: this.isGenerating,
       isThinkingEnabled: this.isThinkingEnabled(),
+      isGemma4Model: this.isGemma4Model(),
       disableCtxShift: this.shouldDisableCtxShift(),
       manageContextWindow: (msgs, extra?) => this.manageContextWindow(msgs, extra),
       convertToOAIMessages: (msgs) => this.convertToOAIMessages(msgs),
diff --git a/src/services/llmHelpers.ts b/src/services/llmHelpers.ts
index ba8971e5..571ec24e 100644
--- a/src/services/llmHelpers.ts
+++ b/src/services/llmHelpers.ts
@@ -148,7 +148,12 @@ export async function initContextWithFallback(
         logger.error(`[LLM] Attempt 3/3 failed (CPU, ctx=2048): ${finalMsg}`);
         logger.error(`[LLM] All 3 init attempts failed for model: ${modelPath}`);
         logger.error(`[LLM] Error chain — GPU: "${gpuMsg}" | CPU: "${cpuMsg}" | min-ctx: "${finalMsg}"`);
-        throw new Error(`Failed to load model even at minimum context (2048). This may indicate insufficient memory, a corrupted model file, or an unsupported model format. (${finalMsg})`);
+        const errorParts = [
+          gpuMsg && gpuMsg !== finalMsg ? `GPU: ${gpuMsg}` : null,
+          cpuMsg && cpuMsg !== finalMsg ? `CPU: ${cpuMsg}` : null,
+          `min-ctx: ${finalMsg}`,
+        ].filter(Boolean).join(' | ');
+        throw new Error(`Failed to load model even at minimum context (2048). This may indicate insufficient memory, a corrupted model file, or an unsupported model format.\n\nError chain: ${errorParts}`);
       }
     }
   }
@@ -184,8 +189,10 @@ export function supportsNativeThinking(context: LlamaContext | null): boolean {
     return false;
   }
 }
-export function buildThinkingCompletionParams(enableThinking: boolean): { enable_thinking: boolean; reasoning_format: 'none' | 'deepseek' } {
-  return { enable_thinking: enableThinking, reasoning_format: enableThinking ? 'deepseek' : 'none' };
+export function buildThinkingCompletionParams(enableThinking: boolean, isGemma4: boolean = false): { enable_thinking: boolean; reasoning_format: 'none' | 'deepseek' } {
+  // Gemma 4 wraps reasoning in <|channel>thought\n...<channel|> rather than DeepSeek's <think> tags, so DeepSeek
+  // extraction is requested only when thinking is on for a non-Gemma-4 model; Gemma 4 output is parsed by the app.
+  return { enable_thinking: enableThinking, reasoning_format: (!enableThinking || isGemma4) ? 'none' : 'deepseek' };
 }
 export function getStreamingDelta(nextValue: string | undefined, previousValue: string): string | undefined {
   if (!nextValue) return undefined;
diff --git a/src/services/llmToolGeneration.ts b/src/services/llmToolGeneration.ts
index 242f337d..a057f201 100644
--- a/src/services/llmToolGeneration.ts
+++ b/src/services/llmToolGeneration.ts
@@ -13,6 +13,68 @@ import logger from '../utils/logger';
 type ToolStreamCallback = (data: StreamToken) => void;
 type ToolCompleteCallback = (fullResponse: string) => void;
 
+/**
+ * Streaming filter that keeps Gemma 4's native tool-call markup out of the visible text.
+ * Gemma 4 wraps tool calls in <|tool_call>...<tool_call|> — llama.rn parses the structured
+ * call fine, but the raw tokens still flow through data.token. Text that might be the start
+ * of a tag is held back until a later token settles it; if no further token arrives, that
+ * held-back tail is never emitted (there is no end-of-stream flush).
+ */
+class ToolCallTokenFilter {
+  private static readonly OPEN_TAG = '<|tool_call>';
+  private static readonly CLOSE_TAG = '<tool_call|>';
+
+  /** Unclassified text: empty, or a tail that may still grow into a tag. */
+  private pending = '';
+  /** True while positioned between an open tag and its close tag. */
+  private suppressing = false;
+
+  /**
+   * Feeds one streamed token through the filter.
+   * @param token Raw text of the next streamed token.
+   * @returns The text that is safe to display now; may be an empty string.
+   */
+  process(token: string): string {
+    this.pending += token;
+    return this.drain();
+  }
+
+  /** Classifies as much pending text as possible and returns the visible portion. */
+  private drain(): string {
+    const visible: string[] = [];
+
+    while (this.pending.length > 0) {
+      // Outside a block we look for the opener, inside one for the closer.
+      const tag = this.suppressing ? ToolCallTokenFilter.CLOSE_TAG : ToolCallTokenFilter.OPEN_TAG;
+      const tagAt = this.pending.indexOf(tag);
+
+      if (tagAt === -1) {
+        // No complete tag: keep only a tail that could be the beginning of `tag`;
+        // everything before it is shown (outside a block) or dropped (inside one).
+        const settled = this.pending.length - this.tagPrefixOverlap(this.pending, tag);
+        if (!this.suppressing) visible.push(this.pending.slice(0, settled));
+        this.pending = this.pending.slice(settled);
+        break;
+      }
+
+      // Complete tag: text before an opener is visible, text before a closer is dropped;
+      // the tag itself is always consumed and the state flips.
+      if (!this.suppressing) visible.push(this.pending.slice(0, tagAt));
+      this.pending = this.pending.slice(tagAt + tag.length);
+      this.suppressing = !this.suppressing;
+    }
+
+    return visible.join('');
+  }
+
+  /** Length of the longest proper prefix of `tag` that `text` ends with, or 0 if none. */
+  private tagPrefixOverlap(text: string, tag: string): number {
+    let len = Math.min(tag.length - 1, text.length);
+    while (len > 0 && !text.endsWith(tag.slice(0, len))) len--;
+    return len;
+  }
+}
+
 function parseToolCall(tc: any): ToolCall {
   const fn = tc.function || {};
   let args = fn.arguments || {};
@@ -26,6 +88,7 @@ export interface ToolGenerationDeps {
   context: any;
   isGenerating: boolean;
   isThinkingEnabled: boolean;
+  isGemma4Model: boolean;
   disableCtxShift: boolean;
   manageContextWindow: (messages: Message[], extraReserve?: number) => Promise<Message[]>;
   convertToOAIMessages: (messages: Message[]) => any[];
@@ -57,13 +120,15 @@ export async function generateWithToolsImpl(
     let fullResponse = '';
     let firstReceived = false;
     const collectedToolCalls: ToolCall[] = [];
+    // Gemma 4 emits <|tool_call>...<tool_call|> tokens in the stream; filter them out.
+    const toolCallFilter = deps.isGemma4Model ? new ToolCallTokenFilter() : null;
 
     const completionParams = {
       messages: oaiMessages,
       ...buildCompletionParams(settings, { disableCtxShift: deps.disableCtxShift }),
       tools: options.tools,
       tool_choice: 'auto',
-      ...buildThinkingCompletionParams(deps.isThinkingEnabled),
+      ...buildThinkingCompletionParams(deps.isThinkingEnabled, deps.isGemma4Model),
     };
     logger.log('[LLM-Tools] === INPUT ===');
     logger.log(JSON.stringify(completionParams, null, 2));
@@ -77,8 +142,9 @@ export async function generateWithToolsImpl(
       if (!data.token) return;
       if (!firstReceived) { firstReceived = true; firstTokenMs = Date.now() - startTime; }
       tokenCount++;
-      fullResponse += data.token;
-      options.onStream?.({ content: data.token });
+      const visibleToken = toolCallFilter ? toolCallFilter.process(data.token) : data.token;
+      fullResponse += visibleToken;
+      if (visibleToken) options.onStream?.({ content: visibleToken });
     }), 'generateWithTools');
     logger.log('[LLM-Tools] === OUTPUT ===');
     logger.log(JSON.stringify(completionResult, null, 2));
diff --git a/src/services/modelManager/scan.ts b/src/services/modelManager/scan.ts
index d3d26db7..68acda54 100644
--- a/src/services/modelManager/scan.ts
+++ b/src/services/modelManager/scan.ts
@@ -196,53 +196,96 @@ export interface ImportLocalModelOpts {
   sourceUri: string;
   fileName: string;
   modelsDir: string;
+  sourceSize?: number | null;
   onProgress?: (progress: { fraction: number; fileName: string }) => void;
   mmProjSourceUri?: string;
   mmProjFileName?: string;
+  mmProjSourceSize?: number | null;
 }
 
-async function resolveAndroidUri(uri: string, cacheFileName: string): Promise<{ resolved: string; tempPath: string | null }> {
-  if (Platform.OS !== 'android' || !uri.startsWith('content://')) {
-    return { resolved: uri, tempPath: null };
+// Android content:// URIs go to RNFS.copyFile untouched; other URIs are percent-decoded (%20 → space) so RNFS can find the file.
+function resolveUri(uri: string): string {
+  if (uri.startsWith('content://')) {
+    console.log('[IMPORT][scan] resolveUri — Android content:// URI, using as-is');
+    console.log('[IMPORT][scan] uri:', uri);
+    return uri;
   }
-  const tempPath = `${RNFS.CachesDirectoryPath}/${Date.now()}_${cacheFileName}`;
-  await RNFS.copyFile(uri, tempPath);
-  return { resolved: tempPath, tempPath };
+  let decoded = uri;
+  try { decoded = decodeURIComponent(uri); } catch { /* stray '%' raises URIError: keep the raw URI instead of throwing */ }
+  console.log('[IMPORT][scan] resolveUri — file URI, decoded');
+  console.log('[IMPORT][scan] original:', uri);
+  console.log('[IMPORT][scan] decoded: ', decoded);
+  return decoded;
 }
 
 
 export async function importLocalModel(opts: ImportLocalModelOpts): Promise<DownloadedModel> { // NOSONAR
-  const { sourceUri, fileName, modelsDir, onProgress, mmProjSourceUri, mmProjFileName } = opts;
+  const { sourceUri, fileName, modelsDir, sourceSize, onProgress, mmProjSourceUri, mmProjFileName, mmProjSourceSize } = opts;
+  const importStart = Date.now();
+  const elapsed = () => `+${Date.now() - importStart}ms`;
+
+  console.log('[IMPORT][scan] ── importLocalModel START ──────────────────');
+  console.log('[IMPORT][scan] Platform.OS:', Platform.OS);
+  console.log('[IMPORT][scan] fileName:', fileName);
+  console.log('[IMPORT][scan] sourceUri:', sourceUri);
+  console.log('[IMPORT][scan] sourceSize:', sourceSize ?? 'unknown');
+  console.log('[IMPORT][scan] mmProjFileName:', mmProjFileName ?? 'none');
+  console.log('[IMPORT][scan] mmProjSourceUri:', mmProjSourceUri ?? 'none');
+  console.log('[IMPORT][scan] mmProjSourceSize:', mmProjSourceSize ?? 'unknown');
+  console.log('[IMPORT][scan] modelsDir:', modelsDir);
+
+  // Heartbeat — logs every 3s so we can see exactly where it gets stuck
+  let heartbeatStep = 'init';
+  const heartbeat = setInterval(() => {
+    console.log(`[IMPORT][scan] ⏱ HEARTBEAT — still running at ${elapsed()}, current step: ${heartbeatStep}`);
+  }, 3000);
 
   if (!fileName.toLowerCase().endsWith('.gguf')) {
+    clearInterval(heartbeat);
+    console.log('[IMPORT][scan] ERROR — fileName does not end with .gguf:', fileName);
     throw new Error('Only .gguf files can be imported');
   }
 
-  const { resolved: resolvedSource, tempPath: tempCachePath } = await resolveAndroidUri(sourceUri, fileName);
-  const { resolved: resolvedMmProjSource, tempPath: tempMmProjCachePath } = mmProjSourceUri && mmProjFileName
-    ? await resolveAndroidUri(mmProjSourceUri, mmProjFileName)
-    : { resolved: mmProjSourceUri, tempPath: null };
+  heartbeatStep = 'resolving URIs';
+  const resolvedSource = resolveUri(sourceUri);
+  const resolvedMmProjSource = mmProjSourceUri ? resolveUri(mmProjSourceUri) : undefined;
+  console.log(`[IMPORT][scan] ${elapsed()} resolvedSource:`, resolvedSource);
+  if (mmProjFileName) {
+    console.log(`[IMPORT][scan] ${elapsed()} resolvedMmProjSource:`, resolvedMmProjSource ?? 'none');
+  }
 
   try {
+    heartbeatStep = 'checking dest paths';
     const destPath = `${modelsDir}/${fileName}`;
-    if (await RNFS.exists(destPath)) throw new Error(`A model file named "${fileName}" already exists`);
+    console.log(`[IMPORT][scan] ${elapsed()} destPath:`, destPath);
+    const destExists = await RNFS.exists(destPath);
+    console.log(`[IMPORT][scan] ${elapsed()} dest already exists:`, destExists);
+    if (destExists) throw new Error(`A model file named "${fileName}" already exists`);
     if (mmProjFileName && await RNFS.exists(`${modelsDir}/${mmProjFileName}`)) {
       throw new Error(`A file named "${mmProjFileName}" already exists`);
     }
 
     // Copy main model: progress 0→0.5 when mmproj present, 0→1 otherwise
     const mainProgressScale = mmProjFileName ? 0.5 : 1;
+    heartbeatStep = 'copying main model';
+    console.log(`[IMPORT][scan] ${elapsed()} Starting main model copy. sourceSize: ${sourceSize ?? 'unknown'} mainProgressScale: ${mainProgressScale}`);
+    console.log(`[IMPORT][scan] ${elapsed()} copy FROM:`, resolvedSource);
+    console.log(`[IMPORT][scan] ${elapsed()} copy TO:  `, destPath);
+    const mainCopyStart = Date.now();
     await copyFileWithProgress(
       resolvedSource,
       destPath,
+      sourceSize ?? null,
       onProgress ? (fraction) => onProgress({ fraction: fraction * mainProgressScale, fileName }) : undefined,
     );
+    console.log(`[IMPORT][scan] ${elapsed()} Main model copy complete in ${Date.now() - mainCopyStart}ms`);
 
     const quantMatch = fileName.match(/[_-](Q\d+[_\w]*|f16|f32)/i);
     const quantization = quantMatch ? quantMatch[1].toUpperCase() : 'Unknown';
     const modelName = fileName.replace(/\.gguf$/i, '').replace(/[_-]Q\d+.*/i, '');
     const destStat = await RNFS.stat(destPath);
     const fileSize = parseSizeInt(destStat.size);
+    console.log('[IMPORT][scan] modelName:', modelName, '| quantization:', quantization, '| fileSize:', fileSize, 'bytes');
 
     const pseudoFile: ModelFile = { name: fileName, size: fileSize, quantization, downloadUrl: '' };
     const model = await buildDownloadedModel({ modelId: 'local_import', file: pseudoFile, resolvedLocalPath: destPath });
@@ -257,36 +300,61 @@ export async function importLocalModel(opts: ImportLocalModelOpts): Promise<Down
     // Copy mmproj and link it to the model: progress 0.5→1
     if (mmProjFileName && resolvedMmProjSource) {
       const mmProjDestPath = `${modelsDir}/${mmProjFileName}`;
+      heartbeatStep = 'copying mmproj';
+      console.log(`[IMPORT][scan] ${elapsed()} Starting mmproj copy. mmProjSourceSize: ${mmProjSourceSize ?? 'unknown'}`);
+      console.log(`[IMPORT][scan] ${elapsed()} copy FROM:`, resolvedMmProjSource);
+      console.log(`[IMPORT][scan] ${elapsed()} copy TO:  `, mmProjDestPath);
+      const mmProjCopyStart = Date.now();
       await copyFileWithProgress(
         resolvedMmProjSource,
         mmProjDestPath,
+        mmProjSourceSize ?? null,
         onProgress
           ? (fraction) => onProgress({ fraction: 0.5 + fraction * 0.5, fileName: mmProjFileName })
           : undefined,
       );
+      console.log(`[IMPORT][scan] ${elapsed()} mmproj copy complete in ${Date.now() - mmProjCopyStart}ms`);
       const mmProjStat = await RNFS.stat(mmProjDestPath);
       builtModel.mmProjPath = mmProjDestPath;
       builtModel.mmProjFileName = mmProjFileName;
       builtModel.mmProjFileSize = parseSizeInt(mmProjStat.size);
       builtModel.isVisionModel = true;
+      console.log(`[IMPORT][scan] ${elapsed()} mmproj linked. mmProjFileSize:`, builtModel.mmProjFileSize, 'bytes');
     }
 
+    heartbeatStep = 'persisting metadata';
+    console.log(`[IMPORT][scan] ${elapsed()} Persisting model metadata...`);
     await persistDownloadedModel(builtModel, modelsDir);
+    console.log(`[IMPORT][scan] ${elapsed()} ── importLocalModel COMPLETE. id: ${builtModel.id} ──`);
     return builtModel;
+  } catch (e) {
+    console.log(`[IMPORT][scan] ${elapsed()} ❌ importLocalModel ERROR:`, e);
+    throw e;
   } finally {
-    if (tempCachePath) await RNFS.unlink(tempCachePath).catch(() => {});
-    if (tempMmProjCachePath) await RNFS.unlink(tempMmProjCachePath).catch(() => {});
+    clearInterval(heartbeat);
   }
 }
 
 async function copyFileWithProgress(
   source: string,
   dest: string,
+  knownTotalBytes: number | null,
   onProgress?: (fraction: number) => void,
 ): Promise<void> {
-  const sourceStat = await RNFS.stat(source);
-  const totalBytes = parseSizeInt(sourceStat.size);
+  const copyStart = Date.now();
+  const copyElapsed = () => `+${Date.now() - copyStart}ms`;
+  const totalBytes = knownTotalBytes ?? 0;
+  const totalMB = totalBytes > 0 ? (totalBytes / 1024 / 1024).toFixed(1) : '?';
+
+  console.log(`[IMPORT][scan] copyFileWithProgress START — knownTotalBytes: ${knownTotalBytes ?? 'unknown'} (${totalMB} MB)`);
+  console.log('[IMPORT][scan] FROM:', source);
+  console.log('[IMPORT][scan] TO:  ', dest);
+  if (!knownTotalBytes) {
+    console.log('[IMPORT][scan] No known size — progress will be indeterminate');
+  }
+
   let polling = true;
+  let lastWritten = 0;
 
   const pollInterval = setInterval(async () => {
     if (!polling) return;
@@ -295,21 +363,37 @@ async function copyFileWithProgress(
       if (exists) {
         const stat = await RNFS.stat(dest);
         const written = parseSizeInt(stat.size);
-        onProgress?.(totalBytes > 0 ? Math.min(written / totalBytes, 0.99) : 0);
+        const writtenMB = (written / 1024 / 1024).toFixed(1);
+        const delta = written - lastWritten;
+        const speedMBs = ((delta / 1024 / 1024) / 0.5).toFixed(1);
+        lastWritten = written;
+        if (totalBytes > 0) {
+          const pct = Math.min(written / totalBytes, 0.99);
+          console.log(`[IMPORT][scan] ${copyElapsed()} copy poll — ${writtenMB}/${totalMB} MB (${(pct * 100).toFixed(1)}%) speed: ${speedMBs} MB/s`);
+          onProgress?.(pct);
+        } else {
+          console.log(`[IMPORT][scan] ${copyElapsed()} copy poll — ${writtenMB} MB written (size unknown) speed: ${speedMBs} MB/s`);
+          // No fraction available — don't call onProgress so UI stays indeterminate
+        }
+      } else {
+        console.log(`[IMPORT][scan] ${copyElapsed()} copy poll — dest not created yet`);
       }
-    } catch {
-      // File may not exist yet, ignore
+    } catch (e) {
+      console.log(`[IMPORT][scan] ${copyElapsed()} copy poll error:`, e);
     }
   }, 500);
 
   try {
+    console.log(`[IMPORT][scan] ${copyElapsed()} calling RNFS.copyFile...`);
     await RNFS.copyFile(source, dest);
     polling = false;
     clearInterval(pollInterval);
+    console.log(`[IMPORT][scan] ${copyElapsed()} RNFS.copyFile resolved — 100% done`);
     onProgress?.(1);
   } catch (error) {
     polling = false;
     clearInterval(pollInterval);
+    console.log(`[IMPORT][scan] ${copyElapsed()} RNFS.copyFile FAILED:`, error);
     await RNFS.unlink(dest).catch(() => {});
     throw error;
   }
diff --git a/src/utils/messageContent.ts b/src/utils/messageContent.ts
index d23e77cb..52f9d92d 100644
--- a/src/utils/messageContent.ts
+++ b/src/utils/messageContent.ts
@@ -5,16 +5,29 @@ const CONTROL_TOKEN_PATTERNS: RegExp[] = [
   /<\|eot_id\|>/gi,
   /<\/s>/gi,
   /<tool_call>[\s\S]*?<\/tool_call>\s*/g,
+  // Gemma 4 native tool call format: <|tool_call>...<tool_call|>
+  // The streaming filter in llmToolGeneration suppresses these live;
+  // this catches any that slip through into stored message content.
+  /<\|tool_call>[\s\S]*?<tool_call\|>\s*/g,
+  // Gemma 4 string-delimiter token that may appear outside a tool block
+  /<\|">/g,
 ];
 
 // Patterns for channel-based thinking format (used by some models like Qwen)
 const CHANNEL_ANALYSIS_START = /<\|channel\|>analysis<\|message\|>/gi;
 const CHANNEL_FINAL_START = /<\|channel\|>final<\|message\|>/gi;
 
+// Gemma 4 thinking tags: <|channel>thought\n...<channel|>
+// These are stripped as a safety net; parseThinkingContent handles them before display.
+const GEMMA4_THINK_OPEN = /<\|channel>thought\n/gi;
+const GEMMA4_THINK_CLOSE = /<channel\|>/gi;
+
 export function stripControlTokens(content: string): string {
   let result = CONTROL_TOKEN_PATTERNS.reduce((acc, pattern) => acc.replace(pattern, ''), content);
   // Remove channel markers but preserve the content after them
   result = result.replace(CHANNEL_ANALYSIS_START, '');
   result = result.replace(CHANNEL_FINAL_START, '');
+  result = result.replace(GEMMA4_THINK_OPEN, '');
+  result = result.replace(GEMMA4_THINK_CLOSE, '');
   return result;
 }
\ No newline at end of file