AI_GUI/main.py at main · AlexC1991/AI_GUI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
"""
AI_GUI (VoxAI Orchestrator) - Main Entry Point

CRITICAL: The first thing we do is import bootstrap to set up temp directories.
This MUST happen before ANY other imports to prevent C: drive usage.
"""

# =========================================================
# STEP 0: BOOTSTRAP - MUST BE THE ABSOLUTE FIRST IMPORT
# =========================================================
# This sets up temp/cache directories and lies to Windows about C: drive
# having only 250MB free. This MUST run before torch/diffusers/etc load.

import bootstrap  # noqa: F401 - KEEP THIS FIRST!

# =========================================================
# STEP 1: BACKEND SETUP - MUST BE BEFORE LLAMA IMPORTS
# =========================================================
# This mirrors the backend setup from engine/vox_api.py:
# 1. Set LLAMA_CPP_LIB to point to our custom llama.dll
# 2. Add DLL directories to PATH
# 3. Pre-load ggml.dll and call ggml_backend_load_all()

import sys
import os
import ctypes
from pathlib import Path

# Absolute path of the directory containing this file; every other path in
# this module (engine DLLs, models/miopen_cache, temp_workspace) is built
# relative to it.
APP_DIR = Path(__file__).parent.absolute()
# Directory expected to hold the custom llama.dll / ggml.dll builds.
VOX_API_DIR = APP_DIR / "engine"

def _setup_vox_backend():
    """
    Set up the VOX-AI backend (DLL loading for llama.cpp).

    This MUST run before any import that could touch llama_cpp.

    Steps, in order:
      1. Point LLAMA_CPP_LIB at engine/llama.dll.
      2. Point GGML_BACKEND_SEARCH_PATH at engine/.
      3. Prepend engine/ and the app directory to PATH.
      4. Register DLL directories via os.add_dll_directory (when available).
      5. Load engine/ggml.dll and call ggml_backend_load_all() if exported.

    Returns:
        bool: True when every step ran and ggml.dll was loaded. False when
        engine/, llama.dll or ggml.dll is missing, or when loading ggml.dll
        raised. Environment changes made before a failure are not rolled
        back.
    """
    print("[VoxAI] Setting up custom backend...")

    # Check if engine/ exists
    if not VOX_API_DIR.exists():
        print(f"[VoxAI] WARNING: {VOX_API_DIR} not found")
        return False

    vox_str = str(VOX_API_DIR)
    llama_dll = VOX_API_DIR / "llama.dll"
    ggml_dll = VOX_API_DIR / "ggml.dll"

    if not llama_dll.exists():
        print(f"[VoxAI] WARNING: llama.dll not found in {vox_str}")
        return False

    # =========================================================
    # 1. Set LLAMA_CPP_LIB - This is the KEY
    # =========================================================
    os.environ["LLAMA_CPP_LIB"] = str(llama_dll)
    print(f"[VoxAI] LLAMA_CPP_LIB = {llama_dll}")

    # =========================================================
    # 2. Set GGML backend search path
    # =========================================================
    os.environ["GGML_BACKEND_SEARCH_PATH"] = vox_str

    # =========================================================
    # 3. Add to PATH (for dependent DLLs)
    # =========================================================
    os.environ["PATH"] = vox_str + os.pathsep + os.environ.get("PATH", "")

    # Also add parent dir (for ZLUDA if it's there)
    parent_dir = str(APP_DIR)
    os.environ["PATH"] = parent_dir + os.pathsep + os.environ["PATH"]

    # =========================================================
    # 4. Add DLL directories (Windows 10+ requirement)
    #    Python 3.8+ does NOT use PATH for DLL resolution.
    #    We must register every directory that contains DLLs
    #    that torch_cuda.dll (and others) depend on.
    # =========================================================
    if hasattr(os, 'add_dll_directory'):
        dll_dirs = [vox_str, parent_dir]

        # ZLUDA - provides nvcuda.dll, cublas.dll etc. that torch needs
        _default_zluda = os.path.join(os.environ.get("LOCALAPPDATA", ""), "zluda", "zluda")
        zluda_path = os.environ.get("ZLUDA_PATH", _default_zluda)
        if os.path.isdir(zluda_path):
            dll_dirs.append(zluda_path)

        # HIP / ROCm - provides amdhip64.dll, rocblas.dll etc.
        hip_path = os.environ.get("HIP_PATH", r"C:\Program Files\AMD\ROCm\6.2")
        hip_bin = os.path.join(hip_path, "bin")
        if os.path.isdir(hip_bin):
            dll_dirs.append(hip_bin)
        elif os.path.isdir(hip_path):
            dll_dirs.append(hip_path)

        # Count only the directories that were really registered, so the
        # summary line below is trustworthy when diagnosing DLL load errors.
        registered = 0
        for d in dll_dirs:
            try:
                os.add_dll_directory(d)
            except Exception as e:
                print(f"[VoxAI] DLL directory warning ({d}): {e}")
            else:
                registered += 1

        print(f"[VoxAI] Added {registered} DLL directories (incl. ZLUDA/HIP)")

    # =========================================================
    # 5. Pre-load ggml.dll and initialize backends
    # =========================================================
    if ggml_dll.exists():
        try:
            ggml = ctypes.CDLL(str(ggml_dll))
            print("[VoxAI] Loaded ggml.dll")

            # Only call the loader when this ggml build exports it.
            if hasattr(ggml, 'ggml_backend_load_all'):
                ggml.ggml_backend_load_all()
                print("[VoxAI] ✓ Backends loaded (ggml_backend_load_all)")
        except Exception as e:
            print(f"[VoxAI] Backend loading error: {e}")
            return False
    else:
        print("[VoxAI] WARNING: ggml.dll not found")
        return False

    # Mark as initialized
    os.environ["_VOX_BACKEND_INITIALIZED"] = "1"
    print("[VoxAI] ✓ Backend setup complete")
    return True

# RUN IMMEDIATELY - at import time, ahead of the imports further down this
# file. The boolean result is kept so main() can report whether the custom
# backend was set up.
_vox_ok = _setup_vox_backend()

# =========================================================
# NOW it's safe to import everything else
# =========================================================

import subprocess
import importlib
import atexit

# Set process priority (like standalone does)
# Best-effort: any failure (psutil missing, constant unavailable, permission
# denied) is swallowed and the process keeps its default priority.
# NOTE(review): HIGH_PRIORITY_CLASS looks like a Windows-only psutil constant;
# on other platforms this presumably raises and is silently ignored - confirm.
try:
    import psutil
    p = psutil.Process(os.getpid())
    p.nice(psutil.HIGH_PRIORITY_CLASS)
except Exception:
    pass

# -------------------------
# Environment Configuration
# -------------------------

# Runtime switches exported through the process environment. They are set in
# one batch here; the keys and values are exactly the ones the libraries read.
os.environ.update({
    # Fix ZLUDA/MIOpen issues for AMD GPUs
    "MIOPEN_FIND_MODE": "NORMAL",
    "MIOPEN_USER_DB_PATH": str(APP_DIR / "models" / "miopen_cache"),
    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
    "DISABLE_ADDMM_CUDA_LT": "1",
    # Disable Flash Attention (incompatible with ZLUDA)
    "DIFFUSERS_FLASH_ATTN": "0",
    "XFORMERS_DISABLED": "1",
})

# --- ZLUDA cuBLAS safety: disable TF32 + tensor ops ---
# ZLUDA reports SM88 but can't execute CUBLAS_GEMM_DEFAULT_TENSOR_OP.
# These settings tell PyTorch to avoid tensor op codepaths entirely.
try:
    # Runs at import time, after the os.environ assignments earlier in this
    # file have been made.
    import torch
    # Turn TF32 off for both matmul and cuDNN.
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False
    # Each SDP toggle is guarded with hasattr so a torch build that lacks it
    # is skipped instead of raising. Net effect: only the math SDP kernel is
    # left enabled.
    if hasattr(torch.backends.cuda, 'enable_mem_efficient_sdp'):
        torch.backends.cuda.enable_mem_efficient_sdp(False)
    if hasattr(torch.backends.cuda, 'enable_flash_sdp'):
        torch.backends.cuda.enable_flash_sdp(False)
    if hasattr(torch.backends.cuda, 'enable_math_sdp'):
        torch.backends.cuda.enable_math_sdp(True)
except Exception:
    # Any failure is ignored, including torch not being importable yet.
    # setup_torch() applies the same SDP toggles again later.
    pass  # torch not yet available at startup

# Note: Temp directories are now handled by bootstrap.py
# The following are kept for backward compatibility but bootstrap takes priority

# MIOpen cache (this can stay on E: as configured in bat file)
# Same location as the MIOPEN_USER_DB_PATH value exported above; the
# directory (and any missing parents) is created at import time.
MIOPEN_CACHE = APP_DIR / "models" / "miopen_cache"
MIOPEN_CACHE.mkdir(parents=True, exist_ok=True)

# -------------------------
# Dependency Configuration
# -------------------------

# Maps the *importable module name* (what ensure_dependencies() passes to
# importlib.import_module) to the *pip requirement* installed when that
# import fails. The two differ for some distributions, so the key must be
# the real import name or the package is reinstalled on every launch.
REQUIRED_PACKAGES = {
    "PySide6": "PySide6>=6.6",
    "markdown": "markdown",
    "pygments": "pygments",
    "psutil": "psutil",
    "requests": "requests",
    "peft": "peft",
    "diffusers": "diffusers>=0.31.0",
    "transformers": "transformers",
    "accelerate": "accelerate",
    "safetensors": "safetensors",
    "huggingface_hub": "huggingface_hub",
    "sentencepiece": "sentencepiece",
    # The protobuf distribution installs the `google.protobuf` package;
    # there is no top-level `protobuf` module to import.
    "google.protobuf": "protobuf",
    "gguf": "gguf",
    # DO NOT auto-install llama-cpp-python - we use custom DLLs
}


def install_package(package: str) -> None:
    """Install *package* with pip, using the interpreter running this app.

    pip's combined stdout/stderr is captured instead of being discarded, so
    that when the install fails the last lines of its log can be shown next
    to the failure message. On success nothing from pip is printed.

    Args:
        package: A pip requirement specifier, e.g. ``"PySide6>=6.6"``.

    Raises:
        SystemExit: with status 1 when pip exits with a non-zero code.
    """
    print(f"[VoxAI] Installing: {package}")
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", package],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",  # never fail on undecodable pip output
        )
    except subprocess.CalledProcessError as e:
        print(f"[VoxAI] Failed to install {package}: {e}")
        if e.output:
            # The reason for a pip failure is normally at the end of its log.
            for line in e.output.strip().splitlines()[-20:]:
                print(f"[VoxAI]   {line}")
        sys.exit(1)


def ensure_dependencies() -> None:
    """Import every module in REQUIRED_PACKAGES and pip-install the missing ones.

    A module counts as missing only when importing it raises ImportError.
    An OSError during import is reported and otherwise ignored, so the
    package is not reinstalled. Installation is delegated to
    install_package(), which exits the process if pip fails.
    """
    print("[VoxAI] Checking dependencies...")
    missing = []
    for module_name, package_name in REQUIRED_PACKAGES.items():
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing.append(package_name)
        except OSError as e:
            # torch may raise OSError when CUDA/ZLUDA DLLs fail to load
            print(f"[VoxAI] Non-fatal load error for {module_name}: {e}")

    if missing:
        print(f"[VoxAI] Installing {len(missing)} missing packages...")
        for package_name in missing:
            install_package(package_name)
        # The packages were installed while this interpreter was running;
        # drop the import finders' cached directory listings so that later
        # imports in this process can see them.
        importlib.invalidate_caches()

    print("[VoxAI] Dependencies OK\n")


# -------------------------
# Cleanup Functions
# -------------------------

def run_startup_cleanup():
    """Clear leftover temporary data when the application starts.

    Two independent passes are made:

    1. ``bootstrap.clear_temp()`` - its result is reported in MB when it is
       greater than zero. Any exception from this pass is ignored.
    2. The cleanup manager from ``backend.cleanup``, run with the HF cache and
       MIOpen cache excluded. If that import (or anything in the pass) raises
       ImportError, fallback_cleanup() is used instead; any other exception
       is printed.
    """
    print("[VoxAI] Running startup cleanup...")

    # Pass 1: bootstrap-managed temp directory (best effort).
    try:
        import bootstrap
        bytes_cleared = bootstrap.clear_temp()
        if bytes_cleared > 0:
            megabytes = bytes_cleared / 1024**2
            print(f"[VoxAI] Cleaned {megabytes:.1f} MB from temp")
    except Exception:
        pass

    # Pass 2: backend cleanup manager, with a local fallback.
    try:
        from backend.cleanup import get_cleanup_manager
        bytes_freed = get_cleanup_manager(APP_DIR).cleanup_all(
            include_hf_cache=False,
            include_miopen=False,
        )
        report = (
            f"[VoxAI] Cleaned {bytes_freed / 1024**3:.2f} GB"
            if bytes_freed > 0
            else "[VoxAI] No additional temp files to clean"
        )
        print(report)
    except ImportError:
        fallback_cleanup()
    except Exception as exc:
        print(f"[VoxAI] Cleanup error: {exc}")


def fallback_cleanup():
    """Best-effort cleanup used when ``backend.cleanup`` cannot be used.

    Empties the bootstrap temp directory (or ``APP_DIR / "temp_workspace"``
    when bootstrap cannot supply one), removes the known offload folders from
    the system temp directory, and prints how much space was actually freed.

    Filesystem errors are ignored item by item: one locked or vanished file
    never stops the remaining items from being removed, and only bytes that
    were really deleted are counted.
    """
    import shutil
    import tempfile

    def _tree_size(root):
        """Return the total size in bytes of the regular files under *root*."""
        size = 0
        try:
            for entry in root.rglob("*"):
                try:
                    if entry.is_file():
                        size += entry.stat().st_size
                except OSError:
                    pass  # entry vanished or is unreadable; skip it
        except OSError:
            pass  # directory became unreadable mid-walk
        return size

    def _remove(path):
        """Delete *path* (file or directory) and return the bytes freed."""
        if path.is_file():
            size = path.stat().st_size
            path.unlink()  # may raise; then nothing is counted
            return size
        if path.is_dir():
            before = _tree_size(path)
            shutil.rmtree(path, ignore_errors=True)
            # rmtree may have removed only part of the tree.
            remaining = _tree_size(path) if path.exists() else 0
            return max(before - remaining, 0)
        return 0

    total = 0

    # Use bootstrap temp dir
    try:
        import bootstrap
        # Coerce to Path so a plain string return value works as well.
        temp_dir = Path(bootstrap.get_temp_dir())
    except (ImportError, AttributeError, TypeError):
        temp_dir = APP_DIR / "temp_workspace"

    if temp_dir.is_dir():
        try:
            entries = list(temp_dir.iterdir())
        except OSError:
            entries = []
        for item in entries:
            try:
                total += _remove(item)
            except OSError:
                pass

    # Clean system temp (though with bootstrap it should be redirected)
    system_temp = Path(tempfile.gettempdir())
    for pattern in ("flux_t5_offload", "t5_offload", "offload_folder"):
        target = system_temp / pattern
        if target.is_dir():
            try:
                total += _remove(target)
            except OSError:
                pass

    if total > 0:
        print(f"[VoxAI] Cleaned {total / 1024**2:.1f} MB")


def run_shutdown_cleanup() -> None:
    """Run cleanup at interpreter exit.

    Calls ``backend.cleanup.cleanup_on_shutdown()``. If importing or running
    it raises any Exception, fallback_cleanup() is used instead. The
    "Goodbye" line is printed afterwards.
    """
    print("\n[VoxAI] Shutdown cleanup...")
    try:
        from backend.cleanup import cleanup_on_shutdown
        cleanup_on_shutdown()
    except Exception:
        # Covers both a missing backend module and a failure inside it.
        fallback_cleanup()
    print("[VoxAI] Goodbye!")


def register_cleanup_handlers() -> None:
    """Register run_shutdown_cleanup with atexit so it runs at interpreter exit."""
    atexit.register(run_shutdown_cleanup)


# -------------------------
# PyTorch Setup
# -------------------------

def setup_torch():
    """Configure PyTorch attention backends and print the detected GPU.

    Flash and memory-efficient SDP are switched off and math SDP is switched
    on, each only if the running torch build exposes the toggle. The first
    CUDA device's name and total memory are then printed, or a notice when no
    GPU is available.

    A missing torch is ignored silently; any other failure is printed as a
    warning. Nothing is raised and nothing is returned.
    """
    try:
        import torch

        cuda_backend = torch.backends.cuda
        sdp_toggles = (
            ("enable_flash_sdp", False),
            ("enable_mem_efficient_sdp", False),
            ("enable_math_sdp", True),
        )
        for toggle_name, enabled in sdp_toggles:
            if hasattr(cuda_backend, toggle_name):
                getattr(cuda_backend, toggle_name)(enabled)

        if not torch.cuda.is_available():
            print("[VoxAI] No GPU detected")
        else:
            device_name = torch.cuda.get_device_name(0)
            total_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
            print(f"[VoxAI] GPU: {device_name} ({total_gb:.1f} GB)")
            if "ZLUDA" in device_name:
                print("[VoxAI] ZLUDA detected - Flash Attention disabled")
    except ImportError:
        pass
    except Exception as exc:
        print(f"[VoxAI] PyTorch warning: {exc}")


# -------------------------
# Debug Setup
# -------------------------

def setup_debug():
    """Enable backend debug output at a level chosen from argv/environment.

    Precedence: ``--debug`` on the command line or a non-empty
    ``AI_GUI_DEBUG`` environment variable selects VERBOSE; otherwise
    ``--trace`` selects TRACE; otherwise INFO. If ``backend.debug`` cannot be
    imported the function does nothing.
    """
    try:
        from backend.debug import enable_debug, DebugLevel

        cli_args = sys.argv
        if "--debug" in cli_args or os.environ.get("AI_GUI_DEBUG"):
            level = DebugLevel.VERBOSE
        elif "--trace" in cli_args:
            level = DebugLevel.TRACE
        else:
            level = DebugLevel.INFO
        enable_debug(level)
    except ImportError:
        pass


# -------------------------
# Main Entry Point
# -------------------------

def main():
    """Print the banner, create the Qt application and window, and run it.

    Debug setup and the atexit cleanup hook are installed before Qt is
    imported. The slower startup work (dependency check, torch setup, startup
    cleanup) is scheduled on a single-shot timer 500 ms after the window is
    shown. The function does not return: the Qt event loop's exit code is
    passed to sys.exit().

    NOTE(review): because the dependency check is deferred, a missing PySide6
    fails at the imports below before ensure_dependencies() has a chance to
    install it.
    """
    rule = "=" * 50
    print(rule)
    print("  VoxAI Orchestrator")
    print(rule)
    print()

    backend_status = (
        "[VoxAI] ✓ Custom backend ready"
        if _vox_ok
        else "[VoxAI] ⚠ Backend not loaded - will use CPU"
    )
    print(backend_status)
    print()

    setup_debug()
    register_cleanup_handlers()

    print("[VoxAI] Launching...\n")

    # Qt and the main window are imported only now, after the banner.
    from PySide6.QtWidgets import QApplication
    from PySide6.QtCore import QTimer
    from main_window import MainWindow

    qt_app = QApplication(sys.argv)
    qt_app.setApplicationName("VoxAI Orchestrator")
    qt_app.setOrganizationName("AI_GUI")
    qt_app.setApplicationVersion("0.1.0")

    # Keep a reference for the lifetime of the event loop.
    top_window = MainWindow()
    top_window.show()

    def _run_deferred_startup():
        # Runs once from the event loop, after the window is on screen.
        ensure_dependencies()
        setup_torch()
        run_startup_cleanup()
        print("[VoxAI] Deferred init complete")

    QTimer.singleShot(500, _run_deferred_startup)

    sys.exit(qt_app.exec())


# Launch the GUI only when this file is executed as a script. The backend
# setup and environment configuration above run at import time regardless.
if __name__ == "__main__":
    main()