From 665ad79649a7ffe0f15b3b6b3e3d8319a3fc4287 Mon Sep 17 00:00:00 2001 From: key4ng Date: Sat, 16 May 2026 23:34:01 -0700 Subject: [PATCH 1/2] ci(trtllm): install pre-release wheel from PyPI instead of building from source PR #11037 (gRPC serve) and the Harmony parser fixes that motivated the source build (#12045, #12467) are all included in tensorrt-llm 1.3.0rc14 on PyPI. Pinning to the published pre-release saves ~30 min of CMake compile time per CI run and removes the brittle wheel-cache dance whose key was evicted after 7 days of inactivity. Drop the cached-wheel + source-build branches from ci_install_trtllm.sh and the now-redundant cache restore/save steps from the composite action. The runtime apt deps, NCCL pin, cu130 torch index URL, and LD_LIBRARY_PATH setup are preserved. Signed-off-by: key4ng --- .github/actions/setup-trtllm/action.yml | 16 +- scripts/ci_install_trtllm.sh | 316 ++---------------------- 2 files changed, 28 insertions(+), 304 deletions(-) diff --git a/.github/actions/setup-trtllm/action.yml b/.github/actions/setup-trtllm/action.yml index 4c4c183dd..7d206fcdd 100644 --- a/.github/actions/setup-trtllm/action.yml +++ b/.github/actions/setup-trtllm/action.yml @@ -1,5 +1,5 @@ name: 'Setup TRT-LLM Backend' -description: 'Create Python venv, restore/save TRT-LLM wheel cache, and install TRT-LLM.' +description: 'Create Python venv and install TensorRT-LLM pre-release wheel from PyPI.' runs: using: 'composite' @@ -8,20 +8,6 @@ runs: shell: bash run: bash scripts/ci_setup_python_venv.sh - - name: Restore TRT-LLM wheel cache - id: trtllm-cache - uses: actions/cache/restore@v4 - with: - path: /tmp/trtllm-wheel - key: trtllm-wheel-${{ runner.os }}-${{ hashFiles('scripts/ci_install_trtllm.sh') }} - - name: Install TRT-LLM shell: bash run: bash scripts/ci_install_trtllm.sh - - - name: Save TRT-LLM wheel cache - if: steps.trtllm-cache.outputs.cache-hit != 'true' - uses: actions/cache/save@v4 - with: - path: /tmp/trtllm-wheel - key: trtllm-wheel-${{ runner.os }}-${{ hashFiles('scripts/ci_install_trtllm.sh') }} diff --git a/scripts/ci_install_trtllm.sh b/scripts/ci_install_trtllm.sh index 7f7f71756..fac5c34c9 100755 --- a/scripts/ci_install_trtllm.sh +++ b/scripts/ci_install_trtllm.sh @@ -1,11 +1,11 @@ #!/bin/bash -# Install TensorRT-LLM from source with gRPC support for CI +# Install TensorRT-LLM pre-release wheel from PyPI for CI. # -# gRPC server support (PR #11037) is not yet in a pip release, -# so we build from source (main branch) which compiles the C++ -# extensions properly and includes the gRPC serve command. -# -# Cache version: 5 — rebuild from latest TRT-LLM main for Harmony tool calling fixes +# As of 1.3.0rc14 (released 2026-05-07), the gRPC serve command from PR #11037 +# and the Harmony parser fixes (#12045, #12467) referenced by SMG #801 are all +# included in the published pre-release wheel. We install it directly from PyPI +# instead of building TensorRT-LLM from source, which saves ~30 min of CMake +# compile time per CI run. See git history for the previous source-build logic. # # Prerequisites (expected on k8s-runner-gpu nodes): # - NVIDIA driver 580+ (CUDA 13) @@ -16,6 +16,7 @@ set -euo pipefail +TRTLLM_VERSION="1.3.0rc14" NCCL_VERSION_CONSTRAINT="nvidia-nccl-cu13>=2.28.9,<=2.29.2" # Activate venv if it exists @@ -23,90 +24,11 @@ if [ -f ".venv/bin/activate" ]; then source .venv/bin/activate fi -# ── Check for cached wheel FIRST ───────────────────────────────────────────── -# This allows us to skip heavy build dependencies when wheel is already cached -TRTLLM_WHEEL_CACHE="/tmp/trtllm-wheel" -mkdir -p "$TRTLLM_WHEEL_CACHE" -CACHED_WHEEL=$(find "$TRTLLM_WHEEL_CACHE" -name "tensorrt_llm*.whl" 2>/dev/null | head -1 || true) - -if [ -n "$CACHED_WHEEL" ] && [ -f "$CACHED_WHEEL" ]; then - echo "=== Found cached TRT-LLM wheel: $CACHED_WHEEL ===" - echo "=== Installing runtime dependencies only (skipping build deps) ===" - - # ── Runtime dependencies only ──────────────────────────────────────────── - export DEBIAN_FRONTEND=noninteractive - sudo dpkg --configure -a --force-confnew 2>/dev/null || true - - # Add NVIDIA apt repository if needed - if ! dpkg -l cuda-keyring 2>/dev/null | grep -q '^ii'; then - echo "Setting up NVIDIA apt repository..." - curl -fsSL -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb - sudo dpkg -i /tmp/cuda-keyring.deb - rm -f /tmp/cuda-keyring.deb - fi - - sudo apt-get update - # Runtime deps: wheel links against CUDA 13 + TensorRT libs - sudo apt-get install -y libopenmpi-dev libnvinfer10 cuda-toolkit-13-0 - - # ── CUDA runtime setup ─────────────────────────────────────────────────── - if [ -d "/usr/local/cuda-13.0" ]; then - export CUDA_HOME="/usr/local/cuda-13.0" - else - export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}" - fi - export PATH="$CUDA_HOME/bin:$PATH" - export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH:-}" - - # ── Install pip and NCCL runtime ───────────────────────────────────────── - pip install --upgrade pip - pip install --no-cache-dir "$NCCL_VERSION_CONSTRAINT" - - # ── Install cached wheel ───────────────────────────────────────────────── - # Use --extra-index-url for cu130 torch so pip resolves torch 2.10+cu130 - # (cuda-bindings==13.x) instead of the default PyPI torch (cuda-bindings==12.9.4), - # which conflicts with tensorrt-llm's cuda-python>=13 requirement. - echo "Installing cached wheel..." - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cu130 "$CACHED_WHEEL" - - # ── Setup LD_LIBRARY_PATH ──────────────────────────────────────────────── - SITE_PACKAGES=$(python3 -c "import site; print(site.getsitepackages()[0])") - NVIDIA_LIB_DIRS=$(find "$SITE_PACKAGES/nvidia" -name "lib" -type d 2>/dev/null | sort -u | paste -sd':') - if [ -n "$NVIDIA_LIB_DIRS" ]; then - export LD_LIBRARY_PATH="${NVIDIA_LIB_DIRS}:${LD_LIBRARY_PATH:-}" - fi - - TRTLLM_LIB_DIR=$(find "$SITE_PACKAGES" -path "*/tensorrt_llm/libs" -type d 2>/dev/null | head -1) - if [ -n "$TRTLLM_LIB_DIR" ]; then - export LD_LIBRARY_PATH="${TRTLLM_LIB_DIR}:${LD_LIBRARY_PATH:-}" - fi - - # Persist LD_LIBRARY_PATH for subsequent CI steps - if [ -n "${GITHUB_ENV:-}" ]; then - echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV" - fi - - # ── Verification ───────────────────────────────────────────────────────── - echo "=== TensorRT-LLM verification ===" - python3 -c "import tensorrt_llm; print(f'TensorRT-LLM version: {tensorrt_llm.__version__}')" - python3 -c "from tensorrt_llm.commands.serve import main; print('gRPC serve command: available')" - echo "Verifying gRPC serve command..." - python3 -m tensorrt_llm.commands.serve serve --help 2>&1 | head -20 || echo "WARNING: serve --help failed" - - echo "TensorRT-LLM installation complete (from cache)" - exit 0 -fi - -# ══════════════════════════════════════════════════════════════════════════════ -# No cached wheel - full build required -# ══════════════════════════════════════════════════════════════════════════════ -echo "=== No cached wheel found, building from source ===" - -# ── System dependencies (full build) ───────────────────────────────────────── +# ── Runtime system dependencies ────────────────────────────────────────────── export DEBIAN_FRONTEND=noninteractive sudo dpkg --configure -a --force-confnew 2>/dev/null || true -# Add NVIDIA CUDA/TensorRT apt repository (needed for libnvinfer-dev, tensorrt-dev) +# Add NVIDIA apt repository if needed if ! dpkg -l cuda-keyring 2>/dev/null | grep -q '^ii'; then echo "Setting up NVIDIA apt repository..." curl -fsSL -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb @@ -115,216 +37,34 @@ if ! dpkg -l cuda-keyring 2>/dev/null | grep -q '^ii'; then fi sudo apt-get update -# Full build deps: runtime + dev headers + build tools -sudo apt-get install -y libopenmpi-dev git-lfs libnvinfer10 libnvinfer-dev tensorrt-dev cuda-toolkit-13-0 cmake - -# ── Fabric Manager for multi-GPU NCCL communication ─────────────────────────── -# Required for H100 with NVSwitch - the k8s GPU runners should have it pre-installed -# Just try to start it if it's not running (don't try to install - causes dpkg errors) -echo "Checking Fabric Manager status for multi-GPU support..." -if command -v nv-fabricmanager &>/dev/null || [ -f /usr/bin/nv-fabricmanager ]; then - sudo systemctl start nvidia-fabricmanager 2>/dev/null || true - sudo systemctl status nvidia-fabricmanager --no-pager 2>/dev/null || echo "INFO: Fabric Manager not running (may not be needed for this GPU type)" -else - echo "INFO: Fabric Manager not installed (may not be needed for this GPU type)" -fi +# Runtime deps: wheel links against CUDA 13 + TensorRT libs +sudo apt-get install -y libopenmpi-dev libnvinfer10 cuda-toolkit-13-0 -# ── CUDA setup ─────────────────────────────────────────────────────────────── -# Prefer /usr/local/cuda-13.0 if it exists, otherwise fall back to /usr/local/cuda +# ── CUDA runtime setup ─────────────────────────────────────────────────────── if [ -d "/usr/local/cuda-13.0" ]; then export CUDA_HOME="/usr/local/cuda-13.0" else export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}" fi -# Re-activate venv first, then add CUDA to PATH so it takes precedence -if [ -f ".venv/bin/activate" ]; then - source .venv/bin/activate -fi export PATH="$CUDA_HOME/bin:$PATH" export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH:-}" -# Debug: print what CUDA we actually have -echo "=== CUDA diagnostics ===" -echo "CUDA_HOME=$CUDA_HOME" -echo "PATH=$PATH" -ls -la "$CUDA_HOME/bin/nvcc" 2>/dev/null || echo "WARNING: nvcc not at $CUDA_HOME/bin/nvcc" -find /usr/local -name "nvcc" -type f 2>/dev/null || echo "WARNING: nvcc not found anywhere in /usr/local" -which nvcc 2>/dev/null || echo "WARNING: nvcc not on PATH" -nvcc --version 2>/dev/null || echo "WARNING: nvcc --version failed" -nvidia-smi 2>/dev/null | head -4 || echo "WARNING: nvidia-smi not found" -echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}" -python3 --version -echo "=== end CUDA diagnostics ===" - -# ── TensorRT symlinks (for CMake to find TensorRT) ────────────────────────── -sudo mkdir -p /usr/local/tensorrt -sudo ln -sf /usr/include/x86_64-linux-gnu /usr/local/tensorrt/include -sudo ln -sf /usr/lib/x86_64-linux-gnu /usr/local/tensorrt/lib - +# ── Install pip and NCCL runtime ───────────────────────────────────────────── pip install --upgrade pip - -# ── Clone TensorRT-LLM ────────────────────────────────────────────────────── -TRTLLM_DIR="/tmp/tensorrt-llm-src" -if [ ! -d "$TRTLLM_DIR" ]; then - echo "Cloning TensorRT-LLM main branch..." - git clone --depth 1 https://github.com/NVIDIA/TensorRT-LLM.git "$TRTLLM_DIR" -fi - -cd "$TRTLLM_DIR" -git lfs install --force -git lfs pull - -# ── Install TensorRT-LLM Python requirements ───────────────────────────────── -# Install nvidia-cutlass first - provides cutlass_library module needed during CMake configure -# This is cleaner than relying on CMake's FetchContent which installs to user site-packages -pip install --no-cache-dir nvidia-cutlass - -if [ -f "requirements-dev.txt" ]; then - echo "Installing TensorRT-LLM build requirements..." - pip install --no-cache-dir -r requirements-dev.txt -fi - -# ── NCCL setup ────────────────────────────────────────────────────────────── -# build_wheel.py runs pip install internally which can change the NCCL version. -# Copy headers+libs to a fixed directory that pip can't overwrite, and point -# NCCL_ROOT there for CMake. -pip install --no-cache-dir --force-reinstall "$NCCL_VERSION_CONSTRAINT" - +pip install --no-cache-dir "$NCCL_VERSION_CONSTRAINT" + +# ── Install TensorRT-LLM pre-release wheel from PyPI ───────────────────────── +# --pre allows pre-release versions; --extra-index-url for cu130 torch so pip +# resolves torch 2.10+cu130 (cuda-bindings==13.x) instead of the default PyPI +# torch (cuda-bindings==12.9.4), which conflicts with tensorrt-llm's +# cuda-python>=13 requirement. +echo "Installing tensorrt-llm==${TRTLLM_VERSION} from PyPI..." +pip install --no-cache-dir --pre \ + --extra-index-url https://download.pytorch.org/whl/cu130 \ + "tensorrt-llm==${TRTLLM_VERSION}" + +# ── Setup LD_LIBRARY_PATH ──────────────────────────────────────────────────── SITE_PACKAGES=$(python3 -c "import site; print(site.getsitepackages()[0])") -NCCL_PIP_ROOT="$SITE_PACKAGES/nvidia/nccl" - -# Copy to a stable location that pip won't touch -NCCL_ROOT="/tmp/nccl-stable" -rm -rf "$NCCL_ROOT" -mkdir -p "$NCCL_ROOT/include" "$NCCL_ROOT/lib" -cp -a "$NCCL_PIP_ROOT/include/"* "$NCCL_ROOT/include/" -cp -a "$NCCL_PIP_ROOT/lib/"* "$NCCL_ROOT/lib/" -# Create libnccl.so symlink — pip only ships libnccl.so.2 -if [ -f "$NCCL_ROOT/lib/libnccl.so.2" ] && [ ! -e "$NCCL_ROOT/lib/libnccl.so" ]; then - ln -s libnccl.so.2 "$NCCL_ROOT/lib/libnccl.so" -fi - -echo "=== NCCL diagnostics ===" -echo "NCCL_ROOT=$NCCL_ROOT (stable copy, immune to pip downgrades)" -ls -la "$NCCL_ROOT/include/" 2>/dev/null | head -5 -ls -la "$NCCL_ROOT/lib/" 2>/dev/null | head -5 -grep "NCCL_MAJOR\|NCCL_MINOR" "$NCCL_ROOT/include/nccl.h" 2>/dev/null | head -3 -echo "=== end NCCL diagnostics ===" - -# Symlink stable NCCL header to system path for other tools that look there -sudo ln -sf "$NCCL_ROOT/include/nccl.h" /usr/include/nccl.h - -# ── Patch FindTensorRT.cmake ───────────────────────────────────────────────── -# CMake needs to find TensorRT in system paths -CMAKE_FILE="cpp/cmake/modules/FindTensorRT.cmake" -if [ -f "$CMAKE_FILE" ]; then - echo "Patching FindTensorRT.cmake for system paths..." - python3 <<'PYTHON_EOF' -import pathlib -import re -import sys - -cmake_file = sys.argv[1] if len(sys.argv) > 1 else "cpp/cmake/modules/FindTensorRT.cmake" -p = pathlib.Path(cmake_file) -text = p.read_text() - -# Add system paths to CMAKE_FIND_ROOT_PATH -if '/usr/local/tensorrt' not in text or 'list(APPEND CMAKE_FIND_ROOT_PATH' not in text: - text = text.replace( - 'set(TensorRT_WELL_KNOWN_ROOT /usr/local/tensorrt)', - 'set(TensorRT_WELL_KNOWN_ROOT /usr/local/tensorrt)\nlist(APPEND CMAKE_FIND_ROOT_PATH /usr/local/tensorrt /usr)', - ) - -# Patch find_path for NvInfer.h to include system paths -text = re.sub( - r'(find_path\(\s*TensorRT_INCLUDE_DIR\s+NAMES\s+NvInfer\.h\s+PATHS\s+\$\{TensorRT_WELL_KNOWN_ROOT\}/include)', - r'\1 /usr/include/x86_64-linux-gnu', - text, -) - -# Add system library paths to find_library calls (matches installation guide) -text = re.sub( - r'(find_library\([^)]*PATHS\s+\$\{TensorRT_WELL_KNOWN_ROOT\}/lib)(\s*\))', - r'\1 /usr/lib/x86_64-linux-gnu\2', - text, - flags=re.DOTALL, -) - -# Add NO_CMAKE_FIND_ROOT_PATH to find_path and find_library calls -for pattern in [r'(find_path\([^)]*)\)', r'(find_library\([^)]*)\)']: - for match in re.finditer(pattern, text, re.DOTALL): - block = match.group(0) - if 'TensorRT' in block and 'NO_CMAKE_FIND_ROOT_PATH' not in block: - patched = block[:-1] + '\n NO_CMAKE_FIND_ROOT_PATH)' - text = text.replace(block, patched) - -p.write_text(text) -print('FindTensorRT.cmake patched') -PYTHON_EOF -fi - -# ── Patch FindNCCL.cmake ───────────────────────────────────────────────────── -# The upstream FindNCCL.cmake doesn't use NCCL_ROOT hint at all! -# We need to add PATHS ${NCCL_ROOT}/lib and NO_CMAKE_FIND_ROOT_PATH -NCCL_CMAKE_FILE="cpp/cmake/modules/FindNCCL.cmake" -if [ -f "$NCCL_CMAKE_FILE" ]; then - echo "Patching FindNCCL.cmake to use NCCL_ROOT hint..." - python3 <<'PYTHON_EOF' -import pathlib - -p = pathlib.Path("cpp/cmake/modules/FindNCCL.cmake") -text = p.read_text() - -# Replace simple find_library/find_path calls with ones that use NCCL_ROOT hint -# Original: find_library(NCCL_LIBRARY NAMES nccl) -# Patched: find_library(NCCL_LIBRARY NAMES nccl PATHS ${NCCL_ROOT}/lib NO_CMAKE_FIND_ROOT_PATH) - -# The pip nvidia-nccl-cu13 package has libnccl.so.2 directly in NCCL_ROOT, not in lib/ -text = text.replace( - 'find_library(NCCL_LIBRARY NAMES nccl)', - 'find_library(NCCL_LIBRARY NAMES nccl PATHS ${NCCL_ROOT} ${NCCL_ROOT}/lib NO_CMAKE_FIND_ROOT_PATH)' -) - -text = text.replace( - 'find_library(NCCL_STATIC_LIBRARY NAMES nccl_static)', - 'find_library(NCCL_STATIC_LIBRARY NAMES nccl_static PATHS ${NCCL_ROOT} ${NCCL_ROOT}/lib NO_CMAKE_FIND_ROOT_PATH)' -) - -text = text.replace( - 'find_path(NCCL_INCLUDE_DIR NAMES nccl.h)', - 'find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_ROOT}/include NO_CMAKE_FIND_ROOT_PATH)' -) - -p.write_text(text) -print('FindNCCL.cmake patched to use NCCL_ROOT hint') -PYTHON_EOF -fi - -# ── Build TensorRT-LLM ─────────────────────────────────────────────────────── -echo "=== Building TensorRT-LLM from source (this may take a while)... ===" - -python3 scripts/build_wheel.py \ - --cuda_architectures "90-real" \ - --trt_root /usr/local/tensorrt \ - --nccl_root "$NCCL_ROOT" \ - --install \ - --no-venv \ - -j "$(nproc)" \ - -D "ENABLE_UCX=OFF" \ - --clean - -# Return to repo dir -cd - - -# Cache the built wheel for future runs -mkdir -p "$TRTLLM_WHEEL_CACHE" -BUILT_WHEEL=$(find "$TRTLLM_DIR/build" -name "tensorrt_llm*.whl" 2>/dev/null | head -1) -if [ -n "$BUILT_WHEEL" ]; then - cp "$BUILT_WHEEL" "$TRTLLM_WHEEL_CACHE/" - echo "Cached wheel to: $TRTLLM_WHEEL_CACHE/$(basename "$BUILT_WHEEL")" -fi - -# ── Add pip-installed NVIDIA libraries to LD_LIBRARY_PATH ──────────────────── NVIDIA_LIB_DIRS=$(find "$SITE_PACKAGES/nvidia" -name "lib" -type d 2>/dev/null | sort -u | paste -sd':') if [ -n "$NVIDIA_LIB_DIRS" ]; then export LD_LIBRARY_PATH="${NVIDIA_LIB_DIRS}:${LD_LIBRARY_PATH:-}" @@ -344,9 +84,7 @@ fi echo "=== TensorRT-LLM verification ===" python3 -c "import tensorrt_llm; print(f'TensorRT-LLM version: {tensorrt_llm.__version__}')" python3 -c "from tensorrt_llm.commands.serve import main; print('gRPC serve command: available')" - -# Smoke-test: verify the serve command can parse --help without crashing echo "Verifying gRPC serve command..." python3 -m tensorrt_llm.commands.serve serve --help 2>&1 | head -20 || echo "WARNING: serve --help failed" -echo "TensorRT-LLM installation complete (built from source)" +echo "TensorRT-LLM installation complete (from PyPI)" From 35a1b8e052696b5b7b2c9907316bd505e73c59f4 Mon Sep 17 00:00:00 2001 From: key4ng Date: Sat, 16 May 2026 23:46:29 -0700 Subject: [PATCH 2/2] fix(ci): point pip at pypi.nvidia.com for tensorrt-llm prebuilt wheel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PyPI only ships the tensorrt-llm source tarball, so a plain pip install tensorrt-llm==1.3.0rc14 triggers a full source build — defeating the purpose of switching off the build_wheel.py path. The pre-built linux_x86_64 wheel (2.75 GB) lives at https://pypi.nvidia.com/tensorrt-llm/, which pip needs as an extra index to resolve. Signed-off-by: key4ng --- scripts/ci_install_trtllm.sh | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/ci_install_trtllm.sh b/scripts/ci_install_trtllm.sh index fac5c34c9..46b32a9ad 100755 --- a/scripts/ci_install_trtllm.sh +++ b/scripts/ci_install_trtllm.sh @@ -53,13 +53,17 @@ export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_ pip install --upgrade pip pip install --no-cache-dir "$NCCL_VERSION_CONSTRAINT" -# ── Install TensorRT-LLM pre-release wheel from PyPI ───────────────────────── -# --pre allows pre-release versions; --extra-index-url for cu130 torch so pip -# resolves torch 2.10+cu130 (cuda-bindings==13.x) instead of the default PyPI -# torch (cuda-bindings==12.9.4), which conflicts with tensorrt-llm's -# cuda-python>=13 requirement. -echo "Installing tensorrt-llm==${TRTLLM_VERSION} from PyPI..." +# ── Install TensorRT-LLM pre-release wheel from NVIDIA's index ─────────────── +# PyPI only hosts the source tarball for tensorrt-llm — installing from there +# would trigger a full source build. The pre-built linux_x86_64 wheels live on +# https://pypi.nvidia.com, which we add as an extra index. +# +# The cu130 torch index is also needed so pip resolves torch 2.10+cu130 +# (cuda-bindings==13.x) instead of the default PyPI torch (cuda-bindings==12.9.4), +# which conflicts with tensorrt-llm's cuda-python>=13 requirement. +echo "Installing tensorrt-llm==${TRTLLM_VERSION} from pypi.nvidia.com..." pip install --no-cache-dir --pre \ + --extra-index-url https://pypi.nvidia.com \ --extra-index-url https://download.pytorch.org/whl/cu130 \ "tensorrt-llm==${TRTLLM_VERSION}"