-
Notifications
You must be signed in to change notification settings - Fork 84
ci(trtllm): install pre-release wheel from PyPI instead of building from source #1501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,11 +1,11 @@ | ||||||
| #!/bin/bash | ||||||
| # Install TensorRT-LLM from source with gRPC support for CI | ||||||
| # Install TensorRT-LLM pre-release wheel from PyPI for CI. | ||||||
| # | ||||||
| # gRPC server support (PR #11037) is not yet in a pip release, | ||||||
| # so we build from source (main branch) which compiles the C++ | ||||||
| # extensions properly and includes the gRPC serve command. | ||||||
| # | ||||||
| # Cache version: 5 — rebuild from latest TRT-LLM main for Harmony tool calling fixes | ||||||
| # As of 1.3.0rc14 (released 2026-05-07), the gRPC serve command from PR #11037 | ||||||
| # and the Harmony parser fixes (#12045, #12467) referenced by SMG #801 are all | ||||||
| # included in the published pre-release wheel. We install it directly from PyPI | ||||||
| # instead of building TensorRT-LLM from source, which saves ~30 min of CMake | ||||||
| # compile time per CI run. See git history for the previous source-build logic. | ||||||
| # | ||||||
| # Prerequisites (expected on k8s-runner-gpu nodes): | ||||||
| # - NVIDIA driver 580+ (CUDA 13) | ||||||
|
|
@@ -16,97 +16,19 @@ | |||||
|
|
||||||
| set -euo pipefail | ||||||
|
|
||||||
| TRTLLM_VERSION="1.3.0rc14" | ||||||
| NCCL_VERSION_CONSTRAINT="nvidia-nccl-cu13>=2.28.9,<=2.29.2" | ||||||
|
|
||||||
| # Activate venv if it exists | ||||||
| if [ -f ".venv/bin/activate" ]; then | ||||||
| source .venv/bin/activate | ||||||
| fi | ||||||
|
|
||||||
| # ── Check for cached wheel FIRST ───────────────────────────────────────────── | ||||||
| # This allows us to skip heavy build dependencies when wheel is already cached | ||||||
| TRTLLM_WHEEL_CACHE="/tmp/trtllm-wheel" | ||||||
| mkdir -p "$TRTLLM_WHEEL_CACHE" | ||||||
| CACHED_WHEEL=$(find "$TRTLLM_WHEEL_CACHE" -name "tensorrt_llm*.whl" 2>/dev/null | head -1 || true) | ||||||
|
|
||||||
| if [ -n "$CACHED_WHEEL" ] && [ -f "$CACHED_WHEEL" ]; then | ||||||
| echo "=== Found cached TRT-LLM wheel: $CACHED_WHEEL ===" | ||||||
| echo "=== Installing runtime dependencies only (skipping build deps) ===" | ||||||
|
|
||||||
| # ── Runtime dependencies only ──────────────────────────────────────────── | ||||||
| export DEBIAN_FRONTEND=noninteractive | ||||||
| sudo dpkg --configure -a --force-confnew 2>/dev/null || true | ||||||
|
|
||||||
| # Add NVIDIA apt repository if needed | ||||||
| if ! dpkg -l cuda-keyring 2>/dev/null | grep -q '^ii'; then | ||||||
| echo "Setting up NVIDIA apt repository..." | ||||||
| curl -fsSL -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb | ||||||
| sudo dpkg -i /tmp/cuda-keyring.deb | ||||||
| rm -f /tmp/cuda-keyring.deb | ||||||
| fi | ||||||
|
|
||||||
| sudo apt-get update | ||||||
| # Runtime deps: wheel links against CUDA 13 + TensorRT libs | ||||||
| sudo apt-get install -y libopenmpi-dev libnvinfer10 cuda-toolkit-13-0 | ||||||
|
|
||||||
| # ── CUDA runtime setup ─────────────────────────────────────────────────── | ||||||
| if [ -d "/usr/local/cuda-13.0" ]; then | ||||||
| export CUDA_HOME="/usr/local/cuda-13.0" | ||||||
| else | ||||||
| export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}" | ||||||
| fi | ||||||
| export PATH="$CUDA_HOME/bin:$PATH" | ||||||
| export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH:-}" | ||||||
|
|
||||||
| # ── Install pip and NCCL runtime ───────────────────────────────────────── | ||||||
| pip install --upgrade pip | ||||||
| pip install --no-cache-dir "$NCCL_VERSION_CONSTRAINT" | ||||||
|
|
||||||
| # ── Install cached wheel ───────────────────────────────────────────────── | ||||||
| # Use --extra-index-url for cu130 torch so pip resolves torch 2.10+cu130 | ||||||
| # (cuda-bindings==13.x) instead of the default PyPI torch (cuda-bindings==12.9.4), | ||||||
| # which conflicts with tensorrt-llm's cuda-python>=13 requirement. | ||||||
| echo "Installing cached wheel..." | ||||||
| pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cu130 "$CACHED_WHEEL" | ||||||
|
|
||||||
| # ── Setup LD_LIBRARY_PATH ──────────────────────────────────────────────── | ||||||
| SITE_PACKAGES=$(python3 -c "import site; print(site.getsitepackages()[0])") | ||||||
| NVIDIA_LIB_DIRS=$(find "$SITE_PACKAGES/nvidia" -name "lib" -type d 2>/dev/null | sort -u | paste -sd':') | ||||||
| if [ -n "$NVIDIA_LIB_DIRS" ]; then | ||||||
| export LD_LIBRARY_PATH="${NVIDIA_LIB_DIRS}:${LD_LIBRARY_PATH:-}" | ||||||
| fi | ||||||
|
|
||||||
| TRTLLM_LIB_DIR=$(find "$SITE_PACKAGES" -path "*/tensorrt_llm/libs" -type d 2>/dev/null | head -1) | ||||||
| if [ -n "$TRTLLM_LIB_DIR" ]; then | ||||||
| export LD_LIBRARY_PATH="${TRTLLM_LIB_DIR}:${LD_LIBRARY_PATH:-}" | ||||||
| fi | ||||||
|
|
||||||
| # Persist LD_LIBRARY_PATH for subsequent CI steps | ||||||
| if [ -n "${GITHUB_ENV:-}" ]; then | ||||||
| echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV" | ||||||
| fi | ||||||
|
|
||||||
| # ── Verification ───────────────────────────────────────────────────────── | ||||||
| echo "=== TensorRT-LLM verification ===" | ||||||
| python3 -c "import tensorrt_llm; print(f'TensorRT-LLM version: {tensorrt_llm.__version__}')" | ||||||
| python3 -c "from tensorrt_llm.commands.serve import main; print('gRPC serve command: available')" | ||||||
| echo "Verifying gRPC serve command..." | ||||||
| python3 -m tensorrt_llm.commands.serve serve --help 2>&1 | head -20 || echo "WARNING: serve --help failed" | ||||||
|
|
||||||
| echo "TensorRT-LLM installation complete (from cache)" | ||||||
| exit 0 | ||||||
| fi | ||||||
|
|
||||||
| # ══════════════════════════════════════════════════════════════════════════════ | ||||||
| # No cached wheel - full build required | ||||||
| # ══════════════════════════════════════════════════════════════════════════════ | ||||||
| echo "=== No cached wheel found, building from source ===" | ||||||
|
|
||||||
| # ── System dependencies (full build) ───────────────────────────────────────── | ||||||
| # ── Runtime system dependencies ────────────────────────────────────────────── | ||||||
| export DEBIAN_FRONTEND=noninteractive | ||||||
| sudo dpkg --configure -a --force-confnew 2>/dev/null || true | ||||||
|
|
||||||
| # Add NVIDIA CUDA/TensorRT apt repository (needed for libnvinfer-dev, tensorrt-dev) | ||||||
| # Add NVIDIA apt repository if needed | ||||||
| if ! dpkg -l cuda-keyring 2>/dev/null | grep -q '^ii'; then | ||||||
| echo "Setting up NVIDIA apt repository..." | ||||||
| curl -fsSL -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb | ||||||
|
|
@@ -115,216 +37,38 @@ if ! dpkg -l cuda-keyring 2>/dev/null | grep -q '^ii'; then | |||||
| fi | ||||||
|
|
||||||
| sudo apt-get update | ||||||
| # Full build deps: runtime + dev headers + build tools | ||||||
| sudo apt-get install -y libopenmpi-dev git-lfs libnvinfer10 libnvinfer-dev tensorrt-dev cuda-toolkit-13-0 cmake | ||||||
|
|
||||||
| # ── Fabric Manager for multi-GPU NCCL communication ─────────────────────────── | ||||||
| # Required for H100 with NVSwitch - the k8s GPU runners should have it pre-installed | ||||||
| # Just try to start it if it's not running (don't try to install - causes dpkg errors) | ||||||
| echo "Checking Fabric Manager status for multi-GPU support..." | ||||||
| if command -v nv-fabricmanager &>/dev/null || [ -f /usr/bin/nv-fabricmanager ]; then | ||||||
| sudo systemctl start nvidia-fabricmanager 2>/dev/null || true | ||||||
| sudo systemctl status nvidia-fabricmanager --no-pager 2>/dev/null || echo "INFO: Fabric Manager not running (may not be needed for this GPU type)" | ||||||
| else | ||||||
| echo "INFO: Fabric Manager not installed (may not be needed for this GPU type)" | ||||||
| fi | ||||||
| # Runtime deps: wheel links against CUDA 13 + TensorRT libs | ||||||
| sudo apt-get install -y libopenmpi-dev libnvinfer10 cuda-toolkit-13-0 | ||||||
|
|
||||||
| # ── CUDA setup ─────────────────────────────────────────────────────────────── | ||||||
| # Prefer /usr/local/cuda-13.0 if it exists, otherwise fall back to /usr/local/cuda | ||||||
| # ── CUDA runtime setup ─────────────────────────────────────────────────────── | ||||||
| if [ -d "/usr/local/cuda-13.0" ]; then | ||||||
| export CUDA_HOME="/usr/local/cuda-13.0" | ||||||
| else | ||||||
| export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}" | ||||||
| fi | ||||||
| # Re-activate venv first, then add CUDA to PATH so it takes precedence | ||||||
| if [ -f ".venv/bin/activate" ]; then | ||||||
| source .venv/bin/activate | ||||||
| fi | ||||||
| export PATH="$CUDA_HOME/bin:$PATH" | ||||||
| export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH:-}" | ||||||
|
|
||||||
| # Debug: print what CUDA we actually have | ||||||
| echo "=== CUDA diagnostics ===" | ||||||
| echo "CUDA_HOME=$CUDA_HOME" | ||||||
| echo "PATH=$PATH" | ||||||
| ls -la "$CUDA_HOME/bin/nvcc" 2>/dev/null || echo "WARNING: nvcc not at $CUDA_HOME/bin/nvcc" | ||||||
| find /usr/local -name "nvcc" -type f 2>/dev/null || echo "WARNING: nvcc not found anywhere in /usr/local" | ||||||
| which nvcc 2>/dev/null || echo "WARNING: nvcc not on PATH" | ||||||
| nvcc --version 2>/dev/null || echo "WARNING: nvcc --version failed" | ||||||
| nvidia-smi 2>/dev/null | head -4 || echo "WARNING: nvidia-smi not found" | ||||||
| echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-<unset>}" | ||||||
| python3 --version | ||||||
| echo "=== end CUDA diagnostics ===" | ||||||
|
|
||||||
| # ── TensorRT symlinks (for CMake to find TensorRT) ────────────────────────── | ||||||
| sudo mkdir -p /usr/local/tensorrt | ||||||
| sudo ln -sf /usr/include/x86_64-linux-gnu /usr/local/tensorrt/include | ||||||
| sudo ln -sf /usr/lib/x86_64-linux-gnu /usr/local/tensorrt/lib | ||||||
|
|
||||||
| # ── Install pip and NCCL runtime ───────────────────────────────────────────── | ||||||
| pip install --upgrade pip | ||||||
| pip install --no-cache-dir "$NCCL_VERSION_CONSTRAINT" | ||||||
|
|
||||||
| # ── Clone TensorRT-LLM ────────────────────────────────────────────────────── | ||||||
| TRTLLM_DIR="/tmp/tensorrt-llm-src" | ||||||
| if [ ! -d "$TRTLLM_DIR" ]; then | ||||||
| echo "Cloning TensorRT-LLM main branch..." | ||||||
| git clone --depth 1 https://github.com/NVIDIA/TensorRT-LLM.git "$TRTLLM_DIR" | ||||||
| fi | ||||||
|
|
||||||
| cd "$TRTLLM_DIR" | ||||||
| git lfs install --force | ||||||
| git lfs pull | ||||||
|
|
||||||
| # ── Install TensorRT-LLM Python requirements ───────────────────────────────── | ||||||
| # Install nvidia-cutlass first - provides cutlass_library module needed during CMake configure | ||||||
| # This is cleaner than relying on CMake's FetchContent which installs to user site-packages | ||||||
| pip install --no-cache-dir nvidia-cutlass | ||||||
|
|
||||||
| if [ -f "requirements-dev.txt" ]; then | ||||||
| echo "Installing TensorRT-LLM build requirements..." | ||||||
| pip install --no-cache-dir -r requirements-dev.txt | ||||||
| fi | ||||||
|
|
||||||
| # ── NCCL setup ────────────────────────────────────────────────────────────── | ||||||
| # build_wheel.py runs pip install internally which can change the NCCL version. | ||||||
| # Copy headers+libs to a fixed directory that pip can't overwrite, and point | ||||||
| # NCCL_ROOT there for CMake. | ||||||
| pip install --no-cache-dir --force-reinstall "$NCCL_VERSION_CONSTRAINT" | ||||||
|
|
||||||
| # ── Install TensorRT-LLM pre-release wheel from NVIDIA's index ─────────────── | ||||||
| # PyPI only hosts the source tarball for tensorrt-llm — installing from there | ||||||
| # would trigger a full source build. The pre-built linux_x86_64 wheels live on | ||||||
| # https://pypi.nvidia.com, which we add as an extra index. | ||||||
| # | ||||||
| # The cu130 torch index is also needed so pip resolves torch 2.10+cu130 | ||||||
| # (cuda-bindings==13.x) instead of the default PyPI torch (cuda-bindings==12.9.4), | ||||||
| # which conflicts with tensorrt-llm's cuda-python>=13 requirement. | ||||||
| echo "Installing tensorrt-llm==${TRTLLM_VERSION} from pypi.nvidia.com..." | ||||||
| pip install --no-cache-dir --pre \ | ||||||
| --extra-index-url https://pypi.nvidia.com \ | ||||||
| --extra-index-url https://download.pytorch.org/whl/cu130 \ | ||||||
| "tensorrt-llm==${TRTLLM_VERSION}" | ||||||
|
Comment on lines
+65
to
+68
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In the clean CI runners this command only searches PyPI plus the PyTorch CUDA index, but the PyPI metadata for […] (the remainder of this review comment was truncated in the captured page).
Useful? React with 👍 / 👎. |
||||||
|
|
||||||
| # ── Setup LD_LIBRARY_PATH ──────────────────────────────────────────────────── | ||||||
| SITE_PACKAGES=$(python3 -c "import site; print(site.getsitepackages()[0])") | ||||||
| NCCL_PIP_ROOT="$SITE_PACKAGES/nvidia/nccl" | ||||||
|
|
||||||
| # Copy to a stable location that pip won't touch | ||||||
| NCCL_ROOT="/tmp/nccl-stable" | ||||||
| rm -rf "$NCCL_ROOT" | ||||||
| mkdir -p "$NCCL_ROOT/include" "$NCCL_ROOT/lib" | ||||||
| cp -a "$NCCL_PIP_ROOT/include/"* "$NCCL_ROOT/include/" | ||||||
| cp -a "$NCCL_PIP_ROOT/lib/"* "$NCCL_ROOT/lib/" | ||||||
| # Create libnccl.so symlink — pip only ships libnccl.so.2 | ||||||
| if [ -f "$NCCL_ROOT/lib/libnccl.so.2" ] && [ ! -e "$NCCL_ROOT/lib/libnccl.so" ]; then | ||||||
| ln -s libnccl.so.2 "$NCCL_ROOT/lib/libnccl.so" | ||||||
| fi | ||||||
|
|
||||||
| echo "=== NCCL diagnostics ===" | ||||||
| echo "NCCL_ROOT=$NCCL_ROOT (stable copy, immune to pip downgrades)" | ||||||
| ls -la "$NCCL_ROOT/include/" 2>/dev/null | head -5 | ||||||
| ls -la "$NCCL_ROOT/lib/" 2>/dev/null | head -5 | ||||||
| grep "NCCL_MAJOR\|NCCL_MINOR" "$NCCL_ROOT/include/nccl.h" 2>/dev/null | head -3 | ||||||
| echo "=== end NCCL diagnostics ===" | ||||||
|
|
||||||
| # Symlink stable NCCL header to system path for other tools that look there | ||||||
| sudo ln -sf "$NCCL_ROOT/include/nccl.h" /usr/include/nccl.h | ||||||
|
|
||||||
| # ── Patch FindTensorRT.cmake ───────────────────────────────────────────────── | ||||||
| # CMake needs to find TensorRT in system paths | ||||||
| CMAKE_FILE="cpp/cmake/modules/FindTensorRT.cmake" | ||||||
| if [ -f "$CMAKE_FILE" ]; then | ||||||
| echo "Patching FindTensorRT.cmake for system paths..." | ||||||
| python3 <<'PYTHON_EOF' | ||||||
| import pathlib | ||||||
| import re | ||||||
| import sys | ||||||
|
|
||||||
| cmake_file = sys.argv[1] if len(sys.argv) > 1 else "cpp/cmake/modules/FindTensorRT.cmake" | ||||||
| p = pathlib.Path(cmake_file) | ||||||
| text = p.read_text() | ||||||
|
|
||||||
| # Add system paths to CMAKE_FIND_ROOT_PATH | ||||||
| if '/usr/local/tensorrt' not in text or 'list(APPEND CMAKE_FIND_ROOT_PATH' not in text: | ||||||
| text = text.replace( | ||||||
| 'set(TensorRT_WELL_KNOWN_ROOT /usr/local/tensorrt)', | ||||||
| 'set(TensorRT_WELL_KNOWN_ROOT /usr/local/tensorrt)\nlist(APPEND CMAKE_FIND_ROOT_PATH /usr/local/tensorrt /usr)', | ||||||
| ) | ||||||
|
|
||||||
| # Patch find_path for NvInfer.h to include system paths | ||||||
| text = re.sub( | ||||||
| r'(find_path\(\s*TensorRT_INCLUDE_DIR\s+NAMES\s+NvInfer\.h\s+PATHS\s+\$\{TensorRT_WELL_KNOWN_ROOT\}/include)', | ||||||
| r'\1 /usr/include/x86_64-linux-gnu', | ||||||
| text, | ||||||
| ) | ||||||
|
|
||||||
| # Add system library paths to find_library calls (matches installation guide) | ||||||
| text = re.sub( | ||||||
| r'(find_library\([^)]*PATHS\s+\$\{TensorRT_WELL_KNOWN_ROOT\}/lib)(\s*\))', | ||||||
| r'\1 /usr/lib/x86_64-linux-gnu\2', | ||||||
| text, | ||||||
| flags=re.DOTALL, | ||||||
| ) | ||||||
|
|
||||||
| # Add NO_CMAKE_FIND_ROOT_PATH to find_path and find_library calls | ||||||
| for pattern in [r'(find_path\([^)]*)\)', r'(find_library\([^)]*)\)']: | ||||||
| for match in re.finditer(pattern, text, re.DOTALL): | ||||||
| block = match.group(0) | ||||||
| if 'TensorRT' in block and 'NO_CMAKE_FIND_ROOT_PATH' not in block: | ||||||
| patched = block[:-1] + '\n NO_CMAKE_FIND_ROOT_PATH)' | ||||||
| text = text.replace(block, patched) | ||||||
|
|
||||||
| p.write_text(text) | ||||||
| print('FindTensorRT.cmake patched') | ||||||
| PYTHON_EOF | ||||||
| fi | ||||||
|
|
||||||
| # ── Patch FindNCCL.cmake ───────────────────────────────────────────────────── | ||||||
| # The upstream FindNCCL.cmake doesn't use NCCL_ROOT hint at all! | ||||||
| # We need to add PATHS ${NCCL_ROOT}/lib and NO_CMAKE_FIND_ROOT_PATH | ||||||
| NCCL_CMAKE_FILE="cpp/cmake/modules/FindNCCL.cmake" | ||||||
| if [ -f "$NCCL_CMAKE_FILE" ]; then | ||||||
| echo "Patching FindNCCL.cmake to use NCCL_ROOT hint..." | ||||||
| python3 <<'PYTHON_EOF' | ||||||
| import pathlib | ||||||
|
|
||||||
| p = pathlib.Path("cpp/cmake/modules/FindNCCL.cmake") | ||||||
| text = p.read_text() | ||||||
|
|
||||||
| # Replace simple find_library/find_path calls with ones that use NCCL_ROOT hint | ||||||
| # Original: find_library(NCCL_LIBRARY NAMES nccl) | ||||||
| # Patched: find_library(NCCL_LIBRARY NAMES nccl PATHS ${NCCL_ROOT}/lib NO_CMAKE_FIND_ROOT_PATH) | ||||||
|
|
||||||
| # The pip nvidia-nccl-cu13 package has libnccl.so.2 directly in NCCL_ROOT, not in lib/ | ||||||
| text = text.replace( | ||||||
| 'find_library(NCCL_LIBRARY NAMES nccl)', | ||||||
| 'find_library(NCCL_LIBRARY NAMES nccl PATHS ${NCCL_ROOT} ${NCCL_ROOT}/lib NO_CMAKE_FIND_ROOT_PATH)' | ||||||
| ) | ||||||
|
|
||||||
| text = text.replace( | ||||||
| 'find_library(NCCL_STATIC_LIBRARY NAMES nccl_static)', | ||||||
| 'find_library(NCCL_STATIC_LIBRARY NAMES nccl_static PATHS ${NCCL_ROOT} ${NCCL_ROOT}/lib NO_CMAKE_FIND_ROOT_PATH)' | ||||||
| ) | ||||||
|
|
||||||
| text = text.replace( | ||||||
| 'find_path(NCCL_INCLUDE_DIR NAMES nccl.h)', | ||||||
| 'find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_ROOT}/include NO_CMAKE_FIND_ROOT_PATH)' | ||||||
| ) | ||||||
|
|
||||||
| p.write_text(text) | ||||||
| print('FindNCCL.cmake patched to use NCCL_ROOT hint') | ||||||
| PYTHON_EOF | ||||||
| fi | ||||||
|
|
||||||
| # ── Build TensorRT-LLM ─────────────────────────────────────────────────────── | ||||||
| echo "=== Building TensorRT-LLM from source (this may take a while)... ===" | ||||||
|
|
||||||
| python3 scripts/build_wheel.py \ | ||||||
| --cuda_architectures "90-real" \ | ||||||
| --trt_root /usr/local/tensorrt \ | ||||||
| --nccl_root "$NCCL_ROOT" \ | ||||||
| --install \ | ||||||
| --no-venv \ | ||||||
| -j "$(nproc)" \ | ||||||
| -D "ENABLE_UCX=OFF" \ | ||||||
| --clean | ||||||
|
|
||||||
| # Return to repo dir | ||||||
| cd - | ||||||
|
|
||||||
| # Cache the built wheel for future runs | ||||||
| mkdir -p "$TRTLLM_WHEEL_CACHE" | ||||||
| BUILT_WHEEL=$(find "$TRTLLM_DIR/build" -name "tensorrt_llm*.whl" 2>/dev/null | head -1) | ||||||
| if [ -n "$BUILT_WHEEL" ]; then | ||||||
| cp "$BUILT_WHEEL" "$TRTLLM_WHEEL_CACHE/" | ||||||
| echo "Cached wheel to: $TRTLLM_WHEEL_CACHE/$(basename "$BUILT_WHEEL")" | ||||||
| fi | ||||||
|
|
||||||
| # ── Add pip-installed NVIDIA libraries to LD_LIBRARY_PATH ──────────────────── | ||||||
| NVIDIA_LIB_DIRS=$(find "$SITE_PACKAGES/nvidia" -name "lib" -type d 2>/dev/null | sort -u | paste -sd':') | ||||||
| if [ -n "$NVIDIA_LIB_DIRS" ]; then | ||||||
| export LD_LIBRARY_PATH="${NVIDIA_LIB_DIRS}:${LD_LIBRARY_PATH:-}" | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Similar to the previous comment, this construction can introduce a leading or trailing colon if LD_LIBRARY_PATH was empty or unset. Using the conditional expansion syntax `${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}` appends the separator only when the variable is already non-empty, which ensures a clean path list without accidentally including the current directory in the search path.
Suggested change
export LD_LIBRARY_PATH="${NVIDIA_LIB_DIRS}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
|
||||||
|
|
@@ -344,9 +88,7 @@ fi | |||||
| echo "=== TensorRT-LLM verification ===" | ||||||
| python3 -c "import tensorrt_llm; print(f'TensorRT-LLM version: {tensorrt_llm.__version__}')" | ||||||
| python3 -c "from tensorrt_llm.commands.serve import main; print('gRPC serve command: available')" | ||||||
|
|
||||||
| # Smoke-test: verify the serve command can parse --help without crashing | ||||||
| echo "Verifying gRPC serve command..." | ||||||
| python3 -m tensorrt_llm.commands.serve serve --help 2>&1 | head -20 || echo "WARNING: serve --help failed" | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed:

```bash
#!/bin/bash
# Verify the script currently masks verification failures with `|| echo`.
# Expected result: one match on the serve --help line containing `|| echo`.
rg -n --type=sh -C2 'serve --help.*\|\|.*WARNING' scripts/ci_install_trtllm.sh
```

Repository: lightseekorg/smg
Length of output: 374

🏁 Script executed:

```bash
#!/bin/bash
# Check if script has set -euo pipefail
head -20 scripts/ci_install_trtllm.sh | grep -n "set -"
```

Repository: lightseekorg/smg
Length of output: 81

Do not swallow smoke-test failures in CI verification.

Line 88 converts a failed `serve --help` smoke test into a warning via `|| echo "WARNING: serve --help failed"`, so the verification step still succeeds even when the serve command is broken. Because the script runs under `set -euo pipefail`, dropping the fallback lets a failing smoke test fail the CI step.

Proposed fix

```diff
- python3 -m tensorrt_llm.commands.serve serve --help 2>&1 | head -20 || echo "WARNING: serve --help failed"
+ python3 -m tensorrt_llm.commands.serve serve --help 2>&1 | head -20
```

📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||
|
|
||||||
| echo "TensorRT-LLM installation complete (built from source)" | ||||||
| echo "TensorRT-LLM installation complete (from PyPI)" | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current construction of LD_LIBRARY_PATH can result in a trailing colon if the variable was previously empty or unset. The dynamic linker treats an empty entry in LD_LIBRARY_PATH (a leading, trailing, or doubled colon) as the current working directory (.), which is generally considered a security risk and can lead to unexpected library resolution. It is safer to use the ${VAR:+:${VAR}} syntax so the colon is appended only when the variable is already non-empty, for example: export LD_LIBRARY_PATH="/new/lib/dir${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}".