diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab0c0c2..f678ad3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,7 @@ on: # yamllint disable-line rule:truthy jobs: build: + name: Build (${{ matrix.os }} ${{ matrix.build_type }}, CUDA=${{ matrix.cuda_version }}) runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -16,6 +17,7 @@ jobs: # Testing across Linux and macOS os: [ubuntu-latest, macos-latest] build_type: [Debug, Release] + cuda_version: [cpu] # Cuda builds are tested on our containers steps: - uses: actions/checkout@v4 @@ -31,12 +33,12 @@ jobs: run: brew install libomp - name: Build TGN - run: make build BUILD_TYPE=${{ matrix.build_type }} + run: make build BUILD_TYPE=${{ matrix.build_type }} CUDA_VERSION=${{ matrix.cuda_version }} - name: Upload Build Artifacts uses: actions/upload-artifact@v4 with: - name: build-${{ matrix.os }}-${{ matrix.build_type }} + name: build-${{ matrix.os }}-${{ matrix.cuda_version }}-${{ matrix.build_type }} path: build/ test: @@ -51,10 +53,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Download Debug Build + - name: Download CPU Debug Build uses: actions/download-artifact@v4 with: - name: build-${{ matrix.os }}-Debug + name: build-${{ matrix.os }}-cpu-Debug path: build/ - name: Install OpenMP (MacOS) @@ -70,10 +72,10 @@ jobs: steps: - uses: actions/checkout@v5 - - name: Download Debug Build + - name: Download CPU Debug Build uses: actions/download-artifact@v4 with: - name: build-ubuntu-latest-Debug + name: build-ubuntu-latest-cpu-Debug path: build/ - uses: cpp-linter/cpp-linter-action@v2 @@ -86,18 +88,22 @@ jobs: tidy-checks: "" # Use .clang-tidy config file test-on-container: + name: Container Build (Cuda=${{ matrix.cuda_version }}) runs-on: ubuntu-latest + strategy: + matrix: + cuda_version: [cpu, "12.6", "12.8", "13.0"] # quoted: bare 13.0 is a YAML float and interpolates as "13" steps: - uses: actions/checkout@v4 - name: Build TGN Container - run: docker build -t tgn-dev . 
+ run: docker build --build-arg CUDA_VERSION=${{ matrix.cuda_version }} -t tgn-dev:${{ matrix.cuda_version }} . - name: Run Tests inside Container run: | - docker run --rm \ - -v "$(pwd):/workspace:Z" \ - tgn-dev /bin/bash -c "make test" + # If CPU, run tests. If CUDA, just run build to verify compilation. + CMD=$([[ "${{ matrix.cuda_version }}" == "cpu" ]] && echo "make test" || echo "make build") + docker run --rm -v "$(pwd):/workspace:Z" tgn-dev:${{ matrix.cuda_version }} /bin/bash -c "$CMD" python-tests: needs: build @@ -111,10 +117,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Download Build Artifacts + - name: Download CPU Release Build uses: actions/download-artifact@v4 with: - name: build-${{ matrix.os }}-Release + name: build-${{ matrix.os }}-cpu-Release path: build/ - name: Setup uv diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 1d9138d..3dd75b1 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -10,12 +10,16 @@ on: # yamllint disable-line rule:truthy jobs: build-all: + name: Build (${{ matrix.os }}, CUDA=${{ matrix.cuda_version }}) runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: # Testing across Linux and macOS os: [ubuntu-latest, macos-latest] + build_type: [Release] + cuda_version: [cpu] # Cuda builds are tested on our containers + steps: - uses: actions/checkout@v4 @@ -30,12 +34,12 @@ jobs: run: brew install libomp - name: Build TGN Examples - run: make examples BUILD_TYPE=Release + run: make examples BUILD_TYPE=Release CUDA_VERSION=${{ matrix.cuda_version }} - name: Upload Build Artifact uses: actions/upload-artifact@v4 with: - name: build-${{ matrix.os }}-Release + name: build-${{ matrix.os }}-${{ matrix.cuda_version }}-Release path: build/ link-pred: @@ -53,7 +57,7 @@ jobs: - name: Download Build Artifact uses: actions/download-artifact@v4 with: - name: build-${{ matrix.os }}-Release + name: build-${{ matrix.os }}-cpu-Release path: build/ - 
name: Restore Binary Permissions @@ -87,7 +91,7 @@ jobs: - name: Download Build Artifact uses: actions/download-artifact@v4 with: - name: build-${{ matrix.os }}-Release + name: build-${{ matrix.os }}-cpu-Release path: build/ - name: Restore Binary Permissions @@ -107,15 +111,19 @@ jobs: run: make run-node-${{ matrix.dataset }} node-pred-on-container: + name: Container Integration (CUDA=${{ matrix.cuda_version }}) runs-on: ubuntu-latest + strategy: + matrix: + cuda_version: [cpu, "12.6", "12.8", "13.0"] # quoted: bare 13.0 is a YAML float and interpolates as "13" steps: - uses: actions/checkout@v4 - name: Build TGN Container - run: docker build -t tgn-dev . + run: docker build --build-arg CUDA_VERSION=${{ matrix.cuda_version }} -t tgn-dev:${{ matrix.cuda_version }} . - name: Run Node Prediction run: | - docker run --rm \ - -v "$(pwd):/workspace:Z" \ - tgn-dev /bin/bash -c "make run-node-tgbn-trade" + # If CPU: run full node prediction. If CUDA: just verify 'make examples' compiles. + CMD=$([[ "${{ matrix.cuda_version }}" == "cpu" ]] && echo "make run-node-tgbn-trade" || echo "make examples") + docker run --rm -v "$(pwd):/workspace:Z" tgn-dev:${{ matrix.cuda_version }} /bin/bash -c "$CMD" diff --git a/CMakeLists.txt b/CMakeLists.txt index c434c6a..2615769 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,10 +26,22 @@ add_compile_options( include(FetchContent) +set(CUDA_VERSION "cpu" CACHE STRING "CUDA version (cpu, 12.6, 12.8, 13.0)") + if(APPLE) # Target Apple Silicon (M1/M2/M3) set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.10.0.zip") + message(STATUS "TGUF: Target System is APPLE (ARM64). 
Using CPU LibTorch.") else() # Target Linux x86_64 - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.10.0%2Bcpu.zip") + if(CUDA_VERSION STREQUAL "cpu") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.10.0%2Bcpu.zip") + message(STATUS "TGUF: Target System is LINUX (CPU).") + else() + # Clean "12.6" -> "126" for the URL mapping + string(REPLACE "." "" CUDA_TAG ${CUDA_VERSION}) + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu${CUDA_TAG}/libtorch-shared-with-deps-2.10.0%2Bcu${CUDA_TAG}.zip") + message(STATUS "TGUF: Target System is LINUX (CUDA ${CUDA_VERSION}).") + enable_language(CUDA) + endif() endif() FetchContent_Declare( diff --git a/Dockerfile b/Dockerfile index 8711ff5..521ead5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ FROM ubuntu:24.04 +# Default is "cpu". Pass "12.6", "12.8", or "13.0" to trigger CUDA install. +ARG CUDA_VERSION=cpu ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ @@ -11,12 +12,23 @@ RUN apt-get update && apt-get install -y \ make \ git \ curl \ + wget \ ca-certificates \ + && \ + if [ "$CUDA_VERSION" != "cpu" ]; then \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + apt-get update && \ + PACKAGE_SUFFIX=$(echo $CUDA_VERSION | sed 's/\./-/g') && \ + apt-get install -y cuda-toolkit-${PACKAGE_SUFFIX} ; \ + fi \ && rm -rf /var/lib/apt/lists/* COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/ ENV CC=clang ENV CXX=clang++ +ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:${PATH} +ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_VERSION}/lib64 WORKDIR /workspace diff --git a/Makefile b/Makefile index dbd0205..8d4858c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,27 @@ BUILD_DIR := build PROFILE_DIR := build-profile -CMAKE_FLAGS := 
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON + +CUDA_VERSION ?= cpu +GPU_ARCH ?= native + +CMAKE_FLAGS := -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCUDA_VERSION=$(CUDA_VERSION) + +ifneq ($(CUDA_VERSION), cpu) + CMAKE_FLAGS += -DCMAKE_CUDA_ARCHITECTURES=$(GPU_ARCH) + + # Add Torch-specific Arch list (converts 80 -> 8.0, 120 -> 12.0) + ifneq ($(GPU_ARCH), native) + TORCH_ARCH := $(shell echo $(GPU_ARCH) | sed 's/\([0-9]\)$$/.\1/') + CMAKE_FLAGS += -DTORCH_CUDA_ARCH_LIST="$(TORCH_ARCH)" + endif + + # Handle CUDA Compiler Path (Look in standard /usr/local/cuda-X.Y) + CUDA_PATH := /usr/local/cuda-$(CUDA_VERSION) + ifneq ("$(wildcard $(CUDA_PATH)/bin/nvcc)","") + CMAKE_FLAGS += -DCMAKE_CUDA_COMPILER=$(CUDA_PATH)/bin/nvcc + endif +endif + NPROCS := $(shell nproc 2>/dev/null || sysctl -n hw.logicalcpu) EXAMPLE_LINK := $(BUILD_DIR)/examples/tgn_link_pred @@ -29,6 +50,10 @@ help: @echo " make examples - Build tgn_link_prop and tgn_node_prop examples" @echo " make clean - Remove build directory" @echo "" + @echo "Build Parameters (Optional):" + @echo " CUDA_VERSION= - Build for CUDA (e.g., 12.6, 12.8, 13.0). Default: cpu" + @echo " GPU_ARCH= - Compute capability (e.g., 80, 90, native). 
Default: native" + @echo "" @echo "Documentation Targets:" @echo " make docs - Build project documentation" @echo " make docs-serve - Build and serve project documentation" diff --git a/README.md b/README.md index 79252f6..c020d1e 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,9 @@ ![Clang](https://img.shields.io/badge/Compiler-Clang-orange?style=flat&labelColor=white&logo=clang&logoColor=black) ![Linux](https://img.shields.io/badge/Linux-FCC624?style=flat&logo=linux&logoColor=black) ![macOS](https://img.shields.io/badge/macOS-000000?style=flat&logo=apple&logoColor=white) +![CUDA 12.6](https://img.shields.io/badge/CUDA-12.6-76B900?style=flat&labelColor=white&logo=nvidia&logoColor=76B900) +![CUDA 12.8](https://img.shields.io/badge/CUDA-12.8-76B900?style=flat&labelColor=white&logo=nvidia&logoColor=76B900) +![CUDA 13.0](https://img.shields.io/badge/CUDA-13.0-76B900?style=flat&labelColor=white&logo=nvidia&logoColor=76B900) [![Docs](https://img.shields.io/readthedocs/tgncpp?style=flat&label=Docs&labelColor=white&logo=readthedocs&logoColor=black)](https://tgncpp.readthedocs.io/en/latest/?badge=latest) [![Tests](https://img.shields.io/github/actions/workflow/status/Jacob-Chmura/tgn.cpp/ci.yml?label=Tests&style=flat&labelColor=white&logo=github-actions&logoColor=black)](https://github.com/Jacob-Chmura/tgn.cpp/actions/workflows/ci.yml) @@ -33,7 +36,17 @@ A C++20 Port of [TGN](https://arxiv.org/abs/2006.10637) over pure LibTorch: ### Installation -You should just use the [Dockerfile](./Dockerfile), but if you prefer to install dependencies manually: +You should just use the [Dockerfile](./Dockerfile): + +```sh +# Build for CPU (default) +docker build -t tgn-dev:cpu . + +# Build for specific CUDA drivers (e.g. 12.6 for A100/H100) +docker build --build-arg CUDA_VERSION=12.6 -t tgn-dev:cu126 . 
+``` + +If you prefer a bare-metal install: ##### Linux @@ -42,6 +55,8 @@ You should just use the [Dockerfile](./Dockerfile), but if you prefer to install sudo apt-get install -y clang libc++-dev libc++abi-dev ``` +If you want to run with CUDA, refer to [nvidia docs](https://developer.nvidia.com/cuda-12-6-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_local) for nvidia toolkit installation. + ##### MacOS ```sh @@ -49,6 +64,16 @@ sudo apt-get install -y clang libc++-dev libc++abi-dev brew install cmake libomp ``` +> \[!Important\] +> **Platform Support**: + +| OS | **CUDA_VERSION** | Default | +| ----- | ----------------------------- | ------- | +| Linux | `cpu`, `12.6`, `12.8`, `13.0` | `cpu` | +| macOS | `cpu` | `cpu` | + +> **GPU_ARCH**: Specifies compute capability (e.g. `80`, `90`, `native`) for CUDA backend on Linux. + ##### TGUF Conversion Scripts use [uv](https://docs.astral.sh/uv/): ```sh @@ -57,15 +82,19 @@ curl -LsSf https://astral.sh/uv/install.sh | sh ### Usage -> \[!Note\] -> Tested on Linux (Ubuntu 22.04+) and macOS (Apple Silicon) +#### Setup ```sh # Clone the repo git clone git@github.com:Jacob-Chmura/tgn.cpp.git && cd tgn.cpp -# See available targets +# See all available targets make help +``` + +#### Running on CPU + +```sh # Download `tgbl-wiki` data, convert to `.tguf` and run examples/link_pred.cpp. make run-link-tgbl-wiki @@ -73,3 +102,13 @@ make run-link-tgbl-wiki # Download `tgbn-trade` data, convert to `.tguf` and run examples/node_pred.cpp make run-node-tgbn-trade ``` + +#### Running on GPU (Linux only) + +```sh +# Example: Cuda 12.6 on an A100 (Arch 80) +CUDA_VERSION=12.6 GPU_ARCH=80 make run-link-tgbl-wiki +``` + +> \[!TIP\] +> Use `nvidia-smi` to check your **CUDA_VERSION** and **GPU_ARCH**