diff --git a/.github/workflows/build-deb.yml b/.github/workflows/build-deb.yml index 0fd24711c..b5612b76d 100644 --- a/.github/workflows/build-deb.yml +++ b/.github/workflows/build-deb.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - backend: [nvidia, metax] + backend: [nvidia, metax, ascend] steps: - name: Checkout repository diff --git a/packaging/debian/build-helpers/Dockerfile.deb b/packaging/debian/build-helpers/Dockerfile.deb index dfd9dbb1d..e285000a0 100644 --- a/packaging/debian/build-helpers/Dockerfile.deb +++ b/packaging/debian/build-helpers/Dockerfile.deb @@ -1,21 +1,26 @@ -# Multi-stage Dockerfile to build Debian packages for FlagCX +# Unified Dockerfile to build Debian packages for FlagCX # Supports multiple backends via build arguments ARG BASE_IMAGE ARG BASE_IMAGE_VERSION=latest -FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} as builder +FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} ARG VENDOR ENV DEBIAN_FRONTEND=noninteractive # Install Debian packaging tools and build dependencies +# Note: some base images (e.g. Ascend CANN) may lack basic build tools, +# so we explicitly install all Build-Depends from debian/control here. 
RUN apt-get update && apt-get install -y \ debhelper \ devscripts \ dpkg-dev \ fakeroot \ lsb-release \ + g++ \ + make \ + cmake \ chrpath \ patchelf \ nlohmann-json3-dev \ @@ -41,11 +46,7 @@ RUN DEB_BUILD_PROFILES="pkg.flagcx.${VENDOR}-only" \ find /workspace -name "*.log" -exec echo "=== {} ===" \; -exec cat {} \; 2>/dev/null || true; \ exit 1; } -# Collect built .deb files +# Collect built .deb files into /output RUN mkdir -p /output && \ find /workspace -maxdepth 1 -name "*.deb" -exec cp {} /output/ \; && \ ls -lh /output/ - -# Output stage: minimal image with only the .deb files -FROM alpine:latest as output -COPY --from=builder /output/*.deb /output/ diff --git a/packaging/debian/build-helpers/build-flagcx.sh b/packaging/debian/build-helpers/build-flagcx.sh index 14a7b4389..d72e869d0 100755 --- a/packaging/debian/build-helpers/build-flagcx.sh +++ b/packaging/debian/build-helpers/build-flagcx.sh @@ -3,7 +3,7 @@ set -e # Unified FlagCX Debian package build script # Usage: ./packaging/debian/build-helpers/build-flagcx.sh <backend> [base_image_version] -# Supported backends: nvidia, metax +# Supported backends: nvidia, metax, ascend SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_DIR="$(dirname "$(dirname "$(dirname "$SCRIPT_DIR")")")" @@ -31,6 +31,7 @@ if [ -z "$BACKEND" ]; then echo "Supported backends:" echo " nvidia - Build packages for NVIDIA GPUs" echo " metax - Build packages for MetaX accelerators" + echo " ascend - Build packages for Ascend NPUs" echo "" echo "Optional arguments:" echo " base_image_version - Base image version tag (default: latest)" @@ -52,9 +53,13 @@ case "$BACKEND" in BASE_IMAGE="harbor.baai.ac.cn/flagbase/flagbase-metax" VENDOR="metax" ;; + ascend) + BASE_IMAGE="harbor.baai.ac.cn/flagbase/flagbase-ascend" + VENDOR="ascend" + ;; *) log_error "Invalid backend: $BACKEND" - echo "Supported backends: nvidia, metax" + echo "Supported backends: nvidia, metax, ascend" exit 1 ;; esac @@ -62,6 +67,14 @@ esac log_info "Building FlagCX 
Debian packages for $BACKEND backend" log_info "Using base image: ${BASE_IMAGE}:${BASE_IMAGE_VERSION}" +# Sync changelog from CHANGELOG.md +log_step "Synchronizing changelog..." +if [ -f "${PROJECT_DIR}/packaging/sync-changelog.py" ]; then + python3 "${PROJECT_DIR}/packaging/sync-changelog.py" || log_warn "Failed to sync changelog" +else + log_warn "sync-changelog.py not found, skipping changelog sync" +fi + DOCKERFILE="${SCRIPT_DIR}/Dockerfile.deb" # Check if Dockerfile exists @@ -77,11 +90,11 @@ IMAGE_TAG="flagcx-deb-${BACKEND}:${BASE_IMAGE_VERSION}" log_step "Building container image: $IMAGE_TAG" if ! docker build \ + --network=host \ -f "$DOCKERFILE" \ --build-arg BASE_IMAGE="$BASE_IMAGE" \ --build-arg BASE_IMAGE_VERSION="$BASE_IMAGE_VERSION" \ --build-arg VENDOR="$VENDOR" \ - --target output \ -t "$IMAGE_TAG" \ "$PROJECT_DIR"; then log_error "Docker build failed for $BACKEND" diff --git a/packaging/debian/control b/packaging/debian/control index 26dc89c17..2fed7819f 100644 --- a/packaging/debian/control +++ b/packaging/debian/control @@ -20,12 +20,15 @@ X-Build-Environment: This package is designed to be built in backend-specific . - MetaX backend: Requires maca_sdk (from repos.metax-tech.com) Built in Ubuntu 22.04 with MetaX APT repository configured + . + - Ascend backend: Requires Ascend CANN toolkit + Built in Ubuntu 22.04 with Ascend SDK . The Build-Depends listed above are for the Debian packaging tools only. Backend-specific dependencies are provided by the container environment. Package: libflagcx-metax -Build-Profiles: +Build-Profiles: Architecture: amd64 Depends: ${shlibs:Depends}, ${misc:Depends} Description: FlagCX communication library for MetaX hardware @@ -37,7 +40,7 @@ Description: FlagCX communication library for MetaX hardware MCCL backend support. 
Package: libflagcx-metax-dev -Build-Profiles: +Build-Profiles: Section: libdevel Architecture: amd64 Depends: libflagcx-metax (= ${binary:Version}), ${misc:Depends} @@ -48,7 +51,7 @@ Description: FlagCX development files for MetaX hardware building applications against libflagcx-metax. Package: libflagcx-nvidia -Build-Profiles: +Build-Profiles: Architecture: amd64 Depends: ${shlibs:Depends}, ${misc:Depends} Description: FlagCX communication library for NVIDIA hardware @@ -60,7 +63,7 @@ Description: FlagCX communication library for NVIDIA hardware NCCL backend support. Package: libflagcx-nvidia-dev -Build-Profiles: +Build-Profiles: Section: libdevel Architecture: amd64 Depends: libflagcx-nvidia (= ${binary:Version}), ${misc:Depends} @@ -69,3 +72,26 @@ Description: FlagCX development files for NVIDIA hardware . This package contains the header files and development files for building applications against libflagcx-nvidia. + +Package: libflagcx-ascend +Build-Profiles: +Architecture: amd64 arm64 +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: FlagCX communication library for Ascend NPUs + FlagCX is a scalable and adaptive cross-chip communication library. + It serves as a platform where developers, researchers, and AI engineers + can collaborate on various projects. + . + This package contains the shared library built for Huawei Ascend NPUs + with HCCL backend support. + +Package: libflagcx-ascend-dev +Build-Profiles: +Section: libdevel +Architecture: amd64 arm64 +Depends: libflagcx-ascend (= ${binary:Version}), ${misc:Depends} +Description: FlagCX development files for Ascend NPUs + FlagCX is a scalable and adaptive cross-chip communication library. + . + This package contains the header files and development files for + building applications against libflagcx-ascend. 
diff --git a/packaging/debian/rules b/packaging/debian/rules index a80459541..3f9396777 100755 --- a/packaging/debian/rules +++ b/packaging/debian/rules @@ -2,120 +2,57 @@ export DH_VERBOSE = 1 -# Allow building only specific backend via environment variable -# Usage: FLAGCX_BUILD_BACKEND=metax dpkg-buildpackage ... -# Valid values: metax, nvidia, all (default) -FLAGCX_BUILD_BACKEND ?= all +# Supported backends - add new vendors here +SUPPORTED_BACKENDS = nvidia metax ascend -# Build directories for different backends -BUILD_DIR_METAX = $(CURDIR)/build-metax -BUILD_DIR_NVIDIA = $(CURDIR)/build-nvidia +# Usage: FLAGCX_BUILD_BACKEND=nvidia dpkg-buildpackage ... +# Each backend requires its own SDK environment (container). +FLAGCX_BUILD_BACKEND ?= + +# Validate backend +ifeq ($(filter $(FLAGCX_BUILD_BACKEND),$(SUPPORTED_BACKENDS)),) + $(error FLAGCX_BUILD_BACKEND must be one of: $(SUPPORTED_BACKENDS). Each backend requires its own SDK environment.) +endif + +# Derived variables from backend name +BACKEND = $(FLAGCX_BUILD_BACKEND) +BACKEND_UPPER = $(shell echo $(BACKEND) | tr a-z A-Z) +BUILD_DIR = $(CURDIR)/build-$(BACKEND) %: dh $@ override_dh_auto_clean: dh_auto_clean - rm -rf $(BUILD_DIR_METAX) $(BUILD_DIR_NVIDIA) + rm -rf $(foreach b,$(SUPPORTED_BACKENDS),$(CURDIR)/build-$(b)) $(MAKE) clean || true override_dh_auto_build: -ifeq ($(FLAGCX_BUILD_BACKEND),nvidia) - @echo "Building NVIDIA variant only" - # Build NVIDIA variant - mkdir -p $(BUILD_DIR_NVIDIA) - $(MAKE) USE_NVIDIA=1 PREFIX=/usr - # Copy built files to nvidia build dir - mkdir -p $(BUILD_DIR_NVIDIA)/lib $(BUILD_DIR_NVIDIA)/include - cp -a build/lib/libflagcx.so* $(BUILD_DIR_NVIDIA)/lib/ || cp -a build/libflagcx.so* $(BUILD_DIR_NVIDIA)/lib/ - cp -r flagcx/include $(BUILD_DIR_NVIDIA)/include/flagcx -else ifeq ($(FLAGCX_BUILD_BACKEND),metax) - @echo "Building MetaX variant only" - # Build MetaX variant - mkdir -p $(BUILD_DIR_METAX) - $(MAKE) USE_METAX=1 PREFIX=/usr - # Copy built files to metax build dir - 
mkdir -p $(BUILD_DIR_METAX)/lib $(BUILD_DIR_METAX)/include - cp -a build/lib/libflagcx.so* $(BUILD_DIR_METAX)/lib/ || cp -a build/libflagcx.so* $(BUILD_DIR_METAX)/lib/ - cp -r flagcx/include $(BUILD_DIR_METAX)/include/flagcx -else - @echo "Building both variants" - # Build MetaX variant - mkdir -p $(BUILD_DIR_METAX) - $(MAKE) USE_METAX=1 PREFIX=/usr - # Copy built files to metax build dir - mkdir -p $(BUILD_DIR_METAX)/lib $(BUILD_DIR_METAX)/include - cp -a build/lib/libflagcx.so* $(BUILD_DIR_METAX)/lib/ || cp -a build/libflagcx.so* $(BUILD_DIR_METAX)/lib/ - cp -r flagcx/include $(BUILD_DIR_METAX)/include/flagcx - - # Clean for next build - $(MAKE) clean - - # Build NVIDIA variant - mkdir -p $(BUILD_DIR_NVIDIA) - $(MAKE) USE_NVIDIA=1 PREFIX=/usr - # Copy built files to nvidia build dir - mkdir -p $(BUILD_DIR_NVIDIA)/lib $(BUILD_DIR_NVIDIA)/include - cp -a build/lib/libflagcx.so* $(BUILD_DIR_NVIDIA)/lib/ || cp -a build/libflagcx.so* $(BUILD_DIR_NVIDIA)/lib/ - cp -r flagcx/include $(BUILD_DIR_NVIDIA)/include/flagcx -endif + @echo "Building $(BACKEND) variant" + mkdir -p $(BUILD_DIR) + $(MAKE) USE_$(BACKEND_UPPER)=1 PREFIX=/usr + mkdir -p $(BUILD_DIR)/lib $(BUILD_DIR)/include + cp -a build/lib/libflagcx.so* $(BUILD_DIR)/lib/ || \ + cp -a build/libflagcx.so* $(BUILD_DIR)/lib/ + cp -r flagcx/include $(BUILD_DIR)/include/flagcx override_dh_auto_install: -ifeq ($(FLAGCX_BUILD_BACKEND),nvidia) - @echo "Installing NVIDIA variant only" - # Install NVIDIA variant - mkdir -p debian/libflagcx-nvidia/usr/lib - mkdir -p debian/libflagcx-nvidia-dev/usr/include/flagcx - cp -a $(BUILD_DIR_NVIDIA)/lib/libflagcx.so* debian/libflagcx-nvidia/usr/lib/ - # Fix SONAME and RPATH - cd debian/libflagcx-nvidia/usr/lib && \ - patchelf --set-soname libflagcx.so.0 libflagcx.so && \ - patchelf --remove-rpath libflagcx.so && \ - ln -sf libflagcx.so libflagcx.so.0 && \ - ln -sf libflagcx.so libflagcx.so.0.1.0 - cp -r $(BUILD_DIR_NVIDIA)/include/* debian/libflagcx-nvidia-dev/usr/include/flagcx/ -else 
ifeq ($(FLAGCX_BUILD_BACKEND),metax) - @echo "Installing MetaX variant only" - # Install MetaX variant - mkdir -p debian/libflagcx-metax/usr/lib - mkdir -p debian/libflagcx-metax-dev/usr/include/flagcx - cp -a $(BUILD_DIR_METAX)/lib/libflagcx.so* debian/libflagcx-metax/usr/lib/ - # Fix SONAME and RPATH - cd debian/libflagcx-metax/usr/lib && \ + @echo "Installing $(BACKEND) variant" + # Install shared library + mkdir -p debian/libflagcx-$(BACKEND)/usr/lib + cp -a $(BUILD_DIR)/lib/libflagcx.so* debian/libflagcx-$(BACKEND)/usr/lib/ + # Fix SONAME, RPATH and create symlinks + cd debian/libflagcx-$(BACKEND)/usr/lib && \ patchelf --set-soname libflagcx.so.0 libflagcx.so && \ patchelf --remove-rpath libflagcx.so && \ ln -sf libflagcx.so libflagcx.so.0 && \ ln -sf libflagcx.so libflagcx.so.0.1.0 - cp -r $(BUILD_DIR_METAX)/include/* debian/libflagcx-metax-dev/usr/include/flagcx/ -else - @echo "Installing both variants" - # Install MetaX variant - mkdir -p debian/libflagcx-metax/usr/lib - mkdir -p debian/libflagcx-metax-dev/usr/include/flagcx - cp -a $(BUILD_DIR_METAX)/lib/libflagcx.so* debian/libflagcx-metax/usr/lib/ - # Fix SONAME and RPATH for MetaX - cd debian/libflagcx-metax/usr/lib && \ - patchelf --set-soname libflagcx.so.0 libflagcx.so && \ - patchelf --remove-rpath libflagcx.so && \ - ln -sf libflagcx.so libflagcx.so.0 && \ - ln -sf libflagcx.so libflagcx.so.0.1.0 - cp -r $(BUILD_DIR_METAX)/include/* debian/libflagcx-metax-dev/usr/include/flagcx/ - - # Install NVIDIA variant - mkdir -p debian/libflagcx-nvidia/usr/lib - mkdir -p debian/libflagcx-nvidia-dev/usr/include/flagcx - cp -a $(BUILD_DIR_NVIDIA)/lib/libflagcx.so* debian/libflagcx-nvidia/usr/lib/ - # Fix SONAME and RPATH for NVIDIA - cd debian/libflagcx-nvidia/usr/lib && \ - patchelf --set-soname libflagcx.so.0 libflagcx.so && \ - patchelf --remove-rpath libflagcx.so && \ - ln -sf libflagcx.so libflagcx.so.0 && \ - ln -sf libflagcx.so libflagcx.so.0.1.0 - cp -r $(BUILD_DIR_NVIDIA)/include/* 
debian/libflagcx-nvidia-dev/usr/include/flagcx/ -endif + # Install development headers + mkdir -p debian/libflagcx-$(BACKEND)-dev/usr/include/flagcx + cp -r $(BUILD_DIR)/include/* debian/libflagcx-$(BACKEND)-dev/usr/include/flagcx/ override_dh_auto_test: - # Skip tests for now + # Skip tests - requires vendor hardware @echo "Skipping tests" override_dh_dwz: @@ -123,6 +60,6 @@ override_dh_dwz: @echo "Skipping dwz debug info optimization" override_dh_shlibdeps: - # Ignore missing CUDA/vendor library dependencies - # These are provided by vendor-specific runtime packages (e.g., CUDA runtime, MACA runtime) + # Ignore missing vendor library dependencies (CUDA, MACA, CANN, etc.) + # These are provided by vendor-specific runtime environments dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info || true