From 3869b48323e84b9b6728397d26ce8058b9cde1dc Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Wed, 25 Feb 2026 19:48:53 +0800 Subject: [PATCH 1/3] feat(packaging): add RPM packaging support for RHEL/Rocky/OpenEuler Add complete RPM packaging infrastructure: - RPM spec file with conditional backend builds (nvidia, metax, ascend) - Dockerfiles for NVIDIA (Rocky Linux 8) and Ascend (OpenEuler 24.03) - Parameterized build script (build-flagcx-rpm.sh) - GitHub Actions workflow for automated RPM builds - Proper SONAME and RPATH handling via patchelf - ASL 2.0 license identifier for RPM compliance --- .github/workflows/build-rpm.yml | 42 ++++ .gitignore | 1 + packaging/rpm/build-flagcx-rpm.sh | 90 ++++++++ packaging/rpm/dockerfiles/Dockerfile.ascend | 41 ++++ packaging/rpm/dockerfiles/Dockerfile.nvidia | 41 ++++ packaging/rpm/specs/flagcx.spec | 221 ++++++++++++++++++++ 6 files changed, 436 insertions(+) create mode 100644 .github/workflows/build-rpm.yml create mode 100755 packaging/rpm/build-flagcx-rpm.sh create mode 100644 packaging/rpm/dockerfiles/Dockerfile.ascend create mode 100644 packaging/rpm/dockerfiles/Dockerfile.nvidia create mode 100644 packaging/rpm/specs/flagcx.spec diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml new file mode 100644 index 000000000..881f8acb1 --- /dev/null +++ b/.github/workflows/build-rpm.yml @@ -0,0 +1,42 @@ +name: Build RPM Packages + +on: + push: + tags: + - 'v*' + pull_request: + branches: [ main ] + paths: + - 'flagcx/**' + - 'packaging/rpm/**' + - '.github/workflows/build-rpm.yml' + workflow_dispatch: + +jobs: + build-rpm-packages: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + backend: [nvidia, ascend] + # metax requires custom base image setup + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build ${{ matrix.backend }} RPM packages + run: 
./packaging/rpm/build-flagcx-rpm.sh ${{ matrix.backend }} + + - name: Upload ${{ matrix.backend }} RPM packages + uses: actions/upload-artifact@v4 + with: + name: flagcx-${{ matrix.backend }}-rpm-packages + path: rpm-packages/${{ matrix.backend }}/**/*.rpm + retention-days: 7 diff --git a/.gitignore b/.gitignore index 4703bb0cd..5d7c4d58b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ build plugin/*/build test/*/build debian-packages +rpm-packages # Ignore compiled Python files and shared object files plugin/*/*.so diff --git a/packaging/rpm/build-flagcx-rpm.sh b/packaging/rpm/build-flagcx-rpm.sh new file mode 100755 index 000000000..49afbe7bd --- /dev/null +++ b/packaging/rpm/build-flagcx-rpm.sh @@ -0,0 +1,90 @@ +#!/bin/bash +set -e + +# FlagCX RPM package build script +# Usage: ./build-flagcx-rpm.sh <backend> [base_image_version] +# Supported backends: nvidia, metax, ascend + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")" +BACKEND="${1:-}" +BASE_IMAGE_VERSION="${2:-}" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log_info() { echo -e "${GREEN}[INFO]${NC} $1"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +log_error() { echo -e "${RED}[ERROR]${NC} $1"; } +log_step() { echo -e "${BLUE}[STEP]${NC} $1"; } + +# Show usage +if [ -z "$BACKEND" ]; then + log_error "No backend specified" + echo "" + echo "Usage: $0 <backend> [base_image_version]" + echo "" + echo "Supported backends:" + echo " nvidia - Build RPM packages for NVIDIA GPUs" + echo " metax - Build RPM packages for MetaX accelerators" + echo " ascend - Build RPM packages for Ascend NPUs" + echo "" + echo "Examples:" + echo " $0 nvidia" + echo " $0 ascend 8.5.0-910-openeuler24.03-py3.11" + exit 1 +fi + +# Validate backend and set base image +case "$BACKEND" in + nvidia) + BASE_IMAGE="nvcr.io/nvidia/cuda" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="12.4.1-devel-rockylinux8" + ;; + 
metax) + BASE_IMAGE="harbor.baai.ac.cn/flagbase/flagbase-metax" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="latest" + log_warn "MetaX RPM build may require custom base image with RPM tools" + ;; + ascend) + BASE_IMAGE="ascendai/cann" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="8.5.0-910-openeuler24.03-py3.11" + ;; + *) + log_error "Invalid backend: $BACKEND" + echo "Supported backends: nvidia, metax, ascend" + exit 1 + ;; +esac + +log_info "Building FlagCX RPM packages for $BACKEND backend" +log_info "Using base image: ${BASE_IMAGE}:${BASE_IMAGE_VERSION}" + +# Build Docker image +log_step "Building Docker image..." +docker build \ + --build-arg BASE_IMAGE="${BASE_IMAGE}" \ + --build-arg BASE_IMAGE_VERSION="${BASE_IMAGE_VERSION}" \ + -f "${SCRIPT_DIR}/dockerfiles/Dockerfile.${BACKEND}" \ + -t "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}" \ + "${PROJECT_DIR}" + +# Extract RPM packages +log_step "Extracting RPM packages..." +OUTPUT_DIR="${PROJECT_DIR}/rpm-packages/${BACKEND}" +mkdir -p "${OUTPUT_DIR}" + +CONTAINER_ID=$(docker create "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}") +docker cp "${CONTAINER_ID}:/root/rpmbuild/RPMS/" "${OUTPUT_DIR}/" 2>/dev/null || true +docker cp "${CONTAINER_ID}:/root/rpmbuild/SRPMS/" "${OUTPUT_DIR}/" 2>/dev/null || true +docker rm "${CONTAINER_ID}" + +log_info "✓ Packages built successfully for ${BACKEND}:" +echo "" +find "${OUTPUT_DIR}" -name "*.rpm" -exec ls -lh {} \; + +log_info "Build complete! 
Packages in: ${OUTPUT_DIR}" diff --git a/packaging/rpm/dockerfiles/Dockerfile.ascend b/packaging/rpm/dockerfiles/Dockerfile.ascend new file mode 100644 index 000000000..338c0e914 --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.ascend @@ -0,0 +1,41 @@ +ARG BASE_IMAGE=ascendai/cann +ARG BASE_IMAGE_VERSION=8.5.0-910-openeuler24.03-py3.11 + +FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} + +# Install RPM build tools +# OpenEuler uses different package names +RUN yum install -y \ + rpm-build \ + rpmdevtools \ + gcc-c++ \ + make \ + cmake \ + patchelf \ + nlohmann-json-devel \ + && yum clean all + +# Setup RPM build environment +RUN rpmdev-setuptree + +# Copy source code +WORKDIR /workspace +COPY . /workspace/ + +# Create source tarball +RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ + --transform 's,^\.,flagcx-0.8.0,' \ + --exclude='.git' \ + --exclude='build' \ + --exclude='debian-packages' \ + . + +# Build RPM with Ascend backend +RUN rpmbuild -ba \ + --define 'backend ascend' \ + /workspace/packaging/rpm/specs/flagcx.spec + +# List built packages +RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm + +CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.nvidia b/packaging/rpm/dockerfiles/Dockerfile.nvidia new file mode 100644 index 000000000..6462100fd --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.nvidia @@ -0,0 +1,41 @@ +ARG BASE_IMAGE=nvcr.io/nvidia/cuda +ARG BASE_IMAGE_VERSION=12.4.1-devel-rockylinux8 + +FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} + +# Install EPEL and RPM build tools +RUN yum install -y epel-release && \ + yum install -y \ + rpm-build \ + rpmdevtools \ + gcc-c++ \ + make \ + cmake \ + patchelf \ + json-devel \ + && yum clean all + +# Setup RPM build environment +RUN rpmdev-setuptree + +# Copy source code +WORKDIR /workspace +COPY . 
/workspace/ + +# Create source tarball +RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ + --transform 's,^\.,flagcx-0.8.0,' \ + --exclude='.git' \ + --exclude='build' \ + --exclude='debian-packages' \ + . + +# Build RPM with NVIDIA backend +RUN rpmbuild -ba \ + --define 'backend nvidia' \ + /workspace/packaging/rpm/specs/flagcx.spec + +# List built packages +RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm + +CMD ["/bin/bash"] diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec new file mode 100644 index 000000000..0012125b8 --- /dev/null +++ b/packaging/rpm/specs/flagcx.spec @@ -0,0 +1,221 @@ +%global debug_package %{nil} +%global _build_id_links none + +Name: flagcx +Version: 0.8.0 +Release: 1%{?dist} +Summary: FlagCX scalable cross-chip communication library + +License: ASL 2.0 +URL: https://github.com/flagos-ai/FlagCX +Source0: %{name}-%{version}.tar.gz + +BuildRequires: gcc-c++ +BuildRequires: make +BuildRequires: cmake +BuildRequires: patchelf +# nlohmann-json package name varies by distro +%if 0%{?rhel} == 8 +BuildRequires: json-devel +%else +BuildRequires: nlohmann-json-devel +%endif + +# Backend-specific packages will be built with different profiles +# This is the base spec, actual builds use --define 'backend nvidia|metax|ascend' + +%description +FlagCX is a scalable and adaptive cross-chip communication library. +It serves as a platform where developers, researchers, and AI engineers +can collaborate on various projects. + +%package -n libflagcx-nvidia +Summary: FlagCX library for NVIDIA GPUs +Requires: libnccl >= 2.0 + +%description -n libflagcx-nvidia +FlagCX communication library built for NVIDIA hardware with NCCL backend support. + +%package -n libflagcx-nvidia-devel +Summary: Development files for libflagcx-nvidia +Requires: libflagcx-nvidia = %{version}-%{release} + +%description -n libflagcx-nvidia-devel +Development files (headers and libraries) for libflagcx-nvidia. 
+ +%package -n libflagcx-metax +Summary: FlagCX library for MetaX accelerators + +%description -n libflagcx-metax +FlagCX communication library built for MetaX hardware with MCCL backend support. + +%package -n libflagcx-metax-devel +Summary: Development files for libflagcx-metax +Requires: libflagcx-metax = %{version}-%{release} + +%description -n libflagcx-metax-devel +Development files (headers and libraries) for libflagcx-metax. + +%package -n libflagcx-ascend +Summary: FlagCX library for Ascend NPUs + +%description -n libflagcx-ascend +FlagCX communication library built for Huawei Ascend NPUs with HCCL backend support. + +%package -n libflagcx-ascend-devel +Summary: Development files for libflagcx-ascend +Requires: libflagcx-ascend = %{version}-%{release} + +%description -n libflagcx-ascend-devel +Development files (headers and libraries) for libflagcx-ascend. + +%prep +%setup -q + +%build +# Determine which backend to build based on RPM macro +%if "%{?backend}" == "nvidia" + make USE_NVIDIA=1 PREFIX=%{_prefix} +%endif + +%if "%{?backend}" == "metax" + make USE_METAX=1 PREFIX=%{_prefix} +%endif + +%if "%{?backend}" == "ascend" + make USE_ASCEND=1 PREFIX=%{_prefix} +%endif + +%install +rm -rf %{buildroot} + +%if "%{?backend}" == "nvidia" + # Install NVIDIA variant + install -d %{buildroot}%{_libdir} + + # Install library + install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 + + # Create symlinks + ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so + + install -d %{buildroot}%{_includedir}/flagcx + cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ + + # Fix RPATH and set SONAME + patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true + patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true +%endif + +%if "%{?backend}" == "metax" + # Install MetaX variant + install -d %{buildroot}%{_libdir} + + # Install library + install -m 755 build/lib/libflagcx.so 
%{buildroot}%{_libdir}/libflagcx.so.0 + + # Create symlinks + ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so + + install -d %{buildroot}%{_includedir}/flagcx + cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ + + patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true + patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true +%endif + +%if "%{?backend}" == "ascend" + # Install Ascend variant + install -d %{buildroot}%{_libdir} + + # Install library + install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 + + # Create symlinks + ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so + + install -d %{buildroot}%{_includedir}/flagcx + cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ + + patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true + patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true +%endif + +%files -n libflagcx-nvidia +%if "%{?backend}" == "nvidia" +%license LICENSE +%{_libdir}/libflagcx.so.0 +%endif + +%files -n libflagcx-nvidia-devel +%if "%{?backend}" == "nvidia" +%{_includedir}/flagcx/ +%{_libdir}/libflagcx.so +%endif + +%files -n libflagcx-metax +%if "%{?backend}" == "metax" +%license LICENSE +%{_libdir}/libflagcx.so.0 +%endif + +%files -n libflagcx-metax-devel +%if "%{?backend}" == "metax" +%{_includedir}/flagcx/ +%{_libdir}/libflagcx.so +%endif + +%files -n libflagcx-ascend +%if "%{?backend}" == "ascend" +%license LICENSE +%{_libdir}/libflagcx.so.0 +%{_libdir}/libflagcx.so.%{version} +%endif + +%files -n libflagcx-ascend-devel +%if "%{?backend}" == "ascend" +%{_includedir}/flagcx/ +%{_libdir}/libflagcx.so +%endif + +%changelog +* Sat Nov 01 2025 FlagOS Contributors - 0.7-1 +- Added support to TsingMicro, including device adaptor tsmicroAdaptor and CCL adaptor tcclAdaptor. 
+- Implemented an experimental kernel-free non-reduce collective communication (SendRecv, AlltoAll, AlltoAllv, Broadcast, Gather, Scatter, AllGather) using device-buffer IPC/RDMA. +- Enabled auto-tuning on NVIDIA, MetaX, and Hygon platforms, achieving 1.02×–1.26× speedups for AllReduce, AllGather, ReduceScatter, and AlltoAll. +- Enhanced flagcxNetAdaptor with one-sided primitives (put, putSignal, waitValue) and added retransmission support for reliability improvement. + +* Wed Oct 01 2025 FlagOS Contributors - 0.6-1 +- Implemented device-buffer IPC communication to support intra-node SendRecv operations. +- Introduced device-initiated, host-launched device-side primitives, enabling kernel-based communication directly from devices. +- Enhanced auto-tuning with 50% performance improvement on MetaX platforms for the AllReduce operations. + +* Mon Sep 01 2025 FlagOS Contributors - 0.5-1 +- Added support for AMD GPUs, including a device adaptor hipAdaptor and a CCL adaptor rcclAdaptor. +- Introduced flagcxNetAdaptor to unify network backends, currently supporting socket, IBRC, UCX and IBUC (experimental). +- Enabled zero-copy device-buffer RDMA (user-buffer RDMA) to boost performance for small messages. +- Supported auto-tuning in homogeneous scenarios via flagcxTuner. +- Added test automation in CI/CD for PyTorch APIs. + +* Fri Aug 01 2025 FlagOS Contributors - 0.4-1 +- Supported heterogeneous training of ERNIE4.5 (Baidu) on NVIDIA and Iluvatar GPUs with Paddle + FlagCX. +- Improved heterogeneous communication across arbitrary NIC configurations, with more robust and flexible deployments. +- Introduced an experimental network plugin interface with extended supports for IBRC and SOCKET. Device buffer registration now can be done via DMA-BUF. +- Added an InterOp-level DSL to enable customized C2C algorithm design. +- Provided user documentation under docs/. 
+ +* Tue Jul 01 2025 FlagOS Contributors - 0.3-1 +- Integrated three additional native communication libraries: HCCL (Huawei), MUSACCL (Moore Threads) and MPI. +- Enhanced heterogeneous collective communication operations with pipeline optimizations. +- Introduced device-side functions to enable device-buffer RDMA, complementing the existing host-side functions. +- Delivered a full-stack open-source solution, FlagScale + FlagCX, for efficient heterogeneous prefilling-decoding disaggregation. + +* Thu May 01 2025 FlagOS Contributors - 0.2-1 +- Integrated 3 additional native communications libraries, including MCCL (Moore Threads), XCCL (Mellanox) and DUCCL (BAAI). +- Improved 11 heterogeneous collective communication operations with automatic topology detection and full support to single-NIC and multi-NIC environments. + +* Tue Apr 01 2025 FlagOS Contributors - 0.1-1 +- Added 5 native communications libraries including CCL adaptors for NCCL (NVIDIA), IXCCL (Iluvatar), and CNCL (Cambricon), and Host CCL adaptors GLOO and Bootstrap. +- Supported 11 heterogeneous collective communication operations using the C2C (Cluster-to-Cluster) algorithm. +- Provided a full-stack open-source solution, FlagScale + FlagCX, for efficient heterogeneous training. +- Natively integrated into PaddlePaddle [v3.0.0](https://github.com/PaddlePaddle/Paddle/tree/v3.0.0), with support for both dynamic and static graphs. 
From 60e004dfa9bc83d862bc22c5fc353599caf8cdcc Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Mon, 9 Mar 2026 01:08:57 +0800 Subject: [PATCH 2/3] refactor(packaging): unify RPM spec and Dockerfile with parameterized backend - Rewrite flagcx.spec to use %{backend} dynamic package names, reducing 221 lines to ~90 lines and eliminating empty sub-packages - Merge the two per-backend Dockerfiles (nvidia, ascend) into unified Dockerfile.rpm with BASE_IMAGE and BACKEND build args - Add MetaX RPM support via Rocky Linux 8 + MACA SDK yum repo - Extract version from spec via grep instead of hardcoding in Dockerfiles - Add --network=host to docker build for DNS reliability - Run packaging/sync-changelog.py (when present) before the Docker build - Switch the CI workflow to the h20 runner and add metax to the build matrix - Add PR-REVIEW-ISSUES.md for tracking remaining items --- .github/workflows/build-rpm.yml | 5 +- packaging/rpm/build-flagcx-rpm.sh | 20 ++- packaging/rpm/dockerfiles/Dockerfile.ascend | 41 ----- packaging/rpm/dockerfiles/Dockerfile.nvidia | 41 ----- packaging/rpm/dockerfiles/Dockerfile.rpm | 61 ++++++++ packaging/rpm/specs/flagcx.spec | 161 ++++---------------- 6 files changed, 109 insertions(+), 220 deletions(-) delete mode 100644 packaging/rpm/dockerfiles/Dockerfile.ascend delete mode 100644 packaging/rpm/dockerfiles/Dockerfile.nvidia create mode 100644 packaging/rpm/dockerfiles/Dockerfile.rpm diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml index 881f8acb1..12f250cf4 100644 --- a/.github/workflows/build-rpm.yml +++ b/.github/workflows/build-rpm.yml @@ -14,13 +14,12 @@ on: jobs: build-rpm-packages: - runs-on: ubuntu-latest + runs-on: h20 strategy: fail-fast: false matrix: - backend: [nvidia, ascend] - # metax requires custom base image setup + backend: [nvidia, metax, ascend] steps: - name: Checkout repository diff --git a/packaging/rpm/build-flagcx-rpm.sh b/packaging/rpm/build-flagcx-rpm.sh index 49afbe7bd..89b24cd17 100755 --- a/packaging/rpm/build-flagcx-rpm.sh +++ b/packaging/rpm/build-flagcx-rpm.sh @@ -46,9 +46,8 @@ case "$BACKEND" in [ -z "$BASE_IMAGE_VERSION" ] && 
BASE_IMAGE_VERSION="12.4.1-devel-rockylinux8" ;; metax) - BASE_IMAGE="harbor.baai.ac.cn/flagbase/flagbase-metax" - [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="latest" - log_warn "MetaX RPM build may require custom base image with RPM tools" + BASE_IMAGE="rockylinux" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="8" ;; ascend) BASE_IMAGE="ascendai/cann" @@ -64,12 +63,23 @@ esac log_info "Building FlagCX RPM packages for $BACKEND backend" log_info "Using base image: ${BASE_IMAGE}:${BASE_IMAGE_VERSION}" -# Build Docker image +# Sync changelog from CHANGELOG.md +log_step "Synchronizing changelog..." +if [ -f "${PROJECT_DIR}/packaging/sync-changelog.py" ]; then + python3 "${PROJECT_DIR}/packaging/sync-changelog.py" || log_warn "Failed to sync changelog" +else + log_warn "sync-changelog.py not found, skipping changelog sync" +fi + +# Build Docker image using unified Dockerfile +DOCKERFILE="${SCRIPT_DIR}/dockerfiles/Dockerfile.rpm" log_step "Building Docker image..." docker build \ + --network=host \ --build-arg BASE_IMAGE="${BASE_IMAGE}" \ --build-arg BASE_IMAGE_VERSION="${BASE_IMAGE_VERSION}" \ - -f "${SCRIPT_DIR}/dockerfiles/Dockerfile.${BACKEND}" \ + --build-arg BACKEND="${BACKEND}" \ + -f "${DOCKERFILE}" \ -t "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}" \ "${PROJECT_DIR}" diff --git a/packaging/rpm/dockerfiles/Dockerfile.ascend b/packaging/rpm/dockerfiles/Dockerfile.ascend deleted file mode 100644 index 338c0e914..000000000 --- a/packaging/rpm/dockerfiles/Dockerfile.ascend +++ /dev/null @@ -1,41 +0,0 @@ -ARG BASE_IMAGE=ascendai/cann -ARG BASE_IMAGE_VERSION=8.5.0-910-openeuler24.03-py3.11 - -FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} - -# Install RPM build tools -# OpenEuler uses different package names -RUN yum install -y \ - rpm-build \ - rpmdevtools \ - gcc-c++ \ - make \ - cmake \ - patchelf \ - nlohmann-json-devel \ - && yum clean all - -# Setup RPM build environment -RUN rpmdev-setuptree - -# Copy source code -WORKDIR /workspace -COPY . 
/workspace/ - -# Create source tarball -RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ - --transform 's,^\.,flagcx-0.8.0,' \ - --exclude='.git' \ - --exclude='build' \ - --exclude='debian-packages' \ - . - -# Build RPM with Ascend backend -RUN rpmbuild -ba \ - --define 'backend ascend' \ - /workspace/packaging/rpm/specs/flagcx.spec - -# List built packages -RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm - -CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.nvidia b/packaging/rpm/dockerfiles/Dockerfile.nvidia deleted file mode 100644 index 6462100fd..000000000 --- a/packaging/rpm/dockerfiles/Dockerfile.nvidia +++ /dev/null @@ -1,41 +0,0 @@ -ARG BASE_IMAGE=nvcr.io/nvidia/cuda -ARG BASE_IMAGE_VERSION=12.4.1-devel-rockylinux8 - -FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} - -# Install EPEL and RPM build tools -RUN yum install -y epel-release && \ - yum install -y \ - rpm-build \ - rpmdevtools \ - gcc-c++ \ - make \ - cmake \ - patchelf \ - json-devel \ - && yum clean all - -# Setup RPM build environment -RUN rpmdev-setuptree - -# Copy source code -WORKDIR /workspace -COPY . /workspace/ - -# Create source tarball -RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ - --transform 's,^\.,flagcx-0.8.0,' \ - --exclude='.git' \ - --exclude='build' \ - --exclude='debian-packages' \ - . 
- -# Build RPM with NVIDIA backend -RUN rpmbuild -ba \ - --define 'backend nvidia' \ - /workspace/packaging/rpm/specs/flagcx.spec - -# List built packages -RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm - -CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm b/packaging/rpm/dockerfiles/Dockerfile.rpm new file mode 100644 index 000000000..dde3ac9b5 --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.rpm @@ -0,0 +1,61 @@ +# Unified Dockerfile to build RPM packages for FlagCX +# Supports multiple backends via build arguments + +ARG BASE_IMAGE +ARG BASE_IMAGE_VERSION + +FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} + +ARG BACKEND + +# Install RPM build tools and dependencies +# Handles differences between RHEL/Rocky (epel + json-devel) and +# OpenEuler (nlohmann-json-devel), and MetaX SDK repo setup. +RUN yum install -y epel-release 2>/dev/null || true +RUN yum install -y \ + rpm-build \ + rpmdevtools \ + gcc-c++ \ + make \ + cmake \ + patchelf \ + && yum clean all +RUN yum install -y json-devel 2>/dev/null \ + || yum install -y nlohmann-json-devel 2>/dev/null \ + || true + +# MetaX-specific: configure MACA SDK yum repository and install SDK +RUN if [ "${BACKEND}" = "metax" ]; then \ + printf '[maca-sdk]\nname=MACA SDK Yum Repository\nbaseurl=https://repos.metax-tech.com/r/maca-sdk-rpm-x86_64/\nenabled=1\ngpgcheck=0\n' \ + > /etc/yum.repos.d/maca-sdk-rpm.repo && \ + yum makecache && \ + yum install -y maca_sdk && \ + yum clean all; \ + fi + +# Setup RPM build environment +RUN rpmdev-setuptree + +# Copy source code +WORKDIR /workspace +COPY . /workspace/ + +# Read version from spec and create source tarball +RUN SPEC_VERSION=$(grep '^Version:' /workspace/packaging/rpm/specs/flagcx.spec | awk '{print $2}') && \ + tar czf /root/rpmbuild/SOURCES/flagcx-${SPEC_VERSION}.tar.gz \ + --transform "s,^\.,flagcx-${SPEC_VERSION}," \ + --exclude='.git' \ + --exclude='build' \ + --exclude='debian-packages' \ + --exclude='rpm-packages' \ + . 
+ +# Build RPM with specified backend +RUN rpmbuild -ba \ + --define "backend ${BACKEND}" \ + /workspace/packaging/rpm/specs/flagcx.spec + +# List built packages +RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm + +CMD ["/bin/bash"] diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 0012125b8..9d63c2c2c 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -1,6 +1,12 @@ %global debug_package %{nil} %global _build_id_links none +# Backend must be specified via: rpmbuild --define 'backend nvidia|metax|ascend' +%{!?backend: %{error: backend must be defined (nvidia, metax, or ascend)}} + +# Derive uppercase backend name for make flag (USE_NVIDIA=1, etc.) +%global backend_upper %(echo %{backend} | tr a-z A-Z) + Name: flagcx Version: 0.8.0 Release: 1%{?dist} @@ -21,162 +27,57 @@ BuildRequires: json-devel BuildRequires: nlohmann-json-devel %endif -# Backend-specific packages will be built with different profiles -# This is the base spec, actual builds use --define 'backend nvidia|metax|ascend' - %description FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects. -%package -n libflagcx-nvidia -Summary: FlagCX library for NVIDIA GPUs +# Only the target backend's subpackages are defined +%package -n libflagcx-%{backend} +Summary: FlagCX library for %{backend} +%if "%{backend}" == "nvidia" Requires: libnccl >= 2.0 +%endif -%description -n libflagcx-nvidia -FlagCX communication library built for NVIDIA hardware with NCCL backend support. - -%package -n libflagcx-nvidia-devel -Summary: Development files for libflagcx-nvidia -Requires: libflagcx-nvidia = %{version}-%{release} - -%description -n libflagcx-nvidia-devel -Development files (headers and libraries) for libflagcx-nvidia. 
- -%package -n libflagcx-metax -Summary: FlagCX library for MetaX accelerators - -%description -n libflagcx-metax -FlagCX communication library built for MetaX hardware with MCCL backend support. - -%package -n libflagcx-metax-devel -Summary: Development files for libflagcx-metax -Requires: libflagcx-metax = %{version}-%{release} - -%description -n libflagcx-metax-devel -Development files (headers and libraries) for libflagcx-metax. - -%package -n libflagcx-ascend -Summary: FlagCX library for Ascend NPUs - -%description -n libflagcx-ascend -FlagCX communication library built for Huawei Ascend NPUs with HCCL backend support. +%description -n libflagcx-%{backend} +FlagCX communication library built for %{backend} hardware. -%package -n libflagcx-ascend-devel -Summary: Development files for libflagcx-ascend -Requires: libflagcx-ascend = %{version}-%{release} +%package -n libflagcx-%{backend}-devel +Summary: Development files for libflagcx-%{backend} +Requires: libflagcx-%{backend} = %{version}-%{release} -%description -n libflagcx-ascend-devel -Development files (headers and libraries) for libflagcx-ascend. +%description -n libflagcx-%{backend}-devel +Development files (headers and libraries) for libflagcx-%{backend}. 
%prep %setup -q %build -# Determine which backend to build based on RPM macro -%if "%{?backend}" == "nvidia" - make USE_NVIDIA=1 PREFIX=%{_prefix} -%endif - -%if "%{?backend}" == "metax" - make USE_METAX=1 PREFIX=%{_prefix} -%endif - -%if "%{?backend}" == "ascend" - make USE_ASCEND=1 PREFIX=%{_prefix} -%endif +make USE_%{backend_upper}=1 PREFIX=%{_prefix} %install rm -rf %{buildroot} -%if "%{?backend}" == "nvidia" - # Install NVIDIA variant - install -d %{buildroot}%{_libdir} - - # Install library - install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 - - # Create symlinks - ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so - - install -d %{buildroot}%{_includedir}/flagcx - cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ - - # Fix RPATH and set SONAME - patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true - patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%endif - -%if "%{?backend}" == "metax" - # Install MetaX variant - install -d %{buildroot}%{_libdir} - - # Install library - install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 - - # Create symlinks - ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so +# Install shared library +install -d %{buildroot}%{_libdir} +install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 +ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so - install -d %{buildroot}%{_includedir}/flagcx - cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ +# Install headers +install -d %{buildroot}%{_includedir}/flagcx +cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ - patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true - patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%endif - -%if "%{?backend}" == "ascend" - # Install Ascend variant - install -d %{buildroot}%{_libdir} - - # Install library - install -m 755 build/lib/libflagcx.so 
%{buildroot}%{_libdir}/libflagcx.so.0 - - # Create symlinks - ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so - - install -d %{buildroot}%{_includedir}/flagcx - cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ - - patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true - patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%endif +# Fix RPATH and set SONAME +patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true +patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%files -n libflagcx-nvidia -%if "%{?backend}" == "nvidia" +%files -n libflagcx-%{backend} %license LICENSE %{_libdir}/libflagcx.so.0 -%endif -%files -n libflagcx-nvidia-devel -%if "%{?backend}" == "nvidia" +%files -n libflagcx-%{backend}-devel %{_includedir}/flagcx/ %{_libdir}/libflagcx.so -%endif - -%files -n libflagcx-metax -%if "%{?backend}" == "metax" -%license LICENSE -%{_libdir}/libflagcx.so.0 -%endif - -%files -n libflagcx-metax-devel -%if "%{?backend}" == "metax" -%{_includedir}/flagcx/ -%{_libdir}/libflagcx.so -%endif - -%files -n libflagcx-ascend -%if "%{?backend}" == "ascend" -%license LICENSE -%{_libdir}/libflagcx.so.0 -%{_libdir}/libflagcx.so.%{version} -%endif - -%files -n libflagcx-ascend-devel -%if "%{?backend}" == "ascend" -%{_includedir}/flagcx/ -%{_libdir}/libflagcx.so -%endif %changelog * Sat Nov 01 2025 FlagOS Contributors - 0.7-1 From b79fda33b403d5d2ba2f9f8a8769466ca02361df Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Sun, 22 Mar 2026 21:29:05 +0800 Subject: [PATCH 3/3] ci(build): remove Docker Buildx setup from RPM workflow Remove the Docker Buildx setup step from the GitHub Actions workflow that builds RPM packages. The build script (packaging/rpm/build-flagcx-rpm.sh) invokes plain `docker build` without any Buildx-specific options, so the workflow does not need this step. 
--- .github/workflows/build-rpm.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml index 12f250cf4..a27415b0f 100644 --- a/.github/workflows/build-rpm.yml +++ b/.github/workflows/build-rpm.yml @@ -27,9 +27,6 @@ jobs: with: submodules: recursive - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Build ${{ matrix.backend }} RPM packages run: ./packaging/rpm/build-flagcx-rpm.sh ${{ matrix.backend }}