From f137546fd59412c4b14b6aecd1d956f3c33c9781 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Wed, 25 Feb 2026 19:48:53 +0800 Subject: [PATCH 01/17] feat(packaging): add RPM packaging support for RHEL/Rocky/OpenEuler Add complete RPM packaging infrastructure: - RPM spec file with conditional backend builds (nvidia, metax, ascend) - Dockerfiles for NVIDIA (Rocky Linux 8) and Ascend (OpenEuler 24.03) - Parameterized build script (build-flagcx-rpm.sh) - GitHub Actions workflow for automated RPM builds - Proper SONAME and RPATH handling via patchelf - ASL 2.0 license identifier for RPM compliance --- .github/workflows/build-rpm.yml | 42 ++++ .gitignore | 1 + packaging/rpm/build-flagcx-rpm.sh | 90 ++++++++ packaging/rpm/dockerfiles/Dockerfile.ascend | 41 ++++ packaging/rpm/dockerfiles/Dockerfile.nvidia | 41 ++++ packaging/rpm/specs/flagcx.spec | 221 ++++++++++++++++++++ 6 files changed, 436 insertions(+) create mode 100644 .github/workflows/build-rpm.yml create mode 100755 packaging/rpm/build-flagcx-rpm.sh create mode 100644 packaging/rpm/dockerfiles/Dockerfile.ascend create mode 100644 packaging/rpm/dockerfiles/Dockerfile.nvidia create mode 100644 packaging/rpm/specs/flagcx.spec diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml new file mode 100644 index 000000000..881f8acb1 --- /dev/null +++ b/.github/workflows/build-rpm.yml @@ -0,0 +1,42 @@ +name: Build RPM Packages + +on: + push: + tags: + - 'v*' + pull_request: + branches: [ main ] + paths: + - 'flagcx/**' + - 'packaging/rpm/**' + - '.github/workflows/build-rpm.yml' + workflow_dispatch: + +jobs: + build-rpm-packages: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + backend: [nvidia, ascend] + # metax requires custom base image setup + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build ${{ matrix.backend }} RPM packages + run: 
./packaging/rpm/build-flagcx-rpm.sh ${{ matrix.backend }} + + - name: Upload ${{ matrix.backend }} RPM packages + uses: actions/upload-artifact@v4 + with: + name: flagcx-${{ matrix.backend }}-rpm-packages + path: rpm-packages/${{ matrix.backend }}/**/*.rpm + retention-days: 7 diff --git a/.gitignore b/.gitignore index f47f3f9f3..ac8e47dc9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ build plugin/*/build test/*/build debian-packages +rpm-packages # Ignore compiled Python files and shared object files plugin/*/*.so diff --git a/packaging/rpm/build-flagcx-rpm.sh b/packaging/rpm/build-flagcx-rpm.sh new file mode 100755 index 000000000..49afbe7bd --- /dev/null +++ b/packaging/rpm/build-flagcx-rpm.sh @@ -0,0 +1,90 @@ +#!/bin/bash +set -e + +# FlagCX RPM package build script +# Usage: ./build-flagcx-rpm.sh [base_image_version] +# Supported backends: nvidia, metax, ascend + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")" +BACKEND="${1:-}" +BASE_IMAGE_VERSION="${2:-}" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log_info() { echo -e "${GREEN}[INFO]${NC} $1"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +log_error() { echo -e "${RED}[ERROR]${NC} $1"; } +log_step() { echo -e "${BLUE}[STEP]${NC} $1"; } + +# Show usage +if [ -z "$BACKEND" ]; then + log_error "No backend specified" + echo "" + echo "Usage: $0 [base_image_version]" + echo "" + echo "Supported backends:" + echo " nvidia - Build RPM packages for NVIDIA GPUs" + echo " metax - Build RPM packages for MetaX accelerators" + echo " ascend - Build RPM packages for Ascend NPUs" + echo "" + echo "Examples:" + echo " $0 nvidia" + echo " $0 ascend 8.5.0-910-openeuler24.03-py3.11" + exit 1 +fi + +# Validate backend and set base image +case "$BACKEND" in + nvidia) + BASE_IMAGE="nvcr.io/nvidia/cuda" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="12.4.1-devel-rockylinux8" + ;; + 
metax) + BASE_IMAGE="harbor.baai.ac.cn/flagbase/flagbase-metax" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="latest" + log_warn "MetaX RPM build may require custom base image with RPM tools" + ;; + ascend) + BASE_IMAGE="ascendai/cann" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="8.5.0-910-openeuler24.03-py3.11" + ;; + *) + log_error "Invalid backend: $BACKEND" + echo "Supported backends: nvidia, metax, ascend" + exit 1 + ;; +esac + +log_info "Building FlagCX RPM packages for $BACKEND backend" +log_info "Using base image: ${BASE_IMAGE}:${BASE_IMAGE_VERSION}" + +# Build Docker image +log_step "Building Docker image..." +docker build \ + --build-arg BASE_IMAGE="${BASE_IMAGE}" \ + --build-arg BASE_IMAGE_VERSION="${BASE_IMAGE_VERSION}" \ + -f "${SCRIPT_DIR}/dockerfiles/Dockerfile.${BACKEND}" \ + -t "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}" \ + "${PROJECT_DIR}" + +# Extract RPM packages +log_step "Extracting RPM packages..." +OUTPUT_DIR="${PROJECT_DIR}/rpm-packages/${BACKEND}" +mkdir -p "${OUTPUT_DIR}" + +CONTAINER_ID=$(docker create "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}") +docker cp "${CONTAINER_ID}:/root/rpmbuild/RPMS/" "${OUTPUT_DIR}/" 2>/dev/null || true +docker cp "${CONTAINER_ID}:/root/rpmbuild/SRPMS/" "${OUTPUT_DIR}/" 2>/dev/null || true +docker rm "${CONTAINER_ID}" + +log_info "✓ Packages built successfully for ${BACKEND}:" +echo "" +find "${OUTPUT_DIR}" -name "*.rpm" -exec ls -lh {} \; + +log_info "Build complete! 
Packages in: ${OUTPUT_DIR}" diff --git a/packaging/rpm/dockerfiles/Dockerfile.ascend b/packaging/rpm/dockerfiles/Dockerfile.ascend new file mode 100644 index 000000000..338c0e914 --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.ascend @@ -0,0 +1,41 @@ +ARG BASE_IMAGE=ascendai/cann +ARG BASE_IMAGE_VERSION=8.5.0-910-openeuler24.03-py3.11 + +FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} + +# Install RPM build tools +# OpenEuler uses different package names +RUN yum install -y \ + rpm-build \ + rpmdevtools \ + gcc-c++ \ + make \ + cmake \ + patchelf \ + nlohmann-json-devel \ + && yum clean all + +# Setup RPM build environment +RUN rpmdev-setuptree + +# Copy source code +WORKDIR /workspace +COPY . /workspace/ + +# Create source tarball +RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ + --transform 's,^\.,flagcx-0.8.0,' \ + --exclude='.git' \ + --exclude='build' \ + --exclude='debian-packages' \ + . + +# Build RPM with Ascend backend +RUN rpmbuild -ba \ + --define 'backend ascend' \ + /workspace/packaging/rpm/specs/flagcx.spec + +# List built packages +RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm + +CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.nvidia b/packaging/rpm/dockerfiles/Dockerfile.nvidia new file mode 100644 index 000000000..6462100fd --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.nvidia @@ -0,0 +1,41 @@ +ARG BASE_IMAGE=nvcr.io/nvidia/cuda +ARG BASE_IMAGE_VERSION=12.4.1-devel-rockylinux8 + +FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} + +# Install EPEL and RPM build tools +RUN yum install -y epel-release && \ + yum install -y \ + rpm-build \ + rpmdevtools \ + gcc-c++ \ + make \ + cmake \ + patchelf \ + json-devel \ + && yum clean all + +# Setup RPM build environment +RUN rpmdev-setuptree + +# Copy source code +WORKDIR /workspace +COPY . 
/workspace/ + +# Create source tarball +RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ + --transform 's,^\.,flagcx-0.8.0,' \ + --exclude='.git' \ + --exclude='build' \ + --exclude='debian-packages' \ + . + +# Build RPM with NVIDIA backend +RUN rpmbuild -ba \ + --define 'backend nvidia' \ + /workspace/packaging/rpm/specs/flagcx.spec + +# List built packages +RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm + +CMD ["/bin/bash"] diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec new file mode 100644 index 000000000..0012125b8 --- /dev/null +++ b/packaging/rpm/specs/flagcx.spec @@ -0,0 +1,221 @@ +%global debug_package %{nil} +%global _build_id_links none + +Name: flagcx +Version: 0.8.0 +Release: 1%{?dist} +Summary: FlagCX scalable cross-chip communication library + +License: ASL 2.0 +URL: https://github.com/flagos-ai/FlagCX +Source0: %{name}-%{version}.tar.gz + +BuildRequires: gcc-c++ +BuildRequires: make +BuildRequires: cmake +BuildRequires: patchelf +# nlohmann-json package name varies by distro +%if 0%{?rhel} == 8 +BuildRequires: json-devel +%else +BuildRequires: nlohmann-json-devel +%endif + +# Backend-specific packages will be built with different profiles +# This is the base spec, actual builds use --define 'backend nvidia|metax|ascend' + +%description +FlagCX is a scalable and adaptive cross-chip communication library. +It serves as a platform where developers, researchers, and AI engineers +can collaborate on various projects. + +%package -n libflagcx-nvidia +Summary: FlagCX library for NVIDIA GPUs +Requires: libnccl >= 2.0 + +%description -n libflagcx-nvidia +FlagCX communication library built for NVIDIA hardware with NCCL backend support. + +%package -n libflagcx-nvidia-devel +Summary: Development files for libflagcx-nvidia +Requires: libflagcx-nvidia = %{version}-%{release} + +%description -n libflagcx-nvidia-devel +Development files (headers and libraries) for libflagcx-nvidia. 
+ +%package -n libflagcx-metax +Summary: FlagCX library for MetaX accelerators + +%description -n libflagcx-metax +FlagCX communication library built for MetaX hardware with MCCL backend support. + +%package -n libflagcx-metax-devel +Summary: Development files for libflagcx-metax +Requires: libflagcx-metax = %{version}-%{release} + +%description -n libflagcx-metax-devel +Development files (headers and libraries) for libflagcx-metax. + +%package -n libflagcx-ascend +Summary: FlagCX library for Ascend NPUs + +%description -n libflagcx-ascend +FlagCX communication library built for Huawei Ascend NPUs with HCCL backend support. + +%package -n libflagcx-ascend-devel +Summary: Development files for libflagcx-ascend +Requires: libflagcx-ascend = %{version}-%{release} + +%description -n libflagcx-ascend-devel +Development files (headers and libraries) for libflagcx-ascend. + +%prep +%setup -q + +%build +# Determine which backend to build based on RPM macro +%if "%{?backend}" == "nvidia" + make USE_NVIDIA=1 PREFIX=%{_prefix} +%endif + +%if "%{?backend}" == "metax" + make USE_METAX=1 PREFIX=%{_prefix} +%endif + +%if "%{?backend}" == "ascend" + make USE_ASCEND=1 PREFIX=%{_prefix} +%endif + +%install +rm -rf %{buildroot} + +%if "%{?backend}" == "nvidia" + # Install NVIDIA variant + install -d %{buildroot}%{_libdir} + + # Install library + install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 + + # Create symlinks + ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so + + install -d %{buildroot}%{_includedir}/flagcx + cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ + + # Fix RPATH and set SONAME + patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true + patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true +%endif + +%if "%{?backend}" == "metax" + # Install MetaX variant + install -d %{buildroot}%{_libdir} + + # Install library + install -m 755 build/lib/libflagcx.so 
%{buildroot}%{_libdir}/libflagcx.so.0 + + # Create symlinks + ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so + + install -d %{buildroot}%{_includedir}/flagcx + cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ + + patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true + patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true +%endif + +%if "%{?backend}" == "ascend" + # Install Ascend variant + install -d %{buildroot}%{_libdir} + + # Install library + install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 + + # Create symlinks + ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so + + install -d %{buildroot}%{_includedir}/flagcx + cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ + + patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true + patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true +%endif + +%files -n libflagcx-nvidia +%if "%{?backend}" == "nvidia" +%license LICENSE +%{_libdir}/libflagcx.so.0 +%endif + +%files -n libflagcx-nvidia-devel +%if "%{?backend}" == "nvidia" +%{_includedir}/flagcx/ +%{_libdir}/libflagcx.so +%endif + +%files -n libflagcx-metax +%if "%{?backend}" == "metax" +%license LICENSE +%{_libdir}/libflagcx.so.0 +%endif + +%files -n libflagcx-metax-devel +%if "%{?backend}" == "metax" +%{_includedir}/flagcx/ +%{_libdir}/libflagcx.so +%endif + +%files -n libflagcx-ascend +%if "%{?backend}" == "ascend" +%license LICENSE +%{_libdir}/libflagcx.so.0 +%{_libdir}/libflagcx.so.%{version} +%endif + +%files -n libflagcx-ascend-devel +%if "%{?backend}" == "ascend" +%{_includedir}/flagcx/ +%{_libdir}/libflagcx.so +%endif + +%changelog +* Sat Nov 01 2025 FlagOS Contributors - 0.7-1 +- Added support to TsingMicro, including device adaptor tsmicroAdaptor and CCL adaptor tcclAdaptor. 
+- Implemented an experimental kernel-free non-reduce collective communication (SendRecv, AlltoAll, AlltoAllv, Broadcast, Gather, Scatter, AllGather) using device-buffer IPC/RDMA. +- Enabled auto-tuning on NVIDIA, MetaX, and Hygon platforms, achieving 1.02×–1.26× speedups for AllReduce, AllGather, ReduceScatter, and AlltoAll. +- Enhanced flagcxNetAdaptor with one-sided primitives (put, putSignal, waitValue) and added retransmission support for reliability improvement. + +* Wed Oct 01 2025 FlagOS Contributors - 0.6-1 +- Implemented device-buffer IPC communication to support intra-node SendRecv operations. +- Introduced device-initiated, host-launched device-side primitives, enabling kernel-based communication directly from devices. +- Enhanced auto-tuning with 50% performance improvement on MetaX platforms for the AllReduce operations. + +* Mon Sep 01 2025 FlagOS Contributors - 0.5-1 +- Added support for AMD GPUs, including a device adaptor hipAdaptor and a CCL adaptor rcclAdaptor. +- Introduced flagcxNetAdaptor to unify network backends, currently supporting socket, IBRC, UCX and IBUC (experimental). +- Enabled zero-copy device-buffer RDMA (user-buffer RDMA) to boost performance for small messages. +- Supported auto-tuning in homogeneous scenarios via flagcxTuner. +- Added test automation in CI/CD for PyTorch APIs. + +* Fri Aug 01 2025 FlagOS Contributors - 0.4-1 +- Supported heterogeneous training of ERNIE4.5 (Baidu) on NVIDIA and Iluvatar GPUs with Paddle + FlagCX. +- Improved heterogeneous communication across arbitrary NIC configurations, with more robust and flexible deployments. +- Introduced an experimental network plugin interface with extended supports for IBRC and SOCKET. Device buffer registration now can be done via DMA-BUF. +- Added an InterOp-level DSL to enable customized C2C algorithm design. +- Provided user documentation under docs/. 
+ +* Tue Jul 01 2025 FlagOS Contributors - 0.3-1 +- Integrated three additional native communication libraries: HCCL (Huawei), MUSACCL (Moore Threads) and MPI. +- Enhanced heterogeneous collective communication operations with pipeline optimizations. +- Introduced device-side functions to enable device-buffer RDMA, complementing the existing host-side functions. +- Delivered a full-stack open-source solution, FlagScale + FlagCX, for efficient heterogeneous prefilling-decoding disaggregation. + +* Thu May 01 2025 FlagOS Contributors - 0.2-1 +- Integrated 3 additional native communications libraries, including MCCL (Moore Threads), XCCL (Mellanox) and DUCCL (BAAI). +- Improved 11 heterogeneous collective communication operations with automatic topology detection and full support to single-NIC and multi-NIC environments. + +* Tue Apr 01 2025 FlagOS Contributors - 0.1-1 +- Added 5 native communications libraries including CCL adaptors for NCCL (NVIDIA), IXCCL (Iluvatar), and CNCL (Cambricon), and Host CCL adaptors GLOO and Bootstrap. +- Supported 11 heterogeneous collective communication operations using the C2C (Cluster-to-Cluster) algorithm. +- Provided a full-stack open-source solution, FlagScale + FlagCX, for efficient heterogeneous training. +- Natively integrated into PaddlePaddle [v3.0.0](https://github.com/PaddlePaddle/Paddle/tree/v3.0.0), with support for both dynamic and static graphs. 
From 973b257950001a1db5cdc44e08a1be719118300a Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Mon, 9 Mar 2026 01:08:57 +0800 Subject: [PATCH 02/17] refactor(packaging): unify RPM spec and Dockerfile with parameterized backend - Rewrite flagcx.spec to use %{backend} dynamic package names, reducing 222 lines to ~90 lines and eliminating empty sub-packages - Merge three per-backend Dockerfiles into unified Dockerfile.rpm with BASE_IMAGE and BACKEND build args - Add MetaX RPM support via Rocky Linux 8 + MACA SDK yum repo - Extract version from spec via grep instead of hardcoding in Dockerfiles - Add --network=host to docker build for DNS reliability - Add PR-REVIEW-ISSUES.md for tracking remaining items --- .github/workflows/build-rpm.yml | 5 +- packaging/rpm/build-flagcx-rpm.sh | 20 ++- packaging/rpm/dockerfiles/Dockerfile.ascend | 41 ----- packaging/rpm/dockerfiles/Dockerfile.nvidia | 41 ----- packaging/rpm/dockerfiles/Dockerfile.rpm | 61 ++++++++ packaging/rpm/specs/flagcx.spec | 161 ++++---------------- 6 files changed, 109 insertions(+), 220 deletions(-) delete mode 100644 packaging/rpm/dockerfiles/Dockerfile.ascend delete mode 100644 packaging/rpm/dockerfiles/Dockerfile.nvidia create mode 100644 packaging/rpm/dockerfiles/Dockerfile.rpm diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml index 881f8acb1..12f250cf4 100644 --- a/.github/workflows/build-rpm.yml +++ b/.github/workflows/build-rpm.yml @@ -14,13 +14,12 @@ on: jobs: build-rpm-packages: - runs-on: ubuntu-latest + runs-on: h20 strategy: fail-fast: false matrix: - backend: [nvidia, ascend] - # metax requires custom base image setup + backend: [nvidia, metax, ascend] steps: - name: Checkout repository diff --git a/packaging/rpm/build-flagcx-rpm.sh b/packaging/rpm/build-flagcx-rpm.sh index 49afbe7bd..89b24cd17 100755 --- a/packaging/rpm/build-flagcx-rpm.sh +++ b/packaging/rpm/build-flagcx-rpm.sh @@ -46,9 +46,8 @@ case "$BACKEND" in [ -z "$BASE_IMAGE_VERSION" ] && 
BASE_IMAGE_VERSION="12.4.1-devel-rockylinux8" ;; metax) - BASE_IMAGE="harbor.baai.ac.cn/flagbase/flagbase-metax" - [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="latest" - log_warn "MetaX RPM build may require custom base image with RPM tools" + BASE_IMAGE="rockylinux" + [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="8" ;; ascend) BASE_IMAGE="ascendai/cann" @@ -64,12 +63,23 @@ esac log_info "Building FlagCX RPM packages for $BACKEND backend" log_info "Using base image: ${BASE_IMAGE}:${BASE_IMAGE_VERSION}" -# Build Docker image +# Sync changelog from CHANGELOG.md +log_step "Synchronizing changelog..." +if [ -f "${PROJECT_DIR}/packaging/sync-changelog.py" ]; then + python3 "${PROJECT_DIR}/packaging/sync-changelog.py" || log_warn "Failed to sync changelog" +else + log_warn "sync-changelog.py not found, skipping changelog sync" +fi + +# Build Docker image using unified Dockerfile +DOCKERFILE="${SCRIPT_DIR}/dockerfiles/Dockerfile.rpm" log_step "Building Docker image..." docker build \ + --network=host \ --build-arg BASE_IMAGE="${BASE_IMAGE}" \ --build-arg BASE_IMAGE_VERSION="${BASE_IMAGE_VERSION}" \ - -f "${SCRIPT_DIR}/dockerfiles/Dockerfile.${BACKEND}" \ + --build-arg BACKEND="${BACKEND}" \ + -f "${DOCKERFILE}" \ -t "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}" \ "${PROJECT_DIR}" diff --git a/packaging/rpm/dockerfiles/Dockerfile.ascend b/packaging/rpm/dockerfiles/Dockerfile.ascend deleted file mode 100644 index 338c0e914..000000000 --- a/packaging/rpm/dockerfiles/Dockerfile.ascend +++ /dev/null @@ -1,41 +0,0 @@ -ARG BASE_IMAGE=ascendai/cann -ARG BASE_IMAGE_VERSION=8.5.0-910-openeuler24.03-py3.11 - -FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} - -# Install RPM build tools -# OpenEuler uses different package names -RUN yum install -y \ - rpm-build \ - rpmdevtools \ - gcc-c++ \ - make \ - cmake \ - patchelf \ - nlohmann-json-devel \ - && yum clean all - -# Setup RPM build environment -RUN rpmdev-setuptree - -# Copy source code -WORKDIR /workspace -COPY . 
/workspace/ - -# Create source tarball -RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ - --transform 's,^\.,flagcx-0.8.0,' \ - --exclude='.git' \ - --exclude='build' \ - --exclude='debian-packages' \ - . - -# Build RPM with Ascend backend -RUN rpmbuild -ba \ - --define 'backend ascend' \ - /workspace/packaging/rpm/specs/flagcx.spec - -# List built packages -RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm - -CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.nvidia b/packaging/rpm/dockerfiles/Dockerfile.nvidia deleted file mode 100644 index 6462100fd..000000000 --- a/packaging/rpm/dockerfiles/Dockerfile.nvidia +++ /dev/null @@ -1,41 +0,0 @@ -ARG BASE_IMAGE=nvcr.io/nvidia/cuda -ARG BASE_IMAGE_VERSION=12.4.1-devel-rockylinux8 - -FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} - -# Install EPEL and RPM build tools -RUN yum install -y epel-release && \ - yum install -y \ - rpm-build \ - rpmdevtools \ - gcc-c++ \ - make \ - cmake \ - patchelf \ - json-devel \ - && yum clean all - -# Setup RPM build environment -RUN rpmdev-setuptree - -# Copy source code -WORKDIR /workspace -COPY . /workspace/ - -# Create source tarball -RUN tar czf /root/rpmbuild/SOURCES/flagcx-0.8.0.tar.gz \ - --transform 's,^\.,flagcx-0.8.0,' \ - --exclude='.git' \ - --exclude='build' \ - --exclude='debian-packages' \ - . 
- -# Build RPM with NVIDIA backend -RUN rpmbuild -ba \ - --define 'backend nvidia' \ - /workspace/packaging/rpm/specs/flagcx.spec - -# List built packages -RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm - -CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm b/packaging/rpm/dockerfiles/Dockerfile.rpm new file mode 100644 index 000000000..dde3ac9b5 --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.rpm @@ -0,0 +1,61 @@ +# Unified Dockerfile to build RPM packages for FlagCX +# Supports multiple backends via build arguments + +ARG BASE_IMAGE +ARG BASE_IMAGE_VERSION + +FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} + +ARG BACKEND + +# Install RPM build tools and dependencies +# Handles differences between RHEL/Rocky (epel + json-devel) and +# OpenEuler (nlohmann-json-devel), and MetaX SDK repo setup. +RUN yum install -y epel-release 2>/dev/null || true +RUN yum install -y \ + rpm-build \ + rpmdevtools \ + gcc-c++ \ + make \ + cmake \ + patchelf \ + && yum clean all +RUN yum install -y json-devel 2>/dev/null \ + || yum install -y nlohmann-json-devel 2>/dev/null \ + || true + +# MetaX-specific: configure MACA SDK yum repository and install SDK +RUN if [ "${BACKEND}" = "metax" ]; then \ + printf '[maca-sdk]\nname=MACA SDK Yum Repository\nbaseurl=https://repos.metax-tech.com/r/maca-sdk-rpm-x86_64/\nenabled=1\ngpgcheck=0\n' \ + > /etc/yum.repos.d/maca-sdk-rpm.repo && \ + yum makecache && \ + yum install -y maca_sdk && \ + yum clean all; \ + fi + +# Setup RPM build environment +RUN rpmdev-setuptree + +# Copy source code +WORKDIR /workspace +COPY . /workspace/ + +# Read version from spec and create source tarball +RUN SPEC_VERSION=$(grep '^Version:' /workspace/packaging/rpm/specs/flagcx.spec | awk '{print $2}') && \ + tar czf /root/rpmbuild/SOURCES/flagcx-${SPEC_VERSION}.tar.gz \ + --transform "s,^\.,flagcx-${SPEC_VERSION}," \ + --exclude='.git' \ + --exclude='build' \ + --exclude='debian-packages' \ + --exclude='rpm-packages' \ + . 
+ +# Build RPM with specified backend +RUN rpmbuild -ba \ + --define "backend ${BACKEND}" \ + /workspace/packaging/rpm/specs/flagcx.spec + +# List built packages +RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm + +CMD ["/bin/bash"] diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 0012125b8..9d63c2c2c 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -1,6 +1,12 @@ %global debug_package %{nil} %global _build_id_links none +# Backend must be specified via: rpmbuild --define 'backend nvidia|metax|ascend' +%{!?backend: %{error: backend must be defined (nvidia, metax, or ascend)}} + +# Derive uppercase backend name for make flag (USE_NVIDIA=1, etc.) +%global backend_upper %(echo %{backend} | tr a-z A-Z) + Name: flagcx Version: 0.8.0 Release: 1%{?dist} @@ -21,162 +27,57 @@ BuildRequires: json-devel BuildRequires: nlohmann-json-devel %endif -# Backend-specific packages will be built with different profiles -# This is the base spec, actual builds use --define 'backend nvidia|metax|ascend' - %description FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects. -%package -n libflagcx-nvidia -Summary: FlagCX library for NVIDIA GPUs +# Only the target backend's subpackages are defined +%package -n libflagcx-%{backend} +Summary: FlagCX library for %{backend} +%if "%{backend}" == "nvidia" Requires: libnccl >= 2.0 +%endif -%description -n libflagcx-nvidia -FlagCX communication library built for NVIDIA hardware with NCCL backend support. - -%package -n libflagcx-nvidia-devel -Summary: Development files for libflagcx-nvidia -Requires: libflagcx-nvidia = %{version}-%{release} - -%description -n libflagcx-nvidia-devel -Development files (headers and libraries) for libflagcx-nvidia. 
- -%package -n libflagcx-metax -Summary: FlagCX library for MetaX accelerators - -%description -n libflagcx-metax -FlagCX communication library built for MetaX hardware with MCCL backend support. - -%package -n libflagcx-metax-devel -Summary: Development files for libflagcx-metax -Requires: libflagcx-metax = %{version}-%{release} - -%description -n libflagcx-metax-devel -Development files (headers and libraries) for libflagcx-metax. - -%package -n libflagcx-ascend -Summary: FlagCX library for Ascend NPUs - -%description -n libflagcx-ascend -FlagCX communication library built for Huawei Ascend NPUs with HCCL backend support. +%description -n libflagcx-%{backend} +FlagCX communication library built for %{backend} hardware. -%package -n libflagcx-ascend-devel -Summary: Development files for libflagcx-ascend -Requires: libflagcx-ascend = %{version}-%{release} +%package -n libflagcx-%{backend}-devel +Summary: Development files for libflagcx-%{backend} +Requires: libflagcx-%{backend} = %{version}-%{release} -%description -n libflagcx-ascend-devel -Development files (headers and libraries) for libflagcx-ascend. +%description -n libflagcx-%{backend}-devel +Development files (headers and libraries) for libflagcx-%{backend}. 
%prep %setup -q %build -# Determine which backend to build based on RPM macro -%if "%{?backend}" == "nvidia" - make USE_NVIDIA=1 PREFIX=%{_prefix} -%endif - -%if "%{?backend}" == "metax" - make USE_METAX=1 PREFIX=%{_prefix} -%endif - -%if "%{?backend}" == "ascend" - make USE_ASCEND=1 PREFIX=%{_prefix} -%endif +make USE_%{backend_upper}=1 PREFIX=%{_prefix} %install rm -rf %{buildroot} -%if "%{?backend}" == "nvidia" - # Install NVIDIA variant - install -d %{buildroot}%{_libdir} - - # Install library - install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 - - # Create symlinks - ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so - - install -d %{buildroot}%{_includedir}/flagcx - cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ - - # Fix RPATH and set SONAME - patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true - patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%endif - -%if "%{?backend}" == "metax" - # Install MetaX variant - install -d %{buildroot}%{_libdir} - - # Install library - install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 - - # Create symlinks - ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so +# Install shared library +install -d %{buildroot}%{_libdir} +install -m 755 build/lib/libflagcx.so %{buildroot}%{_libdir}/libflagcx.so.0 +ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so - install -d %{buildroot}%{_includedir}/flagcx - cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ +# Install headers +install -d %{buildroot}%{_includedir}/flagcx +cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ - patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true - patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%endif - -%if "%{?backend}" == "ascend" - # Install Ascend variant - install -d %{buildroot}%{_libdir} - - # Install library - install -m 755 build/lib/libflagcx.so 
%{buildroot}%{_libdir}/libflagcx.so.0 - - # Create symlinks - ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so - - install -d %{buildroot}%{_includedir}/flagcx - cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ - - patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true - patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%endif +# Fix RPATH and set SONAME +patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true +patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true -%files -n libflagcx-nvidia -%if "%{?backend}" == "nvidia" +%files -n libflagcx-%{backend} %license LICENSE %{_libdir}/libflagcx.so.0 -%endif -%files -n libflagcx-nvidia-devel -%if "%{?backend}" == "nvidia" +%files -n libflagcx-%{backend}-devel %{_includedir}/flagcx/ %{_libdir}/libflagcx.so -%endif - -%files -n libflagcx-metax -%if "%{?backend}" == "metax" -%license LICENSE -%{_libdir}/libflagcx.so.0 -%endif - -%files -n libflagcx-metax-devel -%if "%{?backend}" == "metax" -%{_includedir}/flagcx/ -%{_libdir}/libflagcx.so -%endif - -%files -n libflagcx-ascend -%if "%{?backend}" == "ascend" -%license LICENSE -%{_libdir}/libflagcx.so.0 -%{_libdir}/libflagcx.so.%{version} -%endif - -%files -n libflagcx-ascend-devel -%if "%{?backend}" == "ascend" -%{_includedir}/flagcx/ -%{_libdir}/libflagcx.so -%endif %changelog * Sat Nov 01 2025 FlagOS Contributors - 0.7-1 From 4c7340f34384090e2fa253aa65f55484080dc0e2 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Sun, 22 Mar 2026 21:29:05 +0800 Subject: [PATCH 03/17] ci(build): remove Docker Buildx setup from RPM workflow The Docker Buildx setup step was removed from the GitHub Actions workflow for building RPM packages. This suggests that the RPM build process no longer requires Docker Buildx capabilities, potentially simplifying the build environment or moving to a different build approach. 
--- .github/workflows/build-rpm.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml index 12f250cf4..a27415b0f 100644 --- a/.github/workflows/build-rpm.yml +++ b/.github/workflows/build-rpm.yml @@ -27,9 +27,6 @@ jobs: with: submodules: recursive - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Build ${{ matrix.backend }} RPM packages run: ./packaging/rpm/build-flagcx-rpm.sh ${{ matrix.backend }} From e38bfb5b1d88ada6d331b1e8ebef198e2551c8ec Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Thu, 21 May 2026 21:22:48 +0900 Subject: [PATCH 04/17] fix(packaging): fail loud on missing JSON dep; verify RPM artifacts; split RHEL 9 spec path --- packaging/rpm/build-flagcx-rpm.sh | 10 ++++++++-- packaging/rpm/dockerfiles/Dockerfile.rpm | 2 +- packaging/rpm/specs/flagcx.spec | 12 +++++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/packaging/rpm/build-flagcx-rpm.sh b/packaging/rpm/build-flagcx-rpm.sh index 89b24cd17..22328a967 100755 --- a/packaging/rpm/build-flagcx-rpm.sh +++ b/packaging/rpm/build-flagcx-rpm.sh @@ -89,10 +89,16 @@ OUTPUT_DIR="${PROJECT_DIR}/rpm-packages/${BACKEND}" mkdir -p "${OUTPUT_DIR}" CONTAINER_ID=$(docker create "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}") -docker cp "${CONTAINER_ID}:/root/rpmbuild/RPMS/" "${OUTPUT_DIR}/" 2>/dev/null || true -docker cp "${CONTAINER_ID}:/root/rpmbuild/SRPMS/" "${OUTPUT_DIR}/" 2>/dev/null || true +docker cp "${CONTAINER_ID}:/root/rpmbuild/RPMS/" "${OUTPUT_DIR}/" +docker cp "${CONTAINER_ID}:/root/rpmbuild/SRPMS/" "${OUTPUT_DIR}/" docker rm "${CONTAINER_ID}" +# Fail loudly if no RPMs were extracted, so CI doesn't silently upload empty artifacts. +if ! 
find "${OUTPUT_DIR}" -name '*.rpm' | grep -q .; then + log_error "No RPM packages found under ${OUTPUT_DIR}" + exit 1 +fi + log_info "✓ Packages built successfully for ${BACKEND}:" echo "" find "${OUTPUT_DIR}" -name "*.rpm" -exec ls -lh {} \; diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm b/packaging/rpm/dockerfiles/Dockerfile.rpm index dde3ac9b5..0d96ffcc1 100644 --- a/packaging/rpm/dockerfiles/Dockerfile.rpm +++ b/packaging/rpm/dockerfiles/Dockerfile.rpm @@ -22,7 +22,7 @@ RUN yum install -y \ && yum clean all RUN yum install -y json-devel 2>/dev/null \ || yum install -y nlohmann-json-devel 2>/dev/null \ - || true + || { echo "ERROR: neither json-devel nor nlohmann-json-devel is available; rpmbuild requires nlohmann::json headers" >&2; exit 1; } # MetaX-specific: configure MACA SDK yum repository and install SDK RUN if [ "${BACKEND}" = "metax" ]; then \ diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 9d63c2c2c..596cbadff 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -20,11 +20,21 @@ BuildRequires: gcc-c++ BuildRequires: make BuildRequires: cmake BuildRequires: patchelf -# nlohmann-json package name varies by distro +# nlohmann-json package name varies by distro: +# - RHEL/Rocky 8 (via EPEL): json-devel +# - RHEL/Rocky 9 (via EPEL): nlohmann-json-devel +# - OpenEuler / others: nlohmann-json-devel (fallback) +# TODO: verify Rocky 9 / RHEL 9 build path end-to-end; the EPEL 9 package +# name is nlohmann-json-devel, but this has only been smoke-tested. %if 0%{?rhel} == 8 BuildRequires: json-devel %else +%if 0%{?rhel} >= 9 BuildRequires: nlohmann-json-devel +%else +# Non-RHEL (OpenEuler, etc.) – assume upstream nlohmann-json-devel package name. 
+BuildRequires: nlohmann-json-devel +%endif %endif %description From 0ecf806119e4a69e51928818bd2e96bf2ff91846 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Thu, 21 May 2026 22:55:02 +0900 Subject: [PATCH 05/17] fix(packaging): drop '|| true' on dh_shlibdeps and gate make-clean on Makefile presence --- packaging/debian/rules | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packaging/debian/rules b/packaging/debian/rules index a80459541..a920875fa 100755 --- a/packaging/debian/rules +++ b/packaging/debian/rules @@ -17,7 +17,12 @@ BUILD_DIR_NVIDIA = $(CURDIR)/build-nvidia override_dh_auto_clean: dh_auto_clean rm -rf $(BUILD_DIR_METAX) $(BUILD_DIR_NVIDIA) - $(MAKE) clean || true + # Only invoke upstream Makefile clean when a Makefile is present + # (first build before configure may have none); avoids `|| true` + # that would otherwise hide real errors. +ifneq ($(wildcard Makefile),) + $(MAKE) clean +endif override_dh_auto_build: ifeq ($(FLAGCX_BUILD_BACKEND),nvidia) @@ -125,4 +130,4 @@ override_dh_dwz: override_dh_shlibdeps: # Ignore missing CUDA/vendor library dependencies # These are provided by vendor-specific runtime packages (e.g., CUDA runtime, MACA runtime) - dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info || true + dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info From bf3b4035ee2c12c5e88870da6e79f7acc3c9a7a8 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Thu, 21 May 2026 22:58:32 +0900 Subject: [PATCH 06/17] fix(packaging): preserve stderr/aggregate failures; fail loud on patchelf --- packaging/debian/build-helpers/test-nexus-upload.sh | 11 +++++++++-- packaging/rpm/dockerfiles/Dockerfile.rpm | 2 +- packaging/rpm/specs/flagcx.spec | 7 ++++--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/packaging/debian/build-helpers/test-nexus-upload.sh b/packaging/debian/build-helpers/test-nexus-upload.sh index ef47eed58..104ef305b 100755 --- a/packaging/debian/build-helpers/test-nexus-upload.sh +++ 
b/packaging/debian/build-helpers/test-nexus-upload.sh @@ -118,8 +118,15 @@ case "$BACKEND" in upload_backend "metax" ;; all) - upload_backend "nvidia" || true - upload_backend "metax" || true + # Run both backends regardless of individual failures, but surface + # an aggregate non-zero exit so CI doesn't silently pass. + fail=0 + upload_backend "nvidia" || fail=1 + upload_backend "metax" || fail=1 + if [ "$fail" -ne 0 ]; then + log_error "One or more backend uploads failed" + exit 1 + fi ;; *) log_error "Invalid backend: $BACKEND" diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm b/packaging/rpm/dockerfiles/Dockerfile.rpm index 0d96ffcc1..36a2edd4f 100644 --- a/packaging/rpm/dockerfiles/Dockerfile.rpm +++ b/packaging/rpm/dockerfiles/Dockerfile.rpm @@ -11,7 +11,7 @@ ARG BACKEND # Install RPM build tools and dependencies # Handles differences between RHEL/Rocky (epel + json-devel) and # OpenEuler (nlohmann-json-devel), and MetaX SDK repo setup. -RUN yum install -y epel-release 2>/dev/null || true +RUN yum install -y epel-release || echo "EPEL not available (likely OpenEuler), continuing without it" RUN yum install -y \ rpm-build \ rpmdevtools \ diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 596cbadff..88f90fcfc 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -77,9 +77,10 @@ ln -s libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so install -d %{buildroot}%{_includedir}/flagcx cp -r flagcx/include/* %{buildroot}%{_includedir}/flagcx/ -# Fix RPATH and set SONAME -patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 || true -patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 || true +# Fix RPATH and set SONAME — fail loud if patchelf can't normalize the .so, +# otherwise a misconfigured SONAME ships and crashes consumers at runtime. 
+patchelf --remove-rpath %{buildroot}%{_libdir}/libflagcx.so.0 +patchelf --set-soname libflagcx.so.0 %{buildroot}%{_libdir}/libflagcx.so.0 %files -n libflagcx-%{backend} %license LICENSE From 7fd1bf3e761b106d755790e96861f1925614f0af Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Thu, 21 May 2026 23:33:41 +0900 Subject: [PATCH 07/17] fix(packaging): switch RPM License to SPDX Apache-2.0 (was legacy 'ASL 2.0') --- packaging/rpm/specs/flagcx.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 88f90fcfc..0768ed906 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -12,7 +12,7 @@ Version: 0.8.0 Release: 1%{?dist} Summary: FlagCX scalable cross-chip communication library -License: ASL 2.0 +License: Apache-2.0 URL: https://github.com/flagos-ai/FlagCX Source0: %{name}-%{version}.tar.gz From 22e8d6d55c122dbb805eba1bf0ae596746a5f3a1 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Thu, 21 May 2026 23:37:59 +0900 Subject: [PATCH 08/17] fix(packaging): allow empty main %files; Standards-Version 4.6.2; TODOs for libnccl floor and MetaX GPG --- packaging/debian/control | 2 +- packaging/rpm/dockerfiles/Dockerfile.rpm | 3 +++ packaging/rpm/specs/flagcx.spec | 7 +++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/packaging/debian/control b/packaging/debian/control index 26dc89c17..e22cb20ba 100644 --- a/packaging/debian/control +++ b/packaging/debian/control @@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 13), make, cmake, patchelf -Standards-Version: 4.6.0 +Standards-Version: 4.6.2 Homepage: https://github.com/flagos-ai/FlagCX Vcs-Browser: https://github.com/flagos-ai/FlagCX Vcs-Git: https://github.com/flagos-ai/FlagCX.git diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm b/packaging/rpm/dockerfiles/Dockerfile.rpm index 36a2edd4f..8998cf7d3 100644 --- a/packaging/rpm/dockerfiles/Dockerfile.rpm +++ 
b/packaging/rpm/dockerfiles/Dockerfile.rpm @@ -25,6 +25,9 @@ RUN yum install -y json-devel 2>/dev/null \ || { echo "ERROR: neither json-devel nor nlohmann-json-devel is available; rpmbuild requires nlohmann::json headers" >&2; exit 1; } # MetaX-specific: configure MACA SDK yum repository and install SDK +# TODO: switch gpgcheck=1 once MetaX publishes a stable GPG key for +# repos.metax-tech.com. Today this repo serves unsigned packages; gpgcheck=1 +# + gpgkey= would just fail. Tracked as a known security gap. RUN if [ "${BACKEND}" = "metax" ]; then \ printf '[maca-sdk]\nname=MACA SDK Yum Repository\nbaseurl=https://repos.metax-tech.com/r/maca-sdk-rpm-x86_64/\nenabled=1\ngpgcheck=0\n' \ > /etc/yum.repos.d/maca-sdk-rpm.repo && \ diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 0768ed906..1d2416a3b 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -1,5 +1,9 @@ %global debug_package %{nil} %global _build_id_links none +# Main "flagcx" package intentionally has no %files of its own; all artifacts +# live in the libflagcx-%{backend}{,-devel} subpackages. Without this guard, +# rpmbuild treats an empty main package manifest as an error. +%global _empty_manifest_terminate_build 0 # Backend must be specified via: rpmbuild --define 'backend nvidia|metax|ascend' %{!?backend: %{error: backend must be defined (nvidia, metax, or ascend)}} @@ -46,6 +50,9 @@ can collaborate on various projects. %package -n libflagcx-%{backend} Summary: FlagCX library for %{backend} %if "%{backend}" == "nvidia" +# TODO: tighten libnccl lower bound. FlagCX's NCCL adaptor likely needs +# >= 2.18 (group-call API, ncclConfig changes) but the exact floor has +# not yet been confirmed against the source tree. 
Requires: libnccl >= 2.0 %endif From 0070429ee4f81ca441c1e95560cce6f2e9c52c43 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Fri, 22 May 2026 17:29:54 +0900 Subject: [PATCH 09/17] fix(packaging): qualify RPM Source0 with %{url} (round-7 batch missed flagcx) --- packaging/rpm/specs/flagcx.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 1d2416a3b..87a96e68e 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -18,7 +18,7 @@ Summary: FlagCX scalable cross-chip communication library License: Apache-2.0 URL: https://github.com/flagos-ai/FlagCX -Source0: %{name}-%{version}.tar.gz +Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz BuildRequires: gcc-c++ BuildRequires: make From 6211013af107faf8a3e1cf2f73172f1c12fcc94f Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Fri, 22 May 2026 17:38:37 +0900 Subject: [PATCH 10/17] fix(packaging): tighten libnccl >= 2.10 (group-call API minimum); Source0 fedora-style --- packaging/rpm/specs/flagcx.spec | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 87a96e68e..84b81ab4e 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -18,7 +18,7 @@ Summary: FlagCX scalable cross-chip communication library License: Apache-2.0 URL: https://github.com/flagos-ai/FlagCX -Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz +Source0: %{url}/archive/refs/tags/v%{version}.tar.gz#/%{name}-%{version}.tar.gz BuildRequires: gcc-c++ BuildRequires: make @@ -50,10 +50,10 @@ can collaborate on various projects. %package -n libflagcx-%{backend} Summary: FlagCX library for %{backend} %if "%{backend}" == "nvidia" -# TODO: tighten libnccl lower bound. 
FlagCX's NCCL adaptor likely needs -# >= 2.18 (group-call API, ncclConfig changes) but the exact floor has -# not yet been confirmed against the source tree. -Requires: libnccl >= 2.0 +# Group-call API arrived in NCCL 2.10; ncclConfig appeared in 2.14. +# 2.10 is the practical minimum for FlagCX's adaptor today; bump to 2.14 +# once we confirm ncclConfig is actually exercised. +Requires: libnccl >= 2.10 %endif %description -n libflagcx-%{backend} From 0a11c914274978f002064c33318ce18f5569879d Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Fri, 22 May 2026 18:05:02 +0900 Subject: [PATCH 11/17] fix(packaging): ExclusiveArch by backend (ascend=aarch64, others=x86_64); avoid producing arch-mislabeled rpm --- packaging/rpm/specs/flagcx.spec | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 84b81ab4e..05870717c 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -11,6 +11,16 @@ # Derive uppercase backend name for make flag (USE_NVIDIA=1, etc.) %global backend_upper %(echo %{backend} | tr a-z A-Z) +# Pin build/install arch by backend: Ascend NPU hosts are aarch64, +# everything else (NVIDIA / MetaX / etc.) is x86_64. ExclusiveArch +# makes rpmbuild refuse to even start on a mismatched host, which is +# safer than producing a CPU-arch-mislabeled rpm. 
+%if "%{backend}" == "ascend" +ExclusiveArch: aarch64 +%else +ExclusiveArch: x86_64 +%endif + Name: flagcx Version: 0.8.0 Release: 1%{?dist} From 700fa49cdcdcf82cfd9a8e0c71c3176e5dbcdef2 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 26 May 2026 18:31:45 +0900 Subject: [PATCH 12/17] fix(packaging): drop Debian changes from RPM PR --- packaging/debian/build-helpers/test-nexus-upload.sh | 11 ++--------- packaging/debian/control | 2 +- packaging/debian/rules | 9 ++------- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/packaging/debian/build-helpers/test-nexus-upload.sh b/packaging/debian/build-helpers/test-nexus-upload.sh index 104ef305b..ef47eed58 100755 --- a/packaging/debian/build-helpers/test-nexus-upload.sh +++ b/packaging/debian/build-helpers/test-nexus-upload.sh @@ -118,15 +118,8 @@ case "$BACKEND" in upload_backend "metax" ;; all) - # Run both backends regardless of individual failures, but surface - # an aggregate non-zero exit so CI doesn't silently pass. - fail=0 - upload_backend "nvidia" || fail=1 - upload_backend "metax" || fail=1 - if [ "$fail" -ne 0 ]; then - log_error "One or more backend uploads failed" - exit 1 - fi + upload_backend "nvidia" || true + upload_backend "metax" || true ;; *) log_error "Invalid backend: $BACKEND" diff --git a/packaging/debian/control b/packaging/debian/control index e22cb20ba..26dc89c17 100644 --- a/packaging/debian/control +++ b/packaging/debian/control @@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 13), make, cmake, patchelf -Standards-Version: 4.6.2 +Standards-Version: 4.6.0 Homepage: https://github.com/flagos-ai/FlagCX Vcs-Browser: https://github.com/flagos-ai/FlagCX Vcs-Git: https://github.com/flagos-ai/FlagCX.git diff --git a/packaging/debian/rules b/packaging/debian/rules index a920875fa..a80459541 100755 --- a/packaging/debian/rules +++ b/packaging/debian/rules @@ -17,12 +17,7 @@ BUILD_DIR_NVIDIA = $(CURDIR)/build-nvidia override_dh_auto_clean: dh_auto_clean rm -rf $(BUILD_DIR_METAX) 
$(BUILD_DIR_NVIDIA) - # Only invoke upstream Makefile clean when a Makefile is present - # (first build before configure may have none); avoids `|| true` - # that would otherwise hide real errors. -ifneq ($(wildcard Makefile),) - $(MAKE) clean -endif + $(MAKE) clean || true override_dh_auto_build: ifeq ($(FLAGCX_BUILD_BACKEND),nvidia) @@ -130,4 +125,4 @@ override_dh_dwz: override_dh_shlibdeps: # Ignore missing CUDA/vendor library dependencies # These are provided by vendor-specific runtime packages (e.g., CUDA runtime, MACA runtime) - dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info + dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info || true From b834b29a6951f5b967cd3bf707fd9d26b3dea09b Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 26 May 2026 18:43:12 +0900 Subject: [PATCH 13/17] refactor(packaging): split RPM Dockerfiles by backend --- packaging/rpm/build-flagcx-rpm.sh | 8 +-- packaging/rpm/dockerfiles/Dockerfile.rpm | 64 ------------------- .../rpm/dockerfiles/Dockerfile.rpm.ascend | 11 ++++ .../rpm/dockerfiles/Dockerfile.rpm.metax | 20 ++++++ .../rpm/dockerfiles/Dockerfile.rpm.nvidia | 11 ++++ packaging/rpm/dockerfiles/build-rpm-common.sh | 58 +++++++++++++++++ 6 files changed, 104 insertions(+), 68 deletions(-) delete mode 100644 packaging/rpm/dockerfiles/Dockerfile.rpm create mode 100644 packaging/rpm/dockerfiles/Dockerfile.rpm.ascend create mode 100644 packaging/rpm/dockerfiles/Dockerfile.rpm.metax create mode 100644 packaging/rpm/dockerfiles/Dockerfile.rpm.nvidia create mode 100755 packaging/rpm/dockerfiles/build-rpm-common.sh diff --git a/packaging/rpm/build-flagcx-rpm.sh b/packaging/rpm/build-flagcx-rpm.sh index 22328a967..a4216bfb0 100755 --- a/packaging/rpm/build-flagcx-rpm.sh +++ b/packaging/rpm/build-flagcx-rpm.sh @@ -44,14 +44,17 @@ case "$BACKEND" in nvidia) BASE_IMAGE="nvcr.io/nvidia/cuda" [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="12.4.1-devel-rockylinux8" + 
DOCKERFILE="${SCRIPT_DIR}/dockerfiles/Dockerfile.rpm.nvidia" ;; metax) BASE_IMAGE="rockylinux" [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="8" + DOCKERFILE="${SCRIPT_DIR}/dockerfiles/Dockerfile.rpm.metax" ;; ascend) BASE_IMAGE="ascendai/cann" [ -z "$BASE_IMAGE_VERSION" ] && BASE_IMAGE_VERSION="8.5.0-910-openeuler24.03-py3.11" + DOCKERFILE="${SCRIPT_DIR}/dockerfiles/Dockerfile.rpm.ascend" ;; *) log_error "Invalid backend: $BACKEND" @@ -71,14 +74,11 @@ else log_warn "sync-changelog.py not found, skipping changelog sync" fi -# Build Docker image using unified Dockerfile -DOCKERFILE="${SCRIPT_DIR}/dockerfiles/Dockerfile.rpm" +# Build Docker image using backend-specific Dockerfile with shared RPM logic. log_step "Building Docker image..." docker build \ --network=host \ - --build-arg BASE_IMAGE="${BASE_IMAGE}" \ --build-arg BASE_IMAGE_VERSION="${BASE_IMAGE_VERSION}" \ - --build-arg BACKEND="${BACKEND}" \ -f "${DOCKERFILE}" \ -t "flagcx-rpm-${BACKEND}:${BASE_IMAGE_VERSION}" \ "${PROJECT_DIR}" diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm b/packaging/rpm/dockerfiles/Dockerfile.rpm deleted file mode 100644 index 8998cf7d3..000000000 --- a/packaging/rpm/dockerfiles/Dockerfile.rpm +++ /dev/null @@ -1,64 +0,0 @@ -# Unified Dockerfile to build RPM packages for FlagCX -# Supports multiple backends via build arguments - -ARG BASE_IMAGE -ARG BASE_IMAGE_VERSION - -FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION} - -ARG BACKEND - -# Install RPM build tools and dependencies -# Handles differences between RHEL/Rocky (epel + json-devel) and -# OpenEuler (nlohmann-json-devel), and MetaX SDK repo setup. 
-RUN yum install -y epel-release || echo "EPEL not available (likely OpenEuler), continuing without it" -RUN yum install -y \ - rpm-build \ - rpmdevtools \ - gcc-c++ \ - make \ - cmake \ - patchelf \ - && yum clean all -RUN yum install -y json-devel 2>/dev/null \ - || yum install -y nlohmann-json-devel 2>/dev/null \ - || { echo "ERROR: neither json-devel nor nlohmann-json-devel is available; rpmbuild requires nlohmann::json headers" >&2; exit 1; } - -# MetaX-specific: configure MACA SDK yum repository and install SDK -# TODO: switch gpgcheck=1 once MetaX publishes a stable GPG key for -# repos.metax-tech.com. Today this repo serves unsigned packages; gpgcheck=1 -# + gpgkey= would just fail. Tracked as a known security gap. -RUN if [ "${BACKEND}" = "metax" ]; then \ - printf '[maca-sdk]\nname=MACA SDK Yum Repository\nbaseurl=https://repos.metax-tech.com/r/maca-sdk-rpm-x86_64/\nenabled=1\ngpgcheck=0\n' \ - > /etc/yum.repos.d/maca-sdk-rpm.repo && \ - yum makecache && \ - yum install -y maca_sdk && \ - yum clean all; \ - fi - -# Setup RPM build environment -RUN rpmdev-setuptree - -# Copy source code -WORKDIR /workspace -COPY . /workspace/ - -# Read version from spec and create source tarball -RUN SPEC_VERSION=$(grep '^Version:' /workspace/packaging/rpm/specs/flagcx.spec | awk '{print $2}') && \ - tar czf /root/rpmbuild/SOURCES/flagcx-${SPEC_VERSION}.tar.gz \ - --transform "s,^\.,flagcx-${SPEC_VERSION}," \ - --exclude='.git' \ - --exclude='build' \ - --exclude='debian-packages' \ - --exclude='rpm-packages' \ - . 
- -# Build RPM with specified backend -RUN rpmbuild -ba \ - --define "backend ${BACKEND}" \ - /workspace/packaging/rpm/specs/flagcx.spec - -# List built packages -RUN ls -lh /root/rpmbuild/RPMS/*/*.rpm - -CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm.ascend b/packaging/rpm/dockerfiles/Dockerfile.rpm.ascend new file mode 100644 index 000000000..f709d0508 --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.rpm.ascend @@ -0,0 +1,11 @@ +# Build FlagCX Ascend RPM packages on CANN OpenEuler images. + +ARG BASE_IMAGE_VERSION=8.5.0-910-openeuler24.03-py3.11 +FROM ascendai/cann:${BASE_IMAGE_VERSION} + +WORKDIR /workspace +COPY . /workspace/ + +RUN bash packaging/rpm/dockerfiles/build-rpm-common.sh ascend + +CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm.metax b/packaging/rpm/dockerfiles/Dockerfile.rpm.metax new file mode 100644 index 000000000..a3191727e --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.rpm.metax @@ -0,0 +1,20 @@ +# Build FlagCX MetaX RPM packages on Rocky Linux. + +ARG BASE_IMAGE_VERSION=8 +FROM rockylinux:${BASE_IMAGE_VERSION} + +WORKDIR /workspace +COPY . /workspace/ + +# MetaX packages are served from the public MACA yum repository. +# TODO: switch gpgcheck=1 once MetaX publishes a stable GPG key for +# repos.metax-tech.com. Today this repo serves unsigned packages. 
+RUN printf '[maca-sdk]\nname=MACA SDK Yum Repository\nbaseurl=https://repos.metax-tech.com/r/maca-sdk-rpm-x86_64/\nenabled=1\ngpgcheck=0\n' \ + > /etc/yum.repos.d/maca-sdk-rpm.repo && \ + yum makecache && \ + yum install -y maca_sdk && \ + yum clean all + +RUN bash packaging/rpm/dockerfiles/build-rpm-common.sh metax + +CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/Dockerfile.rpm.nvidia b/packaging/rpm/dockerfiles/Dockerfile.rpm.nvidia new file mode 100644 index 000000000..f23cb0f15 --- /dev/null +++ b/packaging/rpm/dockerfiles/Dockerfile.rpm.nvidia @@ -0,0 +1,11 @@ +# Build FlagCX NVIDIA RPM packages on Rocky Linux based CUDA images. + +ARG BASE_IMAGE_VERSION=12.4.1-devel-rockylinux8 +FROM nvcr.io/nvidia/cuda:${BASE_IMAGE_VERSION} + +WORKDIR /workspace +COPY . /workspace/ + +RUN bash packaging/rpm/dockerfiles/build-rpm-common.sh nvidia + +CMD ["/bin/bash"] diff --git a/packaging/rpm/dockerfiles/build-rpm-common.sh b/packaging/rpm/dockerfiles/build-rpm-common.sh new file mode 100755 index 000000000..ed7104fec --- /dev/null +++ b/packaging/rpm/dockerfiles/build-rpm-common.sh @@ -0,0 +1,58 @@ +#!/bin/bash +set -euo pipefail + +BACKEND="${1:-}" + +if [ -z "${BACKEND}" ]; then + echo "ERROR: backend is required" >&2 + exit 1 +fi + +case "${BACKEND}" in + nvidia|metax|ascend) + ;; + *) + echo "ERROR: unsupported backend: ${BACKEND}" >&2 + exit 1 + ;; +esac + +PKG_MANAGER="$(command -v dnf || command -v yum || true)" +if [ -z "${PKG_MANAGER}" ]; then + echo "ERROR: neither dnf nor yum is available in the base image" >&2 + exit 1 +fi + +"${PKG_MANAGER}" install -y epel-release || \ + echo "EPEL not available for this base image, continuing without it" + +"${PKG_MANAGER}" install -y \ + rpm-build \ + rpmdevtools \ + gcc-c++ \ + make \ + cmake \ + patchelf + +"${PKG_MANAGER}" install -y json-devel 2>/dev/null \ + || "${PKG_MANAGER}" install -y nlohmann-json-devel 2>/dev/null \ + || { echo "ERROR: neither json-devel nor nlohmann-json-devel is available; rpmbuild 
requires nlohmann::json headers" >&2; exit 1; } + +"${PKG_MANAGER}" clean all + +rpmdev-setuptree + +SPEC_VERSION="$(awk '/^Version:/ {print $2; exit}' /workspace/packaging/rpm/specs/flagcx.spec)" +tar czf "/root/rpmbuild/SOURCES/flagcx-${SPEC_VERSION}.tar.gz" \ + --transform "s,^\.,flagcx-${SPEC_VERSION}," \ + --exclude='.git' \ + --exclude='build' \ + --exclude='debian-packages' \ + --exclude='rpm-packages' \ + . + +rpmbuild -ba \ + --define "backend ${BACKEND}" \ + /workspace/packaging/rpm/specs/flagcx.spec + +ls -lh /root/rpmbuild/RPMS/*/*.rpm From e7f84bc2029767ff8af59f194594bac87838acdb Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 26 May 2026 20:14:01 +0900 Subject: [PATCH 14/17] fix(packaging): allow x86_64 Ascend RPM builds --- packaging/rpm/specs/flagcx.spec | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packaging/rpm/specs/flagcx.spec b/packaging/rpm/specs/flagcx.spec index 05870717c..6cec195f5 100644 --- a/packaging/rpm/specs/flagcx.spec +++ b/packaging/rpm/specs/flagcx.spec @@ -11,12 +11,12 @@ # Derive uppercase backend name for make flag (USE_NVIDIA=1, etc.) %global backend_upper %(echo %{backend} | tr a-z A-Z) -# Pin build/install arch by backend: Ascend NPU hosts are aarch64, -# everything else (NVIDIA / MetaX / etc.) is x86_64. ExclusiveArch -# makes rpmbuild refuse to even start on a mismatched host, which is -# safer than producing a CPU-arch-mislabeled rpm. +# Pin build/install arch by backend. Ascend CANN images are available for +# both x86_64 development hosts and aarch64 deployment hosts; NVIDIA and +# MetaX RPM builds currently target x86_64. ExclusiveArch makes rpmbuild +# refuse to start on unsupported hosts, avoiding CPU-arch-mislabeled RPMs. 
%if "%{backend}" == "ascend" -ExclusiveArch: aarch64 +ExclusiveArch: x86_64 aarch64 %else ExclusiveArch: x86_64 %endif From aa8cc37edc348926403c05d805319deafd9f0766 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Wed, 27 May 2026 18:25:28 +0900 Subject: [PATCH 15/17] ci(packaging): upload RPM artifacts to Nexus --- .github/workflows/build-rpm.yml | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml index a27415b0f..b077283a3 100644 --- a/.github/workflows/build-rpm.yml +++ b/.github/workflows/build-rpm.yml @@ -36,3 +36,73 @@ jobs: name: flagcx-${{ matrix.backend }}-rpm-packages path: rpm-packages/${{ matrix.backend }}/**/*.rpm retention-days: 7 + + upload-rpm-to-nexus: + if: startsWith(github.ref, 'refs/tags/v') + needs: build-rpm-packages + runs-on: h20 + + steps: + - name: Download RPM artifacts + uses: actions/download-artifact@v4 + with: + pattern: flagcx-*-rpm-packages + path: packages/ + + - name: List downloaded RPM packages + run: | + echo "Downloaded RPM packages:" + find packages/ -name "*.rpm" -exec ls -lh {} \; + + - name: Upload RPM packages to Nexus YUM repository + env: + NEXUS_USERNAME: ${{ secrets.REGISTRY_USERNAME }} + NEXUS_PASSWORD: ${{ secrets.CONTAINER_REGISTRY }} + NEXUS_REPO_URL: https://resource.flagos.net/repository/flagos-yum-hosted + run: | + set -euo pipefail + + upload_rpm() { + local rpm_file="$1" + local filename + local artifact_dir + local backend + local repo_path + local parent_dir + + filename="$(basename "$rpm_file")" + artifact_dir="${rpm_file#packages/}" + artifact_dir="${artifact_dir%%/*}" + backend="${artifact_dir#flagcx-}" + backend="${backend%-rpm-packages}" + parent_dir="$(basename "$(dirname "$rpm_file")")" + + if [[ "$rpm_file" == *"/SRPMS/"* ]]; then + repo_path="SRPMS/${backend}/${filename}" + elif [[ "$rpm_file" == *"/RPMS/"* ]]; then + repo_path="${backend}/${parent_dir}/${filename}" + else + 
repo_path="${backend}/${filename}" + fi + + echo "Uploading: ${filename}" + echo " Backend: ${backend}" + echo " Repository path: ${repo_path}" + + curl -f -u "${NEXUS_USERNAME}:${NEXUS_PASSWORD}" \ + --upload-file "$rpm_file" \ + "${NEXUS_REPO_URL}/${repo_path}" + } + + uploaded=0 + while IFS= read -r -d '' rpm_file; do + upload_rpm "$rpm_file" + uploaded=$((uploaded + 1)) + done < <(find packages/ -name "*.rpm" -print0) + + if [ "$uploaded" -eq 0 ]; then + echo "No RPM packages found to upload" + exit 1 + fi + + echo "Uploaded ${uploaded} RPM package(s) to Nexus" From 4c4a859ebf3afa6696641d462121a860ba1dc483 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Wed, 27 May 2026 20:56:10 +0900 Subject: [PATCH 16/17] ci(packaging): defer RPM Nexus upload --- .github/workflows/build-rpm.yml | 70 --------------------------------- 1 file changed, 70 deletions(-) diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml index b077283a3..a27415b0f 100644 --- a/.github/workflows/build-rpm.yml +++ b/.github/workflows/build-rpm.yml @@ -36,73 +36,3 @@ jobs: name: flagcx-${{ matrix.backend }}-rpm-packages path: rpm-packages/${{ matrix.backend }}/**/*.rpm retention-days: 7 - - upload-rpm-to-nexus: - if: startsWith(github.ref, 'refs/tags/v') - needs: build-rpm-packages - runs-on: h20 - - steps: - - name: Download RPM artifacts - uses: actions/download-artifact@v4 - with: - pattern: flagcx-*-rpm-packages - path: packages/ - - - name: List downloaded RPM packages - run: | - echo "Downloaded RPM packages:" - find packages/ -name "*.rpm" -exec ls -lh {} \; - - - name: Upload RPM packages to Nexus YUM repository - env: - NEXUS_USERNAME: ${{ secrets.REGISTRY_USERNAME }} - NEXUS_PASSWORD: ${{ secrets.CONTAINER_REGISTRY }} - NEXUS_REPO_URL: https://resource.flagos.net/repository/flagos-yum-hosted - run: | - set -euo pipefail - - upload_rpm() { - local rpm_file="$1" - local filename - local artifact_dir - local backend - local repo_path - local parent_dir - - 
filename="$(basename "$rpm_file")" - artifact_dir="${rpm_file#packages/}" - artifact_dir="${artifact_dir%%/*}" - backend="${artifact_dir#flagcx-}" - backend="${backend%-rpm-packages}" - parent_dir="$(basename "$(dirname "$rpm_file")")" - - if [[ "$rpm_file" == *"/SRPMS/"* ]]; then - repo_path="SRPMS/${backend}/${filename}" - elif [[ "$rpm_file" == *"/RPMS/"* ]]; then - repo_path="${backend}/${parent_dir}/${filename}" - else - repo_path="${backend}/${filename}" - fi - - echo "Uploading: ${filename}" - echo " Backend: ${backend}" - echo " Repository path: ${repo_path}" - - curl -f -u "${NEXUS_USERNAME}:${NEXUS_PASSWORD}" \ - --upload-file "$rpm_file" \ - "${NEXUS_REPO_URL}/${repo_path}" - } - - uploaded=0 - while IFS= read -r -d '' rpm_file; do - upload_rpm "$rpm_file" - uploaded=$((uploaded + 1)) - done < <(find packages/ -name "*.rpm" -print0) - - if [ "$uploaded" -eq 0 ]; then - echo "No RPM packages found to upload" - exit 1 - fi - - echo "Uploaded ${uploaded} RPM package(s) to Nexus" From 79c79848d61e0e00d0faadd23b10de783f85f025 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Wed, 27 May 2026 21:55:14 +0900 Subject: [PATCH 17/17] ci(packaging): publish RPMs to Nexus directly from build job Upload built RPMs to flagos-yum-hosted from within the build-rpm job on the same self-hosted runner, gated on v* tags. h20 reaches the internal Nexus but not codeload/api.github.com reliably, so avoid the cross-workflow artifact round-trip that the deb upload-nexus flow depends on. 
---
Notes (text between the three-dash line and the first "diff --git" is
ignored by git am / git apply):

  Appends one tag-gated step to the build-rpm-packages job. The step walks
  rpm-packages/<backend>/, uploads every *.rpm with curl --upload-file to
  ${NEXUS_REPO_URL}/<backend>/<path relative to rpm-packages/<backend>/>,
  and exits non-zero when no package was uploaded.

  Review edits relative to the submitted hunk (insertion count unchanged):
    * curl -f  ->  curl -fsS: drop the per-upload progress meter from the
      job log while still printing the error text on failure.
    * "No RPMs found" is written to stderr instead of stdout.
    * Comment placeholder restored: RPMS/<arch>/.
  The post-image blob id on the "index" line was not recomputed after these
  edits; a plain git am / git apply does not verify it.

 .github/workflows/build-rpm.yml | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/.github/workflows/build-rpm.yml b/.github/workflows/build-rpm.yml
index a27415b0f..586f8371c 100644
--- a/.github/workflows/build-rpm.yml
+++ b/.github/workflows/build-rpm.yml
@@ -36,3 +36,33 @@ jobs:
           name: flagcx-${{ matrix.backend }}-rpm-packages
           path: rpm-packages/${{ matrix.backend }}/**/*.rpm
           retention-days: 7
+
+      # Publish straight from the locally-built artifacts on the same self-hosted
+      # runner: h20 can reach the internal Nexus but not codeload/api.github.com
+      # reliably, so we deliberately avoid any cross-workflow artifact round-trip.
+      - name: Publish ${{ matrix.backend }} RPMs to Nexus YUM repository
+        if: startsWith(github.ref, 'refs/tags/v')
+        env:
+          NEXUS_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
+          NEXUS_PASSWORD: ${{ secrets.CONTAINER_REGISTRY }}
+          NEXUS_REPO_URL: https://resource.flagos.net/repository/flagos-yum-hosted
+          BACKEND: ${{ matrix.backend }}
+        run: |
+          set -euo pipefail
+
+          uploaded=0
+          while IFS= read -r -d '' rpm; do
+            # rel keeps the RPMS/<arch>/ or SRPMS/ layout under the backend
+            rel="${rpm#rpm-packages/${BACKEND}/}"
+            echo "Uploading ${rpm} -> ${BACKEND}/${rel}"
+            curl -fsS -u "${NEXUS_USERNAME}:${NEXUS_PASSWORD}" \
+              --upload-file "$rpm" \
+              "${NEXUS_REPO_URL}/${BACKEND}/${rel}"
+            uploaded=$((uploaded + 1))
+          done < <(find "rpm-packages/${BACKEND}" -name '*.rpm' -print0)
+
+          if [ "$uploaded" -eq 0 ]; then
+            echo "No RPMs found for ${BACKEND}" >&2
+            exit 1
+          fi
+          echo "Uploaded ${uploaded} ${BACKEND} RPM(s) to Nexus YUM repository"