Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
0f83f95
build: Add initial automated build image
Mar 3, 2026
d1d0bc4
debug0
Mar 3, 2026
5e320c2
debug1
Mar 3, 2026
08ad0d2
debug2
Mar 3, 2026
1821568
debug2
Mar 3, 2026
f537255
debug2
Mar 3, 2026
aee1aae
debug3
Mar 3, 2026
1b392f2
debug4
Mar 3, 2026
cc46adf
update5
Mar 4, 2026
85edb17
update6
Mar 4, 2026
a56ed5c
debug7
Mar 5, 2026
c4f4f3a
镜像构建完运行测试用例并上传镜像到harbor
Mar 5, 2026
f8ba2fd
fix branches for all_tests_cuda.yml
Mar 5, 2026
d2f3f24
update requirements/cuda/serve.txt
Mar 6, 2026
d976b55
ci: cuda_tests waits for the image build to complete before running t…
Mar 6, 2026
2ec4457
ci: update CUDA image tags [skip ci]
github-actions[bot] Mar 9, 2026
c160987
debug8
Mar 9, 2026
1ef224b
Merge branch 'auto-build-image' of https://github.com/zihugithub/Flag…
Mar 9, 2026
39b1a9d
debug9
Mar 9, 2026
1b9ea93
debug9
Mar 9, 2026
4170c7b
ci: switch Docker image build to conda package manager
Mar 9, 2026
6759c1c
ci: defer cuda_tests to workflow_run when image build is active
Mar 10, 2026
bd5c331
debug10
Mar 10, 2026
582cf2f
ci: update CUDA image tags [skip ci]
github-actions[bot] Mar 10, 2026
d9c92c6
fix(ci): gate Harbor image push on both workflows succeeding
Mar 10, 2026
c1bc1d0
Merge branch 'auto-build-image' of https://github.com/zihugithub/Flag…
Mar 10, 2026
2436e30
debug11
Mar 10, 2026
7723926
debug11
Mar 10, 2026
5c8b522
debug12
Mar 10, 2026
0b63905
ci: update CUDA image tags [skip ci]
github-actions[bot] Mar 10, 2026
1d8a133
debug13
Mar 10, 2026
37d1687
Merge branch 'auto-build-image' of https://github.com/zihugithub/Flag…
Mar 10, 2026
b5c542b
debug14
Mar 10, 2026
4560d97
debug15
Mar 10, 2026
849006d
ci: update CUDA image tags [skip ci]
github-actions[bot] Mar 10, 2026
1bbc3f7
debug16
Mar 10, 2026
6a89919
debug17
Mar 10, 2026
5f5b0fd
ci: update CUDA image tags [skip ci]
github-actions[bot] Mar 10, 2026
df039ab
debug18
Mar 10, 2026
feaf935
test1: Update CI config
Mar 17, 2026
9d8ba6c
debug1
Mar 17, 2026
c09e945
ci: update CUDA image tags [skip ci]
github-actions[bot] Mar 17, 2026
eaef4ec
debug2
Mar 17, 2026
ba4cb3b
Merge branch 'auto-build-image' of https://github.com/zihugithub/Flag…
Mar 17, 2026
df82b7e
debug2
Mar 17, 2026
69d945b
debug2
Mar 17, 2026
ac2f553
debug2
Mar 17, 2026
3e376a3
debug2
Mar 17, 2026
b2134f1
debug2
Mar 17, 2026
94caf4e
debug2
Mar 17, 2026
1a5a151
debug3
Mar 17, 2026
5cc4320
debug4
Mar 17, 2026
3a8be29
debug4
Mar 17, 2026
60e9011
ci: update CUDA image tags [skip ci]
github-actions[bot] Mar 17, 2026
40d4006
debug1
Mar 17, 2026
e9d2cdc
Merge branch 'auto-build-image' of https://github.com/zihugithub/Flag…
Mar 17, 2026
6d1f71f
debug1
Mar 17, 2026
ea5f7ad
test1
Mar 17, 2026
aea2839
test1
Mar 17, 2026
e9054a7
test2
Mar 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/configs/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ hardware_name: cuda
display_name: "CUDA Tests"

# Docker image for this hardware
ci_image: localhost:5000/flagscale:cuda12.8.1-cudnn9.7.1-python3.12-torch2.7.1-time2510131515
ci_train_image: localhost:5000/flagscale-train:dev-cu128-py3.12-20260228210721
ci_inference_image: localhost:5000/flagscale-inference:dev-cu128-py3.12-20260302102033
ci_image: localhost:5000/flagscale:cuda12.8.1-cudnn9.7.1-python3.12-torch2.7.1-time2510131515-pre
ci_train_image: localhost:5000/flagscale-train:dev-cu128-py3.12
ci_inference_image: localhost:5000/flagscale-inference:dev-cu128-py3.12

# Runner labels for this hardware
runner_labels:
Expand Down
300 changes: 150 additions & 150 deletions .github/workflows/all_tests_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,177 +99,177 @@ jobs:
# Load configuration and group tests by task
load_platform_config "$PLATFORM"

unit_tests:
needs: checkout_and_config
strategy:
fail-fast: false
matrix:
device: ${{ fromJson(needs.checkout_and_config.outputs.device_types) }}
uses: ./.github/workflows/unit_tests_common.yml
name: unit_tests
with:
platform: ${{ inputs.platform }}
device: ${{ matrix.device }}
image: ${{ needs.checkout_and_config.outputs.ci_train_image }}
runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
container_options: ${{ needs.checkout_and_config.outputs.container_options }}
source_artifact: flagscale-source-${{ github.sha }}
pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
env_name: ${{ needs.checkout_and_config.outputs.env_name_train }}
env_path: ${{ needs.checkout_and_config.outputs.env_path }}

functional_tests_train:
needs:
- checkout_and_config
- unit_tests
if: fromJson(needs.checkout_and_config.outputs.train_test_matrix)[0] != null
uses: ./.github/workflows/functional_tests_train.yml
with:
platform: ${{ inputs.platform }}
test_matrix: ${{ needs.checkout_and_config.outputs.train_test_matrix }}
image: ${{ needs.checkout_and_config.outputs.ci_train_image }}
runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
container_options: ${{ needs.checkout_and_config.outputs.container_options }}
source_artifact: flagscale-source-${{ github.sha }}
pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
env_name: ${{ needs.checkout_and_config.outputs.env_name_train }}
env_path: ${{ needs.checkout_and_config.outputs.env_path }}
# unit_tests:
# needs: checkout_and_config
# strategy:
# fail-fast: false
# matrix:
# device: ${{ fromJson(needs.checkout_and_config.outputs.device_types) }}
# uses: ./.github/workflows/unit_tests_common.yml
# name: unit_tests
# with:
# platform: ${{ inputs.platform }}
# device: ${{ matrix.device }}
# image: ${{ needs.checkout_and_config.outputs.ci_train_image }}
# runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
# container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
# container_options: ${{ needs.checkout_and_config.outputs.container_options }}
# source_artifact: flagscale-source-${{ github.sha }}
# pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
# env_name: ${{ needs.checkout_and_config.outputs.env_name_train }}
# env_path: ${{ needs.checkout_and_config.outputs.env_path }}

functional_tests_hetero_train:
needs:
- checkout_and_config
- unit_tests
if: fromJson(needs.checkout_and_config.outputs.hetero_train_test_matrix)[0] != null
uses: ./.github/workflows/functional_tests_hetero_train.yml
with:
platform: ${{ inputs.platform }}
test_matrix: ${{ needs.checkout_and_config.outputs.hetero_train_test_matrix }}
image: ${{ needs.checkout_and_config.outputs.ci_train_image }}
runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
container_options: ${{ needs.checkout_and_config.outputs.container_options }}
source_artifact: flagscale-source-${{ github.sha }}
pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
env_name: ${{ needs.checkout_and_config.outputs.env_name_train }}
env_path: ${{ needs.checkout_and_config.outputs.env_path }}
# functional_tests_train:
# needs:
# - checkout_and_config
# - unit_tests
# if: fromJson(needs.checkout_and_config.outputs.train_test_matrix)[0] != null
# uses: ./.github/workflows/functional_tests_train.yml
# with:
# platform: ${{ inputs.platform }}
# test_matrix: ${{ needs.checkout_and_config.outputs.train_test_matrix }}
# image: ${{ needs.checkout_and_config.outputs.ci_train_image }}
# runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
# container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
# container_options: ${{ needs.checkout_and_config.outputs.container_options }}
# source_artifact: flagscale-source-${{ github.sha }}
# pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
# env_name: ${{ needs.checkout_and_config.outputs.env_name_train }}
# env_path: ${{ needs.checkout_and_config.outputs.env_path }}

# NOTE: Inference, serve, and rl functional tests are temporarily disabled
functional_tests_inference:
needs:
- checkout_and_config
- unit_tests
if: fromJson(needs.checkout_and_config.outputs.inference_test_matrix)[0] != null
uses: ./.github/workflows/functional_tests_inference.yml
with:
platform: ${{ inputs.platform }}
test_matrix: ${{ needs.checkout_and_config.outputs.inference_test_matrix }}
image: ${{ needs.checkout_and_config.outputs.ci_inference_image }}
runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
container_options: ${{ needs.checkout_and_config.outputs.container_options }}
source_artifact: flagscale-source-${{ github.sha }}
pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
env_name: ${{ needs.checkout_and_config.outputs.env_name_inference }}
env_path: ${{ needs.checkout_and_config.outputs.env_path }}
# functional_tests_hetero_train:
# needs:
# - checkout_and_config
# - unit_tests
# if: fromJson(needs.checkout_and_config.outputs.hetero_train_test_matrix)[0] != null
# uses: ./.github/workflows/functional_tests_hetero_train.yml
# with:
# platform: ${{ inputs.platform }}
# test_matrix: ${{ needs.checkout_and_config.outputs.hetero_train_test_matrix }}
# image: ${{ needs.checkout_and_config.outputs.ci_train_image }}
# runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
# container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
# container_options: ${{ needs.checkout_and_config.outputs.container_options }}
# source_artifact: flagscale-source-${{ github.sha }}
# pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
# env_name: ${{ needs.checkout_and_config.outputs.env_name_train }}
# env_path: ${{ needs.checkout_and_config.outputs.env_path }}

functional_tests_serve:
needs:
- checkout_and_config
- unit_tests
if: fromJson(needs.checkout_and_config.outputs.serve_test_matrix)[0] != null
uses: ./.github/workflows/functional_tests_serve.yml
with:
platform: ${{ inputs.platform }}
test_matrix: ${{ needs.checkout_and_config.outputs.serve_test_matrix }}
image: ${{ needs.checkout_and_config.outputs.ci_inference_image }}
runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
container_options: ${{ needs.checkout_and_config.outputs.container_options }}
source_artifact: flagscale-source-${{ github.sha }}
pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
env_name: ${{ needs.checkout_and_config.outputs.env_name_serve }}
env_path: ${{ needs.checkout_and_config.outputs.env_path }}
# # NOTE: Inference, serve, and rl functional tests are temporarily disabled
# functional_tests_inference:
# needs:
# - checkout_and_config
# - unit_tests
# if: fromJson(needs.checkout_and_config.outputs.inference_test_matrix)[0] != null
# uses: ./.github/workflows/functional_tests_inference.yml
# with:
# platform: ${{ inputs.platform }}
# test_matrix: ${{ needs.checkout_and_config.outputs.inference_test_matrix }}
# image: ${{ needs.checkout_and_config.outputs.ci_inference_image }}
# runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
# container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
# container_options: ${{ needs.checkout_and_config.outputs.container_options }}
# source_artifact: flagscale-source-${{ github.sha }}
# pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
# env_name: ${{ needs.checkout_and_config.outputs.env_name_inference }}
# env_path: ${{ needs.checkout_and_config.outputs.env_path }}

# functional_tests_rl:
# functional_tests_serve:
# needs:
# - checkout_and_config
# - unit_tests
# if: fromJson(needs.checkout_and_config.outputs.rl_test_matrix)[0] != null
# uses: ./.github/workflows/functional_tests_rl.yml
# if: fromJson(needs.checkout_and_config.outputs.serve_test_matrix)[0] != null
# uses: ./.github/workflows/functional_tests_serve.yml
# with:
# platform: ${{ inputs.platform }}
# test_matrix: ${{ needs.checkout_and_config.outputs.rl_test_matrix }}
# image: ${{ needs.checkout_and_config.outputs.ci_image }}
# test_matrix: ${{ needs.checkout_and_config.outputs.serve_test_matrix }}
# image: ${{ needs.checkout_and_config.outputs.ci_inference_image }}
# runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
# container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
# container_options: ${{ needs.checkout_and_config.outputs.container_options }}
# source_artifact: flagscale-source-${{ github.sha }}
# pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
# env_name: ${{ needs.checkout_and_config.outputs.env_name_rl }}
# env_name: ${{ needs.checkout_and_config.outputs.env_name_serve }}
# env_path: ${{ needs.checkout_and_config.outputs.env_path }}

all_tests_complete:
defaults:
run:
shell: bash
needs:
- checkout_and_config
- unit_tests
- functional_tests_train
- functional_tests_hetero_train
# NOTE: Disabled tests removed from needs
# - functional_tests_inference
# - functional_tests_serve
# - functional_tests_rl
runs-on: ubuntu-latest
if: always()
steps:
- name: Verify all tests passed
run: |
# Check all test jobs (skip if not run)
failed=false
# # functional_tests_rl:
# # needs:
# # - checkout_and_config
# # - unit_tests
# # if: fromJson(needs.checkout_and_config.outputs.rl_test_matrix)[0] != null
# # uses: ./.github/workflows/functional_tests_rl.yml
# # with:
# # platform: ${{ inputs.platform }}
# # test_matrix: ${{ needs.checkout_and_config.outputs.rl_test_matrix }}
# # image: ${{ needs.checkout_and_config.outputs.ci_image }}
# # runs_on: ${{ needs.checkout_and_config.outputs.runs_on }}
# # container_volumes: ${{ needs.checkout_and_config.outputs.container_volumes }}
# # container_options: ${{ needs.checkout_and_config.outputs.container_options }}
# # source_artifact: flagscale-source-${{ github.sha }}
# # pkg_mgr: ${{ needs.checkout_and_config.outputs.pkg_mgr }}
# # env_name: ${{ needs.checkout_and_config.outputs.env_name_rl }}
# # env_path: ${{ needs.checkout_and_config.outputs.env_path }}

if [ "${{ needs.unit_tests.result }}" != "success" ]; then
echo "❌ Unit tests failed"
failed=true
fi
# all_tests_complete:
# defaults:
# run:
# shell: bash
# needs:
# - checkout_and_config
# - unit_tests
# - functional_tests_train
# - functional_tests_hetero_train
# # NOTE: Disabled tests removed from needs
# # - functional_tests_inference
# # - functional_tests_serve
# # - functional_tests_rl
# runs-on: ubuntu-latest
# if: always()
# steps:
# - name: Verify all tests passed
# run: |
# # Check all test jobs (skip if not run)
# failed=false

# Only check functional tests if they ran
if [ "${{ needs.functional_tests_train.result }}" != "success" ] && \
[ "${{ needs.functional_tests_train.result }}" != "skipped" ]; then
echo "❌ Training functional tests failed"
failed=true
fi
# if [ "${{ needs.unit_tests.result }}" != "success" ]; then
# echo "❌ Unit tests failed"
# failed=true
# fi

if [ "${{ needs.functional_tests_hetero_train.result }}" != "success" ] && \
[ "${{ needs.functional_tests_hetero_train.result }}" != "skipped" ]; then
echo "❌ Heterogeneous training functional tests failed"
failed=true
fi
# # Only check functional tests if they ran
# if [ "${{ needs.functional_tests_train.result }}" != "success" ] && \
# [ "${{ needs.functional_tests_train.result }}" != "skipped" ]; then
# echo "❌ Training functional tests failed"
# failed=true
# fi

# NOTE: Inference, serve, and rl checks disabled
# if [ "${{ needs.functional_tests_inference.result }}" != "success" ] && \
# [ "${{ needs.functional_tests_inference.result }}" != "skipped" ]; then
# echo "❌ Inference functional tests failed"
# failed=true
# fi
# if [ "${{ needs.functional_tests_hetero_train.result }}" != "success" ] && \
# [ "${{ needs.functional_tests_hetero_train.result }}" != "skipped" ]; then
# echo "❌ Heterogeneous training functional tests failed"
# failed=true
# fi

# if [ "${{ needs.functional_tests_serve.result }}" != "success" ] && \
# [ "${{ needs.functional_tests_serve.result }}" != "skipped" ]; then
# echo "❌ Serve functional tests failed"
# failed=true
# fi
# # NOTE: Inference, serve, and rl checks disabled
# # if [ "${{ needs.functional_tests_inference.result }}" != "success" ] && \
# # [ "${{ needs.functional_tests_inference.result }}" != "skipped" ]; then
# # echo "❌ Inference functional tests failed"
# # failed=true
# # fi

# if [ "${{ needs.functional_tests_rl.result }}" != "success" ] && \
# [ "${{ needs.functional_tests_rl.result }}" != "skipped" ]; then
# echo "❌ RL functional tests failed"
# failed=true
# fi
# # if [ "${{ needs.functional_tests_serve.result }}" != "success" ] && \
# # [ "${{ needs.functional_tests_serve.result }}" != "skipped" ]; then
# # echo "❌ Serve functional tests failed"
# # failed=true
# # fi

if [ "$failed" = "true" ]; then
exit 1
fi
# # if [ "${{ needs.functional_tests_rl.result }}" != "success" ] && \
# # [ "${{ needs.functional_tests_rl.result }}" != "skipped" ]; then
# # echo "❌ RL functional tests failed"
# # failed=true
# # fi

# if [ "$failed" = "true" ]; then
# exit 1
# fi

echo "✅ All tests completed successfully!"
# echo "✅ All tests completed successfully!"
15 changes: 15 additions & 0 deletions .github/workflows/all_tests_cuda.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
name: cuda_tests

on:
# Called by Build Docker Images - CUDA workflow
workflow_call:

push:
branches: ["main"]
paths-ignore:
- 'docker/cuda/**'
- 'docker/build.sh'
- 'tools/install/**'
- 'requirements/**'
- '.github/workflows/build_image_cuda.yml'
pull_request:
branches: ["main"]
paths-ignore:
- 'docker/cuda/**'
- 'docker/build.sh'
- 'tools/install/**'
- 'requirements/**'
- '.github/workflows/build_image_cuda.yml'

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.actor }}
Expand Down
Loading
Loading