forked from flagos-ai/FlagScale
-
Notifications
You must be signed in to change notification settings - Fork 0
310 lines (272 loc) · 11.5 KB
/
functional_tests_benchmark.yml
File metadata and controls
310 lines (272 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# Reusable workflow: it is only triggered through `workflow_call`.
name: Common Functional Tests - Benchmark

on:
  workflow_call:
    inputs:
      # ---- Required inputs ----
      platform:
        description: Platform name (e.g., cuda, default)
        type: string
        required: true
      # Decoded with fromJson() to build the job matrix.
      test_matrix:
        description: JSON array of test configurations
        type: string
        required: true
      # Container image the job runs in.
      image:
        type: string
        required: true
      # Decoded with fromJson() and used as the job's `runs-on` value.
      runs_on:
        type: string
        required: true
      # Decoded with fromJson() and used as the container `volumes` list.
      container_volumes:
        type: string
        required: true
      # Used verbatim as the container `options` string.
      container_options:
        type: string
        required: true
      source_artifact:
        description: Name of the artifact containing source code
        type: string
        required: true

      # ---- Optional inputs ----
      pkg_mgr:
        description: Package manager (pip, uv, conda). Default uv.
        type: string
        required: false
        default: "uv"
      env_name:
        description: Conda environment name (for conda only)
        type: string
        required: false
        default: ""
      env_path:
        description: Environment path (venv path for uv, conda installation path for conda)
        type: string
        required: false
        default: "/opt/venv"
jobs:
  functional_test_benchmark:
    # Runner labels arrive as a JSON-encoded string input.
    runs-on: ${{ fromJson(inputs.runs_on) }}
    # One job instance per entry of the test_matrix input; a failing entry
    # does not cancel the others.
    strategy:
      fail-fast: false
      matrix:
        test_config: ${{ fromJson(inputs.test_matrix) }}
    # Every step runs inside this container.
    container:
      image: ${{ inputs.image }}
      ports:
        - 80
      volumes: ${{ fromJson(inputs.container_volumes) }}
      options: ${{ inputs.container_options }}
    # All `run` steps use bash.
    defaults:
      run:
        shell: bash
    # Directory the source archive is unpacked into and tests are run from.
    env:
      PROJECT_ROOT: /tmp/FlagScale
    steps:
# First of up to three download attempts. A failure is tolerated here so
# that the next attempt can run.
- name: Download source code artifact (attempt 1)
  id: download_attempt_1
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    name: ${{ inputs.source_artifact }}
    path: /tmp
# Second attempt: runs only when the first attempt failed. A failure is
# tolerated here as well so that the third attempt can run.
- name: Download source code artifact (attempt 2)
  id: download_attempt_2
  if: steps.download_attempt_1.outcome == 'failure'
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    name: ${{ inputs.source_artifact }}
    path: /tmp
# Third and last attempt: runs only when the second attempt failed.
# `continue-on-error` is set so that a failure here does not abort the job
# before the "Verify artifact download" step runs; that step inspects the
# outcome of all three attempts and fails the job with a summary message
# when none of them succeeded.
- name: Download source code artifact (attempt 3)
  id: download_attempt_3
  if: steps.download_attempt_2.outcome == 'failure'
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    name: ${{ inputs.source_artifact }}
    path: /tmp
# Reports which download attempt succeeded and fails the step when no
# attempt reports success.
- name: Verify artifact download
  # Step outcomes and the artifact name are passed as environment variables
  # rather than expanded into the script text, so their contents are never
  # parsed as shell code.
  env:
    ATTEMPT_1_OUTCOME: ${{ steps.download_attempt_1.outcome }}
    ATTEMPT_2_OUTCOME: ${{ steps.download_attempt_2.outcome }}
    ATTEMPT_3_OUTCOME: ${{ steps.download_attempt_3.outcome }}
    SOURCE_ARTIFACT: ${{ inputs.source_artifact }}
  run: |
    if [ "$ATTEMPT_1_OUTCOME" = "success" ]; then
      echo "Artifact downloaded successfully on attempt 1"
    elif [ "$ATTEMPT_2_OUTCOME" = "success" ]; then
      echo "Artifact downloaded successfully on attempt 2 (retried once)"
    elif [ "$ATTEMPT_3_OUTCOME" = "success" ]; then
      echo "Artifact downloaded successfully on attempt 3 (retried twice)"
    else
      echo "Error: All 3 download attempts failed"
      echo "Artifact name: $SOURCE_ARTIFACT"
      exit 1
    fi
# Unpacks the downloaded source archive into $PROJECT_ROOT.
# NOTE(review): the archive name is fixed while the artifact name is an
# input; this assumes the artifact always contains flagscale-source.tar.gz.
# Confirm against the job that uploads it.
- name: Extract source code
  run: |
    # Expansions are quoted so the path is always passed as a single word.
    mkdir -p "$PROJECT_ROOT"
    tar -xzf /tmp/flagscale-source.tar.gz -C "$PROJECT_ROOT"
# Registers the extracted tree as a git safe.directory in the global config.
- name: Set safe directory
  run: git config --global --add safe.directory $PROJECT_ROOT
# Runs the repository's environment check script from the project root.
- name: Check environment info
  run: |
    cd $PROJECT_ROOT
    bash ./tests/test_utils/runners/check_env.sh
# Activates the selected Python environment, installs the FlagScale CLI from
# the extracted source tree, then runs tools/install/install.sh to build
# Megatron-LM from source.
- name: Install dependencies for benchmark
  timeout-minutes: 30
  # Workflow inputs are handed to the script as environment variables instead
  # of being expanded into the script text, so their contents are never
  # parsed as shell code and are always passed as single, quoted words.
  # The INPUT_ prefix keeps them from shadowing names that child scripts may
  # use for their own variables.
  env:
    INPUT_PLATFORM: ${{ inputs.platform }}
    INPUT_PKG_MGR: ${{ inputs.pkg_mgr }}
    INPUT_ENV_NAME: ${{ inputs.env_name }}
    INPUT_ENV_PATH: ${{ inputs.env_path }}
  run: |
    set -euo pipefail
    cd "$PROJECT_ROOT"

    PKG_MGR="${INPUT_PKG_MGR:-}"
    ENV_NAME="${INPUT_ENV_NAME:-}"
    ENV_PATH="${INPUT_ENV_PATH:-}"

    echo "Installing dependencies for benchmark"
    echo "Package Manager: $PKG_MGR"
    echo "Environment Name: $ENV_NAME"
    echo "Environment Path: $ENV_PATH"

    # Source environment utilities (provides activate_conda / activate_uv_env)
    source ./tools/install/utils/pyenv_utils.sh

    # Activate environment based on package manager; an activation failure
    # aborts the step.
    case "$PKG_MGR" in
      conda)
        if [ -n "$ENV_NAME" ] && [ -n "$ENV_PATH" ]; then
          activate_conda "$ENV_NAME" "$ENV_PATH" || { echo "Conda activation failed"; exit 1; }
        fi
        ;;
      uv)
        if [ -n "$ENV_PATH" ] && [ -d "$ENV_PATH" ]; then
          activate_uv_env "$ENV_PATH" || { echo "UV activation failed"; exit 1; }
        fi
        ;;
      pip)
        echo "Using system Python with pip"
        ;;
    esac

    echo "Python location: $(which python)"
    echo "Python version: $(python --version)"

    # Install FlagScale CLI
    pip install . --no-build-isolation --root-user-action=ignore || { echo "FlagScale CLI install failed"; exit 1; }

    # Verify installation
    command -v flagscale || { echo "FlagScale CLI not found in PATH"; exit 1; }
    echo "FlagScale CLI installed successfully: $(flagscale --version 2>/dev/null || echo 'version unknown')"

    # Install Megatron-LM-FL from source (force-build to replace pre-installed megatron-core)
    # Derive install-dir from env_path (e.g., /root/miniconda3 -> /root)
    INSTALL_DIR=""
    if [ "$PKG_MGR" = "conda" ] && [ -n "$ENV_PATH" ]; then
      INSTALL_DIR=$(dirname "$ENV_PATH")
    fi

    # --env-name / --install-dir are only passed when their value is non-empty.
    ./tools/install/install.sh \
      --platform "${INPUT_PLATFORM:-}" \
      --task train \
      --pkg-mgr "$PKG_MGR" \
      ${ENV_NAME:+--env-name "$ENV_NAME"} \
      ${INSTALL_DIR:+--install-dir "$INSTALL_DIR"} \
      --no-system --no-dev --no-base --no-task \
      --src-deps megatron-lm \
      --force-build \
      --retry-count 3

    echo "Environment ready for benchmark tests"
# Runs the functional benchmark for one matrix entry through run_tests.sh
# and publishes the runner's exit status as the `exit_code` step output.
- name: Run benchmark tests
  id: benchmark_test
  timeout-minutes: 60
  # Workflow inputs and matrix values are handed to the script as environment
  # variables instead of being expanded into the script text, so their
  # contents are never parsed as shell code.
  env:
    INPUT_PLATFORM: ${{ inputs.platform }}
    INPUT_PKG_MGR: ${{ inputs.pkg_mgr }}
    INPUT_ENV_NAME: ${{ inputs.env_name }}
    INPUT_ENV_PATH: ${{ inputs.env_path }}
    MATRIX_DEVICE: ${{ matrix.test_config.device }}
    MATRIX_TASK: ${{ matrix.test_config.task }}
    MATRIX_MODEL: ${{ matrix.test_config.model }}
    MATRIX_CASE: ${{ matrix.test_config.case }}
  run: |
    set -euo pipefail
    cd "$PROJECT_ROOT"

    PLATFORM="${INPUT_PLATFORM:-}"
    DEVICE="${MATRIX_DEVICE:-}"
    TASK="${MATRIX_TASK:-}"
    MODEL="${MATRIX_MODEL:-}"
    CASE="${MATRIX_CASE:-}"
    PKG_MGR="${INPUT_PKG_MGR:-}"
    ENV_NAME="${INPUT_ENV_NAME:-}"
    ENV_PATH="${INPUT_ENV_PATH:-}"

    echo "Running benchmark tests"
    echo "Platform: $PLATFORM"
    echo "Device: $DEVICE"
    echo "Task: $TASK"
    echo "Model: $MODEL"
    echo "Case: ${CASE:-all}"
    echo "Package Manager: $PKG_MGR"
    echo "Environment Name: $ENV_NAME"
    echo "Environment Path: $ENV_PATH"
    echo "Project root: $PROJECT_ROOT"

    # Source environment utilities (provides activate_conda / activate_uv_env)
    source ./tools/install/utils/pyenv_utils.sh

    # Activate environment based on package manager. An activation failure
    # is only logged here; the script carries on.
    case "$PKG_MGR" in
      conda)
        if [ -n "$ENV_NAME" ]; then
          activate_conda "$ENV_NAME" "$ENV_PATH" || echo "Conda activation failed"
        fi
        ;;
      uv)
        if [ -n "$ENV_PATH" ] && [ -d "$ENV_PATH" ]; then
          activate_uv_env "$ENV_PATH" || echo "UV activation failed"
        fi
        ;;
      pip)
        echo "Running tests with pip/system Python"
        ;;
    esac

    # Display Python environment info
    echo "Python location: $(which python)"
    echo "Python version: $(python --version)"

    # Collect GPU info before benchmark
    echo "=========================================="
    echo "GPU Information"
    echo "=========================================="
    nvidia-smi || echo "nvidia-smi not available"
    echo "=========================================="

    # Run benchmark tests using run_tests.sh.
    # The script runs with `set -e`, so a plain failing command would end it
    # immediately and the reporting below would never execute. Capturing the
    # status through `||` keeps the script alive so the result message and
    # the `exit_code` output are written for failures too.
    exit_code=0
    bash "$PROJECT_ROOT/tests/test_utils/runners/run_tests.sh" \
      --platform "$PLATFORM" \
      --device "$DEVICE" \
      --type functional \
      --task "$TASK" \
      --model "$MODEL" \
      --list "$CASE" || exit_code=$?

    if [ "$exit_code" -eq 0 ]; then
      echo "Benchmark tests passed for $PLATFORM/$DEVICE/$TASK/$MODEL/$CASE"
    else
      echo "Benchmark tests failed for $PLATFORM/$DEVICE/$TASK/$MODEL/$CASE (exit code: $exit_code)"
    fi

    # Expose the status to later steps, then finish with the same status so
    # the step outcome reflects the test result.
    echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT"
    exit "$exit_code"
# Converts the benchmark log of this matrix entry into benchmark_metrics.json
# by calling parse_benchmark_output.py with the log file, the gold-values
# file and the output path. Runs only when the benchmark step succeeded.
- name: Parse benchmark output to JSON
  if: always() && steps.benchmark_test.outcome == 'success'
  # Matrix values are handed to the script as environment variables instead
  # of being expanded into the script text, so their contents are never
  # parsed as shell code.
  env:
    MATRIX_TASK: ${{ matrix.test_config.task }}
    MATRIX_MODEL: ${{ matrix.test_config.model }}
    MATRIX_CASE: ${{ matrix.test_config.case }}
  run: |
    set -euo pipefail
    cd "$PROJECT_ROOT"

    TASK="${MATRIX_TASK:-}"
    MODEL="${MATRIX_MODEL:-}"
    CASE="${MATRIX_CASE:-}"

    # All three paths are relative to $PROJECT_ROOT.
    LOG_FILE="tests/functional_tests/${TASK}/${MODEL}/test_results/${CASE}/logs/host_0_localhost.output"
    GOLD_FILE="tests/functional_tests/${TASK}/${MODEL}/gold_values/${CASE}.json"
    OUTPUT_FILE="tests/functional_tests/${TASK}/${MODEL}/test_results/${CASE}/logs/benchmark_metrics.json"

    echo "Parsing benchmark output to JSON"
    echo "Log file: $LOG_FILE"
    echo "Gold values: $GOLD_FILE"
    echo "Output: $OUTPUT_FILE"

    python tests/test_utils/runners/parse_benchmark_output.py \
      "$LOG_FILE" "$GOLD_FILE" "$OUTPUT_FILE"
# Makes sure `jq` is available before the report upload step.
# NOTE(review): presumably required by the post-benchmark-report action;
# confirm against that action's documentation.
- name: Install jq
  if: always() && steps.benchmark_test.outcome == 'success'
  run: |
    # Skip the package-manager round trip when the image already ships jq.
    if command -v jq >/dev/null 2>&1; then
      echo "jq already installed: $(jq --version)"
    else
      apt-get update
      apt-get install -y jq
    fi
# Posts the parsed benchmark_metrics.json of this matrix entry to the
# reporting backend. Runs only when the benchmark step succeeded.
# NOTE(review): the action is referenced by branch (@main), not by a tag or
# commit SHA, and the backend URL uses plain http.
- name: Upload benchmark data to backend
  if: always() && steps.benchmark_test.outcome == 'success'
  uses: flagos-ai/FlagOps/actions/post-benchmark-report@main
  env:
    # Backend host is listed in NO_PROXY.
    NO_PROXY: "flagcicd-inner.flagos.net"
  with:
    backend_url: "http://flagcicd-inner.flagos.net:8000"
    user_id: "000000000000000000"
    # Same file the "Parse benchmark output to JSON" step writes.
    report_path: ${{ env.PROJECT_ROOT }}/tests/functional_tests/${{ matrix.test_config.task }}/${{ matrix.test_config.model }}/test_results/${{ matrix.test_config.case }}/logs/benchmark_metrics.json
    list_code: benchmark_${{ matrix.test_config.task }}_${{ matrix.test_config.model }}_${{ matrix.test_config.case }}
    list_name: Benchmark-${{ matrix.test_config.model }}-${{ matrix.test_config.case }}
    # JSON array describing the report columns: metric, values, avg, p50, p99.
    header_config: '[{"field":"metric","name":"Benchmark Metric","required":true,"sortable":true,"type":"string"},{"field":"values","name":"Per Step Details","required":false,"sortable":false,"type":"array"},{"field":"avg","name":"Avg","required":true,"sortable":true,"type":"number"},{"field":"p50","name":"P50","required":true,"sortable":true,"type":"number"},{"field":"p99","name":"P99","required":true,"sortable":true,"type":"number"}]'
    repository_name: ${{ github.repository }}
    # Pull-request head commit when available, otherwise the triggering SHA.
    commit_id: ${{ github.event.pull_request.head.sha || github.sha }}
    workflow_id: ${{ github.run_id }}
    # NOTE(review): presumably makes a failed upload non-fatal; confirm
    # against the action's input documentation.
    fail_on_error: "false"
# Uploads the test_results directory of this task/model as an artifact when
# the benchmark step failed. The upload itself is best-effort
# (continue-on-error) and only warns when no files are found.
- name: Upload Benchmark Test Logs
  if: always() && steps.benchmark_test.outcome == 'failure'
  uses: actions/upload-artifact@v4
  continue-on-error: true
  with:
    # The case is part of the name so that two failing matrix entries with
    # the same task and model do not try to upload under one artifact name,
    # which upload-artifact@v4 rejects. An empty case falls back to "all".
    name: benchmark_tests-logs-${{ github.run_id }}-${{ matrix.test_config.task }}-${{ matrix.test_config.model }}-${{ matrix.test_config.case || 'all' }}
    path: ${{ env.PROJECT_ROOT }}/tests/functional_tests/${{ matrix.test_config.task }}/${{ matrix.test_config.model }}/test_results
    retention-days: 7
    if-no-files-found: warn