Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions ci/run_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,36 @@ run_py_benchmark() {
echo -e "\e[1;33mSLOW BENCHMARK: it may seem like a deadlock but will eventually complete.\e[0m"
fi

UCX_KEEPALIVE_INTERVAL=1ms \
UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} \
UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} \
python "${TIMEOUT_TOOL_PATH}" --enable-python $((2*60)) \
python -m ucxx.benchmarks.send_recv --backend "${BACKEND}" \
-o cupy --reuse-alloc -n 8MiB --n-buffers "$N_BUFFERS" --progress-mode "${PROGRESS_MODE}" ${ASYNCIO_WAIT}
MAX_ATTEMPTS=3
LAST_STATUS=0

set +e
for attempt in $(seq 1 "${MAX_ATTEMPTS}"); do
echo "Attempt ${attempt}/${MAX_ATTEMPTS} to run Python benchmark"

UCX_KEEPALIVE_INTERVAL=1ms \
UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} \
UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} \
python "${TIMEOUT_TOOL_PATH}" --enable-python $((2*60)) \
python -m ucxx.benchmarks.send_recv --backend "${BACKEND}" \
-o cupy --reuse-alloc -n 8MiB --n-buffers "$N_BUFFERS" --progress-mode "${PROGRESS_MODE}" ${ASYNCIO_WAIT}

LAST_STATUS=$?
if [ "${LAST_STATUS}" -eq 0 ]; then
break
fi

if [ "${attempt}" -lt "${MAX_ATTEMPTS}" ]; then
echo "Python benchmark failed with status ${LAST_STATUS}; retrying"
sleep 1
fi
done
set -e

if [ "${LAST_STATUS}" -ne 0 ]; then
echo "Failure running Python benchmark after ${MAX_ATTEMPTS} attempts"
exit "${LAST_STATUS}"
fi
}

log_message "Python Core Tests"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

import pytest
Expand All @@ -12,6 +12,10 @@

@pytest.mark.parametrize("protocol", ["ucx", "ucxx"])
@pytest.mark.parametrize("Worker", [Worker, Nanny])
@pytest.mark.flaky(
reruns=3,
only_rerun="Trying to reset UCX but not all Endpoints and/or Listeners are closed",
)
@gen_test()
async def test_protocol_from_scheduler_address(ucxx_loop, protocol, Worker):
async with Scheduler(protocol=protocol, dashboard_address=":0") as s:
Expand Down
12 changes: 10 additions & 2 deletions python/ucxx/ucxx/_lib_async/tests/test_multiple_nodes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

import asyncio
Expand Down Expand Up @@ -40,7 +40,15 @@ async def client_node(port):

@pytest.mark.asyncio
@pytest.mark.parametrize("num_servers", [1, 2, 4])
@pytest.mark.parametrize("num_clients", [1, 10, 50, 100])
@pytest.mark.parametrize(
"num_clients",
[
1,
10,
pytest.param(50, marks=pytest.mark.asyncio_timeout(90)),
pytest.param(100, marks=pytest.mark.asyncio_timeout(90)),
],
)
async def test_many_servers_many_clients(num_servers, num_clients):
somaxconn = get_somaxconn()

Expand Down
Loading