diff --git a/.github/workflows/iris-nightly-triton-test.yml b/.github/workflows/iris-nightly-triton-test.yml
new file mode 100644
index 00000000..c9d1d698
--- /dev/null
+++ b/.github/workflows/iris-nightly-triton-test.yml
@@ -0,0 +1,201 @@
+name: Iris Nightly Triton Test
+
+on:
+ schedule:
+ # Run nightly at 2 AM UTC
+ - cron: '0 2 * * *'
+ workflow_dispatch: # Allow manual triggering
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }} # one group per workflow name + ref
+ cancel-in-progress: true # a newly started run cancels the run already in flight in the same group
+
+permissions:
+ contents: read # the workflow token only gets read access to repository contents
+
+jobs:
+ build-container-with-latest-triton: # builds the nightly image that test-nightly-triton runs against
+ runs-on: [self-hosted, mi3xx] # self-hosted runner that carries the mi3xx label
+ timeout-minutes: 120 # Building with latest Triton may take longer
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4 # provides apptainer/iris.def, which the following steps edit and build
+
+ - name: Setup Apptainer (if not available)
+ run: |
+ if ! command -v apptainer &> /dev/null; then # Docker alone is not enough: the build step calls apptainer directly
+ echo "Apptainer not found, installing Apptainer..."
+ apt-get update && apt-get install -y software-properties-common # assumes the runner user may run apt-get — TODO confirm
+ add-apt-repository -y ppa:apptainer/ppa
+ apt-get update && apt-get install -y apptainer
+ else
+ echo "Apptainer already available"
+ fi
+
+ - name: Modify Apptainer def file to use latest Triton
+ run: |
+ echo "Modifying apptainer/iris.def to use latest Triton from main branch"
+ # Swap a full 40-hex-digit commit pin for main; short SHAs, tags and branch names are NOT matched by this pattern
+ sed -i 's/git checkout [0-9a-f]\{40\}/git checkout main/' apptainer/iris.def
+ echo "Modified iris.def:"
+ grep -A2 -B2 "git checkout main" apptainer/iris.def || { echo "❌ ERROR: no 'git checkout main' in apptainer/iris.def after rewrite; refusing to build a pinned Triton"; exit 1; }
+
+ - name: Build Iris container with latest Triton
+ run: |
+ set -e
+
+ # Total size of /dev/shm in whole GB (df reports KB, awk truncates); the build is refused below 64GB
+ shm_total_gb=$(df -k /dev/shm | tail -1 | awk '{print int($2/1024/1024)}')
+ if [ "${shm_total_gb:-0}" -lt 64 ]; then
+ echo "❌ ERROR: /dev/shm is too small (${shm_total_gb}GB < 64GB)"
+ echo "Fix: mount -o remount,size=64G /dev/shm"
+ exit 1
+ fi
+ echo "✅ /dev/shm size OK (${shm_total_gb}GB)"
+
+ # The nightly image gets its own directory under $HOME
+ nightly_dir="${HOME}/iris-apptainer-nightly"
+ sif_path="${nightly_dir}/iris-dev-nightly.sif"
+ def_path=apptainer/iris.def
+ mkdir -p "$nightly_dir"
+
+ # --force: overwrite an image left behind by a previous run
+ echo "Building Apptainer image with latest Triton..."
+ apptainer build --force "$sif_path" "$def_path"
+ echo "Built image: $sif_path"
+
+ test-nightly-triton:
+ name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks)
+ needs: build-container-with-latest-triton # tests start only after the nightly image build has succeeded
+ runs-on: [self-hosted, mi3xx]
+ timeout-minutes: 60 # applies to each matrix entry separately
+ strategy:
+ fail-fast: false # keep the remaining matrix entries running when one of them fails
+ matrix:
+ include:
+ # Each test directory runs with 1, 2, 4 and 8 ranks; gpu_devices is the GPU id list handed on as --gpus
+ - test_dir: examples
+ num_ranks: 1
+ gpu_devices: "0,1" # NOTE(review): two GPU ids for a single rank — confirm this is intended
+ - test_dir: examples
+ num_ranks: 2
+ gpu_devices: "2,3"
+ - test_dir: examples
+ num_ranks: 4
+ gpu_devices: "4,5,6,7"
+ - test_dir: examples
+ num_ranks: 8
+ gpu_devices: "0,1,2,3,4,5,6,7"
+ - test_dir: unittests
+ num_ranks: 1
+ gpu_devices: "0,1"
+ - test_dir: unittests
+ num_ranks: 2
+ gpu_devices: "2,3"
+ - test_dir: unittests
+ num_ranks: 4
+ gpu_devices: "4,5,6,7"
+ - test_dir: unittests
+ num_ranks: 8
+ gpu_devices: "0,1,2,3,4,5,6,7"
+ - test_dir: ccl
+ num_ranks: 1
+ gpu_devices: "0,1"
+ - test_dir: ccl
+ num_ranks: 2
+ gpu_devices: "2,3"
+ - test_dir: ccl
+ num_ranks: 4
+ gpu_devices: "4,5,6,7"
+ - test_dir: ccl
+ num_ranks: 8
+ gpu_devices: "0,1,2,3,4,5,6,7"
+ - test_dir: x
+ num_ranks: 1
+ gpu_devices: "0,1"
+ - test_dir: x
+ num_ranks: 2
+ gpu_devices: "2,3"
+ - test_dir: x
+ num_ranks: 4
+ gpu_devices: "4,5,6,7"
+ - test_dir: x
+ num_ranks: 8
+ gpu_devices: "0,1,2,3,4,5,6,7"
+ - test_dir: ops
+ num_ranks: 1
+ gpu_devices: "0,1"
+ - test_dir: ops
+ num_ranks: 2
+ gpu_devices: "2,3"
+ - test_dir: ops
+ num_ranks: 4
+ gpu_devices: "4,5,6,7"
+ - test_dir: ops
+ num_ranks: 8
+ gpu_devices: "0,1,2,3,4,5,6,7"
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4 # supplies .github/scripts/ and tests/, both used by the steps below
+
+ - name: Cleanup lingering ports before tests
+ run: |
+ bash .github/scripts/cleanup_ports.sh # repo helper; presumably frees ports left by earlier runs — verify in the script
+
+ - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks
+ run: |
+ set -e # abort the step as soon as any command below fails
+ echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (nightly Triton)"
+
+ # Same path the build job writes to; assumes this runner sees the same $HOME as the build job — TODO confirm
+ NIGHTLY_IMAGE="${HOME}/iris-apptainer-nightly/iris-dev-nightly.sif"
+
+ # Forward the matrix GPU list as "--gpus <ids>"; GPU_ARG stays empty when the matrix entry has none
+ GPU_ARG=""
+ if [ -n "${{ matrix.gpu_devices }}" ]; then
+ GPU_ARG="--gpus ${{ matrix.gpu_devices }}"
+ fi
+
+ # Run the quoted script in the nightly image; $GPU_ARG is left unquoted so it splits into option and value
+ .github/scripts/container_exec.sh $GPU_ARG --image "$NIGHTLY_IMAGE" "
+ set -e
+
+ # tritonBLAS version to use
+ TRITONBLAS_COMMIT=\"47768c93acb7f89511d797964b84544c30ab81ad\"
+
+ # Install tritonBLAS if not already installed
+ echo \"Checking for tritonBLAS...\"
+ if ! python -c 'import tritonblas' 2>/dev/null; then
+ echo \"Installing tritonBLAS...\"
+ TRITONBLAS_DIR=\"./tritonblas_install\"
+ if [ ! -d \"\$TRITONBLAS_DIR\" ]; then
+ git clone https://github.com/ROCm/tritonBLAS.git \"\$TRITONBLAS_DIR\"
+ cd \"\$TRITONBLAS_DIR\"
+ git checkout \"\$TRITONBLAS_COMMIT\"
+ else
+ cd \"\$TRITONBLAS_DIR\"
+ git fetch
+ git checkout \"\$TRITONBLAS_COMMIT\"
+ fi
+ pip install -e .
+ cd ..
+ echo \"tritonBLAS installed successfully\"
+ else
+ echo \"tritonBLAS already installed\"
+ fi
+
+ echo \"Installing iris in editable mode\"
+ pip install -e .
+
+ # Run tests in the specified directory
+ for test_file in tests/${{ matrix.test_dir }}/test_*.py; do
+ if [ -f \"\$test_file\" ]; then
+ echo \"Testing: \$test_file with ${{ matrix.num_ranks }} ranks (nightly Triton)\"
+ python tests/run_tests_distributed.py --num_ranks ${{ matrix.num_ranks }} \"\$test_file\" -v --tb=short --durations=10
+ fi
+ done
+ "
+ echo "::endgroup::" # not reached when set -e aborts the step earlier
+ echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (nightly Triton) passed!" # NOTE(review): also printed when the glob above matched no test files
diff --git a/README.md b/README.md
index 9fa2d55b..6f9a921c 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
+