Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// Creates a stable agent socket at ~/.ssh/ssh-agent.sock and optionally loads ~/.ssh/id_rsa.
"initializeCommand": "bash -lc \"bash '${localWorkspaceFolder}/.devcontainer/ensure-ssh-agent.sh'\"",
"runArgs": [
"--name=${localEnv:USER}-iris-dev",
"--name=${localEnv:USER}-${localWorkspaceFolderBasename}-dev",
"--network=host",
"--device=/dev/kfd",
"--device=/dev/dri",
Expand Down
22 changes: 19 additions & 3 deletions .devcontainer/ensure-ssh-agent.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,29 @@ SOCK="${HOME}/.ssh/ssh-agent.sock"

mkdir -p "${HOME}/.ssh"

# Check if socket exists AND has keys loaded
if [[ -S "${SOCK}" ]]; then
exit 0
if SSH_AUTH_SOCK="${SOCK}" ssh-add -l >/dev/null 2>&1; then
# Agent is running and has keys loaded
exit 0
fi

# Check if agent is alive but just has no keys
if SSH_AUTH_SOCK="${SOCK}" ssh-add -l 2>&1 | grep -q "no identities"; then
# Agent is alive, just needs keys loaded - continue to key loading below
:
else
# Agent is dead or socket is stale, remove it
rm -f "${SOCK}" 2>/dev/null || true
fi
fi

rm -f "${SOCK}"
ssh-agent -a "${SOCK}" -t 8h >/dev/null
# Start agent if socket doesn't exist
if [[ ! -S "${SOCK}" ]]; then
ssh-agent -a "${SOCK}" -t 8h >/dev/null || true
fi

# Load SSH key if it exists
if [[ -f "${HOME}/.ssh/id_rsa" ]]; then
SSH_AUTH_SOCK="${SOCK}" ssh-add "${HOME}/.ssh/id_rsa" >/dev/null 2>&1 || true
fi
Expand Down
40 changes: 40 additions & 0 deletions .github/scripts/acquire_gpus.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Acquire GPUs for CI workflows - to be called as a workflow step
# Usage: acquire_gpus.sh <num_gpus>
#
# Arguments:
#   num_gpus  Number of GPUs to request; must be a non-negative decimal integer.
#
# Sources gpu_allocator.sh from this script's own directory and calls its
# acquire_gpus function with <num_gpus>. GPU_DEVICES and ALLOCATED_GPU_BITMAP
# are presumably set by that call (verify against gpu_allocator.sh).
#
# Exports GPU_DEVICES and ALLOCATED_GPU_BITMAP to $GITHUB_ENV for use in
# subsequent steps. When GITHUB_ENV is unset, a warning is printed and nothing
# is exported.
#
# Exit status: 1 when the argument is missing or malformed. Because of
# `set -e`, a failure while sourcing the allocator or in acquire_gpus also
# aborts the script with a non-zero status.

set -e

# ${1:-} keeps the expansion well-defined when the script is run with no args.
NUM_GPUS=${1:-}

if [ -z "$NUM_GPUS" ]; then
  # Diagnostics go to stderr so they are never mistaken for regular output.
  echo "[ERROR] Missing required argument" >&2
  echo "Usage: $0 <num_gpus>" >&2
  exit 1
fi

# Reject anything that is not a plain decimal count before it reaches the
# allocator, so a typo fails here with a clear message.
if ! [[ "$NUM_GPUS" =~ ^[0-9]+$ ]]; then
  echo "[ERROR] Invalid GPU count: '$NUM_GPUS' (expected a non-negative integer)" >&2
  echo "Usage: $0 <num_gpus>" >&2
  exit 1
fi

# Resolve the directory holding this script so the allocator is found
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "[ACQUIRE-GPUS] Acquiring $NUM_GPUS GPU(s)"
# shellcheck source=/dev/null
source "$SCRIPT_DIR/gpu_allocator.sh"
acquire_gpus "$NUM_GPUS"

echo "[ACQUIRE-GPUS] Allocated GPUs: $GPU_DEVICES"
echo "[ACQUIRE-GPUS] GPU allocation details:"
echo "  GPU_DEVICES=$GPU_DEVICES"
echo "  ALLOCATED_GPU_BITMAP=$ALLOCATED_GPU_BITMAP"

# Export to GITHUB_ENV so subsequent steps can use these variables
if [ -n "$GITHUB_ENV" ]; then
  # Group both writes so they share a single append redirection.
  {
    echo "GPU_DEVICES=$GPU_DEVICES"
    echo "ALLOCATED_GPU_BITMAP=$ALLOCATED_GPU_BITMAP"
  } >> "$GITHUB_ENV"
  echo "[ACQUIRE-GPUS] Exported variables to GITHUB_ENV"
else
  echo "[ACQUIRE-GPUS] WARNING: GITHUB_ENV not set, variables not exported" >&2
fi
43 changes: 24 additions & 19 deletions .github/scripts/container_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,34 +32,34 @@ echo "✅ /dev/shm size OK (${shm_size_gb}GB)"
if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
echo "[INFO] Building with Apptainer..."

# Create persistent Apptainer directory
mkdir -p ~/apptainer

# Define paths
IMAGE_PATH=~/apptainer/iris-dev.sif
DEF_FILE=apptainer/iris.def
CHECKSUM_FILE=~/apptainer/iris-dev.sif.checksum

# Verify def file exists
DEF_FILE=apptainer/iris.def
if [ ! -f "$DEF_FILE" ]; then
echo "[ERROR] Definition file $DEF_FILE not found"
exit 1
fi

# Calculate checksum of the def file
NEW_CHECKSUM=$(sha256sum "$DEF_FILE" | awk '{print $1}')
# Calculate checksum of the def file to use as subdirectory name
DEF_CHECKSUM=$(sha256sum "$DEF_FILE" | awk '{print $1}')

# Create persistent Apptainer directory with checksum subdirectory
mkdir -p "${HOME}/iris-apptainer-images/${DEF_CHECKSUM}"

# Define paths
IMAGE_PATH="${HOME}/iris-apptainer-images/${DEF_CHECKSUM}/iris-dev.sif"
CHECKSUM_FILE="${HOME}/iris-apptainer-images/${DEF_CHECKSUM}/iris-dev.sif.checksum"

# Check if image exists and has a valid checksum
REBUILD_NEEDED=true
if [ -f "$IMAGE_PATH" ] && [ -f "$CHECKSUM_FILE" ]; then
OLD_CHECKSUM=$(head -n1 "$CHECKSUM_FILE" 2>/dev/null)
# Validate checksum format (64 hex characters for SHA256)
if [[ "$OLD_CHECKSUM" =~ ^[a-f0-9]{64}$ ]] && [ "$OLD_CHECKSUM" = "$NEW_CHECKSUM" ]; then
echo "[INFO] Def file unchanged (checksum: $NEW_CHECKSUM)"
if [[ "$OLD_CHECKSUM" =~ ^[a-f0-9]{64}$ ]] && [ "$OLD_CHECKSUM" = "$DEF_CHECKSUM" ]; then
echo "[INFO] Def file unchanged (checksum: $DEF_CHECKSUM)"
echo "[INFO] Skipping rebuild, using existing image at $IMAGE_PATH"
REBUILD_NEEDED=false
else
echo "[INFO] Def file changed (old: ${OLD_CHECKSUM:-<invalid>}, new: $NEW_CHECKSUM)"
echo "[INFO] Def file changed (old: ${OLD_CHECKSUM:-<invalid>}, new: $DEF_CHECKSUM)"
echo "[INFO] Rebuilding Apptainer image..."
fi
else
Expand All @@ -70,9 +70,9 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
if [ "$REBUILD_NEEDED" = true ]; then
if apptainer build --force "$IMAGE_PATH" "$DEF_FILE"; then
# Store the checksum only if build succeeded
echo "$NEW_CHECKSUM" > "$CHECKSUM_FILE"
echo "$DEF_CHECKSUM" > "$CHECKSUM_FILE"
echo "[INFO] Built image: $IMAGE_PATH"
echo "[INFO] Checksum saved: $NEW_CHECKSUM"
echo "[INFO] Checksum saved: $DEF_CHECKSUM"
else
echo "[ERROR] Apptainer build failed"
exit 1
Expand All @@ -83,14 +83,19 @@ elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
echo "[INFO] Checking Docker images..."
# Use GitHub variable if set, otherwise default to iris-dev
IMAGE_NAME=${DOCKER_IMAGE_NAME:-"iris-dev"}

# Check if the image exists
if docker image inspect "$IMAGE_NAME" &> /dev/null; then
echo "[INFO] Using existing Docker image: $IMAGE_NAME"
else
echo "[WARNING] Docker image $IMAGE_NAME not found"
echo "[INFO] Please build it using: ./build_triton_image.sh"
echo "[INFO] Or pull it if available from registry"
echo "[INFO] Docker image $IMAGE_NAME not found, building..."
DOCKER_DIR="$(dirname "$(realpath "$0")")/../../docker"
if docker build -t "$IMAGE_NAME" "$DOCKER_DIR"; then
echo "[INFO] Built Docker image: $IMAGE_NAME"
else
echo "[ERROR] Docker build failed"
exit 1
fi
fi
fi

Expand Down
35 changes: 15 additions & 20 deletions .github/scripts/container_exec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,21 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
# Find image
if [ -n "$CUSTOM_IMAGE" ]; then
IMAGE="$CUSTOM_IMAGE"
elif [ -f ~/apptainer/iris-dev.sif ]; then
IMAGE=~/apptainer/iris-dev.sif
elif [ -f apptainer/images/iris.sif ]; then
IMAGE="apptainer/images/iris.sif"
else
echo "[ERROR] Apptainer image not found" >&2
exit 1
# Calculate checksum of def file to find the correct subdirectory
DEF_FILE=apptainer/iris.def
if [ ! -f "$DEF_FILE" ]; then
echo "[ERROR] Definition file $DEF_FILE not found" >&2
exit 1
fi
DEF_CHECKSUM=$(sha256sum "$DEF_FILE" | awk '{print $1}')

if [ -f "${HOME}/iris-apptainer-images/${DEF_CHECKSUM}/iris-dev.sif" ]; then
IMAGE="${HOME}/iris-apptainer-images/${DEF_CHECKSUM}/iris-dev.sif"
else
echo "[ERROR] Apptainer image not found" >&2
exit 1
fi
fi

# Create temporary overlay in workspace with unique name based on PID and timestamp
Expand Down Expand Up @@ -99,24 +107,11 @@ elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
fi

# Build run command with proper GPU access
# Get video and render group IDs from host
VIDEO_GID=$(getent group video | cut -d: -f3)
RENDER_GID=$(getent group render | cut -d: -f3)

RUN_CMD="docker run --rm --network=host --device=/dev/kfd --device=/dev/dri"
RUN_CMD="$RUN_CMD --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
RUN_CMD="$RUN_CMD -v ${PWD}:/iris_workspace -w /iris_workspace"
RUN_CMD="$RUN_CMD --shm-size=16G --ulimit memlock=-1 --ulimit stack=67108864"
RUN_CMD="$RUN_CMD --user $(id -u):$(id -g)"

# Add video and render groups for GPU access
if [ -n "$VIDEO_GID" ]; then
RUN_CMD="$RUN_CMD --group-add $VIDEO_GID"
fi
if [ -n "$RENDER_GID" ]; then
RUN_CMD="$RUN_CMD --group-add $RENDER_GID"
fi


RUN_CMD="$RUN_CMD -e HOME=/iris_workspace"
RUN_CMD="$RUN_CMD --entrypoint bash"

Expand Down
5 changes: 5 additions & 0 deletions .github/scripts/examples_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"31_message_passing": {
"required_ranks": 2
}
}
Loading