.github/workflows/linux_cuda.yml: 213 changes (86 additions, 127 deletions)
@@ -6,133 +6,92 @@ on:
      - main
  pull_request:

# Use TorchBench's docker image which has all basic dependencies.
env:
  DOCKER_IMAGE: "ghcr.io/pytorch/torchbench:latest"

jobs:
  pr-test:
    # AWS A10G GPU instance label: linux.g5.4xlarge.nvidia.gpu
    # OS version: Amazon Linux 2
    runs-on: linux.g5.4xlarge.nvidia.gpu
    timeout-minutes: 180 # 3 hours
    steps:
      - name: Checkout Kineto
        uses: actions/checkout@v5
        with:
          path: kineto
          submodules: recursive

      - name: Pull docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
        with:
          docker-image: ${{ env.DOCKER_IMAGE }}

      - name: Install NVIDIA Driver, docker runtime, set GPU_FLAG
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main

      - name: Get env vars
        run: |
          echo GITHUB_WORKFLOW = $GITHUB_WORKFLOW
          echo HOME = $HOME
          echo GITHUB_ACTION = $GITHUB_ACTION
          echo GITHUB_ACTIONS = $GITHUB_ACTIONS
          echo GITHUB_REPOSITORY = $GITHUB_REPOSITORY
          echo GITHUB_EVENT_NAME = $GITHUB_EVENT_NAME
          echo GITHUB_EVENT_PATH = $GITHUB_EVENT_PATH
          echo GITHUB_WORKSPACE = $GITHUB_WORKSPACE
          echo GITHUB_SHA = $GITHUB_SHA
          echo GITHUB_REF = $GITHUB_REF

      - name: Build libkineto (static and shared library)
        run: |
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            --tty \
            --detach \
            --shm-size=32gb \
            -v "${PWD}/kineto:/kineto" \
            -w / \
            "${{ env.DOCKER_IMAGE }}"
          )
          echo "Container name: ${container_name}"

          docker exec -t -w "/kineto" "${container_name}" bash -c "
            set -eux
            mkdir build_static
            mkdir build_shared
            pip install cmake
          "

          docker exec -t -w "/kineto/build_static" "${container_name}" bash -c "
            set -eux
            cmake -DKINETO_LIBRARY_TYPE=static ../libkineto/
            make -j
          "

          docker exec -t -w "/kineto/build_shared" "${container_name}" bash -c "
            set -eux
            cmake -DKINETO_LIBRARY_TYPE=shared ../libkineto/
            make -j
          "

      - name: Run libkineto tests
        run: |
          container_name=$(docker ps -lq)
          docker exec -t -w "/kineto/build_static" "${container_name}" bash -c "make test"

      - name: Clone PyTorch
        run: |
          container_name=$(docker ps -lq)
          docker exec -t -w "/" "${container_name}" bash -c "
            set -eux
            git clone --recursive --branch viable/strict https://github.com/pytorch/pytorch.git
          "

      - name: Replace PyTorch's Kineto with PR version
        run: |
          container_name=$(docker ps -lq)
          docker exec -t -w "/pytorch" "${container_name}" bash -c "
            set -eux
            rm -rf third_party/kineto
            ln -s /kineto third_party/kineto
          "

      - name: Build PyTorch from source
        run: |
          container_name=$(docker ps -lq)
          docker exec -t -w "/pytorch" "${container_name}" bash -c "
            set -eux
            pip install -r requirements.txt
            export USE_CUDA=1
            export BUILD_TEST=1
            python setup.py develop
          "

      - name: Run PyTorch profiler tests
        run: |
          container_name=$(docker ps -lq)
          docker exec -t -w "/pytorch" "${container_name}" bash -c "
            set -eux
            # TODO: Dynamically add/remove tests to the exclusion list based on their
            # status on trunk instead of maintaining a hardcoded list of known failures.
            # This will prevent the list from becoming stale as tests get fixed upstream.
            python -m pytest test/profiler/ -v \
              --deselect=test/profiler/test_memory_profiler.py::TestDataFlow::test_data_flow_graph_complicated \
              --deselect=test/profiler/test_memory_profiler.py::TestMemoryProfilerE2E::test_categories_e2e_sequential_fwd_bwd \
              --deselect=test/profiler/test_memory_profiler.py::TestMemoryProfilerE2E::test_categories_e2e_simple_fwd_bwd \
              --deselect=test/profiler/test_memory_profiler.py::TestMemoryProfilerE2E::test_categories_e2e_simple_fwd_bwd_step \
              --deselect=test/profiler/test_profiler.py::TestProfiler::test_kineto \
              --deselect=test/profiler/test_profiler.py::TestProfiler::test_user_annotation \
              --deselect=test/profiler/test_profiler.py::TestExperimentalUtils::test_fuzz_symbolize \
              --deselect=test/profiler/test_profiler.py::TestExperimentalUtils::test_profiler_debug_autotuner \
              --deselect=test/profiler/test_torch_tidy.py::TestTorchTidyProfiler::test_tensorimpl_invalidation_scalar_args
          "

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always()

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
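# Pushing new commits to the same pull request cancels any in-flight run of this
# workflow for that PR (non-PR events fall back to grouping by commit SHA).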

jobs:
  pr-test:
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      job-name: linux-cuda-build-and-test
      # AWS A10G GPU instance
      runner: linux.g5.4xlarge.nvidia.gpu
      timeout: 180
      # Checkout the Kineto repo at the PR ref with submodules
      repository: ${{ github.repository }}
      ref: ${{ github.ref }}
      submodules: recursive
      gpu-arch-type: cuda
      gpu-arch-version: "12.6"
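      # linux_job_v2.yml (a reusable workflow in pytorch/test-infra) provisions the
      # GPU runner and CUDA container and then runs `script`; the script below
      # assumes it starts inside the Kineto checkout.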
      script: |
        set -eux

        echo "====: Working directory: $(pwd)"

        # Ensure cmake meets the minimum version Kineto requires; quote the spec so
        # the shell does not treat '>' as output redirection.
        conda install -y "cmake>=3.27"
        echo "====: Installed cmake version: $(cmake --version)"

        # Upgrade pip
        python -m pip install --upgrade pip
        echo "====: Installed pip version: $(python -m pip --version)"

        # Build libkineto (static and shared library)
        mkdir -p build_static build_shared

        pushd build_static
        cmake -DKINETO_LIBRARY_TYPE=static ../libkineto/
        make -j
        popd
        echo "====: Compiled static libkineto"

        pushd build_shared
        cmake -DKINETO_LIBRARY_TYPE=shared ../libkineto/
        make -j
        popd
        echo "====: Compiled shared libkineto"

        # Run libkineto tests
        pushd build_static
        make test
        popd
        echo "====: Ran static libkineto tests"

        # Save kineto directory path before cloning PyTorch
        KINETO_DIR=$(pwd)
        echo "====: Kineto directory: ${KINETO_DIR}"

        # Clone PyTorch and replace its Kineto with PR version
        git clone --recursive --branch viable/strict https://github.com/pytorch/pytorch.git
        echo "====: Cloned PyTorch"

        pushd pytorch
        rm -rf third_party/kineto
        ln -s "${KINETO_DIR}" third_party/kineto
        echo "====: Linked PR version of Kineto to PyTorch (${KINETO_DIR} -> third_party/kineto)"

        # Build PyTorch from source
        pip install -r requirements.txt
        export USE_CUDA=1
        export BUILD_TEST=1
        python setup.py develop
        echo "====: Built PyTorch from source"

        # Run PyTorch profiler tests
        # TODO: Dynamically add tests to (and remove them from) the exclusion list
        # based on their status on trunk instead of maintaining a hardcoded list of
        # known failures. This will prevent the list from becoming stale as tests get
        # fixed upstream.
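        # One possible shape for that TODO (hypothetical, not wired up here): keep the
        # known failures in a file in the Kineto repo, e.g.
        # .github/ci/profiler_known_failures.txt (assumed path), one pytest node ID per
        # line, and expand it into --deselect flags:
        #
        #   DESELECT_ARGS=()
        #   while IFS= read -r test_id; do
        #     # Skip blank lines and comment lines in the failure list.
        #     [[ -z "${test_id}" || "${test_id}" == \#* ]] && continue
        #     DESELECT_ARGS+=("--deselect=${test_id}")
        #   done < "${KINETO_DIR}/.github/ci/profiler_known_failures.txt"
        #   python -m pytest test/profiler/ -v "${DESELECT_ARGS[@]}"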
        pip install pytest
        python -m pytest test/profiler/ -v \
          --deselect=test/profiler/test_memory_profiler.py::TestDataFlow::test_data_flow_graph_complicated \
          --deselect=test/profiler/test_memory_profiler.py::TestMemoryProfilerE2E::test_categories_e2e_sequential_fwd_bwd \
          --deselect=test/profiler/test_memory_profiler.py::TestMemoryProfilerE2E::test_categories_e2e_simple_fwd_bwd \
          --deselect=test/profiler/test_memory_profiler.py::TestMemoryProfilerE2E::test_categories_e2e_simple_fwd_bwd_step \
          --deselect=test/profiler/test_profiler.py::TestProfiler::test_kineto \
          --deselect=test/profiler/test_profiler.py::TestProfiler::test_user_annotation \
          --deselect=test/profiler/test_profiler.py::TestExperimentalUtils::test_fuzz_symbolize \
          --deselect=test/profiler/test_profiler.py::TestExperimentalUtils::test_profiler_debug_autotuner \
          --deselect=test/profiler/test_torch_tidy.py::TestTorchTidyProfiler::test_tensorimpl_invalidation_scalar_args
        popd
        echo "====: Ran PyTorch profiler tests"