diff --git a/.github/workflows/test_torchtitan.yml b/.github/workflows/test_torchtitan.yml
index a80ee4cc..6315c035 100644
--- a/.github/workflows/test_torchtitan.yml
+++ b/.github/workflows/test_torchtitan.yml
@@ -51,3 +51,24 @@ jobs:
         # Check that AutoParallel and TorchTitan DeepSeek V3 produce matching
         # distributed loss and gradient norms for the same 4-GPU debug shape.
         torchrun --standalone --nproc-per-node 4 ../tests/torchtitan_dsv3_equivalence.py
+
+        # Run TorchTitan GraphTrainer AutoParallel integration tests.
+        # Outputs go to a private mktemp directory that the EXIT trap removes.
+        graph_trainer_out_dir="$(mktemp -d)" || exit 1
+        trap 'rm -rf -- "${graph_trainer_out_dir}"' EXIT
+        python -m torchtitan.experiments.graph_trainer.tests.integration_tests \
+          "${graph_trainer_out_dir}/llama3" \
+          --test_suite graph_trainer_autoparallel \
+          --test_name autoparallel_llama3_fsdp_tp \
+          --gpu_arch_type cuda \
+          --ngpu 4
+        NCCL_NVLS_ENABLE=0 python -m torchtitan.experiments.graph_trainer.tests.integration_tests \
+          "${graph_trainer_out_dir}/deepseek_v3" \
+          --test_suite graph_trainer_autoparallel_h100 \
+          --test_name autoparallel_deepseek_v3_efsdp_ep \
+          --gpu_arch_type cuda \
+          --ngpu 4
+
+        # GraphTrainer AutoParallel numerics: llama3 first, then deepseek_v3 with NCCL_NVLS_ENABLE=0.
+        pytest -v torchtitan/experiments/graph_trainer/tests/test_numerics.py::TestGraphTrainerAutoParallelNumerics::test_llama3_aot_fx_trace_autoparallel_vs_eager
+        NCCL_NVLS_ENABLE=0 pytest -v torchtitan/experiments/graph_trainer/tests/test_numerics.py::TestGraphTrainerAutoParallelNumerics::test_deepseek_v3_aot_fx_trace_autoparallel_vs_eager