diff --git a/numba_cuda/numba/cuda/tests/test_auto_constants.py b/numba_cuda/numba/cuda/tests/test_auto_constants.py
new file mode 100644
index 000000000..3f835c93f
--- /dev/null
+++ b/numba_cuda/numba/cuda/tests/test_auto_constants.py
@@ -0,0 +1,112 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+import numpy as np
+
+from numba import cuda
+from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+
+# -----------------------------
+# Kernels using auto-constants
+# -----------------------------
+
+@cuda.jit
+def int_constant_kernel(out):
+    tid = cuda.grid(1)  # absolute 1D index of this thread
+    if tid < out.size:  # skip threads launched past the end of ``out``
+        out[tid] = 42  # integer literal stored into every element
+
+@cuda.jit
+def float_constant_kernel(out):
+    tid = cuda.grid(1)  # absolute 1D index of this thread
+    if tid < out.size:  # skip threads launched past the end of ``out``
+        out[tid] = 3.5  # float literal stored into every element
+
+@cuda.jit
+def bool_constant_kernel(out):
+    tid = cuda.grid(1)  # absolute 1D index of this thread
+    if tid < out.size:  # skip threads launched past the end of ``out``
+        out[tid] = True  # boolean literal stored into every element
+
+@cuda.jit
+def arithmetic_constant_kernel(out):
+    tid = cuda.grid(1)  # absolute 1D index of this thread
+    if tid < out.size:  # skip threads launched past the end of ``out``
+        out[tid] = 2 + 3 * 4  # all-integer expression, evaluates to 14
+
+@cuda.jit
+def mixed_constant_kernel(out):
+    tid = cuda.grid(1)  # absolute 1D index of this thread
+    if tid < out.size:  # skip threads launched past the end of ``out``
+        out[tid] = 1.5 + 2  # float + int literals, evaluates to 3.5
+
+@cuda.jit
+def constant_index_kernel(inp, out):
+    tid = cuda.grid(1)  # absolute 1D index of this thread
+    if tid < out.size:  # skip threads launched past the end of ``out``
+        out[tid] = inp[2]  # every thread reads the same literal index
+
+@skip_on_cudasim("Auto-constant lowering differs under cudasim")
+class TestCudaAutoConstants(CUDATestCase):
+    """Launch the constant-writing kernels and check what they stored.
+
+    Each test fills a zero-initialised device array through one kernel and
+    compares the copied-back result with the expected constant.
+    """
+
+    def _launch_1d(self, kernel, args, size):
+        """Launch ``kernel(*args)`` on a 1D grid of at least ``size`` threads.
+
+        Calls ``cuda.synchronize()`` after the launch before returning.
+        """
+        block_dim = 128
+        # Round up so a partially filled last block still covers the tail.
+        grid_dim = (size + block_dim - 1) // block_dim
+        kernel[grid_dim, block_dim](*args)
+        cuda.synchronize()
+
+    def _check_fill(self, kernel, dtype, expected, *, length=8, leading=()):
+        """Run ``kernel`` on a zeroed array and expect ``expected`` everywhere.
+
+        ``length`` is the output size; ``leading`` holds device arguments
+        passed ahead of the output array.
+        """
+        zeros = np.zeros(length, dtype=dtype)
+        d_result = cuda.to_device(zeros)
+        self._launch_1d(kernel, (*leading, d_result), zeros.size)
+        np.testing.assert_array_equal(
+            d_result.copy_to_host(),
+            np.full_like(zeros, expected),
+        )
+
+    def test_int_constant(self):
+        """An integer literal lands unchanged in an int32 array."""
+        self._check_fill(int_constant_kernel, np.int32, 42)
+
+    def test_float_constant(self):
+        """A float literal lands unchanged in a float32 array."""
+        self._check_fill(float_constant_kernel, np.float32, 3.5)
+
+    def test_bool_constant(self):
+        """A ``True`` literal sets every element of a boolean array."""
+        self._check_fill(bool_constant_kernel, np.bool_, True)
+
+    def test_arithmetic_constant(self):
+        """``2 + 3 * 4`` is stored as 14 in an int32 array."""
+        self._check_fill(arithmetic_constant_kernel, np.int32, 14)
+
+    def test_mixed_constant(self):
+        """``1.5 + 2`` is stored as 3.5 in a float32 array."""
+        self._check_fill(mixed_constant_kernel, np.float32, 3.5)
+
+    def test_constant_indexing(self):
+        """Indexing with the literal 2 copies ``inp[2]`` to every output."""
+        inp = np.arange(10, dtype=np.int32)
+        d_inp = cuda.to_device(inp)
+        self._check_fill(
+            constant_index_kernel,
+            np.int32,
+            inp[2],
+            length=5,
+            leading=(d_inp,),
+        )