datastax · sl-at-ibm · Jun 10, 2026 · Jun 8, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/.github/workflows/codecov_aggregator.yml b/.github/workflows/codecov_aggregator.yml
@@ -32,7 +32,7 @@ jobs:
     secrets:
       ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
       ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
-      HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
+      HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
 
   local_it:
     uses: ./.github/workflows/local.yml
@@ -46,7 +46,7 @@ jobs:
       AWS_ECR_ROLE_NAME: ${{ secrets.AWS_ECR_ROLE_NAME }}
       AWS_ECR_REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }}
       AWS_ECR_REGISTRY: ${{ secrets.AWS_ECR_REGISTRY }}
-      HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
+      HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
       # hardcoding the target DB
       DOCKER_COMPOSE_LOCAL_DATA_API: "yes"
       # turn on header-based reranker auth

diff --git a/.github/workflows/local.yml b/.github/workflows/local.yml
@@ -22,7 +22,7 @@ on:
         required: true
       AWS_ECR_REGISTRY:
         required: true
-      HEADER_EMBEDDING_API_KEY_OPENAI:
+      HEADER_EMBEDDING_API_KEY_VOYAGEAI:
         required: true
       DOCKER_COMPOSE_LOCAL_DATA_API:
         required: true
@@ -41,7 +41,7 @@ jobs:
       AWS_ECR_ROLE_NAME: ${{ secrets.AWS_ECR_ROLE_NAME }}
       AWS_ECR_REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }}
       AWS_ECR_REGISTRY: ${{ secrets.AWS_ECR_REGISTRY }}
-      HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
+      HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
       DOCKER_COMPOSE_LOCAL_DATA_API: ${{ secrets.DOCKER_COMPOSE_LOCAL_DATA_API }}
       HEADER_RERANKING_API_KEY_NVIDIA: ${{ secrets.HEADER_RERANKING_API_KEY_NVIDIA }}
     runs-on: ubuntu-latest

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -16,15 +16,15 @@ on:
         required: true
       ASTRA_DB_API_ENDPOINT:
         required: true
-      HEADER_EMBEDDING_API_KEY_OPENAI:
+      HEADER_EMBEDDING_API_KEY_VOYAGEAI:
         required: true
 
 jobs:
   test:
     env:
       ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
       ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
-      HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
+      HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
       LEGACY_INSERTMANY_BEHAVIOUR_PRE2193: ${{ secrets.LEGACY_INSERTMANY_BEHAVIOUR_PRE2193 }}
     runs-on: ubuntu-latest
 

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -124,7 +124,7 @@ jobs:
         env:
           ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
           ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
-          HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
+          HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
           LEGACY_INSERTMANY_BEHAVIOUR_PRE2193: ${{ secrets.LEGACY_INSERTMANY_BEHAVIOUR_PRE2193 }}
         run: make test-integration
 

diff --git a/DEVELOPING.md b/DEVELOPING.md
@@ -69,10 +69,13 @@ Note that the variables defined in the desired "base" template **must** be set t
 
 Additionally, you will need to define the environment variables in `tests/env_templates/env.vectorize-minimal.template`,
 which are needed by the minimal set of "vectorize" testing belonging to the "base" test group.
+These rely on a single embedding provider and model
+(the one configured in `embedding_provider_switcher.py`, to match variables in the env template).
 
 For Astra DB, you can include "shared secret" vectorize tests (i.e. KMS-based authentication).
-To run those tests, you must scope an OpenAI API key
-to the target Astra DB with secret name `"SHARED_SECRET_EMBEDDING_API_KEY_OPENAI"`
+To run those tests, you must scope an embedding provider API key
+to the target Astra DB with secret name matching the name set in
+the provider-switcher (e.g. `"SHARED_SECRET_EMBEDDING_API_KEY_VOYAGEAI"`)
 and comment the environment flag that suppresses them (see the base Astra env template).
 
 For non-Astra, the reranking-related tests run only if one sets

diff --git a/tests/base/conftest.py b/tests/base/conftest.py
@@ -44,7 +44,11 @@
     ADMIN_ENV_LIST,
     ADMIN_ENV_VARIABLE_MAP,
     CQL_AVAILABLE,
-    HEADER_EMBEDDING_API_KEY_OPENAI,
+    EMBEDDING_PROVIDER_API_KEY,
+    EMBEDDING_PROVIDER_DIMENSION,
+    EMBEDDING_PROVIDER_MODEL_NAME,
+    EMBEDDING_PROVIDER_NAME,
+    EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME,
     HEADER_RERANKING_API_KEY_NVIDIA,
     IS_ASTRA_DB,
     RUN_SHARED_SECRET_VECTORIZE_TESTS,
@@ -227,10 +231,10 @@ def async_empty_collection(
 @pytest.fixture(scope="session")
 def service_collection_parameters() -> Iterable[dict[str, Any]]:
     yield {
-        "dimension": 1536,
-        "provider": "openai",
-        "modelName": "text-embedding-ada-002",
-        "api_key": HEADER_EMBEDDING_API_KEY_OPENAI,
+        "dimension": EMBEDDING_PROVIDER_DIMENSION,
+        "provider": EMBEDDING_PROVIDER_NAME,
+        "modelName": EMBEDDING_PROVIDER_MODEL_NAME,
+        "api_key": EMBEDDING_PROVIDER_API_KEY,
         "reranking_api_key": HEADER_RERANKING_API_KEY_NVIDIA,
     }
 
@@ -996,7 +1000,11 @@ def async_empty_table_logicalfiltering(
     "clean_nulls_from_dict",
     "is_future_version",
     "sync_fail_if_not_removed",
-    "HEADER_EMBEDDING_API_KEY_OPENAI",
+    "EMBEDDING_PROVIDER_NAME",
+    "EMBEDDING_PROVIDER_MODEL_NAME",
+    "EMBEDDING_PROVIDER_API_KEY",
+    "EMBEDDING_PROVIDER_DIMENSION",
+    "EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME",
     "IS_ASTRA_DB",
     "ADMIN_ENV_LIST",
     "ADMIN_ENV_VARIABLE_MAP",

diff --git a/tests/base/integration/collections/test_collection_cursor_async.py b/tests/base/integration/collections/test_collection_cursor_async.py
@@ -26,8 +26,9 @@
 
 from ..conftest import DefaultAsyncCollection
 
-NUM_DOCS = 25  # keep this between 20 and 39
-NUM_DOCS_PAGINATION = 90  # keep this above 2 * (2 * 20) and below 2 * (3 * 20)
+PAGE_SIZE = 20  # TODO: set to 50, default as per Data API config after PR 2461
+NUM_DOCS = 2 * PAGE_SIZE + 5
+NUM_DOCS_PAGINATION = 2 * (2 * PAGE_SIZE) + 5
 
 
 @pytest.fixture
@@ -161,16 +162,16 @@ async def test_collection_cursors_started_properties_async(
         await cur.__anext__()
-        # now this has 19 items in buffer, one is consumed
+        # now this has PAGE_SIZE - 1 items in buffer, one is consumed
         assert cur.consumed == 1
-        assert cur.buffered_count == 19
+        assert cur.buffered_count == PAGE_SIZE - 1
         assert len(cur.consume_buffer(3)) == 3
         assert cur.consumed == 4
-        assert cur.buffered_count == 16
+        assert cur.buffered_count == PAGE_SIZE - 4
         # from time to time the buffer is empty:
-        for _ in range(16):
+        for _ in range(PAGE_SIZE - 4):
             await cur.__anext__()
         assert cur.buffered_count == 0
         assert cur.consume_buffer(3) == []
-        assert cur.consumed == 20
+        assert cur.consumed == PAGE_SIZE
         assert cur.buffered_count == 0
 
         with pytest.raises(CursorException):
@@ -213,12 +214,12 @@ async def test_collection_cursors_has_next_async(
         assert curmf.has_next()
         assert curmf.consumed == 2
         assert curmf.state == CursorState.STARTED
-        for _ in range(18):
+        for _ in range(PAGE_SIZE - 2):
             await curmf.__anext__()
         assert await curmf.has_next()
-        assert curmf.consumed == 20
+        assert curmf.consumed == PAGE_SIZE
         assert curmf.state == CursorState.STARTED
-        assert curmf.buffered_count == NUM_DOCS - 20
+        assert curmf.buffered_count == PAGE_SIZE
 
         cur0 = async_filled_collection.find()
         cur0.close()
@@ -494,11 +495,9 @@ async def test_collection_cursors_initialpagestate_async(
         self,
         async_filled_pagination_collection: DefaultAsyncCollection,
     ) -> None:
-        page_size = 20
-
         cur0 = async_filled_pagination_collection.find(filter={"even": True})
         ids0: list[int] = []
-        for _ in range(page_size):
+        for _ in range(PAGE_SIZE):
             doc = await cur0.__anext__()
             ids0.append(doc["_id"])
         nps0 = cur0._next_page_state
@@ -509,7 +508,7 @@ async def test_collection_cursors_initialpagestate_async(
             initial_page_state=nps0,
         )
         ids1: list[int] = []
-        for _ in range(page_size):
+        for _ in range(PAGE_SIZE):
             doc = await cur1.__anext__()
             ids1.append(doc["_id"])
         nps1 = cur1._next_page_state

diff --git a/tests/base/integration/collections/test_collection_cursor_sync.py b/tests/base/integration/collections/test_collection_cursor_sync.py
@@ -26,8 +26,9 @@
 
 from ..conftest import DefaultCollection
 
-NUM_DOCS = 25  # keep this between 20 and 39
-NUM_DOCS_PAGINATION = 90  # keep this above 2 * (2 * 20) and below 2 * (3 * 20)
+PAGE_SIZE = 20  # TODO: set to 50, default as per Data API config after PR 2461
+NUM_DOCS = 2 * PAGE_SIZE + 5
+NUM_DOCS_PAGINATION = 2 * (2 * PAGE_SIZE) + 5
 
 
 @pytest.fixture
@@ -159,16 +160,16 @@ def test_collection_cursors_started_properties_sync(
         next(cur)
-        # now this has 19 items in buffer, one is consumed
+        # now this has PAGE_SIZE - 1 items in buffer, one is consumed
         assert cur.consumed == 1
-        assert cur.buffered_count == 19
+        assert cur.buffered_count == PAGE_SIZE - 1
         assert len(cur.consume_buffer(3)) == 3
         assert cur.consumed == 4
-        assert cur.buffered_count == 16
+        assert cur.buffered_count == PAGE_SIZE - 4
         # from time to time the buffer is empty:
-        for _ in range(16):
+        for _ in range(PAGE_SIZE - 4):
             next(cur)
         assert cur.buffered_count == 0
         assert cur.consume_buffer(3) == []
-        assert cur.consumed == 20
+        assert cur.consumed == PAGE_SIZE
         assert cur.buffered_count == 0
 
         with pytest.raises(CursorException):
@@ -211,12 +212,12 @@ def test_collection_cursors_has_next_sync(
         assert curmf.has_next()
         assert curmf.consumed == 2
         assert curmf.state == CursorState.STARTED
-        for _ in range(18):
+        for _ in range(PAGE_SIZE - 2):
             next(curmf)
         assert curmf.has_next()
-        assert curmf.consumed == 20
+        assert curmf.consumed == PAGE_SIZE
         assert curmf.state == CursorState.STARTED
-        assert curmf.buffered_count == NUM_DOCS - 20
+        assert curmf.buffered_count == PAGE_SIZE
 
         cur0 = filled_collection.find()
         cur0.close()
@@ -417,26 +418,24 @@ def test_collection_cursors_initialpagestate_sync(
         self,
         filled_pagination_collection: DefaultCollection,
     ) -> None:
-        page_size = 20
-
         cur0 = filled_pagination_collection.find(filter={"even": True})
-        ids0 = [doc["_id"] for _, doc in zip(range(page_size), cur0)]
+        ids0 = [doc["_id"] for _, doc in zip(range(PAGE_SIZE), cur0)]
         nps0 = cur0._next_page_state
         assert isinstance(nps0, str)
 
         cur1 = filled_pagination_collection.find(
             filter={"even": True},
             initial_page_state=nps0,
         )
-        ids1 = [doc["_id"] for _, doc in zip(range(page_size), cur1)]
+        ids1 = [doc["_id"] for _, doc in zip(range(PAGE_SIZE), cur1)]
         nps1 = cur1._next_page_state
         assert isinstance(nps1, str)
 
         cur2 = filled_pagination_collection.find(
             filter={"even": True},
             initial_page_state=nps1,
         )
-        ids2 = [doc["_id"] for _, doc in zip(range(page_size), cur2)]
+        ids2 = [doc["_id"] for doc in cur2]
         assert cur2._next_page_state is None
 
         expected_ids = [i for i in range(NUM_DOCS_PAGINATION) if i % 2 == 0]

diff --git a/tests/base/integration/collections/test_collection_vectorize_methods_async.py b/tests/base/integration/collections/test_collection_vectorize_methods_async.py
@@ -26,14 +26,14 @@
 from astrapy.info import CollectionDefinition
 
 from ..conftest import (
-    HEADER_EMBEDDING_API_KEY_OPENAI,
+    EMBEDDING_PROVIDER_API_KEY,
     DefaultAsyncCollection,
 )
 
 
 @pytest.mark.skipif(
-    HEADER_EMBEDDING_API_KEY_OPENAI is None,
-    reason="No HEADER_EMBEDDING_API_KEY_OPENAI credential",
+    EMBEDDING_PROVIDER_API_KEY is None,
+    reason="No embedding API Key credential",
 )
 class TestCollectionVectorizeMethodsAsync:
     @pytest.mark.describe("test of vectorize in collection methods, async")

diff --git a/tests/base/integration/collections/test_collection_vectorize_methods_sync.py b/tests/base/integration/collections/test_collection_vectorize_methods_sync.py
@@ -24,14 +24,14 @@
 from astrapy.info import CollectionDefinition
 
 from ..conftest import (
-    HEADER_EMBEDDING_API_KEY_OPENAI,
+    EMBEDDING_PROVIDER_API_KEY,
     DefaultCollection,
 )
 
 
 @pytest.mark.skipif(
-    HEADER_EMBEDDING_API_KEY_OPENAI is None,
-    reason="No HEADER_EMBEDDING_API_KEY_OPENAI credential",
+    EMBEDDING_PROVIDER_API_KEY is None,
+    reason="No embedding API Key credential",
 )
 class TestCollectionVectorizeMethodsSync:
     @pytest.mark.describe("test of vectorize in collection methods, sync")

diff --git a/tests/base/integration/conftest.py b/tests/base/integration/conftest.py
@@ -18,7 +18,9 @@
     ADMIN_ENV_LIST,
     ADMIN_ENV_VARIABLE_MAP,
     CQL_AVAILABLE,
-    HEADER_EMBEDDING_API_KEY_OPENAI,
+    EMBEDDING_PROVIDER_API_KEY,
+    EMBEDDING_PROVIDER_DIMENSION,
+    EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME,
     IS_ASTRA_DB,
     RUN_SHARED_SECRET_VECTORIZE_TESTS,
     SECONDARY_KEYSPACE,
@@ -62,7 +64,9 @@
     "async_fail_if_not_removed",
     "clean_nulls_from_dict",
     "sync_fail_if_not_removed",
-    "HEADER_EMBEDDING_API_KEY_OPENAI",
+    "EMBEDDING_PROVIDER_API_KEY",
+    "EMBEDDING_PROVIDER_DIMENSION",
+    "EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME",
     "IS_ASTRA_DB",
     "ADMIN_ENV_LIST",
     "ADMIN_ENV_VARIABLE_MAP",

diff --git a/tests/base/integration/tables/test_table_userdefinedtypes_async.py b/tests/base/integration/tables/test_table_userdefinedtypes_async.py
@@ -567,6 +567,7 @@ async def test_table_udt_weirdcolumns_async(
         try:
             for cql_statement in WEIRD_UDT_BASE_INITIALIZE_STATEMENTS:
                 cql_session.execute(cql_statement)
+            await asyncio.sleep(1.5)  # udt propagation requires some time, it seems
 
             # test a read and a write for 'base weird'
             atable_weird_base = async_database.get_table(WEIRD_UDT_BASE_TABLE_NAME)

diff --git a/tests/base/integration/tables/test_table_userdefinedtypes_sync.py b/tests/base/integration/tables/test_table_userdefinedtypes_sync.py
@@ -562,6 +562,7 @@ def test_table_udt_weirdcolumns_sync(
         try:
             for cql_statement in WEIRD_UDT_BASE_INITIALIZE_STATEMENTS:
                 cql_session.execute(cql_statement)
+            time.sleep(1.5)  # udt propagation requires some time, it seems
 
             # test a read and a write for 'base weird'
             table_weird_base = sync_database.get_table(WEIRD_UDT_BASE_TABLE_NAME)