Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/codecov_aggregator.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
secrets:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}

local_it:
uses: ./.github/workflows/local.yml
Expand All @@ -46,7 +46,7 @@ jobs:
AWS_ECR_ROLE_NAME: ${{ secrets.AWS_ECR_ROLE_NAME }}
AWS_ECR_REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }}
AWS_ECR_REGISTRY: ${{ secrets.AWS_ECR_REGISTRY }}
HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
# hardcoding the target DB
DOCKER_COMPOSE_LOCAL_DATA_API: "yes"
# turn on header-based reranker auth
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ on:
required: true
AWS_ECR_REGISTRY:
required: true
HEADER_EMBEDDING_API_KEY_OPENAI:
HEADER_EMBEDDING_API_KEY_VOYAGEAI:
required: true
DOCKER_COMPOSE_LOCAL_DATA_API:
required: true
Expand All @@ -41,7 +41,7 @@ jobs:
AWS_ECR_ROLE_NAME: ${{ secrets.AWS_ECR_ROLE_NAME }}
AWS_ECR_REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }}
AWS_ECR_REGISTRY: ${{ secrets.AWS_ECR_REGISTRY }}
HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
DOCKER_COMPOSE_LOCAL_DATA_API: ${{ secrets.DOCKER_COMPOSE_LOCAL_DATA_API }}
HEADER_RERANKING_API_KEY_NVIDIA: ${{ secrets.HEADER_RERANKING_API_KEY_NVIDIA }}
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ on:
required: true
ASTRA_DB_API_ENDPOINT:
required: true
HEADER_EMBEDDING_API_KEY_OPENAI:
HEADER_EMBEDDING_API_KEY_VOYAGEAI:
required: true

jobs:
test:
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
LEGACY_INSERTMANY_BEHAVIOUR_PRE2193: ${{ secrets.LEGACY_INSERTMANY_BEHAVIOUR_PRE2193 }}
runs-on: ubuntu-latest

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ jobs:
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
HEADER_EMBEDDING_API_KEY_OPENAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_OPENAI }}
HEADER_EMBEDDING_API_KEY_VOYAGEAI: ${{ secrets.HEADER_EMBEDDING_API_KEY_VOYAGEAI }}
LEGACY_INSERTMANY_BEHAVIOUR_PRE2193: ${{ secrets.LEGACY_INSERTMANY_BEHAVIOUR_PRE2193 }}
run: make test-integration

Expand Down
7 changes: 5 additions & 2 deletions DEVELOPING.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,13 @@ Note that the variables defined in the desired "base" template **must** be set t

Additionally, you will need to define the environment variables in `tests/env_templates/env.vectorize-minimal.template`,
which are needed by the minimal set of "vectorize" testing belonging to the "base" test group.
These rely on a single embedding provider and model
(the one configured in `embedding_provider_switcher.py`, which must match the variables in the env template).

For Astra DB, you can include "shared secret" vectorize tests (i.e. KMS-based authentication).
To run those tests, you must scope an OpenAI API key
to the target Astra DB with secret name `"SHARED_SECRET_EMBEDDING_API_KEY_OPENAI"`
To run those tests, you must scope an embedding provider API key
to the target Astra DB with a secret name matching the name set in
the provider-switcher (e.g. `"SHARED_SECRET_EMBEDDING_API_KEY_VOYAGEAI"`),
and comment out the environment flag that suppresses them (see the base Astra env template).

For non-Astra, the reranking-related tests run only if one sets
Expand Down
20 changes: 14 additions & 6 deletions tests/base/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@
ADMIN_ENV_LIST,
ADMIN_ENV_VARIABLE_MAP,
CQL_AVAILABLE,
HEADER_EMBEDDING_API_KEY_OPENAI,
EMBEDDING_PROVIDER_API_KEY,
EMBEDDING_PROVIDER_DIMENSION,
EMBEDDING_PROVIDER_MODEL_NAME,
EMBEDDING_PROVIDER_NAME,
EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME,
HEADER_RERANKING_API_KEY_NVIDIA,
IS_ASTRA_DB,
RUN_SHARED_SECRET_VECTORIZE_TESTS,
Expand Down Expand Up @@ -227,10 +231,10 @@ def async_empty_collection(
@pytest.fixture(scope="session")
def service_collection_parameters() -> Iterable[dict[str, Any]]:
yield {
"dimension": 1536,
"provider": "openai",
"modelName": "text-embedding-ada-002",
"api_key": HEADER_EMBEDDING_API_KEY_OPENAI,
"dimension": EMBEDDING_PROVIDER_DIMENSION,
"provider": EMBEDDING_PROVIDER_NAME,
"modelName": EMBEDDING_PROVIDER_MODEL_NAME,
"api_key": EMBEDDING_PROVIDER_API_KEY,
"reranking_api_key": HEADER_RERANKING_API_KEY_NVIDIA,
}

Expand Down Expand Up @@ -996,7 +1000,11 @@ def async_empty_table_logicalfiltering(
"clean_nulls_from_dict",
"is_future_version",
"sync_fail_if_not_removed",
"HEADER_EMBEDDING_API_KEY_OPENAI",
"EMBEDDING_PROVIDER_NAME",
"EMBEDDING_PROVIDER_MODEL_NAME",
"EMBEDDING_PROVIDER_API_KEY",
"EMBEDDING_PROVIDER_DIMENSION",
"EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME",
"IS_ASTRA_DB",
"ADMIN_ENV_LIST",
"ADMIN_ENV_VARIABLE_MAP",
Expand Down
25 changes: 12 additions & 13 deletions tests/base/integration/collections/test_collection_cursor_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@

from ..conftest import DefaultAsyncCollection

NUM_DOCS = 25 # keep this between 20 and 39
NUM_DOCS_PAGINATION = 90 # keep this above 2 * (2 * 20) and below 2 * (3 * 20)
PAGE_SIZE = 20 # TODO: set to 50, default as per Data API config after PR 2461
NUM_DOCS = 2 * PAGE_SIZE + 5
NUM_DOCS_PAGINATION = 2 * (2 * PAGE_SIZE) + 5


@pytest.fixture
Expand Down Expand Up @@ -161,16 +162,16 @@ async def test_collection_cursors_started_properties_async(
await cur.__anext__()
# now this has 19 items in buffer, one is consumed
assert cur.consumed == 1
assert cur.buffered_count == 19
assert cur.buffered_count == PAGE_SIZE - 1
assert len(cur.consume_buffer(3)) == 3
assert cur.consumed == 4
assert cur.buffered_count == 16
assert cur.buffered_count == PAGE_SIZE - 4
# from time to time the buffer is empty:
for _ in range(16):
for _ in range(PAGE_SIZE - 4):
await cur.__anext__()
assert cur.buffered_count == 0
assert cur.consume_buffer(3) == []
assert cur.consumed == 20
assert cur.consumed == PAGE_SIZE
assert cur.buffered_count == 0

with pytest.raises(CursorException):
Expand Down Expand Up @@ -213,12 +214,12 @@ async def test_collection_cursors_has_next_async(
assert curmf.has_next()
assert curmf.consumed == 2
assert curmf.state == CursorState.STARTED
for _ in range(18):
for _ in range(PAGE_SIZE - 2):
await curmf.__anext__()
assert await curmf.has_next()
assert curmf.consumed == 20
assert curmf.consumed == PAGE_SIZE
assert curmf.state == CursorState.STARTED
assert curmf.buffered_count == NUM_DOCS - 20
assert curmf.buffered_count == PAGE_SIZE

cur0 = async_filled_collection.find()
cur0.close()
Expand Down Expand Up @@ -494,11 +495,9 @@ async def test_collection_cursors_initialpagestate_async(
self,
async_filled_pagination_collection: DefaultAsyncCollection,
) -> None:
page_size = 20

cur0 = async_filled_pagination_collection.find(filter={"even": True})
ids0: list[int] = []
for _ in range(page_size):
for _ in range(PAGE_SIZE):
doc = await cur0.__anext__()
ids0.append(doc["_id"])
nps0 = cur0._next_page_state
Expand All @@ -509,7 +508,7 @@ async def test_collection_cursors_initialpagestate_async(
initial_page_state=nps0,
)
ids1: list[int] = []
for _ in range(page_size):
for _ in range(PAGE_SIZE):
doc = await cur1.__anext__()
ids1.append(doc["_id"])
nps1 = cur1._next_page_state
Expand Down
27 changes: 13 additions & 14 deletions tests/base/integration/collections/test_collection_cursor_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@

from ..conftest import DefaultCollection

NUM_DOCS = 25 # keep this between 20 and 39
NUM_DOCS_PAGINATION = 90 # keep this above 2 * (2 * 20) and below 2 * (3 * 20)
PAGE_SIZE = 20 # TODO: set to 50, default as per Data API config after PR 2461
NUM_DOCS = 2 * PAGE_SIZE + 5
NUM_DOCS_PAGINATION = 2 * (2 * PAGE_SIZE) + 5


@pytest.fixture
Expand Down Expand Up @@ -159,16 +160,16 @@ def test_collection_cursors_started_properties_sync(
next(cur)
# now this has PAGE_SIZE - 1 items in buffer, one is consumed
assert cur.consumed == 1
assert cur.buffered_count == 19
assert cur.buffered_count == PAGE_SIZE - 1
assert len(cur.consume_buffer(3)) == 3
assert cur.consumed == 4
assert cur.buffered_count == 16
assert cur.buffered_count == PAGE_SIZE - 4
# from time to time the buffer is empty:
for _ in range(16):
for _ in range(PAGE_SIZE - 4):
next(cur)
assert cur.buffered_count == 0
assert cur.consume_buffer(3) == []
assert cur.consumed == 20
assert cur.consumed == PAGE_SIZE
assert cur.buffered_count == 0

with pytest.raises(CursorException):
Expand Down Expand Up @@ -211,12 +212,12 @@ def test_collection_cursors_has_next_sync(
assert curmf.has_next()
assert curmf.consumed == 2
assert curmf.state == CursorState.STARTED
for _ in range(18):
for _ in range(PAGE_SIZE - 2):
next(curmf)
assert curmf.has_next()
assert curmf.consumed == 20
assert curmf.consumed == PAGE_SIZE
assert curmf.state == CursorState.STARTED
assert curmf.buffered_count == NUM_DOCS - 20
assert curmf.buffered_count == PAGE_SIZE

cur0 = filled_collection.find()
cur0.close()
Expand Down Expand Up @@ -417,26 +418,24 @@ def test_collection_cursors_initialpagestate_sync(
self,
filled_pagination_collection: DefaultCollection,
) -> None:
page_size = 20

cur0 = filled_pagination_collection.find(filter={"even": True})
ids0 = [doc["_id"] for _, doc in zip(range(page_size), cur0)]
ids0 = [doc["_id"] for _, doc in zip(range(PAGE_SIZE), cur0)]
nps0 = cur0._next_page_state
assert isinstance(nps0, str)

cur1 = filled_pagination_collection.find(
filter={"even": True},
initial_page_state=nps0,
)
ids1 = [doc["_id"] for _, doc in zip(range(page_size), cur1)]
ids1 = [doc["_id"] for _, doc in zip(range(PAGE_SIZE), cur1)]
nps1 = cur1._next_page_state
assert isinstance(nps1, str)

cur2 = filled_pagination_collection.find(
filter={"even": True},
initial_page_state=nps1,
)
ids2 = [doc["_id"] for _, doc in zip(range(page_size), cur2)]
ids2 = [doc["_id"] for doc in cur2]
assert cur2._next_page_state is None

expected_ids = [i for i in range(NUM_DOCS_PAGINATION) if i % 2 == 0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@
from astrapy.info import CollectionDefinition

from ..conftest import (
HEADER_EMBEDDING_API_KEY_OPENAI,
EMBEDDING_PROVIDER_API_KEY,
DefaultAsyncCollection,
)


@pytest.mark.skipif(
HEADER_EMBEDDING_API_KEY_OPENAI is None,
reason="No HEADER_EMBEDDING_API_KEY_OPENAI credential",
EMBEDDING_PROVIDER_API_KEY is None,
reason="No embedding API Key credential",
)
class TestCollectionVectorizeMethodsAsync:
@pytest.mark.describe("test of vectorize in collection methods, async")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@
from astrapy.info import CollectionDefinition

from ..conftest import (
HEADER_EMBEDDING_API_KEY_OPENAI,
EMBEDDING_PROVIDER_API_KEY,
DefaultCollection,
)


@pytest.mark.skipif(
HEADER_EMBEDDING_API_KEY_OPENAI is None,
reason="No HEADER_EMBEDDING_API_KEY_OPENAI credential",
EMBEDDING_PROVIDER_API_KEY is None,
reason="No embedding API Key credential",
)
class TestCollectionVectorizeMethodsSync:
@pytest.mark.describe("test of vectorize in collection methods, sync")
Expand Down
8 changes: 6 additions & 2 deletions tests/base/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
ADMIN_ENV_LIST,
ADMIN_ENV_VARIABLE_MAP,
CQL_AVAILABLE,
HEADER_EMBEDDING_API_KEY_OPENAI,
EMBEDDING_PROVIDER_API_KEY,
EMBEDDING_PROVIDER_DIMENSION,
EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME,
IS_ASTRA_DB,
RUN_SHARED_SECRET_VECTORIZE_TESTS,
SECONDARY_KEYSPACE,
Expand Down Expand Up @@ -62,7 +64,9 @@
"async_fail_if_not_removed",
"clean_nulls_from_dict",
"sync_fail_if_not_removed",
"HEADER_EMBEDDING_API_KEY_OPENAI",
"EMBEDDING_PROVIDER_API_KEY",
"EMBEDDING_PROVIDER_DIMENSION",
"EMBEDDING_PROVIDER_SHARED_SECRET_KEY_NAME",
"IS_ASTRA_DB",
"ADMIN_ENV_LIST",
"ADMIN_ENV_VARIABLE_MAP",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ async def test_table_udt_weirdcolumns_async(
try:
for cql_statement in WEIRD_UDT_BASE_INITIALIZE_STATEMENTS:
cql_session.execute(cql_statement)
await asyncio.sleep(1.5) # udt propagation requires some time, it seems

# test a read and a write for 'base weird'
atable_weird_base = async_database.get_table(WEIRD_UDT_BASE_TABLE_NAME)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ def test_table_udt_weirdcolumns_sync(
try:
for cql_statement in WEIRD_UDT_BASE_INITIALIZE_STATEMENTS:
cql_session.execute(cql_statement)
time.sleep(1.5) # udt propagation requires some time, it seems

# test a read and a write for 'base weird'
table_weird_base = sync_database.get_table(WEIRD_UDT_BASE_TABLE_NAME)
Expand Down
Loading
Loading