diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 46b9b6b..3b005e5 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.9"
+ ".": "0.1.0-alpha.10"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 1d67610..a24fc51 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 14
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-cd86445a8ef095a12e7bf74baddc7d5a8225531f8edb88ba613e12a52e219a42.yml
-openapi_spec_hash: 6da635b19c554a476ea9c967b619ae5b
-config_hash: f5fb1effd4b0e263e1e93de3f573f46f
+configured_endpoints: 15
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-9cd927800fd253f2116ab12aa496b086605bd31d295cb600b65d793203e1e9e7.yml
+openapi_spec_hash: cd7f6d9db9ae338091bc6da83e27f4a6
+config_hash: e56152e1ee1a9273241d925702077e49
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a024c59..1f1810a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog
+## 0.1.0-alpha.10 (2026-03-03)
+
+Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)
+
+### Features
+
+* **api:** manual updates ([7fcec0a](https://github.com/zeroentropy-ai/zeroentropy-python/commit/7fcec0a1c901f98c5953c66affbd742fe45a4de6))
+* **api:** manual updates ([71f3afe](https://github.com/zeroentropy-ai/zeroentropy-python/commit/71f3afe6cc59df2c9a0c92e97f602b38a9e5d723))
+
## 0.1.0-alpha.9 (2026-03-03)
Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.8...v0.1.0-alpha.9)
diff --git a/api.md b/api.md
index cc69a2d..5b96560 100644
--- a/api.md
+++ b/api.md
@@ -75,9 +75,10 @@ Methods:
Types:
```python
-from zeroentropy.types import ModelRerankResponse
+from zeroentropy.types import ModelEmbedResponse, ModelRerankResponse
```
Methods:
+- client.models.embed(\*\*params) -> ModelEmbedResponse
- client.models.rerank(\*\*params) -> ModelRerankResponse
diff --git a/pyproject.toml b/pyproject.toml
index 4fe374f..903a0c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "zeroentropy"
-version = "0.1.0-alpha.9"
+version = "0.1.0-alpha.10"
description = "The official Python library for the ZeroEntropy API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/zeroentropy/_version.py b/src/zeroentropy/_version.py
index 61ae32c..3b653c4 100644
--- a/src/zeroentropy/_version.py
+++ b/src/zeroentropy/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "zeroentropy"
-__version__ = "0.1.0-alpha.9" # x-release-please-version
+__version__ = "0.1.0-alpha.10" # x-release-please-version
diff --git a/src/zeroentropy/resources/models.py b/src/zeroentropy/resources/models.py
index 33b1764..e41d330 100644
--- a/src/zeroentropy/resources/models.py
+++ b/src/zeroentropy/resources/models.py
@@ -2,12 +2,12 @@
from __future__ import annotations
-from typing import Optional
+from typing import Union, Optional
from typing_extensions import Literal
import httpx
-from ..types import model_rerank_params
+from ..types import model_embed_params, model_rerank_params
from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
@@ -19,6 +19,7 @@
async_to_streamed_response_wrapper,
)
from .._base_client import make_request_options
+from ..types.model_embed_response import ModelEmbedResponse
from ..types.model_rerank_response import ModelRerankResponse
__all__ = ["ModelsResource", "AsyncModelsResource"]
@@ -44,6 +45,84 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse:
"""
return ModelsResourceWithStreamingResponse(self)
+ def embed(
+ self,
+ *,
+ input: Union[str, SequenceNotStr[str]],
+ input_type: Literal["query", "document"],
+ model: str,
+ latency: Optional[Literal["fast", "slow"]] | Omit = omit,
+ output_dimensions: Optional[int] | Omit = omit,
+ output_format: Literal["float", "base64"] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> ModelEmbedResponse:
+ """
+ Embeds the provided input text with ZeroEntropy embedding models.
+
+ The results will be returned in the same order as the text provided. The
+ embedding is such that queries will have high cosine similarity with documents
+ that are relevant to that query.
+
+ Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
+ and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
+ requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
+ bytes-per-minute. If even this is exceeded, you will get a `429` error. To
+ request higher ratelimits, please contact
+ [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+ [Discord](https://go.zeroentropy.dev/discord) or
+ [Slack](https://go.zeroentropy.dev/slack)!
+
+ Args:
+ input: The string, or list of strings, to embed
+
+ input_type: The input type. For retrieval tasks, either `query` or `document`.
+
+ model: The model ID to use for embedding. Options are: ["zembed-1"]
+
+ latency: Whether the call will be inferenced "fast" or "slow". Rate limits for slow API
+ calls are orders of magnitude higher, but you can expect >10 second latency.
+ Fast inferences are guaranteed subsecond, but rate limits are lower. If not
+ specified, first a "fast" call will be attempted, but if you have exceeded your
+ fast rate limit, then a slow call will be executed. If explicitly set to "fast",
+ then 429 will be returned if it cannot be executed fast.
+
+ output_dimensions: The output dimensionality of the embedding model.
+
+ output_format: The output format of the embedding. `base64` is significantly more efficient
+ than `float`. The default is `float`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/models/embed",
+ body=maybe_transform(
+ {
+ "input": input,
+ "input_type": input_type,
+ "model": model,
+ "latency": latency,
+ "output_dimensions": output_dimensions,
+ "output_format": output_format,
+ },
+ model_embed_params.ModelEmbedParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelEmbedResponse,
+ )
+
def rerank(
self,
*,
@@ -68,12 +147,13 @@ def rerank(
by the reranker model. The results will be returned in descending order of
relevance.
- Organizations will, by default, have a ratelimit of `2,500,000`
- bytes-per-minute. If this is exceeded, requests will be throttled into
- `latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
- exceeded, you will get a `429` error. To request higher ratelimits, please
- contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
- us on [Discord](https://go.zeroentropy.dev/discord) or
+ Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
+ and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
+ requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
+ bytes-per-minute. If even this is exceeded, you will get a `429` error. To
+ request higher ratelimits, please contact
+ [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+ [Discord](https://go.zeroentropy.dev/discord) or
[Slack](https://go.zeroentropy.dev/slack)!
Args:
@@ -141,6 +221,84 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse:
"""
return AsyncModelsResourceWithStreamingResponse(self)
+ async def embed(
+ self,
+ *,
+ input: Union[str, SequenceNotStr[str]],
+ input_type: Literal["query", "document"],
+ model: str,
+ latency: Optional[Literal["fast", "slow"]] | Omit = omit,
+ output_dimensions: Optional[int] | Omit = omit,
+ output_format: Literal["float", "base64"] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> ModelEmbedResponse:
+ """
+ Embeds the provided input text with ZeroEntropy embedding models.
+
+ The results will be returned in the same order as the text provided. The
+ embedding is such that queries will have high cosine similarity with documents
+ that are relevant to that query.
+
+ Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
+ and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
+ requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
+ bytes-per-minute. If even this is exceeded, you will get a `429` error. To
+ request higher ratelimits, please contact
+ [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+ [Discord](https://go.zeroentropy.dev/discord) or
+ [Slack](https://go.zeroentropy.dev/slack)!
+
+ Args:
+ input: The string, or list of strings, to embed
+
+ input_type: The input type. For retrieval tasks, either `query` or `document`.
+
+ model: The model ID to use for embedding. Options are: ["zembed-1"]
+
+ latency: Whether the call will be inferenced "fast" or "slow". Rate limits for slow API
+ calls are orders of magnitude higher, but you can expect >10 second latency.
+ Fast inferences are guaranteed subsecond, but rate limits are lower. If not
+ specified, first a "fast" call will be attempted, but if you have exceeded your
+ fast rate limit, then a slow call will be executed. If explicitly set to "fast",
+ then 429 will be returned if it cannot be executed fast.
+
+ output_dimensions: The output dimensionality of the embedding model.
+
+ output_format: The output format of the embedding. `base64` is significantly more efficient
+ than `float`. The default is `float`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/models/embed",
+ body=await async_maybe_transform(
+ {
+ "input": input,
+ "input_type": input_type,
+ "model": model,
+ "latency": latency,
+ "output_dimensions": output_dimensions,
+ "output_format": output_format,
+ },
+ model_embed_params.ModelEmbedParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelEmbedResponse,
+ )
+
async def rerank(
self,
*,
@@ -165,12 +323,13 @@ async def rerank(
by the reranker model. The results will be returned in descending order of
relevance.
- Organizations will, by default, have a ratelimit of `2,500,000`
- bytes-per-minute. If this is exceeded, requests will be throttled into
- `latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
- exceeded, you will get a `429` error. To request higher ratelimits, please
- contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
- us on [Discord](https://go.zeroentropy.dev/discord) or
+ Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
+ and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
+ requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
+ bytes-per-minute. If even this is exceeded, you will get a `429` error. To
+ request higher ratelimits, please contact
+ [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+ [Discord](https://go.zeroentropy.dev/discord) or
[Slack](https://go.zeroentropy.dev/slack)!
Args:
@@ -222,6 +381,9 @@ class ModelsResourceWithRawResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models
+ self.embed = to_raw_response_wrapper(
+ models.embed,
+ )
self.rerank = to_raw_response_wrapper(
models.rerank,
)
@@ -231,6 +393,9 @@ class AsyncModelsResourceWithRawResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models
+ self.embed = async_to_raw_response_wrapper(
+ models.embed,
+ )
self.rerank = async_to_raw_response_wrapper(
models.rerank,
)
@@ -240,6 +405,9 @@ class ModelsResourceWithStreamingResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models
+ self.embed = to_streamed_response_wrapper(
+ models.embed,
+ )
self.rerank = to_streamed_response_wrapper(
models.rerank,
)
@@ -249,6 +417,9 @@ class AsyncModelsResourceWithStreamingResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models
+ self.embed = async_to_streamed_response_wrapper(
+ models.embed,
+ )
self.rerank = async_to_streamed_response_wrapper(
models.rerank,
)
diff --git a/src/zeroentropy/types/__init__.py b/src/zeroentropy/types/__init__.py
index 1117c5f..c9c0f7a 100644
--- a/src/zeroentropy/types/__init__.py
+++ b/src/zeroentropy/types/__init__.py
@@ -2,8 +2,10 @@
from __future__ import annotations
+from .model_embed_params import ModelEmbedParams as ModelEmbedParams
from .document_add_params import DocumentAddParams as DocumentAddParams
from .model_rerank_params import ModelRerankParams as ModelRerankParams
+from .model_embed_response import ModelEmbedResponse as ModelEmbedResponse
from .collection_add_params import CollectionAddParams as CollectionAddParams
from .document_add_response import DocumentAddResponse as DocumentAddResponse
from .model_rerank_response import ModelRerankResponse as ModelRerankResponse
diff --git a/src/zeroentropy/types/model_embed_params.py b/src/zeroentropy/types/model_embed_params.py
new file mode 100644
index 0000000..e89e9db
--- /dev/null
+++ b/src/zeroentropy/types/model_embed_params.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["ModelEmbedParams"]
+
+
+class ModelEmbedParams(TypedDict, total=False):
+ input: Required[Union[str, SequenceNotStr[str]]]
+ """The string, or list of strings, to embed"""
+
+ input_type: Required[Literal["query", "document"]]
+ """The input type. For retrieval tasks, either `query` or `document`."""
+
+ model: Required[str]
+ """The model ID to use for embedding. Options are: ["zembed-1"]"""
+
+ latency: Optional[Literal["fast", "slow"]]
+ """Whether the call will be inferenced "fast" or "slow".
+
+ Rate limits for slow API calls are orders of magnitude higher, but you can
+ expect >10 second latency. Fast inferences are guaranteed subsecond, but rate
+ limits are lower. If not specified, first a "fast" call will be attempted, but
+ if you have exceeded your fast rate limit, then a slow call will be executed. If
+ explicitly set to "fast", then 429 will be returned if it cannot be executed
+ fast.
+ """
+
+ output_dimensions: Optional[int]
+ """The output dimensionality of the embedding model."""
+
+ output_format: Literal["float", "base64"]
+ """The output format of the embedding.
+
+ `base64` is significantly more efficient than `float`. The default is `float`.
+ """
diff --git a/src/zeroentropy/types/model_embed_response.py b/src/zeroentropy/types/model_embed_response.py
new file mode 100644
index 0000000..b272e41
--- /dev/null
+++ b/src/zeroentropy/types/model_embed_response.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union
+
+from .._models import BaseModel
+
+__all__ = ["ModelEmbedResponse", "Result", "Usage"]
+
+
+class Result(BaseModel):
+ embedding: Union[List[float], str]
+ """The embedding of the input text.
+
+ If `base64` format is requested, the response will be an fp32 little-endian byte
+ array, encoded as base64.
+ """
+
+
+class Usage(BaseModel):
+ """Statistics regarding the tokens used by the request."""
+
+ total_bytes: int
+ """The total number of bytes in the request. This is used for ratelimiting."""
+
+ total_tokens: int
+ """The total number of tokens in the request. This is used for billing."""
+
+
+class ModelEmbedResponse(BaseModel):
+ results: List[Result]
+ """The list of embedding results."""
+
+ usage: Usage
+ """Statistics regarding the tokens used by the request."""
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index 0022c11..2efd7ec 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from zeroentropy import ZeroEntropy, AsyncZeroEntropy
-from zeroentropy.types import ModelRerankResponse
+from zeroentropy.types import ModelEmbedResponse, ModelRerankResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -17,6 +17,55 @@
class TestModels:
parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+ @parametrize
+ def test_method_embed(self, client: ZeroEntropy) -> None:
+ model = client.models.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ )
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ @parametrize
+ def test_method_embed_with_all_params(self, client: ZeroEntropy) -> None:
+ model = client.models.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ latency="fast",
+ output_dimensions=0,
+ output_format="float",
+ )
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ @parametrize
+ def test_raw_response_embed(self, client: ZeroEntropy) -> None:
+ response = client.models.with_raw_response.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ @parametrize
+ def test_streaming_response_embed(self, client: ZeroEntropy) -> None:
+ with client.models.with_streaming_response.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = response.parse()
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
@parametrize
def test_method_rerank(self, client: ZeroEntropy) -> None:
model = client.models.rerank(
@@ -71,6 +120,55 @@ class TestAsyncModels:
"async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
)
+ @parametrize
+ async def test_method_embed(self, async_client: AsyncZeroEntropy) -> None:
+ model = await async_client.models.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ )
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ @parametrize
+ async def test_method_embed_with_all_params(self, async_client: AsyncZeroEntropy) -> None:
+ model = await async_client.models.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ latency="fast",
+ output_dimensions=0,
+ output_format="float",
+ )
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ @parametrize
+ async def test_raw_response_embed(self, async_client: AsyncZeroEntropy) -> None:
+ response = await async_client.models.with_raw_response.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = await response.parse()
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_embed(self, async_client: AsyncZeroEntropy) -> None:
+ async with async_client.models.with_streaming_response.embed(
+ input="string",
+ input_type="query",
+ model="model",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = await response.parse()
+ assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
@parametrize
async def test_method_rerank(self, async_client: AsyncZeroEntropy) -> None:
model = await async_client.models.rerank(