Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
".": "0.1.0-alpha.9"
".": "0.1.0-alpha.10"
}
8 changes: 4 additions & 4 deletions .stats.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
configured_endpoints: 14
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-cd86445a8ef095a12e7bf74baddc7d5a8225531f8edb88ba613e12a52e219a42.yml
openapi_spec_hash: 6da635b19c554a476ea9c967b619ae5b
config_hash: f5fb1effd4b0e263e1e93de3f573f46f
configured_endpoints: 15
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-9cd927800fd253f2116ab12aa496b086605bd31d295cb600b65d793203e1e9e7.yml
openapi_spec_hash: cd7f6d9db9ae338091bc6da83e27f4a6
config_hash: e56152e1ee1a9273241d925702077e49
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## 0.1.0-alpha.10 (2026-03-03)

Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)

### Features

* **api:** manual updates ([7fcec0a](https://github.com/zeroentropy-ai/zeroentropy-python/commit/7fcec0a1c901f98c5953c66affbd742fe45a4de6))
* **api:** manual updates ([71f3afe](https://github.com/zeroentropy-ai/zeroentropy-python/commit/71f3afe6cc59df2c9a0c92e97f602b38a9e5d723))

## 0.1.0-alpha.9 (2026-03-03)

Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.8...v0.1.0-alpha.9)
Expand Down
3 changes: 2 additions & 1 deletion api.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@ Methods:
Types:

```python
from zeroentropy.types import ModelRerankResponse
from zeroentropy.types import ModelEmbedResponse, ModelRerankResponse
```

Methods:

- <code title="post /models/embed">client.models.<a href="./src/zeroentropy/resources/models.py">embed</a>(\*\*<a href="src/zeroentropy/types/model_embed_params.py">params</a>) -> <a href="./src/zeroentropy/types/model_embed_response.py">ModelEmbedResponse</a></code>
- <code title="post /models/rerank">client.models.<a href="./src/zeroentropy/resources/models.py">rerank</a>(\*\*<a href="src/zeroentropy/types/model_rerank_params.py">params</a>) -> <a href="./src/zeroentropy/types/model_rerank_response.py">ModelRerankResponse</a></code>
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "zeroentropy"
version = "0.1.0-alpha.9"
version = "0.1.0-alpha.10"
description = "The official Python library for the ZeroEntropy API"
dynamic = ["readme"]
license = "Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion src/zeroentropy/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

__title__ = "zeroentropy"
__version__ = "0.1.0-alpha.9" # x-release-please-version
__version__ = "0.1.0-alpha.10" # x-release-please-version
199 changes: 185 additions & 14 deletions src/zeroentropy/resources/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from __future__ import annotations

from typing import Optional
from typing import Union, Optional
from typing_extensions import Literal

import httpx

from ..types import model_rerank_params
from ..types import model_embed_params, model_rerank_params
from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
Expand All @@ -19,6 +19,7 @@
async_to_streamed_response_wrapper,
)
from .._base_client import make_request_options
from ..types.model_embed_response import ModelEmbedResponse
from ..types.model_rerank_response import ModelRerankResponse

__all__ = ["ModelsResource", "AsyncModelsResource"]
Expand All @@ -44,6 +45,84 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse:
"""
return ModelsResourceWithStreamingResponse(self)

def embed(
    self,
    *,
    input: Union[str, SequenceNotStr[str]],
    input_type: Literal["query", "document"],
    model: str,
    latency: Optional[Literal["fast", "slow"]] | Omit = omit,
    output_dimensions: Optional[int] | Omit = omit,
    output_format: Literal["float", "base64"] | Omit = omit,
    # The remaining arguments let you pass extra request parameters that are not
    # exposed as keyword arguments; values given here override client-level ones.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelEmbedResponse:
    """Embed the provided input text with ZeroEntropy embedding models.

    Results come back in the same order as the input was given. The
    embeddings are trained so that queries have high cosine similarity
    with the documents relevant to them.

    By default, organizations are rate-limited to `2,500,000`
    bytes-per-minute and 1000 QPM, refreshed every 15 seconds. Exceeding
    this throttles requests into `latency: "slow"` mode, up to
    `20,000,000` bytes-per-minute; beyond that a `429` error is
    returned. To request higher ratelimits, please contact
    [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or
    message us on [Discord](https://go.zeroentropy.dev/discord) or
    [Slack](https://go.zeroentropy.dev/slack)!

    Args:
      input: The string, or list of strings, to embed

      input_type: The input type. For retrieval tasks, either `query` or `document`.

      model: The model ID to use for embedding. Options are: ["zembed-1"]

      latency: Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
          calls are orders of magnitude higher, but you can expect >10 second latency.
          Fast inferences are guaranteed subsecond, but rate limits are lower. If not
          specified, first a "fast" call will be attempted, but if you have exceeded your
          fast rate limit, then a slow call will be executed. If explicitly set to "fast",
          then 429 will be returned if it cannot be executed fast.

      output_dimensions: The output dimensionality of the embedding model.

      output_format: The output format of the embedding. `base64` is significantly more efficient
          than `float`. The default is `float`.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Build the JSON body first, then let maybe_transform apply the
    # TypedDict-driven serialization rules (omitted values are dropped).
    payload = {
        "input": input,
        "input_type": input_type,
        "model": model,
        "latency": latency,
        "output_dimensions": output_dimensions,
        "output_format": output_format,
    }
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._post(
        "/models/embed",
        body=maybe_transform(payload, model_embed_params.ModelEmbedParams),
        options=request_options,
        cast_to=ModelEmbedResponse,
    )

def rerank(
self,
*,
Expand All @@ -68,12 +147,13 @@ def rerank(
by the reranker model. The results will be returned in descending order of
relevance.

Organizations will, by default, have a ratelimit of `2,500,000`
bytes-per-minute. If this is exceeded, requests will be throttled into
`latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
exceeded, you will get a `429` error. To request higher ratelimits, please
contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
us on [Discord](https://go.zeroentropy.dev/discord) or
Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
bytes-per-minute. If even this is exceeded, you will get a `429` error. To
request higher ratelimits, please contact
[founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
[Discord](https://go.zeroentropy.dev/discord) or
[Slack](https://go.zeroentropy.dev/slack)!

Args:
Expand Down Expand Up @@ -141,6 +221,84 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse:
"""
return AsyncModelsResourceWithStreamingResponse(self)

async def embed(
    self,
    *,
    input: Union[str, SequenceNotStr[str]],
    input_type: Literal["query", "document"],
    model: str,
    latency: Optional[Literal["fast", "slow"]] | Omit = omit,
    output_dimensions: Optional[int] | Omit = omit,
    output_format: Literal["float", "base64"] | Omit = omit,
    # The remaining arguments let you pass extra request parameters that are not
    # exposed as keyword arguments; values given here override client-level ones.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelEmbedResponse:
    """Embed the provided input text with ZeroEntropy embedding models.

    Results come back in the same order as the input was given. The
    embeddings are trained so that queries have high cosine similarity
    with the documents relevant to them.

    By default, organizations are rate-limited to `2,500,000`
    bytes-per-minute and 1000 QPM, refreshed every 15 seconds. Exceeding
    this throttles requests into `latency: "slow"` mode, up to
    `20,000,000` bytes-per-minute; beyond that a `429` error is
    returned. To request higher ratelimits, please contact
    [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or
    message us on [Discord](https://go.zeroentropy.dev/discord) or
    [Slack](https://go.zeroentropy.dev/slack)!

    Args:
      input: The string, or list of strings, to embed

      input_type: The input type. For retrieval tasks, either `query` or `document`.

      model: The model ID to use for embedding. Options are: ["zembed-1"]

      latency: Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
          calls are orders of magnitude higher, but you can expect >10 second latency.
          Fast inferences are guaranteed subsecond, but rate limits are lower. If not
          specified, first a "fast" call will be attempted, but if you have exceeded your
          fast rate limit, then a slow call will be executed. If explicitly set to "fast",
          then 429 will be returned if it cannot be executed fast.

      output_dimensions: The output dimensionality of the embedding model.

      output_format: The output format of the embedding. `base64` is significantly more efficient
          than `float`. The default is `float`.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Build the JSON body first, then let async_maybe_transform apply the
    # TypedDict-driven serialization rules (omitted values are dropped).
    payload = {
        "input": input,
        "input_type": input_type,
        "model": model,
        "latency": latency,
        "output_dimensions": output_dimensions,
        "output_format": output_format,
    }
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(
        "/models/embed",
        body=await async_maybe_transform(payload, model_embed_params.ModelEmbedParams),
        options=request_options,
        cast_to=ModelEmbedResponse,
    )

async def rerank(
self,
*,
Expand All @@ -165,12 +323,13 @@ async def rerank(
by the reranker model. The results will be returned in descending order of
relevance.

Organizations will, by default, have a ratelimit of `2,500,000`
bytes-per-minute. If this is exceeded, requests will be throttled into
`latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
exceeded, you will get a `429` error. To request higher ratelimits, please
contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
us on [Discord](https://go.zeroentropy.dev/discord) or
Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
bytes-per-minute. If even this is exceeded, you will get a `429` error. To
request higher ratelimits, please contact
[founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
[Discord](https://go.zeroentropy.dev/discord) or
[Slack](https://go.zeroentropy.dev/slack)!

Args:
Expand Down Expand Up @@ -222,6 +381,9 @@ class ModelsResourceWithRawResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models

self.embed = to_raw_response_wrapper(
models.embed,
)
self.rerank = to_raw_response_wrapper(
models.rerank,
)
Expand All @@ -231,6 +393,9 @@ class AsyncModelsResourceWithRawResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models

self.embed = async_to_raw_response_wrapper(
models.embed,
)
self.rerank = async_to_raw_response_wrapper(
models.rerank,
)
Expand All @@ -240,6 +405,9 @@ class ModelsResourceWithStreamingResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models

self.embed = to_streamed_response_wrapper(
models.embed,
)
self.rerank = to_streamed_response_wrapper(
models.rerank,
)
Expand All @@ -249,6 +417,9 @@ class AsyncModelsResourceWithStreamingResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models

self.embed = async_to_streamed_response_wrapper(
models.embed,
)
self.rerank = async_to_streamed_response_wrapper(
models.rerank,
)
2 changes: 2 additions & 0 deletions src/zeroentropy/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from __future__ import annotations

from .model_embed_params import ModelEmbedParams as ModelEmbedParams
from .document_add_params import DocumentAddParams as DocumentAddParams
from .model_rerank_params import ModelRerankParams as ModelRerankParams
from .model_embed_response import ModelEmbedResponse as ModelEmbedResponse
from .collection_add_params import CollectionAddParams as CollectionAddParams
from .document_add_response import DocumentAddResponse as DocumentAddResponse
from .model_rerank_response import ModelRerankResponse as ModelRerankResponse
Expand Down
41 changes: 41 additions & 0 deletions src/zeroentropy/types/model_embed_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Union, Optional
from typing_extensions import Literal, Required, TypedDict

from .._types import SequenceNotStr

__all__ = ["ModelEmbedParams"]


class ModelEmbedParams(TypedDict, total=False):
input: Required[Union[str, SequenceNotStr[str]]]
"""The string, or list of strings, to embed"""

input_type: Required[Literal["query", "document"]]
"""The input type. For retrieval tasks, either `query` or `document`."""

model: Required[str]
"""The model ID to use for embedding. Options are: ["zembed-1"]"""

latency: Optional[Literal["fast", "slow"]]
"""Whether the call will be inferenced "fast" or "slow".

RateLimits for slow API calls are orders of magnitude higher, but you can
expect >10 second latency. Fast inferences are guaranteed subsecond, but rate
limits are lower. If not specified, first a "fast" call will be attempted, but
if you have exceeded your fast rate limit, then a slow call will be executed. If
explicitly set to "fast", then 429 will be returned if it cannot be executed
fast.
"""

output_dimensions: Optional[int]
"""The output dimensionality of the embedding model."""

output_format: Literal["float", "base64"]
"""The output format of the embedding.

`base64` is significantly more efficient than `float`. The default is `float`.
"""
Loading