diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 46b9b6b..3b005e5 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.1.0-alpha.9"
+  ".": "0.1.0-alpha.10"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 1d67610..a24fc51 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 14
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-cd86445a8ef095a12e7bf74baddc7d5a8225531f8edb88ba613e12a52e219a42.yml
-openapi_spec_hash: 6da635b19c554a476ea9c967b619ae5b
-config_hash: f5fb1effd4b0e263e1e93de3f573f46f
+configured_endpoints: 15
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-9cd927800fd253f2116ab12aa496b086605bd31d295cb600b65d793203e1e9e7.yml
+openapi_spec_hash: cd7f6d9db9ae338091bc6da83e27f4a6
+config_hash: e56152e1ee1a9273241d925702077e49
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a024c59..1f1810a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changelog
 
+## 0.1.0-alpha.10 (2026-03-03)
+
+Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)
+
+### Features
+
+* **api:** manual updates ([7fcec0a](https://github.com/zeroentropy-ai/zeroentropy-python/commit/7fcec0a1c901f98c5953c66affbd742fe45a4de6))
+* **api:** manual updates ([71f3afe](https://github.com/zeroentropy-ai/zeroentropy-python/commit/71f3afe6cc59df2c9a0c92e97f602b38a9e5d723))
+
 ## 0.1.0-alpha.9 (2026-03-03)
 
 Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.8...v0.1.0-alpha.9)
diff --git a/api.md b/api.md
index cc69a2d..5b96560 100644
--- a/api.md
+++ b/api.md
@@ -75,9 +75,10 @@ Methods:
 Types:
 
 ```python
-from zeroentropy.types import ModelRerankResponse
+from zeroentropy.types import ModelEmbedResponse, ModelRerankResponse
 ```
 
 Methods:
 
+- <code title="post /models/embed">client.models.<a href="./src/zeroentropy/resources/models.py">embed</a>(\*\*<a href="src/zeroentropy/types/model_embed_params.py">params</a>) -> <a href="./src/zeroentropy/types/model_embed_response.py">ModelEmbedResponse</a></code>
 - <code title="post /models/rerank">client.models.<a href="./src/zeroentropy/resources/models.py">rerank</a>(\*\*<a href="src/zeroentropy/types/model_rerank_params.py">params</a>) -> <a href="./src/zeroentropy/types/model_rerank_response.py">ModelRerankResponse</a></code>
diff --git a/pyproject.toml b/pyproject.toml
index 4fe374f..903a0c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "zeroentropy"
-version = "0.1.0-alpha.9"
+version = "0.1.0-alpha.10"
 description = "The official Python library for the ZeroEntropy API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/zeroentropy/_version.py b/src/zeroentropy/_version.py
index 61ae32c..3b653c4 100644
--- a/src/zeroentropy/_version.py
+++ b/src/zeroentropy/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "zeroentropy"
-__version__ = "0.1.0-alpha.9"  # x-release-please-version
+__version__ = "0.1.0-alpha.10"  # x-release-please-version
diff --git a/src/zeroentropy/resources/models.py b/src/zeroentropy/resources/models.py
index 33b1764..e41d330 100644
--- a/src/zeroentropy/resources/models.py
+++ b/src/zeroentropy/resources/models.py
@@ -2,12 +2,12 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Union, Optional
 from typing_extensions import Literal
 
 import httpx
 
-from ..types import model_rerank_params
+from ..types import model_embed_params, model_rerank_params
 from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
 from .._utils import maybe_transform, async_maybe_transform
 from .._compat import cached_property
@@ -19,6 +19,7 @@
     async_to_streamed_response_wrapper,
 )
 from .._base_client import make_request_options
+from ..types.model_embed_response import ModelEmbedResponse
 from ..types.model_rerank_response import ModelRerankResponse
 
 __all__ = ["ModelsResource", "AsyncModelsResource"]
@@ -44,6 +45,84 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse:
         """
         return ModelsResourceWithStreamingResponse(self)
 
+    def embed(
+        self,
+        *,
+        input: Union[str, SequenceNotStr[str]],
+        input_type: Literal["query", "document"],
+        model: str,
+        latency: Optional[Literal["fast", "slow"]] | Omit = omit,
+        output_dimensions: Optional[int] | Omit = omit,
+        output_format: Literal["float", "base64"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> ModelEmbedResponse:
+        """
+        Embeds the provided input text with ZeroEntropy embedding models.
+
+        The results will be returned in the same order as the text provided. The
+        embedding is such that queries will have high cosine similarity with documents
+        that are relevant to that query.
+
+        By default, each organization has a rate limit of `2,500,000` bytes per minute
+        and 1000 QPM; rate limits are refreshed every 15 seconds. Requests over that
+        limit are throttled into `latency: "slow"` mode, up to `20,000,000` bytes per
+        minute. Beyond that second limit, you will get a `429` error. To request
+        higher rate limits, please contact
+        [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+        [Discord](https://go.zeroentropy.dev/discord) or
+        [Slack](https://go.zeroentropy.dev/slack)!
+
+        Args:
+          input: The string, or list of strings, to embed
+
+          input_type: The input type. For retrieval tasks, either `query` or `document`.
+
+          model: The model ID to use for embedding. Options are: ["zembed-1"]
+
+          latency: Whether the call runs as a "fast" or "slow" inference. Rate limits for slow API
+              calls are orders of magnitude higher, but you can expect >10 second latency.
+              Fast inferences are guaranteed subsecond, but rate limits are lower. If not
+              specified, first a "fast" call will be attempted, but if you have exceeded your
+              fast rate limit, then a slow call will be executed. If explicitly set to "fast",
+              then 429 will be returned if it cannot be executed fast.
+
+          output_dimensions: The output dimensionality of the embedding model.
+
+          output_format: The output format of the embedding. `base64` is significantly more efficient
+              than `float`. The default is `float`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/models/embed",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "input_type": input_type,
+                    "model": model,
+                    "latency": latency,
+                    "output_dimensions": output_dimensions,
+                    "output_format": output_format,
+                },
+                model_embed_params.ModelEmbedParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelEmbedResponse,
+        )
+
     def rerank(
         self,
         *,
@@ -68,12 +147,13 @@ def rerank(
         by the reranker model. The results will be returned in descending order of
         relevance.
 
-        Organizations will, by default, have a ratelimit of `2,500,000`
-        bytes-per-minute. If this is exceeded, requests will be throttled into
-        `latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
-        exceeded, you will get a `429` error. To request higher ratelimits, please
-        contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
-        us on [Discord](https://go.zeroentropy.dev/discord) or
+        By default, each organization has a rate limit of `2,500,000` bytes per minute
+        and 1000 QPM; rate limits are refreshed every 15 seconds. Requests over that
+        limit are throttled into `latency: "slow"` mode, up to `20,000,000` bytes per
+        minute. Beyond that second limit, you will get a `429` error. To request
+        higher rate limits, please contact
+        [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+        [Discord](https://go.zeroentropy.dev/discord) or
         [Slack](https://go.zeroentropy.dev/slack)!
 
         Args:
@@ -141,6 +221,84 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse:
         """
         return AsyncModelsResourceWithStreamingResponse(self)
 
+    async def embed(
+        self,
+        *,
+        input: Union[str, SequenceNotStr[str]],
+        input_type: Literal["query", "document"],
+        model: str,
+        latency: Optional[Literal["fast", "slow"]] | Omit = omit,
+        output_dimensions: Optional[int] | Omit = omit,
+        output_format: Literal["float", "base64"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> ModelEmbedResponse:
+        """
+        Embeds the provided input text with ZeroEntropy embedding models.
+
+        The results will be returned in the same order as the text provided. The
+        embedding is such that queries will have high cosine similarity with documents
+        that are relevant to that query.
+
+        By default, each organization has a rate limit of `2,500,000` bytes per minute
+        and 1000 QPM; rate limits are refreshed every 15 seconds. Requests over that
+        limit are throttled into `latency: "slow"` mode, up to `20,000,000` bytes per
+        minute. Beyond that second limit, you will get a `429` error. To request
+        higher rate limits, please contact
+        [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+        [Discord](https://go.zeroentropy.dev/discord) or
+        [Slack](https://go.zeroentropy.dev/slack)!
+
+        Args:
+          input: The string, or list of strings, to embed
+
+          input_type: The input type. For retrieval tasks, either `query` or `document`.
+
+          model: The model ID to use for embedding. Options are: ["zembed-1"]
+
+          latency: Whether the call runs as a "fast" or "slow" inference. Rate limits for slow API
+              calls are orders of magnitude higher, but you can expect >10 second latency.
+              Fast inferences are guaranteed subsecond, but rate limits are lower. If not
+              specified, first a "fast" call will be attempted, but if you have exceeded your
+              fast rate limit, then a slow call will be executed. If explicitly set to "fast",
+              then 429 will be returned if it cannot be executed fast.
+
+          output_dimensions: The output dimensionality of the embedding model.
+
+          output_format: The output format of the embedding. `base64` is significantly more efficient
+              than `float`. The default is `float`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/models/embed",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "input_type": input_type,
+                    "model": model,
+                    "latency": latency,
+                    "output_dimensions": output_dimensions,
+                    "output_format": output_format,
+                },
+                model_embed_params.ModelEmbedParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelEmbedResponse,
+        )
+
     async def rerank(
         self,
         *,
@@ -165,12 +323,13 @@ async def rerank(
         by the reranker model. The results will be returned in descending order of
         relevance.
 
-        Organizations will, by default, have a ratelimit of `2,500,000`
-        bytes-per-minute. If this is exceeded, requests will be throttled into
-        `latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
-        exceeded, you will get a `429` error. To request higher ratelimits, please
-        contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
-        us on [Discord](https://go.zeroentropy.dev/discord) or
+        By default, each organization has a rate limit of `2,500,000` bytes per minute
+        and 1000 QPM; rate limits are refreshed every 15 seconds. Requests over that
+        limit are throttled into `latency: "slow"` mode, up to `20,000,000` bytes per
+        minute. Beyond that second limit, you will get a `429` error. To request
+        higher rate limits, please contact
+        [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+        [Discord](https://go.zeroentropy.dev/discord) or
         [Slack](https://go.zeroentropy.dev/slack)!
 
         Args:
@@ -222,6 +381,9 @@ class ModelsResourceWithRawResponse:
     def __init__(self, models: ModelsResource) -> None:
         self._models = models
 
+        self.embed = to_raw_response_wrapper(
+            models.embed,
+        )
         self.rerank = to_raw_response_wrapper(
             models.rerank,
         )
@@ -231,6 +393,9 @@ class AsyncModelsResourceWithRawResponse:
     def __init__(self, models: AsyncModelsResource) -> None:
         self._models = models
 
+        self.embed = async_to_raw_response_wrapper(
+            models.embed,
+        )
         self.rerank = async_to_raw_response_wrapper(
             models.rerank,
         )
@@ -240,6 +405,9 @@ class ModelsResourceWithStreamingResponse:
     def __init__(self, models: ModelsResource) -> None:
         self._models = models
 
+        self.embed = to_streamed_response_wrapper(
+            models.embed,
+        )
         self.rerank = to_streamed_response_wrapper(
             models.rerank,
         )
@@ -249,6 +417,9 @@ class AsyncModelsResourceWithStreamingResponse:
     def __init__(self, models: AsyncModelsResource) -> None:
         self._models = models
 
+        self.embed = async_to_streamed_response_wrapper(
+            models.embed,
+        )
         self.rerank = async_to_streamed_response_wrapper(
             models.rerank,
         )
diff --git a/src/zeroentropy/types/__init__.py b/src/zeroentropy/types/__init__.py
index 1117c5f..c9c0f7a 100644
--- a/src/zeroentropy/types/__init__.py
+++ b/src/zeroentropy/types/__init__.py
@@ -2,8 +2,10 @@
 
 from __future__ import annotations
 
+from .model_embed_params import ModelEmbedParams as ModelEmbedParams
 from .document_add_params import DocumentAddParams as DocumentAddParams
 from .model_rerank_params import ModelRerankParams as ModelRerankParams
+from .model_embed_response import ModelEmbedResponse as ModelEmbedResponse
 from .collection_add_params import CollectionAddParams as CollectionAddParams
 from .document_add_response import DocumentAddResponse as DocumentAddResponse
 from .model_rerank_response import ModelRerankResponse as ModelRerankResponse
diff --git a/src/zeroentropy/types/model_embed_params.py b/src/zeroentropy/types/model_embed_params.py
new file mode 100644
index 0000000..e89e9db
--- /dev/null
+++ b/src/zeroentropy/types/model_embed_params.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["ModelEmbedParams"]
+
+
+class ModelEmbedParams(TypedDict, total=False):
+    input: Required[Union[str, SequenceNotStr[str]]]
+    """The string, or list of strings, to embed"""
+
+    input_type: Required[Literal["query", "document"]]
+    """The input type. For retrieval tasks, either `query` or `document`."""
+
+    model: Required[str]
+    """The model ID to use for embedding. Options are: ["zembed-1"]"""
+
+    latency: Optional[Literal["fast", "slow"]]
+    """Whether the call will be inferenced "fast" or "slow".
+
+    Rate limits for slow API calls are orders of magnitude higher, but you can
+    expect >10 second latency. Fast inferences are guaranteed subsecond, but rate
+    limits are lower. If not specified, first a "fast" call will be attempted, but
+    if you have exceeded your fast rate limit, then a slow call will be executed. If
+    explicitly set to "fast", then 429 will be returned if it cannot be executed
+    fast.
+    """
+
+    output_dimensions: Optional[int]
+    """The output dimensionality of the embedding model."""
+
+    output_format: Literal["float", "base64"]
+    """The output format of the embedding.
+
+    `base64` is significantly more efficient than `float`. The default is `float`.
+    """
diff --git a/src/zeroentropy/types/model_embed_response.py b/src/zeroentropy/types/model_embed_response.py
new file mode 100644
index 0000000..b272e41
--- /dev/null
+++ b/src/zeroentropy/types/model_embed_response.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union
+
+from .._models import BaseModel
+
+__all__ = ["ModelEmbedResponse", "Result", "Usage"]
+
+
+class Result(BaseModel):
+    embedding: Union[List[float], str]
+    """The embedding of the input text.
+
+    If `base64` format is requested, the response will be an fp32 little-endian byte
+    array, encoded as base64.
+    """
+
+
+class Usage(BaseModel):
+    """Statistics regarding the tokens used by the request."""
+
+    total_bytes: int
+    """The total number of bytes in the request. This is used for ratelimiting."""
+
+    total_tokens: int
+    """The total number of tokens in the request. This is used for billing."""
+
+
+class ModelEmbedResponse(BaseModel):
+    results: List[Result]
+    """The list of embedding results."""
+
+    usage: Usage
+    """Statistics regarding the tokens used by the request."""
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index 0022c11..2efd7ec 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -9,7 +9,7 @@
 
 from tests.utils import assert_matches_type
 from zeroentropy import ZeroEntropy, AsyncZeroEntropy
-from zeroentropy.types import ModelRerankResponse
+from zeroentropy.types import ModelEmbedResponse, ModelRerankResponse
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
 
@@ -17,6 +17,55 @@
 class TestModels:
     parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
 
+    @parametrize
+    def test_method_embed(self, client: ZeroEntropy) -> None:
+        model = client.models.embed(
+            input="string",
+            input_type="query",
+            model="model",
+        )
+        assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+    @parametrize
+    def test_method_embed_with_all_params(self, client: ZeroEntropy) -> None:
+        model = client.models.embed(
+            input="string",
+            input_type="query",
+            model="model",
+            latency="fast",
+            output_dimensions=0,
+            output_format="float",
+        )
+        assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+    @parametrize
+    def test_raw_response_embed(self, client: ZeroEntropy) -> None:
+        response = client.models.with_raw_response.embed(
+            input="string",
+            input_type="query",
+            model="model",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        model = response.parse()
+        assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+    @parametrize
+    def test_streaming_response_embed(self, client: ZeroEntropy) -> None:
+        with client.models.with_streaming_response.embed(
+            input="string",
+            input_type="query",
+            model="model",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = response.parse()
+            assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_rerank(self, client: ZeroEntropy) -> None:
         model = client.models.rerank(
@@ -71,6 +120,55 @@ class TestAsyncModels:
         "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
     )
 
+    @parametrize
+    async def test_method_embed(self, async_client: AsyncZeroEntropy) -> None:
+        model = await async_client.models.embed(
+            input="string",
+            input_type="query",
+            model="model",
+        )
+        assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+    @parametrize
+    async def test_method_embed_with_all_params(self, async_client: AsyncZeroEntropy) -> None:
+        model = await async_client.models.embed(
+            input="string",
+            input_type="query",
+            model="model",
+            latency="fast",
+            output_dimensions=0,
+            output_format="float",
+        )
+        assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+    @parametrize
+    async def test_raw_response_embed(self, async_client: AsyncZeroEntropy) -> None:
+        response = await async_client.models.with_raw_response.embed(
+            input="string",
+            input_type="query",
+            model="model",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        model = await response.parse()
+        assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_embed(self, async_client: AsyncZeroEntropy) -> None:
+        async with async_client.models.with_streaming_response.embed(
+            input="string",
+            input_type="query",
+            model="model",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = await response.parse()
+            assert_matches_type(ModelEmbedResponse, model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_rerank(self, async_client: AsyncZeroEntropy) -> None:
         model = await async_client.models.rerank(