Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
".": "0.1.0-alpha.9"
".": "0.1.0-alpha.10"
}
8 changes: 4 additions & 4 deletions .stats.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
configured_endpoints: 14
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-cd86445a8ef095a12e7bf74baddc7d5a8225531f8edb88ba613e12a52e219a42.yml
openapi_spec_hash: 6da635b19c554a476ea9c967b619ae5b
config_hash: f5fb1effd4b0e263e1e93de3f573f46f
configured_endpoints: 15
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-9cd927800fd253f2116ab12aa496b086605bd31d295cb600b65d793203e1e9e7.yml
openapi_spec_hash: cd7f6d9db9ae338091bc6da83e27f4a6
config_hash: e56152e1ee1a9273241d925702077e49
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## 0.1.0-alpha.10 (2026-03-03)

Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)

### Features

* **api:** manual updates ([7fcec0a](https://github.com/zeroentropy-ai/zeroentropy-python/commit/7fcec0a1c901f98c5953c66affbd742fe45a4de6))
* **api:** manual updates ([71f3afe](https://github.com/zeroentropy-ai/zeroentropy-python/commit/71f3afe6cc59df2c9a0c92e97f602b38a9e5d723))

## 0.1.0-alpha.9 (2026-03-03)

Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/zeroentropy-ai/zeroentropy-python/compare/v0.1.0-alpha.8...v0.1.0-alpha.9)
Expand Down
3 changes: 2 additions & 1 deletion api.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@ Methods:
Types:

```python
from zeroentropy.types import ModelRerankResponse
from zeroentropy.types import ModelEmbedResponse, ModelRerankResponse
```

Methods:

- <code title="post /models/embed">client.models.<a href="./src/zeroentropy/resources/models.py">embed</a>(\*\*<a href="src/zeroentropy/types/model_embed_params.py">params</a>) -> <a href="./src/zeroentropy/types/model_embed_response.py">ModelEmbedResponse</a></code>
- <code title="post /models/rerank">client.models.<a href="./src/zeroentropy/resources/models.py">rerank</a>(\*\*<a href="src/zeroentropy/types/model_rerank_params.py">params</a>) -> <a href="./src/zeroentropy/types/model_rerank_response.py">ModelRerankResponse</a></code>
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "zeroentropy"
version = "0.1.0-alpha.9"
version = "0.1.0-alpha.10"
description = "The official Python library for the ZeroEntropy API"
dynamic = ["readme"]
license = "Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion src/zeroentropy/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

__title__ = "zeroentropy"
__version__ = "0.1.0-alpha.9" # x-release-please-version
__version__ = "0.1.0-alpha.10" # x-release-please-version
199 changes: 185 additions & 14 deletions src/zeroentropy/resources/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from __future__ import annotations

from typing import Optional
from typing import Union, Optional
from typing_extensions import Literal

import httpx

from ..types import model_rerank_params
from ..types import model_embed_params, model_rerank_params
from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
Expand All @@ -19,6 +19,7 @@
async_to_streamed_response_wrapper,
)
from .._base_client import make_request_options
from ..types.model_embed_response import ModelEmbedResponse
from ..types.model_rerank_response import ModelRerankResponse

__all__ = ["ModelsResource", "AsyncModelsResource"]
Expand All @@ -44,6 +45,84 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse:
"""
return ModelsResourceWithStreamingResponse(self)

def embed(
    self,
    *,
    input: Union[str, SequenceNotStr[str]],
    input_type: Literal["query", "document"],
    model: str,
    latency: Optional[Literal["fast", "slow"]] | Omit = omit,
    output_dimensions: Optional[int] | Omit = omit,
    output_format: Literal["float", "base64"] | Omit = omit,
    # The remaining arguments let you pass extra request parameters that are not
    # exposed as keyword arguments; values given here override client-level ones.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelEmbedResponse:
    """Embed the provided input text with ZeroEntropy embedding models.

    Results come back in the same order as the input was given. The
    embeddings are trained so that queries have high cosine similarity
    with the documents relevant to them.

    By default, organizations are rate-limited to `2,500,000`
    bytes-per-minute and 1000 QPM, refreshed every 15 seconds. Exceeding
    this throttles requests into `latency: "slow"` mode, up to
    `20,000,000` bytes-per-minute; beyond that a `429` error is
    returned. To request higher ratelimits, please contact
    [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or
    message us on [Discord](https://go.zeroentropy.dev/discord) or
    [Slack](https://go.zeroentropy.dev/slack)!

    Args:
      input: The string, or list of strings, to embed

      input_type: The input type. For retrieval tasks, either `query` or `document`.

      model: The model ID to use for embedding. Options are: ["zembed-1"]

      latency: Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
          calls are orders of magnitude higher, but you can expect >10 second latency.
          Fast inferences are guaranteed subsecond, but rate limits are lower. If not
          specified, first a "fast" call will be attempted, but if you have exceeded your
          fast rate limit, then a slow call will be executed. If explicitly set to "fast",
          then 429 will be returned if it cannot be executed fast.

      output_dimensions: The output dimensionality of the embedding model.

      output_format: The output format of the embedding. `base64` is significantly more efficient
          than `float`. The default is `float`.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Build the JSON body first, then let maybe_transform apply the
    # TypedDict-driven serialization rules (omitted values are dropped).
    payload = {
        "input": input,
        "input_type": input_type,
        "model": model,
        "latency": latency,
        "output_dimensions": output_dimensions,
        "output_format": output_format,
    }
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._post(
        "/models/embed",
        body=maybe_transform(payload, model_embed_params.ModelEmbedParams),
        options=request_options,
        cast_to=ModelEmbedResponse,
    )

def rerank(
self,
*,
Expand All @@ -68,12 +147,13 @@ def rerank(
by the reranker model. The results will be returned in descending order of
relevance.

Organizations will, by default, have a ratelimit of `2,500,000`
bytes-per-minute. If this is exceeded, requests will be throttled into
`latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
exceeded, you will get a `429` error. To request higher ratelimits, please
contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
us on [Discord](https://go.zeroentropy.dev/discord) or
Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
bytes-per-minute. If even this is exceeded, you will get a `429` error. To
request higher ratelimits, please contact
[founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
[Discord](https://go.zeroentropy.dev/discord) or
[Slack](https://go.zeroentropy.dev/slack)!

Args:
Expand Down Expand Up @@ -141,6 +221,84 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse:
"""
return AsyncModelsResourceWithStreamingResponse(self)

async def embed(
    self,
    *,
    input: Union[str, SequenceNotStr[str]],
    input_type: Literal["query", "document"],
    model: str,
    latency: Optional[Literal["fast", "slow"]] | Omit = omit,
    output_dimensions: Optional[int] | Omit = omit,
    output_format: Literal["float", "base64"] | Omit = omit,
    # The remaining arguments let you pass extra request parameters that are not
    # exposed as keyword arguments; values given here override client-level ones.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelEmbedResponse:
    """Embed the provided input text with ZeroEntropy embedding models.

    Results come back in the same order as the input was given. The
    embeddings are trained so that queries have high cosine similarity
    with the documents relevant to them.

    By default, organizations are rate-limited to `2,500,000`
    bytes-per-minute and 1000 QPM, refreshed every 15 seconds. Exceeding
    this throttles requests into `latency: "slow"` mode, up to
    `20,000,000` bytes-per-minute; beyond that a `429` error is
    returned. To request higher ratelimits, please contact
    [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or
    message us on [Discord](https://go.zeroentropy.dev/discord) or
    [Slack](https://go.zeroentropy.dev/slack)!

    Args:
      input: The string, or list of strings, to embed

      input_type: The input type. For retrieval tasks, either `query` or `document`.

      model: The model ID to use for embedding. Options are: ["zembed-1"]

      latency: Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
          calls are orders of magnitude higher, but you can expect >10 second latency.
          Fast inferences are guaranteed subsecond, but rate limits are lower. If not
          specified, first a "fast" call will be attempted, but if you have exceeded your
          fast rate limit, then a slow call will be executed. If explicitly set to "fast",
          then 429 will be returned if it cannot be executed fast.

      output_dimensions: The output dimensionality of the embedding model.

      output_format: The output format of the embedding. `base64` is significantly more efficient
          than `float`. The default is `float`.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Build the JSON body first, then let async_maybe_transform apply the
    # TypedDict-driven serialization rules (omitted values are dropped).
    payload = {
        "input": input,
        "input_type": input_type,
        "model": model,
        "latency": latency,
        "output_dimensions": output_dimensions,
        "output_format": output_format,
    }
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(
        "/models/embed",
        body=await async_maybe_transform(payload, model_embed_params.ModelEmbedParams),
        options=request_options,
        cast_to=ModelEmbedResponse,
    )

async def rerank(
self,
*,
Expand All @@ -165,12 +323,13 @@ async def rerank(
by the reranker model. The results will be returned in descending order of
relevance.

Organizations will, by default, have a ratelimit of `2,500,000`
bytes-per-minute. If this is exceeded, requests will be throttled into
`latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
exceeded, you will get a `429` error. To request higher ratelimits, please
contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
us on [Discord](https://go.zeroentropy.dev/discord) or
Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
bytes-per-minute. If even this is exceeded, you will get a `429` error. To
request higher ratelimits, please contact
[founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
[Discord](https://go.zeroentropy.dev/discord) or
[Slack](https://go.zeroentropy.dev/slack)!

Args:
Expand Down Expand Up @@ -222,6 +381,9 @@ class ModelsResourceWithRawResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models

self.embed = to_raw_response_wrapper(
models.embed,
)
self.rerank = to_raw_response_wrapper(
models.rerank,
)
Expand All @@ -231,6 +393,9 @@ class AsyncModelsResourceWithRawResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models

self.embed = async_to_raw_response_wrapper(
models.embed,
)
self.rerank = async_to_raw_response_wrapper(
models.rerank,
)
Expand All @@ -240,6 +405,9 @@ class ModelsResourceWithStreamingResponse:
def __init__(self, models: ModelsResource) -> None:
self._models = models

self.embed = to_streamed_response_wrapper(
models.embed,
)
self.rerank = to_streamed_response_wrapper(
models.rerank,
)
Expand All @@ -249,6 +417,9 @@ class AsyncModelsResourceWithStreamingResponse:
def __init__(self, models: AsyncModelsResource) -> None:
self._models = models

self.embed = async_to_streamed_response_wrapper(
models.embed,
)
self.rerank = async_to_streamed_response_wrapper(
models.rerank,
)
2 changes: 2 additions & 0 deletions src/zeroentropy/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from __future__ import annotations

from .model_embed_params import ModelEmbedParams as ModelEmbedParams
from .document_add_params import DocumentAddParams as DocumentAddParams
from .model_rerank_params import ModelRerankParams as ModelRerankParams
from .model_embed_response import ModelEmbedResponse as ModelEmbedResponse
from .collection_add_params import CollectionAddParams as CollectionAddParams
from .document_add_response import DocumentAddResponse as DocumentAddResponse
from .model_rerank_response import ModelRerankResponse as ModelRerankResponse
Expand Down
41 changes: 41 additions & 0 deletions src/zeroentropy/types/model_embed_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Union, Optional
from typing_extensions import Literal, Required, TypedDict

from .._types import SequenceNotStr

__all__ = ["ModelEmbedParams"]


class ModelEmbedParams(TypedDict, total=False):
input: Required[Union[str, SequenceNotStr[str]]]
"""The string, or list of strings, to embed"""

input_type: Required[Literal["query", "document"]]
"""The input type. For retrieval tasks, either `query` or `document`."""

model: Required[str]
"""The model ID to use for embedding. Options are: ["zembed-1"]"""

latency: Optional[Literal["fast", "slow"]]
"""Whether the call will be inferenced "fast" or "slow".

RateLimits for slow API calls are orders of magnitude higher, but you can
expect >10 second latency. Fast inferences are guaranteed subsecond, but rate
limits are lower. If not specified, first a "fast" call will be attempted, but
if you have exceeded your fast rate limit, then a slow call will be executed. If
explicitly set to "fast", then 429 will be returned if it cannot be executed
fast.
"""

output_dimensions: Optional[int]
"""The output dimensionality of the embedding model."""

output_format: Literal["float", "base64"]
"""The output format of the embedding.

`base64` is significantly more efficient than `float`. The default is `float`.
"""
Loading