diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2d1deb7..cff688f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,14 +55,18 @@ jobs:
run: ./scripts/build
- name: Get GitHub OIDC Token
- if: github.repository == 'stainless-sdks/zeroentropy-node'
+ if: |-
+ github.repository == 'stainless-sdks/zeroentropy-node' &&
+ !startsWith(github.ref, 'refs/heads/stl/')
id: github-oidc
uses: actions/github-script@v8
with:
script: core.setOutput('github_token', await core.getIDToken());
- name: Upload tarball
- if: github.repository == 'stainless-sdks/zeroentropy-node'
+ if: |-
+ github.repository == 'stainless-sdks/zeroentropy-node' &&
+ !startsWith(github.ref, 'refs/heads/stl/')
env:
URL: https://pkg.stainless.com/s
AUTH: ${{ steps.github-oidc.outputs.github_token }}
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 347d8bf..931fce1 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.9"
+ ".": "0.1.0-alpha.10"
}
diff --git a/.stats.yml b/.stats.yml
index 1d67610..a9365b5 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 14
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-cd86445a8ef095a12e7bf74baddc7d5a8225531f8edb88ba613e12a52e219a42.yml
-openapi_spec_hash: 6da635b19c554a476ea9c967b619ae5b
-config_hash: f5fb1effd4b0e263e1e93de3f573f46f
+configured_endpoints: 15
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-5ff1b24060b908d169910a7366213eeb423b52d14b03cc16902923eb0ffac71a.yml
+openapi_spec_hash: fc50a1765bfbfe9a3da7ea9001e479d4
+config_hash: e56152e1ee1a9273241d925702077e49
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0183b9..da14712 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,27 @@
# Changelog
+## 0.1.0-alpha.10 (2026-03-07)
+
+Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/zeroentropy-ai/zeroentropy-node/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)
+
+### Features
+
+* **api:** manual updates ([471ffee](https://github.com/zeroentropy-ai/zeroentropy-node/commit/471ffee63884cc8841645ed87fd349d389294284))
+* **api:** manual updates ([9ea7cea](https://github.com/zeroentropy-ai/zeroentropy-node/commit/9ea7ceaff47290e5304f01353b5bff212caf0d19))
+* **api:** manual updates ([0879900](https://github.com/zeroentropy-ai/zeroentropy-node/commit/0879900ce1e3c4ff5900e2b648d63185e29c6d8e))
+
+
+### Bug Fixes
+
+* **client:** preserve URL params already embedded in path ([0069421](https://github.com/zeroentropy-ai/zeroentropy-node/commit/00694211cb65420ffdfaaebd7f1bf6ce9c35751c))
+
+
+### Chores
+
+* **ci:** skip uploading artifacts on stainless-internal branches ([e5578bc](https://github.com/zeroentropy-ai/zeroentropy-node/commit/e5578bc014968908516e60a4263df0fb3696713d))
+* **internal:** codegen related update ([5b8c286](https://github.com/zeroentropy-ai/zeroentropy-node/commit/5b8c286e978ac3be2a6bc600ea35492264099220))
+* **test:** do not count install time for mock server timeout ([2402313](https://github.com/zeroentropy-ai/zeroentropy-node/commit/2402313a21e6c0e962c68ec564bb17a50d068235))
+
## 0.1.0-alpha.9 (2026-03-03)
Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/zeroentropy-ai/zeroentropy-node/compare/v0.1.0-alpha.8...v0.1.0-alpha.9)
diff --git a/api.md b/api.md
index 3121b4d..62b4c5c 100644
--- a/api.md
+++ b/api.md
@@ -60,8 +60,10 @@ Methods:
Types:
+- ModelEmbedResponse
- ModelRerankResponse
Methods:
+- client.models.embed({ ...params }) -> ModelEmbedResponse
- client.models.rerank({ ...params }) -> ModelRerankResponse
diff --git a/package.json b/package.json
index 08a5782..7c580b3 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "zeroentropy",
- "version": "0.1.0-alpha.9",
+ "version": "0.1.0-alpha.10",
"description": "The official TypeScript library for the ZeroEntropy API",
"author": "ZeroEntropy ",
"types": "dist/index.d.ts",
diff --git a/scripts/mock b/scripts/mock
index 0b28f6e..bcf3b39 100755
--- a/scripts/mock
+++ b/scripts/mock
@@ -21,11 +21,22 @@ echo "==> Starting mock server with URL ${URL}"
# Run prism mock on the given spec
if [ "$1" == "--daemon" ]; then
+ # Pre-install the package so the download doesn't eat into the startup timeout
+ npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism --version
+
npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" &> .prism.log &
- # Wait for server to come online
+ # Wait for server to come online (max 30s)
echo -n "Waiting for server"
+ attempts=0
while ! grep -q "✖ fatal\|Prism is listening" ".prism.log" ; do
+ attempts=$((attempts + 1))
+ if [ "$attempts" -ge 300 ]; then
+ echo
+ echo "Timed out waiting for Prism server to start"
+ cat .prism.log
+ exit 1
+ fi
echo -n "."
sleep 0.1
done
diff --git a/src/core.ts b/src/core.ts
index b6cb660..ba418e9 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -524,8 +524,9 @@ export abstract class APIClient {
: new URL(baseURL + (baseURL.endsWith('/') && path.startsWith('/') ? path.slice(1) : path));
const defaultQuery = this.defaultQuery();
- if (!isEmptyObj(defaultQuery)) {
- query = { ...defaultQuery, ...query } as Req;
+ const pathQuery = Object.fromEntries(url.searchParams);
+ if (!isEmptyObj(defaultQuery) || !isEmptyObj(pathQuery)) {
+ query = { ...pathQuery, ...defaultQuery, ...query } as Req;
}
if (typeof query === 'object' && query && !Array.isArray(query)) {
@@ -618,9 +619,9 @@ export abstract class APIClient {
}
}
- // If the API asks us to wait a certain amount of time (and it's a reasonable amount),
- // just do what it says, but otherwise calculate a default
- if (!(timeoutMillis && 0 <= timeoutMillis && timeoutMillis < 60 * 1000)) {
+ // If the API asks us to wait a certain amount of time, do what it says.
+ // Otherwise calculate a default.
+ if (timeoutMillis === undefined) {
const maxRetries = options.maxRetries ?? this.maxRetries;
timeoutMillis = this.calculateDefaultRetryTimeoutMillis(retriesRemaining, maxRetries);
}
diff --git a/src/index.ts b/src/index.ts
index 45432a8..ca23f19 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -32,7 +32,13 @@ import {
DocumentUpdateResponse,
Documents,
} from './resources/documents';
-import { ModelRerankParams, ModelRerankResponse, Models } from './resources/models';
+import {
+ ModelEmbedParams,
+ ModelEmbedResponse,
+ ModelRerankParams,
+ ModelRerankResponse,
+ Models,
+} from './resources/models';
import {
Queries,
QueryTopDocumentsParams,
@@ -271,7 +277,9 @@ export declare namespace ZeroEntropy {
export {
Models as Models,
+ type ModelEmbedResponse as ModelEmbedResponse,
type ModelRerankResponse as ModelRerankResponse,
+ type ModelEmbedParams as ModelEmbedParams,
type ModelRerankParams as ModelRerankParams,
};
}
diff --git a/src/resources/index.ts b/src/resources/index.ts
index 7021f85..29c3891 100644
--- a/src/resources/index.ts
+++ b/src/resources/index.ts
@@ -25,7 +25,13 @@ export {
type DocumentGetInfoListParams,
type DocumentGetPageInfoParams,
} from './documents';
-export { Models, type ModelRerankResponse, type ModelRerankParams } from './models';
+export {
+ Models,
+ type ModelEmbedResponse,
+ type ModelRerankResponse,
+ type ModelEmbedParams,
+ type ModelRerankParams,
+} from './models';
export {
Queries,
type QueryTopDocumentsResponse,
diff --git a/src/resources/models.ts b/src/resources/models.ts
index 530fdbe..2761e29 100644
--- a/src/resources/models.ts
+++ b/src/resources/models.ts
@@ -4,6 +4,26 @@ import { APIResource } from '../resource';
import * as Core from '../core';
export class Models extends APIResource {
+ /**
+ * Embeds the provided input text with ZeroEntropy embedding models.
+ *
+ * The results will be returned in the same order as the text provided. The
+ * embedding is such that queries will have high cosine similarity with documents
+ * that are relevant to that query.
+ *
+ * Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
+ * and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
+ * requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
+ * bytes-per-minute. If even this is exceeded, you will get a `429` error. To
+ * request higher ratelimits, please contact
+ * [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+ * [Discord](https://go.zeroentropy.dev/discord) or
+ * [Slack](https://go.zeroentropy.dev/slack)!
+ */
+ embed(body: ModelEmbedParams, options?: Core.RequestOptions): Core.APIPromise<ModelEmbedResponse> {
+ return this._client.post('/models/embed', { body, ...options });
+ }
+
/**
* Reranks the provided documents, according to the provided query.
*
@@ -13,12 +33,13 @@ export class Models extends APIResource {
* by the reranker model. The results will be returned in descending order of
* relevance.
*
- * Organizations will, by default, have a ratelimit of `2,500,000`
- * bytes-per-minute. If this is exceeded, requests will be throttled into
- * `latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
- * exceeded, you will get a `429` error. To request higher ratelimits, please
- * contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
- * us on [Discord](https://go.zeroentropy.dev/discord) or
+ * Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
+ * and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
+ * requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
+ * bytes-per-minute. If even this is exceeded, you will get a `429` error. To
+ * request higher ratelimits, please contact
+ * [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
+ * [Discord](https://go.zeroentropy.dev/discord) or
* [Slack](https://go.zeroentropy.dev/slack)!
*/
rerank(body: ModelRerankParams, options?: Core.RequestOptions): Core.APIPromise<ModelRerankResponse> {
@@ -26,6 +47,44 @@ export class Models extends APIResource {
}
}
+export interface ModelEmbedResponse {
+ /**
+ * The list of embedding results.
+ */
+ results: Array<ModelEmbedResponse.Result>;
+
+ /**
+ * Statistics regarding the tokens used by the request.
+ */
+ usage: ModelEmbedResponse.Usage;
+}
+
+export namespace ModelEmbedResponse {
+ export interface Result {
+ /**
+ * The embedding of the input text, as an array of floats. If `base64` format is
+ * requested, the response will be an fp32 little endian byte array, encoded as a
+ * base64 string.
+ */
+ embedding: Array<number> | string;
+ }
+
+ /**
+ * Statistics regarding the tokens used by the request.
+ */
+ export interface Usage {
+ /**
+ * The total number of bytes in the request. This is used for ratelimiting.
+ */
+ total_bytes: number;
+
+ /**
+ * The total number of tokens in the request. This is used for billing.
+ */
+ total_tokens: number;
+ }
+}
+
export interface ModelRerankResponse {
/**
* The type of inference actually used. If `auto` is requested, then `fast` will be
@@ -85,6 +144,47 @@ export namespace ModelRerankResponse {
}
}
+export interface ModelEmbedParams {
+ /**
+ * The string, or list of strings, to embed.
+ */
+ input: string | Array<string>;
+
+ /**
+ * The input type. For retrieval tasks, either `query` or `document`.
+ */
+ input_type: 'query' | 'document';
+
+ /**
+ * The model ID to use for embedding. Options are: ["zembed-1"]
+ */
+ model: string;
+
+ /**
+ * The output dimensionality of the embedding model. For `zembed-1`, the available
+ * options are: [2560, 1280, 640, 320, 160, 80, 40].
+ */
+ dimensions?: number | null;
+
+ /**
+ * The output format of the embedding. If `float`, an array of floats will be
+ * returned for each embedding. If `base64`, a f32 little endian byte array will
+ * be returned, encoded as a base64 string. `base64` is significantly more
+ * efficient than `float`. The default is `float`.
+ */
+ encoding_format?: 'float' | 'base64';
+
+ /**
+ * Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
+ * calls are orders of magnitude higher, but you can expect 2-20 second latency.
+ * Fast inferences are guaranteed subsecond, but rate limits are lower. If not
+ * specified, first a "fast" call will be attempted, but if you have exceeded your
+ * fast rate limit, then a slow call will be executed. If explicitly set to "fast",
+ * then 429 will be returned if it cannot be executed fast.
+ */
+ latency?: 'fast' | 'slow' | null;
+}
+
export interface ModelRerankParams {
/**
* The list of documents to rerank. Each document is a string.
@@ -120,5 +220,10 @@ export interface ModelRerankParams {
}
export declare namespace Models {
- export { type ModelRerankResponse as ModelRerankResponse, type ModelRerankParams as ModelRerankParams };
+ export {
+ type ModelEmbedResponse as ModelEmbedResponse,
+ type ModelRerankResponse as ModelRerankResponse,
+ type ModelEmbedParams as ModelEmbedParams,
+ type ModelRerankParams as ModelRerankParams,
+ };
}
diff --git a/src/version.ts b/src/version.ts
index fdbf689..5ed52cd 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const VERSION = '0.1.0-alpha.9'; // x-release-please-version
+export const VERSION = '0.1.0-alpha.10'; // x-release-please-version
diff --git a/tests/api-resources/models.test.ts b/tests/api-resources/models.test.ts
index 1834572..826d585 100644
--- a/tests/api-resources/models.test.ts
+++ b/tests/api-resources/models.test.ts
@@ -9,6 +9,32 @@ const client = new ZeroEntropy({
});
describe('resource models', () => {
+ test('embed: only required params', async () => {
+ const responsePromise = client.models.embed({
+ input: 'string',
+ input_type: 'query',
+ model: 'model',
+ });
+ const rawResponse = await responsePromise.asResponse();
+ expect(rawResponse).toBeInstanceOf(Response);
+ const response = await responsePromise;
+ expect(response).not.toBeInstanceOf(Response);
+ const dataAndResponse = await responsePromise.withResponse();
+ expect(dataAndResponse.data).toBe(response);
+ expect(dataAndResponse.response).toBe(rawResponse);
+ });
+
+ test('embed: required and optional params', async () => {
+ const response = await client.models.embed({
+ input: 'string',
+ input_type: 'query',
+ model: 'model',
+ dimensions: 0,
+ encoding_format: 'float',
+ latency: 'fast',
+ });
+ });
+
test('rerank: only required params', async () => {
const responsePromise = client.models.rerank({
documents: ['string'],