Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,18 @@ jobs:
run: ./scripts/build

- name: Get GitHub OIDC Token
if: github.repository == 'stainless-sdks/zeroentropy-node'
if: |-
github.repository == 'stainless-sdks/zeroentropy-node' &&
!startsWith(github.ref, 'refs/heads/stl/')
id: github-oidc
uses: actions/github-script@v8
with:
script: core.setOutput('github_token', await core.getIDToken());

- name: Upload tarball
if: github.repository == 'stainless-sdks/zeroentropy-node'
if: |-
github.repository == 'stainless-sdks/zeroentropy-node' &&
!startsWith(github.ref, 'refs/heads/stl/')
env:
URL: https://pkg.stainless.com/s
AUTH: ${{ steps.github-oidc.outputs.github_token }}
Expand Down
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
".": "0.1.0-alpha.9"
".": "0.1.0-alpha.10"
}
8 changes: 4 additions & 4 deletions .stats.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
configured_endpoints: 14
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-cd86445a8ef095a12e7bf74baddc7d5a8225531f8edb88ba613e12a52e219a42.yml
openapi_spec_hash: 6da635b19c554a476ea9c967b619ae5b
config_hash: f5fb1effd4b0e263e1e93de3f573f46f
configured_endpoints: 15
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/zeroentropy%2Fzeroentropy-5ff1b24060b908d169910a7366213eeb423b52d14b03cc16902923eb0ffac71a.yml
openapi_spec_hash: fc50a1765bfbfe9a3da7ea9001e479d4
config_hash: e56152e1ee1a9273241d925702077e49
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
# Changelog

## 0.1.0-alpha.10 (2026-03-07)

Full Changelog: [v0.1.0-alpha.9...v0.1.0-alpha.10](https://github.com/zeroentropy-ai/zeroentropy-node/compare/v0.1.0-alpha.9...v0.1.0-alpha.10)

### Features

* **api:** manual updates ([471ffee](https://github.com/zeroentropy-ai/zeroentropy-node/commit/471ffee63884cc8841645ed87fd349d389294284))
* **api:** manual updates ([9ea7cea](https://github.com/zeroentropy-ai/zeroentropy-node/commit/9ea7ceaff47290e5304f01353b5bff212caf0d19))
* **api:** manual updates ([0879900](https://github.com/zeroentropy-ai/zeroentropy-node/commit/0879900ce1e3c4ff5900e2b648d63185e29c6d8e))


### Bug Fixes

* **client:** preserve URL params already embedded in path ([0069421](https://github.com/zeroentropy-ai/zeroentropy-node/commit/00694211cb65420ffdfaaebd7f1bf6ce9c35751c))


### Chores

* **ci:** skip uploading artifacts on stainless-internal branches ([e5578bc](https://github.com/zeroentropy-ai/zeroentropy-node/commit/e5578bc014968908516e60a4263df0fb3696713d))
* **internal:** codegen related update ([5b8c286](https://github.com/zeroentropy-ai/zeroentropy-node/commit/5b8c286e978ac3be2a6bc600ea35492264099220))
* **test:** do not count install time for mock server timeout ([2402313](https://github.com/zeroentropy-ai/zeroentropy-node/commit/2402313a21e6c0e962c68ec564bb17a50d068235))

## 0.1.0-alpha.9 (2026-03-03)

Full Changelog: [v0.1.0-alpha.8...v0.1.0-alpha.9](https://github.com/zeroentropy-ai/zeroentropy-node/compare/v0.1.0-alpha.8...v0.1.0-alpha.9)
Expand Down
2 changes: 2 additions & 0 deletions api.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ Methods:

Types:

- <code><a href="./src/resources/models.ts">ModelEmbedResponse</a></code>
- <code><a href="./src/resources/models.ts">ModelRerankResponse</a></code>

Methods:

- <code title="post /models/embed">client.models.<a href="./src/resources/models.ts">embed</a>({ ...params }) -> ModelEmbedResponse</code>
- <code title="post /models/rerank">client.models.<a href="./src/resources/models.ts">rerank</a>({ ...params }) -> ModelRerankResponse</code>
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "zeroentropy",
"version": "0.1.0-alpha.9",
"version": "0.1.0-alpha.10",
"description": "The official TypeScript library for the ZeroEntropy API",
"author": "ZeroEntropy <founders@zeroentropy.dev>",
"types": "dist/index.d.ts",
Expand Down
13 changes: 12 additions & 1 deletion scripts/mock
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,22 @@ echo "==> Starting mock server with URL ${URL}"

# Run prism mock on the given spec
if [ "$1" == "--daemon" ]; then
# Pre-install the package so the download doesn't eat into the startup timeout
npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism --version

npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" &> .prism.log &

# Wait for server to come online
# Wait for server to come online (max 30s)
echo -n "Waiting for server"
attempts=0
while ! grep -q "✖ fatal\|Prism is listening" ".prism.log" ; do
attempts=$((attempts + 1))
if [ "$attempts" -ge 300 ]; then
echo
echo "Timed out waiting for Prism server to start"
cat .prism.log
exit 1
fi
echo -n "."
sleep 0.1
done
Expand Down
11 changes: 6 additions & 5 deletions src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,9 @@ export abstract class APIClient {
: new URL(baseURL + (baseURL.endsWith('/') && path.startsWith('/') ? path.slice(1) : path));

const defaultQuery = this.defaultQuery();
if (!isEmptyObj(defaultQuery)) {
query = { ...defaultQuery, ...query } as Req;
const pathQuery = Object.fromEntries(url.searchParams);
if (!isEmptyObj(defaultQuery) || !isEmptyObj(pathQuery)) {
query = { ...pathQuery, ...defaultQuery, ...query } as Req;
}

if (typeof query === 'object' && query && !Array.isArray(query)) {
Expand Down Expand Up @@ -618,9 +619,9 @@ export abstract class APIClient {
}
}

// If the API asks us to wait a certain amount of time (and it's a reasonable amount),
// just do what it says, but otherwise calculate a default
if (!(timeoutMillis && 0 <= timeoutMillis && timeoutMillis < 60 * 1000)) {
// If the API asks us to wait a certain amount of time, do what it says.
// Otherwise calculate a default.
if (timeoutMillis === undefined) {
const maxRetries = options.maxRetries ?? this.maxRetries;
timeoutMillis = this.calculateDefaultRetryTimeoutMillis(retriesRemaining, maxRetries);
}
Expand Down
10 changes: 9 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ import {
DocumentUpdateResponse,
Documents,
} from './resources/documents';
import { ModelRerankParams, ModelRerankResponse, Models } from './resources/models';
import {
ModelEmbedParams,
ModelEmbedResponse,
ModelRerankParams,
ModelRerankResponse,
Models,
} from './resources/models';
import {
Queries,
QueryTopDocumentsParams,
Expand Down Expand Up @@ -271,7 +277,9 @@ export declare namespace ZeroEntropy {

export {
Models as Models,
type ModelEmbedResponse as ModelEmbedResponse,
type ModelRerankResponse as ModelRerankResponse,
type ModelEmbedParams as ModelEmbedParams,
type ModelRerankParams as ModelRerankParams,
};
}
Expand Down
8 changes: 7 additions & 1 deletion src/resources/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ export {
type DocumentGetInfoListParams,
type DocumentGetPageInfoParams,
} from './documents';
export { Models, type ModelRerankResponse, type ModelRerankParams } from './models';
export {
Models,
type ModelEmbedResponse,
type ModelRerankResponse,
type ModelEmbedParams,
type ModelRerankParams,
} from './models';
export {
Queries,
type QueryTopDocumentsResponse,
Expand Down
119 changes: 112 additions & 7 deletions src/resources/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,26 @@ import { APIResource } from '../resource';
import * as Core from '../core';

export class Models extends APIResource {
/**
* Embeds the provided input text with ZeroEntropy embedding models.
*
* The results will be returned in the same order as the text provided. The
* embedding is such that queries will have high cosine similarity with documents
* that are relevant to that query.
*
* Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
* and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
* requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
* bytes-per-minute. If even this is exceeded, you will get a `429` error. To
* request higher ratelimits, please contact
* [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
* [Discord](https://go.zeroentropy.dev/discord) or
* [Slack](https://go.zeroentropy.dev/slack)!
*/
embed(body: ModelEmbedParams, options?: Core.RequestOptions): Core.APIPromise<ModelEmbedResponse> {
return this._client.post('/models/embed', { body, ...options });
}

/**
* Reranks the provided documents, according to the provided query.
*
Expand All @@ -13,19 +33,58 @@ export class Models extends APIResource {
* by the reranker model. The results will be returned in descending order of
* relevance.
*
* Organizations will, by default, have a ratelimit of `2,500,000`
* bytes-per-minute. If this is exceeded, requests will be throttled into
* `latency: "slow"` mode, up to `20,000,000` bytes-per-minute. If even this is
* exceeded, you will get a `429` error. To request higher ratelimits, please
* contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
* us on [Discord](https://go.zeroentropy.dev/discord) or
* Organizations will, by default, have a ratelimit of `2,500,000` bytes-per-minute
* and 1000 QPM. Ratelimits are refreshed every 15 seconds. If this is exceeded,
* requests will be throttled into `latency: "slow"` mode, up to `20,000,000`
* bytes-per-minute. If even this is exceeded, you will get a `429` error. To
* request higher ratelimits, please contact
* [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message us on
* [Discord](https://go.zeroentropy.dev/discord) or
* [Slack](https://go.zeroentropy.dev/slack)!
*/
rerank(body: ModelRerankParams, options?: Core.RequestOptions): Core.APIPromise<ModelRerankResponse> {
return this._client.post('/models/rerank', { body, ...options });
}
}

/**
 * Successful response body for `POST /models/embed`.
 */
export interface ModelEmbedResponse {
  /**
   * The list of embedding results, in the same order as the input text provided.
   */
  results: Array<ModelEmbedResponse.Result>;

  /**
   * Statistics regarding the tokens used by the request.
   */
  usage: ModelEmbedResponse.Usage;
}

export namespace ModelEmbedResponse {
  /**
   * A single embedding result for one input string.
   */
  export interface Result {
    /**
     * The embedding of the input text, as an array of floats. If `base64` format is
     * requested, the response will be an fp32 little endian byte array, encoded as a
     * base64 string.
     */
    embedding: Array<number> | string;
  }

  /**
   * Statistics regarding the tokens used by the request.
   */
  export interface Usage {
    /**
     * The total number of bytes in the request. This is used for ratelimiting.
     */
    total_bytes: number;

    /**
     * The total number of tokens in the request. This is used for billing.
     */
    total_tokens: number;
  }
}

export interface ModelRerankResponse {
/**
* The type of inference actually used. If `auto` is requested, then `fast` will be
Expand Down Expand Up @@ -85,6 +144,47 @@ export namespace ModelRerankResponse {
}
}

/**
 * Request body for `POST /models/embed`.
 */
export interface ModelEmbedParams {
  /**
   * The string, or list of strings, to embed.
   */
  input: string | Array<string>;

  /**
   * The input type. For retrieval tasks, either `query` or `document`.
   */
  input_type: 'query' | 'document';

  /**
   * The model ID to use for embedding. Options are: ["zembed-1"]
   */
  model: string;

  /**
   * The output dimensionality of the embedding model. For `zembed-1`, the available
   * options are: [2560, 1280, 640, 320, 160, 80, 40].
   */
  dimensions?: number | null;

  /**
   * The output format of the embedding. If `float`, an array of floats will be
   * returned for each embedding. If `base64`, an fp32 little endian byte array will
   * be returned, encoded as a base64 string. `base64` is significantly more
   * efficient than `float`. The default is `float`.
   */
  encoding_format?: 'float' | 'base64';

  /**
   * Whether the call will be inferenced "fast" or "slow". RateLimits for slow API
   * calls are orders of magnitude higher, but you can expect 2-20 second latency.
   * Fast inferences are guaranteed subsecond, but rate limits are lower. If not
   * specified, first a "fast" call will be attempted, but if you have exceeded your
   * fast rate limit, then a slow call will be executed. If explicitly set to "fast",
   * then 429 will be returned if it cannot be executed fast.
   */
  latency?: 'fast' | 'slow' | null;
}

export interface ModelRerankParams {
/**
* The list of documents to rerank. Each document is a string.
Expand Down Expand Up @@ -120,5 +220,10 @@ export interface ModelRerankParams {
}

// Re-export the request/response types under the `Models` resource namespace so
// callers can reference them as `Models.ModelEmbedResponse`, etc.
export declare namespace Models {
  export {
    type ModelEmbedResponse as ModelEmbedResponse,
    type ModelRerankResponse as ModelRerankResponse,
    type ModelEmbedParams as ModelEmbedParams,
    type ModelRerankParams as ModelRerankParams,
  };
}
2 changes: 1 addition & 1 deletion src/version.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export const VERSION = '0.1.0-alpha.9'; // x-release-please-version
export const VERSION = '0.1.0-alpha.10'; // x-release-please-version
26 changes: 26 additions & 0 deletions tests/api-resources/models.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,32 @@ const client = new ZeroEntropy({
});

describe('resource models', () => {
test('embed: only required params', async () => {
  // Call the endpoint with only the required request fields, then verify that
  // the APIPromise exposes the raw Response, the parsed body, and both together
  // via withResponse(), and that they are consistent with each other.
  const responsePromise = client.models.embed({
    input: 'string',
    input_type: 'query',
    model: 'model',
  });
  const rawResponse = await responsePromise.asResponse();
  expect(rawResponse).toBeInstanceOf(Response);
  const response = await responsePromise;
  expect(response).not.toBeInstanceOf(Response);
  const dataAndResponse = await responsePromise.withResponse();
  expect(dataAndResponse.data).toBe(response);
  expect(dataAndResponse.response).toBe(rawResponse);
});

test('embed: required and optional params', async () => {
  // Smoke-test the request serialization with every optional field populated;
  // an unexpected-param or serialization error would reject the awaited call.
  const response = await client.models.embed({
    input: 'string',
    input_type: 'query',
    model: 'model',
    dimensions: 0,
    encoding_format: 'float',
    latency: 'fast',
  });
});

test('rerank: only required params', async () => {
const responsePromise = client.models.rerank({
documents: ['string'],
Expand Down