diff --git a/.claude/skills/review-client/SKILL.md b/.claude/skills/review-client/SKILL.md new file mode 100644 index 000000000..d311d6418 --- /dev/null +++ b/.claude/skills/review-client/SKILL.md @@ -0,0 +1,776 @@ +--- +name: review-client +description: Review a pull request that changes QuestDB **QWP** client documentation (the WebSocket transport, `ws::` / `wss::`) from the perspective of an agent or developer building an application against the client. The load-bearing goal is **agent one-shot suitability**: an agent retrieving one section of the page must be able to generate working code without hallucinating, without requiring earlier-section context, and without an indirection chain. Validates that the docs answer the concrete questions someone writing code would hit on day one — null handling, concurrency, DDL/DML/streaming SQL, acks (sync vs async, optional vs required), failover behavior and backpressure, connection notifications, mid-stream stream restarts, connect-string clarity, Enterprise connection patterns (TLS + auth + multi-host worked examples, OIDC token acquisition and refresh, explicit "not supported" statements when applicable), exhaustive type coverage on bind-parameter and column-setter surfaces (no "and more" handwaves), the absence of content dependencies on legacy ILP pages scheduled for removal, a consistent capital-markets data model across every QWP client page (no `foo`/`bar` placeholders, no schema drift between languages), and field-level documentation of the diagnostic payload on every error object (status, message stability, affected scope, correlation ID, PII safety). 
Also enforces agent-retrieval properties: section self-containedness, refutation of plausible-but-nonexistent API calls (`setNull`, `bind_array`, etc.), simple→complex example flow, intra-page vocabulary consistency, working-code minimums (no `// ...` ellipses in first examples), bounded indirection chains, error handling baked into the first example of each section rather than bolted on later, and **capability discoverability above the fold** — when a client documents more than one capability class (e.g. ingestion AND querying after the QWP read path was added), every capability must surface in the frontmatter `description:`, the opening paragraph, AND a first-level `##` heading, never only midway down the page. Legacy ILP (`http::` / `tcp::`) client pages are explicitly out of scope. Requires a PR number as input. +argument-hint: <pr-number> +--- + +# Review client documentation PR + +**Usage:** `/review-client <pr-number>` — for example, `/review-client 451`. + +The PR number is **required**. If the user invokes the skill with no +argument (e.g. just `/review-client`), do not proceed: ask them which PR to +review and stop. Do **not** infer the PR from the current branch, recent +commits, `gh pr list`, or any other source — the answer must come from the +user. Acceptable forms are a bare number (`451`), a `#`-prefixed form +(`#451`), or a full PR URL; reject anything else and ask again. + +Reviews a pull request that touches **QWP** client documentation against a +fixed checklist of questions an application developer (human or agent) would +need answered before they can ship code. The output is a structured review +the docs author can act on directly. + +## Scope: QWP only + +There are two parallel families of client documentation in this repo: + +- **QWP (in scope).** The new WebSocket transport. 
Pages render under + `/docs/connect/clients/<language>/` (slug `slug: /connect/clients/<language>`) and + document the `ws::` / `wss::` connect-string schemas, the QWP ingress and + egress wire protocols, store-and-forward, durable ACK, multi-host + failover, etc. **These are the only pages this skill reviews.** +- **Legacy ILP (out of scope).** The older HTTP/TCP transport. Pages document + only `http::` / `https::` / `tcp::` / `tcps::` schemas and ILP-specific + buffering. These pages may still live on disk under + `documentation/ingestion/clients/` (and some have been given a + `/connect/clients/...` slug as part of a routing reshuffle without their + content being rewritten). **Skip them.** If the user wants those reviewed, + recommend `/review` instead. + +**How to tell them apart on a per-file basis:** open the file and look for +QWP signals — `ws::` / `wss::` in code blocks, references to +`QwpQueryClient` / `QwpWs*` / `qwp_ws_*` APIs, sections on store-and-forward, +durable ACK, or FSN watermarks, a link to +`/docs/connect/clients/connect-string/`. A page whose only schemas are +`http::` / `https::` / `tcp::` / `tcps::` and whose content centres on ILP +buffering is legacy — exclude it from the review even if it appears in the +PR's file list. + +## When to use + +Trigger when the user runs `/review-client <pr-number>` or asks to review a QWP +client documentation PR. Examples: + +- `/review-client 451` +- "review the Java QWP client doc PR #451" +- "run review-client on PR 451" + +For generic doc review use `/review` instead. For legacy ILP client pages, +also use `/review`. + +## Inputs + +- **PR number** (required). Resolve via `gh pr view <pr-number>` to get the + branch, head SHA, and changed files. +- If the user omits the PR number, ask for it. Do not guess from the current + branch. 
+ +## Workflow + +### Step 1: Fetch PR metadata and changed files + +```bash +gh pr view <pr-number> --json number,title,headRefName,baseRefName,headRefOid,files +``` + +Then narrow the file list to **QWP client documentation only**. Candidate +paths to consider: + +- `documentation/ingestion/clients/*.md` — **but** include only files whose + content documents QWP (see the [Scope](#scope-qwp-only) section above for + the QWP-vs-legacy signals). Files that still document `http::` / `tcp::` + ILP only are out of scope, even when their slug now resolves to + `/docs/connect/clients/...`. +- `documentation/connect/**/*.md` — the new Connect section. In scope. +- `documentation/client-configuration/connect-string.md` (and any sibling + `connect-string*` file wherever it lives) — the shared QWP connect-string + reference. **Always in scope** when any QWP client page is changed; read + it even if the PR did not modify it. + +To classify a candidate file fast: read its first ~80 lines. If you see +`ws::` / `wss::` in code blocks, references to `QwpQueryClient` or +`qwp_ws_*` APIs, or a link to the connect-string reference under +`/docs/connect/clients/connect-string/`, treat it as QWP. If the only +schemas it shows are `http::` / `tcp::`, treat it as legacy and skip. + +Be explicit in your output about which files you considered and why each +was included or skipped — the docs author needs to know whether the absence +of a file from the review means "reviewed clean" or "skipped as legacy." + +If the PR changes no QWP client docs, stop and tell the user — recommend +`/review` instead. + +### Step 2: Read each changed client doc in full + +Use the Read tool on each file at the PR's head SHA (check it out, or read +from the working tree if the branch is already checked out). Do not rely on +the diff alone — context outside the diff matters for "is this question +answered anywhere on the page" checks. + +If a page links to a sibling reference (e.g., `connect-string.md`), read +that too. 
The connect-string page is shared across all client docs; treat it +as in-scope whenever any client page is changed. + +For multi-file PRs, the parent does **not** need to read each in-scope file +end-to-end — the deep reads are delegated to per-file subagents in Step 3. +The parent only needs the first ~80 lines of each candidate to classify it +as QWP vs legacy (Step 1). The shared connect-string reference is the one +exception: the parent should read it once, because every per-file subagent +will need to cite it. + +### Step 3: Run the checklist + +#### Parallelization + +The per-file checklist work is independent across files. **For any PR +touching more than one in-scope QWP file, fan out using the Agent tool**: +spawn one subagent per in-scope file, each running the full checklist +against its assigned file, and have the parent consolidate the per-file +reports into the final review. + +Send the subagents in a single message with multiple Agent tool uses so +they run concurrently. Each subagent's prompt must be self-contained — it +will not see this skill's text or the conversation history. Include: + +- The exact file path to review (absolute path). +- The PR number and head SHA, for citation context. +- The full checklist (items 1–30 from this section, including the + Agent one-shot suitability block) inlined into the prompt. Do **not** + just reference "the skill's checklist" — the subagent cannot resolve + that. Items 23–30 are the agent-fit bar and must be applied with + equal weight to the original 22; do not let the subagent silently + treat them as "nice to have." +- The expected output format: one finding per checklist item, severity + ordered (❌ → ⚠️ → ✅), with the section tag and exact line citations + (per Step 4). The subagent returns the file's section of the review, + ready to drop into the consolidated output. +- The doublecheck requirement from the [Doublecheck](#doublecheck-before-reporting-a-finding) + subsection at the end of Step 3. 
+- The list of *other* in-scope QWP client files in the PR, so cross-file + checks (item 21 schema drift, item 18 sibling-link verification) can + reference them. The subagent should read those siblings as needed for + comparison, but not produce findings on them — the agent reviewing each + sibling will cover its own. + +The shared `connect-string.md` reference (always in scope when any QWP +client page changes) gets its own subagent, evaluated only against the +checklist items that apply to a reference page (typically 12–14, 18, 19, +the parts of 22 that surface in the error-handling section, and the +agent-fit items 23, 24, 26, 28 — section self-containedness and +inference-trap refutations matter for a reference doc that agents grep +for individual keys, possibly more than for a client doc). + +If the PR only touches one QWP file, run the checklist inline in the +parent — fan-out has no benefit and adds latency. + +After the subagents return, the parent's job is to (a) consolidate +findings into the final output structure per Step 4, (b) write the +end-of-review summary that compares across files (top-three gaps, +cross-cutting themes, actively misleading items), and (c) doublecheck any +cross-file claim it added that no single subagent could have verified +alone. + +#### Verdict definitions + +For each changed client page, evaluate every item below against **two +bars**, and take the lower verdict: + +- **Human-skim bar.** Can a developer reading the page top-down answer + the question? +- **Agent one-shot bar.** If an agent retrieves only the most relevant + section (a single `##`/`###` chunk) into its context, can it generate + working code without hallucinating, without requiring earlier-section + context, and without chasing more than one link hop? This bar is + strictly harder than the human-skim bar. + +Each item gets one of three verdicts (the lower of the two bars +dominates): + +- ✅ **Covered** — passes both bars. Cite the section/line. 
+- ⚠️ **Partial** — passes one bar but not the other; or touched on but + unclear, buried, or missing an example. Quote the relevant text and + say which bar fails and how — "right answer, wrong section" (agent), + "buried under unrelated heading" (human), "indirection chain" (agent), + "phrasing requires earlier prose to disambiguate" (agent). +- ❌ **Missing** — fails both bars / not addressed. Say so plainly. + +Be specific. "Section X doesn't mention Y" beats "could be clearer." +Reference exact line numbers and quote short snippets when calling out a +gap. When the failure is agent-specific (e.g. an explanation is correct +but lives one section away from where an agent retrieves the code +example), say so explicitly — that is the signal the docs author needs +to choose between moving the explanation or duplicating it. + +#### Ingestion checklist + +1. **Inserting NULL values during ingestion.** Can the reader figure out how + to write a null for a given column without trial and error? Is there an + example? Does it explain whether "omit the column" is equivalent to + "explicit null," and whether that interacts with schema inference? +2. **Multiple concurrent publishers.** Is it clear whether `Sender` (or its + per-language equivalent) is thread-safe? If not, what is the + recommended pattern — one sender per thread, pool, queue+single-writer? + Is there guidance on whether parallel senders writing to the same table + need distinct identities (`sender_id`, store-and-forward slots)? +3. **Easy to execute DDL.** Is there a concrete copy-paste example for + `CREATE TABLE`, `ALTER`, `DROP`, `TRUNCATE` via the query client? Does + the page distinguish DDL response (`onExecDone`, `EXEC_DONE`, + `rowsAffected = 0`) from SELECT response? +4. **Easy to execute DML and stream rows.** Is `SELECT` with a row-by-row + callback shown? Is bind-parameter usage shown with the syntax + (`$1`/`?`/named) the client actually accepts? 
Is the + "columnar batch vs row view" tradeoff explained, with an example? +5. **Sync vs async acks — are acks optional?** Does the page say plainly + whether the application **must** await acknowledgements before + considering data durable, or whether `flush()` / `close()` is enough? + For async clients (WebSocket), is the error-handler callback shown? + Does it explain what happens to in-flight data if the app exits without + awaiting the ack? +6. **Durable ack vs WAL ack.** Is the distinction between "committed to + local WAL" and "uploaded to object storage" (Enterprise) clear? When + would an app care about `request_durable_ack`? + +#### Failover and resilience checklist + +7. **Ingress failover is bounded.** Does the page say that ingress reconnect + has a budget (`reconnect_max_duration_millis`) and will eventually give + up? Is it clear what the application sees when the budget is exhausted + (terminal exception, callback, etc.)? +8. **Backpressure on the application side.** If the server is unreachable + for a long time, where does buffered data go? Is store-and-forward + explained as the durability story, and the RAM buffer cap explained for + the non-SF case? Does the page tell the app how to detect "I am being + backpressured" so it can stop producing? +9. **Connection-state notifications.** Can the app wire a callback that + fires on `CONNECTED`, `DISCONNECTED`, `RECONNECTED`, `FAILED_OVER`, + `AUTH_FAILED`, `RECONNECT_BUDGET_EXHAUSTED`? Is there a code example? + Does the example show what an app would actually do (log, alert, + redirect traffic)? +10. **Mid-stream query failover — duplicate-data hazard.** Does the page + explain that if a query fails over mid-result, the server replays from + the start of the result set? Does it show the `onFailoverReset` + callback and **warn explicitly** that without wiring this callback the + application will see duplicate rows? This is the single most common + footgun — it must be impossible to miss. 
**Repetition required**: the + warning (at least a one-line note + link to the dedicated subsection) + must also appear in every other section that mentions failover, + mid-stream behaviour, or query restart — the per-query failover knob + table, the failover event-fields table, the connection-state + observability section. An agent retrieving the knob table alone + without the warning attached will generate buggy code. +11. **Per-query failover bounds.** Are the failover knobs + (`failover_max_attempts`, `failover_backoff_*`, `failover_max_duration_ms`) + listed with defaults? What does the app see if all attempts are + exhausted? + +#### Connect string and config checklist + +12. **Reference to connect-string docs.** Is there at least one link from + the client page to the connect-string reference? Is the link placed + where a reader needing it would actually look (near the first connect + string example, not just in a footer)? +13. **Connect string is easy to assemble.** Can a reader build a working + QWP connect string from scratch? Schema (`ws::` / `wss::`), address + syntax, where to put auth, where to put TLS, separator/terminator + rules. Are common pitfalls called out (trailing `;`, escaping `;` or + `=` in values, multi-address syntax)? Legacy `http::` / `tcp::` need + only a "for legacy ILP transports, see [link]" pointer — do not + require coverage on the QWP page itself. +14. **Environment variable path.** Is `QDB_CLIENT_CONF` (or per-language + equivalent) documented as the credentials-out-of-code path? + +#### Cross-cutting + +15. **Thread safety statement.** Stated in **every section whose first + example creates a handle** (sender, reader, query, cursor) — at + minimum a one-line restatement linking to the dedicated Concurrency + section. 
The naive "stated once" policy works for a human scanning + top-down but fails agent retrieval, which loads one section at a + time: an agent that fetches "DDL execution" but not "Concurrency" + will generate code that shares a single handle across threads. The + placement test ("a reader looking for *can I share this instance?* + would find it") still applies to the dedicated section. +16. **Error-handling story is end-to-end.** For each error class (auth, + schema, parse, transport, mid-stream), the page should answer: how is + the error surfaced (throw vs callback), what state is the client in + afterward (usable vs must-reset vs must-close), and what should the + app do. +17. **Migration / "what changed from before" notes** if applicable. If this + PR introduces a new transport (e.g., QWP) alongside legacy (e.g., ILP), + is there a side-by-side that a maintainer of existing code can scan? +18. **No content dependencies on legacy ILP pages.** Legacy ILP client + documentation (`documentation/ingestion/clients/{c-and-cpp,dotnet,nodejs,python}.md`, + `documentation/connect/compatibility/ilp/**`, `documentation/ingestion/clients/date-to-timestamp-conversion.md`, + and similar ILP-era support material) is on a deprecation path and + will be removed. Outbound links from a QWP client page to legacy ILP + content are acceptable **only** when framed as a "for legacy ILP, + see X" escape hatch — typically inside a `:::tip Legacy transports` + admonition near the top of the page. Flag as **Missing** any link + that *depends* on a legacy page to explain a concept the QWP reader + needs (e.g., "see the ILP overview for exactly-once delivery + semantics"). The concept must live somewhere that survives ILP + deprecation: the QWP page itself, the connect-string reference, a + transport-agnostic concepts page, or a new QWP-native page. 
Look + especially for sneaky cases: anchor links into legacy pages + (`/docs/connect/compatibility/ilp/overview/#some-section`) and + references to timestamp-conversion / date-handling support pages + that were authored for ILP. Fix shape suggestion: "move this + explanation onto the QWP page, or root it in a shared concepts page + under `/docs/concepts/`." +19. **Enterprise connection patterns and OIDC.** The page shows at least + one worked example combining TLS (`wss::`), credentials, and + multi-host `addr=...` — the realistic production shape — not just + three separate one-liners. For each Enterprise auth path the client + supports (HTTP basic, bearer token, OIDC, mTLS), there is either + (a) a concrete example showing how an application obtains and + passes the credential, or (b) an explicit one-line statement that + the path is not supported by this client, with a pointer to the + closest alternative. **Silence is not acceptable** — a reader must + not have to grep the page to discover that OIDC token refresh, mTLS + client certificates, or token rotation is unsupported. Special + attention to OIDC: the [OpenID Connect](/docs/security/oidc/) page + documents the server-side flow; the client page must answer "how + does the application acquire a token to pass to the client" and + "what happens when the token expires mid-session — does the client + refresh, does it fail, does it expect the app to register a + callback?" A bare "for OIDC, see the security page" is **not** + coverage — flag as Partial at best. +20. **Bind-parameter type coverage and limitations.** Where the page + documents bind parameters (or the per-language equivalent), it + enumerates **all** supported bind types — not a sample ending in + "and more" or "…". 
For every QuestDB column type a reader might + expect to bind (BOOLEAN, BYTE, SHORT, CHAR, INT, IPv4, LONG, FLOAT, + DOUBLE, TIMESTAMP, timestamp_ns, DATE, SYMBOL, VARCHAR, BINARY, + UUID, LONG256, DECIMAL64/128/256, GEOHASH, DOUBLE[]/ARRAY), the + page either (a) shows the setter / API and the type code, or + (b) lists the type explicitly under "unsupported as bind parameter" + with a one-line rationale (e.g., "ARRAY: bind ARGS frames don't + carry array shape; use SQL array literals instead"). Verdict + ladder: complete enumeration → Covered; sample-and-handwave ("and + more", "…", "see source") → ⚠️ Partial; no list at all → ❌ + Missing. The same principle — enumerate or call out as unsupported + — applies wherever the page documents a type-keyed surface + (ingestion column setters, result-batch accessors). The + bind-parameter table is the most common place coverage drifts + because the API is younger than the type system. +21. **Consistent capital-markets data model across clients.** Every code + example uses a capital-markets domain (trades, quotes, order books, + FX, market data). **Reject** generic placeholders — `foo`, `bar`, + `baz`, `my_table`, `t1`, `Example`, `Test`. The placeholder pattern + is a tell that the example was written in isolation and was never + cross-read against sibling client pages. Beyond the per-page check, + examples must be **consistent across the full set of QWP client + pages**: same table names, same column names, same column types, + same symbol values. 
When the PR ships one client page and the other + QWP client pages already exist, compare schemas — flag every + inconsistency the reader would hit when porting between languages: + + | Class of drift | Examples | + |---|---| + | Table name | `trades` vs `Trades` vs `market_trades` | + | Column name | `qty` vs `quantity` vs `amount`; `symbol` vs `sym` vs `instrument` | + | Column type | `LONG` vs `DOUBLE` for size; `SYMBOL` vs `VARCHAR` for ticker | + | Symbol value | `EURUSD` vs `EUR/USD` vs `EUR-USD`; `ETH-USD` vs `ETHUSD` | + | Timestamp precision | microseconds vs nanoseconds for the same notional event | + + Verdict ladder: domain-correct, placeholder-free, schema matches + every other QWP client page → ✅ Covered; domain-correct but + schema drifts from siblings → ⚠️ Partial (cite the specific + drift); generic placeholders or non-capital-markets domain + (sensors, IoT, logs) → ❌ Missing. Fix shape: pick the schema + used by the page with the most polished example and align the + others, or call out one canonical schema in this skill / a README + under `documentation/ingestion/clients/` so future client docs land + on it without negotiation. +22. **Diagnostic information on the error object/event.** Item 16 + enumerates the error categories and the surfacing / recovery + model. This item demands the next level of detail: **what + structured information is on the error and how user code reads + it**, so a real production handler can log, alert, debug, and + correlate with server-side state. + + For every error path the client exposes, the page documents: + - **Server message text** — which field or parameter carries it + (`SenderError.getServerMessage()`, the `message` parameter on + `onError`, `QwpWsSenderError.message`, etc.), whether it is + stable enough to pattern-match on, localized vs English, and + whether it is capped in length. + - **Status code** — both numeric (e.g. `0x05`) and named (e.g. + `PARSE_ERROR`), and how user code reads each. 
+ - **Affected scope** — table name on ingest errors, FSN range + (`from_fsn`/`to_fsn`) or batch identifier on async ingest + rejections, failing SQL / bind index on query parse errors, + query ID on mid-stream query failures. + - **Server correlation / request ID** for support tickets, if the + protocol carries one; otherwise an explicit statement that no + such ID is surfaced. + - **PII / secret safety** — whether the message text is safe to + forward to end-user UIs or third-party error trackers, or + whether the application must sanitise first. + + Verdict ladder: every bullet covered on every error path → ✅ + Covered; primary fields named but stability / PII / correlation + silent → ⚠️ Partial; only "the message is human-readable text" + with no field-by-field guidance → ❌ Missing. The fix shape is + almost always a small table next to the error-handling code + example listing the fields, their types, and one-line guidance per + field — much more readable than burying these properties in + prose. + + **Co-location requirement (agent one-shot bar).** Every error path's + diagnostic table must live in the **same** `##`/`###` section as + that error path's code example. A correct table located one section + away from its example fails this item under the agent bar: an agent + retrieving the code-example section gets no field-level guidance + and will fabricate field names; an agent retrieving the table + section gets no concrete invocation and will fabricate the + surrounding code. Duplicate the table (or the example) into both + sections when one error surface is referenced from multiple places. + +#### Agent one-shot suitability checklist + +The previous 22 items test what the page *says*. The next eight items +test whether the page is *retrievable in one shot* — whether an agent +that loads a single section into context can ship working code from it. 
+The bar is strictly harder than the human-skim bar: a page where every +fact is true but every fact requires three hops to assemble is human- +adequate and agent-broken. + +23. **Section self-containedness.** Each `##` and `###` section must + name (or link to a single ≤ 1-hop reference for) the concrete API + call, the connect string used in its example, the language-specific + import / `#include`, and the next step the reader takes. An agent + that retrieves only that section into context must be able to + compile the example without inheriting setup from an earlier + paragraph. Test: read each section in isolation — if the first code + block references a `reader` variable that was constructed three + sections earlier and nowhere referenced in the current section, the + section fails the bar. Verdict ladder: every section closes its own + setup → ✅; one or two sections borrow setup from a labelled "Quick + start" but link to it explicitly → ⚠️ Partial; most sections assume + inherited state → ❌ Missing. Fix shape: prepend a one-line "you + have a `reader` constructed as in [Quick start](#quick-start)" + pointer, or duplicate a 2-line setup snippet. + +24. **Inference-trap explicitness.** The high-confidence wrong guesses + an LLM makes about *this* API must be explicitly refuted somewhere + on the page — ideally in the section where the wrong guess would be + written. Enumerate the plausible non-existent API calls and check + each: + - `setNull(name)` / `set_null(col)` when nulls are actually written + by omitting the setter. + - `bind_array(...)` / `setArray(...)` when arrays are not supported + as bind parameters. + - `setInterval(...)` when `INTERVAL` has no bind setter. + - `request_durable_ack` on the reader when it is sender-only. + - `connectionListener` / `onConnect` / `onDisconnect` callbacks + when no structured connection-state callback exists. + - `setUuid(string)` when only the 16-byte form exists. 
+ - `auto_flush_rows` / `auto_flush_bytes` for the WebSocket sender + when auto-flush is rejected. + - Any sibling-language API name that doesn't exist in this + language's client (`flushAndKeep` vs `flush_and_keep`, + `await_acked_fsn` vs `awaitAckedFsn`, etc.). + + Verdict ladder: every plausible wrong guess refuted in-section (or + in a dedicated "Unsupported" table that the in-section text links + to) → ✅; some refuted → ⚠️ Partial; only positive knowledge with + no negative knowledge → ❌ Missing. Fix shape: an "Unsupported" + row in the relevant setter / column-getter / config-key table, or + a one-line refutation in the section that mentions the closest + supported alternative. + +25. **Information flow simple → complex.** Walk the code examples in + document order. The first runnable example uses the simplest + connect string (`ws::addr=localhost:9000;`), no auth, no TLS, no + failover, and the smallest possible payload. Each subsequent + example introduces at most one new concept (auth, then TLS, then + multi-host, then failover, then SF, etc.) — never two at once. + Production-shape examples (multi-host + token + TLS + failover + + durable ACK) appear at the **end**, not the beginning. Verdict + ladder: monotonic complexity from top to bottom → ✅; one + section inverts the order (e.g. the first example shows TLS but + a later section shows the bare connect string) → ⚠️ Partial; + the page opens with the production example → ❌ Missing. Fix + shape: reorder examples, or split a too-complex first example + into a minimal version followed by a "production shape" version. + +26. **Vocabulary consistency within the page.** The same operation must + be referred to by the same name everywhere. The same identifier + must be spelled identically in code and prose. If the page says + "the cursor" in section X, "the result handle" in section Y, and + "the row stream" in a table caption, an agent will treat them as + three distinct objects. 
Extends item 21 (cross-page schema + consistency) inward: this is the intra-page equivalent. Verdict + ladder: identifiers and operation names match across the page → ✅; + one or two drifts the agent could plausibly disambiguate from + context (e.g. "callback" vs "handler" used interchangeably) → ⚠️ + Partial; multiple drifts at the same surface (different setter + names referenced in code vs prose, different connect-string-key + capitalisation) → ❌ Missing. Fix shape: pick one name per concept + and grep-replace across the page. + +27. **Working-code minimum (no ellipses in the first example).** The + first code example in each section must be complete: explicit + setup, explicit error handling (see item 29), explicit cleanup. No + `// ...` placeholders, no `// process row...`, no `// handle + error...`. Subsequent examples in the same section may abbreviate + once the pattern is established. The bar is "an agent pasting this + snippet verbatim gets a compiling program" — not "an agent + paraphrasing this snippet gets the gist." Verdict ladder: every + first-in-section example is complete → ✅; some have ellipses but + the missing pieces are trivially inferable (`// imports omitted`) + → ⚠️ Partial; load-bearing logic is replaced by `// ...` → ❌ + Missing. Fix shape: inline the omitted lines. + +28. **Indirection-chain cap (≤ 1 hop).** From the section where a + question is naturally asked, the answer must be reachable in at + most one link click. A "see X" pointer to a section that itself + says "see Y" is a two-hop chain and fails. Item 12 requires *a* + link to the connect-string reference; this item caps how often the + page resolves a question by indirection at all. Note that the + "links to a sibling page count as Covered" allowance in Style + guidance applies only to the human-skim bar; under the agent bar, + every indirection is consumed against this cap, and a section + answering a question by sending the agent on a two-hop chase fails. 
+ Verdict ladder: every concept reachable in ≤ 1 hop → ✅; one or + two two-hop chains for non-load-bearing detail → ⚠️ Partial; + load-bearing concepts (null handling, thread safety, failover + callback) require two or more hops → ❌ Missing. Fix shape: inline + the destination content, or restructure so the question is asked + in the section that already contains the answer. + +29. **Failure mode in the golden path, not bolted on later.** The first + runnable example in each section must include the error path — not + a happy-path `try { ... } catch { print }` placeholder, but the + real shape of error handling the section's API requires (the C + `err_out` parameter check + `goto on_error` cleanup; the C++ + `catch (const line_reader_error&)` with `e.code()` dispatch; the + sender error-handler callback registration; the async error poll). + Agents prompted "write the function" overwhelmingly copy the + *first* example's shape — if the first example skips error + handling, the generated app will skip it too. Item 16 covers + whether error handling is *documented*; this item covers whether + it is *demonstrated in the example agents will copy*. Verdict + ladder: every first-in-section example demonstrates the real + error path → ✅; some examples bolt error handling on as a later + snippet → ⚠️ Partial; the page's error-handling section is the + only place error handling appears → ❌ Missing. Fix shape: rewrite + the first example to include the error path, or move the + error-handling section ahead of the API examples and reference it + from each. + +30. 
**Capability surface visible above the fold.** Pages that document a + client with more than one capability class — ingestion **and** + querying, read **and** write, sync **and** async, blocking **and** + streaming — must surface every capability in all three of these + places: + + - the frontmatter `description:` field (this is what search, + sidebar tooltips, and AI retrieval index for landing decisions); + - the page's opening paragraph (the first text a reader sees after + the title); + - a first-level `##` heading on the page (not buried under `###`). + + A reader landing on the page from a "how do I query QuestDB from + `<language>`" query must learn within the first screen of the page + — without scrolling past hundreds of lines of ingestion content — + that the client does queries at all. An agent that retrieves only + the page's opening chunk into context must learn what the client + can do from that chunk alone; if the second capability surfaces + only midway down the page, the retrieval misses it and the agent + will either fabricate a non-existent separate client or send the + user to the wrong page entirely (PGWire / REST). The most common + failure pattern is a page that was originally ingestion-only and + gained query support in a later PR — frontmatter, opening + paragraph, and any top-of-page `:::info` / `:::tip` admonition all + need to be rewritten when the new capability lands, not just a new + `##` section appended at the bottom. + + Verdict ladder: every capability mentioned in frontmatter + description AND opening paragraph AND a first-level heading → ✅ + Covered; one of those three surfaces omits a capability → ⚠️ + Partial (cite which surface and which capability); a capability + appears only deep in the page → ❌ Missing. Fix shape: rewrite the + frontmatter description to enumerate every capability class + explicitly (e.g.
"client for high-throughput **ingestion** and SQL + **query execution**"), rewrite the opening paragraph as "two + complementary APIs live in the same library: ... and ...", promote + the new capability to its own `##` heading at the same depth as + the existing ones, and update any top-of-page admonitions that + redirect readers elsewhere ("for querying see PGWire") to reflect + the new capability. + +#### Doublecheck before reporting a finding + +Every ❌ Missing and ⚠️ Partial verdict must be verified by re-reading +the cited lines before it goes into the output. **False findings damage +the review more than missed ones** — they make the docs author waste +time chasing a non-issue and erode trust in the rest of the report. The +checklist is long and the same words ("flush", "error", "thread") recur +across sections; it is easy to write a finding from memory and miss the +paragraph two sections down that already covers it. + +Before finalizing each ❌ / ⚠️ finding, do the following — and only then +write it into the output: + +- **Re-read the cited line range.** Read the exact range you plan to + cite, plus enough surrounding lines (5–10 above and below) to confirm + the gap is real and the cited text is what you think it is. +- **Search the whole page for the missing concept.** A finding of the + form "the page never mentions X" is invalid if X appears under a + different heading. Grep / scan for the relevant identifier (`setNull`, + `onFailoverReset`, `sender_id`, `OIDC`, `backpressure`, etc.) across + the file before claiming absence. +- **Confirm section / header attributions.** A finding like "the + thread-safety statement is buried under 'Parallel queries'" is wrong + if the statement is actually under "Concurrency"; verify the section + heading that contains the cited line. 
+- **Verify both sides of cross-file claims.** For findings comparing + across files (schema drift in item 21, link-target validity in item + 18, "the Rust page does this correctly" comparisons), re-read both + files at the cited locations before reporting. A misattributed + comparison undermines the whole cross-file argument. +- **Confirm exact quotes.** If the finding quotes the page (e.g., "ends + literally with 'and more'"), re-confirm the quote is verbatim and + appears at the cited line — paraphrased "quotes" are a common + failure mode. +- **Simulate one-section retrieval for agent-fit findings (items 23-29 + and the agent-bar half of any other item).** Before flagging an + agent-fit gap, mentally load just the cited `##`/`###` section into a + fresh context window and ask: "Can I generate the code from this + alone?" If the answer is yes, the section is self-contained; the + finding is invalid. If the answer requires earlier-section state, an + identifier introduced elsewhere, or a two-hop link chase, the gap is + real — and the finding should name which of those three causes it. + +✅ Covered findings can be lighter-touch: a citation that points to the +right region is sufficient verification. The skew toward verifying +negative findings is intentional — a false ❌ wastes more author +attention than a false ✅. + +When the per-file work is fanned out to subagents (see +[Parallelization](#parallelization)), each subagent is responsible for +doublechecking its own findings before returning. The parent must +additionally doublecheck any cross-file claim it adds to the end-of-review +summary — those claims didn't exist in any single subagent's output and +therefore weren't verified upstream. + +### Step 4: Produce the review + +Format the output as one section per changed file. **Within each file, +order findings by severity, worst first** — ❌ Missing, then ⚠️ Partial, +then ✅ Covered at the bottom. 
This is the load-bearing rule of the output +format: human readers scan top-down looking for action items, and a doc +author should be able to stop reading as soon as the ❌/⚠️ blocks end. + +Do **not** group by checklist section (Ingestion / Failover / Connect +string / Cross-cutting / Agent one-shot). Instead, tag each finding +with its section in parentheses after the title — `(Ingestion)`, +`(Failover)`, `(Connect string)`, `(Cross-cutting)`, `(Agent one-shot)` +— so the author still knows which category an item belongs to without +losing the severity ordering. For items where the verdict is the lower +of two bars (e.g. human-Covered but agent-Partial), tag the failing +bar in the finding body: *"Partial under the agent bar: the table at +lines X-Y answers the question but lives in a separate section from +the code example at lines A-B that an agent would retrieve first."* + +Within a severity bucket, order by impact (the gap a reader would hit +first or hardest comes first). When in doubt, follow the checklist's own +ordering as a tiebreaker. + +The ✅ Covered block at the bottom may be terser than the ❌/⚠️ blocks +above it — one-line confirmations with citation are fine. The point of +keeping Covered findings in the output at all is to let the author see +that the item *was* checked and reassure them no follow-up is needed; it +is not to re-justify the verdict. + +Use this structure: + +```markdown +## documentation/ingestion/clients/<lang>.md + +- ❌ **Missing — inserting NULL values (Ingestion).** The column-method + list (lines 245-256) shows typed setters but never says how to write + null. No example. Recommend adding either an explicit `setNull(name)` + example or a one-liner stating that omitted columns are stored as null. +- ❌ **Missing — duplicate-data hazard on mid-stream failover (Failover).** + The `onFailoverReset` callback is mentioned (lines 784-790) but the page + does not say *what happens if you don't wire it*.
Add an explicit + warning: "Without an onFailoverReset handler that clears accumulated + results, the application will observe duplicate rows after a mid-stream + reconnect." +- ⚠️ **Partial — multiple publishers (Ingestion).** Line 845 states + `Sender` is not thread-safe, but the statement is under "Parallel + queries" where a reader looking for ingestion guidance would not look. + Move or duplicate under "Data ingestion." +- ⚠️ **Partial — OIDC (Cross-cutting).** Line 172 is a bare "see the + security page" pointer; the client page must answer how the app + acquires the token and what happens on expiry. +- ⚠️ **Partial under agent bar — thread safety in DDL section + (Cross-cutting, item 15).** The CREATE TABLE example at lines + 559-582 never restates the thread-safety contract. An agent + retrieving only the DDL section will not learn that `Sender` is + single-threaded. Add a one-line note linking to the Concurrency + section at the top of the DDL section. +- ⚠️ **Partial under agent bar — inference trap (Agent one-shot, + item 24).** The Null values section at lines 441-477 shows the + omit-the-setter pattern but never refutes `setNull(name)` / + `set_null(col)`. An agent porting from PG / JDBC will fabricate + the call. Add one row to the "unsupported" table or a one-line + refutation in the Null values section. +- ✅ **Covered — DDL (Ingestion).** Lines 559-582 show CREATE TABLE with + `onExecDone`. +- ✅ **Covered — thread safety statement (Cross-cutting).** Stated at + l.236-244 next to the ingestion code. +- ✅ **Covered — connect-string reference link (Connect string).** l.198. +``` + +End with a short summary: total counts (Covered / Partial / Missing), the +top three highest-impact gaps, and any items where the doc actively +misleads the reader (call these out separately — they are worse than gaps). + +### Step 5: Offer to file the gaps + +After printing the review, ask whether to: + +- Post the review as a PR comment (`gh pr comment <pr-number> --body-file <file>`).
+- Draft inline edits for the highest-impact gaps. +- Stop here. + +Do not post the review without confirmation. + +## Style guidance for the review itself + +- Quote short snippets and cite line numbers (`file.md:245-256`). Vague + reviews are unactionable. +- For each gap, suggest the **shape** of the fix (one example, one warning + block, one paragraph move) — not the full prose. The doc author will + write the prose. +- Use ✅ / ⚠️ / ❌ markers so the author can scan. (Skill output is the + only place in this repo where emojis are appropriate, since the user + asked for a review tool.) +- Do not flag items that are correctly out of scope for the page (e.g., + don't ask the Java page to document the Python client's null handling). +- If the page links to a sibling page that fully answers an item, mark it + Covered **on the human-skim bar** with the link as the citation. Do + not require every page to be self-contained on the human bar. + **Exception:** links into legacy ILP material do not count as + coverage on either bar — see checklist item 18. A QWP page that + "covers" null handling by linking to the ILP overview is not + covered; it has a content dependency on a page scheduled for + removal. +- **Indirection charged against the agent bar.** A one-hop link to a + sibling page that answers the question still passes the agent bar + (item 28 allows ≤ 1 hop), but a two-hop chain ("see the + connect-string reference, which says: see the failover concepts + page") fails the agent bar even if the human-skim verdict is + Covered. In those cases mark the item ⚠️ Partial and call out the + agent-bar failure explicitly. + +## What this skill is not + +- Not a generic doc reviewer (`/review` for that). +- Not a copy-editor (no typo / wording polish). +- Not a security review (`/security-review` for that). +- Not a build/link checker — assume `yarn build` is run separately. 
diff --git a/ONBOARDING.md b/ONBOARDING.md new file mode 100644 index 000000000..b07b0bfd7 --- /dev/null +++ b/ONBOARDING.md @@ -0,0 +1,181 @@ +# QWP Documentation Project — Onboarding + +A coordinated three-person effort to document QuestDB's new wire protocols (QWP ingress + egress), client failover, and store-and-forward. This is your starting point. + +## The project in one paragraph + +We're shipping documentation for: a new public ingress wire protocol (QWP, with a WebSocket and a UDP variant), a new public egress wire protocol (QWP query result streaming), a comprehensive client failover system, and a store-and-forward client substrate. The specs in `questdb-enterprise/questdb/docs/qwp/` are the source of truth. The reference client is `java-questdb-client`. We document **Java-only on day one** — other languages follow later. + +## Setup + +Clone these three repos as siblings (parent directory doesn't matter, but they share one): + +``` +parent/ +├── documentation/ ← this repo, where docs land +├── questdb-enterprise/ ← spec source: docs/qwp/*.md +└── java-questdb-client/ ← reference implementation +``` + +You need access to the enterprise repo — ping in your team channel if you don't have it. + +Local dev server (from `documentation/`): + +``` +yarn install +yarn start # http://localhost:3001 +``` + +See `CLAUDE.md` in this repo for Docusaurus conventions, admonition syntax, custom components, and the railroad-diagram workflow. + +## Bundle assignments (proposed — swap if needed) + +| Bundle | Person | Scope | Files (exclusive ownership) | +|---|---|---|---| +| **A — Wire Protocols** | **Javier** | 4 new pages: Overview, QWP Ingress (WS), QWP Ingress (UDP), QWP Egress (WS). Audience is third-party client implementers. | `documentation/protocols/**` (all new) | +| **B — Client Configuration + central wiring** | **Vlad** | New top-level connect-string reference, 3 patches to existing pages, sole owner of `sidebars.js`. 
| `documentation/client-configuration/**`, the 3 patch files below, `documentation/sidebars.js` | +| **C — Client Reliability** | **Imre** | 6 new pages: 2 client failover, 4 store-and-forward. Lives under the Connect section (cross-linked from the existing High Availability section for server-side context). | `documentation/ingestion/clients/failover/**`, `documentation/ingestion/clients/store-and-forward/**` | + +Bundle B's three patch files: +- `documentation/ingestion/ilp/overview.md` — shorten "Multiple URLs for HA" → link to Bundle C +- `documentation/ingestion/clients/java.md` — shorten "Configuring multiple URLs" → link to Bundle C +- `documentation/ingestion/clients/configuration-string.md` — redirect to new location + +## Don't-trip-over-each-other rules + +1. **`sidebars.js` is single-writer.** Only Bundle B edits it. A and C: send your entries in PR descriptions; B commits them in one go. +2. **The connect-string page is single-writer.** Only B edits `documentation/client-configuration/connect-string.md`. C delivers SF / failover / reconnect key documentation as draft markdown snippets to B for inclusion. +3. **Day 1 — B lands the skeleton first.** Empty connect-string page with stable anchor IDs (`#auth`, `#tls`, `#failover-keys`, `#sf-keys`, `#reconnect-keys`, `#egress-flow`) + 4 Protocols stub pages + `sidebars.js` entries. Until this lands, A and C should not commit new pages — internal links would 404. +4. **File scopes are hard.** No bundle edits files outside its scope. Disputed patches belong to B. +5. **B's patches land last.** They replace shallow content with links into C's new pages, so they wait until C's pages are live. + +## Source specs + +Located in `questdb-enterprise/questdb/docs/qwp/`. These are normative — if a doc page contradicts the spec, the spec wins. 
+ +| Spec file | Used by | +|---|---| +| `wire-ingress.md` | A (Ingress WS page) | +| `wire-egress.md` | A (Egress WS page) | +| `wire-udp.md` | A (Ingress UDP page) | +| `failover.md` | C (failover pages), B (failover keys section) | +| `sf-client.md` | C (SF pages), B (SF + reconnect keys sections) | +| `README.md` | A (Overview page), all (audience matrix) | + +Reference implementation paths in `java-questdb-client/`: +- `core/src/main/java/io/questdb/client/cutlass/qwp/` — QWP client +- `core/src/main/java/io/questdb/client/cutlass/qwp/client/sf/` — store-and-forward +- `core/src/main/java/io/questdb/client/impl/ConfStringParser.java` — canonical list of connect-string options +- `core/src/main/java/io/questdb/client/Sender.java` — public builder API + +## Using Claude Code on this project + +### Start a session + +From the `documentation/` clone: + +``` +claude +``` + +`CLAUDE.md` is loaded automatically — Claude already knows about Docusaurus conventions and dev commands. + +### High-value patterns for this work + +**Hand the spec to Claude — don't paraphrase.** +``` +Read ../questdb-enterprise/questdb/docs/qwp/wire-ingress.md. +We'll write documentation/protocols/qwp-ingress-websocket.md from it. +Audience: third-party client implementers. +``` + +**Use plan mode for any new page.** Press the plan-mode shortcut (or type `/plan`) before drafting so you can review structure and approach before content is written. + +**Delegate broad searches to subagents.** "Where is the existing failover documentation in this repo?" — Claude will spawn an Explore subagent instead of grepping in the foreground. + +**Cross-check against the reference impl.** When documenting an option: +``` +Before I write up reconnect_max_duration_millis, check +ConfStringParser.java in ../java-questdb-client for the actual default +and behavior. +``` + +**Run `/review` on your branch** before opening a PR. + +### Project-specific tips + +- Spec paths are relative to `documentation/`. 
Tell Claude they're sibling clones: `../questdb-enterprise/questdb/docs/qwp/...`. +- Docusaurus admonitions (`:::note`, `:::tip`, `:::warning`), code fences with `questdb-sql` for syntax highlighting, custom components — all covered in `CLAUDE.md`. +- For grammar railroad diagrams in protocol pages, see the `scripts/railroad.py` workflow in `CLAUDE.md`. +- **Java-only callout** belongs at the top of every failover and SF page: + > Client-side support is currently available in the Java client. Additional language clients are on the roadmap. +- Always run `yarn build` locally before opening a PR — it catches broken internal links. + +### First-prompt templates + +**Javier — Bundle A (Wire Protocols):** +``` +I'm documenting the QWP wire protocols for third-party client implementers. + +Read ../questdb-enterprise/questdb/docs/qwp/README.md for the audience matrix, +then ../questdb-enterprise/questdb/docs/qwp/wire-ingress.md. + +Help me draft documentation/protocols/qwp-ingress-websocket.md. +Audience: someone writing a non-Java client from scratch. They need framing, +type codes, schema/null encoding, close/error codes, versioning, and a +pointer to the reference impl (java-questdb-client at a pinned commit). + +Use plan mode first. +``` + +**Vlad — Bundle B (Client Configuration + central wiring):** +``` +I'm promoting documentation/ingestion/clients/configuration-string.md to a +new top-level "Client Configuration" section. The same connect-string now +drives ILP, QWP ingress, QWP egress, failover, and store-and-forward. + +Read the existing page, then ../java-questdb-client/core/src/main/java/io/ +questdb/client/impl/ConfStringParser.java for the canonical option list. + +Today's goal is a skeleton with stable anchor IDs (#auth, #tls, +#failover-keys, #sf-keys, #reconnect-keys, #egress-flow) so my +collaborators can deep-link while I flesh out the body.
Also add the new +top-level entry in sidebars.js and 4 stub pages under documentation/ +protocols/ (Overview, Ingress WS, Ingress UDP, Egress WS). + +Use plan mode first. +``` + +**Imre — Bundle C (Client Reliability):** +``` +I'm writing client-side reliability documentation under the Connect +section. The files live under documentation/ingestion/clients/ in two +sub-folders: failover/ and store-and-forward/. + +Read ../questdb-enterprise/questdb/docs/qwp/failover.md and +../questdb-enterprise/questdb/docs/qwp/sf-client.md. + +Six pages to write: +- ingestion/clients/failover/concepts.md +- ingestion/clients/failover/configuration.md +- ingestion/clients/store-and-forward/concepts.md +- ingestion/clients/store-and-forward/when-to-use.md +- ingestion/clients/store-and-forward/operating.md +- ingestion/clients/store-and-forward/configuration.md + +Start with the failover concepts page. Audience is end users on QuestDB +Enterprise. Java-only callout at the top of every page. Cross-link to +the existing High Availability section for server-side HA context. + +Use plan mode first. +``` + +## When you're stuck + +- **Spec ambiguity** — ask the spec author before improvising. Specs are normative. +- **Cross-bundle question** — post in the project channel. Don't solve it by editing someone else's files. +- **Claude Code question** — type `/help` in a session. + +--- + +Good luck. The structure is designed so each bundle can drive to PR independently after Day 1. 
diff --git a/documentation/architecture/storage-engine.md b/documentation/architecture/storage-engine.md index ada358aef..9f7fb2c57 100644 --- a/documentation/architecture/storage-engine.md +++ b/documentation/architecture/storage-engine.md @@ -50,7 +50,7 @@ to optimize writes in the event of out-of-order data or when updating sampling i ### Tier Three: Parquet, Locally or in an Object Store Older partitions (any partition other than the most recent one) can be converted to -[Parquet](/docs/query/export-parquet) for both interoperability and compression ratio. +[Parquet](/docs/connect/compatibility/export-parquet) for both interoperability and compression ratio. Partitions in Parquet format remain fully available for queries. Users don't need to know whether a partition is in QuestDB binary format or Parquet format. All the data types available in QuestDB can be converted to Parquet. diff --git a/documentation/changelog.mdx b/documentation/changelog.mdx index 6c9e379c6..293d3cd13 100644 --- a/documentation/changelog.mdx +++ b/documentation/changelog.mdx @@ -26,6 +26,7 @@ This page tracks significant updates to the QuestDB documentation. ### Updated +- [Go client](/docs/connect/clients/go/) - Rewrote for the QWP binary protocol: ingestion, the QwpQueryClient query API, store-and-forward, and failover - [Aggregation functions](/docs/query/functions/aggregation/) - Added demo tags and updated examples with runnable queries - [LATEST ON](/docs/query/sql/latest-on/) - Added demo tags to examples - [JOIN](/docs/query/sql/join/) - Updated examples to use demo data @@ -51,7 +52,7 @@ This page tracks significant updates to the QuestDB documentation. 
- [UNNEST](/docs/query/sql/unnest/) - SQL reference for unnesting arrays into rows - [LATERAL JOIN](/docs/query/sql/lateral-join/) - SQL reference for lateral subqueries - [Sparkline and bar visualization functions](/docs/query/functions/visualization/) - Text-based chart functions for terminal and console output -- [StructArrayExplode transform](/docs/ingestion/message-brokers/kafka/) - Kafka SMT for exploding struct arrays +- [StructArrayExplode transform](/docs/connect/message-brokers/kafka/) - Kafka SMT for exploding struct arrays ### Reference @@ -65,13 +66,13 @@ This page tracks significant updates to the QuestDB documentation. - [SQL reference pages](/docs/query/sql/select/) - Replaced railroad diagrams with code-based syntax blocks across 65 SQL pages, with updated examples - [SAMPLE BY](/docs/query/sql/sample-by/) - Updated timezone bucket alignment behavior -- [Parquet export](/docs/query/export-parquet/) - Added partitioning options for exports +- [Parquet export](/docs/connect/compatibility/export-parquet/) - Added partitioning options for exports - [WINDOW JOIN](/docs/query/sql/window-join/) - Documented dynamic window boundaries - [HORIZON JOIN](/docs/query/sql/horizon-join/) - Documented multi-RHS table support - [JOIN](/docs/query/sql/join/) - Improved join documentation and reorganized page - [Web Console](/docs/getting-started/web-console/overview/) - Added table details, updated screenshots - [TTL](/docs/concepts/ttl/) - Fixed removal syntax and general improvements -- [REST API `/exp` endpoint](/docs/query/rest-api/) - Documented timeout parameter, removed outdated warning +- [REST API `/exp` endpoint](/docs/connect/compatibility/rest-api/) - Documented timeout parameter, removed outdated warning ## March 2026 @@ -97,7 +98,7 @@ This page tracks significant updates to the QuestDB documentation. 
- [Cookbook](/docs/cookbook/) - Refreshed recipes with lookback patterns, named windows, and updated schema references - [Per-column Parquet encoding and compression](/docs/query/sql/alter-table-alter-column-set-parquet/) - Comprehensive documentation for column-level settings - [Database replication](/docs/configuration/database-replication/) - GCP NFS transport and tuning updates -- [Ingestion benchmarks](/docs/ingestion/overview/) - Updated benchmark image to Q1 2026 +- [Ingestion benchmarks](/docs/connect/overview/) - Updated benchmark image to Q1 2026 ## February 2026 @@ -124,7 +125,7 @@ This page tracks significant updates to the QuestDB documentation. ### Updated - [Named windows](/docs/query/functions/window-functions/overview/) - Support for reusable `WINDOW` clause definitions -- [Parquet export](/docs/query/export-parquet/) - Fixed compression defaults, restructured page, updated `read_parquet` types +- [Parquet export](/docs/connect/compatibility/export-parquet/) - Fixed compression defaults, restructured page, updated `read_parquet` types - [Backup](/docs/operations/backup/) - Documented `backup.schedule.cron` format, improved scheduler visibility - [Date/time functions](/docs/query/functions/date-time/) - Page updates and corrections - [Window functions](/docs/query/functions/window-functions/overview/) - Added limitation documentation @@ -206,7 +207,7 @@ This page tracks significant updates to the QuestDB documentation. 
### Updated -- [Parquet export](/docs/query/export-parquet/) - Complete documentation for exporting data to Parquet format +- [Parquet export](/docs/connect/compatibility/export-parquet/) - Complete documentation for exporting data to Parquet format - [Shared pool configuration](/docs/configuration/shared-workers/) - Updated with network, query, and write shared pool options - [SQL hints](/docs/query/sql/asof-join/#choose-the-optimal-algorithm-with-an-sql-hint) - Rewritten section on temporal join hints @@ -218,7 +219,7 @@ This page tracks significant updates to the QuestDB documentation. ### New -- [PGWire for C/C++](/docs/query/pgwire/c-and-cpp/) - Guide for C/C++ applications using PostgreSQL wire protocol +- [PGWire for C/C++](/docs/connect/compatibility/pgwire/c-and-cpp/) - Guide for C/C++ applications using PostgreSQL wire protocol - [Table and column naming rules](/docs/query/sql/create-table/#table-name) - Guidelines for valid identifiers ### Reference @@ -236,7 +237,7 @@ This page tracks significant updates to the QuestDB documentation. ### Updated - [Partitioning](/docs/concepts/partitions/) - Improved formatting and explanations -- [Go client examples](/docs/ingestion/clients/go/) - Updated to v4 API +- [Go client examples](/docs/connect/clients/go/) - Updated to v4 API ## August 2025 @@ -270,7 +271,7 @@ This page tracks significant updates to the QuestDB documentation. 
### Updated -- [ILP clients](/docs/ingestion/overview/) - Array support added to Python, C++, Rust, Java, and .NET clients +- [ILP clients](/docs/connect/overview/) - Array support added to Python, C++, Rust, Java, and .NET clients - [WAL metrics](/docs/operations/monitoring-alerting/) - Added metrics for detecting WAL apply lag ## Earlier updates diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md new file mode 100644 index 000000000..50aa8c7a7 --- /dev/null +++ b/documentation/client-configuration/connect-string.md @@ -0,0 +1,724 @@ +--- +slug: /connect/clients/connect-string +title: Connect string reference +description: + Configuration knobs for QuestDB native clients (QWP over WebSocket). + Drives ingress, egress, multi-host failover, and store-and-forward. +--- + +The QuestDB native client is configured with a single connect string. The +same string format drives QWP (QuestDB Wire Protocol) ingress, QWP egress, +multi-host failover, and the store-and-forward substrate. Per-language +clients accept the same options under the same names, so configuration is +portable across implementations. + +A `ws::` / `wss::` connect string is a single input shared by both the +ingress sender and the egress query client. Each client reads the keys +relevant to its direction and **silently ignores the rest** — a key meant +for the other direction is accepted and skipped, never rejected — so one +connect string configures both without edits. The *Applies to:* tag on each +section below marks which direction a key affects. + +For legacy InfluxDB Line Protocol (ILP) transports (`http`, `https`, `tcp`, +`tcps`), see the [ILP overview](/docs/connect/compatibility/ilp/overview/). 
+ +**On this page:** + +- [Syntax](#syntax) +- [Common patterns](#common-patterns) +- [Recipes](#recipes) +- [Protocols and transports](#protocols-and-transports) +- [Authentication](#auth) +- [TLS](#tls) +- [Auto-flushing](#auto-flush) +- [Buffer sizing](#buffer) +- [Multi-host failover](#failover-keys) +- [Store-and-forward](#sf-keys) +- [Reconnect and failover](#reconnect-keys) +- [Durable ACK](#durable-ack) +- [Query client keys](#egress-keys) +- [Error handling](#error-handling) +- [Key index](#key-index) + +## Syntax {#syntax} + +A connect string has the form: + +``` +schema::key1=value1;key2=value2; +``` + +The `schema` selects the wire protocol and transport. The remaining +`key=value` pairs configure it. The trailing semicolon is optional but +recommended. + +For example: + +``` +ws::addr=localhost:9000;username=admin;password=secret; +``` + +This selects the QWP WebSocket transport, connects to `localhost:9000`, and +provides basic-auth credentials. + +For the list of supported schemas, see +[Protocols and transports](#protocols-and-transports). + +### Grammar + +- **Schema** — alphanumeric ASCII characters and underscore. Terminated by + `::`. +- **Key** — alphanumeric ASCII characters and underscore. Terminated by `=`. + Keys are case-sensitive; the canonical form is lowercase `snake_case`. +- **Value** — any character except control characters + (U+0000–U+001F, U+007F–U+009F). Terminated by `;`. +- **Escaping** — to include a literal `;` in a value, double it (`;;`). 
+ +Example with an escaped semicolon in a password (the actual password value +is `p;ssw;rd`): + +``` +ws::addr=localhost:9000;username=admin;password=p;;ssw;;rd; +``` + +### Loading a connect string + +The Java client accepts a connect string in three ways: + +- From a string literal: + + ```java + Sender sender = Sender.fromConfig("ws::addr=localhost:9000;"); + ``` + +- From an environment variable (reads `QDB_CLIENT_CONF`): + + ```java + Sender sender = Sender.fromEnv(); + ``` + +- From the builder, which accepts the same option keys programmatically: + + ```java + Sender sender = Sender.builder(Transport.WS) + .address("localhost:9000") + .build(); + ``` + +Other language clients expose equivalent entry points; see each +[client library page](/docs/connect/overview/#client-libraries) for the +per-language syntax. + +## Common patterns {#common-patterns} + +Canonical shapes for typical deployments. Each can be extended with +auth, failover, or store-and-forward options from the sections below. + +### Local development (no auth, no TLS) + +``` +ws::addr=localhost:9000; +``` + +### Production with basic auth (TLS) + +``` +wss::addr=questdb.example.com:443;username=admin;password=secret; +``` + +### Production with a custom trust store + +``` +wss::addr=questdb.example.com:443;username=admin;password=secret;tls_roots=/etc/questdb/ca-roots;tls_roots_password=changeit; +``` + +### Ingest with store-and-forward across multiple nodes + +``` +ws::addr=node-a:9000,node-b:9000;sf_dir=/var/lib/myapp/qdb-sf;sender_id=ingest-1; +``` + +### Query (egress) preferring a replica in your zone + +``` +wss::addr=node-a:443,node-b:443;target=replica;zone=eu-west-1a; +``` + +### Tolerate a slow or restarting server at startup + +``` +ws::addr=node-a:9000;reconnect_max_duration_millis=120000; +``` + +The 2-minute reconnect budget covers both the *first* connect and any +subsequent reconnect: setting any explicit `reconnect_*` key implicitly +turns on `initial_connect_retry`. 
See +[Ingress reconnect](#reconnect-keys). + +## Recipes {#recipes} + +Goal-to-keys mapping. For complete connect-string templates, see +[Common patterns](#common-patterns). For per-key details (type, default, +caveats), follow the section links from the [Key index](#key-index). + +| Goal | Direction | Required keys | Optional / related | +| ------------------------------------------------- | --------- | -------------------------------------- | ------------------------------------------------------------------------------------------- | +| Minimal connect string | both | `addr` | — | +| Enable TLS | both | `addr` with `wss` schema | `tls_verify`, `tls_roots`, `tls_roots_password` | +| Basic-auth credentials | both | `username`, `password` | `auth_timeout_ms` | +| Bearer-token credentials | both | `token` | `auth_timeout_ms` | +| Multi-host failover | both | `addr=h1,h2,…` | `target`, `zone`, `reconnect_*` (ingress), `failover_*` (egress) | +| Query only the primary (freshest data) | egress | `target=primary` | — | +| Query only replicas (offload primary) | egress | `target=replica` | — | +| Zone-aware routing with DR last-resort | egress | `zone=` | `target` | +| Tune ingest batching | ingress | — | `auto_flush_rows`, `auto_flush_interval`, `auto_flush_bytes` | +| Disable auto-flush (manual `flush()` only) | ingress | `auto_flush=off` | — | +| Memory-buffered ingest (no disk durability) | ingress | (omit `sf_dir`) | `init_buf_size`, `max_buf_size` | +| Durable store-and-forward ingest | ingress | `sf_dir` | `sender_id`, `sf_max_bytes`, `sf_max_total_bytes`, `sf_append_deadline_millis` | +| Run multiple senders sharing one `sf_dir` | ingress | `sf_dir`, `sender_id` | unique `sender_id` per sender | +| Orphan recovery for crashed senders | ingress | `drain_orphans=on` | `max_background_drainers` | +| End-to-end durable acknowledgement | ingress | `request_durable_ack=on` | `durable_ack_keepalive_interval_millis` | +| Tune ingress reconnect budget | ingress | — | 
`reconnect_initial_backoff_millis`, `reconnect_max_backoff_millis`, `reconnect_max_duration_millis` (any of these also implies `initial_connect_retry=on`) | +| Force fail-fast on initial connect | ingress | `initial_connect_retry=off` | overrides the implicit promotion from any explicit `reconnect_*` key | +| Retry initial connect in background | ingress | `initial_connect_retry=async` | `reconnect_*` | +| Fast `close()` without drain | ingress | `close_flush_timeout_millis=0` | — | +| Disable per-query egress failover | egress | `failover=off` | — | +| Tune per-query egress failover | egress | — | `failover_max_attempts`, `failover_backoff_initial_ms`, `failover_backoff_max_ms`, `failover_max_duration_ms` | +| Configure async error inbox | both | — | `error_inbox_capacity` | + +## Protocols and transports {#protocols-and-transports} + +*Applies to: ingress and egress.* + +The schema prefix selects the QWP transport. + +| Schema | Transport | Default port | Notes | +| ------ | --------------- | ------------ | -------------------------------------------------------------------------------------------------------------------- | +| `ws` | WebSocket | `9000` | QWP over plain WebSocket. Use for development or trusted networks. | +| `wss` | WebSocket + TLS | `9000` | QWP over TLS-secured WebSocket. Recommended for production. | +| `udp` | UDP | `9007` | Fire-and-forget metrics ingest, single table per datagram. | + +`qwpws` / `qwpwss` are accepted as long-form aliases for `ws` / `wss`. + +The default port is applied when `addr` omits `:port`. Note that `wss` does +**not** default to `443`: both `ws` and `wss` use `9000` unless overridden. + +QWP negotiates its protocol version during the WebSocket upgrade — clients +do not need to configure it. + +## Authentication {#auth} + +*Applies to: ingress and egress.* + +QWP runs over WebSocket and uses HTTP-style credentials sent on the +WebSocket upgrade request. + +- `username` — username for HTTP basic authentication. 
+- `password` — password for HTTP basic authentication. +- `token` — bearer token sent as `Authorization: Bearer <token>`. Mutually + exclusive with `username` / `password`. Token auth avoids the per-request + overhead of basic auth and is the recommended path for Enterprise + deployments. +- `auth_timeout_ms` — per-host upper bound on the upgrade response read. + Does not cover TCP connect, TLS handshake, or post-upgrade frame reads — + those use OS or hard-coded defaults. Default: `15000` (15 s). + +**Mutual TLS (mTLS).** Not supported. The client validates the server's +certificate against a trust store but cannot present a client certificate; +the TLS handshake is server-authenticated only. `tls_roots` / +`tls_roots_password` configure server-cert trust, not client identity. Use +`token=` or `username=` / `password=` for client authentication. + +## TLS {#tls} + +*Applies to: ingress and egress.* + +TLS is enabled by selecting the `wss` schema. + +- `tls_verify` — controls server certificate verification. Options: `on`, + `unsafe_off`. Default: `on`. `unsafe_off` disables verification; **use + only for testing** — bypassing verification makes the connection + vulnerable to MITM attacks. +- `tls_roots` — path to a file of trusted root certificates, used instead + of the system trust store. The on-disk format is client-specific — the + Java client loads a JKS keystore, the .NET client a PKCS#12 / PFX + bundle, and some clients a PEM file. If omitted, the system default + trust store is used. +- `tls_roots_password` — password for the `tls_roots` file, for clients + whose trust-store format requires one. Clients that load a passwordless + format (for example, PEM) reject this key. + +:::note Client support varies + +`tls_roots` / `tls_roots_password` support — and the trust-store file +format expected — vary by client.
Some clients (for example, Go) verify +against the operating-system trust store only and **reject these keys at +parse time**; to trust a private CA there, install it in the host trust +store. Check the relevant +[client library page](/docs/connect/overview/#client-libraries) for +specifics. + +Mutual TLS (client certificates) is not supported by QuestDB — the server +does not negotiate client certificates regardless of client. See +[Authentication](#auth) for the supported credential paths. + +::: + +See also the [server-side TLS configuration](/docs/security/tls/). + +## Auto-flushing {#auto-flush} + +*Applies to: ingress.* + +The client buffers rows in memory and flushes them to the server in batches. +Auto-flushing controls when the buffer is sent without an explicit +`flush()` call. The three triggers below are OR'd — whichever threshold +trips first sends the batch. + +- `auto_flush` — global enable. Options: `on`, `off`. Default: `on`. + When `off`, the application must call `flush()` explicitly to send + buffered rows. +- `auto_flush_rows` — flush when the buffered row count reaches this + threshold. Set to `off` to disable. Default: `1000`. +- `auto_flush_interval` — flush when this many milliseconds have elapsed + since the first buffered row. Evaluated on the next `at()` / `flush()` + call (not driven by a wall-clock timer). Set to `off` to disable. + Default: `100` (100 ms). +- `auto_flush_bytes` — flush when the encode buffer reaches this byte + size. Set to `off` to disable. Default: `8m` (8 MiB). Accepts + [size suffixes](#size-suffixes). When set to a positive value, the + client clamps the effective threshold down to 90% of the server- + advertised `X-QWP-Max-Batch-Size` at handshake (one-way: a configured + value smaller than the advertised cap is kept as-is). The 10% margin + absorbs encoding overhead such as schema and dict-delta bytes. 
+ Setting `off` opts out of byte-based auto-flush entirely — the + handshake clamp does not re-enable it, and the application takes + responsibility for not producing oversized batches. Older servers + that do not advertise the header leave the configured value + untouched. + +## Buffer sizing {#buffer} + +*Applies to: ingress (encode buffer). `max_schemas_per_connection` also +applies to egress.* + +These keys control the in-memory row buffer that the client uses before +flushing. + +- `init_buf_size` — initial buffer size in bytes. Default: `65536` + (64 KiB). Accepts [size suffixes](#size-suffixes). +- `max_buf_size` — maximum buffer size; the buffer grows up to this cap. + Default: `104857600` (100 MiB). Accepts size suffixes. +- `max_name_len` — maximum allowed length of a table or column name in + bytes. Default: `127`. +- `max_schemas_per_connection` — per-connection ceiling on the number of + distinct schema IDs the client can register. WebSocket / QWP only. + Default: `65535`. +- `max_datagram_size` — UDP only. Maximum datagram size; defaults to a + value below typical Ethernet MTU. + +### Size suffixes {#size-suffixes} + +Size-typed values (`auto_flush_bytes`, `init_buf_size`, `max_buf_size`, +`sf_max_bytes`, `sf_max_total_bytes`) accept JVM-style unit suffixes. Suffixes are +case-insensitive and 1024-based, matching `-Xmx` conventions: + +| Suffix | Meaning | Example | +| -------------- | ----------------- | ------------ | +| *(none)* | bytes | `65536` | +| `k` or `kb` | KiB (× 1024) | `64k` | +| `m` or `mb` | MiB (× 1024²) | `4m`, `4mb` | +| `g` or `gb` | GiB (× 1024³) | `1g`, `10gb` | +| `t` or `tb` | TiB (× 1024⁴) | `1t` | + +## Multi-host failover {#failover-keys} + +*Applies to: ingress and egress. The [Role filter and zone preference](#role-filter-and-zone-preference) +sub-section is egress only.* + +:::note QuestDB Enterprise + +Multi-host failover requires QuestDB Enterprise. OSS is single-node — there +is no secondary server to fail over to.
+ +::: + +The connect string accepts multiple `host:port` pairs in `addr`. Two +syntaxes are accepted and accumulate: + +``` +ws::addr=node-a:9000,node-b:9000,node-c:9000; +``` + +``` +ws::addr=node-a:9000;addr=node-b:9000;addr=node-c:9000; +``` + +Empty entries (`,,`, or leading / trailing commas) are rejected. + +The I/O loop rotates through the endpoints on every reconnect attempt +within a single outage budget. When the server rejects the connection +because the current host is in the wrong role, the client treats it as +failover input and immediately tries the next endpoint without waiting for +backoff. + +### Role filter and zone preference + +Both `target` and `zone` apply to **egress only**. QuestDB is currently a +single-primary cluster: ingress automatically follows the primary across +the host list and adapts when the primary moves to another node. These +keys are silently accepted on ingress but have no effect. + +- `target` — server-role filter applied per endpoint after the upgrade + reads `SERVER_INFO`. Options: + - `any` (default) — no preference; route to any healthy endpoint. + - `primary` — route only to the writer. Use when queries must see the + most recent data; replicas are eventually consistent and may lag the + primary. + - `replica` — route only to replicas. Use for historical or analytical + queries to avoid contending with the ingest traffic the primary is + handling. + + Endpoints whose role does not match the filter are skipped. + +- `zone` — client zone identifier (opaque, case-insensitive — e.g. + `eu-west-1a`, `dc-amsterdam`). When set, egress prefers endpoints whose + server-advertised `zone_id` matches the client's. Mismatched-zone + endpoints — typically a remote DR replica — drop to a lower priority + tier; the client routes to them only as a last resort, when every + same-zone endpoint is unhealthy. With `target=primary`, zone preference + collapses: the writer is followed regardless of zone. 
+ +The full behavioural model — host picker policy, host-health states, error +classification, and backoff schedule — is documented under the Connect +section (Client failover, coming with Bundle C). Server-side HA is covered +separately under the +[High Availability section](/docs/high-availability/overview/). + +Related: [Reconnect and failover](#reconnect-keys), +[Store-and-forward](#sf-keys). + +:::warning Enable DEDUP on tables ingested through failover + +On unplanned failover — when the primary dies before issuing a durable +ACK — the client replays unacknowledged frames against the new primary. +Without [DEDUP](/docs/concepts/deduplication/) on the target table, those +replays can produce duplicate rows. Tables ingested through a multi-host +failover connect string **must** declare `DEDUP UPSERT KEYS(...)` covering +row identity. See [Delivery semantics](/docs/concepts/delivery-semantics/) +for the full at-least-once / exactly-once model. + +::: + +## Store-and-forward {#sf-keys} + +*Applies to: ingress.* + +Store-and-forward (SF) is an opt-in durability substrate available on QWP / +WebSocket. The client persists outgoing frames to disk before sending; the +server's cumulative ACK trims acknowledged segments. If the connection drops +or the client process restarts, the I/O thread silently reconnects and +replays whatever is still on disk. + +To enable SF mode, set `sf_dir`. Without it, the client runs a memory-only +equivalent — same architecture, no durability across restarts. + +### Storage + +- `sf_dir` — parent directory under which the slot lives. The slot path is + `<sf_dir>/<sender_id>/`. Required for SF mode; omit for memory-only mode. + Path handling: + - Taken verbatim. Absolute paths recommended for production; relative + paths resolve against the process working directory. + - Shell-style expansions like `~` are **not** expanded by the client.
+ - The leaf directory is created automatically if missing, but its parent + must already exist — the client does not create paths recursively. +- `sender_id` — slot identity. The slot lives at `<sf_dir>/<sender_id>/`, + used verbatim as the directory name. Allowed characters: letters, + digits, `_`, `-`. No path separators, no `.`, no spaces. Two senders + sharing the same `sender_id` collide on the slot lock — the second one + fails fast. Default: `default`. +- `sf_durability` — disk durability mode. Currently only `memory` is + shipping. (`flush` and `append` per-write fsync modes are planned.) +- `sf_max_bytes` — per-segment rotation threshold. Must be ≥ the largest + single flushed frame. Default: `4 MiB` (`4m`). Accepts + [size suffixes](#size-suffixes). +- `sf_max_total_bytes` — hard cap on per-slot storage. When the cap is + reached, append blocks until ACKs trim space (see + `sf_append_deadline_millis`). Defaults: `10 GiB` (`10g`) in SF mode, + `128 MiB` (`128m`) in memory mode. Accepts size suffixes. + +### Sender restart and replay + +SF persists outgoing frames and the durable-ack watermark to disk under +`<sf_dir>/<sender_id>/`. + +**Recovery is triggered at Sender creation.** When the application +instantiates a new sender — `Sender.fromConfig(...)`, `Sender.fromEnv()`, +or the builder — the client analyses the on-disk state under `sf_dir` +before returning control. There is no background daemon; replay is part +of the Sender lifecycle. + +To resume from the previous session's buffer after a restart — clean +exit, SIGKILL, host crash, or reboot — instantiate a new sender with the +**same** `sf_dir` and `sender_id`: + +1. The new sender acquires the slot's POSIX `flock` (`LockFileEx` on + Windows). If the previous process is still alive and holds the lock, + the new sender fails fast with `sf slot already in use`. The kernel + releases the lock on process exit, even after SIGKILL, so a crashed + sender does not leave the slot stuck. +2.
Recovery reads the persisted ack watermark and replays every on-disk + segment past it against the server. Replay runs on the I/O thread in + parallel with the application's new `append()` calls — the application + is not blocked. + +If `sf_dir` is a relative path, ensure the process resolves it the same +way after restart (typically: use an absolute path). + +For an **abandoned** slot to be picked up by a *different* sender — the +original is never coming back — see [Orphan recovery](#orphan-recovery) +below. + +**At-least-once delivery.** Replay can re-send frames the server already +accepted but did not durable-acknowledge before the previous sender died. +To prevent duplicate rows in the target table, declare +[DEDUP](/docs/concepts/deduplication/) `UPSERT KEYS(...)` covering row +identity. See [Delivery semantics](/docs/concepts/delivery-semantics/) for +the full model and recipe. + +### Backpressure + +- `sf_append_deadline_millis` — maximum time `append()` waits for trim to + free space when the cap is hit. If the deadline fires, the call throws. + Default: `30000` (30 s). + +### Orphan recovery + +When `drain_orphans=on`, the new sender scans `<sf_dir>/*` at startup for +sibling slots that are unlocked and contain unacked data. The scan runs +as part of Sender creation (alongside the same-slot recovery above). Each +orphan slot is locked, drained on its own dedicated connection, and +released — **multiple orphans drain in parallel**, up to +`max_background_drainers` concurrent drains. + +- `drain_orphans` — `on` enables the orphan drainer pool. Default: `off`. +- `max_background_drainers` — maximum concurrent drainers. Default: `4`. + +For delivery semantics, architecture, and tradeoffs (at-least-once +guarantees, DEDUP requirements, segment-granular trim), see the +Store-and-forward concepts page under Connect (coming with Bundle C).
+ +## Reconnect and failover {#reconnect-keys} + +*Applies to: ingress and egress (separate key families).* + +QWP / WebSocket has two distinct recovery loops, each with its own knob +family. The **ingress** cursor-engine reconnect loop runs continuously for +the lifetime of the sender. The **egress** per-`Execute()` failover loop +runs once per query. + +### Ingress reconnect + +These keys control the cursor-engine reconnect loop used by QWP ingest. +SF mode and memory-only mode share the same loop. + +- `reconnect_initial_backoff_millis` — initial wait between reconnect + attempts. Backoff grows exponentially up to `reconnect_max_backoff_millis`. + Default: `100`. Setting this enables `initial_connect_retry=on` implicitly; + see below. +- `reconnect_max_backoff_millis` — cap on per-attempt backoff. + Default: `5000` (5 s). Setting this enables + `initial_connect_retry=on` implicitly; see below. +- `reconnect_max_duration_millis` — total time budget for a single outage. + Once exceeded, the I/O loop gives up and surfaces a terminal error. + Default: `300000` (5 min). Setting this enables `initial_connect_retry=on` + implicitly; see below. +- `initial_connect_retry` — whether the initial connect attempt is retried + on failure. The same loop drives the retry. + - `off` (default, alias `false`) — fail fast on initial connect failure. + - `on` (aliases `sync`, `true`) — retry synchronously on the user + thread. + - `async` — return the `Sender` immediately; the I/O thread retries in + the background, surfacing terminal failures via the error inbox. + + **Implicit promotion.** Setting any explicit `reconnect_*` key without + also choosing an `initial_connect_retry` mode promotes + `initial_connect_retry` to `on` automatically, so the reconnect budget + also covers the *first* connect attempt — not only post-disconnect + ones. 
To keep the historical fail-fast behaviour on first connect while + still tuning the reconnect loop, set `initial_connect_retry=off` + explicitly; the override is preserved. +- `close_flush_timeout_millis` — `close()` blocks up to this many + milliseconds waiting for buffered frames to drain. Default: `5000` (5 s). + Set to `0` or `-1` for fast close (skip the drain). + +Auth and handshake failures during reconnect (authentication rejected, version mismatch, +durable-ack mismatch, non-101 upgrade without a role hint) are immediately +terminal — the loop does not retry them. + +### Egress failover {#egress-failover} + +These keys control the per-`Execute()` reconnect loop on the QWP query +client. Each query has its own budget; the loop resets between queries. +Requires QuestDB Enterprise (multi-host). + +- `failover` — master switch. `on` (default) or `off`. When `off`, + transport errors surface directly through `onError` without retry. +- `failover_max_attempts` — cap on connection attempts per `Execute()` (initial + attempt + `N − 1` failovers). Default: `8`. +- `failover_backoff_initial_ms` — first post-failure sleep. Default: `50`. +- `failover_backoff_max_ms` — cap on per-attempt sleep. Default: `1000` + (1 s). +- `failover_max_duration_ms` — total wall-clock budget per `Execute()`. + Default: `30000` (30 s). Set to `0` for unbounded. + +## Durable ACK {#durable-ack} + +*Applies to: ingress.* + +:::note QuestDB Enterprise + +Durable ACK requires QuestDB Enterprise. OSS is single-node and does not +ship WALs off-box, so the server-side durability-acknowledgement signal +that drives this protocol is enterprise-only. + +::: + +QuestDB Enterprise ships Write-Ahead Logs (WALs) from the primary to an +object store or another file system — typically over the network. Once a +WAL is durably shipped, the server emits a `STATUS_DURABLE_ACK` frame to +the store-and-forward client; the client marks that frame's FSN as durable +only after this acknowledgement arrives.
+ +The benefit: if the primary dies before shipping a WAL, the client still +holds the corresponding frames in its SF buffer and replays them against +the new primary on failover — closing the data-loss window that a +transport-level OK ACK alone cannot close. + +- `request_durable_ack` — when `on`, the client gates trim on + `STATUS_DURABLE_ACK` frames from the server, suppressing OK-driven trim. + Default: `off`. +- `durable_ack_keepalive_interval_millis` — interval at which the client + emits keepalive PINGs while waiting for durable-ack frames. Required + because the server only flushes pending durable acks on inbound recv + events. Default: `200` (ms). Set to `0` or a negative value to disable. + +See the [QWP Egress (WebSocket)](/docs/connect/wire-protocols/qwp-egress-websocket/) +wire protocol for the underlying mechanism. + +## Query client keys {#egress-keys} + +*Applies to: egress (query client).* + +These keys are accepted by the QWP query client's connect string (the +egress / `QwpQueryClient` path). They are not sender keys. + +- `compression` — result-batch compression the client advertises. Options: + `raw` (default — no compression, the accept-encoding header is omitted so + pre-compression servers see an unchanged handshake), `zstd` (demand + zstd), `auto` (accept zstd if the server offers it). +- `compression_level` — zstd level hint. Range `1`–`22` (server clamps to + `1`–`9`). Default `1` — the cheapest server-side CPU; raise it if you + measure a meaningful ratio improvement on your payload and the server has + the headroom. Ignored when `compression=raw`. +- `initial_credit` — byte-credit flow-control budget. `0` (default) means + unbounded: the server streams as fast as the network allows. Set a + non-zero budget to bound server push on a memory-constrained client. +- `max_batch_rows` — upper bound on rows per result batch. +- `buffer_pool_size` — size of the client-side decode buffer pool. 
+ +Equivalent options exist on the query client's builder API (for example, +`WithQwpQueryCompression`, `WithQwpQueryCompressionLevel`, +`WithQwpQueryInitialCredit` in the Go client). See the +[client library page](/docs/connect/overview/#client-libraries) for the +per-language names. + +## Error handling {#error-handling} + +*Applies to: ingress and egress.* + +The QWP / WebSocket I/O loop reports errors via an asynchronous inbox +consumed by the application. + +- `error_inbox_capacity` — bounded capacity for async error notifications. + Must be ≥ `16`. Overflow drops the oldest entry and bumps a + `droppedErrorNotifications` counter. Default: `256`. + +The following per-category override keys are **reserved by the spec but +not yet recognised by the Java connect-string parser** — today they are +wired only via the fluent builder API. New client implementations should +accept them in the connect string per the spec; precedence rules are +documented in the [QWP store-and-forward spec](https://github.com/questdb/questdb-enterprise/blob/main/questdb/docs/qwp/sf-client.md) +§14. + +- `on_server_error` — handler for server-reject status frames. +- `on_schema_error` — handler for schema-validation errors. +- `on_parse_error` — handler for client-side parse errors. +- `on_internal_error` — handler for unexpected client-side errors. +- `on_security_error` — handler for auth / TLS errors. +- `on_write_error` — handler for transport write failures. + +## Key index {#key-index} + +Alphabetical list of every option. The Section column links to the full +description and behaviour notes. 
+ +| Key | Type | Default | Section | +| --------------------------------------- | ----------------------------- | ----------------------------- | ------------------------------------------------------------- | +| `addr` | `host:port[,host:port…]` | required | [Multi-host failover](#failover-keys) | +| `auth_timeout_ms` | int (ms) | `15000` | [Authentication](#auth) | +| `auto_flush` | enum (`on` / `off`) | `on` | [Auto-flushing](#auto-flush) | +| `auto_flush_bytes` | size | `8m` (8 MiB) | [Auto-flushing](#auto-flush) | +| `auto_flush_interval` | int (ms) / `off` | `100` (100 ms) | [Auto-flushing](#auto-flush) | +| `auto_flush_rows` | int / `off` | `1000` | [Auto-flushing](#auto-flush) | +| `buffer_pool_size` | int | `4` | [Query client keys](#egress-keys) | +| `close_flush_timeout_millis` | int (ms) | `5000` | [Ingress reconnect](#reconnect-keys) | +| `compression` | enum (`raw` / `zstd` / `auto`) | `raw` | [Query client keys](#egress-keys) | +| `compression_level` | int (`1`–`22`) | `1` | [Query client keys](#egress-keys) | +| `drain_orphans` | enum (`on` / `off`) | `off` | [Store-and-forward](#sf-keys) | +| `durable_ack_keepalive_interval_millis` | int (ms) | `200` | [Durable ACK](#durable-ack) | +| `error_inbox_capacity` | int (≥ 16) | `256` | [Error handling](#error-handling) | +| `failover` | enum (`on` / `off`) | `on` | [Egress failover](#egress-failover) | +| `failover_backoff_initial_ms` | int (ms) | `50` | [Egress failover](#egress-failover) | +| `failover_backoff_max_ms` | int (ms) | `1000` | [Egress failover](#egress-failover) | +| `failover_max_attempts` | int | `8` | [Egress failover](#egress-failover) | +| `failover_max_duration_ms` | int (ms) | `30000` | [Egress failover](#egress-failover) | +| `init_buf_size` | size | `65536` (64 KiB) | [Buffer sizing](#buffer) | +| `initial_connect_retry` | enum (`off` / `on` / `async`) | `off` (auto-promoted to `on` when any explicit `reconnect_*` key is set) | [Ingress reconnect](#reconnect-keys) | +|
`initial_credit` | int (bytes) | `0` (unbounded) | [Query client keys](#egress-keys) | +| `max_background_drainers` | int | `4` | [Store-and-forward](#sf-keys) | +| `max_batch_rows` | int | server default | [Query client keys](#egress-keys) | +| `max_buf_size` | size | `104857600` (100 MiB) | [Buffer sizing](#buffer) | +| `max_datagram_size` | size | (UDP) below typical MTU | [Buffer sizing](#buffer) | +| `max_name_len` | int | `127` | [Buffer sizing](#buffer) | +| `max_schemas_per_connection` | int | `65535` | [Buffer sizing](#buffer) | +| `on_internal_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_parse_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_schema_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_security_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_server_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_write_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `password` | string | unset | [Authentication](#auth) | +| `reconnect_initial_backoff_millis` | int (ms) | `100` | [Ingress reconnect](#reconnect-keys) | +| `reconnect_max_backoff_millis` | int (ms) | `5000` | [Ingress reconnect](#reconnect-keys) | +| `reconnect_max_duration_millis` | int (ms) | `300000` (5 min) | [Ingress reconnect](#reconnect-keys) | +| `request_durable_ack` | enum (`on` / `off`) | `off` | [Durable ACK](#durable-ack) | +| `sender_id` | string | `default` | [Store-and-forward](#sf-keys) | +| `sf_append_deadline_millis` | int (ms) | `30000` (30 s) | [Store-and-forward](#sf-keys) | +| `sf_dir` | path | unset (memory mode) | [Store-and-forward](#sf-keys) | +| `sf_durability` | enum (`memory`) | `memory` | [Store-and-forward](#sf-keys) | +| `sf_max_bytes` | size | `4 MiB` | [Store-and-forward](#sf-keys) | +| `sf_max_total_bytes` | size | `128 MiB` mem / `10 GiB` SF | [Store-and-forward](#sf-keys) | +| `target` | enum (`any` 
/ `primary` / `replica`) | `any` | [Multi-host failover](#failover-keys) | +| `tls_roots` | path | system trust store | [TLS](#tls) | +| `tls_roots_password` | string | — (client-specific) | [TLS](#tls) | +| `tls_verify` | enum (`on` / `unsafe_off`) | `on` | [TLS](#tls) | +| `token` | string | unset | [Authentication](#auth) | +| `username` | string | unset | [Authentication](#auth) | +| `zone` | string | unset | [Multi-host failover](#failover-keys) | + +\* Reserved by the spec; the Java connect-string parser does not yet +recognise these — they are currently wired only via the fluent builder +API. New client implementations should accept them. See +[Error handling](#error-handling). diff --git a/documentation/concepts/delivery-semantics.md b/documentation/concepts/delivery-semantics.md new file mode 100644 index 000000000..4ee695453 --- /dev/null +++ b/documentation/concepts/delivery-semantics.md @@ -0,0 +1,169 @@ +--- +title: Delivery semantics +sidebar_label: Delivery semantics +description: + How QuestDB clients deliver data (at-least-once), where duplicate rows can + arise, and how to combine designated timestamps with deduplication for + exactly-once outcomes. +--- + +QuestDB clients deliver data **at-least-once**: every row your application +publishes is guaranteed to reach the server, but under failure it may arrive +more than once. Storing each row exactly once is the application's +responsibility, and QuestDB provides the mechanisms to make it routine. + +This page explains where duplicates come from and how to suppress them. + +## At-least-once vs exactly-once + +| Property | Meaning | Where it comes from | +|----------|---------|---------------------| +| **At-most-once** | Each row reaches the server zero or one times. Rows can be lost. | A "fire and forget" client that does not retransmit on failure. | +| **At-least-once** | Each row reaches the server one or more times. No row is lost; duplicates are possible. 
| A client that retransmits unacknowledged data after a transport error. **This is the QuestDB client default.** | +| **Exactly-once** | Each row is stored exactly once. | At-least-once delivery plus server-side deduplication on a key covering row identity. | + +QuestDB's clients retransmit unacknowledged batches after transport errors, +host failovers, and process restarts. The trade-off is deliberate: losing +data silently is the worse failure mode. The cost is that the application +must tolerate or suppress duplicates. + +## Where duplicates come from + +Three replay paths can resend rows the server already accepted. + +### Client retry on transport error + +The client buffers unacknowledged rows. When the connection breaks before +the server confirms a batch, the client reconnects and re-sends. If the +server had already committed the batch but the acknowledgement was lost in +flight, the second send produces duplicates. + +This path applies to every QuestDB client deployment. + +### Multi-host failover replay + +In a [multi-host](/docs/high-availability/client-failover/concepts/) +Enterprise deployment, the client carries a list of peers. When the primary +fails over to a replica, the client redirects to the new primary and +replays any batches it had not yet seen acknowledged. If the dying primary +committed those batches before the failover took effect, the new primary +applies them again on replay. + +### Store-and-forward replay across sender restarts + +With [store-and-forward](/docs/high-availability/store-and-forward/concepts/) +enabled, the client persists outgoing frames to disk. After a sender +process crash or restart, the next sender instance reads the on-disk queue +and replays everything past the durable-ack watermark. The window between +"the server applied the frame" and "the client recorded the ack" is +exactly the window in which replay produces duplicates. + +This path applies only when `sf_dir` is set on the connect string. 
+ +## Achieving exactly-once + +Three things must hold: + +1. **A user-assigned designated timestamp.** The application chooses the + timestamp for each row (event time), not the server. Server-assigned + timestamps — `atNow()`, `at_now()`, omitting `at()` — change between + the original send and the replay, so the two rows are not identical and + deduplication cannot match them. +2. **A [deduplication](/docs/concepts/deduplication/) key covering row + identity.** Declare `DEDUP UPSERT KEYS(...)` on the target table with + keys that uniquely identify a logical event. The designated timestamp + is always part of the key; add any other columns needed to distinguish + two events that share a timestamp. +3. **Stable values across retransmits.** Any column that participates in + row identity must be derived deterministically from the source event — + not from wall-clock time at the moment of sending, and not from a + per-attempt counter. + +When those three hold, the server treats a replayed batch as already-seen +and skips the write. + +## Recipe + +Define the table with DEDUP on the columns that identify a unique event: + +```questdb-sql +CREATE TABLE trades ( + ts TIMESTAMP, + symbol SYMBOL, + side SYMBOL, + price DOUBLE, + qty DOUBLE +) TIMESTAMP(ts) PARTITION BY DAY WAL +DEDUP UPSERT KEYS(ts, symbol, side); +``` + +In the publishing client, set `ts` explicitly to the event time: + +```java +sender.table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "buy") + .doubleColumn("price", 2615.54) + .doubleColumn("qty", 0.5) + .at(eventInstant); // not atNow() +``` + +If two distinct events can share `(ts, symbol, side)` and both should be +preserved, widen `UPSERT KEYS` to include a column that distinguishes them +— for example a `trade_id` or `seq` column. + +:::warning DEDUP is required on tables behind multi-host failover + +When the client fails over from one primary to another, unacknowledged +batches are replayed against the new primary. 
Without `DEDUP UPSERT KEYS` +covering row identity, those replays produce duplicate rows in the target +table. + +::: + +## When at-least-once is enough + +DEDUP has a cost: the server compares each incoming row against existing +rows with the same keys. For most workloads the cost is invisible; for +high-cardinality keys or heavily out-of-order data, it adds work to the +write path. + +If your application tolerates occasional duplicates — counting events with +a small tolerance, aggregating over a window where one extra row shifts +the average by a negligible amount, append-only logs where uniqueness is +not meaningful — you can skip DEDUP and rely on at-least-once delivery +directly. + +The decision is per-table, not per-deployment: enable DEDUP on the tables +that need exactly-once, leave it off on the tables that don't. + +## Related Enterprise features + +These features change *where* the replay window opens, but do not change +the guarantee — at-least-once still applies, and DEDUP is still the +mechanism that achieves exactly-once. + +- **Durable ACK** + ([`request_durable_ack=on`](/docs/connect/clients/connect-string#durable-ack)) + — the server delays the per-batch acknowledgement until the WAL is + shipped to object storage. This narrows the replay window after primary + failover but does not eliminate it. +- **[Store-and-forward](/docs/high-availability/store-and-forward/concepts/)** + — provides at-least-once across sender process restarts. Replay + semantics from this page apply. +- **[Multi-host client failover](/docs/high-availability/client-failover/concepts/)** + — provides at-least-once across primary failovers. Replay semantics + from this page apply. + +## See also + +- [Deduplication](/docs/concepts/deduplication/) — the server-side + mechanism that makes exactly-once achievable. +- [Designated timestamp](/docs/concepts/designated-timestamp/) — required + for DEDUP and for explicit-timestamp publishing. 
+- [Write-ahead log](/docs/concepts/write-ahead-log/) — when the server + considers a batch durable. +- [Client failover concepts](/docs/high-availability/client-failover/concepts/) + — the multi-host replay path in detail. +- [Store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/) + — the sender-restart replay path in detail. diff --git a/documentation/configuration/cairo-engine.md b/documentation/configuration/cairo-engine.md index 7112f46b6..7ea2cb40a 100644 --- a/documentation/configuration/cairo-engine.md +++ b/documentation/configuration/cairo-engine.md @@ -54,8 +54,8 @@ When `false`, disables the `reload_config()` SQL function. A global timeout in seconds for long-running queries. Per-query overrides are available via the HTTP header -[`Statement-Timeout`](/docs/query/rest-api/#headers) or the Postgres -[`options`](/docs/query/pgwire/overview/) +[`Statement-Timeout`](/docs/connect/compatibility/rest-api/#headers) or the Postgres +[`options`](/docs/connect/compatibility/pgwire/overview/) connection property. ## Commit and write behavior diff --git a/documentation/configuration/ingestion.md b/documentation/configuration/ingestion.md index d0b3fabcb..010225adb 100644 --- a/documentation/configuration/ingestion.md +++ b/documentation/configuration/ingestion.md @@ -280,7 +280,7 @@ yields. :::note The UDP receiver is deprecated since QuestDB version 6.5.2. We recommend -[ILP over HTTP](/docs/ingestion/ilp/overview/) instead. +[ILP over HTTP](/docs/connect/compatibility/ilp/overview/) instead. 
::: diff --git a/documentation/configuration/overview.md b/documentation/configuration/overview.md index 5d2aa018a..8e3a7036b 100644 --- a/documentation/configuration/overview.md +++ b/documentation/configuration/overview.md @@ -537,6 +537,7 @@ http.net.connection.sndbuf=2m | [OpenID Connect (OIDC)](/docs/configuration/oidc/) | OIDC integration | ✓ | | [Parallel SQL execution](/docs/configuration/parallel-sql-execution/) | Query parallelism settings | | | [Postgres wire protocol](/docs/configuration/postgres-wire-protocol/) | PostgreSQL wire protocol connections | | +| [QuestDB Wire Protocol (QWP)](/docs/configuration/qwp/) | QWP protocol limits and UDP receiver | | | [Replication](/docs/configuration/database-replication/) | High availability cluster replication | ✓ | | [Shared workers](/docs/configuration/shared-workers/) | Worker thread pools | | | [Storage policy](/docs/configuration/storage-policy/) | Partition lifecycle management | ✓ | diff --git a/documentation/configuration/qwp.md b/documentation/configuration/qwp.md new file mode 100644 index 000000000..24c3d7ba7 --- /dev/null +++ b/documentation/configuration/qwp.md @@ -0,0 +1,142 @@ +--- +title: QuestDB Wire Protocol (QWP) +description: + Server-side configuration for QWP ingestion and query endpoints. +--- + +QWP is QuestDB's columnar binary protocol for high-throughput data ingestion +(`/write/v4`) and streaming query results (`/read/v1`) over WebSocket and UDP. +These properties control protocol limits and the UDP receiver. WebSocket +ingestion and egress share the HTTP server's network settings (port, TLS, +worker threads); see +[HTTP server configuration](/docs/configuration/http-server/) for those. + +## Protocol limits + +### qwp.max.rows.per.table + +- **Default**: `1000000` +- **Reloadable**: no + +Maximum number of rows per table block in a single QWP message. The server +rejects batches that exceed this limit with a parse error. 
+ +### qwp.max.schemas.per.connection + +- **Default**: `65535` +- **Reloadable**: no + +Maximum number of distinct schemas the server registers per connection. Each +unique combination of column names and types consumes one schema slot. When +the limit is reached, the server rejects further full-schema messages. For +egress connections, a lower soft cap (4,096 by default) triggers a +`CACHE_RESET` frame that clears and restarts the registry before the +connection reaches this hard limit. + +### qwp.max.tables.per.connection + +- **Default**: `10000` +- **Reloadable**: no + +Maximum number of distinct tables a single connection may write to. The +server rejects messages referencing additional tables once this limit is +reached. + +## UDP receiver + +:::note + +The QWP UDP receiver is a fire-and-forget ingestion path for metrics +workloads where occasional message loss is acceptable. It is disabled by +default. For reliable ingestion, use the WebSocket transport. + +::: + +### qwp.udp.bind.to + +- **Default**: `0.0.0.0:9007` +- **Reloadable**: no + +IP address and port the UDP receiver binds to. The default listens on all +network interfaces on port 9007. + +### qwp.udp.commit.interval + +- **Default**: `2000` (milliseconds) +- **Reloadable**: no + +Time interval between commits for data received over UDP. Lower values +reduce the window of uncommitted data at the cost of more frequent I/O. + +### qwp.udp.enabled + +- **Default**: `false` +- **Reloadable**: no + +Enable or disable the QWP UDP receiver. + +### qwp.udp.join + +- **Default**: `224.1.1.1` +- **Reloadable**: no + +Multicast group address the UDP receiver joins. Only relevant when +`qwp.udp.unicast` is `false`. + +### qwp.udp.max.uncommitted.datagrams + +- **Default**: `1048576` +- **Reloadable**: no + +Maximum number of uncommitted datagrams before the receiver forces a commit, +regardless of the time-based commit interval.
+ +### qwp.udp.msg.buffer.size + +- **Default**: `65536` (bytes) +- **Reloadable**: no + +Size of each message buffer allocated for the UDP receiver. + +### qwp.udp.msg.count + +- **Default**: `10000` +- **Reloadable**: no + +Number of message buffers to pre-allocate. Higher values absorb larger +bursts at the cost of more memory. + +### qwp.udp.own.thread + +- **Default**: `true` +- **Reloadable**: no + +When `true`, the UDP receiver runs in a dedicated thread with a busy-spin +loop for lowest latency. When `false`, the receiver uses the shared worker +pool. + +### qwp.udp.own.thread.affinity + +- **Default**: `-1` (no affinity) +- **Reloadable**: no + +CPU core affinity for the dedicated UDP receiver thread. A value of `-1` +lets the OS schedule the thread. Only applies when `qwp.udp.own.thread` is +`true`. + +### qwp.udp.receive.buffer.size + +- **Default**: `-1` (OS default) +- **Reloadable**: no + +OS-level socket receive buffer size in bytes. A value of `-1` uses the +operating system's default. Increase this if you observe datagram drops +under high throughput. + +### qwp.udp.unicast + +- **Default**: `true` +- **Reloadable**: no + +When `true`, the UDP receiver operates in unicast mode. When `false`, it +joins the multicast group specified by `qwp.udp.join`. diff --git a/documentation/connect/agents.md b/documentation/connect/agents.md new file mode 100644 index 000000000..96dd2a583 --- /dev/null +++ b/documentation/connect/agents.md @@ -0,0 +1,242 @@ +--- +slug: /connect/agents +title: Agents +description: + How AI agents operate QuestDB — which protocols they use, what tooling + exists, and how to give them safe access. +--- + +AI agents — Claude Code, Cursor, OpenAI Codex, autonomous research tools — +are first-class clients of QuestDB. They drive the database the same way a +developer would: discover the schema, write SQL, plot results, ingest new +data. 
What changes is the loop: an agent runs that cycle continuously, +often without a human in the inner loop. + +This page covers the three things to know: + +1. [Protocols](#protocols) — which endpoints agents use, and when. +2. [Tooling](#tooling) — concrete agents and skills that work with QuestDB. +3. [Practices](#practices) — how to give an agent safe, scoped access. + +For a hands-on walkthrough with named agents, see +[AI Coding Agents](/docs/getting-started/ai-coding-agents/) in Getting +Started. + +## Protocols + +Agents reach QuestDB through the same interfaces as any other client. The +right choice depends on what the agent is doing and which SDK or framework +it ships with. + +| Interface | Best for | Why | +|-----------------------------------------------------------|---------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [**QWP egress**](/docs/connect/wire-protocols/qwp-egress-websocket/) | The primary path for executing SQL — DDL, exploratory SELECT, and large result streaming. | Binary, columnar, byte-credit flow control, multi-host failover. Use a native [client library](/docs/connect/overview/) when one exists for the agent's runtime; otherwise an agent can implement one directly against the protocol spec. | +| [**QWP ingress**](/docs/connect/wire-protocols/qwp-ingress-websocket/) | The primary path for ingesting data — agentic ETL, sensor feeds, bulk loads. | Native binary protocol with multi-host failover and store-and-forward built into the client. | +| [**REST API**](/docs/connect/compatibility/rest-api/) | Schema discovery and small ad-hoc queries (a few hundred rows or fewer). | HTTP + JSON. Every agent framework supports it; no SDK to install. `SHOW TABLES` / `SHOW COLUMNS` and other lookups map naturally to function-calling tools. 
| + +**QWP egress is the recommended path for any sustained SQL work** — +exploratory or production. Reach for REST when the agent is doing schema +discovery or pulling small result sets that fit comfortably in a single +HTTP response. + +## Tooling + +### General-purpose coding agents + +Claude Code, OpenAI Codex, Cursor, Aider, and similar code-execution agents +work with QuestDB out of the box. They read the public QuestDB documentation +and generate code that talks to a QWP client library or the REST API. No +setup, no MCP server required — point them at a QuestDB endpoint and ask. + +See [AI Coding Agents](/docs/getting-started/ai-coding-agents/) for the +quickstart, including the public demo at `https://demo.questdb.io/`. + +### QuestDB agent skills (Claude) + +The +[QuestDB agent skill](/docs/getting-started/ai-coding-agents/#questdb-agent-skill) +embeds QuestDB-specific context (SQL idioms, ingestion patterns, Grafana +dashboards) directly into the agent. Claude Code loads it on demand, so the +agent produces correct `SAMPLE BY`, `LATEST ON`, and time-series queries on +the first try instead of approximating PostgreSQL syntax. + +The +[TSBS Benchmark skill](/docs/getting-started/ai-coding-agents/#tsbs-benchmark-skill) +goes further: it automates end-to-end ingestion benchmarking, useful when an +agent is evaluating QuestDB against alternative time-series databases. + +## Practices + +### Schema discovery + +Agents need to know the shape of the data before they can query it. The +useful entry points all run over the standard SQL interfaces: + +```questdb-sql +-- List all tables +SHOW TABLES; + +-- Inspect a specific table's columns and types +SHOW COLUMNS FROM trades; + +-- Meta-query: full table metadata including designated timestamp +SELECT * FROM tables(); +``` + +Over REST, the same queries run as `GET /exec?query=SHOW%20TABLES`. + +See the [`SHOW` reference](/docs/query/sql/show/) and +[`tables()`](/docs/query/functions/meta/) for the full surface. 
+ +### Read-only access + +Production deployments should give agents read-only credentials whenever +possible: + +- **Open Source**: configure HTTP basic auth and provide read-only + credentials to the agent. The same credentials authenticate the QWP + endpoints via the WebSocket upgrade. +- **Enterprise**: use [RBAC](/docs/security/rbac/) to create a role with + query-only permissions and assign it to the agent's user. The same role + applies whether the agent connects over REST or QWP. + +Pick the transport by data volume: + +- **Small queries** — schema inspection, parameter lookup, a few hundred + rows — fit naturally on REST `/exec`. The JSON response is directly + consumable by the agent without an SDK. +- **Large result sets** — exporting data into another system, materializing + analytics output — should go through a + [QWP egress client](/docs/connect/wire-protocols/qwp-egress-websocket/). Byte-credit + flow control prevents the agent from being overwhelmed mid-export, and + the binary columnar format keeps wire size low. + +Containing the blast radius this way matters: if the agent's prompt is +compromised or it hallucinates a destructive statement, the credentials +themselves prevent damage. + +### Query budgets + +Agents will write expensive queries while exploring. Set realistic ceilings: + +- Always include `LIMIT` in exploratory queries; the agent rarely needs more + than a few hundred rows to reason about the shape of the data. +- Cap concurrent agent traffic at the reverse proxy (HTTP rate limits) or + via QWP connection limits on the server side. +- Watch the [query log and metrics](/docs/operations/logging-metrics/) for + runaway scans. 
+ +### Write access for ingest + +If the agent is generating ingestion code, not just querying, **QWP is the +recommended path for all writes**: + +- **Bulk upload and sustained ingestion** (agentic ETL, a streaming sensor + feed fronted by an LLM, batch loads from another system): use a + [QWP client library](/docs/connect/overview/). The agent generates + setup code; the runtime gets throughput, multi-host failover, and + store-and-forward for free. +- **No native client for the agent's runtime?** The agent can implement an + uploader directly against the + [QWP ingress wire spec](/docs/connect/wire-protocols/qwp-ingress-websocket/) — the + protocol is fully documented for clean-room implementations and a + minimum-viable client is on the order of a few hundred lines. +- **Quick one-off inserts** during exploration: `INSERT INTO ...` via REST + `/exec` is acceptable for ad-hoc testing, but production write paths + should always be on QWP. + +### Observability + +Treat agent traffic like any production workload: + +- Log all SQL the agent executes (most agent frameworks expose a hook for + pre-execution inspection). +- Surface query latency and result-size metrics — runaway scans show up + there first. +- Audit DDL statements separately if you allow them at all; an agent that + drops a table by accident is a different incident class from one that + writes a slow query. + +## Recipes + +### Uploading CSV or Parquet from the agent's local machine + +**Failure mode to avoid:** SQL functions like `read_parquet()`, +`read_csv()`, and the `COPY` statement all read files from the +**QuestDB server's filesystem** (via `cairo.sql.copy.root`). They do not +work when the agent has the file locally and the database is on another +host — a remote VM, a Docker container, a cloud deployment, or +`demo.questdb.io`. 
An agent reaching for these to upload a local file is +on a dead-end path; it will fail with a permissions error or +"file not found", or waste effort trying to mount a directory it +can't reach. + +**Correct path:** parse the file in the agent's runtime, then push the +rows to QuestDB through a QWP ingress client. + +1. Check the [Ingestion overview](/docs/connect/overview/) for the + current list of QWP client libraries supported in the agent's runtime + language. +2. **Native client available** — the agent reads the file locally + (e.g., pyarrow / pandas / polars for Parquet; the language's built-in + CSV reader for CSV) and streams rows to QuestDB through the client. +3. **No native client for that runtime** — the agent can implement an + uploader directly against the + [QWP ingress wire spec](/docs/connect/wire-protocols/qwp-ingress-websocket/). The + protocol is fully documented for clean-room implementations and a + minimum-viable client (BOOLEAN, LONG, DOUBLE, TIMESTAMP, VARCHAR) is + on the order of a few hundred lines. See the next recipe for the two + patterns that matter for throughput. + +This works regardless of where QuestDB runs — Docker, cloud, +`demo.questdb.io`, remote VM — and gives the agent throughput, +multi-host failover, and store-and-forward for free. + +### Writing a fast QWP ingress uploader + +If the agent is implementing a QWP ingress client against the +[wire spec](/docs/connect/wire-protocols/qwp-ingress-websocket/) — because no native +client exists for its runtime, or as a bespoke one-off uploader — two +patterns make the difference between a slow client and a fast one. An LLM +left to its own devices tends to default to the slow shape because it +"looks correct" and the bottleneck only shows up under load.
+ +**Pipeline frames; don't wait for each ack.** QWP allows many frames in +flight per connection (up to the +[max in-flight batches](/docs/connect/wire-protocols/qwp-ingress-websocket/#protocol-limits) +limit, 128 by default). Acks arrive asynchronously on the same connection, +in send order, and the server-assigned `sequence` field correlates each +ack with its frame. A lock-step `send → await OK → send next` loop wastes +a round-trip time per batch and caps throughput at a small fraction of +what the link supports. Decouple the writer (which streams frames into +the WebSocket) from the reader (which drains OK frames and advances the +ack watermark), and let the writer keep pushing while the reader catches +up. The writer only needs to check **transport-level** backpressure — the +socket's send buffer fill, or a bounded queue between encoder and sender — +not application-level acks. + +**Encode column-major, not row-major.** QWP's wire format lays out all +values for column 0 first, then all values for column 1, and so on. Source +data from columnar formats (Parquet, Arrow, columnar DB exports) is +already in this shape; preserve it end-to-end. An encoder that +materialises an intermediate row-major buffer — pseudocode +`for row in rows: for col in cols: emit(row[col])` — pays for the +allocation, breaks CPU cache locality, and prevents the bulk memcpy / SIMD +path that fixed-width column buffers would otherwise allow. The right +shape is `for col in cols: bulkCopy(columnBuffers[col])` — one tight loop +per column, often a single bulk copy for fixed-width types. + +These two changes compound: a pipelined, column-major client is often +several-fold faster than a lock-step, row-major one — sometimes the +difference between "the client is the bottleneck" and "the link +saturates". 
+ +## Next steps + +- **Quickstart**: [AI Coding Agents](/docs/getting-started/ai-coding-agents/) +- **Query interfaces**: [QWP egress (WebSocket)](/docs/connect/wire-protocols/qwp-egress-websocket/), + [REST API](/docs/connect/compatibility/rest-api/) +- **Ingest interfaces**: [Ingestion overview](/docs/connect/overview/), + [QWP ingress (WebSocket)](/docs/connect/wire-protocols/qwp-ingress-websocket/) +- **Operating safely**: [RBAC](/docs/security/rbac/) (Enterprise), + [TLS](/docs/security/tls/) diff --git a/documentation/cookbook/integrations/grafana/read-only-user.md b/documentation/cookbook/integrations/grafana/read-only-user.md index 064b64af0..154979df7 100644 --- a/documentation/cookbook/integrations/grafana/read-only-user.md +++ b/documentation/cookbook/integrations/grafana/read-only-user.md @@ -75,7 +75,7 @@ After enabling, you have two separate users: - Use for: Grafana dashboards, monitoring tools, analytics applications :::info Related Documentation -- [PostgreSQL wire protocol](/docs/query/pgwire/overview/) +- [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) - [QuestDB Enterprise RBAC](/docs/security/rbac/) - [Configuration reference](/docs/configuration/overview/) - [Grafana QuestDB data source](https://grafana.com/grafana/plugins/questdb-questdb-datasource/) diff --git a/documentation/cookbook/integrations/opcua-dense-format.md b/documentation/cookbook/integrations/opcua-dense-format.md index b5a0826bd..a91f97211 100644 --- a/documentation/cookbook/integrations/opcua-dense-format.md +++ b/documentation/cookbook/integrations/opcua-dense-format.md @@ -300,6 +300,6 @@ OPC-UA timestamps may have different precision than QuestDB expects. 
Ensure: :::info Related Documentation - [Telegraf OPC-UA plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/opcua) - [Telegraf merge aggregator](https://github.com/influxdata/telegraf/tree/master/plugins/aggregators/merge) -- [QuestDB ILP reference](/docs/ingestion/ilp/overview/) -- [InfluxDB Line Protocol](/docs/ingestion/ilp/overview/) +- [QuestDB ILP reference](/docs/connect/compatibility/ilp/overview/) +- [InfluxDB Line Protocol](/docs/connect/compatibility/ilp/overview/) ::: diff --git a/documentation/cookbook/operations/check-transaction-applied.md b/documentation/cookbook/operations/check-transaction-applied.md index 75a6c29c1..51ecbe721 100644 --- a/documentation/cookbook/operations/check-transaction-applied.md +++ b/documentation/cookbook/operations/check-transaction-applied.md @@ -31,5 +31,5 @@ Another viable approach is to run `SELECT count(*) FROM my_table` and verify the :::info Related Documentation - [Write-Ahead Log concept](/docs/concepts/write-ahead-log/) - [Meta functions reference](/docs/query/functions/meta/) -- [InfluxDB Line Protocol overview](/docs/ingestion/ilp/overview/) +- [InfluxDB Line Protocol overview](/docs/connect/compatibility/ilp/overview/) ::: diff --git a/documentation/cookbook/operations/copy-data-between-instances.md b/documentation/cookbook/operations/copy-data-between-instances.md index f26f82be3..54244449c 100644 --- a/documentation/cookbook/operations/copy-data-between-instances.md +++ b/documentation/cookbook/operations/copy-data-between-instances.md @@ -30,10 +30,10 @@ This reads from the source instance using PostgreSQL wire protocol and writes to ## Alternative: Export endpoint -You can also use [the export endpoint](/docs/query/rest-api/#exp---export-data) to export data to CSV or other formats. +You can also use [the export endpoint](/docs/connect/compatibility/rest-api/#exp---export-data) to export data to CSV or other formats. 
:::info Related Documentation -- [ILP ingestion](/docs/ingestion/overview/) -- [PostgreSQL wire protocol](/docs/query/pgwire/overview/) -- [REST API export](/docs/query/rest-api/#exp---export-data) +- [ILP ingestion](/docs/connect/overview/) +- [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) +- [REST API export](/docs/connect/compatibility/rest-api/#exp---export-data) ::: diff --git a/documentation/cookbook/operations/csv-import-milliseconds.md b/documentation/cookbook/operations/csv-import-milliseconds.md index 330476439..96b5c1469 100644 --- a/documentation/cookbook/operations/csv-import-milliseconds.md +++ b/documentation/cookbook/operations/csv-import-milliseconds.md @@ -62,6 +62,6 @@ Read the CSV line-by-line and convert, then send via the ILP client. :::info Related Documentation - [CSV import](/docs/getting-started/web-console/import-csv/) -- [ILP ingestion](/docs/ingestion/overview/) +- [ILP ingestion](/docs/connect/overview/) - [read_parquet()](/docs/query/functions/parquet/) ::: diff --git a/documentation/cookbook/operations/docker-compose-config.md b/documentation/cookbook/operations/docker-compose-config.md index 96e77ee7b..434d2eb37 100644 --- a/documentation/cookbook/operations/docker-compose-config.md +++ b/documentation/cookbook/operations/docker-compose-config.md @@ -98,5 +98,5 @@ For a full list of available configuration parameters, see: :::info Related Documentation - [Server Configuration](/docs/configuration/overview/) - [Docker Deployment Guide](/docs/deployment/docker/) -- [PostgreSQL Wire Protocol](/docs/query/pgwire/overview/) +- [PostgreSQL Wire Protocol](/docs/connect/compatibility/pgwire/overview/) ::: diff --git a/documentation/cookbook/operations/store-questdb-metrics.md b/documentation/cookbook/operations/store-questdb-metrics.md index ffb596e5e..782df659e 100644 --- a/documentation/cookbook/operations/store-questdb-metrics.md +++ b/documentation/cookbook/operations/store-questdb-metrics.md @@ -52,6 +52,6 @@ 
A few things to note: :::info Related Documentation - [QuestDB metrics](/docs/operations/logging-metrics/) -- [ILP ingestion](/docs/ingestion/overview/) +- [ILP ingestion](/docs/connect/overview/) - [Telegraf documentation](https://docs.influxdata.com/telegraf/) ::: diff --git a/documentation/cookbook/operations/tls-pgbouncer.md b/documentation/cookbook/operations/tls-pgbouncer.md index 6db184d79..f36344d39 100644 --- a/documentation/cookbook/operations/tls-pgbouncer.md +++ b/documentation/cookbook/operations/tls-pgbouncer.md @@ -51,7 +51,7 @@ Traffic will be unencrypted between PgBouncer and QuestDB. This setup is only su :::info Related Documentation -- [PostgreSQL wire protocol](/docs/query/pgwire/overview/) +- [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) - [QuestDB security](/docs/security/tls/) - [PgBouncer documentation](https://www.pgbouncer.org/config.html) ::: diff --git a/documentation/cookbook/programmatic/cpp/missing-columns.md b/documentation/cookbook/programmatic/cpp/missing-columns.md index 2c31b5698..fe2991ac9 100644 --- a/documentation/cookbook/programmatic/cpp/missing-columns.md +++ b/documentation/cookbook/programmatic/cpp/missing-columns.md @@ -113,5 +113,5 @@ int main() :::info Related Documentation - [QuestDB C++ client documentation](https://github.com/questdb/c-questdb-client) -- [ILP reference](/docs/ingestion/ilp/overview/) +- [ILP reference](/docs/connect/compatibility/ilp/overview/) ::: diff --git a/documentation/cookbook/programmatic/php/inserting-ilp.md b/documentation/cookbook/programmatic/php/inserting-ilp.md index 2695b88cc..8acddc61f 100644 --- a/documentation/cookbook/programmatic/php/inserting-ilp.md +++ b/documentation/cookbook/programmatic/php/inserting-ilp.md @@ -47,7 +47,7 @@ The format consists of: - **Columns** (fields): Space-separated, then comma-separated key-value pairs for numerical or string data - **Timestamp** (optional): Nanosecond-precision timestamp; if omitted, QuestDB uses server 
time -For complete ILP specification, see the [ILP reference documentation](/docs/ingestion/ilp/overview/). +For complete ILP specification, see the [ILP reference documentation](/docs/connect/compatibility/ilp/overview/). ## ILP over HTTP @@ -351,7 +351,7 @@ TCP ILP does not provide acknowledgments for successful writes. If the connectio | **Recommended for** | Custom timestamps required | Ease of development, server timestamps acceptable | High-volume, loss-tolerant scenarios | :::info Related Documentation -- [ILP reference documentation](/docs/ingestion/ilp/overview/) -- [HTTP REST API](/docs/query/rest-api/) +- [ILP reference documentation](/docs/connect/compatibility/ilp/overview/) +- [HTTP REST API](/docs/connect/compatibility/rest-api/) - [Authentication and security](/docs/security/rbac/) ::: diff --git a/documentation/cookbook/programmatic/ruby/inserting-ilp.md b/documentation/cookbook/programmatic/ruby/inserting-ilp.md index 41ccd10c4..5fa3c473b 100644 --- a/documentation/cookbook/programmatic/ruby/inserting-ilp.md +++ b/documentation/cookbook/programmatic/ruby/inserting-ilp.md @@ -348,8 +348,8 @@ TCP ILP has no acknowledgments. 
If the connection drops, data may be lost silent ::: :::info Related Documentation -- [ILP reference](/docs/ingestion/ilp/overview/) -- [ILP over HTTP](/docs/ingestion/ilp/overview/#transport-selection) -- [ILP over TCP](/docs/ingestion/ilp/overview/#transport-selection) +- [ILP reference](/docs/connect/compatibility/ilp/overview/) +- [ILP over HTTP](/docs/connect/compatibility/ilp/overview/#transport-selection) +- [ILP over TCP](/docs/connect/compatibility/ilp/overview/#transport-selection) - [InfluxDB Ruby client](https://github.com/influxdata/influxdb-client-ruby) ::: diff --git a/documentation/cookbook/programmatic/tls-ca-configuration.md b/documentation/cookbook/programmatic/tls-ca-configuration.md index dc8847476..e836555a4 100644 --- a/documentation/cookbook/programmatic/tls-ca-configuration.md +++ b/documentation/cookbook/programmatic/tls-ca-configuration.md @@ -96,7 +96,7 @@ The examples are in Rust but the concepts are similar in other languages. Check :::info Related Documentation - [QuestDB Rust client](https://docs.rs/questdb/) -- [QuestDB Python client](/docs/ingestion/clients/python/) -- [QuestDB C++ client](/docs/ingestion/clients/c-and-cpp/) +- [QuestDB Python client](/docs/connect/clients/python/) +- [QuestDB C++ client](/docs/connect/clients/c-and-cpp/) - [QuestDB TLS configuration](/docs/security/tls/) ::: diff --git a/documentation/deployment/aws.md b/documentation/deployment/aws.md index c2e9ac0a8..b94f1e64a 100644 --- a/documentation/deployment/aws.md +++ b/documentation/deployment/aws.md @@ -190,7 +190,7 @@ pg.password=your_secure_password ``` **InfluxDB line protocol** - edit `conf/auth.json`. See -[ILP authentication](/docs/ingestion/ilp/overview/#authentication). +[ILP authentication](/docs/connect/compatibility/ilp/overview/#authentication). 
Restart after changes: diff --git a/documentation/deployment/azure.md b/documentation/deployment/azure.md index 6a2e05d31..8351978ea 100644 --- a/documentation/deployment/azure.md +++ b/documentation/deployment/azure.md @@ -262,7 +262,7 @@ pg.password=your_secure_password ``` **InfluxDB line protocol** - edit `conf/auth.json`. See -[ILP authentication](/docs/ingestion/ilp/overview/#authentication). +[ILP authentication](/docs/connect/compatibility/ilp/overview/#authentication). Restart after changes: diff --git a/documentation/deployment/digital-ocean.md b/documentation/deployment/digital-ocean.md index a5ff85d87..982179d75 100644 --- a/documentation/deployment/digital-ocean.md +++ b/documentation/deployment/digital-ocean.md @@ -99,7 +99,7 @@ pg.password=... ``` For details on authentication using InfluxDB line protocol, see the -[InfluxDB line protocol authentication guide](/docs/ingestion/ilp/overview/#authentication). +[InfluxDB line protocol authentication guide](/docs/connect/compatibility/ilp/overview/#authentication). ### Disabling authentication diff --git a/documentation/deployment/docker.md b/documentation/deployment/docker.md index 962f04526..57563e8af 100644 --- a/documentation/deployment/docker.md +++ b/documentation/deployment/docker.md @@ -53,16 +53,16 @@ Below each parameter is described in detail. This parameter will expose a port to the host. 
You can specify: -- `-p 9000:9000` - [REST API](/docs/query/rest-api/) and +- `-p 9000:9000` - [REST API](/docs/connect/compatibility/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) -- `-p 9009:9009` - [InfluxDB line protocol](/docs/ingestion/ilp/overview/) -- `-p 8812:8812` - [Postgres wire protocol](/docs/query/pgwire/overview/) +- `-p 9009:9009` - [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) +- `-p 8812:8812` - [Postgres wire protocol](/docs/connect/compatibility/pgwire/overview/) - `-p 9003:9003` - [Min health server](/docs/operations/logging-metrics/#minimal-http-server) All ports are optional, you can pick only the ones you need. For example, it is enough to expose `8812` if you only plan to use -[Postgres wire protocol](/docs/query/pgwire/overview/). +[Postgres wire protocol](/docs/connect/compatibility/pgwire/overview/). ### `-v` parameter to mount storage @@ -161,11 +161,11 @@ the root cause. When QuestDB is running, you can start interacting with it: - Port `9000` is for REST. More info is available on the - [REST documentation page](/docs/query/rest-api/). + [REST documentation page](/docs/connect/compatibility/rest-api/). - Port `8812` is used for Postgres. Check our - [Postgres reference page](/docs/query/pgwire/overview/). + [Postgres reference page](/docs/connect/compatibility/pgwire/overview/). - Port `9009` is dedicated to InfluxDB Line Protocol. Consult our - [InfluxDB protocol page](/docs/ingestion/ilp/overview/). + [InfluxDB protocol page](/docs/connect/compatibility/ilp/overview/). ## Data persistence diff --git a/documentation/deployment/hetzner.md b/documentation/deployment/hetzner.md index a4b809cf9..6a5b4d35f 100644 --- a/documentation/deployment/hetzner.md +++ b/documentation/deployment/hetzner.md @@ -128,9 +128,9 @@ Replace `` with your actual public IP address. 
For production deploymen ::: **Default QuestDB Ports:** -- `9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/query/rest-api/) -- `8812`: [PostgreSQL wire protocol](/docs/query/pgwire/overview/) -- `9009`: [InfluxDB line protocol](/docs/ingestion/ilp/overview/) (TCP) +- `9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/connect/compatibility/rest-api/) +- `8812`: [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) +- `9009`: [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) (TCP) - `9003`: [Health monitoring](/docs/operations/logging-metrics/#minimal-http-server) and Prometheus metrics Add firewall rules for additional ports as needed for your specific use case. @@ -239,9 +239,9 @@ renderText={(release) => ( /> **Port mappings explained:** -- `-p 9000:9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/query/rest-api/) -- `-p 9009:9009`: [InfluxDB line protocol](/docs/ingestion/ilp/overview/) (TCP) -- `-p 8812:8812`: [PostgreSQL wire protocol](/docs/query/pgwire/overview/) +- `-p 9000:9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/connect/compatibility/rest-api/) +- `-p 9009:9009`: [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) (TCP) +- `-p 8812:8812`: [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) - `-p 9003:9003`: [Health monitoring](/docs/operations/logging-metrics/#minimal-http-server) and Prometheus metrics :::tip Port Selection @@ -570,7 +570,7 @@ questdb01$ psql -c "SELECT version();" Expected output should show QuestDB version information, confirming successful database connectivity. -For more details on QuestDB's PostgreSQL compatibility, see the [PostgreSQL wire protocol](/docs/query/pgwire/overview/) documentation. 
+For more details on QuestDB's PostgreSQL compatibility, see the [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) documentation. ### Manual Backup Test diff --git a/documentation/deployment/kubernetes.md b/documentation/deployment/kubernetes.md index 9a954bcf4..638294d64 100644 --- a/documentation/deployment/kubernetes.md +++ b/documentation/deployment/kubernetes.md @@ -70,10 +70,10 @@ kubectl port-forward my-questdb-0 9000 The following ports may also be used: -- 9000: [REST API](/docs/query/rest-api/) and +- 9000: [REST API](/docs/connect/compatibility/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) -- 8812: [Postgres](/docs/query/pgwire/overview/) -- 9009: [InfluxDB line protocol](/docs/ingestion/ilp/overview/) +- 8812: [Postgres](/docs/connect/compatibility/pgwire/overview/) +- 9009: [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) ## Customizing the deployment diff --git a/documentation/getting-started/ai-coding-agents.mdx b/documentation/getting-started/ai-coding-agents.mdx index fbbb45447..c31744cd4 100644 --- a/documentation/getting-started/ai-coding-agents.mdx +++ b/documentation/getting-started/ai-coding-agents.mdx @@ -180,8 +180,8 @@ The agent will handle everything from installing prerequisites through reporting ## Next steps -- [REST API reference](/docs/query/rest-api/) - API documentation +- [REST API reference](/docs/connect/compatibility/rest-api/) - API documentation - [SQL overview](/docs/query/overview/) - QuestDB SQL syntax -- [Client libraries](/docs/ingestion/overview/) - Official client libraries +- [Client libraries](/docs/connect/overview/) - Official client libraries - [Sample datasets](https://github.com/questdb/sample-datasets) - Example data to try diff --git a/documentation/getting-started/capacity-planning.md b/documentation/getting-started/capacity-planning.md index 0257762a5..be4759d44 100644 --- a/documentation/getting-started/capacity-planning.md +++ 
b/documentation/getting-started/capacity-planning.md @@ -209,7 +209,7 @@ As of QuestDB 7.4.2, InfluxDB Line Protocol operates over HTTP instead of TCP. As such, ILP is optimal out-of-the box. -See your [ILP client](/docs/ingestion/overview/#first-party-clients) for +See your [ILP client](/docs/connect/overview/#client-libraries) for language-specific configurations. ### Postgres Wire Protocol diff --git a/documentation/getting-started/create-database.md b/documentation/getting-started/create-database.md index d148fc271..8d620784f 100644 --- a/documentation/getting-started/create-database.md +++ b/documentation/getting-started/create-database.md @@ -12,8 +12,8 @@ specific types. For most applications, you will import your data using methods like the InfluxDB Line Protocol, CSV imports, or integration with third-party tools such as -Telegraf, [Kafka](/docs/ingestion/message-brokers/kafka), or Prometheus. If your interest lies in data ingestion rather -than generation, refer to our [ingestion overview](/docs/ingestion/overview/). +Telegraf, [Kafka](/docs/connect/message-brokers/kafka), or Prometheus. If your interest lies in data ingestion rather +than generation, refer to our [ingestion overview](/docs/connect/overview/). Alternatively, the [QuestDB demo instance](https://demo.questdb.io) offers a practical way to explore data creation and manipulation without setting up your dataset. @@ -31,8 +31,8 @@ All commands are run through the [Web Console](/docs/getting-started/web-console `http://localhost:9000`. You can also run the same SQL via the -[Postgres endpoint](/docs/query/pgwire/overview/) or the -[REST API](/docs/query/rest-api/). +[Postgres endpoint](/docs/connect/compatibility/pgwire/overview/) or the +[REST API](/docs/connect/compatibility/rest-api/). If QuestDB is not running locally, checkout the [quick start](/docs/getting-started/quick-start/). 
diff --git a/documentation/getting-started/enterprise-quick-start.md b/documentation/getting-started/enterprise-quick-start.md index 1e1f64483..0ac0fe152 100644 --- a/documentation/getting-started/enterprise-quick-start.md +++ b/documentation/getting-started/enterprise-quick-start.md @@ -266,11 +266,11 @@ the server's certificate. For local testing with self-signed certificates, you c Connecting a client to ILP is a common path. -However, you may use something like [Kafka](/docs/ingestion/message-brokers/kafka). +However, you may use something like [Kafka](/docs/connect/message-brokers/kafka). For more on ILP ingestion, see: -- [ILP Overview](/docs/ingestion/ilp/overview/) — Protocol details and configuration -- [Ingestion Overview](/docs/ingestion/overview/) — Client libraries and ingestion methods +- [ILP Overview](/docs/connect/compatibility/ilp/overview/) — Protocol details and configuration +- [Ingestion Overview](/docs/connect/overview/) — Client libraries and ingestion methods ## 5. Ingest data, Kafka Connect (optional) @@ -344,7 +344,7 @@ client.conf.string=https::addr=localhost:9000;token=qt1KAsf1U9YbUVAX1H2IahXEE3-4 Once you deploy this configuration, the connector will start sending data from your Kafka topic to QuestDB. If you encounter any issues, check the logs for both your Kafka Connect worker and your QuestDB server for more details. -See the [QuestDB Kafka Connector documentation](/docs/ingestion/message-brokers/kafka/#questdb-kafka-connect-connector) for more details +See the [QuestDB Kafka Connector documentation](/docs/connect/message-brokers/kafka/#questdb-kafka-connect-connector) for more details on the configuration options and how to set up the connector. ## 6. Query data, PostgreSQL query @@ -404,7 +404,7 @@ This covers the very basics of user creation and service accounts. We have an `ingest` service account and a `dashboard` service account. 
For more on querying, see: -- [PostgreSQL Wire Protocol](/docs/query/pgwire/overview/) — Connection details and compatibility +- [PostgreSQL Wire Protocol](/docs/connect/compatibility/pgwire/overview/) — Connection details and compatibility - [Query & SQL Overview](/docs/query/overview/) — SQL syntax and functions > For the full role-based access control docs, including group management, see @@ -574,7 +574,7 @@ Enterprise. If you're new to QuestDB, consider checking out: -- [Ingestion overview](/docs/ingestion/overview/): Learn the various ingestion +- [Ingestion overview](/docs/connect/overview/): Learn the various ingestion methods and their benefits and tradeoffs, and pick a language client. - [Query & SQL overview](/docs/query/overview/): Learn how to query QuestDB. diff --git a/documentation/getting-started/quick-start.mdx b/documentation/getting-started/quick-start.mdx index 424d1a006..d40dda310 100644 --- a/documentation/getting-started/quick-start.mdx +++ b/documentation/getting-started/quick-start.mdx @@ -258,9 +258,9 @@ It works? You're ready to bring your data. | Port | Service | |------|---------| -| `9000` | [REST API](/docs/query/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) | -| `9009` | [InfluxDB Line Protocol (ILP)](/docs/ingestion/ilp/overview/) - Legacy TCP, use HTTP instead | -| `8812` | [PostgreSQL Wire Protocol](/docs/query/pgwire/overview/) | +| `9000` | [REST API](/docs/connect/compatibility/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) | +| `9009` | [InfluxDB Line Protocol (ILP)](/docs/connect/compatibility/ilp/overview/) - Legacy TCP, use HTTP instead | +| `8812` | [PostgreSQL Wire Protocol](/docs/connect/compatibility/pgwire/overview/) | | `9003` | [Health endpoint](/docs/operations/logging-metrics/#minimal-http-server) | ## Bring your data @@ -273,7 +273,7 @@ Choose from one of our premium ingest-only language clients: -_Want more options? 
See the [ingestion overview](/docs/ingestion/overview/)._ +_Want more options? See the [ingestion overview](/docs/connect/overview/)._ ### Create new data diff --git a/documentation/high-availability/client-failover/concepts.md b/documentation/high-availability/client-failover/concepts.md new file mode 100644 index 000000000..77e4d1319 --- /dev/null +++ b/documentation/high-availability/client-failover/concepts.md @@ -0,0 +1,243 @@ +--- +title: Client failover concepts +sidebar_label: Concepts +description: + How QuestDB clients detect a failed primary and transparently switch to a + healthy peer using multi-host addr lists, host-health classification, role + filtering, and zone-aware selection. +--- + +import { EnterpriseNote } from "@site/src/components/EnterpriseNote" + + + Client failover is most useful with QuestDB Enterprise + [primary-replica replication](/docs/high-availability/overview/). OSS users + with a single instance gain limited benefit from multi-host configuration. + + +When a QuestDB cluster fails over from one primary to another — whether through +a planned promotion, a rolling upgrade, or an unplanned outage — clients with a +single hard-coded address must be reconfigured and restarted. A failover-aware +client instead carries the full list of peers and walks that list automatically +when the current connection breaks. + +This page explains the model. The user-facing knobs and worked examples live in +the [Configuration](/docs/high-availability/client-failover/configuration/) +page. + +## What failover does + +You give the client a comma-separated list of endpoints: + +``` +addr=node-a:9000,node-b:9000,node-c:9000 +``` + +The client picks one, connects, and uses it until that connection breaks. When +it breaks, the client walks the rest of the list, classifies what it found at +each host, and either reconnects or surfaces a failure to your code. 
The exact +loop that drives this depends on whether you are ingesting (long-lived +background reconnect) or querying (per-request retry budget). Both loops share +the same primitives described here. + +## Host health model + +For every entry in `addr`, the client tracks two attributes: a **state** and a +**zone tier**. + +### State + +The state records what the client most recently observed when it tried that +host. + +| State | When the client moves a host here | +|---|---| +| `Healthy` | The last connect attempt succeeded. | +| `Unknown` | The host has not been tried in this round, or its classification was reset. | +| `TransientReject` | The server returned `421` with `X-QuestDB-Role: PRIMARY_CATCHUP` — it is a primary that is still catching up after promotion. Expected to recover. | +| `TransportError` | TCP/TLS handshake failed, an HTTP upgrade returned a transient error code, or an established connection broke mid-stream. | +| `TopologyReject` | The server returned `421` with any role other than `PRIMARY_CATCHUP` (`PRIMARY`, `REPLICA`, `STANDALONE`, or an unrecognised token), or — on egress — a successfully-upgraded host whose `SERVER_INFO` role does not satisfy the requested `target=` filter. The host will not become usable without a topology change. | + +States are listed in priority order: when the client picks the next host to +try, it prefers the host whose state appears earliest in the table above. + +### Zone tier + +Each host is also classified relative to the client's configured `zone=`: + +| Zone tier | Meaning | +|---|---| +| `Same` | Server's advertised zone matches the client's `zone=` (case-insensitive), or `zone=` is unset, or `target=primary`. | +| `Unknown` | Server has not advertised a zone yet. | +| `Other` | Server advertised a different zone. | + +Zone information is advertised by the server on a successful upgrade and +on `421` rejects.
Once observed, the client remembers a host's zone tier for +the lifetime of that client — it persists across rounds and reconnects until +the host re-advertises a different zone. + +`target=primary` collapses every host's zone tier to `Same` — writers must +follow the primary regardless of geography. Ingress is currently zone-blind in +both storage modes, so the `zone=` key is silently accepted on ingress +connections and only takes effect on egress. + +### Selection priority + +When the client needs to pick the next host, it sorts by the tuple `(state, +zone_tier)` lexicographically — state first, zone second. So a known-good host +in another zone wins against an untried local host. Within a tied bucket, the +order in your `addr=` list is preserved verbatim. + +The client does **not** shuffle, randomise, or load-balance across peers. +Cluster-level load balancing is the responsibility of QuestDB's server-side +coordinators. If you need a different first-pick distribution across many +simultaneously-starting clients, rotate the connect string at deployment time. + +## Sticky-Healthy across rounds + +Once the client lands on a `Healthy` host, that host stays the priority pick on +the next round of failover — provided its zone tier is still `Same`. This +avoids unnecessary churn after a short blip: a momentary network glitch +doesn't promote a different node into the active slot just because it +happened to be probed first. + +`Healthy` hosts in another zone are reset to `Unknown` between outages rather +than kept sticky. Otherwise a once-healthy cross-zone host would lock the +client out of probing local hosts after they recover. 
+ +## Role filter (`target=`) + +The `target=` key controls which server role the client is willing to bind to: + +| `target=` | STANDALONE | PRIMARY | REPLICA | PRIMARY_CATCHUP | +|---|---|---|---|---| +| `any` (default) | accept | accept | accept | accept (transient) | +| `primary` | accept | accept | reject (topology) | accept (transient) | +| `replica` | reject (topology) | reject (topology) | accept | reject (topology) | + +`PRIMARY_CATCHUP` is a primary that has been promoted but has not yet caught +up to its predecessor's WAL — the client treats it as transient and retries +the same host (with a fresh round, no exponential backoff) until it either +becomes a full `PRIMARY` or the outage budget expires. + +A `421 Misdirected Request` response **without** an `X-QuestDB-Role` header +is treated as a generic transport error, not a role reject — the client walks +to the next host but does not pin the rejecting host as topology-unreachable. + +`target=replica` is intended for read-side workloads that explicitly want to +spread query load across read-only peers (see the egress flow below). + +## Two failover contexts + +Failover applies to both directions of QWP traffic, but the two contexts have +very different goals. + +### Ingress (writes) + +The ingress reconnect loop sits inside the store-and-forward I/O thread. It +runs continuously in the background, retrying through outages while the +producer keeps appending to the local buffer. 
The defaults are tuned for +throughput-oriented workloads that can tolerate minutes of server unavailability: + +- Initial backoff: `100 ms` +- Maximum backoff: `5 s` +- Per-outage budget: `5 minutes` (`reconnect_max_duration_millis`) +- Jitter: **equal-jitter** `[base, 2·base)` — non-zero lower bound damps + reconnect storms when many producers share a cluster +- Inter-host pause within a round: **none** — the client walks the full + address list as fast as `auth_timeout_ms` allows, paying one backoff + sleep at round exhaustion + +See the [store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/) +page for how the reconnect loop interacts with the disk-backed segment ring. +Replay across a primary failover delivers at-least-once — see +[Delivery semantics](/docs/concepts/delivery-semantics/) for the DEDUP +requirement on affected tables. + +### Egress (queries) + +The egress failover loop wraps each `execute()` call on the read-side query +client. It is interactive: a slow failover is worse than a clear error, so +the budget is short: + +- Initial backoff: `50 ms` +- Maximum backoff: `1 s` +- Total wall-clock budget: `30 s` (`failover_max_duration_ms`) +- Attempt cap: `8` (`failover_max_attempts`) +- Jitter: **full-jitter** `[0, base)` — a single-user query benefits from the + lowest expected recovery time, and one client per workload removes the + thundering-herd concern + +The egress loop also respects the `target=` role filter and prefers same-zone +hosts when `zone=` is set. + +## Error classification + +Every error the client encounters falls into one of three buckets, which drives +the loop's response: + +### Terminal — bypass failover + +The client surfaces the error to your code immediately. Retrying every host +will not help. + +| Condition | Why terminal | +|---|---| +| HTTP `401` / `403` on upgrade | Credentials are cluster-wide; retrying floods server logs without recovery. 
| +| Server-status reject (SF) | Application-layer reject; replay reproduces the same response. | + +### Topology — handled inside the round + +The host is demoted in the priority lattice; the client walks to the next host +within the same round. No exponential backoff is consumed. + +- `421` + `X-QuestDB-Role: PRIMARY_CATCHUP` → `TransientReject` +- `421` + any other non-empty role, including unrecognised tokens → `TopologyReject` +- `SERVER_INFO.Role` does not match the requested `target=` (egress only) → `TopologyReject` + +If every host in a round role-rejects, ingress pays one fixed backoff sleep +(reset to `InitialBackoff`, no doubling) and starts a fresh round; egress +fails the current `execute()` call. + +### Transient — enter backoff + +Everything else: TCP/TLS errors, `auth_timeout_ms` expiry, mid-stream send or +receive failures, `404` / `426` / `503` on upgrade, version mismatches +(per-endpoint — a rolling upgrade in flight does not lock out compatible +peers), and generic frame-decode errors. The client records `TransportError` +and walks to the next host. + +When a round exhausts with transient errors, the client sleeps for the +backoff interval (clamped to the remaining outage budget) and starts the +next round. + +## Mid-stream demotion + +If a connection breaks mid-stream — for example, the receive pump throws after +a successful upgrade — the client marks the failed host as `TransportError` +**before** picking the next host. Without this ordering, the sticky-Healthy +rule would re-pick the same just-failed host as the priority candidate, and +the next attempt would target the broken node again. + +This invariant only matters when you are reading client source code or +debugging a custom implementation. As a user, you observe it as "failover +moves off a broken node on the very next attempt, with no exponential delay +when at least one peer is healthy."
+ +## Authentication is cluster-wide + +A `401` or `403` on the HTTP upgrade is terminal — the client does not retry +other hosts. The assumption is that auth credentials are configured +identically across the cluster, so a credential failure against one node is +a credential failure against all of them. Retrying would spam every peer's +audit log without recovering. + +If your deployment has per-host credentials, that is unsupported and outside +the failover model — split the workload into one connect string per credential. + +## Next steps + +- [Configuration](/docs/high-availability/client-failover/configuration/) — + the connect-string keys and worked examples for each context. +- [Store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/) — + how the ingress failover loop interacts with the disk-backed substrate. diff --git a/documentation/high-availability/client-failover/configuration.md b/documentation/high-availability/client-failover/configuration.md new file mode 100644 index 000000000..064d8c53e --- /dev/null +++ b/documentation/high-availability/client-failover/configuration.md @@ -0,0 +1,158 @@ +--- +title: Client failover configuration +sidebar_label: Configuration +description: + Connect-string keys that configure multi-host failover for QuestDB clients, + including addr lists, zone preference, role filtering, and the ingress and + egress retry budgets. +--- + +This page is the configuration reference for client failover. For the model +behind these keys — host-health states, zone tiers, role filtering, and the +two retry loops — read [Concepts](/docs/high-availability/client-failover/concepts/) +first. + +## Common keys + +`addr` and `auth_timeout_ms` apply to every WS / WSS / HTTP / HTTPS client. +`zone` is accepted everywhere but only takes effect on egress; `target` is an +egress-only key and is rejected as an unknown key on an ingress connect string. 
+They are documented in full on the +[connect-string reference](/docs/connect/clients/connect-string#failover-keys); +the table below summarises the failover-relevant subset. + +| Key | Type | Default | Notes | +|---|---|---|---| +| `addr` | `host:port[,host:port…]` | required | Comma-separated peer list. The two syntactic forms (`addr=h1,h2` and repeated `addr=h1;addr=h2`) accumulate. Empty entries are rejected. | +| `zone` | string | unset | Client's zone identifier (opaque, case-insensitive — `eu-west-1a`, `dc-amsterdam`, etc.). Egress prefers same-zone peers when `target` is `any` or `replica`. Silently accepted but ignored on ingress. | +| `target` | `any` \| `primary` \| `replica` | `any` | **Egress only.** Which server role the query client accepts. Rejected as an unknown key on an ingress connect string. See [Role filter](/docs/high-availability/client-failover/concepts/#role-filter-target) for the role table. | +| `auth_timeout_ms` | int (ms) | `15000` | Upper bound on the HTTP-upgrade response read per host. Does **not** cover the TCP connect or TLS handshake — those use the OS default. Set lower if you have well-known network paths and want faster failover; set higher only if upgrade is genuinely slow. | + +`addr` syntax — both of these are equivalent and produce the same three-peer +list: + +``` +addr=node-a:9000,node-b:9000,node-c:9000 +addr=node-a:9000;addr=node-b:9000;addr=node-c:9000 +``` + +## Ingress (write) + +The ingress reconnect loop is driven by store-and-forward connect-string +keys. See +[Store-and-forward configuration](/docs/high-availability/store-and-forward/configuration/#reconnect-keys) +and the +[connect-string reference](/docs/connect/clients/connect-string#sf-keys) +for the full list. The failover-relevant keys are: + +| Key | Type | Default | Notes | +|---|---|---|---| +| `reconnect_max_duration_millis` | int (ms) | `300000` (5 min) | Per-outage wall-clock budget. Resets on every successful reconnect. 
Size this long enough to span your largest expected failover window, but short enough to surface permanent topology issues. | +| `reconnect_initial_backoff_millis` | int (ms) | `100` | Starting backoff sleep at round exhaustion. Doubles up to `reconnect_max_backoff_millis`. | +| `reconnect_max_backoff_millis` | int (ms) | `5000` | Cap on the exponential backoff. With equal-jitter, the actual sleep lands in `[max, 2·max)` once the base saturates. | +| `initial_connect_retry` | `off` \| `on` \| `async` | `off` | Whether to apply the same retry loop to the very first connect attempt. See below. | + +### `initial_connect_retry` + +By default, the first connect failure is **terminal** — a failed first +attempt typically means a misconfiguration (wrong host, wrong port, no +network), and retrying for five minutes only hides it. + +| Value | Behaviour | +|---|---| +| `off` (default; alias `false`) | First-connect failure is terminal. The producer's call to build the sender throws immediately. | +| `on` (aliases `sync`, `true`) | First-connect failures enter the same reconnect loop as mid-stream failures. The constructor blocks until success or the per-outage budget expires. | +| `async` | The constructor returns immediately; the background I/O thread drives the reconnect loop. The producer experiences backpressure if it tries to publish before the connection comes up. Intended for unattended producers where the SF directory may already carry segments from a prior process and the server may come up later. | + +## Egress (query) + +The egress failover loop wraps each `execute()` call on the read-side query +client. The full key list lives on the +[connect-string reference](/docs/connect/clients/connect-string#egress-failover); +the user-visible knobs are: + +| Key | Type | Default | Notes | +|---|---|---|---| +| `failover` | `on` \| `off` | `on` | Global on/off. With `failover=off`, a single failed `execute()` call surfaces the underlying error without walking the address list.
| +| `failover_max_attempts` | int | `8` | Hard cap on attempts within a single `execute()` call. | +| `failover_max_duration_ms` | int (ms) | `30000` | Wall-clock budget for failover eligibility. Bounds **when failover stops**, not the wall-clock of `execute()` itself — a final `WalkTracker` round can still cost up to `hostCount × auth_timeout_ms` after the budget expires. | +| `failover_backoff_initial_ms` | int (ms) | `50` | Starting backoff sleep. Doubles up to the cap. | +| `failover_backoff_max_ms` | int (ms) | `1000` | Cap on the exponential backoff. With full-jitter, the actual sleep lands in `[0, max)`. | + +## Worked examples + +### Three-node Enterprise cluster, default failover + +Most users need only the `addr` list — defaults cover the rest. + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=node-a:9000,node-b:9000,node-c:9000;sf_dir=/var/lib/qdb-sender;")) { + sender.table("events") + .symbol("source", "edge-42") + .longColumn("count", 1) + .atNow(); +} +``` + +The `ws::` scheme picks the QWP WebSocket transport. `sf_dir` enables the +disk-backed store-and-forward substrate, which keeps unacked data across +sender restarts; see +[Store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/). + +### Zone-aware read replicas + +For read-only queries spread across same-zone replicas, with a primary as +final fallback: + +```java +try (QwpQueryClient client = QwpQueryClient.fromConfig( + "ws::addr=replica-eu-1a:9000,replica-eu-1b:9000,primary:9000;" + + "zone=eu-west-1a;target=any;")) { + client.connect(); + // handler is a QwpColumnBatchHandler that receives the result batches + client.execute("SELECT * FROM trades WHERE ts > now() - 1h", handler); +} +``` + +Setting `target=replica` would skip the primary entirely; `target=any` is +usually preferable so the query still completes after a replica outage. 
+ +### Long-tolerated ingest with async first connect + +Useful for unattended ingest processes (edge sensors, ETL jobs) that may +restart before the server comes up: + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=primary:9000;sf_dir=/var/lib/qdb-sender;" + + "initial_connect_retry=async;" + + "reconnect_max_duration_millis=1800000;")) { + // appendBlocking() will absorb up to sf_max_total_bytes of writes + // while the I/O thread retries the initial connect. +} +``` + +The 30-minute reconnect budget gives a wide failover window; the `async` +initial-connect policy lets the producer thread proceed immediately. + +### Tight egress failover for an interactive dashboard + +```java +try (QwpQueryClient client = QwpQueryClient.fromConfig( + "ws::addr=node-a:9000,node-b:9000;" + + "failover_max_duration_ms=5000;failover_max_attempts=3;")) { + client.connect(); + // Surfaces an error within a few seconds if the cluster is unreachable. +} +``` + +## Where each key is documented + +| Key | Concept | Reference | +|---|---|---| +| `addr`, `zone`, `target`, `auth_timeout_ms` | Host selection, role filter | [connect-string #failover-keys](/docs/connect/clients/connect-string#failover-keys) | +| `reconnect_*`, `initial_connect_retry` | Ingress retry budget | [connect-string #reconnect-keys](/docs/connect/clients/connect-string#reconnect-keys) | +| `failover`, `failover_*` | Egress retry budget | [connect-string #egress-failover](/docs/connect/clients/connect-string#egress-failover) | +| `username` / `password` / `token` | Authentication | [connect-string #auth](/docs/connect/clients/connect-string#auth) | +| `tls_*` | TLS configuration | [connect-string #tls](/docs/connect/clients/connect-string#tls) | diff --git a/documentation/high-availability/overview.md b/documentation/high-availability/overview.md index c36f38bde..4d782e6a7 100644 --- a/documentation/high-availability/overview.md +++ b/documentation/high-availability/overview.md @@ -1,19 +1,35 @@ --- 
-title: Replication overview +title: High availability overview sidebar_label: Overview description: - Learn how QuestDB Enterprise replication works, its benefits, and architecture. + How QuestDB delivers high availability — server-side primary-replica + replication, plus client-side failover and store-and-forward. --- import { EnterpriseNote } from "@site/src/components/EnterpriseNote" - Replication provides high availability and disaster recovery for your QuestDB cluster. + Primary-replica replication is a QuestDB Enterprise feature. Client failover + and store-and-forward are available to all native clients. -QuestDB Enterprise provides **primary-replica replication** for high availability -and disaster recovery. Your data is automatically synced to replica instances -via an object store, with no direct network connections required between nodes. +QuestDB approaches high availability in two layers, and a resilient deployment +usually needs both: + +- **Server-side replication** keeps a hot copy of your data on one or more + replica nodes, so the cluster survives the loss of a node. This is a QuestDB + Enterprise feature, and it is the main subject of this page. +- **Client-side resilience** keeps your applications connected across the + failover that replication makes possible. + [Client failover](/docs/high-availability/client-failover/concepts/) lets a + client walk a list of hosts when its connection breaks, and + [store-and-forward](/docs/high-availability/store-and-forward/concepts/) + buffers unacknowledged data locally so a producer never loses writes during + the gap. + +Replication moves the data; the client-side features make sure your +applications follow it. The rest of this page covers replication — see the +**Client Failover** and **Store-and-Forward** sections for the client side. ## Why use replication? @@ -113,4 +129,10 @@ by you. ## Next steps -Ready to set up replication? Continue to the [Setup Guide](/docs/high-availability/setup/). 
+- [Setup Guide](/docs/high-availability/setup/) — configure object storage, the + primary, and replica nodes. +- [Client failover](/docs/high-availability/client-failover/concepts/) — + configure your applications to follow a primary promotion automatically. +- [Store-and-forward](/docs/high-availability/store-and-forward/concepts/) — + buffer unacknowledged writes on the client so a producer survives an outage + without data loss. diff --git a/documentation/high-availability/setup.md b/documentation/high-availability/setup.md index 7ec89a12b..41d3d43ec 100644 --- a/documentation/high-availability/setup.md +++ b/documentation/high-availability/setup.md @@ -254,6 +254,13 @@ Data committed to the primary but not yet replicated will be lost. Use planned migration if the primary is still functional. ::: +:::tip Keep clients connected across the switch +Promoting a replica only helps if your applications can find the new primary. +Configure clients with a multi-host address list so they fail over +automatically — see +[Client failover](/docs/high-availability/client-failover/concepts/). +::: + ### Point-in-time recovery Restore the database to a specific historical timestamp. @@ -273,3 +280,7 @@ Restore the database to a specific historical timestamp. - [Tuning guide](/docs/high-availability/tuning/) - Optimize replication performance +- [Client failover](/docs/high-availability/client-failover/concepts/) - + Configure your applications with a multi-host address list so they follow a + primary promotion automatically. Replication moves the data; client failover + keeps your clients connected to it. 
diff --git a/documentation/high-availability/store-and-forward/concepts.md b/documentation/high-availability/store-and-forward/concepts.md new file mode 100644 index 000000000..1345b205a --- /dev/null +++ b/documentation/high-availability/store-and-forward/concepts.md @@ -0,0 +1,294 @@ +--- +title: Store-and-forward concepts +sidebar_label: Concepts +description: + How the QuestDB store-and-forward client substrate decouples the producer + from the wire, masks network outages and server restarts, and replays + unacknowledged frames against a fresh connection. +--- + +Store-and-forward (SF) is the client-side substrate that sits between your +application code and the QWP wire transport. It absorbs publishes into a +local ring of fixed-size segments, drains them over a WebSocket connection +on a dedicated I/O thread, and replays any unacknowledged frames after a +disconnect or restart. + +The goal is **producer-never-blocks-on-the-wire**. Your call to `flush()` +returns as soon as data is published into the substrate. Acknowledgements +arrive asynchronously. A network outage or a server restart leaves your +producer code unaffected — the I/O thread quietly reconnects and replays +what remains. In SF mode, even a crash of the sender process itself loses +no unacked data: the next sender on the slot recovers it from disk and +replays it. 
+ +## Two modes + +SF runs in either of two modes selected by the connect string: + +| Aspect | Memory mode | SF mode | +|---|---|---| +| Trigger | `sf_dir` is **unset** | `sf_dir` is set | +| Storage | malloc'd ring in process RAM | mmap'd files under `<sf_dir>/<sender_id>/` | +| Default capacity | `128 MiB` | `10 GiB` | +| Unacked data if the sender crashes | Lost | Recovered and replayed on restart | +| Unacked data if the sender's host reboots | Lost | Recovered, if the disk persists | +| Tolerates transient network blips | Yes | Yes | +| Tolerates multi-minute server outages | Bounded by RAM cap | Bounded by disk cap | +| Recovers another sender's stale slot | n/a | Opt-in via `drain_orphans=on` | + +Both modes share the same reconnect loop, the same backoff and retry +budgets, and the same on-the-wire behaviour. The only difference is +where unacked data lives. + +## What "frame" means here + +A **frame** is one encoded QWP message — typically a batch of rows for one +or more tables. The SF substrate treats frames as opaque payloads with two +properties: a length, and a CRC32C checksum. The append protocol writes the +payload first, the checksum last, and a partial write left behind by a +crash is detected and discarded by the recovery scanner on next start. + +Frames in SF mode are **self-sufficient**: every frame carries the full +schema for every table it touches and the full symbol-dictionary delta +from id 0. That makes a frame replayable against any server connection, +weeks or months later, even after a process restart that wiped all +in-memory schema state. The cost is a small per-batch overhead, which is +accepted for correctness. + +## The FSN model + +Two distinct counters track frame identity: + +- **FSN** (frame-sequence-number) — a monotonic counter assigned when a + frame is appended to the substrate. FSN survives reconnects and (in SF + mode) restarts. It is the substrate's permanent identifier for a frame. 
+- **wireSeq** — the per-connection counter the server echoes in its + acknowledgements, reset to `0` on every successful WebSocket upgrade. + +On every (re)connect the relationship is pinned: + +``` +fsn = fsnAtZero + wireSeq +``` + +where `fsnAtZero` is `ackedFsn + 1` (i.e. the next un-acked FSN). The +client streams frames from disk to the wire in strict FSN order, one frame +per WebSocket binary message, incrementing `wireSeq`. The server echoes +back the same `wireSeq` in its OK frames, and the client maps that back to +the original FSN to advance the trim watermark. + +Two consequences: + +- Frames **must** be sent in strict order. The wire format does not + serialise `wireSeq` — the server assigns it implicitly from receive + order. Reordering breaks the FSN mapping. +- After a reconnect, the server sees the **same payloads** at new + `wireSeq` values. Server-side dedup keys off `messageSequence` inside + the payload, not `wireSeq`, so replay does not produce double-writes. + +## Trim: how unacked data is reclaimed + +The substrate holds frames until the server confirms it has received and +processed them. Each confirmation advances the **acked FSN**, which +allows the manager thread to unlink sealed segment files (in SF mode) or +release ring memory (in memory mode) up to that watermark. + +Two trim drivers exist: + +### Default — OK-driven trim + +Each successful batch produces an **OK frame** carrying the highest +`wireSeq` it acknowledges and the per-table `seqTxn` watermarks that +batch updated. On receipt: + +1. The substrate translates `wireSeq` back to FSN. +2. `ackedFsn` advances to the new value. +3. Any segment whose last FSN is `≤ ackedFsn` is unlinked and its bytes + returned to the available pool. + +This is the default and is sufficient when "data is in the server's WAL" +is the durability bar you need. 
+ +### `request_durable_ack=on` — object-store-durable trim + +When the connect string sets `request_durable_ack=on`, trim is driven by +a separate frame type: `STATUS_DURABLE_ACK`. These frames carry per-table watermarks +for data the server has **already uploaded from the WAL to the configured +object store** (S3, Azure Blob, GCS, or NFS). + +- OK frames still arrive on every batch, but they no longer advance the + trim watermark. Instead, they are stashed alongside their per-table + `seqTxn` values. +- A `STATUS_DURABLE_ACK` frame names tables and their durable `seqTxn` + watermarks. The client matches the head of the OK queue against these + watermarks; each fully-covered head entry pops, and `ackedFsn` + advances to the FSN of the highest covered `wireSeq`. +- The client opt-in is mandatory — the connect fails loudly if the server + does not echo `X-QWP-Durable-Ack: enabled` on the upgrade response. + This avoids the silent failure mode where the producer waits forever + for ack frames that will never arrive. + +Durable-ack mode is the right choice when "data is in the object store" +is the durability bar, but it has two costs: a longer time-to-trim (so +larger steady-state disk usage in SF mode), and a small WebSocket PING +sent every `durable_ack_keepalive_interval_millis` to nudge the server's +flush path when the client is idle but has pending confirmations. + +See [When to use](/docs/high-availability/store-and-forward/when-to-use/) +for the decision. + +## Reconnect and replay + +When the wire connection breaks — for any reason — the I/O thread enters +the reconnect loop documented in +[Client failover concepts](/docs/high-availability/client-failover/concepts/). +The producer is **not notified**: it keeps publishing into the substrate, +bounded by `sf_max_total_bytes` (see backpressure below). + +On every successful (re)connect: + +1. `fsnAtZero = ackedFsn + 1`. +2. `wireSeq` resets to `0`. +3. The read cursor rewinds to the first un-acked frame on disk (or in + memory). +4. 
Frames stream to the wire in FSN order. The server's dedup window + absorbs any frames that landed before the disconnect. +5. New frames appended by the producer during replay are picked up + automatically — the I/O loop watches a volatile `publishedFsn` + cursor. + +Frames sent before the disconnect and re-sent after a reconnect count +in the `getTotalFramesReplayed` observability counter. + +## Backpressure + +The substrate enforces `sf_max_total_bytes` as a hard cap on resident +storage. When the cap is hit, the producer's `appendBlocking` call +busy-spins (with cooperative yield) up to `sf_append_deadline_millis` +waiting for ACK-driven trim to free space. If the deadline fires, the +call throws a typed exception. + +The exception message distinguishes the two scenarios: + +- **Backpressure while the wire is publishing** — the server is acking + but the producer is faster than the server can absorb. Solutions: + raise `sf_max_total_bytes`, slow the producer, or scale the server. +- **Backpressure while reconnecting** — the I/O loop is in the retry + loop and the substrate is filling. The message includes attempt count + and outage start time. Solutions: address the cluster outage, raise + `sf_max_total_bytes`, or accept that the producer will start throwing + once the cap is exhausted. + +## Close and shutdown + +`close()` waits up to `close_flush_timeout_millis` (default 5 s) for +`ackedFsn` to reach `publishedFsn` — i.e. for the server to acknowledge +everything the producer has handed in. If the wait succeeds, all data is +acked. If the timeout fires, a `WARN` is logged and: + +- in **SF mode**, the un-acked tail is left on disk and recovered by the + next sender on the same slot; +- in **memory mode**, the un-acked tail is lost. + +Setting `close_flush_timeout_millis=0` (or `-1`) skips the drain wait +entirely — useful for fast shutdown paths where you do not want to block. 
+Even in this branch, the slot lock is released and segments are unmapped +cleanly, and a non-blocking safety-net check rethrows any latched +terminal error that has not already been delivered through an async +handler or a synchronous producer call. + +## Crash recovery (SF mode) + +When the engine opens an SF-mode sender, it scans the slot directory: + +1. **Acquire the slot lock.** Two senders pointing at the same + `<sf_dir>/<sender_id>/` will collide here and the second one fails to + start, naming the holder's PID in the error message. +2. **Validate every segment file.** Headers are checked, frames are walked + forward verifying each CRC. The first invalid or torn frame becomes + the file's end-of-data; anything past it is discarded. +3. **Reconcile gaps.** Segments are sorted by their `baseSeq` and adjacent + pairs must satisfy `prev.baseSeq + prev.frameCount == curr.baseSeq`. + A gap is a fatal recovery error — the engine refuses to start. +4. **Seed the ack watermark.** Either from `.ack-watermark` (if your + client maintains it; see below) or from the lowest surviving FSN minus + one. +5. **Bump the connection generation** so the I/O loop, on first connect, + replays from disk against a fresh wireSeq window. + +After recovery the producer publishes new frames as normal; the I/O +thread replays the un-acked tail and then drains forward. + +### `.ack-watermark` + +An optional 16-byte file under the slot directory persists the cumulative +durable-ack FSN across process restarts. Without it, recovery seeds the +ack watermark from the lowest surviving segment's `baseSeq - 1` — which +guarantees no data loss, but cannot distinguish which frames inside that +lowest segment the previous sender had already received durable acks +for. Replay therefore re-sends every frame in that segment, producing +row-level duplicates against a still-alive server unless deduplication is +enabled on the target table. 
See +[Delivery semantics](/docs/concepts/delivery-semantics/) for the +at-least-once model and the DEDUP recipe. + +With `.ack-watermark`, recovery clamps the seed to the higher of the +on-disk and watermarked values, so already-durable-acked frames inside +the lowest surviving segment are not re-replayed. + +The file is **optional** — a conformant client may choose not to maintain +it. The reference client does. + +## Orphan adoption + +When the foreground sender's connect string sets `drain_orphans=on`, the +engine scans `<sf_dir>/*` at startup for **sibling slot directories** — +other `sender_id`s under the same group root that contain unacked data +and are not marked `.failed`. For each one, up to +`max_background_drainers` at a time, a background drainer spawns, +acquires the orphan slot's lock (skipping if another process holds it), +opens a separate WebSocket connection, runs the same recovery + replay +flow, and exits when the orphan is fully drained. + +This is the rescue path for a sender that died without draining cleanly +— a process crash, an OOM kill, a host reboot. The replacement process acquires +the orphan's slot lock and clears its disk footprint. Without +`drain_orphans=on` the dead sender's data persists on disk indefinitely +until an operator intervenes. + +The orphan flow is opt-in because in a multi-tenant deployment with +shared `sf_dir`, blindly draining unknown slots may be surprising. + +## Error frames + +Not every server response is an OK. Server errors fall into six +categories, each with a default policy: + +| Category | Default | Meaning | +|---|---|---| +| `SCHEMA_MISMATCH` | `DROP_AND_CONTINUE` | The batch's schema doesn't match the server. Replay won't help — the substrate logs and advances trim past the rejected span. | +| `WRITE_ERROR` | `DROP_AND_CONTINUE` | Per-batch write failure (e.g. table is not currently accepting writes). | +| `PARSE_ERROR` | `HALT` | Almost certainly a client bug. 
The substrate preserves on-disk frames for postmortem. | +| `INTERNAL_ERROR` | `HALT` | Catch-all server fault. | +| `SECURITY_ERROR` | `HALT` | Cluster-wide auth / authorization failure. | +| `PROTOCOL_VIOLATION` | `HALT` (forced) | Connection is gone after a terminal WebSocket close code; no choice. | + +Errors are also delivered to an **error inbox** — a bounded queue +consumed by a daemon dispatcher that invokes your registered handler. +Overflow drops the oldest entry rather than the newest (watermarks are +monotonic; the latest entry is the most informative). The default +handler logs every received error: silence is forbidden by the contract, +because a buggy or no-op handler would hide data loss +indistinguishably from a healthy connection. + +## Next steps + +- [When to use](/docs/high-availability/store-and-forward/when-to-use/) — + decision guide for memory vs SF mode, and when to opt into + durable-ack and orphan adoption. +- [Operating and tuning](/docs/high-availability/store-and-forward/operating-and-tuning/) — + slot directory layout, lock semantics, sizing, observability. +- [Configuration](/docs/high-availability/store-and-forward/configuration/) — + connect-string key reference. +- [Client failover concepts](/docs/high-availability/client-failover/concepts/) — + how the reconnect loop selects hosts and classifies errors. diff --git a/documentation/high-availability/store-and-forward/configuration.md b/documentation/high-availability/store-and-forward/configuration.md new file mode 100644 index 000000000..848ad448d --- /dev/null +++ b/documentation/high-availability/store-and-forward/configuration.md @@ -0,0 +1,198 @@ +--- +title: Store-and-forward configuration +sidebar_label: Configuration +description: + Connect-string keys that configure the QuestDB store-and-forward client + substrate — storage, reconnect, durable-ack, and error-handling. +--- + +This page is the configuration reference for the SF connect-string keys. 
+For the model behind each knob, read +[Concepts](/docs/high-availability/store-and-forward/concepts/); for +operational guidance read +[Operating and tuning](/docs/high-availability/store-and-forward/operating-and-tuning/). + +Shared keys (authentication, TLS, address list) are documented on the +[connect-string reference](/docs/connect/clients/connect-string). +The keys below are the SF-specific subset. + +## Storage keys + +These keys select between memory mode and SF mode and govern on-disk +layout. The single switch is `sf_dir`: unset → memory mode, set → SF +mode. + +| Key | Type | Default | Description | +|---|---|---|---| +| `sf_dir` | path | unset | Group root directory. When set, the slot lives at `<sf_dir>/<sender_id>/` and unacked data is durable across process restarts. When unset, the substrate runs in memory mode. | +| `sender_id` | string | `default` | Slot subdirectory name. Two senders sharing the same `sender_id` and `sf_dir` will collide on the slot lock. Must be non-empty and must not contain path separators. | +| `sf_max_bytes` | size | `4M` | Per-segment file size; rotation threshold. | +| `sf_max_total_bytes` | size | `128M` (memory) / `10G` (SF) | Hard cap on resident SF storage. Triggers producer backpressure when full. | +| `sf_durability` | enum | `memory` | Reserved for future per-batch / per-frame fsync modes. Only `memory` is currently implemented; `flush` and `append` parse but are rejected at build time. | +| `sf_append_deadline_millis` | int (ms) | `30000` | How long a producer `appendBlocking` call waits for ACK-driven trim to free space before throwing. | +| `drain_orphans` | bool | `off` | Scan `<sf_dir>/*` at startup and spawn drainers for sibling slots that contain unacked data. See [orphan adoption](/docs/high-availability/store-and-forward/concepts/#orphan-adoption). | +| `max_background_drainers` | int | `4` | Cap on concurrent orphan drainers. | + +Size values accept integer bytes or unit suffixes (`K`, `M`, `G`, `T`) +using binary multipliers. 
+ +These keys are also documented on the central +[connect-string reference](/docs/connect/clients/connect-string#sf-keys). + +## Reconnect keys + +Govern the in-flight reconnect loop after the wire breaks. Backoff math +and host-walk semantics are documented in +[Client failover concepts](/docs/high-availability/client-failover/concepts/). + +| Key | Type | Default | Description | +|---|---|---|---| +| `reconnect_max_duration_millis` | int (ms) | `300000` (5 min) | Per-outage wall-clock budget. Resets on every successful reconnect. | +| `reconnect_initial_backoff_millis` | int (ms) | `100` | Initial backoff sleep at round exhaustion. | +| `reconnect_max_backoff_millis` | int (ms) | `5000` | Cap on the exponential backoff. With equal-jitter the actual sleep lands in `[max, 2·max)`. | +| `initial_connect_retry` | enum | `off` | `off` (alias `false`): first-connect failure is terminal. `on` (aliases `sync`, `true`): same retry loop as reconnect, blocking the constructor. `async`: same retry loop in the I/O thread, non-blocking. | +| `close_flush_timeout_millis` | int (ms) | `5000` | `close()` blocks up to this long waiting for `ackedFsn ≥ publishedFsn`. `0` or `-1` skips the drain wait. The safety-net `checkError()` still runs. | + +Cross-reference: +[connect-string #reconnect-keys](/docs/connect/clients/connect-string#reconnect-keys). + +## Durable-ack keys + +Opt in to object-store-durable trim. See +[Durable-ack: when to opt in](/docs/high-availability/store-and-forward/when-to-use/#durable-ack-when-to-opt-in). + +| Key | Type | Default | Description | +|---|---|---|---| +| `request_durable_ack` | bool | `off` | Opt-in via the upgrade header `X-QWP-Request-Durable-Ack: true`. Trim is then driven by `STATUS_DURABLE_ACK` frames only; OK frames no longer advance the trim watermark. Connect fails loudly if the server does not echo `X-QWP-Durable-Ack: enabled`. WebSocket transports only. 
| +| `durable_ack_keepalive_interval_millis` | int (ms) | `200` | Cadence of WebSocket PING the I/O loop sends while there are pending durable confirmations and the producer is idle. `0` or negative disables. | + +## Error-handling keys + +| Key | Type | Default | Description | +|---|---|---|---| +| `error_inbox_capacity` | int (≥16) | `256` | Bounded SPSC queue capacity for async error notifications. Overflow drops the oldest entry and increments `getDroppedErrorNotifications`. | +| `on_server_error`, `on_schema_error`, `on_parse_error`, `on_internal_error`, `on_security_error`, `on_write_error` | enum | per category | Override the default policy (`HALT` or `DROP_AND_CONTINUE`) for a category. Reserved in the spec but not yet recognised by the connect-string parser. | + +The per-category defaults are documented in +[Concepts § Error frames](/docs/high-availability/store-and-forward/concepts/#error-frames). +`PROTOCOL_VIOLATION` and `UNKNOWN` are forced `HALT` and not user-overridable. + +## Other relevant keys + +These keys are not SF-specific but affect SF behaviour. See the +[connect-string reference](/docs/connect/clients/connect-string) for the +canonical entries. + +| Key | Type | Default | Description | +|---|---|---|---| +| `addr` | `host[:port][,host[:port]…]` | required | Multi-host failover list. See [Client failover configuration](/docs/high-availability/client-failover/configuration/). | +| `username` / `password` | string | unset | HTTP Basic auth on the upgrade request. | +| `token` | string | unset | Bearer token on the upgrade request. | +| `tls_verify` | enum | `on` | `on` or `unsafe_off`. Applies to `wss::` / TLS connections. | +| `tls_roots` | path | system trust | Custom CA trust store. | +| `tls_roots_password` | string | unset | Trust store password. | +| `auto_flush` | bool | `on` | Global on/off for auto-flush triggers. | +| `auto_flush_rows` | int / `off` | `1000` | Row-count flush trigger. 
| +| `auto_flush_bytes` | int / `off` | `0` (off) | Byte-size flush trigger. | +| `auto_flush_interval` | int (ms) / `off` | `100` | Time-since-first-row flush trigger. | +| `init_buf_size` | size | `64K` | Initial encode buffer capacity. | +| `max_buf_size` | size | `100M` | Max encode buffer capacity. | +| `max_name_len` | int | `127` | Local validation cap for table / column names. | +| `max_schemas_per_connection` | int | `65535` | Per-connection schema-id ceiling. | + +## Validation + +The parser rejects: + +- Unknown keys (forward compatibility is via the spec, not silent + acceptance). +- `sf_durability` values other than `memory`, `flush`, `append`. `flush` + and `append` parse but are rejected at build time today. +- `sender_id` containing path separators or empty. +- `request_durable_ack=on` on non-WebSocket transports. + +## Worked examples + +### Single-node memory-mode producer + +```java +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + sender.table("events") + .stringColumn("source", "edge-42") + .longColumn("count", 1) + .atNow(); +} +``` + +No `sf_dir`, so memory mode. The default `128 MiB` cap absorbs short +network blips. A sender crash loses the unacked tail. + +### Single-node durable producer + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=localhost:9000;sf_dir=/var/lib/qdb-sender;")) { + // ... +} +``` + +Same producer code; SF mode is enabled by the one extra key. Unacked +data persists at `/var/lib/qdb-sender/default/` across crashes. + +### Multi-host with object-store durability + +```java +try (Sender sender = Sender.fromConfig( + "wss::addr=node-a:9000,node-b:9000,node-c:9000;" + + "sf_dir=/var/lib/qdb-sender;sender_id=ingest-svc;" + + "request_durable_ack=on;" + + "username=ingest;password=…;")) { + // ... +} +``` + +`wss::` for TLS, three-host failover, durable-ack opt-in. Slot lives at +`/var/lib/qdb-sender/ingest-svc/`. 
The connect fails loudly if any peer +returns an upgrade without `X-QWP-Durable-Ack: enabled`. + +### Multi-tenant host with orphan rescue + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=node-a:9000;sf_dir=/var/lib/qdb-sender;" + + "sender_id=worker-" + workerInstanceId + ";" + + "drain_orphans=on;max_background_drainers=8;")) { + // ... +} +``` + +Each worker instance has a unique `sender_id`. When a worker crashes and +a new instance comes up under a different `sender_id`, the new +instance's foreground sender adopts the dead worker's slot in the +background and drains it. + +### Long-outage tolerance for unattended ingest + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=primary:9000;sf_dir=/var/lib/qdb-sender;" + + "sf_max_total_bytes=50G;" + + "reconnect_max_duration_millis=3600000;" + + "initial_connect_retry=async;")) { + // ... +} +``` + +50 GiB of buffer space, a one-hour reconnect budget, async initial +connect so the constructor returns immediately even if the server is +down. Suitable for edge / IoT producers on unreliable links. 
+ +## Where each key is documented + +| Group | Connect-string reference | +|---|---| +| Storage (`sf_dir`, `sender_id`, …) | [#sf-keys](/docs/connect/clients/connect-string#sf-keys) | +| Reconnect (`reconnect_*`, `initial_connect_retry`, `close_flush_timeout_millis`) | [#reconnect-keys](/docs/connect/clients/connect-string#reconnect-keys) | +| Failover (`addr`, `zone`, `target`, `auth_timeout_ms`) | [#failover-keys](/docs/connect/clients/connect-string#failover-keys) | +| Auth (`username`, `password`, `token`) | [#auth](/docs/connect/clients/connect-string#auth) | +| TLS (`tls_*`) | [#tls](/docs/connect/clients/connect-string#tls) | diff --git a/documentation/high-availability/store-and-forward/operating-and-tuning.md b/documentation/high-availability/store-and-forward/operating-and-tuning.md new file mode 100644 index 000000000..3bd064656 --- /dev/null +++ b/documentation/high-availability/store-and-forward/operating-and-tuning.md @@ -0,0 +1,302 @@ +--- +title: Operating and tuning store-and-forward +sidebar_label: Operating & tuning +description: + Operational guidance for QuestDB store-and-forward producers — slot + directory layout, locks, capacity sizing, recovery, backpressure, + observability, and orphan adoption. +--- + +This page is the operator-facing guide for SF in production: how to +provision the slot directory, what to watch, and how to tune the limits. +For the underlying model see +[Concepts](/docs/high-availability/store-and-forward/concepts/); for the +choice between memory mode and SF mode see +[When to use](/docs/high-availability/store-and-forward/when-to-use/). 
+ +## Slot directory layout + +In SF mode every sender owns one **slot directory**: + +``` +<sf_dir>/<sender_id>/ +├── .lock # advisory exclusive lock (kernel-released on process exit) +├── .lock.pid # UTF-8 text: holder PID + '\n' (diagnostic only) +├── .failed # optional drainer-failure sentinel (UTF-8 reason text) +├── .ack-watermark # optional 16-byte durable-ack high-water mark +├── sf-0000000000000001.sfa +├── sf-0000000000000002.sfa +└── ... +``` + +`<sf_dir>` is the **group root** — the directory you point the connect +string at. `<sender_id>` is the slot subdirectory; it defaults to +`default` but should be set explicitly when more than one sender shares +the host. + +### `.lock` and `.lock.pid` + +The `.lock` file is held under an advisory exclusive lock for the engine's +lifetime — POSIX clients use `flock` / `fcntl`, Windows uses +`LockFileEx`. The lock is released automatically when the file descriptor +closes, including on hard process exit (kernel cleanup). + +A second sender pointing at the same slot directory will fail to start +with an error that names the holder's PID, read from `.lock.pid`. The +PID file is overwritten on every successful acquire; an absent or empty +`.lock.pid` reports `holder=unknown` rather than failing the lookup. + +Neither `.lock` nor `.lock.pid` is deleted on clean shutdown. Stale +files are harmless — the next acquirer silently overwrites them. + +**Cross-platform interop:** a POSIX client and a Windows client must +**not** share a slot on a network filesystem. Their lock primitives are +incompatible. + +### `.failed` + +Present if and only if a previous drainer attempt gave up on the slot — reconnect +budget exhausted, terminal auth failure, or irrecoverable corruption. +The file contents are a UTF-8 reason for human operators; the **presence** +is the signal that the orphan scanner uses to exclude the slot from +auto-drain on subsequent scans. 
+ +**Operator action:** read the reason, fix the underlying cause (rotate +credentials, restore the missing peer, etc.), then delete `.failed`. The +next sender that scans `<sf_dir>` with `drain_orphans=on` will pick the slot up again. + +### Segment files + +Segments are named `sf-<generation>.sfa` where `<generation>` is a 16-character +zero-padded hexadecimal generation counter. The number reflects +allocation order, **not** the FSN range — that lives in the file header +and is read at recovery time. + +Pre-allocation reserves real disk blocks at file creation. On Linux this +is `posix_fallocate`; on macOS, `F_PREALLOCATE` / `F_ALLOCATEALL`. The +substrate refuses to fall back silently to `ftruncate` on filesystems +where these are unsupported — sparse files would risk a `SIGBUS` later +when a write to the mmap'd region lands in a hole. On filesystems where the +native layer **must** fall back to `ftruncate`, size `sf_max_bytes` +conservatively against free space. + +## Lock collisions in practice + +Two `sender_id`s in the same `sf_dir` never collide — they are +independent slots. The same `sender_id` started twice **will** collide, +and the second start fails loudly. + +A common cause is a redeploy where the old process hasn't fully exited +when the new one comes up. Solutions: + +- Wait for the old process to release the lock (the kernel releases on + exit; `kill -9` is sufficient). +- Use a deployment unit that orders shutdown before startup. +- For containerised deployments, set `sender_id` from a per-pod stable + identity so two pods with the same template name don't collide. + +`drain_orphans=on` does **not** override the lock — a busy orphan slot +is skipped, not stolen. + +## Sizing capacity + +Two limits matter: + +### `sf_max_bytes` — per-segment file size (default `4 MiB`) + +This is the rotation threshold and the unit of trim. 
Segments that are +smaller release disk faster but waste more space on the active tail; +larger segments waste less on the active tail but hold acked frames in +the same file as the still-unacked tail until every frame in the segment +is acked. + +For most workloads `4 MiB` is fine. Raise it if you are appending very +large batches and pre-allocation cost matters; lower it if you observe +disk usage staying high under slow ack cadence. + +### `sf_max_total_bytes` — slot capacity (default `128 MiB` memory / `10 GiB` SF) + +This is the **hard cap** on resident SF storage — sealed segments plus +the active segment. When this fills, producer `appendBlocking` calls +block (with cooperative yield) for up to `sf_append_deadline_millis` +waiting for ACK-driven trim to free space; on timeout the call throws. + +Size this against your **worst expected outage** times your ingest +rate: + +``` +sf_max_total_bytes ≥ ingest_rate × max_tolerated_outage +``` + +A 5-minute reconnect budget at 10 MB/s of compressed frames implies at +least 3 GB. Add safety margin for trim latency — in particular, +`request_durable_ack=on` extends time-to-trim by the WAL→object-store +upload window. + +In memory mode the default `128 MiB` is deliberately small: it forces +you to think about backpressure rather than letting an outage silently +balloon process RSS. + +## Backpressure observability + +`appendBlocking` distinguishes two reasons it can stall: + +- **Wire-publishing backpressure.** The server is acking but the + producer is faster than ack throughput. The exception message names + this state. Solutions: scale the server, slow the producer, or raise + `sf_max_total_bytes`. +- **Reconnect backpressure.** The I/O loop is in the retry loop and the + substrate is filling. The exception message includes the attempt + count and outage start time. Solutions: address the cluster outage, + raise `sf_max_total_bytes`, or accept that the producer will start + throwing once the cap is exhausted. 
+ +The `getTotalBackpressureStalls()` counter (see Observability below) +records every producer thread that hit the cap. + +## Recovery on restart + +When an SF-mode sender opens, it runs this sequence: + +1. Acquire `<sf_dir>/<sender_id>/.lock`. Fail loudly on contention. +2. Scan every `*.sfa` file: + - Validate magic, version, header. + - Walk frames forward verifying each CRC32C-Castagnoli. + - The first invalid frame becomes end-of-data; any non-zero bytes + past that point are logged as a torn-tail count. +3. Sort segments by `baseSeq` and verify no gaps. A gap is a fatal + recovery error. +4. Open `.ack-watermark` (if present) and read the cumulative + durable-ack FSN. Reject a watermark that exceeds the on-disk + ceiling — it would seed `ackedFsn` past every un-acked frame and + silently drop the un-acked tail. +5. Seed `ackedFsn = max(lowestBaseSeq - 1, watermark)`. +6. Allocate the next segment generation as `max(existing-gen) + 1`. +7. Bump the connection generation so the I/O loop replays from disk + against a fresh wireSeq window. + +A clean shutdown that drained everything is indistinguishable from a +fresh start: no segments, no replay. + +### Recovery failures + +| Symptom | Likely cause | Operator action | +|---|---|---| +| "Slot held by PID `<pid>`" | Two processes claiming the same `sender_id`. | Stop the duplicate. The lock releases on its exit. | +| "Gap between segments" | Corruption — a segment was deleted out of band. | Restore from backup or accept data loss; the substrate refuses to start. | +| "Watermark exceeds publishedFsn" | `.ack-watermark` is corrupt; the engine falls back to the no-watermark seed. | Logged as `WARN`. Replay will re-send the lowest segment's frames; rely on server deduplication. | +| Torn tail count > 0 | The previous process crashed mid-frame-write. | Informational; the CRC + zero-fill design discards the partial frame. 
| + +## Close and shutdown + +`close()` semantics depend on `close_flush_timeout_millis`: + +| Value | Behaviour | +|---|---| +| `5000` (default) | Block up to 5 s waiting for `ackedFsn ≥ publishedFsn`. Log `WARN` on timeout; un-acked tail stays on disk (SF) or is lost (memory). | +| `0` or `-1` | Skip the drain wait. Pending data persists on disk (SF) for the next sender, or is lost (memory). | +| any other positive value | That timeout in milliseconds. | + +In every branch `close()`: + +- Performs a non-blocking safety-net check that rethrows any latched + terminal error not already delivered through an async handler or a + synchronous producer call. +- Releases the slot lock and unmaps segment files. + +The safety-net check is what makes "close-and-forget" callers safe: if +the only API your code uses is `close()`, terminal errors still surface +rather than silently sinking into a no-op handler. + +## Orphan adoption in operations + +With `drain_orphans=on`, the foreground sender — after acquiring its own +lock — scans `<sf_dir>/*` for siblings that: + +- are not its own `sender_id`, +- contain at least one `*.sfa` file, +- do not have a `.failed` sentinel. + +Up to `max_background_drainers` drainers run concurrently. Each drainer +opens its own engine and WebSocket connection, runs recovery + replay, +and exits when the orphan's `ackedFsn ≥ publishedFsn`. + +### Drainer failure modes + +- **Reconnect budget exhausted.** Drainer writes `.failed` with reason, + releases the lock, exits. +- **Auth-terminal upgrade error.** Same. +- **Irrecoverable corruption.** Same. + +`.failed` slots are excluded from auto-drain on subsequent scans — +operator action is required to clear the sentinel. + +### Observing drainers + +- `getActiveBackgroundDrainers()` — count of currently-running drainers + (best-effort: a just-finished drainer may still count for a few ms). +- `getTotalBackgroundDrainersSucceeded()` / `…Failed()` — cumulative + outcomes since process start. 
+- The `BackgroundDrainerListener` callback delivers per-drainer + events (progress watermark, durable-ack-mismatch escalation, terminal + outcome) for richer dashboards. +- On-disk `.failed` sentinels are the canonical record of giveup + events surviving sender restart. + +## Observability counters + +A conformant client exposes at minimum: + +| Counter | What it tells you | +|---|---| +| `getTotalReconnectAttempts()` | How often the wire has broken across the sender's lifetime. | +| `getTotalReconnectsSucceeded()` | How many of those recovered. | +| `getTotalFramesReplayed()` | Volume re-sent after reconnects. A spike usually means a fresh outage; sustained growth means a flapping wire. | +| `getTotalServerErrors()` | Count of error frames received (any category). | +| `getDroppedErrorNotifications()` | Error-inbox overflow count. Non-zero means a busy error stream or a slow handler. | +| `getTotalErrorNotificationsDelivered()` | Errors delivered to the user handler. | +| `getTotalBackpressureStalls()` | Producer threads that hit `sf_max_total_bytes`. | +| `getLastTerminalError()` | The latched `SenderError`, or null. | +| `getActiveBackgroundDrainers()` | Running orphan drainers right now. | +| `getTotalBackgroundDrainersSucceeded()` / `…Failed()` | Cumulative drainer outcomes. | + +### Suggested dashboards + +- **Reconnect health:** `reconnect_attempts - reconnect_succeeded` over + time. A non-zero difference for more than a few seconds means the + wire is currently down. Alert if it stays elevated past your + `reconnect_max_duration_millis`. +- **Replay volume:** `frames_replayed` rate. Bursts are expected; + sustained replay means a chronic instability. +- **Backpressure:** `backpressure_stalls` rate. Any non-zero rate is a + capacity signal. +- **Error rate by category:** instrument your error handler to bucket + by category. Background `SCHEMA_MISMATCH` is usually a schema-drift + symptom worth alerting on. 
+ +The default error handler logs every received `SenderError` — +`ERROR`-level for HALT, `WARN`-level for DROP. Replace it only if you +are also routing the errors somewhere else (Sentry, structured logs): +silence is forbidden by the contract. + +## Multi-sender deployments + +When several senders share a host and a `sf_dir`: + +- Give each one a unique `sender_id`. The default `sender_id=default` + is fine for a single-sender host but collides for any second + sender. +- Consider `drain_orphans=on` if dynamic sender identities mean dead + instances can leave permanent orphans. +- Size `sf_max_total_bytes × number_of_senders` against available disk. +- Plan for the worst-case lock-collision recovery: a misconfigured + fleet that all share `sender_id=default` will leave only one sender + alive on each host. That is the design — fail loudly rather than + silently corrupt overlapping slots. + +## Next steps + +- [Configuration](/docs/high-availability/store-and-forward/configuration/) — + the full connect-string key reference. +- [Client failover concepts](/docs/high-availability/client-failover/concepts/) — + what the reconnect loop does between disconnects. diff --git a/documentation/high-availability/store-and-forward/when-to-use.md b/documentation/high-availability/store-and-forward/when-to-use.md new file mode 100644 index 000000000..5b4a953f7 --- /dev/null +++ b/documentation/high-availability/store-and-forward/when-to-use.md @@ -0,0 +1,213 @@ +--- +title: When to use store-and-forward +sidebar_label: When to use +description: + Decision guide for choosing between memory mode and disk-backed + store-and-forward, when to opt into durable-ack trim, and when to enable + orphan adoption. +--- + +The QWP WebSocket transport always uses a store-and-forward (SF) substrate. +What changes between deployments is **where** that substrate keeps unacked +data and **what durability bar** it acknowledges against. This page is the +decision guide. 
+ +If you are new to SF, start with +[Concepts](/docs/high-availability/store-and-forward/concepts/). + +## Memory mode vs SF mode + +The single switch that decides this is whether you set `sf_dir` in the +connect string. + +### Memory mode — `sf_dir` unset + +Unacked frames live in a malloc'd ring in process memory. Default cap is +`128 MiB`. + +**Choose memory mode when:** + +- The producer process is short-lived or ephemeral (a CLI job, a CI + worker, a serverless function). +- A sender restart is acceptable as a fresh start — losing any in-flight + data when the sender stops is acceptable. +- You only need to tolerate **transient** network blips and short server + outages (think: rolling upgrades, brief network partitions). +- Your data volume comfortably fits in RAM during the longest outage you + care about. + +### SF mode — `sf_dir=/path/to/slot-root` + +Unacked frames are written to mmap'd files under +`<sf_dir>/<sender_id>/`. Default cap is `10 GiB`. + +**Choose SF mode when:** + +- The producer process is long-running and outage budgets are measured + in minutes (the default `reconnect_max_duration_millis` is 5 minutes + for a reason). +- In-flight data must not be lost when the sender stops or its host + reboots — crash, OOM kill, planned redeploy. +- You ingest at rates where minutes of buffering exceeds RAM you can + spare. +- You operate unattended at the edge (sensors, ETL jobs) where the + server may sometimes be unreachable for extended periods. + +Both modes share the same wire behaviour, the same failover loop, and +the same connect-string keys for everything other than storage. You can +switch between them without changing application code — only the connect +string. 
+ +## Comparison at a glance + +| Aspect | Memory mode | SF mode | +|---|---|---| +| Buffered data location | Process RAM | Disk (`<sf_dir>/<sender_id>/`) | +| Default capacity | `128 MiB` | `10 GiB` | +| Unacked data after a sender crash (`kill -9`, OOM) | Lost | Recovered and replayed on restart | +| Unacked data after the sender's host reboots | Lost | Recovered, if the disk persists | +| Cross-sender rescue (orphan adoption) | n/a | Yes (opt-in) | +| Setup cost | Zero | Provisioning a writable directory | +| Operational cost | Zero | Sizing, monitoring, lock collisions | + +## Durable-ack: when to opt in + +By default the substrate trims unacked data on OK ack from the server. +That means the substrate releases a frame once the server has acknowledged +it into the WAL. The frame is durable on the **primary's** disk; whether +it has been replicated to the object store or to replicas is a separate +matter. + +When the connect string sets `request_durable_ack=on`, trim is held back +until a separate `STATUS_DURABLE_ACK` frame confirms the data has been +uploaded from the WAL to the **configured object store** (S3, Azure Blob, +GCS, or NFS). + +### Choose durable-ack when + +- You require object-store durability before considering a write + acknowledged — e.g. compliance requirements, end-to-end exactly-once + pipelines with cross-region recovery. +- Loss of an entire primary node (and its local disk) must not lose + in-flight data — replicas haven't downloaded the WAL yet, only the + object store has. +- You are willing to trade later trim (and so larger steady-state SF + disk usage) for the stronger guarantee. + +### Stay on the default OK trim when + +- WAL-local durability on the primary is sufficient. +- You want minimum steady-state disk usage. +- You are running OSS or a build that does not support durable-ack. + (The handshake fails loudly if you opt in but the server cannot + deliver — see below.) 
+ +### Caveats + +- **Server support is required.** The client sends + `X-QWP-Request-Durable-Ack: true` on the upgrade. The server must echo + back `X-QWP-Durable-Ack: enabled`. If it does not — OSS build, + uninitialised primary, missing registry, hitting a replica — the + connect **fails loudly**, by design. Silently waiting for ack frames + that never arrive would let the SF disk fill up. +- **Idle keepalive.** The OSS server only flushes pending durable-ack + frames during inbound recv events. The client sends a WebSocket PING + every `durable_ack_keepalive_interval_millis` (default 200 ms) when + there are pending confirmations and the producer is idle. +- **Disk pressure.** Steady-state SF disk usage is roughly + `ingest_rate × time_to_object_store_durability`. Size + `sf_max_total_bytes` accordingly. + +## Orphan adoption: when to enable + +A sender that exits without draining its slot leaves unacked data on +disk. If another process restarts under the same `sender_id` and same +`sf_dir`, it picks up the orphan automatically as part of normal +recovery. But if no process ever uses that `sender_id` again, the data +sits on disk forever. + +Setting `drain_orphans=on` tells the **foreground sender** to scan +`<sf_dir>/*` at startup for sibling `sender_id`s with unacked data and +spawn background drainers to clear them. + +### Enable orphan adoption when + +- You have a fleet of senders writing to a shared `sf_dir` (multi-tenant + host, container restart) and want any survivor to rescue dead + siblings' data. +- Your deployment can dynamically allocate `sender_id` (e.g. one per + process instance), so dead instances leave permanent orphans that no + natural restart will adopt. +- You prefer "automatic eventual delivery" over "operator manually + reattaches the slot." + +### Leave it off when + +- Each `sender_id` is statically pinned to a specific process — there + are no orphans by construction; a restart of the same process + recovers its own slot. 
+- You want explicit operator control over data movement in a shared + `sf_dir`. +- You run a single producer per host. + +Drainer concurrency is capped by `max_background_drainers` (default +`4`). Each drainer opens its own connection — they share the network +path but not the WebSocket. + +`drain_orphans=on` does not interfere with regular recovery: the +foreground sender still recovers its own `sender_id` first, then +drainers spawn for sibling slots. + +## Migrating from HTTP/TCP ILP + +If you are currently using HTTP or TCP ILP ingest, the comparison is: + +| Capability | HTTP ILP | TCP ILP | QWP WebSocket + SF | +|---|---|---|---| +| Non-blocking producer | No (request waits) | No (TCP backpressure) | Yes (buffer absorbs publishes) | +| No data loss on a sender crash | No | No | Yes (SF mode) | +| Server outage tolerance | Best-effort retry | None | Reconnect loop with multi-minute budget | +| Multi-host failover | Yes (HTTP only) | No | Yes | +| Cross-region durability ack | No | No | Yes (`request_durable_ack=on`) | +| Cluster-wide ordering | Best-effort | Best-effort | FSN-driven, server-deduplicated | + +The transition is application-transparent — `Sender.fromConfig` accepts +a `ws::` or `wss::` connect string and the public builder API is the +same. The most common migration is HTTP ILP → QWP WS+SF, with `sf_dir` +set, retaining HTTP for backward compatibility while the QWP path +becomes the primary. + +For specifically the multi-host HA path on HTTP ILP, see the existing +[ILP overview "Multiple URLs for High Availability"](/docs/connect/compatibility/ilp/overview/#multiple-urls-for-high-availability) +section. QWP failover (documented in +[Client failover concepts](/docs/high-availability/client-failover/concepts/)) +replaces and extends it. 
+ +## Decision flowchart + +```mermaid +graph TD + Q1{Will the producer outlive any single outage you care about?} + Q2{A sender crash must not lose in-flight data?} + Q3{Is object-store durability required before ack?} + Q4{Multiple senders share sf_dir, with dynamic sender_id?} + + Q1 -->|"No (ephemeral job)"| Memory[Memory mode — leave sf_dir unset] + Q1 -->|"Yes (long-running service)"| Q2 + Q2 -->|No| Memory + Q2 -->|Yes| SF[SF mode — set sf_dir] + SF --> Q3 + Q3 -->|Yes| Durable[Add request_durable_ack=on] + Q3 -->|No| Q4 + Durable --> Q4 + Q4 -->|Yes| Orphans[Add drain_orphans=on] + Q4 -->|No| Done[Configuration complete] + Orphans --> Done +``` + +## Next steps + +- [Configuration](/docs/high-availability/store-and-forward/configuration/) — + the connect-string keys. +- [Operating and tuning](/docs/high-availability/store-and-forward/operating-and-tuning/) — + slot layout, sizing, observability. diff --git a/documentation/high-availability/tuning.md b/documentation/high-availability/tuning.md index 5a25a20a9..73b93a1a9 100644 --- a/documentation/high-availability/tuning.md +++ b/documentation/high-availability/tuning.md @@ -279,6 +279,7 @@ For example, a 2 MiB WAL segment becomes ~256 KiB in the transport layer. 
## Next steps -- [Replication overview](/docs/high-availability/overview/) - How replication works +- [High availability overview](/docs/high-availability/overview/) - How replication works - [Setup guide](/docs/high-availability/setup/) - Configure replication +- [Client failover](/docs/high-availability/client-failover/concepts/) - Configure clients to follow a primary promotion - [Configuration reference](/docs/configuration/overview/) - All server settings diff --git a/documentation/ingestion/clients/c-and-cpp.md b/documentation/ingestion/clients/c-and-cpp.md index 359c576f2..f9bc44cfd 100644 --- a/documentation/ingestion/clients/c-and-cpp.md +++ b/documentation/ingestion/clients/c-and-cpp.md @@ -1,849 +1,2661 @@ --- -title: C & C++ Client Documentation -description: - "Dive into QuestDB using the C & C++ ingestion client for high-performance, - insert-only operations. Unlock peak time series data ingestion." -test: "foo" +slug: /connect/clients/c-and-cpp +title: C & C++ client for QuestDB +sidebar_label: C & C++ +description: "QuestDB C and C++ client for high-throughput ingestion and SQL query execution over the QWP binary protocol (WebSocket)." --- -import { ILPClientsTable } from "@theme/ILPClientsTable" +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" -QuestDB supports the C & C++ programming languages, providing a high-performance -ingestion client tailored for insert-only operations. This integration ensures -peak efficiency in time series data ingestion and analysis, perfectly suited for -systems which require top performance and minimal latency. +The QuestDB C and C++ client connects to QuestDB over the +[QWP — QuestDB Wire Protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) — a +columnar binary protocol carried over WebSocket. 
The library is implemented in +Rust and exposes both a C11 ABI and a C++17 header-only wrapper from a single +shared/static library. -Key features of the QuestDB C & C++ client include: +Two complementary APIs live in the same library: -- **Automatic table creation**: No need to define your schema upfront. -- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions +- **Ingestion** (`line_sender_*` / `questdb::ingress::line_sender`): + column-oriented batched writes with automatic table creation, schema + evolution, multi-host failover, and optional store-and-forward durability. +- **Querying** (`line_reader_*` / `questdb::egress::reader`): + parameterised SQL over the QWP egress endpoint (`/read/v1`), with + streaming batch results, per-query failover, and credit-based flow + control. See [Querying and SQL execution](#querying-and-sql-execution). -### Requirements +:::tip Transports -- Requires a C/C++ compiler and standard libraries. -- Assumes QuestDB is running. If it's not, refer to - [the general quick start](/docs/getting-started/quick-start/). +QWP/WebSocket (`ws::` / `wss::`) is the current default ingest path and the +focus of this page. The same library also supports the legacy ILP transports +(`http::` / `https::` / `tcp::` / `tcps::`) and QWP over UDP for trusted +high-throughput networks. For ILP transport details, see the +[ILP overview](/docs/connect/compatibility/ilp/overview/). -### Client Installation - -You need to add the client as a dependency to your project. Depending on your -environment, you can do this in different ways. 
Please check the documentation -at the -[client's repository](https://github.com/questdb/c-questdb-client/blob/main/doc/DEPENDENCY.md). - -## C++ - -:::note +::: -This section is for the QuestDB C++ client. +:::info -For the QuestDB C Client, see the below seciton. +The ingestion and query clients are independent — each opens its own +WebSocket connection. You can also query QuestDB from C/C++ via the +[PGWire C++ client](/docs/connect/compatibility/pgwire/c-and-cpp/) or the +[REST API](/docs/connect/compatibility/rest-api/) when the QWP transport +is not available. ::: - - -Explore the full capabilities of the C++ client via the -[C++ README](https://github.com/questdb/c-questdb-client/blob/main/doc/CPP.md). +## Quick start -## Authentication +### Prerequisites -The QuestDB C++ client supports basic connection and authentication -configurations. +- A C11 or C++17 compiler (tested with GCC and Clang). +- CMake 3.15 or newer. +- Rust 1.61 or newer (only required when building the library from source). -Here is an example of how to configure and use the client for data ingestion: - -```c -#include +### Build the library -... +Clone and build from +[c-questdb-client](https://github.com/questdb/c-questdb-client): -auto sender = questdb::ingress::line_sender::from_conf( - "http::addr=localhost:9000;"); +```bash +git clone https://github.com/questdb/c-questdb-client.git +cd c-questdb-client +cmake -S . -B build -DCMAKE_BUILD_TYPE=Release +cmake --build build ``` -You can also pass the connection configuration via the `QDB_CLIENT_CONF` -environment variable: +The build produces both a static (`libquestdb_client.a`) and a shared +(`libquestdb_client.so` / `.dylib` / `.dll`) library. Headers live in +`include/questdb/ingress/`. The same build directory also contains +runnable example binaries (`line_sender_c_example*` for C, +`line_sender_cpp_example*` for C++) that are useful as reference workloads. 
-```bash -export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" -``` +### Hello world -Then you use it like this: + + -```cpp -auto sender = questdb::ingress::line_sender::from_env(); -``` +```c +#include +#include +#include -When using QuestDB Enterprise, authentication can also be done via REST token. -Please check the [RBAC docs](/docs/security/rbac/#authentication) for more -info. +int main(void) { + line_sender_error* err = NULL; + line_sender* sender = NULL; + line_sender_buffer* buffer = NULL; -### Basic data insertion + const char* conf = "ws::addr=localhost:9000;"; + line_sender_utf8 conf_utf8; + if (!line_sender_utf8_init(&conf_utf8, strlen(conf), conf, &err)) goto on_error; -Basic insertion (no-auth): + sender = line_sender_from_conf(conf_utf8, &err); + if (!sender) goto on_error; -```cpp -// main.cpp -#include + buffer = line_sender_buffer_new_for_sender(sender); -int main() -{ - auto sender = questdb::ingress::line_sender::from_conf( - "http::addr=localhost:9000;"); + line_sender_table_name tbl = QDB_TABLE_NAME_LITERAL("trades"); + line_sender_column_name symbol_name = QDB_COLUMN_NAME_LITERAL("symbol"); + line_sender_column_name side_name = QDB_COLUMN_NAME_LITERAL("side"); + line_sender_column_name price_name = QDB_COLUMN_NAME_LITERAL("price"); + line_sender_column_name amount_name = QDB_COLUMN_NAME_LITERAL("amount"); + line_sender_utf8 symbol_val = QDB_UTF8_LITERAL("ETH-USD"); + line_sender_utf8 side_val = QDB_UTF8_LITERAL("sell"); - questdb::ingress::line_sender_buffer buffer = sender.new_buffer(); - buffer - .table("trades") - .symbol("symbol","ETH-USD") - .symbol("side","sell") - .column("price", 2615.54) - .column("amount", 0.00044) - .at(questdb::ingress::timestamp_nanos::now()); + if (!line_sender_buffer_table(buffer, tbl, &err)) goto on_error; + if (!line_sender_buffer_symbol(buffer, symbol_name, symbol_val, &err)) goto on_error; + if (!line_sender_buffer_symbol(buffer, side_name, side_val, &err)) goto on_error; + if 
(!line_sender_buffer_column_f64(buffer, price_name, 2615.54, &err)) goto on_error; + if (!line_sender_buffer_column_f64(buffer, amount_name, 0.00044, &err)) goto on_error; + if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err)) goto on_error; - // To insert more records, call `buffer.table(..)...` again. + if (!line_sender_flush(sender, buffer, &err)) goto on_error; + if (!line_sender_qwpws_close_drain(sender, &err)) goto on_error; - sender.flush(buffer); + line_sender_buffer_free(buffer); + line_sender_close(sender); return 0; + +on_error:; + size_t err_len = 0; + const char* msg = line_sender_error_msg(err, &err_len); + fprintf(stderr, "error: %.*s\n", (int)err_len, msg); + line_sender_error_free(err); + if (buffer) line_sender_buffer_free(buffer); + if (sender) line_sender_close(sender); + return 1; } ``` -These are the main steps it takes: +:::caution `QDB_*_LITERAL` is for string literals only + +The `QDB_*_LITERAL` macros expand to `sizeof(literal) - 1`; passing a +`const char*` variable compiles but silently encodes the pointer size +(typically 7 bytes), not the string length. For runtime strings, use the +`_init` form — see how `conf` above is initialized with +`line_sender_utf8_init`. The matching `_table_name_init` and +`_column_name_init` exist as well. + +::: -- Use `questdb::ingress::line_sender::from_conf` to get the `sender` object -- Populate a `Buffer` with one or more rows of data -- Send the buffer using `sender.flush()`(`Sender::flush`) +Compile with: -In this case, we call `at()`, with the current timestamp. +```bash +gcc -std=c11 hello.c \ + -I /path/to/c-questdb-client/include \ + -L /path/to/c-questdb-client/build -lquestdb_client \ + -o hello +``` -Let's see now an example with explicit timestamps, custom timeout, basic auth, -and error control. 
+ + ```cpp #include #include -#include -int main() -{ - try - { - // Create a sender using HTTP protocol - auto sender = questdb::ingress::line_sender::from_conf( - "http::addr=localhost:9000;username=admin;password=quest;retry_timeout=20000;"); - - // Get the current time as a timestamp - auto now = std::chrono::system_clock::now(); - auto duration = now.time_since_epoch(); - auto nanos = std::chrono::duration_cast(duration).count(); - - // Add rows to the buffer of the sender with the same timestamp - questdb::ingress::line_sender_buffer buffer = sender.new_buffer(); - buffer - .table("trades") - .symbol("symbol", "ETH-USD") - .symbol("side", "sell") - .column("price", 2615.54) - .column("amount", 0.00044) - .at(questdb::ingress::timestamp_nanos(nanos)); +namespace qdb = questdb::ingress; +using namespace questdb::ingress::literals; +int main() { + try { + auto sender = qdb::line_sender::from_conf( + "ws::addr=localhost:9000;"_utf8); + auto buffer = sender.new_buffer(); buffer - .table("trades") - .symbol("symbol", "BTC-USD") - .symbol("side", "sell") - .column("price", 39269.98) - .column("amount", 0.001) - .at(questdb::ingress::timestamp_nanos(nanos)); - - // Transactionality check - if (!buffer.transactional()) { - std::cerr << "Buffer is not transactional" << std::endl; - sender.close(); - return 1; - } - - // Flush and clear the buffer, sending the data to QuestDB + .table("trades"_tn) + .symbol("symbol"_cn, "ETH-USD"_utf8) + .symbol("side"_cn, "sell"_utf8) + .column("price"_cn, 2615.54) + .column("amount"_cn, 0.00044) + .at(qdb::timestamp_nanos::now()); sender.flush(buffer); - - // Close the connection after all rows ingested - sender.close(); + sender.close_drain(); return 0; - } - catch (const questdb::ingress::line_sender_error& err) - { - std::cerr << "Error running example: " << err.what() << std::endl; + } catch (const qdb::line_sender_error& e) { + std::cerr << "error: " << e.what() << "\n"; return 1; } } ``` -Now, both events use the same timestamp. 
We recommend using the event's -original timestamp when ingesting data into QuestDB. Using ingestion-time -timestamps precludes the ability to deduplicate rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +Compile with: -### Array Insertion +```bash +g++ -std=c++17 hello.cpp \ + -I /path/to/c-questdb-client/include \ + -L /path/to/c-questdb-client/build -lquestdb_client \ + -o hello +``` -QuestDB can accept N-dimensional arrays. For now these are limited to the -`double` element type. The easiest way is to insert an `std::array`, but the -database can also support `std::vector`, `std::span` (C++20) and additional -custom array types via a [customization point](https://github.com/questdb/c-questdb-client/blob/main/examples/line_sender_cpp_example_array_custom.cpp). + + -The customization point can be used to integrate your own (or third party) -n-dimensional array types by providing `shape` and, optionally if not row-major, -`strides`. +Linking against the shared `libquestdb_client.so` needs no extra libraries. +The static `libquestdb_client.a` additionally requires `-lpthread -ldl -lm` +(and TLS deps if the build was configured with rustls or OpenSSL). -Please refer to the [Concepts section on n-dimensional arrays](/docs/query/datatypes/array), -where this is explained in more detail. +If you linked against an in-tree build, the binary needs to find +`libquestdb_client.so` at runtime — either set +`LD_LIBRARY_PATH=/path/to/c-questdb-client/build` before running, or add +`-Wl,-rpath,/path/to/c-questdb-client/build` to the link line. -:::note -Arrays are supported from QuestDB version 9.0.0, and require updated -client libraries. -::: +For production builds, prefer CMake integration — see the [upstream dependency +guide](https://github.com/questdb/c-questdb-client/blob/main/doc/DEPENDENCY.md). -In this example, we insert some FX order book data. 
-* `bids` and `asks`: 2D arrays of L2 order book depth. Each level contains price and volume. -* `bids_exec_probs` and `asks_exec_probs`: 1D arrays of calculated execution probabilities for the next minute. +The four steps are: -```cpp -#include -#include -#include -#include +1. Build a sender from a connect string. +2. Append rows to a buffer. +3. Call `flush()` to publish. +4. Call `close_drain()` before destroying the sender so already-published + frames complete on the wire. -using namespace std::literals::string_view_literals; -using namespace questdb::ingress::literals; +C function names that include `qwpws` — for example +`line_sender_qwpws_close_drain` in step 4, or `line_sender_qwpws_poll_error` +in [Asynchronous error handling](#asynchronous-error-handling) — are +QWP/WebSocket-specific. Unprefixed functions (`line_sender_close`, +`line_sender_flush`, the buffer setters) work for any transport the library +supports. -struct tensor { - std::vector data; - std::vector shape; -}; +## Authentication and TLS -// Customization point for the QuestDB array API (discovered via ADL lookup) -inline auto to_array_view_state_impl(const tensor& t) -{ - return questdb::ingress::array::row_major_view{ - t.shape.size(), // rank - t.shape.data(), // shape - t.data.data(), t.data.size() // array data - }; -} +Authentication happens at the HTTP level during the WebSocket upgrade, before +any binary frames are exchanged. 
-int main() -{ - try - { - auto sender = questdb::ingress::line_sender::from_conf( - "http::addr=127.0.0.1:9000;"); +### HTTP basic auth - questdb::ingress::line_sender_buffer buffer = sender.new_buffer(); +```text +wss::addr=db.example.com:9000;username=admin;password=quest; +``` - buffer - .table("fx_order_book"_tn) - .symbol("symbol"_cn, "EUR/USD"_utf8) - .column("bids"_cn, tensor{ - { - 1.0850, 600000, - 1.0849, 300000, - 1.0848, 150000 - }, - {3, 2} - }) - .column("asks"_cn, tensor{ - { - 1.0853, 500000, - 1.0854, 250000, - 1.0855, 125000 - }, - {3, 2} - }) - .column("bids_exec_probs"_cn, std::array{ - 0.85, 0.50, 0.25}) - .column("asks_exec_probs"_cn, std::vector{ - 0.90, 0.55, 0.20}) - .at(questdb::ingress::timestamp_nanos::now()); + + - sender.flush(buffer); - return true; - } - catch (const questdb::ingress::line_sender_error& err) - { - std::cerr << "[ERROR] " << err.what() << std::endl; - return false; - } -} +```c +line_sender_utf8 conf = QDB_UTF8_LITERAL( + "wss::addr=db.example.com:9000;username=admin;password=quest;"); +line_sender_error* err = NULL; +line_sender* sender = line_sender_from_conf(conf, &err); ``` -If your type also supports strides, use the -`questdb::ingress::array::strided_view` instead. + + -:::note -The example above uses ILP/HTTP. If instead you're using ILP/TCP you'll need -to explicity opt into the newer protocol version 2 that supports sending arrays. +```cpp +auto sender = questdb::ingress::line_sender::from_conf( + "wss::addr=db.example.com:9000;username=admin;password=quest;"_utf8); +``` + + + +### Token auth (Enterprise, recommended) + +Token authentication has lower overhead than basic auth and is the +recommended path for Enterprise deployments. + +```text +wss::addr=db.example.com:9000;token=your_bearer_token; ``` -tcp::addr=127.0.0.1:9009;protocol_version=2; + +### TLS + +Use the `wss` schema for TLS. 
Select where root certificates come from with +`tls_ca`: + +| Key | Description | +|-----|-------------| +| `tls_ca=webpki_roots` | Use bundled webpki roots. | +| `tls_ca=os_roots` | Use the OS certificate store. | +| `tls_ca=webpki_and_os_roots` | Combine both. | +| `tls_roots=/path/to/root-ca.pem` | Load roots from a PEM file. Useful for self-signed certs during testing. | +| `tls_verify=unsafe_off` | Disable verification. Never use in production. Requires the library to be built with the `insecure-skip-verify` Cargo feature; the default builds reject this value. | + +Example with a custom CA file: + +```text +wss::addr=db.example.com:9000;tls_roots=/etc/ssl/qdb-ca.pem; ``` -Protocol Version 2 along with its support for arrays is available from QuestDB -version 9.0.0. -::: +### Authentication timeout - +For the full list of connect-string keys, see the +[connect string reference](/docs/connect/clients/connect-string/). -## C +### From an environment variable -:::note +Set `QDB_CLIENT_CONF` to keep credentials out of source code: + +```bash +export QDB_CLIENT_CONF="wss::addr=db.example.com:9000;username=admin;password=quest;" +``` -This section is for the QuestDB C client. + + + +```c +line_sender_error* err = NULL; +line_sender* sender = line_sender_from_env(&err); +``` -Skip to the bottom of this page for information relating to both the C and C++ -clients. + + -::: +```cpp +auto sender = questdb::ingress::line_sender::from_env(); +``` - + + -Explore the full capabilities of the C client via the -[C README](https://github.com/questdb/c-questdb-client/blob/main/doc/C.md). +### Using the options API -### Connection +For callers that prefer typed setters over a connect string, build the sender +through `line_sender_opts`: -The QuestDB C client supports basic connection and authentication -configurations. 
Here is an example of how to configure and use the client for -data ingestion: + + ```c -#include +line_sender_error* err = NULL; +line_sender_utf8 host = QDB_UTF8_LITERAL("localhost"); +line_sender_opts* opts = line_sender_opts_new( + line_sender_protocol_qwpws, host, 9000); +if (!line_sender_opts_qwpws_progress( + opts, LINE_SENDER_QWPWS_PROGRESS_BACKGROUND, &err)) + goto on_error; +line_sender* sender = line_sender_build(opts, &err); +line_sender_opts_free(opts); +``` -... + + -line_sender_utf8 conf = QDB_UTF8_LITERAL( - "http::addr=localhost:9000;"); +```cpp +namespace qdb = questdb::ingress; +using namespace questdb::ingress::literals; -line_sender_error *error = NULL; -line_sender *sender = line_sender_from_conf( - line_sender_utf8, &error); -if (!sender) { - /* ... handle error ... */ -} +qdb::opts options{qdb::protocol::qwpws, "localhost"_utf8, 9000}; +options.qwp_ws_progress(qdb::qwp_ws_progress::background) + .auth_timeout(15000); +qdb::line_sender sender{options}; ``` -You can also pass the connection configuration via the `QDB_CLIENT_CONF` -environment variable: + + -```bash -export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" -``` +Most QWP/WebSocket settings are best configured through the connect string. The +options API mirrors the same keys with C/C++-typed setters; see the function +prototypes under `bool line_sender_opts_*` in `line_sender.h` for the full set. + +## Data ingestion + +### Concurrency + +`line_sender` is single-owner: only one thread may call publishing methods on a +given sender at a time. For concurrent producers, create one sender per +producer thread, or hand rows to a single owner over a queue. + +Buffers (`line_sender_buffer`) are also single-owner but are not tied to a +specific sender. Give each encoder thread its own buffer, fill it locally, +and hand the buffer to the sender thread (or call `flush()` if the same +thread owns both). 
This lets worker threads encode rows in parallel and +serialises only the publish step. + +When several sender instances share an `sf_dir`, give each a distinct +`sender_id`; slots are exclusive (see [Store-and-forward](#store-and-forward)). + +### General usage pattern -Then you use it like this: +1. Call `line_sender_buffer_table(buffer, name, &err)` (C++ `buffer.table(name)`) + to select a target table. +2. Call typed column setters to add values (see + [Column setters](#column-setters) below). +3. Finalize the row with `line_sender_buffer_at_nanos`, + `line_sender_buffer_at_micros`, or `line_sender_buffer_at_now` (C++ + `buffer.at(...)` / `buffer.at_now()`). +4. Repeat from step 1, or call `line_sender_flush(sender, buffer, &err)` (C++ + `sender.flush(buffer)`) to publish. + +Tables and columns are created automatically if they do not exist. + +A typical ingest loop reuses both the sender and the buffer; a successful +`line_sender_flush` clears the buffer (a failed flush retains the rows so +you can retry): ```c -#include -... -line_sender *sender = line_sender_from_env(&error); +while (running) { + if (!line_sender_buffer_table(buffer, tbl, &err)) break; + if (!line_sender_buffer_symbol(buffer, sensor_name, sensor_val, &err)) break; + if (!line_sender_buffer_column_f64(buffer, temp_name, read_temp(), &err)) break; + if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err)) break; + if (!line_sender_flush(sender, buffer, &err)) break; + sleep_one_second(); +} ``` -### Basic data insertion +### Column setters + +QWP buffers accept every QuestDB column type. The C ABI exposes each type as a +separate function; the C++ wrapper overloads `column()` for primitive scalars +and provides dedicated methods for everything else.
+ +| QuestDB type | C function | C++ method | +|---|---|---| +| `SYMBOL` | `line_sender_buffer_symbol` | `buffer.symbol(name, value)` | +| `BOOLEAN` | `line_sender_buffer_column_bool` | `buffer.column(name, bool)` | +| `BYTE` (i8) | `line_sender_buffer_column_i8` | `buffer.column_i8(name, value)` | +| `SHORT` (i16) | `line_sender_buffer_column_i16` | `buffer.column_i16(name, value)` | +| `INT` (i32) | `line_sender_buffer_column_i32` | `buffer.column_i32(name, value)` | +| `LONG` (i64) | `line_sender_buffer_column_i64` | `buffer.column(name, int64_t)` | +| `FLOAT` (f32) | `line_sender_buffer_column_f32` | `buffer.column_f32(name, value)` | +| `DOUBLE` (f64) | `line_sender_buffer_column_f64` | `buffer.column(name, double)` | +| `CHAR` | `line_sender_buffer_column_char` | `buffer.column_char(name, code_unit)` | +| `VARCHAR` | `line_sender_buffer_column_str` | `buffer.column(name, std::string_view)` | +| `BINARY` | `line_sender_buffer_column_binary` | `buffer.column_binary(name, bytes, len)` | +| `UUID` | `line_sender_buffer_column_uuid` | `buffer.column_uuid(name, lo, hi)` | +| `LONG256` | `line_sender_buffer_column_long256` | `buffer.column_long256(name, bytes)` | +| `IPv4` | `line_sender_buffer_column_ipv4` | `buffer.column_ipv4(name, value)` | +| `DATE` | `line_sender_buffer_column_date` | `buffer.column_date(name, millis)` | +| `TIMESTAMP` (non-designated) | `line_sender_buffer_column_ts_micros` | `buffer.column(name, timestamp_micros)` | +| `TIMESTAMP_NS` (non-designated) | `line_sender_buffer_column_ts_nanos` | `buffer.column(name, timestamp_nanos)` | +| `GEOHASH` | `line_sender_buffer_column_geohash` | `buffer.column_geohash(name, bits, precision)` | +| `DECIMAL` (string form) | `line_sender_buffer_column_dec_str` | `buffer.column(name, decimal_str_view)` | +| `DECIMAL` (binary, generic) | `line_sender_buffer_column_dec` | `buffer.column(name, decimal_view)` | +| `DECIMAL64` | `line_sender_buffer_column_dec64` / `_dec64_str` | `buffer.column_dec64(name, ...)` | 
+| `DECIMAL128` | `line_sender_buffer_column_dec128` / `_dec128_str` | `buffer.column_dec128(name, ...)` | +| `DOUBLE[]` (arrays) | `line_sender_buffer_column_f64_arr_c_major` / `_byte_strides` / `_elem_strides` | `buffer.column(name, array_view)` | + +### Null values + +The C ABI does not expose `_opt` variants for typed nulls (unlike the Rust +client). To write a null for a column on a given row, **omit the setter for +that column** — there is no explicit "set null" call. + + + ```c -// line_sender_trades_example.c -#include -#include -#include +// `amount` is omitted on this row, so it is stored as NULL. +if (!line_sender_buffer_table(buffer, tbl, &err)) goto on_error; +if (!line_sender_buffer_symbol(buffer, symbol_name, symbol_val, &err)) goto on_error; +if (!line_sender_buffer_column_f64(buffer, price_name, 2615.54, &err)) goto on_error; +if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err)) goto on_error; +``` -int main() { - // Initialize line sender - line_sender_error *error = NULL; - line_sender *sender = line_sender_from_conf( - QDB_UTF8_LITERAL("http::addr=localhost:9000;username=admin;password=quest;"), &error); - - if (error != NULL) { - size_t len; - const char *msg = line_sender_error_msg(error, &len); - fprintf(stderr, "Failed to create line sender: %.*s\n", (int)len, msg); - line_sender_error_free(error); - return 1; - } + + - // Print success message - printf("Line sender created successfully\n"); +```cpp +// `amount` is omitted on this row, so it is stored as NULL. 
+buffer + .table("trades"_tn) + .symbol("symbol"_cn, "ETH-USD"_utf8) + .column("price"_cn, 2615.54) + .at(qdb::timestamp_nanos::now()); +``` - // Initialize line sender buffer - line_sender_buffer *buffer = line_sender_buffer_new(); - if (buffer == NULL) { - fprintf(stderr, "Failed to create line sender buffer\n"); - line_sender_close(sender); - return 1; - } + + - // Add data to buffer for ETH-USD trade - if (!line_sender_buffer_table(buffer, - QDB_TABLE_NAME_LITERAL("trades"), &error)) - goto error; - if (!line_sender_buffer_symbol(buffer, - QDB_COLUMN_NAME_LITERAL("symbol"), QDB_UTF8_LITERAL("ETH-USD"), &error)) - goto error; - if (!line_sender_buffer_symbol(buffer, - QDB_COLUMN_NAME_LITERAL("side"), QDB_UTF8_LITERAL("sell"), &error)) - goto error; - if (!line_sender_buffer_column_f64(buffer, - QDB_COLUMN_NAME_LITERAL("price"), 2615.54, &error)) - goto error; - if (!line_sender_buffer_column_f64(buffer, - QDB_COLUMN_NAME_LITERAL("amount"), 0.00044, &error)) goto error; - if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err)) - goto on_error; +The designated timestamp cannot be null — every row requires one of +`at_nanos`, `at_micros`, or `at_now`. - // Flush the buffer to QuestDB - if (!line_sender_flush(sender, buffer, &error)) { - size_t len; - const char *msg = line_sender_error_msg(error, &len); - fprintf(stderr, "Failed to flush data: %.*s\n", (int)len, msg); - line_sender_error_free(error); - line_sender_buffer_free(buffer); - line_sender_close(sender); - return 1; - } +On a brand-new table, an omitted column is not inferred from that row. +The server only adds the column when a later row supplies a non-null +value for it, so first-row nulls leave the column absent until then. 
- // Print success message - printf("Data flushed successfully\n"); +### Ingest arrays - // Free resources - line_sender_buffer_free(buffer); - line_sender_close(sender); +The client encodes one-dimensional and multidimensional `DOUBLE[]` (and +`LONG[]`, when the server accepts it) columns. Three layouts are supported: - return 0; +| Layout | Function (f64) | When to use | +|---|---|---| +| Row-major (C-major) | `line_sender_buffer_column_f64_arr_c_major` | Contiguous, row-major (C-style) buffers. | +| Byte strides | `line_sender_buffer_column_f64_arr_byte_strides` | Non-contiguous data with strides expressed in bytes. | +| Element strides | `line_sender_buffer_column_f64_arr_elem_strides` | Non-contiguous data with strides expressed in elements. | -error: - { - size_t len; - const char *msg = line_sender_error_msg(error, &len); - fprintf(stderr, "Error: %.*s\n", (int)len, msg); - line_sender_error_free(error); - line_sender_buffer_free(buffer); - line_sender_close(sender); - return 1; - } -} +```c +// Row-major 3x2 array of f64. +uintptr_t shape[2] = {3, 2}; +double values[6] = {1.0850, 600000.0, 1.0849, 300000.0, 1.0848, 150000.0}; +if (!line_sender_buffer_column_f64_arr_c_major( + buffer, + QDB_COLUMN_NAME_LITERAL("bids"), + 2, // rank + shape, // shape (length == rank) + values, // data (typed pointer) + sizeof(values) / sizeof(values[0]), // element count + &err)) + goto on_error; ``` -In this case, we call `line_sender_buffer_at_nanos()` and pass the current -timestamp. The value returned by `line_sender_now_nanos()` is nanoseconds -from unix epoch (UTC). +Array ingestion requires QuestDB 9.0.0 or later. -Let's see now an example with timestamps, custom timeout, basic auth, error -control, and transactional awareness. 
+### Decimal columns -```c -// line_sender_trades_example.c -#include -#include -#include -#include +:::caution -int main() { - // Initialize line sender - line_sender_error *error = NULL; - line_sender *sender = line_sender_from_conf( - QDB_UTF8_LITERAL( - "http::addr=localhost:9000;username=admin;password=quest;retry_timeout=20000;" - ), &error); - - if (error != NULL) { - size_t len; - const char *msg = line_sender_error_msg(error, &len); - fprintf(stderr, "Failed to create line sender: %.*s\n", (int)len, msg); - line_sender_error_free(error); - return 1; - } +Decimal ingestion requires QuestDB 9.2.0 or later. Pre-create decimal columns +with `DECIMAL(precision, scale)` so the server enforces the expected precision. +See the +[decimal data type](/docs/query/datatypes/decimal/#creating-tables-with-decimals) +page. - // Print success message - printf("Line sender created successfully\n"); +::: - // Initialize line sender buffer - line_sender_buffer *buffer = line_sender_buffer_new(); - if (buffer == NULL) { - fprintf(stderr, "Failed to create line sender buffer\n"); - line_sender_close(sender); - return 1; - } +The simplest path is the string form: - // Get current time in nanoseconds - int64_t nanos = line_sender_now_nanos(); - - // Add data to buffer for ETH-USD trade - if (!line_sender_buffer_table(buffer, - QDB_TABLE_NAME_LITERAL("trades"), &error)) - goto error; - if (!line_sender_buffer_symbol(buffer, - QDB_COLUMN_NAME_LITERAL("symbol"), QDB_UTF8_LITERAL("ETH-USD"), &error)) - goto error; - if (!line_sender_buffer_symbol(buffer, - QDB_COLUMN_NAME_LITERAL("side"), QDB_UTF8_LITERAL("sell"), &error)) - goto error; - if (!line_sender_buffer_column_f64(buffer, - QDB_COLUMN_NAME_LITERAL("price"), 2615.54, &error)) - goto error; - if (!line_sender_buffer_column_f64(buffer, - QDB_COLUMN_NAME_LITERAL("amount"), 0.00044, &error)) - goto error; - if (!line_sender_buffer_at_nanos(buffer, nanos, &error)) - goto error; - - // Add data to buffer for BTC-USD trade - if 
(!line_sender_buffer_table(buffer, - QDB_TABLE_NAME_LITERAL("trades"), &error)) - goto error; - if (!line_sender_buffer_symbol(buffer, - QDB_COLUMN_NAME_LITERAL("symbol"), - QDB_UTF8_LITERAL("BTC-USD"), &error)) - goto error; - if (!line_sender_buffer_symbol(buffer, - QDB_COLUMN_NAME_LITERAL("side"), QDB_UTF8_LITERAL("sell"), &error)) - goto error; - if (!line_sender_buffer_column_f64(buffer, - QDB_COLUMN_NAME_LITERAL("price"), 39269.98, &error)) - goto error; - if (!line_sender_buffer_column_f64(buffer, - QDB_COLUMN_NAME_LITERAL("amount"), 0.001, &error)) - goto error; - if (!line_sender_buffer_at_nanos(buffer, nanos, &error)) - goto error; - - // If we detect multiple tables within the same buffer, we abort to avoid potential - // inconsistency issues. Read below in this page for transaction details - if (!line_sender_buffer_transactional(buffer)) { - fprintf(stderr, "Buffer is not transactional\n"); - line_sender_buffer_free(buffer); - line_sender_close(sender); - return 1; - } +```c +const char* price = "2615.54"; +if (!line_sender_buffer_column_dec_str( + buffer, + QDB_COLUMN_NAME_LITERAL("price"), + price, strlen(price), + &err)) + goto on_error; +``` - // Flush the buffer to QuestDB - if (!line_sender_flush(sender, buffer, &error)) { - size_t len; - const char *msg = line_sender_error_msg(error, &len); - fprintf(stderr, "Failed to flush data: %.*s\n", (int)len, msg); - line_sender_error_free(error); - line_sender_buffer_free(buffer); - line_sender_close(sender); - return 1; - } +For fixed-width binary forms, use `line_sender_buffer_column_dec64` (one +`int64_t` unscaled value) and `line_sender_buffer_column_dec128` (a 16-byte +unscaled little-endian integer). - // Print success message - printf("Data flushed successfully\n"); +### Designated timestamp - // Free resources - line_sender_buffer_free(buffer); - line_sender_close(sender); +The [designated timestamp](/docs/concepts/designated-timestamp/) column +controls time-based partitioning and ordering. 
Two ways to set it: - return 0; +**User-assigned** (recommended for deduplication and exactly-once delivery): -error: - { - size_t len; - const char *msg = line_sender_error_msg(error, &len); - fprintf(stderr, "Error: %.*s\n", (int)len, msg); - line_sender_error_free(error); - line_sender_buffer_free(buffer); - line_sender_close(sender); - return 1; - } -} +```c +// Microsecond precision creates a standard TIMESTAMP column. +if (!line_sender_buffer_at_micros(buffer, line_sender_now_micros(), &err)) + goto on_error; + +// Nanosecond precision creates a TIMESTAMP_NS column. +if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err)) + goto on_error; ``` -Now, both events use the same timestamp. We recommend using the event's -original timestamp when ingesting data into QuestDB. Using ingestion-time -timestamps precludes the ability to deduplicate rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +**Server-assigned** (server uses its wall-clock time): -### Array Insertion +```c +if (!line_sender_buffer_at_now(buffer, &err)) + goto on_error; +``` -The sender uses a plain 1-dimensional C array to insert an array of any -dimensionality. It contains the elements laid out flat in row-major order. -The shape describes the rank and dimensions of the array. +`at_now` removes the ability to deduplicate rows. Prefer explicit timestamps +for production ingestion. See +[Delivery semantics](/docs/concepts/delivery-semantics/) for why +server-assigned timestamps defeat exactly-once outcomes. :::note -Arrays are supported from QuestDB version 9.0.0, and require updated -client libraries. + +QuestDB works best when data arrives in chronological order (sorted by +timestamp). + ::: -In this example, we insert arrays of `double` values for some FX order book data. -* `bids` and `asks`: 2D arrays of L2 order book depth. Each level contains price and volume. 
-* `bids_exec_probs` and `asks_exec_probs`: 1D arrays of calculated execution probabilities for the next minute. +## Flushing -```c -#include -#include -#include -#include +The sender and buffer are decoupled. Buffered rows are not on the wire until +you call `line_sender_flush(sender, buffer, &err)` (C++ `sender.flush(buffer)`). -int main() -{ - line_sender_error* err = NULL; - line_sender* sender = NULL; - line_sender_buffer* buffer = NULL; +:::caution No auto-flush - // or "tcp::addr=127.0.0.1:9009;protocol_version=2;" - const char* conf_str = "http::addr=127.0.0.1:9000;"; +The QWP/WebSocket C/C++ client does not implement auto-flushing at all — +you must call `flush()` explicitly. The connect-string keys +`auto_flush_rows` and `auto_flush_bytes` are rejected; `auto_flush=off` is +accepted only as a no-op for compatibility with HTTP/ILP connect strings. - line_sender_utf8 conf_str_utf8 = {0, NULL}; - if (!line_sender_utf8_init( - &conf_str_utf8, strlen(conf_str), conf_str, &err)) - goto on_error; +A common pattern is to flush periodically on a timer and/or when the buffer +exceeds a threshold — by encoded byte size (`line_sender_buffer_size(buffer)`, +C++ `buffer.size()`) or row count (`line_sender_buffer_row_count(buffer)`, +C++ `buffer.row_count()`). - sender = line_sender_from_conf(conf_str_utf8, &err); - if (!sender) - goto on_error; +::: - buffer = line_sender_buffer_new_for_sender(sender); - line_sender_buffer_reserve(buffer, 64 * 1024); +`flush()` clears the buffer after publishing locally. Use +`line_sender_flush_and_keep` (C++ `sender.flush_and_keep(buffer)`) to retain +the contents, for example to fan the same buffer out to multiple senders. 
- line_sender_table_name table_name = QDB_TABLE_NAME_LITERAL("fx_order_book"); - line_sender_column_name symbol_col = QDB_COLUMN_NAME_LITERAL("symbol"); - line_sender_column_name bids_col = QDB_COLUMN_NAME_LITERAL("bids"); - line_sender_column_name asks_col = QDB_COLUMN_NAME_LITERAL("asks"); +On QWP/WebSocket, `flush()` returns once the buffer is accepted by the local +sender queue, before the server acknowledges it. The queue is in process +memory by default; setting `sf_dir` swaps it for the disk-backed +store-and-forward queue. Either way, the call can block if the queue is +full (see [Backpressure on flush](#backpressure-on-flush)). Server errors +observed later are reported asynchronously (see +[Asynchronous error handling](#asynchronous-error-handling)). - if (!line_sender_buffer_table(buffer, table_name, &err)) - goto on_error; +### Backpressure on flush - line_sender_utf8 symbol_val = QDB_UTF8_LITERAL("EUR/USD"); - if (!line_sender_buffer_symbol(buffer, symbol_col, symbol_val, &err)) - goto on_error; +`flush()` is not unconditionally non-blocking. The publisher feeds a bounded +queue with two stacked caps: - // bids: 3 rows (levels), 2 columns (price, volume) - uintptr_t bids_rank = 2; - uintptr_t bids_shape[] = {3, 2}; - double bids_data[] = { - 1.0850, 600000, - 1.0849, 300000, - 1.0848, 150000 - }; - - if (!line_sender_buffer_column_f64_arr_c_major( - buffer, - bids_col, - bids_rank, - bids_shape, - (const uint8_t*)bids_data, - sizeof(bids_data), - &err)) - goto on_error; +1. **In-flight window**, `max_in_flight` (default `128`) unacknowledged + frames on the connection. Reached first under steady-state load when the + server keeps up but you have many small flushes in flight. +2. **Queue cap**, `sf_max_total_bytes` (default `128 MiB` in memory mode, + `10 GiB` in disk mode). Reached when the server is unreachable long + enough that the in-flight count stops being the active limit. 
- // asks: 3 rows (levels), 2 columns (price, volume) - uintptr_t asks_rank = 2; - uintptr_t asks_shape[] = {3, 2}; - double asks_data[] = { - 1.0853, 500000, - 1.0854, 250000, - 1.0855, 125000 - }; - - if (!line_sender_buffer_column_f64_arr_c_major( - buffer, - asks_col, - asks_rank, - asks_shape, - (const uint8_t*)asks_data, - sizeof(asks_data), - &err)) - goto on_error; +When either cap is hit, `flush()` blocks the caller and retries as the I/O +loop releases capacity (ACK-driven trim). The wait is bounded by +`sf_append_deadline_millis` (default `30000`). If the deadline elapses, +`flush()` returns an error with code +`line_sender_error_server_flush_error` carrying a `SubmitTimedOut` +diagnostic; the application can retry, fail closed, or shed load. +**No data is ever dropped or overwritten** while the publisher is parked. - // Timestamp, leave as-is (similar to your example) - if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err)) - goto on_error; +Column setters and `line_sender_buffer_table(...)` never block; they only +mutate the in-process buffer. Backpressure surfaces only at `flush()`. + +:::caution Oversized payloads are rejected, not parked + +A single flushed payload larger than `sf_max_bytes` (default `4 MiB`) returns +an error from `flush()` immediately; it does *not* enter the backpressure +wait. Fixes: reduce the number of rows you accumulate per buffer before +flushing, or raise `sf_max_bytes` to fit your largest single flushed payload. + +::: + +### FSN-based completion - if (!line_sender_flush(sender, buffer, &err)) +Every published frame is assigned a frame sequence number (FSN).
To wait until +the server has acknowledged a specific frame: + + + + +```c +line_sender_qwpws_fsn fsn; +if (!line_sender_qwpws_flush_and_get_fsn(sender, buffer, &fsn, &err)) + goto on_error; + +if (fsn.has_value) { + bool reached = false; + if (!line_sender_qwpws_await_acked_fsn( + sender, fsn.value, 10000, &reached, &err)) goto on_error; + if (!reached) + fprintf(stderr, "timed out waiting for server ACK\n"); +} +``` - line_sender_close(sender); - return 0; + + -on_error:; - size_t err_len = 0; - const char* err_msg = line_sender_error_msg(err, &err_len); - fprintf(stderr, "Error: %.*s\n", (int)err_len, err_msg); - line_sender_error_free(err); - line_sender_buffer_free(buffer); - line_sender_close(sender); - return 1; +```cpp +auto fsn = sender.flush_and_get_fsn(buffer); +if (fsn && !sender.await_acked_fsn(*fsn, std::chrono::seconds{10})) { + std::cerr << "timed out waiting for server ACK\n"; } ``` -If you need to specify strides, you can do this via either the -`line_sender_buffer_column_f64_arr_byte_strides` or the -`line_sender_buffer_column_f64_arr_elem_strides` functions. + + -Please refer to the -[Concepts section on n-dimensional arrays](/docs/query/datatypes/array), where this is -explained in more detail. +Related accessors: - +### Handler callback -## Other Considerations for both C and C++ +Install a handler on the options object. It runs synchronously from sender +API calls such as `flush()`. The handler must not call back into the same +sender. -### Configuration options + + -The easiest way to configure the line sender is the configuration string. The -general structure is: +```c +static void on_qwp_error( + void* user_data, + const line_sender_qwpws_error_view* ev) +{ + (void)user_data; + fprintf(stderr, "qwp error: category=%d msg=%.*s\n", + (int)ev->category, (int)ev->message_len, ev->message); +} -```plain -::addr=host:port;param1=val1;param2=val2;... 
+line_sender_utf8 host = QDB_UTF8_LITERAL("localhost"); +line_sender_opts* opts = line_sender_opts_new( + line_sender_protocol_qwpws, host, 9000); +line_sender_opts_qwpws_error_handler(opts, on_qwp_error, NULL, &err); +line_sender* sender = line_sender_build(opts, &err); +line_sender_opts_free(opts); ``` -`transport` can be `http`, `https`, `tcp`, or `tcps`. The C/C++ and Rust clients -share the same codebase. Please refer to the -[Rust client's documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) -for the full details on configuration. + + + +```cpp +namespace qdb = questdb::ingress; +using namespace questdb::ingress::literals; + +qdb::opts options{qdb::protocol::qwpws, "localhost"_utf8, 9000}; +options.qwp_ws_error_handler([](const qdb::qwp_ws_error& e) { + std::cerr << "qwp error: category=" << static_cast(e.category) + << " msg=" << e.message << "\n"; +}); +qdb::line_sender sender{options}; +``` -Alternatively, for a breakdown of Configuration string options available across -all clients, see the [Configuration string](/docs/ingestion/clients/configuration-string/) page. + + -### Don't forget to flush +### Error view fields -The sender and buffer objects are entirely decoupled. This means that the sender -won't get access to the data in the buffer until you explicitly call -`sender.flush` or `line_sender_flush`. This may lead to a pitfall where you drop -a buffer that still has some data in it, resulting in permanent data loss. +The `line_sender_qwpws_error_view` struct (and the C++ `qwp_ws_error` struct) +carries: -A common technique is to flush periodically on a timer and/or once the buffer -exceeds a certain size. You can check the buffer's size by calling -`buffer.size()` or `line_sender_buffer_size(...)`. +| Field | Meaning | +|---|---| +| `category` | `schema_mismatch`, `parse_error`, `internal_error`, `security_error`, `write_error`, `protocol_violation`, `unknown`. Use for programmatic dispatch. 
| +| `applied_policy` | `drop_and_continue` (batch dropped, sender continues) or `halt` (sender latched terminal). | +| `status` (`has_status`) | Raw QWP status byte. Absent for WebSocket protocol violations. | +| `message` / `message_len` | Human-readable error text from the server, or a client-synthesized close reason for WebSocket protocol violations. The pointer is **not** NUL-terminated; always read exactly `message_len` bytes. See [Message stability](#message-stability) and [PII safety](#message-pii). | +| `message_sequence` (`has_message_sequence`) | Server's per-frame QWP wire sequence for the error frame. Resets on reconnect, only meaningful within one connection. | +| `from_fsn` / `to_fsn` | Inclusive FSN span of the affected frame(s), client-side. | -The default `flush()` method clears the buffer after sending its data. If you -want to preserve its contents (for example, to send the same data to multiple -QuestDB instances), call `sender.flush_and_keep(&buffer)` or -`line_sender_flush_and_keep(...)` instead. +`line_sender_qwpws_errors_dropped` (C++ `qwp_ws_errors_dropped()`) reports how +many diagnostics were lost because the bounded log overflowed (typically due +to a lagging poll cursor). -### Transactional flush +#### Message stability {#message-stability} -As described in -[ILP overview](/docs/ingestion/ilp/overview#http-transaction-semantics), the -HTTP transport has some support for transactions. +`message` is a human-readable diagnostic, **not a stable contract.** Its +text varies across server versions and across provenance: -To ensure in advance that a flush will not affect more than one table, call -`buffer.transactional()` or `line_sender_buffer_transactional(buffer)`, as shown -in the examples above. This call will return false if the flush wouldn't be -data-transactional. +- **QWP error frames** carry a server-supplied UTF-8 string capped at + 1024 bytes by the wire spec. 
+- **WebSocket protocol violations** are client-synthesized as + `"ws-close[]: "`. +- The server-supplied text mirrors QuestDB's normal SQL error formatting, + which historically reworded across releases. +- The field may be empty. -### Protocol Version +Use `category` and `status` for programmatic dispatch. Never pattern-match +on `message`. -To enhance data ingestion performance, QuestDB introduced an upgrade to the -text-based InfluxDB Line Protocol which encodes arrays and `double` values in -binary form. Arrays are supported only in this upgraded protocol version. +#### PII / secret safety {#message-pii} -You can select the protocol version with the `protocol_version` setting in the -configuration string. +`message` may include fragments of the client's own payload, for +example, an offending column value quoted back by a schema or parse +rejection, or a server-supplied WebSocket close reason that the +operator did not control. **Treat `message` as potentially containing +PII or secrets.** -HTTP transport automatically negotiates the protocol version by default. In order -to avoid the slight latency cost at connection time, you can explicitly configure -the protocol version by setting `protocol_version=2|1;`. +Log it at the same trust level as the data being sent, and sanitize +before forwarding to external error trackers (Sentry, Datadog, end-user +UIs). The other fields on the error view are safe to forward as-is - +they carry only structural metadata. -TCP transport does not negotiate the protocol version and uses version 1 by -default. You must explicitly set `protocol_version=2;` in order to ingest -arrays, as in this example: +#### Correlating with server-side logs -```text -tcp::addr=localhost:9009;protocol_version=2; +The protocol does not currently surface a server-issued request or +connection identifier in the WebSocket upgrade response. 
The closest +correlation tuple is `(message_sequence, from_fsn, to_fsn)`: + +- `message_sequence`: per-connection QWP wire sequence the server + attached to the error frame. Resets on reconnect. +- `from_fsn` / `to_fsn`: client-side FSN span of the affected frames. + Not generally indexed by server-side logs. + +When opening a bug report, supply the connection start time (from your +application logs) and the `(message_sequence, from_fsn, to_fsn)` triple. + +After a `halt` policy fires, the sender is terminal. Drop it and create a new +one. `line_sender_must_close(sender)` (C++ `sender.must_close()`) reports +whether the sender has entered a terminal state. + +`drop_and_continue` errors do not halt the sender. The affected batch is +discarded; subsequent frames are unaffected and the I/O loop keeps running. + +For terminal diagnostics, the next failing sender call also returns a +`line_sender_error*` whose structured QWP/WebSocket diagnostic can be +copied with `line_sender_error_qwpws_get_view(err, &view)`. The view is +borrowed from the ordinary error object and is valid until +`line_sender_error_free()`. Copy exactly `message_len` bytes from +`view.message` before freeing the error. In C++, the same diagnostic is +available via `line_sender_error::qwp_ws_diagnostic()` on the thrown +exception. + +## Progress modes + +The client drives the WebSocket loop in one of two modes: + +| Mode | Behaviour | +|---|---| +| `LINE_SENDER_QWPWS_PROGRESS_BACKGROUND` (default) | A sender-owned thread sends frames, receives ACKs, reconnects, and replays. Right choice for most callers. | +| `LINE_SENDER_QWPWS_PROGRESS_MANUAL` | No background thread. The caller drives progress with `line_sender_qwpws_drive_once` or `line_sender_qwpws_await_acked_fsn`. 
| + +Set it via the connect string (`qwp_ws_progress=manual`) or the options API: + + + + +```c +#define _POSIX_C_SOURCE 199309L /* nanosleep */ +#include + +line_sender_utf8 conf = QDB_UTF8_LITERAL( + "ws::addr=localhost:9000;qwp_ws_progress=manual;"); +line_sender* sender = line_sender_from_conf(conf, &err); + +// ... append rows ... +line_sender_qwpws_fsn fsn; +if (!line_sender_qwpws_flush_and_get_fsn(sender, buffer, &fsn, &err)) + goto on_error; + +if (fsn.has_value) { + bool reached = false; + while (!reached) { + bool progressed = false; + if (!line_sender_qwpws_drive_once(sender, &progressed, &err)) + goto on_error; + line_sender_qwpws_fsn acked; + if (!line_sender_qwpws_acked_fsn(sender, &acked, &err)) + goto on_error; + if (acked.has_value && acked.value >= fsn.value) { + reached = true; + } else if (!progressed) { + /* Park briefly so the loop does not spin. Replace with + * whatever your scheduler/event loop uses. */ + struct timespec park = {0, 1000 * 1000}; + nanosleep(&park, NULL); + } + } +} +``` + + + + +```cpp +#include + +auto sender = qdb::line_sender::from_conf( + "ws::addr=localhost:9000;qwp_ws_progress=manual;"_utf8); +// ... append rows ... +auto fsn = sender.flush_and_get_fsn(buffer); +if (fsn) { + while (true) { + auto acked = sender.acked_fsn(); + if (acked && *acked >= *fsn) break; + if (!sender.drive_once()) { + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + } + } +} +``` + + + + +`drive_once` performs at most one unit of work per call (send one frame, +drain ready responses, do one storage-maintenance step). For a simpler +blocking wait in manual mode, call `await_acked_fsn` directly, it drives +manual progress internally while waiting. + +## Failover and high availability + +:::note Enterprise + +Multi-host failover with automatic reconnect is most useful with QuestDB +Enterprise primary-replica replication. 
+ +::: + +### Multiple endpoints + +Specify a comma-separated address list (or repeat `addr=`): + +```text +ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; +``` + +The client picks an endpoint, connects, and walks the list to find the next +healthy peer when the current connection breaks. Duplicate `host:port` +entries are rejected at parse time. + +:::tip Strongly recommend sf_dir for multi-host deployments + +Without `sf_dir`, `flush()` blocks when the connection is down and the +in-memory queue fills up. After `sf_append_deadline_millis` (default 30s), +it returns an error. With `sf_dir`, `flush()` writes to disk and returns +quickly while the reconnect loop replays to the new primary in the +background. For any deployment where failover may take more than a few +seconds, `sf_dir` is strongly recommended. + +::: + +### Reconnect knobs + +| Key | Default | Description | +|---|---|---| +| `reconnect_max_duration_millis` | 300000 | Total outage budget before giving up. | +| `reconnect_initial_backoff_millis` | 100 | First post-failure sleep. | +| `reconnect_max_backoff_millis` | 5000 | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry on first connect. Values: `off`, `on` / `true` / `sync` (synchronous retry), `async` (background retry), `false` (alias for `off`). | + +By default the first connect fails fast; subsequent disconnects use the +reconnect policy. Set `initial_connect_retry=on` to apply the same policy to +the initial connect. + +Once `reconnect_max_duration_millis` elapses without a successful +reconnect, the sender latches terminal: `line_sender_must_close(sender)` +(C++ `sender.must_close()`) returns `true` and subsequent `flush()` calls +fail with a "sender is terminal" error. Drop the sender and create a new +one to continue. + +### Error classification + +- **Authentication errors** (`401`/`403`): terminal across all endpoints. The + reconnect loop stops immediately. 
+- **Role reject** (`421 + X-QuestDB-Role`): transient if the role is + `PRIMARY_CATCHUP`, topology-level otherwise. +- **Version mismatch at upgrade**: per-endpoint, not terminal. The client + tries the next endpoint. +- **All other errors** (TCP/TLS failures, `404`, `503`, mid-stream errors): + transient, fed into the reconnect loop. + +Connection lifecycle events are not surfaced as a structured C/C++ callback. +The default error handler writes one structured line to stderr per server +diagnostic; install your own +[handler](#handler-callback) to integrate with another logging system. + +## Closing the sender + +Call `line_sender_qwpws_close_drain` (C++ `sender.close_drain()`) before +freeing the sender for delivery-sensitive shutdown: + + + + +```c +if (!line_sender_qwpws_close_drain(sender, &err)) + goto on_error; +line_sender_close(sender); +``` + + + + +```cpp +sender.close_drain(); // throws on timeout or terminal failure +// sender's destructor runs as normal +``` + + + + +`close_drain` stops accepting new publications and waits up to +`close_flush_timeout_millis` (default 5000) for already-published frames to +ACK. Plain `line_sender_close` (C++ destructor) is best-effort and does +**not** report delivery failure; use `close_drain` whenever delivery +matters. With `sf_dir`, anything still un-acked is persisted to disk so a +later sender can replay it. + +## Querying and SQL execution + +The query client sends SQL over the +[QWP egress](/docs/connect/wire-protocols/qwp-egress-websocket/) endpoint +(`/read/v1`). It is a separate object from the ingestion sender — it +opens its own WebSocket and accepts the same connect-string schemes +(`ws::` / `wss::`). + +A reader connects to one endpoint at a time, executes one query at a +time, and streams the result as a sequence of column-oriented batches +until the server emits a terminal frame. DDL, DML, and `SELECT` use the +same `execute()` entry point and differ only in their terminal frame. 
+ +### Quick start + + + + +```c +#include +#include +#include +#include + +int main(void) { + line_reader_error* err = NULL; + line_reader* reader = NULL; + line_reader_cursor* cursor = NULL; + + line_sender_utf8 conf = QDB_UTF8_LITERAL("ws::addr=localhost:9000;"); + reader = line_reader_from_conf(conf, &err); + if (!reader) goto on_error; + + line_sender_utf8 sql = QDB_UTF8_LITERAL( + "SELECT ts, symbol, price, amount FROM trades " + "WHERE symbol = 'ETH-USD' LIMIT 100"); + cursor = line_reader_execute(reader, sql, &err); + if (!cursor) goto on_error; + + const line_reader_batch* batch; + while ((batch = line_reader_cursor_next_batch(cursor, &err)) != NULL) { + const size_t rows = line_reader_batch_row_count(batch); + + /* Project each column once per batch, then index by row. */ + line_reader_column_data d_ts, d_sym, d_price; + line_reader_symbol_dict sym_dict; + if (!line_reader_batch_column_data(batch, 0, &d_ts, &err)) goto on_error; + if (!line_reader_batch_column_data(batch, 1, &d_sym, &err)) goto on_error; + if (!line_reader_batch_column_data(batch, 2, &d_price, &err)) goto on_error; + if (!line_reader_batch_symbol_dict(batch, &sym_dict, &err)) goto on_error; + + for (size_t r = 0; r < rows; ++r) { + bool ts_null = false, sym_null = false, price_null = false; + int64_t ts = line_reader_column_data_get_i64(&d_ts, r, &ts_null); + + const char* symbol = NULL; + size_t symbol_len = 0; + if (!line_reader_column_data_get_symbol( + &d_sym, &sym_dict, r, &symbol, &symbol_len, &sym_null)) + goto on_error; + + double price = line_reader_column_data_get_f64(&d_price, r, &price_null); + + printf("ts=%lld symbol=%.*s price=%g\n", + (long long)ts, (int)symbol_len, symbol ? 
symbol : "", price); + } + } + if (err) goto on_error; + + line_reader_cursor_free(cursor); + line_reader_close(reader); + return 0; + +on_error:; + size_t err_len = 0; + const char* msg = line_reader_error_msg(err, &err_len); + fprintf(stderr, "error: %.*s\n", (int)err_len, msg); + line_reader_error_free(err); + if (cursor) line_reader_cursor_free(cursor); + if (reader) line_reader_close(reader); + return 1; +} +``` + + + + +```cpp +#include +#include + +namespace qdb = questdb::egress; +using namespace questdb::ingress::literals; + +int main() { + try { + qdb::reader reader{"ws::addr=localhost:9000;"_utf8}; + auto cur = reader.execute( + "SELECT ts, symbol, price, amount FROM trades " + "WHERE symbol = 'ETH-USD' LIMIT 100"_utf8); + + while (auto bo = cur.next_batch()) { + auto& batch = *bo; + auto col_ts = batch.column(0); + auto col_symbol = batch.column(1); + auto col_price = batch.column(2); + const size_t rows = batch.row_count(); + for (size_t r = 0; r < rows; ++r) { + auto ts = col_ts.get(r); + auto symbol = col_symbol.symbol(r); + auto price = col_price.get(r); + std::cout + << "ts=" << (ts ? std::to_string(*ts) : "NULL") + << " symbol=" << (symbol ? *symbol : "NULL") + << " price=" << (price ? std::to_string(*price) : "NULL") + << "\n"; + } + } + return 0; + } catch (const qdb::line_reader_error& e) { + std::cerr << "error (code " << static_cast(e.code()) + << "): " << e.what() << "\n"; + return 1; + } +} +``` + + + + +The four steps mirror the ingestion side: + +1. Build a `reader` from a connect string. +2. Call `execute(sql)` (or `prepare(sql)` + binds + `.execute()`) to obtain a `cursor`. +3. Loop `next_batch()` until it returns `nullopt` / `NULL` (terminal). +4. For each batch, project columns once with `batch.column(c)` / C + `line_reader_batch_column_data(batch, c, &d, &err)`, then index by row + with `col.get(r)` (C++) or the `line_reader_column_data_get_*` + inline helpers (C). 
The C ABI is **bulk-only at the symbol level** — +one FFI call per column, then pointer arithmetic per row. + +For the full list of connect-string keys accepted by the reader (including +`target`, `zone`, `failover_*`, `compression`, and the shared TLS / auth +keys), see the +[connect string reference](/docs/connect/clients/connect-string/). + +### Creating a reader + +The reader uses the same connect-string format as the sender. Build it +from a literal, from `QDB_CLIENT_CONF`, or — in C — through the C ABI +directly. + + + + +```c +/* From a literal. */ +line_sender_utf8 conf = QDB_UTF8_LITERAL("ws::addr=localhost:9000;"); +line_reader_error* err = NULL; +line_reader* reader = line_reader_from_conf(conf, &err); + +/* From QDB_CLIENT_CONF (credentials out of source code). */ +line_reader* reader2 = line_reader_from_env(&err); +``` + + + + +```cpp +namespace qdb = questdb::egress; +using namespace questdb::ingress::literals; + +// From a literal. +qdb::reader reader{"ws::addr=localhost:9000;"_utf8}; + +// From QDB_CLIENT_CONF. +auto reader2 = qdb::reader::from_env(); +``` + + + + +Use the same `QDB_CLIENT_CONF` environment variable as the sender — both +clients parse the same connect-string grammar, so a single shared variable +works for callers that open one of each in the same process. + +### Concurrency + +The reader API has four handle types, each with its own thread-mobility +rule: + +| Handle | Concurrent access | Mobility | +|---|---|---| +| `line_reader` / `qdb::reader` | Single-threaded. | Movable between threads with an explicit happens-before edge (mutex hand-off, thread spawn/join, release/acquire on the pointer). | +| `line_reader_query` / `qdb::query` | Single-threaded. | **Pinned** to the thread that created it. | +| `line_reader_cursor` / `qdb::cursor` | Single-threaded. | **Pinned** to the thread that created it. | +| `line_reader_error` / `qdb::line_reader_error` | Not concurrent. | Free to move. 
| + +Concurrent operations on the same handle from two threads are always +undefined behaviour. To query in parallel, **create one reader per +thread**. Each opens its own WebSocket connection and runs one query at +a time. A reader can execute many queries sequentially; only one cursor +may be live on a reader at any given moment. + +A narrow set of reader stats are exempt from the one-thread rule because +they read atomic counters: `line_reader_bytes_received`, +`line_reader_credit_granted_total`, `line_reader_read_ns`, +`line_reader_decode_ns`, `line_reader_reset_timing`. Use these (not the +`_cursor_credit_granted_total` variant) when a monitoring thread polls a +reader that another thread is actively driving. + +### DDL, DML, and SELECT + +`execute()` is **blocking**: it sends the query, drives the receive loop +on the calling thread, and returns a cursor that streams the response. +DDL, DML, and `SELECT` use the same entry point; they differ only in the +terminal frame the cursor delivers. + +| Statement class | Terminal | `cursor.terminal_kind()` | What to read | +|---|---|---|---| +| `SELECT` | `RESULT_END` | `terminal_kind::end` | `terminal_end()` → `{final_seq, total_rows}` | +| `INSERT` / `UPDATE` / `DELETE` | `EXEC_DONE` | `terminal_kind::exec_done` | `terminal_exec_done()` → `{op_type, rows_affected}` | +| `CREATE` / `ALTER` / `DROP` / `TRUNCATE` | `EXEC_DONE` | `terminal_kind::exec_done` | `terminal_exec_done()` → `{op_type, rows_affected == 0}` | + +Concrete DDL / DML example: + + + + +```c +/* CREATE TABLE. The cursor delivers no batches; only the terminal frame. */ +line_sender_utf8 ddl = QDB_UTF8_LITERAL( + "CREATE TABLE IF NOT EXISTS trades (" + " ts TIMESTAMP, symbol SYMBOL, side SYMBOL," + " price DOUBLE, amount DOUBLE" + ") TIMESTAMP(ts) PARTITION BY DAY WAL"); +line_reader_cursor* cursor = line_reader_execute(reader, ddl, &err); +if (!cursor) goto on_error; + +/* Drain (DDL streams zero batches; the loop body never runs). 
*/ +const line_reader_batch* batch; +while ((batch = line_reader_cursor_next_batch(cursor, &err)) != NULL) { + (void)batch; /* unused for DDL */ +} +if (err) goto on_error; + +uint8_t op_type = 0; +uint64_t rows_affected = 0; +if (line_reader_cursor_terminal_exec_done(cursor, &op_type, &rows_affected)) + printf("DDL ok: op_type=%u rows_affected=%llu\n", + (unsigned)op_type, (unsigned long long)rows_affected); + +line_reader_cursor_free(cursor); +``` + + + + +```cpp +auto cur = reader.execute( + "CREATE TABLE IF NOT EXISTS trades (" + " ts TIMESTAMP, symbol SYMBOL, side SYMBOL," + " price DOUBLE, amount DOUBLE" + ") TIMESTAMP(ts) PARTITION BY DAY WAL"_utf8); + +while (cur.next_batch()) { /* DDL streams zero batches */ } + +if (auto info = cur.terminal_exec_done()) { + std::cout << "DDL ok: op_type=" << int(info->op_type) + << " rows_affected=" << info->rows_affected << "\n"; +} +``` + + + + +`rows_affected` is the count for `INSERT` / `UPDATE` / `DELETE`. Pure DDL +reports `0`. + +`EXEC_DONE` confirms the statement has been applied to the local +write-ahead log (WAL) and is visible to subsequent `SELECT`s on this +reader. The `request_durable_ack` connect-string key is **sender-only** +— it is not accepted on the reader, and reader-driven `INSERT`s +acknowledge on local-WAL commit only. If you need durable upload to +object storage for reader-side `INSERT`s, drive the inserts through the +[ingestion sender](#data-ingestion) with `request_durable_ack=on` +instead. + +Sequencing operations across one reader is safe because `execute()` is +synchronous: the next statement does not start until the previous cursor +terminates and is freed. + +### Parameterised queries + +Use `prepare()` for SQL with `$N` placeholders. Append binds in +positional order, then call `execute()` to obtain a cursor. 
+ + + + +```c +line_sender_utf8 sql = QDB_UTF8_LITERAL( + "SELECT ts, symbol, price, amount FROM trades " + "WHERE symbol = $1 AND price >= $2 LIMIT 1000"); + +line_reader_query* query = line_reader_prepare(reader, sql, &err); +if (!query) goto on_error; + +line_reader_query_bind_varchar(query, QDB_UTF8_LITERAL("ETH-USD")); +line_reader_query_bind_f64(query, 2500.0); + +line_reader_cursor* cursor = line_reader_query_execute(&query, &err); +/* `query` is now NULL — line_reader_query_execute consumes it. */ +if (!cursor) goto on_error; +``` + + + + +```cpp +auto cur = reader + .prepare( + "SELECT ts, symbol, price, amount FROM trades " + "WHERE symbol = $1 AND price >= $2 LIMIT 1000"_utf8) + .bind_varchar("ETH-USD"_utf8) + .bind_f64(2500.0) + .execute(); +``` + + + + +Binds are positional: the first `bind_*` call fills `$1`, the second +fills `$2`, and so on. The number of binds must match the number of +placeholders; mismatches surface from `execute()` as +`line_reader_error_invalid_bind`. + +Most binds are infallible at the call site — they only mutate the +in-process query buffer. The single exception is `bind_varchar`, which +re-validates UTF-8 and silently freezes the builder on invalid bytes; the +deferred error surfaces from `execute()` as +`line_reader_error_invalid_utf8`. To recover, drop the query and rebuild. + +#### Bind parameter types + +The C ABI exposes a separate function per QuestDB type; the C++ wrapper +exposes the same surface as `query::bind_*` methods returning `query&` +for chaining. 
Every QuestDB column type that can appear in a `$N` +placeholder has a setter: + +| QuestDB type | C function | C++ method | +|---|---|---| +| `BOOLEAN` | `line_reader_query_bind_bool` | `query.bind_bool(value)` | +| `BYTE` (i8) | `line_reader_query_bind_i8` | `query.bind_i8(value)` | +| `SHORT` (i16) | `line_reader_query_bind_i16` | `query.bind_i16(value)` | +| `INT` (i32) | `line_reader_query_bind_i32` | `query.bind_i32(value)` | +| `LONG` (i64) | `line_reader_query_bind_i64` | `query.bind_i64(value)` | +| `FLOAT` (f32) | `line_reader_query_bind_f32` | `query.bind_f32(value)` | +| `DOUBLE` (f64) | `line_reader_query_bind_f64` | `query.bind_f64(value)` | +| `CHAR` | `line_reader_query_bind_char` | `query.bind_char(code_unit)` | +| `VARCHAR` | `line_reader_query_bind_varchar` | `query.bind_varchar(utf8_view)` | +| `BINARY` | `line_reader_query_bind_binary` | `query.bind_binary(buf, len)` | +| `UUID` | `line_reader_query_bind_uuid` | `query.bind_uuid(std::array)` | +| `LONG256` | `line_reader_query_bind_long256` | `query.bind_long256(std::array)` | +| `IPv4` | `line_reader_query_bind_ipv4` | `query.bind_ipv4(host_order_u32)` | +| `TIMESTAMP` (μs) | `line_reader_query_bind_timestamp_micros` | `query.bind_timestamp_micros(micros)` | +| `TIMESTAMP_NS` (ns) | `line_reader_query_bind_timestamp_nanos` | `query.bind_timestamp_nanos(nanos)` | +| `DATE` (ms) | `line_reader_query_bind_date_millis` | `query.bind_date_millis(millis)` | +| `DECIMAL64` | `line_reader_query_bind_decimal64` | `query.bind_decimal64(mantissa, scale)` | +| `DECIMAL128` | `line_reader_query_bind_decimal128` | `query.bind_decimal128(lo, hi, scale)` | +| `DECIMAL256` | `line_reader_query_bind_decimal256` | `query.bind_decimal256(std::array, scale)` | +| `GEOHASH` | `line_reader_query_bind_geohash` | `query.bind_geohash(value, precision_bits)` | +| `SYMBOL` | use `bind_varchar` (the server narrows VARCHAR → SYMBOL on schema match) | use `bind_varchar` | + +`SYMBOL` reuses `bind_varchar` because the QWP 
bind framing carries +UTF-8 text; the server resolves to a symbol value on the receiving side. + +:::caution DECIMAL128 mantissa sign + +`bind_decimal128` splits the two's-complement i128 mantissa into a +`uint64_t mantissa_lo` (low 64 bits) and an `int64_t mantissa_hi` +(upper 64 bits). The high limb **must** be passed as `int64_t` so the +sign extends correctly into the full i128 — passing `uint64_t` +zero-extends and silently corrupts every negative value. For example, +i128 `-1` is `(mantissa_lo = UINT64_MAX, mantissa_hi = -1)`. The C++ +overload takes `int64_t` directly, so calling it with a literal works +without further care. + +::: + +The following column types have no `bind_*` variant — they can appear in +result columns but not in `$N` placeholders: + +| Type | Why no bind | Workaround | +|---|---|---| +| `DOUBLE[]` / `LONG[]` (arrays) | The QWP `ARGS` frame carries scalar binds only; shape and stride metadata have no wire encoding. | Emit array literals directly in SQL, or filter by an extracted element (`bids[1][1] >= $1`). | +| `INTERVAL` | Not yet exposed as a bind type. | Bind the boundary as a `TIMESTAMP` / `TIMESTAMP_NS` instead. | + +#### Binding NULL + +Bind a typed NULL with `bind_null` for the simple kinds, or with the +dedicated `bind_null_*` variants for `VARCHAR` / `BINARY` and for kinds +that carry metadata (column scale on decimals, precision on geohash). +Index drift is the failure mode to avoid: NULL still consumes one +positional slot. 
+ + + + +```c +line_reader_query_bind_null(query, line_reader_column_kind_long); /* $1 = NULL LONG */ +line_reader_query_bind_null_varchar(query); /* $2 = NULL VARCHAR */ +line_reader_query_bind_null_decimal64(query, /*scale=*/4); /* $3 = NULL DECIMAL64 */ +line_reader_query_bind_null_geohash(query, /*precision_bits=*/20); /* $4 = NULL GEOHASH */ +``` + + + + +```cpp +query + .bind_null(qdb::column_kind::long_) + .bind_null_varchar() + .bind_null_decimal64(/*scale=*/4) + .bind_null_geohash(/*precision_bits=*/20); +``` + + + + +`bind_null_*` variants exist for `varchar`, `binary`, `decimal64`, +`decimal128`, `decimal256`, and `geohash`. Every other kind goes through +the generic `bind_null(kind)`. + +### Reading result batches + +`next_batch()` returns a borrowed `batch` handle when a batch is +available, `nullopt` when the stream has terminated, and throws (C++) / +returns `NULL` with `*err_out` set (C) on error. The batch handle is the +entry point for all data access — there are no per-cell cursor getters. + +Pointers and views returned by the batch (column names, descriptor +buffers, validity bitmaps, varlen / symbol strings, array shapes and +elements) borrow from the currently-loaded batch and are invalidated by +`next_batch()`, `cancel()`, `add_credit()`, or freeing the cursor. +**Copy out any values you need beyond the current batch.** + +Batches are decoded column-major: each column's values live in one +contiguous buffer. The C ABI exposes this as flat `line_reader_column_data` +/ `line_reader_array_data` descriptors — fill one with +`line_reader_batch_column_data(batch, col, &d, &err)` (or +`_array_column_data` for `DOUBLE[]`), then index by row. This is the +zero-copy path for Cython / numpy / pandas bindings. When iterating, run +the outer loop over columns and the inner loop over rows; sequential +access through each column buffer is cache-friendly. Row-major iteration +(outer over rows) is correct but jumps between per-column buffers on +every cell. 
+ +#### Per-batch metadata + +| Accessor (C) | Accessor (C++) | Returns | +|---|---|---| +| `line_reader_batch_row_count(batch)` | `batch.row_count()` | `size_t` | +| `line_reader_batch_column_count(batch)` | `batch.column_count()` | `size_t` | +| `line_reader_batch_column_kind(batch, c, &k, &err)` | `batch.column_kind(c)` | `line_reader_column_kind` | +| `line_reader_batch_column_name(batch, c, &buf, &len, &err)` | `batch.column_name(c)` | `std::string_view` | +| `line_reader_batch_request_id(batch)` | `batch.request_id()` | `int64_t` | +| `line_reader_batch_seq(batch)` | `batch.seq()` | `uint64_t` | +| `line_reader_batch_flags(batch)` | `batch.flags()` | `uint8_t` | + +#### Column descriptor (C) + +`line_reader_batch_column_data` fills a `line_reader_column_data` struct +with: + +| Field | Use | +|---|---| +| `kind` | Column kind discriminant. Disambiguate units for 64-bit temporals (LONG / TIMESTAMP μs / TIMESTAMP_NS ns / DATE ms). | +| `row_count` | Same as `line_reader_batch_row_count(batch)`. | +| `validity` | LSB-first null bitmap, bit `1` = null. `NULL` when the column has no nulls. | +| `values` / `value_stride` | Dense little-endian buffer for fixed-width kinds (`stride` = 1 / 2 / 4 / 8 / 16 / 32 bytes per row, kind-dependent). | +| `var_offsets` / `var_data` / `var_data_len` | VARCHAR / BINARY ragged buffer: `var_offsets[r..r+1]` is a byte slice into `var_data`. | +| `symbol_codes` | SYMBOL per-row dictionary codes (`uint32_t`). Resolve via `line_reader_batch_symbol_dict(batch, &dict, &err)` then `dict.heap` + `dict.entries[code]`. | +| `decimal_scale` | DECIMAL64 / 128 / 256 column-wide scale. | +| `geohash_precision_bits` | GEOHASH precision (1..60). | + +For `DOUBLE[]` columns call `line_reader_batch_array_column_data` instead; +that fills `line_reader_array_data` with `data` + `data_offsets` (byte +offsets, row_count + 1) + `shapes` + `shape_offsets` (rank-prefixed). 
+ +#### Casual single-cell reads (C) + +`` ships header-only `static inline` +helpers that package the row index + validity probe + typed little-endian +load over a filled descriptor — no extra FFI crossing, no new exported +symbols. + +| QuestDB type | Helper | +|---|---| +| `BOOLEAN` | `line_reader_column_data_get_bool` | +| `BYTE` (i8) | `line_reader_column_data_get_i8` | +| `SHORT` (i16) | `line_reader_column_data_get_i16` | +| `INT` (i32) | `line_reader_column_data_get_i32` | +| `IPv4` | `line_reader_column_data_get_ipv4` | +| `LONG`, `TIMESTAMP` (μs / ns), `DATE` (ms) | `line_reader_column_data_get_i64` | +| `FLOAT` (f32) | `line_reader_column_data_get_f32` | +| `DOUBLE` (f64) | `line_reader_column_data_get_f64` | +| `CHAR` (UTF-16 code unit) | `line_reader_column_data_get_char` | +| `VARCHAR` / `BINARY` | `line_reader_column_data_get_varlen` | +| `SYMBOL` | `line_reader_column_data_get_symbol` (takes a `line_reader_symbol_dict*` from `_batch_symbol_dict`) | +| `UUID` / `LONG256` (raw bytes) | `line_reader_column_data_get_bytes` | +| `DECIMAL64` (mantissa) | `line_reader_column_data_get_decimal64_mantissa` | +| `DECIMAL128` (low / high limbs) | `line_reader_column_data_get_decimal128` | +| `GEOHASH` | `line_reader_column_data_get_geohash` | + +For DECIMAL64 / 128 / 256 the column-wide scale is on `d->decimal_scale`; +for GEOHASH the column-wide precision is on `d->geohash_precision_bits`. + +The helpers do **not** bounds-check `row` — caller's responsibility (use +`d->row_count`). Tight loops should still inline-index the descriptor +buffers directly; the helpers exist for ergonomics, not performance. + +#### Column accessors (C++) + +The C++ wrapper exposes `batch.column(c)` returning a polymorphic +`egress::column` object covering every kind. 
Per-cell accessors are +methods on the column, not on the cursor: + +| QuestDB type | C++ method | Return shape | +|---|---|---| +| `BOOLEAN`, `BYTE`, `SHORT`, `CHAR`, `INT`, `IPv4`, `LONG` / `TIMESTAMP` / `TIMESTAMP_NS` / `DATE`, `FLOAT`, `DOUBLE` | `col.get(row)` | `nullable` | +| `VARCHAR` | `col.varchar(row)` | `nullable` | +| `BINARY` | `col.binary(row)` | `nullable` | +| `SYMBOL` | `col.symbol(row)` | `nullable` (resolved through the batch's symbol dict) | +| `UUID` | `col.get_uuid(row)` | `nullable>` | +| `LONG256` | `col.get_long256(row)` | `nullable>` | +| `GEOHASH` | `col.get_geohash(row)` | `nullable` (value + `precision_bits`) | +| `DECIMAL64` / `DECIMAL128` / `DECIMAL256` | `col.get_decimal64(row)` / `_128` / `_256` | `nullable` / `_128` / `_256` (mantissa or limbs + `scale`) | +| `DOUBLE[]` shape | `col.shape(row, &rank)` | `const uint32_t*` (dimension lengths) | +| `DOUBLE[]` elements | `col.elements(row, &count)` | `const double*` | +| Validity bitmap | `col.validity()` / `col.validity_bytes()` / `col.has_nulls()` / `col.is_null(row)` | raw LSB-first bytes | + +`column::get(row)` uses a kind whitelist (e.g. `int32_t` accepts only +`INT`, not `IPv4`; `int64_t` accepts `LONG` / `TIMESTAMP` / `DATE` / +`TIMESTAMP_NS` but not `DECIMAL64`). For deliberate reinterpretation use +the strict overload `col.get(row, kind)`. + +For column-oriented hot loops, get the dense pointer once and index it: + +```cpp +auto col = batch.column(0); +const int64_t* ts = col.values(); // throws on kind mismatch +const uint8_t* validity = col.validity(); // null when no nulls +for (size_t r = 0; r < col.row_count(); ++r) { + if (validity && ((validity[r >> 3] >> (r & 7)) & 1)) continue; + process(ts[r]); +} +``` + +`LONG[]` columns are reserved on the wire but not supported in this +revision — `batch.column(c)` throws `invalid_api_call` for them. 
+ +#### Visitor dispatch (C++) + +When the schema is unknown at compile time — generic row formatters, CSV +/ JSON converters, arrow-record builders — `col.visit(visitor)` is the +ergonomic alternative to a hand-written `switch (col.kind())`. The +column runs the kind discriminant once and hands the visitor a typed +view; the visitor's overloads cover the kinds the caller cares about. + +The seven view types are: + +| View | Kinds | Key members | +|---|---|---| +| `fixed_view` | `BOOLEAN`, `BYTE`, `SHORT`, `CHAR`, `INT`, `IPv4`, `LONG` / `TIMESTAMP` / `TIMESTAMP_NS` / `DATE`, `FLOAT`, `DOUBLE` | `kind`, `values` (typed pointer), `row_count`, `validity`, `value(row) → nullable` | +| `decimal_view` | `DECIMAL64`, `DECIMAL128`, `DECIMAL256` | `kind`, `values` (raw LE mantissa bytes), `value_stride` (8 / 16 / 32), `scale`, `row_count`, `validity` | +| `bytes_view` | `UUID`, `LONG256` | `kind`, `values` (raw LE bytes), `value_stride` (16 / 32), `row_count`, `validity` | +| `geohash_view` | `GEOHASH` | `values` (raw LE bytes), `value_stride` (1..8), `precision_bits`, `row_count`, `validity` | +| `varlen_view` | `VARCHAR`, `BINARY` | `kind`, `offsets` (row_count + 1), `data`, `data_len`, `as_string_view(row)`, `as_binary(row)`, `validity` | +| `symbol_view` | `SYMBOL` | `codes` (per-row dict codes), `dict` (snapshot), `resolve(row) → nullable`, `validity` | +| `array_view` | `DOUBLE[]` (T = double) | `kind`, `data`, `data_offsets`, `shapes`, `shape_offsets`, `shape(row)`, `elements(row)`, `validity` | + +Every view also has `is_null(row) → bool`. `LONG[]` columns are not +supported in this revision — `visit` throws `invalid_api_call` for them. + +Two contracts to know: + +- **All visitor overloads must return the same type.** `decltype(auto)` + deduction across the switch arms requires a common type. Return + `void` for side-effect-only visitors. Mismatches are caught at compile + time. 
+- **Unhandled kinds throw `invalid_api_call`.** If the visitor has no + overload for the kind the column actually carries, `visit` throws + rather than calling an unrelated overload — there is no implicit + conversion between view types. + +The idiomatic visitor uses the C++17 `overload` helper (CTAD over a +list of lambdas): + +```cpp +namespace eg = questdb::egress; + +template struct overload : Fs... { using Fs::operator()...; }; +template overload(Fs...) -> overload; + +void print_cell(const eg::column& col, size_t row) +{ + col.visit(overload{ + // Fixed-width primitives — one lambda per T. + [&](eg::fixed_view v) { + // Covers LONG / TIMESTAMP / DATE / TIMESTAMP_NS; + // disambiguate via v.kind if the unit matters. + if (auto x = v.value(row)) std::cout << *x; + else std::cout << "NULL"; + }, + [&](eg::fixed_view v) { + if (auto x = v.value(row)) std::cout << *x; + else std::cout << "NULL"; + }, + // Variable-width. + [&](eg::varlen_view v) { + if (v.kind == eg::column_kind::binary) { + if (auto x = v.as_binary(row)) { + /* x->data, x->size */ + } else { std::cout << "NULL"; } + } else { + std::cout << v.as_string_view(row).value_or("NULL"); + } + }, + // SYMBOL — resolved through the batch's dict snapshot. + [&](eg::symbol_view v) { + std::cout << v.resolve(row).value_or("NULL"); + }, + // Arrays. + [&](eg::array_view v) { + if (v.is_null(row)) { std::cout << "NULL"; return; } + auto el = v.elements(row); // pair + std::cout << "["; + for (size_t i = 0; i < el->second; ++i) + std::cout << (i ? " " : "") << el->first[i]; + std::cout << "]"; + }, + // Catch-all for the kinds this caller doesn't need. + [&](auto v) { (void)v; std::cout << "(unhandled)"; }, + }); +} +``` + +The `[](auto v){ ... }` generic lambda at the end is optional but lets +you avoid listing every view type when the caller only needs a subset. +Without it, the visitor must provide an overload for every kind the +column might be — `visit` will throw if dispatch lands on an +unmatched view. 
+ +For a complete worked example covering all 7 view types — including +hex / IPv4 / decimal scale / geohash precision rendering — see +`examples/line_reader_cpp_example_columns.cpp` in the client repo. + +##### When to use which + +| Pattern | Use | +|---|---| +| Caller knows T at compile time (e.g. `LONG` accumulator). | `col.get(row)` for one cell, `col.values()` for a contiguous loop. | +| Caller scans a known-mixed schema (a few kinds, fixed). | Inline `switch (col.kind())` with `col.varchar`/`col.symbol`/`col.get` per arm. Smallest code. | +| Caller scans an unknown / wide / kind-agnostic schema. | `col.visit(overload{...})`. Kind discriminant runs once per column; the compiler picks the right lambda. | +| Caller needs the raw dense buffer (zero-copy interop). | `col.values()` (scalar) or `col.elements(row, &count)` / `col.shape(row, &rank)` (array). The view types' `values` / `data` fields work the same way inside a visitor. | + +#### Reading NULLs + +C ABI: the inline helpers write `*out_is_null` separately from the +typed return value. Always branch on `*out_is_null` before consuming the +value — a default-zero `int64_t` or empty `string_view` is a valid value, +not a NULL marker. The underlying contract is that the column's +`validity` bitmap (LSB-first; bit `1` = null) is `NULL` when the column +has no nulls. + +```c +line_reader_column_data d; +if (!line_reader_batch_column_data(batch, 0, &d, &err)) goto on_error; +bool is_null = false; +int64_t price_micros = line_reader_column_data_get_i64(&d, r, &is_null); +if (is_null) { + /* SQL NULL. Skip, sentinel, error — your call. */ +} else { + /* Real value. */ +} +``` + +C++ wrapper: every per-cell accessor returns `std::optional` +(`nullable`), empty for NULL cells. 
+ +```cpp +auto col = batch.column(col_idx); +if (auto price = col.get(r)) + process(*price); +else + handle_null(); +``` + +#### Reading arrays + +`DOUBLE[]` columns use a separate descriptor — +`line_reader_batch_array_column_data(batch, c, &d, &err)` fills a +`line_reader_array_data` struct with four borrowed buffers: + +| Field | Layout | +|---|---| +| `data` | Concatenated little-endian `double` bytes for every row, all rows back-to-back. | +| `data_offsets` | `row_count + 1` entries; row `r`'s slice is `data[data_offsets[r] .. data_offsets[r+1]]` (byte offsets). | +| `shapes` | Concatenated `uint32_t` dimension lengths (row-major; innermost dimension last). | +| `shape_offsets` | `row_count + 1` entries indexing `shapes`; row `r`'s rank is `shape_offsets[r+1] - shape_offsets[r]`. | +| `validity` | LSB-first null bitmap. `NULL` when the column has no nulls. | + +In C++, `batch.column(c).shape(row, &rank)` and +`col.elements(row, &count)` decode each row into typed pointers, +or use `col.visit(...)` with an `array_view` overload for full +shape/element access (see [Visitor dispatch](#visitor-dispatch-c)). + +A NULL array row is flagged via the column's validity bitmap (or +`col.is_null(row)`). A non-null row whose shape produces zero elements +(e.g. `[2, 0, 3]`) has `shape != nullptr` but `count == 0` from +`elements`. + +`LONG[]` is reserved on the wire but not supported in this revision — +`line_reader_batch_column_data` / `_array_column_data` reject it with +`invalid_api_call`, and `batch.column(c)` throws in C++. + +Arrays require QuestDB 9.0.0 or later — older servers reject the QWP +encoding outright with `unsupported_server`. + +### Flow control: credit + +By default the server streams as fast as the network allows. Set +`initial_credit(bytes)` on the query to apply byte-credit flow control: +the server pauses after the configured byte budget is exhausted and the +client tops it up by calling `add_credit(more_bytes)` after each batch +is processed. 
`0` is the sentinel for "unbounded". + + + + +```c +line_reader_query* query = line_reader_prepare(reader, sql, &err); +if (!query) goto on_error; +line_reader_query_initial_credit(query, 256 * 1024); /* 256 KiB */ +line_reader_cursor* cursor = line_reader_query_execute(&query, &err); + +const line_reader_batch* batch; +while ((batch = line_reader_cursor_next_batch(cursor, &err)) != NULL) { + /* ... read the batch (project columns, index by row) ... */ + if (!line_reader_cursor_add_credit(cursor, 256 * 1024, &err)) + goto on_error; +} +if (err) goto on_error; +``` + + + + +```cpp +auto cur = reader + .prepare(sql) + .initial_credit(256 * 1024) + .execute(); + +while (cur.next_batch()) { + // ... read the batch ... + cur.add_credit(256 * 1024); +} +``` + + + + +Inspect `line_reader_credit_granted_total(reader)` (or +`reader.credit_granted_total()`) from a monitoring thread to track +cumulative credit issued on a connection. + +### Cancelling an in-flight query + +There are two ways to stop a stream early: + +- **`cancel()`** (C: `line_reader_cursor_cancel`) — sends `CANCEL`, + drains pending frames until the server's terminal reply, and **surfaces + any transport errors** through `err_out` / an exception. Use this when + you need to know whether the cancellation completed cleanly — for + example before reusing the reader from a critical path, or when the + cancel is itself observable to the application. +- **Free the cursor** (C: `line_reader_cursor_free`; C++ destructor) — + best-effort: sends `CANCEL`, then tears down the WebSocket bounded by a + short internal timeout. Transport errors during teardown are + **swallowed**. Use this when you don't care about clean closure (e.g. + the process is shutting down anyway). + +Either way, after the cursor is gone the reader is ready for the next +`execute()`. 
+ +### Server info and connection state + +Once the reader (or cursor) is connected, `server_info()` exposes the +last-seen QWP `SERVER_INFO` frame: + +```cpp +if (auto info = reader.server_info()) { + std::cout << "role=" << int(info.role()) + << " epoch=" << info.epoch() + << " cluster=" << info.cluster_id() + << " node=" << info.node_id() << "\n"; +} +``` + +`role` is one of `standalone`, `primary`, `replica`, `primary_catchup`, +or `other` (use `role_byte()` for unknown values). `epoch` is monotonic +across failover/role transitions. `current_host()` / `current_port()` +return the endpoint currently in use. + +The reader's getters reject while a cursor is live — they read non-atomic +state the cursor thread may mutate. Use the cursor-scoped equivalents +(`cursor.server_info()`, `cursor.current_host()`, `cursor.current_port()`, +`cursor.server_version()`) instead, or release the cursor first. + +### Failover and high availability + +:::note Enterprise + +Multi-host failover is most useful with QuestDB Enterprise primary-replica +replication. A single-node deployment can configure the connect string +the same way, but failover only kicks in when there is a second healthy +endpoint to try. + +::: + +#### Multiple endpoints and routing + +Pass a comma-separated address list: + +```text +wss::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; +``` + +`target` filters by `SERVER_INFO.role`: + +| Value | Endpoints accepted | +|---|---| +| `any` (default) | Any role. | +| `primary` | `PRIMARY`, `PRIMARY_CATCHUP`, or `STANDALONE`. | +| `replica` | `REPLICA` only. | + +`zone=` biases endpoint selection toward same-zone hosts (Enterprise). + +#### Per-query failover loop + +When the connection breaks mid-stream, the cursor reconnects to another +endpoint and replays the query from the start. The loop is bounded: + +| Key | Default | Description | +|---|---|---| +| `failover` | `on` | Master switch. `failover=off` disables per-query reconnect. 
| +| `failover_max_attempts` | `8` | Cap on reconnect attempts per `execute()`. | +| `failover_backoff_initial_ms` | `50` | First post-failure sleep. | +| `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep. | +| `failover_max_duration_ms` | `30000` | Total wall-clock budget per `execute()`. | +| `auth_timeout_ms` | `15000` | Per-host HTTP upgrade timeout. | + +When the budget is exhausted, `next_batch()` (or `execute()`) surfaces a +terminal error — typically `socket_error`, `handshake_error`, or +`role_mismatch`. Free the cursor; the reader is then available for the +next `execute()`. There is **no continuous reconnect** spanning idle +periods between queries — each `execute()` starts its own failover budget. + +:::warning Failover requires multiple endpoints + +The failover loop rotates across the `addr=` list. With a single address +there is no other host to try, and the budget collapses after one +attempt regardless of `failover_max_attempts`. Provide at least two +addresses for failover to be useful. + +::: + +#### Mid-stream failover hazard: duplicate rows + +**Read this before deploying a long-running cursor against a failover- +enabled connect string.** + +When mid-stream failover fires, the server replays the query from the +beginning. **Any rows the application has already consumed will be +delivered again on the new connection.** Without explicit recovery, an +aggregation, fan-out, or downstream writer sees duplicates. + +The library does **not** silently discard the replayed rows. Instead, it +gives the application a choice via either the `on_failover_reset` or the +`on_failover_progress` callback — installing either one clears the +silent-duplicate guard: + +- **Wire a callback.** The trampoline fires on the cursor's thread, + before any replayed batch arrives. Discard the partial state you've + accumulated. Replay then proceeds transparently. 
Use + `on_failover_reset` for the reset-only signal, or + `on_failover_progress` (see [Failover progress + phases](#failover-progress-phases)) for the full lifecycle — + disconnect, per-retry, reset, gave-up. +- **Don't wire a callback, and don't want replay.** The next + `next_batch()` (or `execute()`) call fails with + `line_reader_error_failover_would_duplicate`, and the cursor terminates + instead of double-delivering. The application can then re-execute the + query from scratch. + +Initial-connect failover (before any batch has been yielded) is always +transparent and ignores the callback — no rows are consumed yet, so +there is nothing to duplicate. + + + + +```c +static void on_failover_reset( + const line_reader_failover_event* ev, + void* user_data) +{ + /* `user_data` is your accumulator; clear it before replay. */ + struct row_buf* buf = (struct row_buf*)user_data; + row_buf_clear(buf); + + /* Diagnostics only — must NOT call back into reader / query / cursor. */ + const char* new_host = NULL; + size_t new_host_len = 0; + line_reader_failover_event_new_host(ev, &new_host, &new_host_len); + fprintf(stderr, + "failover -> %.*s:%u after %u attempts\n", + (int)new_host_len, new_host, + (unsigned)line_reader_failover_event_new_port(ev), + (unsigned)line_reader_failover_event_attempts(ev)); +} + +line_reader_query* query = line_reader_prepare(reader, sql, &err); +line_reader_query_on_failover_reset(query, on_failover_reset, &my_buf); +line_reader_cursor* cursor = line_reader_query_execute(&query, &err); +``` + + + + +```cpp +std::vector accumulator; + +auto cur = reader + .prepare(sql) + .on_failover_reset([&](const qdb::failover_event_view& ev) { + accumulator.clear(); // discard partial result; replay incoming + std::cerr + << "failover -> " << ev.new_host() << ":" << ev.new_port() + << " after " << ev.attempts() << " attempts\n"; + }) + .execute(); +``` + + + + +The callback runs synchronously on the cursor's drive thread.
It **must +not** call back into the originating reader, query, or cursor (including +read-only stat getters — they read state the trampoline is in the +middle of mutating); must not throw or `longjmp` across the C boundary (an +escaping unwind aborts the process); and must not block, because while it +runs no batch is being read and no credit is being granted to the +server. + +#### Failover event fields + +The `failover_event` passed to the callback carries: + +| C++ accessor | C function | Meaning | +|---|---|---| +| `failed_host()` / `failed_port()` | `line_reader_failover_event_failed_host` / `_failed_port` | The previously-connected endpoint that failed. | +| `new_host()` / `new_port()` | `line_reader_failover_event_new_host` / `_new_port` | The endpoint the cursor is now connected to. | +| `new_request_id()` | `line_reader_failover_event_new_request_id` | Server-assigned request ID on the new connection. | +| `attempts()` | `line_reader_failover_event_attempts` | Number of reconnect attempts that preceded this success (1 = first retry). | +| `elapsed_ns()` | `line_reader_failover_event_elapsed_ns` | Wall-clock nanoseconds spent reconnecting (sleep + dial + handshake + `SERVER_INFO`). | +| `trigger_code()` | `line_reader_failover_event_trigger_code` | `line_reader_error_code` that triggered the failover. | +| `trigger_msg()` | `line_reader_failover_event_trigger_msg` | Human-readable message for the trigger. | +| `server_info()` | `line_reader_failover_event_server_info` | `SERVER_INFO` of the new endpoint (empty on v1 servers). | + +Outside the callback, `cursor.failover_resets()` reports the cumulative +number of successful resets observed by this cursor since `execute()`. + +#### Failover progress phases + +`on_failover_reset` only fires on a successful reconnect. For full +connection-lifecycle visibility — outage observed, per-retry telemetry, +reset, retry budget exhausted — install an `on_failover_progress` +callback instead (or in addition).
The same callback fires for every +phase; route on the phase discriminant: + +| Phase (C / C++) | When it fires | Fields populated beyond the always-set ones | +|---|---|---| +| `line_reader_failover_phase_disconnected` / `failover_phase::disconnected` | Once, immediately after the cursor's connection drops — **before** any retry. Lets observers alert on "QuestDB unreachable now" instead of retroactively when reconnect lands. | None. `attempt` is `0`. | +| `line_reader_failover_phase_retrying` / `failover_phase::retrying` | Once per outer-loop iteration, **after** the inter-attempt backoff sleep and immediately before the dial. | `attempt` is `1`-based for the about-to-be-tried dial. `elapsed_ns` already includes backoff cost. | +| `line_reader_failover_phase_reset` / `failover_phase::reset` | A reconnect succeeded; replayed batches will start arriving next. Fires immediately **before** the `on_failover_reset` callback (when both are installed) so a single sink sees the lifecycle in order. | `new_host` / `new_port`, `new_request_id`, `server_info` (empty on v1 servers). `attempt` is the dial that landed. | +| `line_reader_failover_phase_gave_up` / `failover_phase::gave_up` | The retry budget is exhausted; the cursor is terminal. The same error will surface on the next `next_batch()` / `add_credit()` call. | `final_error_code` / `final_error_msg`. `attempt` is the total number of dials burned (may be `0` if the wall-clock deadline was already exhausted). | + +Always-set fields (every phase): `failed_host` / `failed_port` (the +endpoint that died), `trigger_code` / `trigger_msg` (the original +cause-of-death, preserved across phases), `elapsed_ns` (wall-clock since +the disconnect was observed, monotonically non-decreasing across phases +of the same lifecycle), and `attempt` (per-phase semantics above). + +Accessor surface: + +| C++ accessor | C function | Notes | +|---|---|---| +| `phase()` | `line_reader_failover_progress_event_phase` | Discriminant. 
| +| `failed_host()` / `failed_port()` | `line_reader_failover_progress_event_failed_host` / `_failed_port` | Always set. | +| `new_host()` / `new_port()` | `line_reader_failover_progress_event_new_host` / `_new_port` | Empty / `0` outside `Reset`. | +| `new_request_id()` → `std::optional` | `line_reader_failover_progress_event_new_request_id` (returns `bool`) | `nullopt` / `false` outside `Reset`. | +| `attempt()` | `line_reader_failover_progress_event_attempt` | Per-phase semantics, see table. | +| `trigger_code()` / `trigger_msg()` | `line_reader_failover_progress_event_trigger_code` / `_trigger_msg` | Always set; preserved across phases. | +| `elapsed_ns()` | `line_reader_failover_progress_event_elapsed_ns` | Wall-clock since disconnect. Saturating, monotonic. | +| `server_info()` | `line_reader_failover_progress_event_server_info` | Non-NULL only on `Reset`, v2+ servers. | +| `final_error_code()` → `std::optional` | `line_reader_failover_progress_event_final_error_code` (returns `bool`) | Populated only on `GaveUp`. | +| `final_error_msg()` | `line_reader_failover_progress_event_final_error_msg` (returns `bool`) | Populated only on `GaveUp`. | + +Installing `on_failover_progress` also clears the silent-duplicate +guard documented under [Mid-stream failover +hazard](#mid-stream-failover-hazard-duplicate-rows) — same as +`on_failover_reset`. If you only want telemetry and not replay +semantics, set `failover=off` in the connect string instead. + +The reentrancy contract is identical to `on_failover_reset`: the +callback runs synchronously on the cursor's drive thread, **must not** +call back into the originating reader / query / cursor (including +read-only stat getters), **must not** throw or `longjmp` across the C +boundary (an escaping unwind aborts the process), and **must not** +block — while it runs, no batch is being read and the failover loop +cannot make progress. 
+ + + + +```c +static void on_failover_progress( + const line_reader_failover_progress_event* ev, + void* user_data) +{ + const char* failed_host = NULL; + size_t failed_host_len = 0; + line_reader_failover_progress_event_failed_host( + ev, &failed_host, &failed_host_len); + const uint16_t failed_port = + line_reader_failover_progress_event_failed_port(ev); + const uint64_t elapsed_ns = + line_reader_failover_progress_event_elapsed_ns(ev); + + switch (line_reader_failover_progress_event_phase(ev)) { + case line_reader_failover_phase_disconnected: + fprintf(stderr, "failover: disconnected from %.*s:%u\n", + (int)failed_host_len, failed_host, (unsigned)failed_port); + break; + case line_reader_failover_phase_retrying: + fprintf(stderr, "failover: retry #%u after %llu ns\n", + (unsigned)line_reader_failover_progress_event_attempt(ev), + (unsigned long long)elapsed_ns); + break; + case line_reader_failover_phase_reset: { + const char* new_host = NULL; + size_t new_host_len = 0; + line_reader_failover_progress_event_new_host( + ev, &new_host, &new_host_len); + fprintf(stderr, + "failover: reset -> %.*s:%u after %u attempts (%llu ns)\n", + (int)new_host_len, new_host, + (unsigned)line_reader_failover_progress_event_new_port(ev), + (unsigned)line_reader_failover_progress_event_attempt(ev), + (unsigned long long)elapsed_ns); + break; + } + case line_reader_failover_phase_gave_up: { + const char* msg = NULL; + size_t msg_len = 0; + line_reader_failover_progress_event_final_error_msg( + ev, &msg, &msg_len); + fprintf(stderr, + "failover: gave up after %u attempts: %.*s\n", + (unsigned)line_reader_failover_progress_event_attempt(ev), + (int)msg_len, msg); + break; + } + } +} + +line_reader_query* query = line_reader_prepare(reader, sql, &err); +line_reader_query_on_failover_progress(query, on_failover_progress, NULL); +line_reader_cursor* cursor = line_reader_query_execute(&query, &err); +``` + + + + +```cpp +auto cur = reader + .prepare(sql) + .on_failover_progress([&](const 
qdb::failover_progress_event_view& ev) { + switch (ev.phase()) { + case qdb::failover_phase::disconnected: + std::cerr << "failover: disconnected from " + << ev.failed_host() << ":" << ev.failed_port() << "\n"; + break; + case qdb::failover_phase::retrying: + std::cerr << "failover: retry #" << ev.attempt() + << " after " << ev.elapsed_ns() << " ns\n"; + break; + case qdb::failover_phase::reset: + std::cerr << "failover: reset -> " + << ev.new_host() << ":" << ev.new_port() + << " after " << ev.attempt() << " attempts (" + << ev.elapsed_ns() << " ns)\n"; + break; + case qdb::failover_phase::gave_up: + std::cerr << "failover: gave up after " << ev.attempt() + << " attempts: " << ev.final_error_msg() << "\n"; + break; + } + }) + .execute(); +``` + + + + +`on_failover_progress` and `on_failover_reset` coexist: when both are +installed, the `Reset` phase fires immediately before the reset +callback so a single observer sees the whole lifecycle in order. Pick +whichever shape fits your sink — phased event stream vs. one-shot +reset hook — or install both. + +#### Connection-state observability + +For mid-query connection lifecycle, install `on_failover_progress` +(above): it surfaces `Disconnected` (on outage), `Retrying` (per dial +attempt), `Reset` (reconnect succeeded), and `GaveUp` (budget +exhausted) as structured events on the cursor's drive thread. + +What remains polling-based: + +- **Initial-connect failures** (before `execute()` returns): no + progress callback fires — `line_reader_from_conf` or + `line_reader_execute` itself fails. Inspect the host / port the + reader settled on with `current_host()` / `current_port()` once + `execute()` returns a cursor. +- **Between queries**, while no cursor is live, the reader holds no + connection in the foreground, so there is nothing to observe. 
Poll + `reader.current_host()` / `reader.current_port()` (or + `reader.server_info().epoch()`) between successive `execute()` calls + for "endpoint changed" or "topology rotated" signals. + +### Error handling + +Every reader / query / cursor entry point that can fail returns +`NULL`, `false`, or `-1` on error and writes an opaque `line_reader_error*` +through its `err_out` parameter. The C++ wrapper throws +`questdb::egress::line_reader_error` (derived from `std::runtime_error`) +with the same code and message. + +`line_reader_error_get_code(err)` returns one of: + +| Code | Meaning | Typical recovery | +|---|---|---| +| `could_not_resolve_addr` | Bad URL, host, or interface in the connect string. | Fix the connect string. | +| `config_error` | Connect-string syntax error or unknown key. | Fix the connect string. | +| `invalid_api_call` | Wrong-order or wrong-state call (e.g. `execute` while a cursor is live, kind-mismatched `col.get` / `_column_data_get_*`). | Bug in the application; fix the call site. | +| `socket_error` | TCP / WS read / write / close failure. | Per-query failover handles it transparently if `failover=on` and multiple `addr=` entries are configured; otherwise rebuild the reader. | +| `tls_error` | TLS handshake failure. | Inspect cert chain; verify `tls_roots` / `tls_ca`. | +| `handshake_error` | HTTP upgrade or WebSocket handshake failed. | Often a version or compression mismatch; check the server release. | +| `auth_error` | `401`/`403` from the upgrade response. | Re-mint the credential; auth failures are terminal across all endpoints — failover does not retry them. | +| `unsupported_server` | Server advertises a QWP version, encoding, or capability the client cannot use. | Align client and server versions; disable optional encodings. | +| `role_mismatch` | All endpoints connected but none matched `target=`. | Loosen `target` or fix the topology. | +| `protocol_error` | Wire-format violation: bad magic, truncated frame, bad varint.
| Rebuild the reader; report a bug if it recurs against an in-sync server. | +| `invalid_utf8` | Connect string, SQL, or `bind_varchar` value contained invalid UTF-8. | Re-encode the input. | +| `invalid_bind` | Bind count, index, or value rejected client-side (including timestamp / decimal / geohash range failures). | Fix the bind. | +| `server_schema_mismatch` | Server-reported `SCHEMA_MISMATCH` (`0x03`). | Fix bind types or SQL. | +| `server_parse_error` | Server-reported `PARSE_ERROR` (`0x05`). | Fix SQL. | +| `server_internal_error` | Server-reported `INTERNAL_ERROR` (`0x06`). | Inspect server logs; retry with backoff. | +| `server_security_error` | Server-reported `SECURITY_ERROR` (`0x08`). | Permission denied; check role / grants. | +| `server_limit_exceeded` | Server-reported `LIMIT_EXCEEDED` (`0x0B`). | Reduce result size or raise the server limit. | +| `limit_exceeded` | Client-side limit hit (e.g. array row exceeds per-row element cap). | Reduce row size. | +| `cancelled` | Local `cancel()` or server `CANCELLED` (`0x0A`). | Expected after `cancel()`. | +| `failover_would_duplicate` | Mid-query failover would replay rows the application has already consumed, and neither an `on_failover_reset` nor an `on_failover_progress` callback was installed. | See [Mid-stream failover hazard](#mid-stream-failover-hazard-duplicate-rows). Either wire a callback and discard partial state, or re-execute the query from scratch. | + +Once any of the `server_*` codes surfaces, the cursor is terminated; free +it and (typically) reuse the reader for the next `execute()`. For +`auth_error`, `unsupported_server`, `tls_error`, and `role_mismatch`, +rebuild the reader from scratch — these are not transient. + +#### Error object fields + +A production error handler usually wants more than the code and the +text.
The following fields are everything you need to log a structured +event, decide what to retry, and assemble a bug report: + +| Field | C++ accessor | C function | Notes — stability / PII / scope | +|---|---|---|---| +| Code | `e.code()` | `line_reader_error_get_code(err)` | `line_reader_error_code` discriminant. **Stable across releases** — the right field to dispatch on. For server-originated codes the discriminant embeds the QWP status byte (e.g. `server_parse_error = 0x05`). Safe to forward as-is. | +| Message | `e.what()` | `line_reader_error_msg(err, &len)` | UTF-8 diagnostic. **Not null-terminated** in C — read exactly `len` bytes; pointer is owned by the error and stays valid until `line_reader_error_free`. Server messages mirror QuestDB's normal SQL error formatting (capped at the QWP error-message limit, currently 1 KiB); client-synthesised messages cover transport / handshake / bind validation. **Not stable across server versions** — never pattern-match. **May contain PII / secrets**: it can echo offending bind values or server-supplied close reasons — log at the input's trust level and sanitise before forwarding to external trackers. | +| Cursor request ID | `cursor.request_id()` | `line_reader_cursor_request_id` | Server-assigned request ID for the current connection. Refreshes on failover. Safe to forward. | +| Batch request ID | `batch.request_id()` | `line_reader_batch_request_id(batch)` | Request ID stamped on the most recently decoded batch (may differ from `cursor.request_id()` for already-buffered frames mid-failover). Read from the borrowed batch handle returned by `next_batch()`. Safe to forward. | +| Failover resets | `cursor.failover_resets()` | `line_reader_cursor_failover_resets` | Cumulative successful mid-stream resets since `execute()`. A non-zero value next to a duplicate-row complaint tells you replay happened. Safe to forward. 
| +| Current endpoint | `cursor.current_host()` / `cursor.current_port()` | `line_reader_cursor_current_addr_host` / `_port` | Endpoint the cursor was attached to when the error fired. Safe to forward. | +| Client identifier | (connect-string `client_id=` value) | (same) | Opaque label echoed by QuestDB as `X-QWP-Client-Id`. Set this in production so support can correlate sessions. Safe to forward. | + +The protocol does not currently surface a server-issued request or +connection identifier in the WebSocket upgrade response. When opening a +bug report, supply the connection start time (from your application +logs), the `client_id=` value, and the cursor tuple +`(cursor.request_id, batch.request_id, failover_resets, current_host, current_port)` +— that is the closest you can get to a server-side correlation handle +today. + +### Reader authentication and TLS + +The reader and the sender share the same authentication and TLS +configuration grammar — see [Authentication and TLS](#authentication-and-tls) +above for the full table of `tls_ca` / `tls_roots` / `tls_verify` values. +In brief: + +- **HTTP basic auth**: `username=...;password=...;`. +- **Bearer token (Enterprise)**: `token=...;`. Sent as + `Authorization: Bearer ` on the WebSocket upgrade. +- **TLS**: switch to `wss::`. Select root certificates with `tls_ca` + (`webpki_roots`, `os_roots`, `webpki_and_os_roots`) or + `tls_roots=/path/to/ca.pem`. `tls_roots_password` unlocks JKS / PKCS#12 + keystores. + +The reader applies the same restrictions as the sender: + +| Path | Status | Workaround | +|---|---|---| +| OIDC token acquisition or in-band refresh | Not supported by this client. There is no IdP integration and no callback to refresh a token mid-session. | QuestDB itself supports OIDC — see [OpenID Connect](/docs/security/oidc/). Acquire an access token out-of-band from your IdP, pass it via `token=...`, and rebuild the reader when the token nears expiry. 
| +| Mutual TLS (client certificates) | Not supported. The QuestDB server does not negotiate client certificates. | Use bearer-token auth over `wss://`. See the connect-string reference's [TLS section](/docs/connect/clients/connect-string/#tls). | +| Token rotation mid-session | Not supported. The token is presented once during the WebSocket upgrade and is not re-sent. | On token expiry, free the reader and build a fresh one with the new token. The cursor (and any open query) must be freed first. | + +### Enterprise example: TLS, token auth, multi-host failover + +A production read path typically combines all three. The reader uses the +same connect-string grammar as the sender — drop `sf_*` keys (they are +sender-only) and configure failover instead: + +```text +wss::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; + token=eyJhbGciOi...; + target=primary; + failover_max_attempts=10; + failover_max_duration_ms=60000; + compression=auto; + tls_ca=os_roots; + client_id=quote-server; +``` + + + + +```c +line_sender_utf8 conf = QDB_UTF8_LITERAL( + "wss::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000;" + "token=eyJhbGciOi...;" + "target=primary;" + "failover_max_attempts=10;" + "failover_max_duration_ms=60000;" + "compression=auto;" + "tls_ca=os_roots;" + "client_id=quote-server;"); +line_reader_error* err = NULL; +line_reader* reader = line_reader_from_conf(conf, &err); +``` + + + + +```cpp +qdb::reader reader{ + "wss::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000;" + "token=eyJhbGciOi...;" + "target=primary;" + "failover_max_attempts=10;" + "failover_max_duration_ms=60000;" + "compression=auto;" + "tls_ca=os_roots;" + "client_id=quote-server;"_utf8}; +``` + + + + +`client_id` is opaque to the server but appears in QuestDB's connection +logs as `X-QWP-Client-Id` — include it in bug reports to help support +correlate sessions. 
+ +## Configuration reference + +For the full list of connect-string keys and their defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). + +Common WebSocket-specific options accepted by the **ingestion sender**: + +| Key | Default | Description | +|---|---|---| +| `addr` | required | One or more `host:port` entries, comma-separated or repeated. | +| `username` / `password` | unset | HTTP basic auth. | +| `token` | unset | Bearer token auth (Enterprise). | +| `auth_timeout_ms` | 15000 | WebSocket upgrade timeout (milliseconds). `auth_timeout` is also accepted. | +| `tls_ca` / `tls_roots` / `tls_verify` | `webpki_roots` | TLS configuration (`wss` only). | +| `auto_flush` | required `off` if set | Auto-flush is not supported. `auto_flush_rows` and `auto_flush_bytes` are rejected. | +| `sf_dir` | unset | Enable disk-backed store-and-forward. | +| `sender_id` | `default` | SF slot identity. | +| `sf_durability` | `memory` | Only `memory` is currently accepted. | +| `request_durable_ack` | `off` | Wait for durable upload before ACK (Enterprise). | +| `reconnect_max_duration_millis` | 300000 | Per-outage reconnect budget. | +| `initial_connect_retry` | `off` | Apply reconnect policy to the first connect. | +| `close_flush_timeout_millis` | 5000 | Bound on `close_drain` wait. | +| `qwp_ws_progress` | `background` | `background` or `manual`. | +| `max_in_flight` | 128 | Max unacknowledged frames in flight on a connection. Acts as the backpressure window: publishers block locally once the window is full. | + +Common WebSocket-specific options accepted by the **query reader**: + +| Key | Default | Description | +|---|---|---| +| `addr` | required | One or more `host:port` entries, comma-separated or repeated. | +| `username` / `password` | unset | HTTP basic auth. | +| `token` | unset | Bearer token auth (Enterprise). | +| `auth_timeout_ms` | 15000 | Per-host HTTP upgrade timeout (milliseconds). 
| +| `tls_ca` / `tls_roots` / `tls_verify` / `tls_roots_password` | `webpki_roots` | TLS configuration (`wss` only). | +| `target` | `any` | Endpoint role filter: `any`, `primary`, `replica`. | +| `zone` | unset | Zone-aware routing hint (Enterprise). | +| `failover` | `on` | Master switch for per-query reconnect. | +| `failover_max_attempts` | `8` | Cap on reconnect attempts per `execute()`. | +| `failover_backoff_initial_ms` | `50` | First post-failure sleep. | +| `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep. | +| `failover_max_duration_ms` | `30000` | Total wall-clock budget per `execute()`. | +| `compression` | `raw` | `raw` / `zstd` / `auto`. `zstd` and `auto` require the `compression-zstd` build feature. | +| `compression_level` | `1` | Advertised zstd level. Server clamps to `[1, 9]`. | +| `max_batch_rows` | server default | Hint passed in `X-QWP-Max-Batch-Rows`. `0` (or unset) defers to the server. | +| `client_id` | unset | Opaque identifier echoed in server logs as `X-QWP-Client-Id`. | +| `path` | `/read/v1` | Endpoint path. Rarely changed. | +| `max_version` | `2` | Highest QWP version the reader advertises. | + +## Migration from ILP (HTTP/TCP) + +The buffer API is unchanged. 
To switch a sender to QWP/WebSocket: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +|---|---|---| +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` (`qwpws::` / `qwpwss::` aliases) | +| Batch trigger | Row/time-based auto-flush (defaults: 75000 rows, 1000 ms) | Explicit `flush()` only | +| Error model | Synchronous on `flush()` | Async via `line_sender_qwpws_poll_error` / handler | +| Completion tracking | Implicit per request | Explicit FSN watermarks | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | Not available | Built in (comma-separated `addr`) | +| Shutdown | `line_sender_close` | `line_sender_qwpws_close_drain`, then `line_sender_close` | +| Querying SQL from the same library | Not available | `line_reader_*` (C) / `questdb::egress::reader` (C++) — see [Querying and SQL execution](#querying-and-sql-execution) | + +To migrate, change the connect string from `http::` to `ws::` (or +`https::` to `wss::`), drop any `auto_flush_*` keys, install a +`line_sender_opts_qwpws_error_handler` (C) / `qwp_ws_error_handler` (C++) +callback or poll `line_sender_qwpws_poll_error`, and call +`line_sender_qwpws_close_drain` before closing the sender. + +## Full example: multi-host ingestion with failover + +This example shows a production ingestion loop with store-and-forward, +multi-host failover, and proper error handling including the retry pattern +around `flush()`. + +```c +#include +#include +#include + +static void on_qwp_error( + void* user_data, + const line_sender_qwpws_error_view* ev) +{ + (void)user_data; + fprintf(stderr, + "qwp error: category=%d policy=%d msg=%.*s\n", + (int)ev->category, (int)ev->applied_policy, + (int)ev->message_len, ev->message); +} + +int main(void) { + line_sender_error* err = NULL; + line_sender* sender = NULL; + line_sender_buffer* buffer = NULL; + + /* Multi-host with store-and-forward for failover durability. 
+ * Without sf_dir, flush() blocks during an outage and times out + * after sf_append_deadline_millis (default 30s). With sf_dir, + * flush() writes to disk and returns quickly while the reconnect + * loop replays to the new primary in the background. */ + line_sender_utf8 conf = QDB_UTF8_LITERAL( + "wss::addr=db-primary:9000,db-replica:9000;" + "token=your_bearer_token;" + "sf_dir=/var/lib/myapp/qdb-sf;" + "sender_id=ingest-1;" + "reconnect_max_duration_millis=300000;"); + sender = line_sender_from_conf(conf, &err); + if (!sender) goto on_error; + + buffer = line_sender_buffer_new_for_sender(sender); + + line_sender_table_name tbl = QDB_TABLE_NAME_LITERAL("book"); + line_sender_column_name ticker_name = QDB_COLUMN_NAME_LITERAL("ticker"); + line_sender_column_name price_name = QDB_COLUMN_NAME_LITERAL("price"); + line_sender_column_name size_name = QDB_COLUMN_NAME_LITERAL("size"); + line_sender_utf8 ticker_val = QDB_UTF8_LITERAL("EURUSD"); + + for (;;) { + /* Only encode a fresh row when the previous batch has been flushed. + * On flush() failure, the rows stay buffered for the next attempt. */ + if (line_sender_buffer_row_count(buffer) == 0) { + if (!line_sender_buffer_table(buffer, tbl, &err)) goto on_error; + if (!line_sender_buffer_symbol(buffer, ticker_name, ticker_val, &err)) + goto on_error; + if (!line_sender_buffer_column_f64(buffer, price_name, 1.0842, &err)) + goto on_error; + if (!line_sender_buffer_column_f64(buffer, size_name, 100000.0, &err)) + goto on_error; + if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err)) + goto on_error; + } + + /* flush() can still return an error if the SF queue fills to + * sf_max_total_bytes during a prolonged outage. On success the + * buffer is cleared; on failure it is retained so the next + * iteration retries the same payload. 
*/ + if (!line_sender_flush(sender, buffer, &err)) { + size_t err_len = 0; + const char* msg = line_sender_error_msg(err, &err_len); + fprintf(stderr, "flush error: %.*s\n", (int)err_len, msg); + line_sender_error_free(err); + err = NULL; + + /* Check if the sender is terminal (auth failure, reconnect + * budget exhausted). This example simply exits; a long-running + * service would close the sender and create a new one. */ + if (line_sender_must_close(sender)) { + fprintf(stderr, "sender is terminal, exiting\n"); + break; + } + } + + /* Pace the loop as appropriate for your workload. */ + } + + if (!line_sender_qwpws_close_drain(sender, &err)) goto on_error; + line_sender_buffer_free(buffer); + line_sender_close(sender); + return 0; + +on_error:; + size_t err_len = 0; + const char* msg = line_sender_error_msg(err, &err_len); + fprintf(stderr, "error: %.*s\n", (int)err_len, msg); + line_sender_error_free(err); + if (buffer) line_sender_buffer_free(buffer); + if (sender) line_sender_close(sender); + return 1; +} ``` -Protocol Version 2 along with its support for arrays is available from QuestDB -version 9.0.0. +## Next steps -## Next Steps +The header files are extensively commented and serve as the canonical API +reference. Browse them on GitHub or in your local checkout: -Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table -and column auto-creation. 
+- Ingestion C: [`include/questdb/ingress/line_sender.h`](https://github.com/questdb/c-questdb-client/blob/main/include/questdb/ingress/line_sender.h) +- Ingestion C++: [`include/questdb/ingress/line_sender.hpp`](https://github.com/questdb/c-questdb-client/blob/main/include/questdb/ingress/line_sender.hpp) +- Query C: [`include/questdb/egress/line_reader.h`](https://github.com/questdb/c-questdb-client/blob/main/include/questdb/egress/line_reader.h) +- Query C++: [`include/questdb/egress/line_reader.hpp`](https://github.com/questdb/c-questdb-client/blob/main/include/questdb/egress/line_reader.hpp) -With data flowing into QuestDB, now it's time for analysis. +For SQL execution from C/C++, the [Querying and SQL execution](#querying-and-sql-execution) +section covers the QWP reader. Alternatives outside this library are the +[PGWire C++ client](/docs/connect/compatibility/pgwire/c-and-cpp/) and the +[REST API](/docs/connect/compatibility/rest-api/). -To learn _The Way_ of QuestDB SQL, see the -[Query & SQL Overview](/docs/query/overview/). +With data flowing into QuestDB, the next step is querying. See the +[Query overview](/docs/query/overview/) to learn QuestDB SQL. -Alone? Stuck? Want help? Visit us in our +Need help? Visit the [Community Forum](https://community.questdb.com/). diff --git a/documentation/ingestion/clients/configuration-string.md b/documentation/ingestion/clients/configuration-string.md index eedb19416..d5a3e5cff 100644 --- a/documentation/ingestion/clients/configuration-string.md +++ b/documentation/ingestion/clients/configuration-string.md @@ -90,7 +90,7 @@ The following options are available: ## Other considerations -- Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for +- Please refer to the [ILP overview](/docs/connect/compatibility/ilp/overview) for details about transactions, error control, delivery guarantees, health check, or table and column auto-creation. 
- The method `flush()` can be called to force sending the internal buffer to a diff --git a/documentation/ingestion/clients/date-to-timestamp-conversion.md b/documentation/ingestion/clients/date-to-timestamp-conversion.md index 9293f8cf3..8c1f19b4d 100644 --- a/documentation/ingestion/clients/date-to-timestamp-conversion.md +++ b/documentation/ingestion/clients/date-to-timestamp-conversion.md @@ -1,7 +1,10 @@ --- +slug: /connect/clients/date-to-timestamp-conversion title: Date to Timestamp Conversion in Different Programming Languages sidebar_label: Date to Timestamp -description: Python, Go, JAVA, JavaScript, C/C++, Rust, .Net, PHP, or Ruby. +description: + How to convert language-native date/time values into QuestDB timestamp + columns, for each supported client library. --- Most languages have a dedicated type for dates or timestamps, with the notable exception of C. In this guide, we show how to convert from a literal string representing a date into the native `Date` type, and then @@ -11,7 +14,7 @@ QuestDB offers clients for Python, Go, Java, C, C++, Rust, C#/.NET, and JavaScri can directly use a `Timestamp` type when using the client, while others need to convert the timestamp into a long representing the epoch time in microseconds. We add such required conversions into the snippets. -Please refer to the [ingestion overview](/docs/ingestion/overview/) to learn more about the details of the client library for your language. +Please refer to the [ingestion overview](/docs/connect/overview/) to learn more about the details of the client library for your language. 
## Date to Timestamp in Python @@ -45,7 +48,7 @@ pd_timestamp = pd.Timestamp(datetime_obj) print(f"Pandas Timestamp: {pd_timestamp}") ``` -Learn more about the [QuestDB Python Client](/docs/ingestion/clients/python/) +Learn more about the [QuestDB Python Client](/docs/connect/clients/python/) ## Date to Timestamp in Go @@ -89,7 +92,7 @@ func main() { } ``` -Learn more about the [QuestDB Go Client](/docs/ingestion/clients/go/) +Learn more about the [QuestDB Go Client](/docs/connect/clients/go/) ## Date to Timestamp in Java @@ -173,7 +176,7 @@ public class Main { } ``` -Learn more about the [QuestDB Java Client](/docs/ingestion/clients/java/) +Learn more about the [QuestDB Java Client](/docs/connect/clients/java/) ## Date to Timestamp in C @@ -207,7 +210,7 @@ int main() { } ``` -Learn more about the [QuestDB C Client](/docs/ingestion/clients/c-and-cpp/#c-1) +Learn more about the [QuestDB C Client](/docs/connect/clients/c-and-cpp/) ## Date to Timestamp in C++ @@ -244,7 +247,7 @@ int main() { return 0; } ``` -Learn more about the [QuestDB C++ Client](/docs/ingestion/clients/c-and-cpp/) +Learn more about the [QuestDB C++ Client](/docs/connect/clients/c-and-cpp/) ## Date to Timestamp in Rust @@ -276,7 +279,7 @@ fn main() { } ``` -Learn more about the [QuestDB Rust Client](/docs/ingestion/clients/rust/) +Learn more about the [QuestDB Rust Client](/docs/connect/clients/rust/) ## Date to Timestamp in C#/.NET @@ -314,7 +317,7 @@ class Program } ``` -Learn more about the [QuestDB .NET Client](/docs/ingestion/clients/dotnet/) +Learn more about the [QuestDB .NET Client](/docs/connect/clients/dotnet/) ## Date to Timestamp in JavasScript/Node.js @@ -337,7 +340,7 @@ console.log("Timestamp (microseconds):", timestamp.toString()); // .timestampColumn("NonDesignatedTimestampColumnName", timestamp) ``` -Learn more about the [QuestDB Node.js Client](/docs/ingestion/clients/nodejs/) +Learn more about the [QuestDB Node.js Client](/docs/connect/clients/nodejs/) ## Date to Timestamp in 
Ruby @@ -361,7 +364,7 @@ puts "Date: #{date_obj}" puts "Timestamp (microseconds): #{timestamp}" ``` -Learn more about the [ILP text format](/docs/ingestion/ilp/advanced-settings/). +Learn more about the [ILP text format](/docs/connect/compatibility/ilp/advanced-settings/). ## Date to Timestamp in PHP @@ -389,5 +392,5 @@ echo "Date: " . $time_precise->format('Y-m-d H:i:s.u') . PHP_EOL; echo "Timestamp (microseconds): " . $timestamp_precise . PHP_EOL; ``` -Learn more about the [ILP text format](/docs/ingestion/ilp/advanced-settings/). +Learn more about the [ILP text format](/docs/connect/compatibility/ilp/advanced-settings/). diff --git a/documentation/ingestion/clients/dotnet.md b/documentation/ingestion/clients/dotnet.md index 627f86b49..05750fe96 100644 --- a/documentation/ingestion/clients/dotnet.md +++ b/documentation/ingestion/clients/dotnet.md @@ -1,278 +1,262 @@ --- -title: .NET Client Documentation +slug: /connect/clients/dotnet +title: .NET client for QuestDB +sidebar_label: .NET description: - "Get started with QuestDB using the .NET client for efficient, - high-performance insert operations. Achieve unparalleled time series data - ingestion capabilities." + "QuestDB .NET client for high-throughput data ingestion and SQL querying + over the QWP binary protocol (WebSocket)." --- import { ILPClientsTable } from "@theme/ILPClientsTable" +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" -QuestDB supports the .NET ecosystem with its dedicated .NET client, engineered -for high-throughput data ingestion, focusing on insert-only operations. +The QuestDB .NET client connects to QuestDB over the +[QWP binary protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) (WebSocket). +QWP is a column-oriented binary wire format: smaller and faster than the text +ILP used by `http::` and `tcp::`, with the full QuestDB type system, automatic +table creation, schema evolution, multi-host failover, and optional +store-and-forward durability. 
-Apart from blazing fast ingestion, our clients provide these key benefits: +Two complementary clients live in the same NuGet package: -- **Automatic table creation**: No need to define your schema upfront. -- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions +- **Ingestion** (`Sender` / `IQwpWebSocketSender`): column-oriented batched + writes with automatic table creation, schema evolution, and optional + store-and-forward durability. +- **Querying** (`QueryClient` / `IQwpQueryClient`): parameterised SQL over the + QWP egress endpoint (`/read/v1`), with streaming columnar batches, DDL/DML + execution, per-query failover, and credit-based flow control. See + [Querying and SQL execution](#querying-and-sql-execution). -This quick start guide aims to familiarize you with the fundamental features of -the .NET client, including how to establish a connection, authenticate, and -perform basic insert operations. +:::tip Legacy transports - - -:::info - -This page focuses on our high-performance ingestion client, which is optimized -for **writing** data to QuestDB. For retrieving data, we recommend using a -[PostgreSQL-compatible .NET library](/docs/query/pgwire/dotnet/) or our -[HTTP query endpoint](/docs/query/overview/#rest-http-api). +The same `Sender` also speaks ILP over HTTP and TCP. This page documents the +recommended WebSocket (QWP) path; ILP keeps working unchanged for existing +deployments. For ILP transport details, see the +[ILP overview](/docs/connect/compatibility/ilp/overview/). ::: + + ## Requirements -- .NET 6.0 or higher is required. -- QuestDB must be running. 
If not, see - [the general quick start guide](/docs/getting-started/quick-start/). +- **.NET 7.0 or higher** for the `ws::` / `wss::` (QWP) transport — it depends + on header-aware `ClientWebSocket` APIs. The HTTP and TCP transports work on + .NET 6.0+. +- QuestDB must be running. If not, see the + [quick start guide](/docs/getting-started/quick-start/). ## Client installation -The latest version of the library is -[2.1.0](https://www.nuget.org/packages/net-questdb-client/) -([changelog](https://github.com/questdb/net-questdb-client/releases/tag/2.1.0)) - -The NuGet package can be installed using the dotnet CLI: +Install the NuGet package with the dotnet CLI: ```shell dotnet add package net-questdb-client ``` -:::note - -`Sender` is single-threaded, and uses a single connection to the database. - -If you want to send in parallel, you can use multiple senders and standard async -tasks. - -::: - -## Authentication - -### HTTP - -The HTTP protocol supports authentication via -[Basic Authentication](https://datatracker.ietf.org/doc/html/rfc7617), and -[Token Authentication](https://datatracker.ietf.org/doc/html/rfc6750). +## Quick start -**Basic Authentication** +### Ingest data -Configure Basic Authentication with the `username` and `password` parameters: - -```csharp -using QuestDB; - ... -using var sender = Sender.New("http::addr=localhost:9000;username=admin;password=quest;"); - ... -``` - -**Token Authentication** - -_QuestDB Enterprise Only_ - -Configure Token Authentication with the `username` and `token` parameters: - -```csharp -using var sender = Sender.New("http::addr=localhost:9000;username=admin;token="); -``` - -### TCP - -TCP authentication can be configured using JWK tokens: - -```csharp -using var sender = Sender.New("tcp::addr=localhost:9009;username=admin;token="); -``` - -The connection options can also be built programatically. See -[Ways to create the client](#ways-to-create-the-client) for details. 
- -## Basic insert - -Basic insertion (no-auth): +Build a sender from a connect string, append rows, and flush: ```csharp using System; using QuestDB; +using QuestDB.Senders; + +await using var sender = Sender.New("ws::addr=localhost:9000;"); -using var sender = Sender.New("http::addr=localhost:9000;"); await sender.Table("trades") .Symbol("symbol", "ETH-USD") .Symbol("side", "sell") .Column("price", 2615.54) .Column("amount", 0.00044) - .AtNowAsync(); + .AtAsync(DateTime.UtcNow); + await sender.Table("trades") .Symbol("symbol", "BTC-USD") - .Symbol("side", "sell") + .Symbol("side", "buy") .Column("price", 39269.98) .Column("amount", 0.001) - .AtNowAsync(); + .AtAsync(DateTime.UtcNow); + await sender.SendAsync(); ``` -In this case, we asked the server to assign the timestamp to each row. Let's see -now an example with timestamps, custom auto-flushing, basic auth, and error -reporting. +The steps are: + +1. Build a sender from a connect string (`ws::` for plain, `wss::` for TLS). +2. Append rows with the fluent `Table` / `Symbol` / `Column` / `At` builder. +3. Call `SendAsync()` (or rely on auto-flush) to publish. +4. Dispose the sender — `await using` drains in-flight frames on close. + +We recommend supplying the event's own timestamp to `AtAsync`. Ingestion-time +timestamps preclude deduplication, which is +[important for exactly-once processing](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). + +### Query data + +Read the same rows back over the QWP egress endpoint. 
`QueryClient` lives in +the same NuGet package: ```csharp using QuestDB; -using System; -using System.Threading.Tasks; +using QuestDB.Qwp.Query; + +await using var client = await QueryClient.NewAsync("ws::addr=localhost:9000;"); -class Program +await client.ExecuteAsync( + "SELECT ts, symbol, price, amount FROM trades WHERE symbol = 'ETH-USD' LIMIT 10", + new PrintHandler()); + +internal sealed class PrintHandler : QwpColumnBatchHandler { - static async Task Main(string[] args) + public override void OnBatch(QwpColumnBatch batch) { - using var sender = Sender.New( - "http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;" - ); - - var now = DateTime.UtcNow; - try + for (var row = 0; row < batch.RowCount; row++) { - await sender.Table("trades") - .Symbol("symbol", "ETH-USD") - .Symbol("side", "sell") - .Column("price", 2615.54) - .Column("amount", 0.00044) - .AtAsync(now); - - await sender.Table("trades") - .Symbol("symbol", "BTC-USD") - .Symbol("side", "sell") - .Column("price", 39269.98) - .Column("amount", 0.001) - .AtAsync(now); - - await sender.SendAsync(); - - Console.WriteLine("Data flushed successfully."); - } - catch (Exception ex) - { - Console.Error.WriteLine($"Error: {ex.Message}"); + Console.WriteLine( + $"ts={batch.GetLongValue(0, row)} " + + $"symbol={batch.GetString(1, row)} " + + $"price={batch.GetDoubleValue(2, row)} " + + $"amount={batch.GetDoubleValue(3, row)}"); } } + + public override void OnEnd(long totalRows) => + Console.WriteLine($"done: {totalRows} rows"); + + public override void OnError(byte status, string message) => + Console.Error.WriteLine($"query failed: 0x{status:X2} {message}"); } ``` -Now, both events use the same timestamp. We recommend using the event's -original timestamp when ingesting data into QuestDB. 
Using ingestion-time -timestamps precludes the ability to deduplicate rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). - - +`auth_timeout_ms` (default 15000) bounds how long the client waits for the +WebSocket upgrade response. + +### Unsupported authentication paths + +| Path | Status | Workaround | +|---|---|---| +| OIDC token acquisition or in-band refresh | Not supported by this client. It does not negotiate with an identity provider and has no callback to refresh a token mid-session. | QuestDB itself supports OIDC — see [OpenID Connect](/docs/security/oidc/). Acquire an access token out-of-band from your IdP, pass it via `token=...` above, and rebuild the sender / query client when the token nears expiry. | +| Mutual TLS (client certificates) | Not supported. The QuestDB server does not negotiate client certificates regardless of client. | Use bearer-token auth over `wss://`. See the connect-string reference for the canonical statement. | +| Token rotation mid-session | Not supported. Credentials are presented once during the WebSocket upgrade and are not re-sent. | On token expiry, `await sender.DisposeAsync()` and build a fresh sender with the new token. The same applies to `QueryClient`. | + +### Production example (TLS + token + multi-host) + +A realistic Enterprise deployment combines `wss`, token auth, multi-host +failover, and a store-and-forward directory so unacked frames survive a +sender restart: + +```csharp +// Ingestion — write to any writeable node. +await using var sender = Sender.NewQwp( + "wss::addr=db-primary:9000,db-replica:9000;" + + "token=your_bearer_token;" + + "sf_dir=/var/lib/myapp/qdb-sf;" + + "sender_id=ingest-1;" + + "reconnect_max_duration_millis=300000;"); + +// Querying — prefer a replica to offload the primary. 
+await using var query = await QueryClient.NewAsync( + "wss::addr=db-primary:9000,db-replica:9000;" + + "token=your_bearer_token;" + + "target=replica;" + + "failover=on;failover_max_duration_ms=30000;"); +``` + +`tls_verify=unsafe_off` is **never** safe in production; pin a CA with +`tls_roots=/path/to/roots.pfx;tls_roots_password=...` if you need to override +the system trust store. ## Ways to create the client There are three ways to create a client instance: -1. **From a configuration string.** This is the most common way to create a - client instance. It describes the entire client configuration in a single - string. See [Configuration options](#configuration-options) for all available - options. It allows sharing the same configuration across clients in different - languages. +1. **From a connect string** — the most common way. It describes the whole + configuration in one string and is portable across language clients. ```csharp - using var sender = Sender.New("http::addr=localhost:9000;"); + using var sender = Sender.New("ws::addr=localhost:9000;"); ``` -2. **From an environment variable.** The `QDB_CLIENT_CONF` environment variable - is used to set the configuration string. Moving configuration parameters to - an environment variable allows you to avoid hard-coding sensitive information - such as tokens and password in your code. - - If you want to initialise some properties programmatically after the initial - config string, you can use `Configure` and `Build`. +2. **From an environment variable.** `QDB_CLIENT_CONF` holds the connect + string, keeping credentials out of source code. ```bash - export QDB_CLIENT_CONF="http::addr=localhost:9000;auto_flush_rows=5000;retry_timeout=10000;" + export QDB_CLIENT_CONF="wss::addr=localhost:9000;token=your_bearer_token;" ``` ```csharp - (Sender.Configure("http::addr=localhost:9000;") with { auto_flush = AutoFlushType.off }).Build() + using var sender = Sender.FromEnv(); ``` -3. 
**From SenderOptions.** + To set properties programmatically on top of a connect string, use + `Configure` and `Build`: ```csharp - await using var sender = Sender.New(new SenderOptions()); + using var sender = + (Sender.Configure("ws::addr=localhost:9000;") with { auto_flush = AutoFlushType.off }) + .Build(); ``` - This way you can bind options from configuration: +3. **From `SenderOptions`** — bind options from configuration: ```json { - "QuestDB": { - "addr": "localhost:9000", - "tls_verify": "unsafe_off;" - } + "QuestDB": { "addr": "localhost:9000", "protocol": "ws" } } ``` @@ -282,388 +266,1135 @@ There are three ways to create a client instance: .Build() .GetSection("QuestDB") .Get(); + await using var sender = Sender.New(options); ``` -## Configuration options +`Sender.New` and `Sender.FromEnv` return `ISender`. For the QWP-only +operations — ping, `seqTxn` watermarks, FSN tracking, decimal columns — call +`Sender.NewQwp(connectString)` (or `Sender.NewQwp(options)`) instead: it takes +the same `ws::` / `wss::` configuration and returns `IQwpWebSocketSender` +directly, so you skip the `is IQwpWebSocketSender` cast. -The easiest way to configure the `Sender` is the configuration string. The -general structure is: -```plain -::addr=host:port;param1=val1;param2=val2;... -``` -`transport` can be `http`, `https`, `tcp`, or `tcps`. Go to the client's -[crate documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) for the -full details on configuration. -Alternatively, for breakdown of available params, see the -[Configuration string](/docs/ingestion/clients/configuration-string/) page. +For the full list of connect-string keys and defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). +## Data ingestion -## Preparing Data +### Building rows -The Sender uses an internal buffer to convert input values into an -ILP-compatible UTF-8 byte-string. 
+A row starts with a table name, then symbols, then other columns, and is +finished with a timestamp: -You can control buffer sizing with the `init_buf_size` and `max_buf_size` -parameters. +```csharp +await sender.Table("trades") // 1. select the target table + .Symbol("symbol", "ETH-USD") // 2. symbols first + .Column("price", 2615.54) // 3. other columns + .AtAsync(DateTime.UtcNow); // 4. finish the row +``` -Here is how to build a buffer of rows ready to be sent to QuestDB. +- **`Table(name)`** — must be called first for each row. +- **`Symbol(name, value)`** — a [symbol](/docs/concepts/symbol/) is a + dictionary-encoded string; all symbols must come before other columns. +- **`Column(name, value)`** — overloaded for every QuestDB type reachable + from `ISender`; see [Type reference](#type-reference) for the full matrix + and the additional `IQwpWebSocketSender` setters. +- **`At(...)` / `AtAsync(...)`** — finishes the row with the + [designated timestamp](/docs/concepts/designated-timestamp/). `AtNow()` / + `AtNowAsync()` let the server assign it (this defeats deduplication). + +Tables and columns are created automatically if they do not exist. Use the +`Async` overloads (`AtAsync`, `SendAsync`) to avoid blocking the calling +thread. + +### Type reference + +`ISender` covers the everyday types from the .NET runtime overloads. 
+`IQwpWebSocketSender` adds the QWP-only types that ILP cannot carry — cast +the sender (or build it with `Sender.NewQwp(...)`) to reach them: + +| QuestDB type | `ISender` setter | `IQwpWebSocketSender` setter | Null variant | +|---|---|---|---| +| `SYMBOL` | `Symbol(name, ReadOnlySpan)` | — | omit the call | +| `BOOLEAN` | `Column(name, bool)` | — | `NullableColumn(name, bool?)` | +| `BYTE` (i8) | — | `ColumnByte(name, sbyte)` | omit the call | +| `SHORT` (i16) | — | `ColumnShort(name, short)` | omit the call | +| `INT` (i32) | `Column(name, int)` | — | omit the call (use `long?` overload for nullable) | +| `LONG` (i64) | `Column(name, long)` | — | `NullableColumn(name, long?)` | +| `FLOAT` (f32) | — | `ColumnFloat(name, float)` | omit the call | +| `DOUBLE` (f64) | `Column(name, double)` | — | `NullableColumn(name, double?)` | +| `CHAR` | `Column(name, char)` | — | `NullableColumn(name, char?)` | +| `VARCHAR` | `Column(name, ReadOnlySpan)` | — | `NullableColumn(name, string?)` | +| `BINARY` | — | `ColumnBinary(name, ReadOnlySpan)` | omit the call | +| `UUID` | `Column(name, Guid)` | — | `NullableColumn(name, Guid?)` | +| `LONG256` | — | `ColumnLong256(name, BigInteger)` (non-negative) | omit the call | +| `DATE` | — | `ColumnDate(name, long millisSinceEpoch)` | omit the call | +| `TIMESTAMP` (non-designated) | `Column(name, DateTime)` / `Column(name, DateTimeOffset)` | — | `NullableColumn(name, DateTime?)` / `NullableColumn(name, DateTimeOffset?)` | +| `TIMESTAMP_NS` (non-designated) | `ColumnNanos(name, long timestampNanos)` | — | omit the call | +| `IPv4` | — | `ColumnIPv4(name, System.Net.IPAddress)` | omit the call | +| `GEOHASH` | — | `ColumnGeohash(name, ulong hash, int precisionBits)` (1–60 bits) | omit the call | +| `DECIMAL64` | — | `ColumnDecimal64(name, decimal)` / `ColumnDecimal64(name, long unscaled, byte scale)` | omit the call | +| `DECIMAL128` | `Column(name, decimal)` (default for `decimal`) | `ColumnDecimal128(name, long lo, long hi, byte 
scale)` (full 38-digit range) | `NullableColumn(name, decimal?)` | +| `DECIMAL256` | — | `ColumnDecimal256(name, decimal)` / `ColumnDecimal256(name, long, long, long, long, byte scale)` | omit the call | +| `DOUBLE[]` / `LONG[]` (n-D arrays) | `Column(name, ReadOnlySpan)` / `Column(name, IEnumerable, IEnumerable shape)` / `Column(name, Array)` | — | `NullableColumn(name, Array?)` | +| Designated timestamp | `AtAsync(DateTime)` / `At(DateTime)` / `AtAsync(DateTimeOffset)` / `At(DateTimeOffset)` / `AtAsync(long micros)` / `At(long micros)` / `AtNanosAsync(long)` / `AtNanos(long)` | — | **required, not null** | + +The single-arg `Column(name, decimal)` writes `DECIMAL128`, which is wide +enough for any `System.Decimal` value (~29 significant digits) — see +[Decimal columns](#decimal-columns) for picking a narrower width. + +### Null values + +The .NET client has no `setNull` method. Two idioms produce a NULL: + +1. **`NullableColumn(name, T?)`** — the wrapper writes a value when the + nullable argument is set and skips the column when it is `null`: -:::warning + ```csharp + sender.Table("trades") + .Symbol("symbol", "ETH-USD") + .NullableColumn("price", maybePrice) // null → column omitted + .NullableColumn("notes", maybeNotes); // null → column omitted + await sender.AtAsync(DateTime.UtcNow); + ``` -The senders are **not** thread safe, since they manage an internal buffer. If -you wish to send data in parallel, construct multiple senders and use -non-blocking I/O to submit to QuestDB. +2. **Omit the setter** — every column not set on a row is gap-filled with + NULL when the row is finished. The two idioms produce the same wire + output; `NullableColumn` just makes the optionality explicit at the call + site. -::: +On a brand-new table, an omitted column is not inferred from that row. The +server only adds the column when a later row supplies a non-null value for +it, so first-row nulls leave the column absent until then. 
-The API follows the following overall flow: +The designated timestamp **cannot** be null — every row requires one of +`AtAsync(DateTime)`, `AtAsync(DateTimeOffset)`, `AtAsync(long micros)`, or +`AtNanosAsync(long)`. -```mermaid -flowchart LR +`Symbol(name, value)` and the string overload of `Column(name, value)` take +`ReadOnlySpan`, which cannot itself be null; an empty span is a valid +non-null empty string. Use `NullableColumn(name, string?)` if your value can +be `null`. -A[New] --> B(Table) +### Decimal columns -B --> D(Symbol) -D --> E -B --> E(Column) -D --> F(At) -E --> F -F --> B -F --> G(Send) -G --> B -G--> H(Dispose) -``` +:::caution -### Specify the table +Decimal ingestion requires QuestDB 9.2.0 or later. Pre-create decimal columns +with `DECIMAL(precision, scale)`. See the +[decimal data type](/docs/query/datatypes/decimal/#creating-tables-with-decimals) +page. -An ILP row starts with a table name, using `Table`. +::: + +`Column(name, decimal)` writes a `DECIMAL128` (16-byte) column: ```csharp -sender.Table("table_name"); +sender.Table("fx_prices") + .Symbol("pair", "EURUSD") + .Column("bid", 1.071234m) // scale locked on first write + .Column("ask", 1.071258m); +await sender.AtAsync(DateTime.UtcNow); ``` -The table name must always be called before other builder functions. +`DECIMAL128` matches the range of .NET's `System.Decimal` (~29 significant +digits). `DECIMAL64` holds only 18 digits, so it cannot be the safe default — +a large `decimal` would overflow it. + +:::tip Narrower columns + +If your values fit in 18 digits — typical for prices and quantities — the +8-byte `DECIMAL64` halves wire and storage size. Either pre-create the column +as `DECIMAL(p, s)` with `p ≤ 18` (the stored width follows the column +definition, not the wire width), or cast to `IQwpWebSocketSender` and call +`ColumnDecimal64` explicitly. + +::: + +`ColumnDecimal64` and `ColumnDecimal256` (32-byte) also accept a +`System.Decimal`. 
All three widths additionally expose an unscaled-mantissa +overload with an explicit scale, for values beyond `System.Decimal`'s +~29-digit range. -### Add symbols +## Flushing -A [symbol](/docs/concepts/symbol/) is a dictionary-encoded string, used to -efficiently store commonly repeated data. We recommend using this type for -identifiers, because you can create a -[secondary index](/docs/concepts/deep-dive/indexes/) for a symbol column. +Buffered rows are not on the wire until they are flushed — automatically or +explicitly. -Add symbols by calling `Symbol()`, which expects a symbol column -name, and a string value. +### Auto-flushing + +Unlike the C/Rust QWP clients, the .NET WebSocket sender **supports +auto-flushing**. After each `At` / `AtAsync` call the sender checks three +OR'd triggers; whichever trips first publishes the batch. + +| Key | `ws` default | Description | +|---|---|---| +| `auto_flush` | `on` | Master switch. `off` requires explicit `Send`. | +| `auto_flush_rows` | `1000` | Flush after this many buffered rows. | +| `auto_flush_interval` | `100` ms | Flush after this long since the first buffered row. | +| `auto_flush_bytes` | `8 MiB` | Flush after the encode buffer reaches this size. | ```csharp -sender.Symbol("foo", "bah"); +// Tune the batch size, or disable auto-flush entirely. +using var sender = Sender.New("ws::addr=localhost:9000;auto_flush_rows=5000;"); +using var manual = Sender.New("ws::addr=localhost:9000;auto_flush=off;"); +``` + +### Explicit flushing + +Call `Send()` or `SendAsync()` to publish buffered rows immediately: + +```csharp +sender.Table("trades").Symbol("symbol", "ETH-USD").Column("price", 2615.54) + .At(DateTime.UtcNow); +await sender.SendAsync(); ``` -You must specify all symbol columns first, before any other columns. +On QWP, a flush returns once the batch is accepted by the local send engine — +**before** the server acknowledges it.
Server-side errors surface +asynchronously; see [Delivery tracking](#delivery-tracking). -### Add other columns +## Delivery tracking -There are several data types you can send to QuestDB via ILP, including string / -long / double / DateTime / DateTimeOffset. +Awaiting ACKs is **optional**: an app that never calls `PingAsync` or +`AwaitAckedFsnAsync` and just `await using`-disposes the sender is safe — +`DisposeAsync` drains in-flight ACKs, bounded by `close_flush_timeout_millis` +(default 5000 ms). Reach for the APIs below when the app needs to (a) know a +specific write made it before continuing, (b) cooperate with QuestDB +Enterprise durable-replication watermarks, or (c) co-ordinate a graceful +shutdown that must not exit until the queue has drained. -Provide these by calling `Column()`. +`Sender.NewQwp(...)` returns `IQwpWebSocketSender`, which adds QWP-only +delivery operations on top of `ISender`: ```csharp -sender.Column("baz", 102); -``` +await using var sender = Sender.NewQwp("ws::addr=localhost:9000;"); + +// ... ingest rows, then flush ... +await sender.SendAsync(); -### Finish the row +// Drain the in-flight ACK window: every batch sent so far is acknowledged +// once this returns. Bounded by ping_timeout. +await sender.PingAsync(); -Completed a row by specifying the designated timestamp: +// Per-table commit watermark (populated once the server ACKs a batch). +Console.WriteLine($"trades committed seqTxn: {sender.GetHighestAckedSeqTxn("trades")}"); +``` + +For frame-level tracking, every flush is assigned a frame sequence number +(FSN): ```csharp -sender.At(DateTime.UtcNow); +long fsn = await sender.FlushAndGetSequenceAsync(); +bool acked = await sender.AwaitAckedFsnAsync(fsn, TimeSpan.FromSeconds(10)); +if (!acked) Console.Error.WriteLine("timed out waiting for server ACK"); ``` -You can also let the server assign the timestamp, by calling `AtNow()` instead. 
+| Member | Returns | +|---|---| +| `FlushAndGetSequenceAsync()` | FSN of the highest frame published by this call. | +| `AckedFsn` | Highest FSN the server has acknowledged. | +| `AwaitAckedFsnAsync(fsn, timeout)` | Block until `AckedFsn` reaches `fsn`. | +| `GetHighestAckedSeqTxn(table)` | Highest committed `seqTxn` per table (`-1` if none). | +| `GetHighestDurableSeqTxn(table)` | Highest durably-uploaded `seqTxn` per table. | -:::caution +## Asynchronous error handling -We recommend using the event's original timestamp when ingesting data into -QuestDB. Using ingestion-time timestamps precludes the ability to deduplicate -rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +QWP ingestion is asynchronous: a flush returns once the batch is accepted by +the local send engine, before the server validates it. Server rejections and +protocol violations surface separately. -::: +### How errors surface -## Flushing +Each error is classified into a `SenderErrorCategory` and assigned a +`SenderErrorPolicy`: -Once the buffer is filled with data ready to be sent, it can be flushed to the -database automatically, or manually. +| Policy | Effect | Default categories | +|---|---|---| +| `DropAndContinue` | The rejected batch is dropped; the sender keeps running. | `SchemaMismatch`, `WriteError` | +| `Halt` | The sender latches terminal; the next producer call throws `LineSenderServerException`. | `ParseError`, `InternalError`, `SecurityError`, `ProtocolViolation`, `Unknown` | -### Auto-flushing +After a `Halt`, discard the sender and create a new one. -When you call one of the `At` functions, the row is complete. The sender checks -the auto-flushing parameters to see if it should flush the buffer to the server. +### Error handler + +Install a `SenderErrorHandler` on `SenderOptions` to observe every error. 
It +runs on a background dispatcher — never the I/O or producer thread — so a slow +handler cannot stall publishing; thrown exceptions are caught and traced. ```csharp -sender.At(new DateTime(0,0,1)); +var options = Sender.Configure("ws::addr=localhost:9000;"); +options.error_handler = err => + Console.Error.WriteLine( + $"qwp error: category={err.Category} policy={err.AppliedPolicy} " + + $"fsn=[{err.FromFsn},{err.ToFsn}] table={err.TableName} msg={err.ServerMessage}"); + +await using var sender = Sender.NewQwp(options); ``` -To avoid blocking the calling thread, use the Async overloads of the `At`, such -as `AtAsync`. +`error_inbox_capacity` (default 256, minimum 16) bounds the async error inbox; +on overflow the oldest entry is dropped — `IQwpWebSocketSender.DroppedErrorNotifications` +counts how often that happened. + +### Error payload fields + +Each `SenderError` carries the following fields: + +| Field | Description | +|---|---| +| `Category` | `SchemaMismatch`, `ParseError`, `InternalError`, `SecurityError`, `WriteError`, `ProtocolViolation`, `Unknown`. Use for programmatic dispatch. | +| `AppliedPolicy` | `DropAndContinue` (batch dropped, sender continues) or `Halt` (sender latched terminal; next API call throws `LineSenderServerException`). | +| `ServerStatusByte` | Raw QWP status byte (e.g. `0x03` for `SchemaMismatch`). `-1` (`SenderError.NoStatusByte`) on `ProtocolViolation` and engine-internal terminal failures. | +| `ServerMessage` | Human-readable server text (≤ 1024 UTF-8 bytes), or `null`. See [Message stability](#message-stability) and [PII safety](#message-pii). | +| `MessageSequence` | Server's per-frame QWP wire sequence for the error frame. `-1` (`SenderError.NoMessageSequence`) for engine-internal failures. **Resets on reconnect** — only meaningful within one connection. | +| `FromFsn` / `ToFsn` | Inclusive client-side FSN span of the affected batch. Pair with `FlushAndGetSequenceAsync()` to identify the rejected rows. 
| +| `TableName` | Rejected table; `null` for multi-table batches or when the server did not attribute the error. | +| `DetectedAtUtc` | Wall-clock receipt time on the I/O thread; for ops timelines, not for correlation. | +| `Exception` | Non-`null` for engine-internal failures (connect-budget exhaustion, fatal upgrade reject); `null` for server rejections. | +| `IsInitialConnect` | `true` if the engine never reached a first successful connection (config / connectivity issue); always `false` for server-side rejections. | + +#### Message stability {#message-stability} + +`ServerMessage` is a human-readable diagnostic — **not a stable contract.** +QWP error frames carry a server-supplied UTF-8 string capped at 1024 bytes by +the wire spec; the text mirrors QuestDB's normal SQL error formatting and has +historically been reworded across releases. The field may be empty. Use +`Category` and `ServerStatusByte` for programmatic dispatch; never +pattern-match on `ServerMessage`. + +#### PII / secret safety {#message-pii} + +`ServerMessage` may include fragments of the client's own payload — for +example, an offending column value quoted back by a schema or parse +rejection. `TableName` and any text exposed by `Exception.Message` are +similarly user-controlled. **Treat them as potentially containing PII or +secrets.** Log them at the trust level of the data being sent, and sanitise +before forwarding to external error trackers (Sentry, Datadog, end-user UIs). +The other `SenderError` fields are safe to forward as-is — they carry only +structural metadata. + +#### Correlating with server-side logs + +QWP does not surface a server-issued request or connection identifier. The +closest correlation handle is the `(MessageSequence, FromFsn, ToFsn)` tuple +plus the connection start time from your application logs — `MessageSequence` +resets on reconnect, so it only disambiguates frames within a single +connection. 
When filing a support ticket, include the connection start time +and the `(MessageSequence, FromFsn, ToFsn)` triple. + +### Synchronous errors + +Misconfiguration and API-misuse errors surface synchronously as `IngressError` +(or its subclass `LineSenderServerException` for HALT-policy server +rejections). They are thrown directly from the call site: + +| Site | Throws when | +|---|---| +| `Sender.New(...)` / `Sender.NewQwp(...)` / `QueryClient.New(...)` | The connect string is malformed (missing `::`, unknown key, invalid value), required fields are absent, or mutually exclusive auth modes are combined. `Sender.NewQwp` additionally rejects non-`ws::` / `wss::` schemes. | +| `Sender.FromEnv()` / `QueryClient.FromEnv()` | `QDB_CLIENT_CONF` is unset or blank. | +| `Column(...)` / `Symbol(...)` before `Table(...)` | The row has not been started; the builder requires `Table(...)` first. | +| Array `Column(...)` overloads | The `shape` does not match the element count, dimensionality exceeds 32, or the element type is not `double` / `long`. | +| `ColumnGeohash(...)` | `precisionBits` is outside `[1, 60]`. | +| `ColumnDecimal*(...)` with explicit `scale` | `scale` is outside `[0, 18]` (DECIMAL64), `[0, 38]` (DECIMAL128), or `[0, 76]` (DECIMAL256). | +| Producer-thread call after `Halt` policy fired | The next `Table`, `Column`, `AtAsync`, or `SendAsync` throws `LineSenderServerException` carrying the latched `SenderError`. Discard the sender and create a new one. | + +Authentication failures surface differently between paths: a `401` / `403` +during the WebSocket upgrade returns synchronously from `Sender.New` / +`QueryClient.New` as `IngressError` (since the upgrade is part of `connect`), +while an upgrade that succeeded but later loses the connection and is denied +on reconnect surfaces asynchronously as a `SecurityError` `SenderError` with +the sender latched terminal. 
+ +### Per-category policy + +Override the default policy per category with the `on_*_error` connect-string +keys (values `halt` or `drop`): ```csharp -await sender.AtNowAsync(); +// Treat a schema mismatch as fatal instead of dropping the batch. +using var sender = Sender.New( + "ws::addr=localhost:9000;on_schema_mismatch_error=halt;"); ``` -Auto-flushing can be enabled or disabled: +| Key | Scope | +|---|---| +| `on_server_error` | Catch-all default for every category. | +| `on_schema_mismatch_error` (alias: `on_schema_error`) | Schema-validation rejections. | +| `on_parse_error` | Client-side parse errors. | +| `on_internal_error` | Unexpected client-side errors. | +| `on_security_error` | Auth / TLS errors. | +| `on_write_error` | Transport write failures. | + +`ProtocolViolation` and `Unknown` are always `Halt`, regardless of these keys. +For programmatic control, set `SenderOptions.error_policy_resolver` to a +`SenderErrorPolicyResolver` delegate. + +### Connection-level errors + +These are not delivered through the `error_handler` because they happen +before the I/O loop is operating against a healthy connection — they surface +synchronously from the factory, from `ExecuteAsync`, or as listener events: + +- **Authentication failure** (`401` / `403` during the WebSocket upgrade) — + terminal across all endpoints. The reconnect / failover loop stops + immediately rather than replaying the same credential against every host. +- **Malformed frames** — `QwpDecodeException` (egress) or `IngressError` + with a `ProtocolViolation` category (ingress); the WebSocket is closed + with a terminal code. The sender / query client transitions to a + non-recoverable state. +- **Role mismatch** — `QwpRoleMismatchException` from `QueryClient.NewAsync` + or the next `ExecuteAsync` when no endpoint matches the configured + `target=any|primary|replica` filter. 
`LastObserved` carries the most + recent `QwpServerInfo` to distinguish "no primary available" from + "all endpoints unreachable". +- **TCP / TLS connect failure** — treated as transient on the ingress side + and fed into the reconnect loop, capped by `reconnect_max_duration_millis`. + +### Error classification + +A summary of how the engine treats each error class on the wire: + +| Source | Status | Effect | +|---|---|---| +| Auth (`401` / `403`) on any endpoint | Terminal | Halts the failover loop immediately; the sender / query client latches non-recoverable. | +| Role reject (`421` + `X-QuestDB-Role`) | Topology-level (transient if `PRIMARY_CATCHUP`, otherwise terminal for the loop) | The client tries the next endpoint; if every endpoint rejects, surfaces as `QwpRoleMismatchException` (egress) or the sender's reconnect loop exhausts. | +| Version mismatch during upgrade | Per-endpoint, **not** terminal | The client moves on to the next endpoint. | +| Server rejection of a batch (`SchemaMismatch`, `ParseError`, `WriteError`, etc.) | Per the `on_*_error` policy — default is `DropAndContinue` for `SchemaMismatch` / `WriteError`, `Halt` for everything else. | `DropAndContinue` keeps the sender alive; `Halt` latches the sender so the next producer call throws `LineSenderServerException`. | +| TCP / TLS failure, `404`, `503`, mid-stream drop | Transient | Fed into the ingress reconnect loop (`reconnect_max_*` keys) or, on egress, the per-query failover loop (`failover_*` keys). | +| `ProtocolViolation`, `Unknown` | Terminal | Always `Halt`, regardless of `on_*_error` settings. 
| + +### Connection events + +Implement `ISenderConnectionListener` and assign it to +`SenderOptions.ConnectionListener` to observe connection-state transitions: ```csharp -using var sender = Sender.New("http::addr=localhost:9000;auto_flush=off;"); // or `on`, defaults to `on` +class Listener : ISenderConnectionListener +{ + public void OnEvent(SenderConnectionEvent evt) => + Console.WriteLine($"{evt.Kind} {evt.Host}:{evt.Port}"); +} + +var options = Sender.Configure("ws::addr=db-a:9000,db-b:9000;"); +options.ConnectionListener = new Listener(); +await using var sender = Sender.NewQwp(options); ``` -#### Flush by rows +Event kinds: `Connected`, `Disconnected`, `Reconnected`, `FailedOver`, +`EndpointAttemptFailed`, `AllEndpointsUnreachable`, `AuthFailed`, +`ReconnectBudgetExhausted`. Listeners run on a dedicated dispatcher thread. -You can specify the number of rows that will trigger an auto-flush, creating a -batch insert operation of that size. +`AuthFailed` and `ReconnectBudgetExhausted` are **terminal**: the sender +latches a non-recoverable failure, the next producer-thread call (`Table`, +`Column`, `AtAsync`, `SendAsync`) throws `IngressError` (or +`LineSenderServerException` if a HALT-policy error was latched alongside), +and no further data can be sent. Discard the sender, build a new one, and +replay any state your application owns. `DroppedConnectionNotifications` on +`IQwpWebSocketSender` counts events that were dropped because a slow listener +fell behind the dispatcher inbox. + +## Store-and-forward + +With store-and-forward (SF) enabled, unacknowledged frames are persisted to +disk and replayed after reconnection, surviving sender process restarts: ```csharp -using var sender = Sender.New("http::addr=localhost:9000;auto_flush=on;auto_flush_rows=5000;"); +using var sender = Sender.New( + "ws::addr=localhost:9000;sf_dir=/var/lib/myapp/qdb-sf;sender_id=ingest-1;"); ``` -By default, the HTTP sender auto-flushes after 75,000 rows, and TCP after 600 -rows. 
+Without `sf_dir` the send queue lives in process memory and is lost if the +process exits; the reconnect loop still spans transient outages, bounded by a +RAM cap. -:::tip + -`auto_flush_rows` and `auto_flush_interval` are both enabled by default. If you -wish to only auto-flush based on one of these properties, disable the other -using `off` or `-1`. +| Key | Default | Description | +|---|---|---| +| `sf_dir` | unset | Enables disk-backed SF when set. | +| `sender_id` | `default` | Slot identity (`A-Za-z0-9_-`). Use a distinct id per sender process sharing one `sf_dir`. | +| `sf_max_bytes` | 4 MiB | Per-segment size cap. | +| `sf_max_total_bytes` | 128 MiB (memory) / 10 GiB (disk) | Cap on total queued bytes. | +| `sf_append_deadline_millis` | 30000 | Max time a flush blocks waiting for queue capacity. | +| `drain_orphans` | `off` | If `on`, take over stale slots from a crashed sender. | -::: +## Durable acknowledgement -#### Flush by interval +:::note Enterprise -You can specify the time interval between auto-flushes. The sender checks it -every time you call an `At` function. +Durable acknowledgement requires QuestDB Enterprise with primary replication. + +::: + +By default the server confirms a batch once it is committed to the local +[WAL](/docs/concepts/write-ahead-log/). With `request_durable_ack=on`, the +client tracks when the batch is durably uploaded to object storage: ```csharp -using var sender = Sender.New("http::addr=localhost:9000;auto_flush=on;auto_flush_interval=5000;"); -``` +await using var sender = Sender.NewQwp( + "ws::addr=localhost:9000;sf_dir=/var/lib/myapp/qdb-sf;request_durable_ack=on;"); -By default, `auto_flush_interval` is 1000 ms. +// ... ingest rows ... +await sender.SendAsync(); -#### Flush by bytes +Console.WriteLine($"trades durable seqTxn: {sender.GetHighestDurableSeqTxn("trades")}"); +``` -As an additional option, disabled by default, you can specify the batch size in -terms of bytes instead of rows. 
You should ensure that `init_buf_size` -$\lt$ `auto_flush_bytes` $\leq$ `max_buf_size`. +## Failover and high availability -This can be useful if you have large variation in row sizes and want to limit -the request sizes. By default, this is disabled, but set to `100 KiB`. +:::note Enterprise -```csharp -using var sender = Sender.New("http::addr=localhost:9000;auto_flush=on;auto_flush_bytes=65536;"); -``` +Multi-host failover is most useful with QuestDB Enterprise primary-replica +replication. -### Explicit flushing +::: -You can also manually flush the buffer at any time by calling `Send` or -`SendAsync`. This will send any outstanding data to the QuestDB server. +Supply a comma-separated address list (or repeat `addr=`). The client connects +to one endpoint and walks the list to the next healthy peer when the +connection breaks: ```csharp -using var sender = Sender.New("http::addr=localhost:9000;auto_flush=off;"); -sender.Table("foo").Symbol("bah", "baz").Column("num", 123).At(DateTime.UtcNow); -await sender.SendAsync(); // send non-blocking -// OR -sender.Send(); // send synchronously +using var sender = Sender.New( + "ws::addr=db-primary:9000,db-replica:9000;sf_dir=/var/lib/myapp/qdb-sf;"); ``` -You should always perform an explicit flush before closing the sender. The HTTP -sender normally retries the requests in case of errors, but won't do that while -auto-flushing before closing. Flushing explicitly ensures that the client -applies the same effort to send all the remaining data. +| Key | Default | Description | +|---|---|---| +| `reconnect_max_duration_millis` | 300000 | Per-outage reconnect budget. | +| `reconnect_initial_backoff_millis` | 100 | First post-failure sleep. | +| `reconnect_max_backoff_millis` | 5000 | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry the first connect (`off` / `on` / `async`). Setting any `reconnect_*` key promotes this to `on`. 
| + +`sf_dir` is strongly recommended for multi-host deployments: a flush writes to +disk and returns quickly while the reconnect loop replays to the new primary +in the background, instead of blocking until `sf_append_deadline_millis`. + +### Backpressure on send + +Row builders (`Table`, `Symbol`, `Column`, `NullableColumn`) never block — +they only mutate the in-process encode buffer, which grows up to +`max_buf_size` (default 100 MiB). Backpressure surfaces at flush time: + +- **In-memory mode (no `sf_dir`).** The in-flight publish window caps how + many unacknowledged frames can sit on the connection. When the server is + reachable but slow, `SendAsync()` waits for ACK-driven capacity before + returning. When the server is unreachable for longer than the in-flight + window can absorb, the rows stay buffered until either the connection + recovers or `DisposeAsync` fires and `close_flush_timeout_millis` elapses. + In-memory mode does **not** survive a process exit; unacked frames are lost. +- **Store-and-forward mode (`sf_dir` set).** `SendAsync()` appends to the + on-disk segment and returns quickly; the I/O loop drains it in the + background. If the disk queue is at its `sf_max_total_bytes` cap, the + append blocks waiting for the loop to trim acknowledged frames, bounded by + `sf_append_deadline_millis` (default 30 000 ms). If the deadline elapses + the engine latches a terminal error and the next producer call surfaces it. + No data is dropped while the publisher is parked. + +A single batch larger than `sf_max_bytes` (default 4 MiB) is rejected +immediately — it does not enter the backpressure wait. Reduce the rows you +accumulate per flush, or raise `sf_max_bytes` to fit your largest single +payload. ## Transactions -The HTTP transport provides transactionality for requests. Each request in a -flush sends a batch of rows, which will be committed at once, or not at all. 
+:::caution WebSocket / QWP does not support transactions -Server-side transactions are only for a single table. Therefore, a request -containing multiple tables will be split into a single transaction per table. If -a transaction fails for one table, other transactions may still complete. +The `Transaction` / `Commit` / `Rollback` API is **HTTP-only**. The QWP +WebSocket sender rejects it — QWP frames are independently acknowledged batches. +Use [store-and-forward](#store-and-forward) plus +[DEDUP](/docs/concepts/deduplication/) keys for delivery guarantees on QWP. -For data transactionality, one can use the transaction feature to enforce a -batch only for a single table. +::: -:::caution +For transactional ILP over HTTP, see the +[ILP overview](/docs/connect/compatibility/ilp/overview#http-transaction-semantics). -As described in the -[ILP overview](/docs/ingestion/ilp/overview#http-transaction-semantics), the -HTTP transport has some limitations for transactions when adding new columns. +## Querying and SQL execution -::: +`QueryClient` sends SQL statements over the +[QWP egress](/docs/connect/wire-protocols/qwp-egress-websocket/) endpoint +(`/read/v1`). Results arrive as columnar batches via a callback handler. -Transactions follow this flow: +`ExecuteAsync` is **blocking on completion**: it sends the query, drives the +WebSocket receive loop, invokes the handler callbacks (`OnBatch`, `OnEnd`, +`OnExecDone`, or `OnError`), and returns only after the query terminates. +That makes operations easy to sequence: -```mermaid -flowchart LR +```csharp +await client.ExecuteAsync("CREATE TABLE trades (...) ...", ddlHandler); +// Table exists by this point +await client.ExecuteAsync("INSERT INTO trades VALUES (...) 
...", dmlHandler); +// Data is committed by this point +await client.ExecuteAsync("SELECT * FROM trades", selectHandler); +// Results have been fully consumed by this point +``` -A[New] --> B(Transaction) +One `QueryClient` owns one WebSocket and runs **one query at a time**. To run +queries in parallel, create one client per concurrent caller — the same +multi-publisher pattern as for `Sender`. -B --> D(Symbol) -D --> E -B --> E(Column) -F --> D -D --> F(At) -E --> F -F --> E -F --> G(Commit) -G --> B -G --> H(Dispose) -``` +### Building a query client -One way to use this route effectively is to assign a single `Sender` per table, -and then use transactions for each sender. This minimises server-side overhead -by reducing how many tables are submitted to from different connections. +```csharp +using QuestDB; +using QuestDB.Qwp.Query; + +await using var client = await QueryClient.NewAsync("ws::addr=localhost:9000;"); +``` -It is still recommended to enable deduplication keys on your tables. This is -because an early request timeout, or failure to read the response stream, could -cause an error in the client, even though the server was returning a success -response. Therefore, making the table idempotent is best to allow for safe -retries. With TCP, this is a much greater risk. +| Factory | Returns | Notes | +|---|---|---| +| `QueryClient.New(string connStr)` | `IQwpQueryClient` | Synchronous. Hops the threadpool to avoid sync-over-async deadlocks on UI / classic ASP.NET. | +| `QueryClient.New(QueryOptions options)` | `IQwpQueryClient` | For programmatic configuration. | +| `QueryClient.NewAsync(string connStr, CancellationToken)` | `Task<IQwpQueryClient>` | **Preferred from async code.** | +| `QueryClient.NewAsync(QueryOptions, CancellationToken)` | `Task<IQwpQueryClient>` | Same, programmatic. | +| `QueryClient.FromEnv()` | `IQwpQueryClient` | Reads the connect string from `QDB_CLIENT_CONF`. | -### Opening a transaction +The egress side requires .NET 7+, the same minimum as the QWP sender.
+`QueryClient` is constructed and connected up-front: by the time the factory +returns, the WebSocket upgrade has completed and the negotiated server +metadata is available via `client.ServerInfo`, `client.NegotiatedVersion`, +and `client.NegotiatedCompression`. -To start a `Transaction`, and pass the name of the table. +### Executing SELECT queries ```csharp -sender.Transaction("foo"); -``` +await client.ExecuteAsync( + "SELECT ts, symbol, price FROM trades WHERE symbol = 'ETH-USD' LIMIT 100", + new PrintHandler()); + +internal sealed class PrintHandler : QwpColumnBatchHandler +{ + public override void OnBatch(QwpColumnBatch batch) + { + for (var row = 0; row < batch.RowCount; row++) + { + if (batch.IsNull(2, row)) continue; + var ts = batch.GetLongValue(0, row); // TIMESTAMP — microseconds + var sym = batch.GetString(1, row); // SYMBOL — null for NULL + var price = batch.GetDoubleValue(2, row); + Console.WriteLine($"{ts} {sym} {price}"); + } + } -The sender will return errors if you try to specify an alternate table whilst a -transaction is open. + public override void OnEnd(long totalRows) => + Console.WriteLine($"done: {totalRows} rows"); -### Adding data + public override void OnError(byte status, string message) => + Console.Error.WriteLine($"query failed: 0x{status:X2} {message}"); +} +``` -Add data to a transaction in the usual way, but without calling `Table` between -rows. +The `QwpColumnBatch` instance — and every span its accessors return — is +**reused across batches**. Do not store a reference past the `OnBatch` +invocation; copy any string / array data you need to keep. + +### Reading result batches + +`QwpColumnBatch` exposes typed accessors for every QuestDB column type. All +value accessors return a zero-like sentinel (`0`, `false`, `'\0'`, `null`, +`-1`, `Guid.Empty`, `BigInteger.Zero`, empty span) for a NULL cell; call +`IsNull(col, row)` first to disambiguate from a legal zero value. 
+ +| Accessor | QuestDB column types | +|---|---| +| `IsNull(col, row)` | all types — call before any value accessor when the column is nullable | +| `GetBoolValue(col, row)` | `BOOLEAN` | +| `GetByteValue(col, row)` / `GetSByteValue(col, row)` | `BYTE` (uint8 or sbyte view) | +| `GetShortValue(col, row)` | `SHORT` | +| `GetCharValue(col, row)` | `CHAR` (UTF-16 code unit) | +| `GetIntValue(col, row)` | `INT`, `IPv4` | +| `GetIPv4Value(col, row)` | `IPv4` (packed `int`; same bits as `GetIntValue`) | +| `GetLongValue(col, row)` | `LONG`, `TIMESTAMP`, `TIMESTAMP_NS`, `DATE` (see units below) | +| `GetTimestampValue(col, row)` | `TIMESTAMP` / `TIMESTAMP_NS` (alias for `GetLongValue`; consult `GetColumnWireType` for the unit) | +| `GetDateValue(col, row)` | `DATE` (millis since Unix epoch; alias for `GetLongValue`) | +| `GetFloatValue(col, row)` | `FLOAT` | +| `GetDoubleValue(col, row)` | `DOUBLE` | +| `GetStringSpan(col, row)` | `VARCHAR`, `SYMBOL` (UTF-8 bytes; valid only during the `OnBatch` call) | +| `GetString(col, row)` | any column — best-effort allocating string; `null` for NULL | +| `GetSymbol(col, row)` / `GetSymbolId(col, row)` | `SYMBOL` (managed string / dictionary id) | +| `GetSymbolForId(col, dictId)` / `GetSymbolDictSize(col)` | `SYMBOL` dictionary access | +| `GetBinarySpan(col, row)` | `BINARY` (raw bytes; valid only during the `OnBatch` call) | +| `GetUuid(col, row)` / `GetUuidLo(col, row)` / `GetUuidHi(col, row)` | `UUID` (as `Guid`, or as 64-bit halves on the QWP wire layout) | +| `GetLong256(col, row)` (BigInteger) / `GetLong256(col, row, out w0, out w1, out w2, out w3)` | `LONG256` (BigInteger, or four 64-bit limbs least → most significant) | +| `GetDecimal64UnscaledValue(col, row)` + `GetDecimalScale(col)` | `DECIMAL64` | +| `GetDecimal128Lo(col, row)` / `GetDecimal128Hi(col, row)` + `GetDecimalScale(col)` | `DECIMAL128` (two int64 limbs) | +| `GetDecimal256(col, row, out ll, out lh, out hl, out hh)` + `GetDecimalScale(col)` | `DECIMAL256` (four 
int64 limbs least → most significant) | +| `GetGeohashValue(col, row)` + `GetGeohashPrecisionBits(col)` | `GEOHASH` (packed bits + per-column precision; `-1` value for NULL) | +| `GetDoubleArraySpan(col, row)` / `GetDoubleArrayElements(col, row)` | `DOUBLE[]` (row-major, flattened) | +| `GetLongArraySpan(col, row)` / `GetLongArrayElements(col, row)` | `LONG[]` (row-major, flattened) | +| `GetArrayNDims(col, row)` / `GetArrayShape(col, row)` | array dimensionality and shape (per row) | +| `GetColumnName(col)` / `GetColumnWireType(col)` / `ColumnCount` / `RowCount` / `BatchSeq` / `RequestId` | column / batch metadata | + +`GetColumnWireType(col)` returns the `QwpTypeCode` of the column; pair it +with the type-specific accessor when the column type is not known statically. + +### DDL and DML statements + +Non-`SELECT` statements (`CREATE TABLE`, `INSERT`, `UPDATE`, `ALTER`, `DROP`, +`TRUNCATE`) run through the same `ExecuteAsync`. The server emits `EXEC_DONE` +instead of result batches — overload `OnExecDone` to consume it: ```csharp -// add a symbol, integer column, and end with current timestamp -sender.Symbol("bah", "baz").Column("num", 123).At(DateTime.UtcNow); +await client.ExecuteAsync( + "CREATE TABLE trades (" + + "ts TIMESTAMP, symbol SYMBOL, price DOUBLE, amount LONG" + + ") TIMESTAMP(ts) PARTITION BY DAY WAL", + new DdlHandler()); + +internal sealed class DdlHandler : QwpColumnBatchHandler +{ + public override void OnExecDone(byte opType, long rowsAffected) => + Console.WriteLine($"done: opType={opType} rows={rowsAffected}"); + + public override void OnError(byte status, string message) => + Console.Error.WriteLine($"DDL/DML failed: 0x{status:X2} {message}"); +} ``` -### Closing a transaction +`rowsAffected` reports the row count for `INSERT` / `UPDATE` / `DELETE`. Pure +DDL (`CREATE`, `DROP`, `ALTER`, `TRUNCATE`) reports `0`. -Commit transactions and flush using `Commit` or `CommitAsync`. 
This will flush -data to the database, and remove the transactional state. +### Bind parameters + +Parameterised queries use a `QwpBindSetter` delegate. It receives a +`QwpBindValues` and **must set indices in strict ascending order starting at +zero** — gaps and reuses throw `IngressError`. Bind indices are 0-based +(`$1` → index `0`): ```csharp -await sender.CommitAsync(); +const string sql = + "SELECT ts, symbol, price, amount FROM trades " + + "WHERE symbol = $1 AND price >= $2 LIMIT 1000"; + +foreach (var symbol in new[] { "ETH-USD", "BTC-USD" }) +{ + await client.ExecuteAsync( + sql, + binds => + { + binds.SetVarchar(0, symbol); + binds.SetDouble(1, 2000.0); + }, + new PrintHandler()); +} ``` -Alternatively, if you wish to discard the transaction, you can use `Rollback`. -This will clear the buffer and transactional state, without sending data to the -server. +| Setter | Bind type | +|---|---| +| `SetBoolean(index, bool)` | `BOOLEAN` | +| `SetByte(index, byte)` | `BYTE` (uint8) | +| `SetShort(index, short)` | `SHORT` | +| `SetChar(index, char)` | `CHAR` | +| `SetInt(index, int)` | `INT` | +| `SetLong(index, long)` | `LONG` | +| `SetFloat(index, float)` | `FLOAT` | +| `SetDouble(index, double)` | `DOUBLE` | +| `SetDate(index, long millis)` | `DATE` | +| `SetTimestampMicros(index, long)` | `TIMESTAMP` | +| `SetTimestampNanos(index, long)` | `TIMESTAMP_NS` | +| `SetVarchar(index, string?)` | `VARCHAR` / `STRING` / `SYMBOL` (`null` ⇒ NULL bind) | +| `SetUuid(index, Guid)` / `SetUuid(index, long lo, long hi)` | `UUID` | +| `SetLong256(index, BigInteger)` (non-negative) / `SetLong256(index, long w0, long w1, long w2, long w3)` | `LONG256` | +| `SetGeohash(index, int precisionBits, long value)` | `GEOHASH` (1–60 bits) | +| `SetDecimal64(index, int scale, long unscaled)` | `DECIMAL64` (`scale` 0–18) | +| `SetDecimal128(index, int scale, long lo, long hi)` | `DECIMAL128` (`scale` 0–38) | +| `SetDecimal256(index, int scale, long ll, long lh, long hl, long hh)` | 
`DECIMAL256` (`scale` 0–76) | + +Up to `1024` bind parameters are accepted per query. + +To bind a typed NULL — necessary when the placeholder type would otherwise +be inferred from the value — use `SetNull` with the wire type code, or the +type-specific overloads that carry scale / precision: ```csharp -sender.Rollback(); +binds => +{ + binds.SetVarchar(0, null); // null VARCHAR (also SetNull(0, QwpTypeCode.Varchar)) + binds.SetNull(1, QwpTypeCode.Long); // null LONG + binds.SetNullGeohash(2, precisionBits: 20); // null GEOHASH with explicit precision + binds.SetNullDecimal64(3, scale: 4); // null DECIMAL64 with explicit scale +}; ``` -## Misc +### Cancellation + +There are two ways to cancel an in-flight query, and they differ in whether +the connection survives: + +- **`client.Cancel()`** — cooperative. Posts a QWP `CANCEL` frame to the + server; the query terminates with `OnError(status=0x0A, …)` (or, if the + server raced to finish, a normal `OnEnd`). The WebSocket stays open and + the client is reusable for the next `ExecuteAsync`. `Cancel()` is + thread-safe and a no-op when no query is in flight. It does **not** + interrupt an in-progress `ReceiveAsync`; if the server hangs and never + acknowledges, `ExecuteAsync` will not return. +- **`CancellationToken` cancellation** — terminal. Cancelling the token + passed to `ExecuteAsync` tears down the WebSocket; the client transitions + to a non-recoverable state. Use it as a hard stop when cooperative cancel + is not viable. + +### Query error status codes + +`OnError(byte status, string message)` carries the QWP wire status byte. 
The +codes the server raises today: + +| Code | Name | Description | +|--------|-------------------|---------------------------------------------------| +| `0x03` | `SchemaMismatch` | Bind parameter type incompatible with placeholder | +| `0x05` | `ParseError` | SQL syntax error or malformed message | +| `0x06` | `InternalError` | Server-side execution failure | +| `0x08` | `SecurityError` | Authorization failure | +| `0x0A` | `Cancelled` | Query terminated by `CANCEL` | +| `0x0B` | `LimitExceeded` | Protocol limit hit (oversized payload, bind cap) | + +`OnError` can arrive before any `OnBatch` (parse failure, schema mismatch on +binds) or mid-stream (storage failure, server shutdown). Once `OnError` +fires, no further frames arrive for that query — the next `ExecuteAsync` on +the same client starts fresh, unless the failure was a transport-level +exception thrown out of `ExecuteAsync` (which is terminal — see Failover +below). + +### Failover + +When multiple addresses are listed in `addr=`, the query client tries them +in order on connect and on every mid-stream reconnect. Egress failover is +**per-query**: the loop runs within a single `ExecuteAsync` call; between +queries the client uses whichever endpoint last succeeded. + +| Connect-string key | Default | Description | +|---|---|---| +| `failover` | `on` | Master switch for per-query reconnect-and-replay. | +| `failover_max_attempts` | `8` | Max reconnect attempts per query. | +| `failover_backoff_initial_ms` | `50` | First post-failure sleep. | +| `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep. | +| `failover_max_duration_ms` | `30000` | Total wall-clock budget per query (`0` ⇒ unbounded). The loop ends when either this or `failover_max_attempts` fires first. | +| `target` | `any` | Endpoint role filter: `any` (STANDALONE, PRIMARY, PRIMARY_CATCHUP, REPLICA), `primary` (STANDALONE, PRIMARY, PRIMARY_CATCHUP), or `replica` (REPLICA only). 
| +| `zone` | unset | Opaque zone hint; with `target=any` / `target=replica`, prefers endpoints whose advertised `zone_id` matches. Ignored with `target=primary`. | + +:::warning Failover requires multiple endpoints + +Failover rotates across the addresses listed in `addr=`. With a single +address, there is no other host to try and the loop exhausts after one +attempt regardless of `failover_max_attempts`. For failover to be useful, +provide at least two addresses. -### Cancelling rows - -Cancel the current line using `CancelRow`. +::: -This must be called before the row is complete, as otherwise it may have been -sent already. +**Handling partial results.** When the connection fails over mid-stream the +server replays the query from scratch — the client invokes +`OnFailoverReset(QwpServerInfo?)` before the first replayed batch arrives so +the handler can drop any accumulated state: ```csharp -sender.Table("foo").Symbol("bah", "baz").CancelRow(); // cancels the current row -sender.Table("foo").Symbol("bah", "baz").At(DateTime.UtcNow); // invalid - no row to cancel +public override void OnFailoverReset(QwpServerInfo? newNode) +{ + Console.WriteLine($"failover to {newNode?.NodeId ?? ""}"); + results.Clear(); // drop partial rows; server will resend from row 0 +} ``` -This can be useful if a row is being built step-by-step, and an error is thrown. -The user can cancel the row and preserve the rest of the buffer that was built -correctly. +If `OnFailoverReset` itself throws, the in-flight query is abandoned and the +exception bubbles out of `ExecuteAsync`. `OnFailoverReset` only fires +mid-stream; reconnects that happen between queries are handled internally +and do not invoke the callback. -### Trimming the buffer +**Role mismatch.** If the requested `target=` cannot be satisfied by any +endpoint, the factory or the next `ExecuteAsync` throws +`QwpRoleMismatchException`. 
Its `Target` property echoes the requested +filter; `LastObserved` carries the last `QwpServerInfo` the client saw, so +your application can distinguish "no primary available" from +"all endpoints unreachable". -Set properties in the configuration string to control the buffer size. +**Authentication failure is terminal.** A `401` / `403` from any failover +candidate aborts the loop without trying the remaining hosts — replaying an +unsupported credential against every host wastes time and floods server +logs. -It may be that the case that the buffer needs to grow earlier and shrink later. +### Compression -In this scenario, the user can call `Truncate`. This will trim the internal -buffer, removing extra pages (each of which is the size of `init_buf_size`), -reducing overall memory consumption: +Negotiate zstd compression to reduce egress bandwidth on large result sets: ```csharp -using var sender = Sender.New("http::addr=localhost:9000;init_buf_size=1024;"); -for (int i = 0; i < 100_000; i++) { - sender.Table("foo").Column("num", i).At(DateTime.UtcNow); -} -await sender.SendAsync(); // buffer is now flushed and empty -sender.Truncate(); // buffer is trimmed back to `init_buf_size` +await using var client = await QueryClient.NewAsync( + "ws::addr=localhost:9000;compression=zstd;compression_level=3;"); ``` -### Clearing the buffer +| Value | Behaviour | +|---|---| +| `raw` (default) | No compression — sent as `raw` in the upgrade header. | +| `zstd` | Demand zstd; the server falls back to raw per-batch when raw is smaller. | +| `auto` | Advertise both; the server picks zstd if it supports it, else raw. | + +`compression_level` is in `[1, 9]` (zstd levels). Inspect +`client.NegotiatedCompression` after connect to see what the server actually +chose. Batches decompress transparently — your `OnBatch` code is unchanged. 
+ +### Query connect-string reference + +The connect string is shared with the ingest sender; the query parser +accepts the full union and silently ignores the keys that only the sender +acts on, so one connect string drives both clients without erroring. The +keys it honours: + +| Category | Keys | +|---|---| +| Addressing | `addr` (one or comma-separated `host:port` entries), `path` (defaults to `/read/v1`), `protocol` (auto-derived from the `ws::` / `wss::` scheme) | +| TLS | `tls_verify`, `tls_roots`, `tls_roots_password` | +| Auth | `username` / `password` (HTTP Basic), `token` (Bearer), `auth` (pre-built `Authorization` header), `auth_timeout_ms` | +| Routing | `target`, `zone`, `client_id` | +| Failover | `failover`, `failover_max_attempts`, `failover_backoff_initial_ms`, `failover_backoff_max_ms`, `failover_max_duration_ms` | +| Streaming | `compression`, `compression_level`, `max_batch_rows`, `initial_credit` | + +`initial_credit` (bytes; `0` = unbounded) caps how much data the server may +emit before pausing for a `CREDIT` frame from the client — useful when a +single result is much larger than the consumer's working set. The client +auto-replenishes credit per consumed batch. + +`auth`, `username`/`password`, and `token` are mutually exclusive; setting +two raises `IngressError`. Control characters are rejected in all string +values (connect-string parsing is strict). + +## Misc -Keep the sender, but clear the internal buffer. +### Cancelling a row -This can be performed using `Clear`. +`CancelRow` discards the partially-built current row, before it is finished: ```csharp -sender.Clear(); // empties the internal buffer +sender.Table("trades").Symbol("symbol", "ETH-USD").CancelRow(); ``` -## Security - -_QuestDB Enterprise offers native TLS support_ - -### TLS +### Buffer management -Enable TLS via the `https` or `tcps` protocol, along with other associated -configuration. 
+`Truncate()` trims the internal buffer back to `init_buf_size`; `Clear()` +empties it without sending. Buffer growth is bounded by `init_buf_size` / +`max_buf_size`. -TLS is supported only by [QuestDB Enterprise](/docs/security/tls/) version of -QuestDB. +## Closing the sender -For development purposes, the verification of TLS certificates can be disabled: +Dispose the sender to flush and drain in-flight frames. Prefer `await using` +(or `DisposeAsync`) so the close path is non-blocking and surfaces delivery +errors: ```csharp -using var sender = Sender.New("https::addr=localhost:9000;tls_verify=unsafe_off;"); +await using var sender = Sender.New("ws::addr=localhost:9000;"); +// ... ingest ... +// DisposeAsync drains in-flight ACKs, bounded by close_flush_timeout_millis (default 5000). ``` -### HTTP TLS with Basic Authentication +With `sf_dir` set, anything still un-acked at close is persisted to disk so a +later sender with the same `sf_dir` / `sender_id` replays it. + +## Configuration reference + +For the full list of connect-string keys and defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). Common +WebSocket options: + +| Key | Default | Description | +|---|---|---| +| `addr` | required | One or more `host:port` entries, comma-separated or repeated. Default port `9000`. | +| `username` / `password` | unset | HTTP basic auth. | +| `token` | unset | Bearer token auth (Enterprise). | +| `auth_timeout_ms` | 15000 | WebSocket upgrade timeout. | +| `tls_verify` / `tls_roots` / `tls_roots_password` | — | TLS configuration (`wss` only). | +| `auto_flush` / `auto_flush_rows` / `auto_flush_interval` / `auto_flush_bytes` | `on` / 1000 / 100 ms / 8 MiB | Auto-flush triggers. | +| `sf_dir` / `sender_id` | unset / `default` | Store-and-forward. | +| `request_durable_ack` | `off` | Wait for durable upload (Enterprise). | +| `reconnect_max_duration_millis` | 300000 | Per-outage reconnect budget. 
| +| `close_flush_timeout_millis` | 5000 | Bound on the drain at dispose. | + +## Migration from ILP (HTTP/TCP) + +The `Table` / `Symbol` / `Column` / `At` builder is unchanged. To switch a +sender to QWP/WebSocket: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +|---|---|---| +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` | +| Factory | `Sender.New(...)` returns `ISender` | Same; or `Sender.NewQwp(...)` returns `IQwpWebSocketSender` directly (skip the `is IQwpWebSocketSender` cast for QWP-only methods like `PingAsync`, `ColumnDecimal64`, FSN tracking) | +| Type surface | ILP textual types | Full QuestDB type system (DECIMAL64/128/256, BYTE, SHORT, FLOAT, DATE, IPv4, GEOHASH, LONG256, BINARY, n-D arrays) via `IQwpWebSocketSender` | +| Error model | Synchronous on `Send` | Async — observed via [`error_handler`](#error-handler), FSN / `seqTxn` watermarks | +| Transactions | Supported | Not supported (use SF + DEDUP) | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | HTTP only | Built in (comma-separated `addr`) | +| Querying | Not available | [`QueryClient`](#querying-and-sql-execution) on the same NuGet package | +| Minimum runtime | .NET 6.0 | .NET 7.0 | + +The minimal swap is "change the connect string from `http::` to `ws::` (or +`https::` to `wss::`) and drop any transaction calls"; reach for +`Sender.NewQwp(...)` when the application also needs the QWP-only column +types, delivery watermarks, or `PingAsync`. + +## Full example: ingestion and querying with failover + +This example combines a multi-host ingest sender with the recreate-on- +terminal-failure pattern for the query client. It uses `Sender.NewQwp` for +ingest (so the QWP-only methods are directly reachable), TLS + token auth, +store-and-forward, and a connection listener. ```csharp -// Runs against QuestDB Enterprise, demonstrating HTTPS and Basic Authentication support. 
+using QuestDB; +using QuestDB.Qwp.Query; +using QuestDB.Senders; +using QuestDB.Utils; + +const string ingestConnStr = + "wss::addr=db-primary:9000,db-replica:9000;" // Enterprise: wss + multi-host + + "token=your_bearer_token;" // Enterprise: token auth + + "tls_verify=unsafe_off;" // test only! + + "sf_dir=/var/lib/myapp/qdb-sf;" // disk-backed durability + + "sender_id=ingest-1;" // distinct per process + + "reconnect_max_duration_millis=300000;"; + +const string queryConnStr = + "wss::addr=db-primary:9000,db-replica:9000;" + + "token=your_bearer_token;" + + "tls_verify=unsafe_off;" // test only! + + "target=replica;" // offload reads + + "failover=on;failover_max_attempts=8;" + + "failover_max_duration_ms=30000;"; + +// ─── Ingestion ────────────────────────────────────────────────────── + +var ingestOptions = Sender.Configure(ingestConnStr); +ingestOptions.error_handler = err => + Console.Error.WriteLine( + $"batch rejected: category={err.Category} table={err.TableName} " + + $"fsn=[{err.FromFsn},{err.ToFsn}] msg={err.ServerMessage}"); +ingestOptions.ConnectionListener = new IngestListener(); + +await using var sender = Sender.NewQwp(ingestOptions); + +for (var i = 0; i < 100; i++) +{ + await sender.Table("trades") + .Symbol("symbol", "ETH-USD") + .Symbol("side", i % 2 == 0 ? "buy" : "sell") + .Column("price", 2615.54 + i * 0.01) + .Column("amount", 0.001 * (i + 1)) + .AtAsync(DateTime.UtcNow); +} -using var sender = - Sender.New("https::addr=localhost:9000;tls_verify=unsafe_off;username=admin;password=quest;"); -``` +// Bound the publish on a known FSN, then drain remaining ACKs on dispose. 
+long fsn = await sender.FlushAndGetSequenceAsync(); +await sender.AwaitAckedFsnAsync(fsn, TimeSpan.FromSeconds(10)); -### TCP TLS with JWK Authentication +// Connection events you may see in IngestListener.OnEvent: +// Connected db-primary:9000 — initial connection +// Disconnected db-primary:9000 — primary dropped +// EndpointAttemptFailed db-primary:9000 — retries during outage +// FailedOver db-replica:9000 — replica took over +// +// With sf_dir set, unacked frames are persisted to disk during the +// outage and replayed once the new primary is reachable. -```csharp -// Demonstrates TCPS connection against QuestDB Enterprise -using var sender = - Sender.New( - "tcps::addr=localhost:9009;tls_verify=unsafe_off;username=admin;token=NgdiOWDoQNUP18WOnb1xkkEG5TzPYMda5SiUOvT1K0U=;"); -// See: /docs/ingestion/ilp/authenticate -``` +// ─── Querying (recreate-on-terminal pattern) ──────────────────────── + +// The QueryClient enters a terminal state once the failover budget is +// exhausted (or on CancellationToken cancel, AuthFailed, or +// QwpRoleMismatchException). The application must Dispose the dead +// client and build a new one. This loop encodes that contract. + +IQwpQueryClient? client = null; + +while (true) +{ + if (client is null) + { + try + { + client = await QueryClient.NewAsync(queryConnStr); + } + catch (Exception ex) + { + Console.Error.WriteLine($"connect failed: {ex.Message}"); + await Task.Delay(TimeSpan.FromSeconds(2)); + continue; + } + } + + try + { + await client.ExecuteAsync( + "SELECT ts, symbol, price, amount FROM trades " + + "ORDER BY ts DESC LIMIT 10", + new PrintHandler()); + } + catch (Exception ex) // failover exhausted, transport tear-down, etc. 
+ { + Console.Error.WriteLine($"query failed terminally: {ex.Message}"); + try { await client.DisposeAsync(); } catch { /* best-effort */ } + client = null; // recreate on next iteration + continue; + } + + await Task.Delay(TimeSpan.FromSeconds(2)); +} -## Next Steps -Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table -and column auto-creation. +internal sealed class IngestListener : ISenderConnectionListener +{ + public void OnEvent(SenderConnectionEvent evt) => + Console.WriteLine($"{evt.Kind} {evt.Host}:{evt.Port}"); +} -Dive deeper into the .NET client capabilities by exploring more examples -provided in the -[GitHub repository](https://github.com/questdb/net-questdb-client). +internal sealed class PrintHandler : QwpColumnBatchHandler +{ + public override void OnBatch(QwpColumnBatch batch) + { + for (var row = 0; row < batch.RowCount; row++) + { + Console.WriteLine( + $"{batch.GetLongValue(0, row)} " + + $"{batch.GetString(1, row)} " + + $"{batch.GetDoubleValue(2, row)} " + + $"{batch.GetDoubleValue(3, row)}"); + } + } -To learn _The Way_ of QuestDB SQL, see the -[Query & SQL Overview](/docs/query/overview/). + public override void OnEnd(long totalRows) => + Console.WriteLine($"({totalRows} rows)"); + + public override void OnError(byte status, string message) => + Console.Error.WriteLine($"query error 0x{status:X2}: {message}"); + + public override void OnFailoverReset(QwpServerInfo? newNode) + { + // Fires only when failover happens mid-query. Clear any + // accumulated partial results — the server will resend from row 0. + Console.WriteLine( + $"failover reset to node={newNode?.NodeId ?? ""} " + + $"role={newNode?.RoleName ?? ""}"); + } +} +``` -Should you encounter any issues or have questions, the -[Community Forum](https://community.questdb.com/) is a vibrant platform for -discussions. 
+Notes on the pattern: + +- **Ingestion failover is continuous** — the sender's reconnect loop + (`reconnect_max_duration_millis`, default 5 min) walks the address list + transparently and resumes once a healthy host is reachable. The + application keeps publishing. +- **Egress failover is per-query** — the loop runs only inside one + `ExecuteAsync`. A total outage that exceeds `failover_max_duration_ms` + leaves the `QueryClient` terminal; the `recreate-on-catch` outer loop is + the supported recovery shape. +- **Connect strings are shared-vocabulary, side-private** — the same + `ws::` / `wss::` URL works for both sides. Each parser silently ignores + the keys belonging to the other half. The ingest sender pins QWP v1 and + does not read `SERVER_INFO`, so the `zone=` key is accepted but ignored + on ingress; egress honours it for replica preference when + `target=any|replica`. + +## Next steps + +Explore more examples in the +[GitHub repository](https://github.com/questdb/net-questdb-client), and read +[Querying and SQL execution](#querying-and-sql-execution) on this page to +add SQL reads on the same WebSocket transport. + +For SQL reference material, see the [Query & SQL overview](/docs/query/overview/). + +Need help? Visit the [Community Forum](https://community.questdb.com/). diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index dabd5ab3c..eb4fdfa72 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -1,293 +1,1277 @@ --- -title: Go Client Documentation +slug: /connect/clients/go +title: Go client for QuestDB +sidebar_label: Go description: - "Dive into QuestDB using the Go ingestion client for high-performance, - insert-only operations. Unlock peak time series data ingestion." + "QuestDB Go client for high-throughput data ingestion and streaming SQL + queries over the QWP binary protocol." 
--- -import { ILPClientsTable } from "@theme/ILPClientsTable" +import { RemoteRepoExample } from "@theme/RemoteRepoExample" -QuestDB supports the Go ecosystem, offering a Go client designed for -high-performance data ingestion, tailored specifically for insert-only -operations. This combination of QuestDB and its Go client provides exceptional -time series data ingestion and analytical capabilities. +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" -The Go client introduces several advantages: +The QuestDB Go client connects to QuestDB over +[QWP — QuestDB Wire Protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) — a +columnar binary protocol carried over WebSocket. It supports high-throughput +data ingestion and streaming SQL queries on the same transport. -- **Automatic table creation**: No need to define your schema upfront. -- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions +Key capabilities: -This quick start guide will help you get up and running with the basic -functionalities of the Go client, covering connection setup, authentication, and -some common insert patterns. +- **Ingestion**: column-oriented batched writes with automatic table creation, + schema evolution, and optional store-and-forward durability. +- **Querying**: streaming SQL result sets, DDL and DML execution, bind + parameters, and byte-credit flow control. +- **Failover**: multi-endpoint connections with automatic reconnect across + rolling upgrades and primary migrations. - +:::tip Legacy transports -:::info - -This page focuses on our high-performance ingestion client, which is optimized for **writing** data to QuestDB. 
-For retrieving data, we recommend using a [PostgreSQL-compatible Go library](/docs/query/pgwire/go/) or our -[HTTP query endpoint](/docs/query/overview/#rest-http-api). +The client also supports ILP ingestion over HTTP and TCP for backward +compatibility. This page documents the recommended WebSocket (QWP) path. For +ILP transport details, see the +[ILP overview](/docs/connect/compatibility/ilp/overview/). ::: -## Requirements - -- Requires Go 1.19 or later. -- Assumes QuestDB is running. If it's not, refer to - [the general quick start](/docs/getting-started/quick-start/). - -## Client Installation +## Quick start -To add the QuestDB client to your Go project: +The client requires Go 1.23 or later. Add it to your module: -```toml -go get github.com/questdb/go-questdb-client/ +```bash +go get github.com/questdb/go-questdb-client/v4 ``` -## Authentication +### Ingest data -Passing in a configuration string with HTTP basic authentication: - -```Go +```go package main import ( "context" - "github.com/questdb/go-questdb-client/v4" + + qdb "github.com/questdb/go-questdb-client/v4" ) func main() { ctx := context.TODO() - client, err := questdb.LineSenderFromConf(ctx, "http::addr=localhost:9000;username=admin;password=quest;") + sender, err := qdb.LineSenderFromConf(ctx, "ws::addr=localhost:9000;") + if err != nil { + panic(err) + } + defer sender.Close(ctx) + + err = sender.Table("trades"). + Symbol("symbol", "ETH-USD"). + Symbol("side", "sell"). + Float64Column("price", 2615.54). + Float64Column("amount", 0.00044). + AtNow(ctx) if err != nil { - panic("Failed to create client") + panic(err) } - // Utilize the client for your operations... + if err := sender.Flush(ctx); err != nil { + panic(err) + } } ``` -Or, set the QDB_CLIENT_CONF environment variable and call -`questdb.LineSenderFromEnv()`. - -1. Export the configuration string as an environment variable: - ```bash - export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" - ``` -2. 
Then in your Go code: - ```Go - client, err := questdb.LineSenderFromEnv(context.TODO()) - ``` - -Alternatively, you can use the built-in Go API to specify the connection -options. +### Query data ```go package main import ( - "context" - qdb "github.com/questdb/go-questdb-client/v4" -) + "context" + "fmt" + qdb "github.com/questdb/go-questdb-client/v4" +) func main() { - ctx := context.TODO() + ctx := context.TODO() - client, err := qdb.NewLineSender(context.TODO(), qdb.WithHttp(), qdb.WithAddress("localhost:9000"), qdb.WithBasicAuth("admin", "quest")) + client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryAddress("localhost:9000")) + if err != nil { + panic(err) + } + defer client.Close(ctx) + + q := client.Query(ctx, + "SELECT symbol, price FROM trades WHERE symbol = 'ETH-USD' LIMIT 10") + defer q.Close() + + for batch, err := range q.Batches() { + if err != nil { + panic(err) + } + for row := 0; row < batch.RowCount(); row++ { + fmt.Println(batch.String(0, row), batch.Float64(1, row)) + } + } +} ``` -When using QuestDB Enterprise, authentication can also be done via REST token. -Please check the [RBAC docs](/docs/security/rbac/#authentication) for more -info. +:::caution Read before building on these snippets -## Basic Insert +The two snippets above are deliberately minimal. Three behaviors will cause +data loss, corruption, or panics if you carry the minimal form into real code: -Example: inserting executed trades for cryptocurrencies. +- **Ingestion errors are asynchronous.** `Flush` returning `nil` does **not** + mean the server accepted the rows. Schema, parse, and write rejections are + delivered out of band. Register an error handler. See + [Ingestion errors](#ingestion-errors). +- **A sender or query client is not safe for concurrent use.** Use one per + goroutine. See [Concurrency](#concurrency). +- **A query batch is valid only inside its loop iteration.** Some accessors + alias the network buffer. Copy out anything you keep. 
See + [Reading result batches](#reading-result-batches). -Without authentication and using the current timestamp: +Building with multi-host failover? It adds exactly three rules on top of the +single-host code, listed up front in +[Failover and high availability](#failover-and-high-availability). Single-host +applications can ignore them. -```Go -package main +::: -import ( - "context" - "github.com/questdb/go-questdb-client/v4" -) +## Authentication and TLS -func main() { - ctx := context.TODO() +Authentication happens at the HTTP level during the WebSocket upgrade, before +any binary frames are exchanged. The same mechanisms work for both the +`LineSender` (ingestion) and the `QwpQueryClient` (querying). - client, err := questdb.LineSenderFromConf(ctx, "http::addr=localhost:9000;") - if err != nil { - panic("Failed to create client") - } +### HTTP basic auth - err = client.Table("trades"). - Symbol("symbol", "ETH-USD"). - Symbol("side", "sell"). - Float64Column("price", 2615.54). - Float64Column("amount", 0.00044). - AtNow(ctx) +```go +// Ingestion +sender, err := qdb.LineSenderFromConf(ctx, + "wss::addr=db.example.com:9000;username=admin;password=quest;") - if err != nil { - panic("Failed to insert data") - } +// Querying +client, err := qdb.QwpQueryClientFromConf(ctx, + "wss::addr=db.example.com:9000;username=admin;password=quest;") +``` - err = client.Flush(ctx) - if err != nil { - panic("Failed to flush data") - } +The options API exposes the same settings: + +```go +sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("db.example.com:9000"), + qdb.WithTls(), + qdb.WithBasicAuth("admin", "quest")) +``` + +### Token auth (Enterprise, recommended) + +Token authentication avoids the per-request overhead of basic auth and is +the recommended path for Enterprise deployments. 
+ +```go +sender, err := qdb.LineSenderFromConf(ctx, + "wss::addr=db.example.com:9000;token=your_bearer_token;") + +client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryAddress("db.example.com:9000"), + qdb.WithQwpQueryTls(), + qdb.WithQwpQueryBearerToken("your_bearer_token")) +``` + +The token is a **static credential**: the client sends exactly the string +you pass and never refreshes or renews it. Acquire it out of band — QuestDB +Enterprise issues bearer tokens through its +[OpenID Connect flow](/docs/security/oidc/) — and manage its lifetime +yourself. There is no token-refresh callback: when the token expires or is +rotated, construct a new sender or query client with the new token. An +expired or rejected token surfaces as an authentication failure (see +[Connection-level errors](#connection-level-errors)). + +### Production example (TLS + token + multi-host) + +A realistic Enterprise deployment combines `wss`, token auth, and a +multi-host `addr` list. The `target` key controls which server roles the +client will connect to: `primary` for the authoritative write node, +`replica` for read-only replicas, or `any` (default) for either. + +```go +// Ingestion: connect to any writeable node +sender, err := qdb.LineSenderFromConf(ctx, + "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ + "token=your_bearer_token;") + +// Querying: prefer a replica to offload the primary +client, err := qdb.QwpQueryClientFromConf(ctx, + "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ + "token=your_bearer_token;target=replica;") +``` + +### TLS trust store + +TLS is enabled by the `wss` schema (or `qdb.WithTls()`). The Go client +verifies the server certificate against the **operating-system trust +store**. It does **not** support a custom trust store: the `tls_roots` / +`tls_roots_password` connect-string keys (a Java-keystore feature) are +rejected by the Go connect-string parser. To trust a private CA, install it +in the host trust store. 
For test-only certificate-verification bypass, see +`tls_verify` in the +[TLS section](/docs/connect/clients/connect-string#tls) of the connect +string reference. + +## Creating the client + +### From a connect string + +The connect string format is `<schema>::<key>=<value>;<key>=<value>;...;`. Use +`ws` for plain WebSocket or `wss` for TLS: + +```go +sender, err := qdb.LineSenderFromConf(ctx, "ws::addr=localhost:9000;") + +client, err := qdb.QwpQueryClientFromConf(ctx, "ws::addr=localhost:9000;") +``` + +For the full list of connect-string keys, see the +[connect string reference](/docs/connect/clients/connect-string/). + +### From an environment variable + +Set `QDB_CLIENT_CONF` to avoid hard-coding credentials: + +```bash +export QDB_CLIENT_CONF="wss::addr=db.example.com:9000;username=admin;password=quest;" +``` + +```go +sender, err := qdb.LineSenderFromEnv(ctx) +``` + +### Using the options API + +The options API exposes the same options as the connect string, with type-safe +Go signatures (e.g., `sf_append_deadline_millis` becomes +`qdb.WithSfAppendDeadline(30*time.Second)`). For the full list of keys, see +the [connect string reference](/docs/connect/clients/connect-string/). + +`NewLineSender` requires exactly one transport option (`qdb.WithQwp()` here); +`LineSenderFromConf` infers the transport from the `ws`/`wss` schema instead. +An error handler can only be set through the options API: + +```go +sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("localhost:9000"), + qdb.WithAutoFlushRows(500), + qdb.WithAutoFlushInterval(50*time.Millisecond), + qdb.WithErrorHandler(func(e *qdb.SenderError) { /* see Error handling */ })) + +client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryAddress("localhost:9000"), + qdb.WithQwpQueryInitialCredit(256*1024)) +``` + +## Data ingestion + +### Concurrency + +A `LineSender` owns a single connection and is **not safe for concurrent +use**. Sharing one across goroutines corrupts the buffer and interleaves +rows.
Create one sender per goroutine, or hand rows to a single dedicated +writer goroutine through a channel. + +Connection pooling (`LineSenderPool`) targets the stateless HTTP transport and +is not available for QWP, so it is not the answer to QWP concurrency. + +### General usage pattern + +1. Create a sender via `qdb.LineSenderFromConf()` or `qdb.NewLineSender()`. +2. Call `Table(name)` to select a table. +3. Call column methods to add values: + - `Symbol(name, value)` + - `StringColumn(name, value)`, `BoolColumn(name, value)` + - `Int64Column(name, value)`, `Float64Column(name, value)` + - `TimestampColumn(name, time.Time)` for non-designated timestamps + - `Long256Column(name, *big.Int)` + - `Float64Array1DColumn` / `2D` / `3D` / `NDColumn` (see + [Ingest arrays](#ingest-arrays)) + - `DecimalColumn`, `DecimalColumnFromString` (see + [Decimal columns](#decimal-columns)) +4. Call `At(ctx, time.Time)` or `AtNow(ctx)` to finalize the row. +5. Repeat from step 2, or call `Flush(ctx)` to send buffered data. +6. Call `Close(ctx)` when done. + +The call order is fixed: `Table`, then `Symbol`s, then column setters, then +`At`/`AtNow`. The fluent methods do not return errors; the first error is +latched and surfaces from `At`, `AtNow`, or `Flush`, so always check that +return value. + +:::caution The error from `At`/`AtNow`/`Flush` is only the local error + +It reports a client-side problem: a bad value, wrong call order, or +store-and-forward backpressure. Server-side rejections (schema mismatch, +parse error, write error) are **asynchronous** and are delivered to the +error handler, never returned here. A `nil` return does not mean the server +accepted the data. See [Ingestion errors](#ingestion-errors). + +::: + +Tables and columns are created automatically if they do not exist. The full +runnable example registers an error handler, the minimum correct shape for a +QWP producer: + + + +The QWP transport exposes column types that are not part of ILP. 
Type-assert +the sender to `qdb.QwpSender` with the comma-ok form (only `ws`/`wss` senders +implement it; an HTTP or TCP sender does not): + +```go +sender, err := qdb.LineSenderFromConf(ctx, "ws::addr=localhost:9000;") +qs, ok := sender.(qdb.QwpSender) +if !ok { + panic("not a QWP sender") } + +err = qs.Table("trades"). + Symbol("symbol", "ETH-USD"). + Int32Column("venue_id", 7). + CharColumn("side", 'S'). + UuidColumn("order_id", hi, lo). + AtNano(ctx, time.Now()) ``` -In this case, the designated timestamp will be the one at execution time. Let's -see now an example with an explicit timestamp, custom auto-flushing, and basic -auth. +`QwpSender` adds `ByteColumn`, `ShortColumn`, `Int32Column`, `Float32Column`, +`CharColumn`, `DateColumn`, `TimestampNanosColumn`, `UuidColumn`, +`GeohashColumn`, `Int64Array1DColumn` / `2D` / `3D`, the decimal columns, and +`AtNano` for nanosecond designated timestamps. -```Go -package main +### Null values -import ( - "context" - "github.com/questdb/go-questdb-client/v4" - "time" -) +The client has no null setter. To store a null for a column in a given row, +omit that column's setter before `At`/`AtNow`/`AtNano`. On row commit, every +column not set in the row is gap-filled with a null, so omitting a column and +writing an "explicit null" are the same operation. -func main() { - ctx := context.TODO() +The buffered column set is the union across the batch: a column first used on +a later row is backfilled with null for every earlier row still in the send +buffer. - client, err := questdb.LineSenderFromConf(ctx, "http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;") - if err != nil { - panic("Failed to create client") - } +### Ingest arrays - timestamp := time.Now() - err = client.Table("trades"). - Symbol("symbol", "ETH-USD"). - Symbol("side", "sell"). - Float64Column("price", 2615.54). - Float64Column("amount", 0.00044). 
- At(ctx, timestamp) +For 1D, 2D, and 3D `double` arrays, pass a Go slice directly: - if err != nil { - panic("Failed to insert data") - } +```go +prices := []float64{1.0842, 1.0843, 1.0841} +err = sender.Table("book").Float64Array1DColumn("levels", prices).AtNow(ctx) +``` - err = client.Flush(ctx) - // You can flush manually at any point. - // If you don't flush manually, the client will flush automatically - // when a row is added and either: - // * The buffer contains 75000 rows (if HTTP) or 600 rows (if TCP) - // * The last flush was more than 1000ms ago. - // Auto-flushing can be customized via the `auto_flush_..` params. +For higher-dimensional arrays, build an `NdArray` once and reuse it: - if err != nil { - panic("Failed to flush data") - } +```go +arr, err := qdb.NewNDArray[float64](3, 3, 3) +if err != nil { + panic(err) } +arr.Fill(1.5) +err = sender.Table("book").Float64ArrayNDColumn("cube", arr).AtNow(ctx) ``` -We recommended to use User-assigned timestamps when ingesting data into QuestDB. -Using the current timestamp hinder the ability to deduplicate rows which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +Values are stored in row-major order: the last dimension varies fastest. Use +`Set(value, positions...)` to write at specific coordinates, `Append(value)` +for sequential fills, and `Reshape(shape...)` to change the shape without +reallocating. - +Batches are decompressed automatically. + +## Error handling -## Configuration options +### Ingestion errors -The minimal configuration string needs to have the protocol, host, and port, as -in: +WebSocket ingestion uses an asynchronous error model. Batch rejections are +**not** returned from `Flush`. They are delivered to a `SenderErrorHandler` +callback. 
If you do not register one, a built-in handler logs them, but your +application is not notified and cannot dead-letter or alert, so register one +in any non-trivial producer: +```go +sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("localhost:9000"), + qdb.WithErrorHandler(func(e *qdb.SenderError) { + log.Printf("rejected: category=%s table=%s msg=%s fsn=[%d,%d]", + e.Category, e.TableName, e.ServerMessage, e.FromFsn, e.ToFsn) + })) ``` -http::addr=localhost:9000; + +Full `SenderError` field set, for logging, alerting, and support +correlation: + +| Field | Type | Use | +| ------------------ | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Category` | `Category` | Stable named class (`CategorySchemaMismatch`, `CategoryParseError`, `CategoryInternalError`, `CategorySecurityError`, `CategoryWriteError`, `CategoryProtocolViolation`, `CategoryUnknown`). The recommended switch target. | +| `ServerStatusByte` | `int` | Numeric wire status (e.g. `0x03`). `NoStatusByte` (`-1`) for `CategoryProtocolViolation`. | +| `AppliedPolicy` | `Policy` | `PolicyHalt` or `PolicyDropAndContinue` — what the send loop did. | +| `ServerMessage` | `string` | Human-readable server text. **≤ 1024 UTF-8 bytes**, English, may be empty. Safe to log; not a stable pattern-match key (switch on `Category` / `ServerStatusByte`). May echo table / column names — sanitise before forwarding to third-party error trackers. | +| `TableName` | `string` | Rejected table; empty for unknown or multi-table batches. | +| `FromFsn`,`ToFsn` | `int64` | Inclusive FSN span; join to `FlushAndGetSequence` to identify the rejected rows. | +| `MessageSequence` | `int64` | Server's per-frame wire sequence for the rejection frame. 
**Resets on reconnect** — only meaningful within one connection; round-trips verbatim against that connection's server-side logs. Not a standalone correlation key (see below). `NoMessageSequence` (`-1`) for protocol violations. | +| `DetectedAt` | `time.Time` | Client-side receipt time, for ops timelines (not for correlation). | + +The protocol does not surface a server-issued request or connection +identifier. The closest correlation handle is the `(MessageSequence, +FromFsn, ToFsn)` tuple plus the connection start time from your +application logs — `MessageSequence` resets on reconnect, so it only +disambiguates frames within a single connection. The client sends an +`X-QWP-Client-Id` header (default `go/<version>`) on the upgrade. When +filing a support ticket, include the connection start time and the +`(MessageSequence, FromFsn, ToFsn)` triple. + +The per-category policy is configurable. Resolution precedence is the policy +resolver, then the per-category policy, then the connect-string `on_*_error` +keys, then the spec defaults. `CategoryProtocolViolation` and +`CategoryUnknown` are always `PolicyHalt`: + +```go +qdb.WithErrorPolicy(qdb.CategorySchemaMismatch, qdb.PolicyDropAndContinue) +qdb.WithErrorPolicyResolver(func(c qdb.Category) qdb.Policy { ... }) +qdb.WithErrorInboxCapacity(512) +``` + +After a `PolicyHalt` rejection, the sender stops draining and the next +producer call returns the same payload as a typed error. Unwrap it with +`errors.As`, then `Close` and rebuild the sender to continue: + +```go +if err := sender.Flush(ctx); err != nil { + var se *qdb.SenderError + if errors.As(err, &se) { + // se.Category, se.ServerMessage, se.FromFsn, se.ToFsn + } +} ``` -In the Go client, you can set the configuration options via the standard config -string, which is the same across all clients, or using -[the built-in API](https://pkg.go.dev/github.com/questdb/go-questdb-client/#LineSenderOption).
+The handler runs on a dedicated dispatcher goroutine, never on the producer +goroutine. If the bounded inbox fills, surplus notifications are dropped and +counted by `QwpSender.DroppedErrorNotifications()`. -For all the extra options you can use, please check -[the client docs](https://pkg.go.dev/github.com/questdb/go-questdb-client/#LineSenderFromConf) +### Query errors -Alternatively, for a breakdown of Configuration string options available across -all clients, see the [Configuration string](/docs/ingestion/clients/configuration-string/) page. +Server-side query failures surface as a `*QwpQueryError` from the `Batches()` +iteration or the `Exec` return value: -## Next Steps +```go +for batch, err := range q.Batches() { + if err != nil { + var qe *qdb.QwpQueryError + if errors.As(err, &qe) { + log.Printf("query %d failed: 0x%02X %s", + qe.RequestId, qe.Status, qe.Message) + } + break + } + // ... +} +``` -Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table -and column auto-creation. +| Code | Name | Description | +| ------ | --------------- | ---------------------------------------------------- | +| `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | +| `0x05` | PARSE_ERROR | SQL syntax error or malformed message | +| `0x06` | INTERNAL_ERROR | Server-side execution failure | +| `0x08` | SECURITY_ERROR | Authorization failure | +| `0x09` | WRITE_ERROR | Write failure (e.g. table not accepting writes; DML) | +| `0x0A` | CANCELLED | Query terminated by `Cancel` | +| `0x0B` | LIMIT_EXCEEDED | Protocol limit hit | + +`QwpQueryError` also carries `RequestId` (the client-assigned query id — the +correlation key for support tickets and server-log matching) and `Message` +(server-supplied UTF-8, English, may be empty; safe to log, but switch on +`Status`, not on message text). Errors can arrive before any data or +mid-stream. 
Once an error is yielded, no further batches arrive for that +query. + +### Connection-level errors + +- **Authentication failure**: a `401` or `403` response before the WebSocket + upgrade completes. Terminal across all endpoints. +- **Role mismatch**: `*QwpRoleMismatchError` from `NewQwpQueryClient` when no + configured endpoint satisfies the `target=` filter. It reports the endpoints + tried, the last observed server role, and the last transport error. + +## Failover and high availability + +:::note Enterprise +Multi-host failover with automatic reconnect requires QuestDB Enterprise. +::: + +Single-host applications need nothing from this section. The simple loops +shown earlier are already correct: treating any iteration error as terminal is +always safe, including when a reconnect happens. + +If you connect to multiple hosts for failover, a correct application must do +exactly three things beyond the single-host code. This is the whole list: + +1. **Ingestion: no loop changes.** Configure multiple endpoints and a + reconnect policy; reconnection is transparent to the producer. You still + need the universal asynchronous error handling from + [Ingestion errors](#ingestion-errors). Details: + [Ingestion failover](#ingestion-failover). +2. **Querying: handle `*QwpFailoverReset`, but only if you accumulate rows.** + If you build up rows across batches, discard them on a reset and continue + iterating. If you process each batch and keep nothing, the simple + terminal-on-error loop is already correct. Pattern: + [Query failover](#query-failover). +3. **DDL/DML: `Exec` is not retried by default.** A `*QwpFailoverReset` from + `Exec` means the statement was not confirmed, not that it succeeded. + Re-issue it only if it is idempotent, or opt into + `qdb.WithQwpQueryReplayExec(true)`. Details: + [the Exec caution](#ddl-and-dml-statements). + +Everything below is the detail behind these three points. 
+ +### Multiple endpoints + +Specify comma-separated addresses in the connect string, or pass them to the +options API: + +```text +ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; +``` + +```go +client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryEndpoints("db-primary:9000", "db-replica-1:9000")) +``` + +The client tries endpoints in order and walks the list to find the next +healthy one on connection loss. + +### Ingestion failover + +The ingestion sender uses a reconnect loop with exponential backoff. Configure +it via the connect string or `qdb.WithReconnectPolicy(maxDuration, +initialBackoff, maxBackoff)`: + +| Key | Default | Description | +| ---------------------------------- | -------- | ------------------------------------ | +| `reconnect_max_duration_millis` | `300000` | Total outage budget before giving up | +| `reconnect_initial_backoff_millis` | `100` | First post-failure sleep | +| `reconnect_max_backoff_millis` | `5000` | Cap on per-attempt sleep | +| `initial_connect_retry` | `off` | Retry on first connect | + +`qdb.WithInitialConnectMode` selects `InitialConnectOff` (default), +`InitialConnectSync` (block the constructor while retrying), or +`InitialConnectAsync` (return immediately and buffer rows until connected). +Ingress is zone-blind: it pins QWP v1 and ignores the `zone=` key, so a connect +string shared with query clients works unchanged. Reconnect is transparent to +the producer; you do not change the ingestion loop for it. + +### Query failover + +The query client drives a per-query reconnect loop. On a mid-stream transport +error it reconnects and replays the query. 
+ +| Key | Default | Description | +| ----------------------------- | ------- | --------------------------------- | +| `failover` | `on` | Master switch for reconnect | +| `failover_max_attempts` | `8` | Max reconnect attempts per query | +| `failover_backoff_initial_ms` | `50` | First post-failure sleep | +| `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep | +| `failover_max_duration_ms` | `30000` | Total wall-clock failover budget per query (`0` = unbounded) | +| `target` | `any` | Role filter: `any`, `primary`, `replica` | + +The matching options are `qdb.WithQwpQueryFailover`, +`qdb.WithQwpQueryFailoverMaxAttempts`, `qdb.WithQwpQueryFailoverBackoff`, +`qdb.WithQwpQueryFailoverMaxDuration`, and `qdb.WithQwpQueryTarget`. + +You only need the pattern below if you **accumulate rows across batches and +want the query to continue transparently across a reconnect**. When failover +occurs mid-stream, `Batches()` yields a non-fatal `*QwpFailoverReset` before +the replayed batches arrive. Detect it with `errors.As`, discard the rows you +accumulated from the prior connection (the server replays from the +beginning), and continue iterating: + +```go +for batch, err := range q.Batches() { + if err != nil { + var reset *qdb.QwpFailoverReset + if errors.As(err, &reset) { + results = results[:0] // server replays from the beginning + continue + } + return err // any other error is terminal + } + // ... +} +``` + +:::warning Without the reset branch, accumulated rows are duplicated + +If you accumulate rows across batches and do **not** handle +`*QwpFailoverReset`, the rows you kept from the prior connection stay in your +buffer while the server replays the **entire** result set from the beginning +after the reconnect. The replayed rows are appended to the ones you already +have, so every pre-failover row ends up in your result set twice. 
Either +clear the accumulator on the reset (as shown above), or use the simple +terminal-on-error loop, which discards everything on any error and so cannot +duplicate. + +::: -Explore the full capabilities of the Go client via -[Go.dev](https://pkg.go.dev/github.com/questdb/go-questdb-client/). +If you do not need transparent continuation, the simple loop is correct: +returning on any error treats a reset as terminal, which the client supports +explicitly. When the failover budget is consumed, `Batches()` (and `Exec`) +return `*QwpFailoverExhaustedError`. -With data flowing into QuestDB, now it's time to for analysis. +After failover exhaustion or a total outage (all endpoints down), the query +client enters a terminal state and returns errors on every subsequent call. +Close it and create a new one. This differs from ingestion, where the +`LineSender` has a continuous reconnect loop (`reconnect_max_duration_millis`, +default 5 minutes) that spans full outages transparently. The query client +reconnects only within the scope of a single query. -To learn _The Way_ of QuestDB SQL, see the -[Query & SQL Overview](/docs/query/overview/). +:::warning Failover requires multiple endpoints -Alone? Stuck? Want help? Visit us in our -[Community Forum](https://community.questdb.com/). +Failover rotates across endpoints. With a single `addr`, there is no other +host to try, and the loop exhausts after one attempt regardless of +`failover_max_attempts`. For failover to be useful, provide at least two +addresses. + +::: + +### Observability + +`QwpSender` exposes counters for dashboards: `TotalReconnectAttempts`, +`TotalReconnectsSucceeded`, `TotalFramesReplayed`, `TotalBackpressureStalls`, +`TotalServerErrors`, and `LastTerminalError`. With `drain_orphans=on`, +`BackgroundDrainers()` snapshots the goroutines adopting unacked data from +crashed sibling senders. 
The query client exposes `ServerInfo()` and +`CurrentEndpoint()`; `QwpServerInfo.RoleName()` returns the bound node's role. + +There is no per-transition connection callback: connect, disconnect, +reconnect, and failover are not delivered as events. Observe reconnect and +failover through these counters, and terminal failures through the +[ingestion error handler](#ingestion-errors). Poll the counters from a +background goroutine: + +```go +go func() { + t := time.NewTicker(10 * time.Second) + defer t.Stop() + for range t.C { + log.Printf("qwp: reconnects=%d/%d replayed=%d stalls=%d", + qs.TotalReconnectsSucceeded(), qs.TotalReconnectAttempts(), + qs.TotalFramesReplayed(), qs.TotalBackpressureStalls()) + if e := qs.LastTerminalError(); e != nil { + // Page on-call: the sender has stopped draining. + log.Printf("qwp TERMINAL: %s", e) + } + } +}() +``` + +where `qs` is the `qdb.QwpSender` from the type assertion shown earlier. + +For background and worked configurations, see +[client failover concepts](/docs/high-availability/client-failover/concepts/), +[client failover configuration](/docs/high-availability/client-failover/configuration/), +and the +[multi-host failover](/docs/connect/clients/connect-string#failover-keys) and +[reconnect](/docs/connect/clients/connect-string#reconnect-keys) keys of the +connect string reference. + +## Concurrency and parallel queries + +:::note Phase 1 limitation +The current implementation supports a single in-flight query per connection. +Multi-query support is planned for a future release. +::: + +Neither the `LineSender` nor the `QwpQueryClient` is safe for concurrent use. +For multi-threaded workloads, use one instance per goroutine. To run queries +in parallel, create separate `QwpQueryClient` instances, one per goroutine. +`Cancel` (on a `*QwpQuery`) and `Close` are safe to call from other +goroutines, which is how you cancel an in-flight query or shut down cleanly. 
+ +## Configuration reference + +For the full list of connect-string keys and their defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). + +Common WebSocket-specific keys: + +| Key | Default | Description | +| ------------------------------- | -------- | ------------------------------------ | +| `auto_flush_rows` | `1000` | Rows before auto-flush | +| `auto_flush_interval` | `100` | Milliseconds before auto-flush | +| `sf_dir` | unset | Store-and-forward directory | +| `sender_id` | `default`| Sender slot identity for SF | +| `request_durable_ack` | `off` | Request durable upload ACK (Enterprise) | +| `reconnect_max_duration_millis` | `300000` | Ingress reconnect budget | +| `failover` | `on` | Query per-query reconnect switch | +| `compression` | `raw` | Query batch compression (`raw`, `zstd`) | + +## Migration from ILP (HTTP/TCP) + +The row-building API is unchanged across transports. The main differences: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +| --------------------- | ----------------- | ----------------------- | +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` | +| Options transport | `qdb.WithHttp()` | `qdb.WithQwp()` | +| Auto-flush rows | 75,000 | 1,000 | +| Auto-flush interval | 1,000 ms | 100 ms | +| Error model | Synchronous | Async `SenderErrorHandler` | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | Limited | Full reconnect loop | +| Querying | Not available | `QwpQueryClient` | + +The biggest behavioral change is the error model: on HTTP, `Flush` returns the +rejection synchronously; on QWP it does not. To migrate, change the connect +string from `http::` to `ws::` (or `https::` to `wss::`), register a +`SenderErrorHandler`, and adjust auto-flush settings if needed. `QwpSender` is +a superset of `LineSender`, so existing ingestion code keeps working. 
+ +## Full example: ingestion and querying with failover + +This example combines ingestion with store-and-forward and connection +observability, then queries the data back with the recreate-on-failure +pattern for egress. + +```go +package main + +import ( + "context" + "errors" + "fmt" + "math/rand" + "time" + + qdb "github.com/questdb/go-questdb-client/v4" +) + +// ─── Ingestion (options API with store-and-forward) ───────────────── + +// Multi-host with store-and-forward for failover durability. +// Without sf_dir, data buffered during an outage lives in process memory +// and is lost if the sender process dies. With sf_dir, unacknowledged +// frames are persisted to disk and replayed after reconnection. + +func ingestExample() { + ctx := context.Background() + + sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("db-primary:9000"), // Enterprise: multi-host + qdb.WithAddress("db-replica:9000"), // Enterprise: multi-host + qdb.WithTls(), // Enterprise: wss (TLS) + qdb.WithBearerToken("your_bearer_token"), // Enterprise: token auth + qdb.WithSfDir("/var/lib/myapp/qdb-sf"), // durability across outages + qdb.WithSenderId("ingest-1"), // unique per sender process + qdb.WithReconnectPolicy( + 5*time.Minute, // max outage budget + 100*time.Millisecond, // initial backoff + 5*time.Second), // max backoff + qdb.WithErrorHandler(func(e *qdb.SenderError) { + fmt.Printf("batch rejected: category=%s table=%s msg=%s\n", + e.Category, e.TableName, e.ServerMessage) + })) + if err != nil { + panic(err) + } + defer sender.Close(ctx) + + for i := 0; i < 100; i++ { + price := 1.0842 + (rand.Float64()-0.5)*0.002 + err = sender.Table("book"). + Symbol("ticker", "EURUSD"). + Float64Column("price", price). + Float64Column("size", 100000+rand.Float64()*900000). 
+ At(ctx, time.Now()) + if err != nil { + fmt.Printf("row error: %s\n", err) + } + } + if err := sender.Flush(ctx); err != nil { + fmt.Printf("flush error: %s\n", err) + } +} + +// With sf_dir set, unacknowledged frames are persisted to disk during +// the outage and replayed when the new primary becomes reachable. +// Without sf_dir, the reconnect loop still works but data is lost if +// the sender process dies. +// +// Observability (no per-event callback in Go): +// qs := sender.(qdb.QwpSender) +// qs.TotalReconnectAttempts() +// qs.TotalReconnectsSucceeded() +// qs.TotalFramesReplayed() +// qs.LastTerminalError() + + +// ─── Querying (connect string, with reconnect-on-failure) ─────────── + +// The QwpQueryClient becomes permanently dead after a total outage +// exhausts the failover budget. The application must close the dead +// client and create a new one. This pattern handles that: + +func queryExample() { + ctx := context.Background() + + connString := + "wss::addr=db-primary:9000,db-replica:9000,db-replica2:9000;" + // Enterprise: wss, multi-host + "token=your_bearer_token;" + // Enterprise: token auth + "tls_verify=unsafe_off;" + // test only! + "failover=on;" + // Enterprise: failover + "failover_max_attempts=8;" + + "failover_max_duration_ms=30000;" + + var client *qdb.QwpQueryClient + + for { + // Reconnect if the client is dead + if client == nil { + var err error + client, err = qdb.QwpQueryClientFromConf(ctx, connString) + if err != nil { + fmt.Printf("connect failed: %s\n", err) + time.Sleep(2 * time.Second) + continue + } + } + + q := client.Query(ctx, + "SELECT ts, ticker, price FROM book ORDER BY ts DESC LIMIT 10") + + rowCount := 0 + for batch, err := range q.Batches() { + if err != nil { + var reset *qdb.QwpFailoverReset + if errors.As(err, &reset) { + // Fires only when failover happens mid-query. + // Clear any accumulated partial results here. 
+ fmt.Println("failover, clearing partial results") + rowCount = 0 + continue + } + // Any other error is terminal for this client + fmt.Printf("query failed: %s\n", err) + q.Close() + client.Close(ctx) + client = nil + fmt.Println("(will reconnect on next query)") + break + } + for row := 0; row < batch.RowCount(); row++ { + ts := time.UnixMicro(batch.Int64(0, row)) + ticker := batch.String(1, row) + price := batch.Float64(2, row) + fmt.Printf("%s %s price=%.5f\n", + ts.Format("2006-01-02T15:04:05.000Z"), ticker, price) + rowCount++ + } + } + if client != nil { + q.Close() + fmt.Printf("(%d rows)\n", rowCount) + } + + time.Sleep(2 * time.Second) + } +} +``` diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index bd33241e5..92db8cb1f 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -1,51 +1,51 @@ --- -title: Java Client Documentation -description: "Reference for the questdb-client Maven artifact — the Java ILP ingestion client for QuestDB, covering setup, configuration, authentication, and error handling." +slug: /connect/clients/java +title: Java client for QuestDB +sidebar_label: Java +description: "QuestDB Java client for high-throughput data ingestion and streaming SQL queries over the QWP binary protocol." --- import Tabs from "@theme/Tabs" import TabItem from "@theme/TabItem" -import CodeBlock from "@theme/CodeBlock" +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" -import { RemoteRepoExample } from "@theme/RemoteRepoExample" +import CodeBlock from "@theme/CodeBlock" :::note -This is the reference for the QuestDB Java Client when QuestDB is used as a -server. - -For embedded QuestDB, please check our -[Java Embedded Guide](/docs/ingestion/java-embedded/). +This is the reference for the QuestDB Java client when QuestDB is used as a +server. For embedded QuestDB, see the +[Java embedded guide](/docs/connect/java-embedded/). 
::: -The QuestDB Java client is distributed as a separate Maven artifact -(`org.questdb:questdb-client`). +The QuestDB Java client connects to QuestDB over +[QWP — QuestDB Wire Protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) — a +columnar binary protocol carried over WebSocket. It supports high-throughput +data ingestion and streaming SQL queries on the same transport. -The client provides the following benefits: +Key capabilities: -- **Automatic table creation**: No need to define your schema upfront. -- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions +- **Ingestion**: column-oriented batched writes with automatic table creation, + schema evolution, and optional store-and-forward durability. +- **Querying**: streaming SQL result sets, DDL/DML execution, bind parameters, + and byte-credit flow control. +- **Failover**: multi-endpoint connections with automatic reconnect across + rolling upgrades and primary migrations. -:::info +:::tip Legacy transports -This page focuses on our high-performance ingestion client, which is optimized -for **writing** data to QuestDB. For retrieving data, we recommend using a -[PostgreSQL-compatible Java library](/docs/query/pgwire/java/) or our -[HTTP query endpoint](/docs/query/overview/#rest-http-api). +The client also supports ILP ingestion over HTTP and TCP for backward +compatibility. This page documents the recommended WebSocket (QWP) path. For +ILP transport details, see the [ILP overview](/docs/connect/compatibility/ilp/overview/). ::: ## Quick start -Add the QuestDB Java client as a dependency in your project's build configuration file. 
+Add the dependency: -The code below creates a client instance configured to use HTTP transport to -connect to a QuestDB server running on localhost, port 9000. It then sends two -rows, each containing one symbol and two floating-point values. The client asks -the server to assign a timestamp to each row based on the server's wall-clock -time. - - +### Ingest data -The client is configured using a configuration string. See -[Ways to create the client](#ways-to-create-the-client) for all configuration -methods, and [Configuration options](#configuration-options) for available -settings. +```java +import io.questdb.client.Sender; -## Authenticate and encrypt +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + sender.table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "sell") + .doubleColumn("price", 2615.54) + .doubleColumn("amount", 0.00044) + .atNow(); + sender.table("trades") + .symbol("symbol", "BTC-USD") + .symbol("side", "sell") + .doubleColumn("price", 39269.98) + .doubleColumn("amount", 0.001) + .atNow(); + sender.flush(); +} +``` -This sample configures the client to use HTTP transport with TLS enabled for a -connection to a QuestDB server. It also instructs the client to authenticate -using HTTP Basic Authentication. +### Query data -When using QuestDB Enterprise, you can authenticate using a REST bearer token as -well. Please check the [RBAC docs](/docs/security/rbac/#authentication) for -more info. 
+```java +import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatch; + +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { + client.connect(); + client.execute( + "SELECT ts, sym, price, amount FROM trades WHERE sym = 'ETH-USD' LIMIT 10", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { + batch.forEachRow(row -> System.out.printf( + "ts=%d sym=%s price=%.4f amount=%d%n", + row.getLongValue(0), + row.getSymbol(1), + row.getDoubleValue(2), + row.getLongValue(3) + )); + } + + @Override + public void onEnd(long totalRows) { + System.out.println("done: " + totalRows + " rows"); + } + + @Override + public void onError(byte status, String message) { + System.err.println("query failed: " + message); + } + } + ); +} +``` - +## Authentication and TLS -## Ways to create the client +Authentication happens at the HTTP level during the WebSocket upgrade, before +any binary frames are exchanged. The same mechanisms work for both `Sender` +(ingestion) and `QwpQueryClient` (querying). -There are three ways to create a client instance: +### HTTP basic auth -1. **From a configuration string.** This is the most common way to create a - client instance. It describes the entire client configuration in a single - string, and allows sharing the same configuration across clients in different - languages. The general format is: +```java +// Ingestion +try (Sender sender = Sender.fromConfig( + "wss::addr=db.example.com:9000;username=admin;password=quest;")) { + // ... +} - ```text - ::=;=;...; - ``` +// Querying +try (QwpQueryClient client = QwpQueryClient.fromConfig( + "wss::addr=db.example.com:9000;username=admin;password=quest;")) { + client.connect(); + // ... 
+} +``` - [Transport protocol](/docs/ingestion/ilp/overview/#transport-selection) - can be one of these: +### Token auth (Enterprise, recommended) - - `http` — ILP/HTTP - - `https` — ILP/HTTP with TLS encryption - - `tcp` — ILP/TCP - - `tcps` — ILP/TCP with TLS encryption +Token authentication avoids the per-request overhead of basic auth and is +the recommended path for Enterprise deployments. - The key `addr` sets the hostname and port of the QuestDB server. Port - defaults to 9000 for HTTP(S) and 9009 for TCP(S). The minimum configuration - includes the transport and the address. +```java +try (Sender sender = Sender.fromConfig( + "wss::addr=db.example.com:9000;token=your_bearer_token;")) { + // ... +} +``` - ```java - try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;auto_flush_rows=5000;retry_timeout=10000;")) { - // ... - } - ``` +### TLS with custom trust store - For all available options, see - [Configuration options](#configuration-options). +```java +try (Sender sender = Sender.fromConfig( + "wss::addr=db.example.com:9000;tls_roots=/path/to/truststore.jks;tls_roots_password=changeit;")) { + // ... +} +``` -2. **From an environment variable.** The `QDB_CLIENT_CONF` environment variable - is used to set the configuration string. Moving configuration parameters to - an environment variable allows you to avoid hard-coding sensitive information - such as tokens and passwords in your code. +## Creating the client - ```bash - export QDB_CLIENT_CONF="http::addr=localhost:9000;auto_flush_rows=5000;retry_timeout=10000;" - ``` +### From a connect string - ```java - try (Sender sender = Sender.fromEnv()) { - // ... - } - ``` +The connect string format is `::=;=;...;` -3. **Using the Java builder API.** This provides type-safe configuration. 
+For ingestion, use `ws` (plain) or `wss` (TLS): - ```java - try (Sender sender = Sender.builder(Sender.Transport.HTTP) - .address("localhost:9000") - .autoFlushRows(5000) - .retryTimeoutMillis(10000) - .build()) { - // ... - } - ``` - -## Configuring multiple URLs +```java +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + // ... +} +``` -:::note +For querying: -This feature requires QuestDB OSS 9.1.0+ or Enterprise 3.0.4+. +```java +try (QwpQueryClient client = QwpQueryClient.fromConfig("ws::addr=localhost:9000;")) { + client.connect(); + // ... +} +``` -::: +For the full list of connect-string keys, see the +[connect string reference](/docs/connect/clients/connect-string/). -The ILP client can be configured with multiple _possible_ endpoints to send your data to. Only one endpoint is used at -a time. +### From an environment variable -To configure this feature, simply provide multiple `addr` entries. For example: +Set `QDB_CLIENT_CONF` to avoid hard-coding credentials: +```bash +export QDB_CLIENT_CONF="wss::addr=db.example.com:9000;username=admin;password=quest;" +``` ```java -try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;addr=localhost:9999;")) { - // ... +try (Sender sender = Sender.fromEnv()) { + // ... } ``` -On initialisation, if `protocol_version=auto`, the sender will identify the first instance that is writeable. Then it will _stick_ to this instance and write -any subsequent data to it. - -In the event that the instance becomes unavailable for writes, the client will retry the other possible endpoints, and when it finds -a new writeable instance, will _stick_ to it instead. This unavailability is characterised by failures to connect or locate the instance, -or the instance returning an error code due to it being read-only. - -By configuring multiple addresses, you can continue to capture data if your primary instance -fails, without having to reconfigure the clients. 
This backup instance can be hot or cold, and so long as it is assigned a known address, it will be written to as soon as it is started. +### Using the builder API -Enterprise users can leverage this feature to transparently handle replication failover, without the need to introduce a load-balancer or -reconfigure clients. +The builder exposes the same options as the connect string. Method names +follow camelCase convention (e.g., `sf_append_deadline_millis` becomes +`sfAppendDeadlineMillis()`). For the full list of keys, see the +[connect string reference](/docs/connect/clients/connect-string/). -:::tip +```java +try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("localhost:9000") + .autoFlushRows(500) + .autoFlushIntervalMillis(50) + .build()) { + // ... +} +``` -You may wish to increase the value of `retry_timeout` if you expect your backup instance to take a large amount of time to become writeable. +**Enterprise builder with TLS, token auth, and listeners:** -For example, when performing a primary migration (Enterprise replication), with default settings, you might want to increase this -to `30s` or higher. +```java +try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("db-primary:9000") + .address("db-replica:9000") + .enableTls() + .advancedTls().disableCertificateValidation() // test only + .httpToken("your_bearer_token") // works for WebSocket too + .reconnectMaxDurationMillis(300_000) + .reconnectInitialBackoffMillis(100) + .reconnectMaxBackoffMillis(5_000) + .errorHandler(error -> { + System.err.printf("batch rejected: category=%s table=%s msg=%s%n", + error.getCategory(), error.getTableName(), + error.getServerMessage()); + }) + .connectionListener(event -> { + System.out.printf("connection: %s host=%s:%d%n", + event.getKind(), event.getHost(), event.getPort()); + }) + .build()) { + // ... 
+} +``` +:::note +The token method is named `httpToken()` for historical reasons but works +on all transports including WebSocket. For production TLS, use +`advancedTls().customTrustStore(path, password)` instead of +`disableCertificateValidation()`. ::: +For `QwpQueryClient`, use the factory methods or configure post-construction: -## General usage pattern - -1. Create a client instance via `Sender.fromConfig()`. -2. Use `table(CharSequence)` to select a table for inserting a new row. -3. Use `symbol(CharSequence, CharSequence)` to add all symbols. You must add - symbols before adding other column types. -4. Use the following options to add all the remaining columns: +```java +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { + client.withInitialCredit(256 * 1024); + client.connect(); + // ... +} +``` - - `stringColumn(CharSequence, CharSequence)` - - `longColumn(CharSequence, long)` - - `doubleColumn(CharSequence, double)` - - `boolColumn(CharSequence, boolean)` - - `arrayColumn()` -- several variants, see below - - `timestampColumn(CharSequence, Instant)`, or - `timestampColumn(CharSequence, long, ChronoUnit)` - - `decimalColumn(CharSequence, Decimal256)` or - `decimalColumn(CharSequence, CharSequence)` (string literal) +## Data ingestion + +### General usage pattern + +`Sender` is not thread-safe. For multi-threaded workloads, create one instance +per thread or use an object pool. + +1. Create a `Sender` via `Sender.fromConfig()` or the builder. +2. Call `table(name)` to select a table. +3. 
Call column methods to add values: + - `symbol(name, value)` + - `stringColumn(name, value)` + - `boolColumn(name, value)` + - `byteColumn(name, byte)`, `shortColumn(name, short)`, `intColumn(name, int)` + - `longColumn(name, long)`, `floatColumn(name, float)`, `doubleColumn(name, double)` + - `charColumn(name, char)` + - `timestampColumn(name, Instant)` or `timestampColumn(name, long, ChronoUnit)` + - `uuidColumn(name, lo, hi)` (two longs) + - `long256Column(name, l0, l1, l2, l3)` (four longs, least significant first) + - `decimalColumn(name, Decimal256)` or `decimalColumn(name, CharSequence)` + - `ipv4Column(name, int)` (packed 32-bit address) or `ipv4Column(name, CharSequence)` + (dotted-quad) + - `geoHashColumn(name, long bits, int precisionBits)` or + `geoHashColumn(name, CharSequence base32)` + - `binaryColumn(name, byte[])`, `binaryColumn(name, long ptr, long len)`, or + `binaryColumn(name, DirectByteSlice)` + - `doubleArray(name, ...)` and `longArray(name, ...)` (see [Ingest arrays](#ingest-arrays)) + + DATE is accepted on ingress server-side but the Java client does not yet + expose a `dateColumn()` setter. All types are readable on the + [egress side](#reading-result-batches). + + To store a null for a column, omit that column's setter before calling + `at()` or `atNow()`. The column set for the batch is the union of all + columns seen across rows; a column first used on a later row is backfilled + with null for earlier rows. + + :::note IPv4 string input is strict + + `ipv4Column(name, CharSequence)` rejects the literal strings `"null"` + (case-insensitive) and `"0.0.0.0"` with a `LineSenderException`. Passing + a `null` reference is a no-op (the column is left unset, which surfaces + as SQL NULL on read). This avoids the previous silent round-trip where + `"0.0.0.0"` and `"null"` both stored as IPv4 NULL. + + ::: + + :::note GEOHASH precision is locked per column + + The first call to `geoHashColumn` for a column fixes its precision + (number of bits). 
Subsequent rows must use the same precision or the + call throws `LineSenderException`. For the string overload, precision + is `value.length() * 5` bits; for the bits overload, it is the + explicit `precisionBits` argument (1..60). + + ::: + +4. Call `at(Instant)`, `at(long, ChronoUnit)`, or `atNow()` to finalize the row. +5. Repeat from step 2, or call `flush()` to send buffered data. +6. Call `close()` when done (or use try-with-resources). -:::caution -Decimal values require QuestDB version 9.2.0 or later. +```java +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + sender.table("trades") + .symbol("symbol", "EURUSD") + .symbol("side", "buy") + .doubleColumn("price", 1.0842) + .longColumn("amount", 100_000) + .at(Instant.now()); +} +``` -Create decimal columns ahead of time with `DECIMAL(precision, scale)` so QuestDB can ingest the values -with the expected precision. See the -[decimal data type](/docs/query/datatypes/decimal/#creating-tables-with-decimals) page for a refresher on -precision and scale. -::: +Tables and columns are created automatically if they do not exist. -5. Use `at(Instant)` or `at(long timestamp, ChronoUnit unit)` or `atNow()` to - set a designated timestamp. -6. Optionally: You can use `flush()` to send locally buffered data into a - server. -7. Repeat from step 2 to start a new row. -8. Use `close()` to dispose the Sender after you no longer need it. +### Ingest arrays -## Ingest arrays +For 1D and 2D arrays, pass a Java array directly: -To ingest a 1D or 2D array, simply construct a Java array of the appropriate -type (`double[]`, `double[][]`) and supply it to the `arrayColumn()` method. In -order to avoid GC overheads, create the array instance once, and then populate -it with the data of each row. +```java +double[] prices = {1.0842, 1.0843, 1.0841}; +sender.table("book").doubleArray("levels", prices).atNow(); +``` -For arrays of higher dimensionality, use the `DoubleArray` class.
Here's a basic -example for a 3D array: +For higher-dimensional arrays, use the `DoubleArray` class to avoid GC +overhead. Create the instance once and reuse it across rows by calling +`clear()` before populating each row: ```java -// or "tcp::addr=localhost:9009;protocol_version=2;" -try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;"); - DoubleArray ary = new DoubleArray(3, 3, 3); -) { +import io.questdb.client.cutlass.line.array.DoubleArray; + +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;"); + DoubleArray ary = new DoubleArray(3, 3, 3)) { for (int i = 0; i < ROW_COUNT; i++) { - for (int value = 0; value < 3 * 3 * 3; value++) { - ary.append(value); + ary.clear(); // reset write position, reuse native memory + for (int v = 0; v < 27; v++) { + ary.append(v); } - sender.table("tango") - .doubleArray("array", ary) + sender.table("book") + .doubleArray("cube", ary) .at(getTimestamp(), ChronoUnit.MICROS); } } ``` -The `ary.append(value)` method allows you to populate the array in the row-major -order, without having to compute every coordinate individually. You can also use -`ary.set(value, coords...)` to set a value at specific coordinates. +The constructor `new DoubleArray(d1, d2, ...)` defines the shape. Values are +appended in row-major order: the last dimension varies fastest. For a 2D array +with shape `(3, 2)`, `append()` fills positions `[0,0], [0,1], [1,0], [1,1], +[2,0], [2,1]`. You can also use `set(value, i, j, ...)` to write at specific +coordinates. Call `reshape(d1, d2, ...)` to change the shape without +reallocating. 
+ +`LongArray` works the same way for 64-bit integer arrays — pass a Java +`long[]`, `long[][]`, or `long[][][]` directly, or use the reusable +`LongArray` class for higher dimensions: + +```java +import io.questdb.client.cutlass.line.array.LongArray; + +try (LongArray counts = new LongArray(3, 3, 3)) { + counts.clear(); + for (int v = 0; v < 27; v++) { + counts.append(v); + } + sender.table("book").longArray("counts", counts).atNow(); +} +``` + +### Designated timestamp + +The [designated timestamp](/docs/concepts/designated-timestamp/) column +controls time-based partitioning and ordering. There are two ways to set it: + +**User-assigned** (recommended for deduplication and exactly-once delivery): + +```java +sender.table("trades") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .at(Instant.now()); + +// Explicit microseconds for high-throughput paths: +sender.table("trades") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .at(System.currentTimeMillis() * 1000, ChronoUnit.MICROS); + +// Nanosecond precision, as nanoseconds since the Unix epoch (creates a timestamp_ns column): +sender.table("ticks") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .at(ChronoUnit.NANOS.between(Instant.EPOCH, Instant.now()), ChronoUnit.NANOS); +``` + +Using `ChronoUnit.NANOS` with `at()` or `timestampColumn()` creates a +`timestamp_ns` column. Using any other unit creates a standard `TIMESTAMP` +column (microsecond precision). + +**Server-assigned** (server uses its wall-clock time): + +```java +sender.table("trades") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .atNow(); +``` :::note -Arrays are supported from QuestDB version 9.0.0, and require updated -client libraries. +QuestDB works best when data arrives in chronological order (sorted by +timestamp). ::: -## Flush the buffer +### Decimal columns -The client accumulates the data into an internal buffer and doesn't immediately -send it to the server. It can flush the buffer to the server either -automatically or on explicit request.
+Create decimal columns ahead of time with `DECIMAL(precision, scale)` so +QuestDB ingests values with the expected precision. See the +[decimal data type](/docs/query/datatypes/decimal/#creating-tables-with-decimals) +page for details. -### Flush explicitly +### Flushing + +The client accumulates rows in an internal buffer and sends them in batches. + +**Auto-flush** (default): the client flushes when either threshold is reached: + +| Trigger | WebSocket default | HTTP default | +|------------|-------------------|--------------| +| Row count | 1,000 rows | 75,000 rows | +| Time | 100 ms | 1,000 ms | + +Customize via connect string: + +```text +ws::addr=localhost:9000;auto_flush_rows=500;auto_flush_interval=50; +``` -You can configure the client to not use automatic flushing, and issue explicit -flush requests by calling `sender.flush()`: +**Explicit flush**: you can call `flush()` at any time to send buffered data +immediately, even with auto-flush enabled: ```java -try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;auto_flush=off")) { - sender.table("trades") - .symbol("symbol", "ETH-USD") - .symbol("side", "sell") - .doubleColumn("price", 2615.54) - .doubleColumn("amount", 0.00044) - .atNow(); - sender.table("trades") - .symbol("symbol", "BTC-USD") - .symbol("side", "sell") - .doubleColumn("price", 39269.98) - .doubleColumn("amount", 0.001) - .atNow(); - sender.flush(); +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + for (Trade trade : trades) { + sender.table("trades") + .symbol("symbol", trade.symbol()) + .doubleColumn("price", trade.price()) + .longColumn("amount", trade.amount()) + .at(trade.timestamp()); + } + sender.flush(); // send everything now, regardless of auto-flush thresholds } ``` :::note +Disabling auto-flush entirely (`auto_flush=off`) is not supported on the +WebSocket transport. Use the auto-flush row count and interval settings to +control batch size instead. 
+::: -Calling `sender.flush()` will flush the buffer even with auto-flushing enabled, -but this isn't a typical way to use the client. +The client also flushes when closed, waiting up to `close_flush_timeout_millis` +(default 5000) for acknowledgements. If the flush fails at close time, the +client does not retry. Always flush explicitly before closing. + +:::note Server-advertised batch cap + +The server advertises its maximum accepted batch size on the WebSocket +upgrade response (`X-QWP-Max-Batch-Size`). The client parses this header +on connect and clamps subsequent batches to the advertised cap. A single +row larger than the cap, or a batch that would exceed the cap at flush +time, surfaces synchronously as a `LineSenderException` from the +offending column call or from `flush()` — earlier client versions only +saw this as a `1009` WebSocket close on the next operation. ::: -### Flush automatically +### Store-and-forward + +With store-and-forward enabled, unacknowledged data is persisted to disk and +replayed after reconnection, surviving sender process restarts. + +```text +ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;sender_id=ingest-1; +``` + +When multiple senders share the same `sf_dir`, each must have a distinct +`sender_id`. Slots are exclusive: two senders with the same ID will collide. +Allowed characters: `A-Za-z0-9_-`. + +Without `sf_dir`, unacknowledged data lives in process memory and is lost if +the sender process dies. The reconnect loop still spans transient server +outages (rolling upgrades), but the RAM buffer caps how much data can +accumulate. -By default, the client automatically flushes the buffer according to a simple -policy. With HTTP, it will automatically flush at the time you append a new -row, if either of these has become true: + -- reached 75,000 rows -- hasn't been flushed for 1 second +With store-and-forward enabled, `flush()` can block when the buffer hits its +cap. 
The producer blocks until the wire path drains enough capacity, up to +`sf_append_deadline_millis` (default 30 seconds). If the deadline elapses, the +call fails without dropping data. Terminal rejections (schema, parse, or +security errors) latch a terminal error on the sender. The next API call +throws `LineSenderServerException`; close the sender and create a new one to +continue. -Both parameters can be customized in order to achieve a good tradeoff between -throughput (large batches) and latency (small batches). +### Durable acknowledgement -This configuration string will cause the client to auto-flush every 10 rows or -every 10 seconds, whichever comes first: +:::note Enterprise + +Durable acknowledgement requires QuestDB Enterprise with primary replication +configured. + +::: + +By default, the server confirms a batch when it is committed to the local +[WAL](/docs/concepts/write-ahead-log/). To wait for the batch to be durably +uploaded to object storage: + +```text +ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;request_durable_ack=on; +``` -`http::addr=localhost:9000;auto_flush_rows=10;auto_flush_interval=10000;` +## Querying and SQL execution -With TCP, the client flushes its internal buffer whenever it gets full. +The `QwpQueryClient` sends SQL statements over the +[QWP egress](/docs/connect/wire-protocols/qwp-egress-websocket/) endpoint (`/read/v1`). +Results arrive as columnar batches via a callback handler. -The client will also flush automatically when it is being closed and there's -still some data in the buffer. However, **if the network operation fails at this -time, the client won't retry it.** Always explicitly flush the buffer before -closing the client. +`execute()` is **blocking**: it sends the query, drives the WebSocket receive +loop on the calling thread, invokes the handler callbacks (`onBatch`, +`onEnd`, `onError`, or `onExecDone`), and returns only after the query +completes. 
This means you can safely sequence operations: + +```java +client.execute("CREATE TABLE t (...) ...", ddlHandler); +// Table exists by this point +client.execute("INSERT INTO t VALUES ...", dmlHandler); +// Data is committed by this point +client.execute("SELECT * FROM t", selectHandler); +// Results have been fully consumed by this point +``` + +### Executing SELECT queries + +```java +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { + client.connect(); + client.execute( + "SELECT ts, sym, price FROM trades WHERE sym = 'EURUSD' LIMIT 100", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { + for (int row = 0; row < batch.getRowCount(); row++) { + long ts = batch.getLongValue(0, row); + String sym = batch.getSymbol(1, row); + double price = batch.getDoubleValue(2, row); + // process row... + } + } + + @Override + public void onEnd(long totalRows) { } + + @Override + public void onError(byte status, String message) { + System.err.printf("error: 0x%02X %s%n", status & 0xFF, message); + } + } + ); +} +``` + +The `QwpColumnBatch` object is valid only during the `onBatch` callback. Copy +values out if you need them after the callback returns. + +**Convenience accessors**: `batch.forEachRow(row -> ...)` provides a +`RowView` with single-argument accessors (`row.getLongValue(col)`, +`row.getSymbol(col)`, etc.) for compact read paths. + +**Null checking**: call `batch.isNull(col, row)` before reading a value. 
+ +### Reading result batches + +`QwpColumnBatch` provides typed accessors for all QuestDB column types: + +| Accessor | Column types | +|----------|-------------| +| `getBoolValue(col, row)` | BOOLEAN | +| `getByteValue(col, row)` | BYTE | +| `getShortValue(col, row)` | SHORT | +| `getCharValue(col, row)` | CHAR | +| `getIntValue(col, row)` | INT, IPv4 | +| `getLongValue(col, row)` | LONG, TIMESTAMP, `timestamp_ns`, DATE | +| `getFloatValue(col, row)` | FLOAT | +| `getDoubleValue(col, row)` | DOUBLE | +| `getSymbol(col, row)` | SYMBOL (returns cached `String`) | +| `getStrA(col, row)` / `getStrB(col, row)` | VARCHAR (reusable `CharSequence` views) | +| `getString(col, row)` | VARCHAR (heap-allocating `String`) | +| `getString(col, row, CharSink)` | VARCHAR (copy into sink) | +| `getBinaryA(col, row)` / `getBinaryB(col, row)` | BINARY (reusable native views) | +| `getBinary(col, row)` | BINARY (heap-allocating `byte[]`) | +| `getUuid(col, row, Uuid)` | UUID (zero-allocation, into sink) | +| `getUuidHi(col, row)` / `getUuidLo(col, row)` | UUID (individual 64-bit halves) | +| `getLong256(col, row, Long256Sink)` | LONG256 (into sink) | +| `getLong256Word(col, row, wordIndex)` | LONG256 (individual 64-bit word) | +| `getGeohashValue(col, row)` | GEOHASH (raw long value) | +| `getGeohashPrecisionBits(col)` | GEOHASH (precision metadata, per column) | +| `getDecimal128High(col, row)` / `getDecimal128Low(col, row)` | DECIMAL128 (two longs) | +| `getDecimalScale(col)` | DECIMAL (scale metadata, per column) | +| `getDoubleArrayElements(col, row)` | DOUBLE_ARRAY (flattened `double[]`, row-major) | +| `getArrayNDims(col, row)` | DOUBLE_ARRAY (dimension count) | +| `isNull(col, row)` | All types | + +Column metadata is available via `batch.getColumnName(col)`, +`batch.getColumnWireType(col)`, and `batch.getColumnCount()`. + +**Reading array columns:** + +`getDoubleArrayElements(col, row)` returns a flattened `double[]` in row-major +order. 
Use `getArrayNDims(col, row)` to discover the dimensionality. For +example, reading a 2D `DOUBLE[][]` column: + +```java +int nDims = batch.getArrayNDims(colIndex, row); // e.g. 2 +double[] flat = batch.getDoubleArrayElements(colIndex, row); +// flat contains all elements in row-major order +``` + +Alternatively, you can extract individual elements in SQL (e.g., +`SELECT bids[1][1] FROM market_data`) and read them as scalar doubles. + +### DDL and DML statements + +Non-SELECT statements (CREATE TABLE, INSERT, UPDATE, ALTER, DROP, TRUNCATE) +are executed through the same `execute()` method. The server responds with +`EXEC_DONE` instead of result batches: + +```java +client.execute( + "CREATE TABLE trades (" + + "ts TIMESTAMP, sym SYMBOL, price DOUBLE, amount LONG" + + ") TIMESTAMP(ts) PARTITION BY DAY WAL", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { } + + @Override + public void onEnd(long totalRows) { } + + @Override + public void onError(byte status, String message) { + System.err.println("failed: " + message); + } + + @Override + public void onExecDone(short opType, long rowsAffected) { + System.out.printf("done: opType=%d rows=%d%n", opType, rowsAffected); + } + } +); +``` + +`rowsAffected` reports the count for INSERT/UPDATE/DELETE. Pure DDL (CREATE, +DROP, ALTER, TRUNCATE) reports 0. + +### Bind parameters + +Parameterized queries use typed bind values, avoiding SQL injection and +enabling server-side factory cache reuse across repeated calls: + +```java +String sql = "SELECT ts, sym, price, amount FROM trades " + + "WHERE sym = $1 AND price >= $2 LIMIT 1000"; + +for (String symbol : List.of("EURUSD", "GBPUSD", "USDJPY")) { + client.execute( + sql, + binds -> binds + .setVarchar(0, symbol) + .setDouble(1, 1.0), + handler + ); +} +``` + +Bind indices are 0-based (`$1` maps to index 0). 
Available setters: + +| Setter | Bind type | +|--------|-----------| +| `setBoolean(index, value)` | BOOLEAN | +| `setByte(index, value)` | BYTE | +| `setChar(index, value)` | CHAR | +| `setShort(index, value)` | SHORT | +| `setInt(index, value)` | INT | +| `setLong(index, value)` | LONG | +| `setFloat(index, value)` | FLOAT | +| `setDouble(index, value)` | DOUBLE | +| `setDate(index, millis)` | DATE | +| `setTimestampMicros(index, micros)` | TIMESTAMP | +| `setTimestampNanos(index, nanos)` | `timestamp_ns` | +| `setVarchar(index, value)` | VARCHAR, STRING, and SYMBOL columns | +| `setUuid(index, lo, hi)` or `setUuid(index, UUID)` | UUID | +| `setLong256(index, l0, l1, l2, l3)` | LONG256 | +| `setGeohash(index, precisionBits, value)` | GEOHASH | +| `setDecimal64(index, scale, unscaled)` | DECIMAL64 | +| `setDecimal128(index, scale, lo, hi)` | DECIMAL128 | +| `setDecimal256(index, scale, ll, lh, hl, hh)` | DECIMAL256 | + +To pass a NULL bind value, either pass `null` to `setVarchar` or use the +typed `setNull`: + +```java +binds -> binds.setVarchar(0, null) // null VARCHAR/SYMBOL +binds -> binds.setNull(0, TYPE_LONG) // typed null (requires QWP type code) +binds -> binds.setNullGeohash(0, 20) // null GEOHASH with precision +binds -> binds.setNullDecimal64(0, 4) // null DECIMAL64 with scale +``` + +### Flow control + +For large result sets, byte-credit flow control prevents the server from +overwhelming the client: + +```java +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000) + .withInitialCredit(256 * 1024)) { + client.connect(); + // Server pauses after streaming ~256 KiB, auto-replenishes after each batch +} +``` + +A credit of `0` (the default) means unbounded: the server streams as fast as +the network allows. 
+ +### Compression + +Negotiate zstd compression to reduce network bandwidth for large result sets: + +```java +try (QwpQueryClient client = QwpQueryClient.fromConfig( + "ws::addr=localhost:9000;compression=zstd;compression_level=3;")) { + client.connect(); + // Batches are automatically decompressed +} +``` ## Error handling -HTTP automatically retries failed, recoverable requests: network errors, some -server errors, and timeouts. Non-recoverable errors include invalid data, -authentication errors, and other client-side errors. +### Ingestion errors -:::note +WebSocket ingestion uses an asynchronous error model. Batch rejections are +delivered via the `SenderErrorHandler` callback, not thrown from `flush()`: -If you have configured multiple addresses, retries will be run against different instances. +```java +try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("localhost:9000") + .errorHandler(error -> { + System.err.printf("batch rejected: category=%s table=%s msg=%s%n", + error.getCategory(), error.getTableName(), error.getServerMessage()); + }) + .build()) { + // ... +} +``` -::: +Each `SenderError` carries: -Retrying is especially useful during transient network issues or when the server -goes offline for a short period. Configure the retrying behavior through the -`retry_timeout` configuration option or via the builder API with -`retryTimeoutMillis(long timeoutMillis)`. The client continues to retry after -recoverable errors until it either succeeds or the specified timeout expires. If -it hits the timeout without success, the client throws a `LineSenderException`. 
+| Field | Accessor | Description | +|-------|----------|-------------| +| Category | `getCategory()` | `SCHEMA_MISMATCH`, `PARSE_ERROR`, `INTERNAL_ERROR`, `SECURITY_ERROR`, `WRITE_ERROR`, `PROTOCOL_VIOLATION`, or `UNKNOWN` | +| Policy | `getAppliedPolicy()` | `DROP_AND_CONTINUE` (batch dropped, sender continues) or `HALT` (next API call throws `LineSenderServerException`) | +| Server message | `getServerMessage()` | Human-readable error text from the server (may be null) | +| Table name | `getTableName()` | The rejected table (null for multi-table batches) | +| FSN range | `getFromFsn()` / `getToFsn()` | Frame sequence number span identifying the rejected batch | +| Message sequence | `getMessageSequence()` | Server's per-frame sequence number (`-1` if not available) | +| Status byte | `getServerStatusByte()` | Raw QWP status code (`-1` if not available) | -The client won't retry requests while it's being closed and attempting to flush -the data left over in the buffer. +The error handler runs on a dedicated dispatcher thread, never on the I/O or +producer thread. -The TCP transport has no mechanism to notify the client it encountered an -error; instead it just disconnects. When the client detects this, it throws a -`LineSenderException` and becomes unusable. +**Recovery after errors**: call `reset()` to clear buffers and continue with +fresh data. On WebSocket, `reset()` does not recover from terminal failures +(auth failure, reconnect budget exhaustion). In those cases, close the sender +and create a new one. -## Recover after a client-side error +### Query errors -With HTTP transport, the client always prepares a full row in RAM before trying -to send it. It also remains usable after an exception has occurred. This allows -you to cancel sending a row, for example due to a validation error, and go on -with the next row. +Query errors arrive via the `onError` callback: -With TCP transport, you don't have this option. 
If you get an exception, you -can't continue with the same client instance, and don't have insight into which -rows were accepted by the server. +```java +@Override +public void onError(byte status, String message) { + System.err.printf("query failed: 0x%02X %s%n", status & 0xFF, message); +} +``` + +Status codes: + +| Code | Name | Description | +|--------|-----------------|---------------------------------------------------| +| `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | +| `0x05` | PARSE_ERROR | SQL syntax error or malformed message | +| `0x06` | INTERNAL_ERROR | Server-side execution failure | +| `0x08` | SECURITY_ERROR | Authorization failure | +| `0x0A` | CANCELLED | Query terminated by CANCEL | +| `0x0B` | LIMIT_EXCEEDED | Protocol limit hit | -:::caution +Errors can arrive before any data (parse failure) or mid-stream (storage +failure, server shutdown). When `onError` is called, no further frames arrive +for that query. -Error handling behaviour changed with the release of QuestDB 9.1.0. +### Connection-level errors -Previously, failing all retries would cause an exception and release the buffered data. +- **Authentication failure**: `401`/`403` HTTP response before the WebSocket + upgrade completes. Terminal across all endpoints. +- **Malformed frames**: `QwpDecodeException` or WebSocket close with a + terminal code. +- **Role mismatch**: `QwpRoleMismatchException` when all endpoints report + roles that do not match the `target=` filter. -Now the buffer will not be released. If you wish to re-use the same sender with fresh data, you must call the -new `reset()` function. +## Failover and high availability + +:::note Enterprise + +Multi-host failover with automatic reconnect requires QuestDB Enterprise. ::: -## Designated timestamp considerations - -The concept of [designated timestamp](/docs/concepts/designated-timestamp/) is -important when ingesting data into QuestDB. 
- -There are two ways to assign a designated timestamp to a row: - -1. User-assigned timestamp: the client assigns a specific timestamp to the row. - - ```java - java.time.Instant timestamp = Instant.now(); // or any other timestamp - sender.table("trades") - .symbol("symbol", "ETH-USD") - .symbol("side", "sell") - .doubleColumn("price", 2615.54) - .doubleColumn("amount", 0.00044) - .at(timestamp); - ``` - - The `Instant` class is part of the `java.time` package and is used to - represent a specific moment in time. The `sender.at()` method can accept a - long timestamp representing the elapsed time since the beginning of the - [Unix epoch](https://en.wikipedia.org/wiki/Unix_time), as well as a - `ChronoUnit` to specify the time unit. This approach is useful in - high-throughput scenarios where instantiating an `Instant` object for each - row is not feasible due to performance considerations. - -2. Server-assigned timestamp: the server automatically assigns a timestamp to - the row based on the server's wall-clock time at the time of ingesting the - row. Example: - - ```java - sender.table("trades") - .symbol("symbol", "ETH-USD") - .symbol("side", "sell") - .doubleColumn("price", 2615.54) - .doubleColumn("amount", 0.00044) - .atNow(); - ``` - -We recommend using the event's original timestamp when ingesting data into -QuestDB. Using ingestion-time timestamps precludes the ability to deduplicate -rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +### Multiple endpoints -:::note +Specify comma-separated addresses in the connect string: -QuestDB works best when you send data in chronological order (sorted by -timestamp). +```text +ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; +``` + +The client tries endpoints in order. On connection loss, it walks the list +to find the next healthy endpoint. 
+ +### Ingestion failover + +The ingestion sender uses a reconnect loop with exponential backoff. Key +connect-string options: + +| Key | Default | Description | +|----------------------------------|-----------|-------------------------------------------| +| `reconnect_max_duration_millis` | `300000` | Total outage budget before giving up. | +| `reconnect_initial_backoff_millis` | `100` | First post-failure sleep. | +| `reconnect_max_backoff_millis` | `5000` | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry on first connect (`on`, `sync`, `async`). | + +Ingress is zone-blind: it pins QWP v1 and does not read `SERVER_INFO`. The +`zone=` key is accepted but ignored, so a connect string shared with egress +clients works unchanged. + +With store-and-forward (`sf_dir` set), unacknowledged data survives sender +restarts. Without it, unacknowledged data lives in process memory and is lost +if the sender process dies. + +### Query failover + +The query client drives a per-query reconnect loop. When a transport error +occurs mid-stream, the client reconnects to another endpoint and replays the +query. `batch_seq` restarts at 0 on the new connection. + +Key connect-string options: + +| Key | Default | Description | +|-------------------------------|---------|-------------------------------------------| +| `failover` | `on` | Master switch for per-query reconnect. | +| `failover_max_attempts` | `8` | Max reconnect attempts per query. | +| `failover_backoff_initial_ms` | `50` | First post-failure sleep. | +| `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep. | +| `failover_max_duration_ms` | `30000` | Total wall-clock budget per query. | + +:::warning Failover requires multiple endpoints + +Failover rotates across endpoints. With a single `addr`, there is no other +host to try, and the loop exhausts after one attempt regardless of +`failover_max_attempts`. For failover to be useful, provide at least two +addresses. 
::: -## Protocol Version +**Handling partial results**: when failover occurs mid-stream, the +`onFailoverReset` callback fires before replayed batches arrive. Use it to +clear any accumulated state: -To enhance data ingestion performance, QuestDB _version 9.0.0_ introduced an -upgraded version "2" to the text-based InfluxDB Line Protocol which encodes -arrays and f64 values in binary form. Arrays are supported only in this upgraded -protocol version. +```java +@Override +public void onFailoverReset(QwpServerInfo newNode) { + // Clear partial results; the server will re-send from the beginning + results.clear(); +} +``` -You can select the protocol version with the `protocol_version` setting in the -configuration string. +If you do not clear state, you will see overlapping data (the server replays +the full result set). -HTTP transport automatically negotiates the protocol version by default. -In order to avoid the slight latency cost at connection time, you can explicitly -configure the protocol version by setting `protocol_version=2|1;`. +`onFailoverReset` is a mid-stream event only. It does not fire during +`connect()` or between queries. If the connection drops between queries, +the next `execute()` call handles the reconnect internally. -TCP transport does not negotiate the protocol version and uses version 1 by -default. You must explicitly set `protocol_version=2;` in order to ingest -arrays, as in this example: +**Terminal failure**: when all endpoints are unreachable and the failover +budget is exhausted, the error is delivered via `onError` and the +`QwpQueryClient` enters a terminal state. Subsequent `execute()` calls +throw `IllegalStateException`. 
Close the client and create a new one: -```text -tcp::addr=localhost:9009;protocol_version=2; +```java +try { + client.execute(sql, handler); +} catch (IllegalStateException e) { + client.close(); + client = QwpQueryClient.fromConfig("ws::addr=..."); + client.connect(); + client.execute(sql, handler); +} ``` -## Configuration options +This differs from ingestion, where the `Sender` has a continuous reconnect +loop (`reconnect_max_duration_millis`, default 5 minutes) that spans full +outages transparently. The query client does not have an equivalent; it +reconnects only within the scope of a single `execute()` call. + +### Connection events + +For ingestion, register a `SenderConnectionListener` to observe connection +state transitions: + +```java +Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("db-primary:9000") + .address("db-replica:9000") + .connectionListener(event -> { + System.out.printf("%s host=%s:%d%n", + event.getKind(), event.getHost(), event.getPort()); + }) + .build(); +``` + +Event kinds: `CONNECTED`, `DISCONNECTED`, `RECONNECTED`, `FAILED_OVER`, +`ENDPOINT_ATTEMPT_FAILED`, `ALL_ENDPOINTS_UNREACHABLE`, `AUTH_FAILED` +(terminal), `RECONNECT_BUDGET_EXHAUSTED` (terminal). + +### Error classification + +- **Authentication errors** (`401`/`403`): terminal at any host. The + reconnect loop stops immediately. +- **Role reject** (`421 + X-QuestDB-Role`): transient if the role is + `PRIMARY_CATCHUP`, topology-level otherwise. +- **Version mismatch** at upgrade: per-endpoint, not terminal. The client + tries the next endpoint. +- **All other errors** (TCP/TLS failures, `404`, `503`, mid-stream errors): + transient, fed into the reconnect loop. + +For the full list of connect-string keys, see the +[reconnect and failover](/docs/connect/clients/connect-string#reconnect-keys) +and +[multi-host failover](/docs/connect/clients/connect-string#failover-keys) +sections of the connect string reference. 
-Client can be configured either by using a configuration string as shown in the -examples above, or by using the builder API. +## Parallel queries -The builder API is available via the `Sender.builder(Transport transport)` -method. +:::note Phase 1 limitation -For a breakdown of available options, see the -[Configuration string](/docs/ingestion/clients/configuration-string/) page. +The current implementation supports a single in-flight query per connection. +The wire protocol allows multiple concurrent queries (demultiplexed by +request ID); multi-query support is planned for a future release. + +::: + +To run queries in parallel, create separate `QwpQueryClient` instances. Each +instance manages its own WebSocket connection. + +Neither `Sender` nor `QwpQueryClient` is thread-safe. For multi-threaded +workloads, use one instance per thread or use an object pool. + +## Configuration reference + +For the full list of connect-string keys and their defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). + +Common WebSocket-specific options: + +| Key | Default | Description | +|-----|---------|-------------| +| `auto_flush_rows` | `1000` | Rows before auto-flush. | +| `auto_flush_interval` | `100` | Milliseconds before auto-flush. | +| `auto_flush_bytes` | disabled | Bytes before auto-flush. | +| `sf_dir` | unset | Store-and-forward directory. | +| `sender_id` | `default` | Sender slot identity for SF. | +| `request_durable_ack` | `off` | Request durable upload ACK (Enterprise). | +| `reconnect_max_duration_millis` | `300000` | Ingress reconnect budget. | +| `failover` | `on` | Egress per-query reconnect switch. | +| `compression` | `raw` | Egress batch compression (`raw`, `zstd`). | ## Compatible JDKs The client relies on some JDK internal libraries, which certain specialised JDK offerings may not support. -Here is a list of known incompatible JDKs: - -- Azul Zing 17 - - A fix is in progress. You can use Azul Zulu 17 in the meantime. 
- -## Other considerations - -- Refer to the [ILP overview](/docs/ingestion/ilp/overview) for details - about transactions, error control, delivery guarantees, health check, or table - and column auto-creation. -- The method `flush()` can be called to force sending the internal buffer to a - server, even when the buffer is not full yet. -- The Sender is not thread-safe. For multiple threads to send data to QuestDB, - each thread should have its own Sender instance. An object pool can also be - used to re-use Sender instances. -- The Sender instance has to be closed after it is no longer in use. The Sender - implements the `java.lang.AutoCloseable` interface, and therefore the - [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) - pattern can be used to ensure that the Sender is closed. +Known incompatible JDKs: + +- Azul Zing 17 (use Azul Zulu 17 instead) + +## Migration from ILP (HTTP/TCP) + +If you are migrating from the ILP-based client, the row-building API is +unchanged. The main differences: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +|--------|-----------|-----------------| +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` | +| Auto-flush rows | 75,000 | 1,000 | +| Auto-flush interval | 1,000 ms | 100 ms | +| Error model | Synchronous (`flush()` throws) | Async (`SenderErrorHandler` callback) | +| Buffer capacity | Configurable | Not configurable (internal cursor) | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | Limited | Full reconnect loop with backoff | +| Querying | Not available | `QwpQueryClient` | + +To migrate, change your connect string from `http::` to `ws::` (or `https::` +to `wss::`), register a `SenderErrorHandler` for async error handling, and +adjust auto-flush settings if needed. 
+ +## Full example: ingestion and querying with failover + +This example combines ingestion with 2D arrays and connection events, then +queries the data back with the recreate-on-failure pattern for egress. It +uses the builder API with enterprise TLS and token auth. + +```java +import io.questdb.client.Sender; +import io.questdb.client.cutlass.line.array.DoubleArray; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatch; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; +import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import io.questdb.client.cutlass.qwp.client.QwpServerInfo; + +import java.time.Instant; +import java.util.concurrent.ThreadLocalRandom; + +// ─── Ingestion (builder API with connection events) ───────────────── + +// Multi-host with store-and-forward for failover durability. +// Without sf_dir, data buffered during an outage lives in process memory +// and is lost if the sender process dies. With sf_dir, unacknowledged +// frames are persisted to disk and replayed after reconnection. + +try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("db-primary:9000") // Enterprise: multi-host + .address("db-replica:9000") // Enterprise: multi-host + .enableTls() // Enterprise: wss (TLS) + .advancedTls().disableCertificateValidation() // test only! 
+ .httpToken("your_bearer_token") // Enterprise: token auth (works for WS too) + .storeAndForwardDir("/var/lib/myapp/qdb-sf") // durability across outages + .senderId("ingest-1") // unique per sender process + .reconnectMaxDurationMillis(300_000) + .reconnectInitialBackoffMillis(100) + .reconnectMaxBackoffMillis(5_000) + .errorHandler(error -> { + System.err.printf("batch rejected: category=%s table=%s msg=%s%n", + error.getCategory(), error.getTableName(), + error.getServerMessage()); + }) + .connectionListener(event -> { + System.out.printf("connection: %s host=%s:%d%n", + event.getKind(), event.getHost(), event.getPort()); + }) + .build(); + DoubleArray bids = new DoubleArray(5, 2); + DoubleArray asks = new DoubleArray(5, 2)) { + + for (int i = 0; i < 100; i++) { + bids.clear(); + asks.clear(); + for (int lvl = 0; lvl < 5; lvl++) { + bids.append(1.0842 - 0.0001 * (lvl + 1)); // price + bids.append(100_000 + ThreadLocalRandom.current().nextInt(900_000)); // size + asks.append(1.0842 + 0.0001 * (lvl + 1)); + asks.append(100_000 + ThreadLocalRandom.current().nextInt(900_000)); + } + sender.table("book") + .symbol("ticker", "EURUSD") + .doubleArray("bids", bids) + .doubleArray("asks", asks) + .at(Instant.now()); + } + sender.flush(); +} + +// Connection events you will see: +// CONNECTED host=db-primary:9000 — initial connection +// DISCONNECTED host=db-primary:9000 — primary goes down +// ENDPOINT_ATTEMPT_FAILED host=... — retry attempts during outage +// ALL_ENDPOINTS_UNREACHABLE host=... — all hosts down (retries continue) +// FAILED_OVER host=db-replica:9000 — replica promoted, sender resumes + +// With sf_dir set, unacknowledged frames are persisted to disk during +// the outage and replayed when the new primary becomes reachable. +// Without sf_dir, the Sender buffers in memory only — the reconnect +// loop still works, but data is lost if the sender process dies. 
+ + +// ─── Querying (connect string, with reconnect-on-failure) ─────────── + +// The QwpQueryClient becomes permanently dead after a total outage +// exhausts the failover budget. The application must close the dead +// client and create a new one. This pattern handles that: + +String connString = + "wss::addr=db-primary:9000,db-replica:9000,db-replica2:9000;" // Enterprise: wss, multi-host + + "token=your_bearer_token;" // Enterprise: token auth + + "tls_verify=unsafe_off;" // test only! + + "failover=on;" // Enterprise: failover + + "failover_max_attempts=8;" + + "failover_max_duration_ms=30000;"; + +QwpQueryClient client = null; + +while (true) { + // Reconnect if the client is dead + if (client == null) { + try { + client = QwpQueryClient.fromConfig(connString); + client.connect(); + } catch (Exception e) { + System.err.println("connect failed: " + e.getMessage()); + client = null; + Thread.sleep(2000); + continue; + } + } + + try { + client.execute( + "SELECT ts, ticker, bids[1][1] AS best_bid, asks[1][1] AS best_ask " + + "FROM book ORDER BY ts DESC LIMIT 10", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { + batch.forEachRow(row -> System.out.printf( + "ts=%s ticker=%s bid=%.5f ask=%.5f%n", + Instant.ofEpochMilli(row.getLongValue(0) / 1000), + row.getSymbol(1), + row.getDoubleValue(2), + row.getDoubleValue(3))); + } + + @Override + public void onEnd(long totalRows) { + System.out.println("(" + totalRows + " rows)"); + } + + @Override + public void onError(byte status, String message) { + System.err.printf("query error: 0x%02X %s%n", + status & 0xFF, message); + } + + @Override + public void onFailoverReset(QwpServerInfo newNode) { + // Fires only when failover happens mid-query. + // Clear any accumulated partial results here. 
+ System.out.printf("failover to node=%s role=%s%n", + newNode.getNodeId(), + QwpServerInfo.roleName(newNode.getRole())); + } + } + ); + } catch (Exception e) { + // Failover budget exhausted or client dead — recreate + System.err.println("query failed: " + e.getMessage()); + try { client.close(); } catch (Exception ignored) { } + client = null; + System.out.println("(will reconnect on next query)"); + } + + Thread.sleep(2000); +} +``` diff --git a/documentation/ingestion/clients/nodejs.md b/documentation/ingestion/clients/nodejs.md index 65c90997c..eadba4e35 100644 --- a/documentation/ingestion/clients/nodejs.md +++ b/documentation/ingestion/clients/nodejs.md @@ -1,4 +1,5 @@ --- +slug: /connect/clients/nodejs title: Node.js Client Documentation description: "Get started with QuestDB using the Node.js client for efficient, @@ -30,7 +31,7 @@ QuestDB. :::info This page focuses on our high-performance ingestion client, which is optimized for **writing** data to QuestDB. -For retrieving data, we recommend using a [PostgreSQL-compatible Node.js library](/docs/query/pgwire/nodejs/) or our +For retrieving data, we recommend using a [PostgreSQL-compatible Node.js library](/docs/connect/compatibility/pgwire/nodejs/) or our [HTTP query endpoint](/docs/query/overview/#rest-http-api). ::: @@ -163,7 +164,7 @@ run().then(console.log).catch(console.error) As you can see, both events now are using the same timestamp. We recommended to use the original event timestamps when ingesting data into QuestDB. Using the current timestamp hinder the ability to deduplicate rows which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +[important for exactly-once processing](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). 
## Decimal insertion @@ -238,11 +239,11 @@ For all the extra options you can use, please check [the client docs](https://questdb.github.io/nodejs-questdb-client/classes/SenderOptions.html) Alternatively, for a breakdown of Configuration string options available across -all clients, see the [Configuration string](/docs/ingestion/clients/configuration-string/) page. +all clients, see the [Connect string](/docs/connect/clients/connect-string/) page. ## Next Steps -Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for details +Please refer to the [ILP overview](/docs/connect/compatibility/ilp/overview) for details about transactions, error control, delivery guarantees, health check, or table and column auto-creation. diff --git a/documentation/ingestion/clients/python.md b/documentation/ingestion/clients/python.md index c12e0ec8e..5075bf711 100644 --- a/documentation/ingestion/clients/python.md +++ b/documentation/ingestion/clients/python.md @@ -1,4 +1,5 @@ --- +slug: /connect/clients/python title: Python Client Documentation description: Get started with QuestDB, as quickly as possible. Provides instructions and @@ -35,7 +36,7 @@ It covers basic connection, authentication and some insert patterns. This page focuses on our high-performance ingestion client, which is optimized for **writing** data to QuestDB. -For retrieving data, we recommend using a [PostgreSQL-compatible Python library](/docs/query/pgwire/python/) or our +For retrieving data, we recommend using a [PostgreSQL-compatible Python library](/docs/connect/compatibility/pgwire/python/) or our [HTTP query endpoint](/docs/query/overview/#rest-http-api). ::: @@ -196,7 +197,7 @@ if __name__ == '__main__': We recommended `User`-assigned timestamps when ingesting data into QuestDB. Using `Server`-assigned timestamps hinders the ability to deduplicate rows which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). 
+[important for exactly-once processing](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). The same `trades` insert, but via a Pandas dataframe: @@ -443,12 +444,12 @@ For all the extra options you can use, please check [the client docs](https://py-questdb-client.readthedocs.io/en/latest/conf.html#sender-conf) Alternatively, for a breakdown of Configuration string options available across -all clients, see the [Configuration string](/docs/ingestion/clients/configuration-string/) page. +all clients, see the [Connect string](/docs/connect/clients/connect-string/) page. ## Transactional flush As described at the -[ILP overview](/docs/ingestion/ilp/overview#http-transaction-semantics), the +[ILP overview](/docs/connect/compatibility/ilp/overview#http-transaction-semantics), the HTTP transport has some support for transactions. The python client exposes @@ -457,7 +458,7 @@ to make working with transactions more convenient ## Next steps -Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for general +Please refer to the [ILP overview](/docs/connect/compatibility/ilp/overview) for general details about transactions, error control, delivery guarantees, health check, or table and column auto-creation. The [Python client docs](https://py-questdb-client.readthedocs.io/en/latest/sender.html) @@ -475,6 +476,6 @@ Alone? Stuck? Want help? Visit us in our [Community Forum](https://community.questdb.com/). 
## Additional resources -- [QuestDB Python clients guide](/docs/query/pgwire/python/) +- [QuestDB Python clients guide](/docs/connect/compatibility/pgwire/python/) - [Integration with Polars](/docs/integrations/data-processing/polars/) - [Integration with Pandas](/docs/integrations/data-processing/pandas/) diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index 569b3a6d4..628775da2 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -1,198 +1,264 @@ --- -title: Rust Client Documentation -description: - "Dive into QuestDB using the Rust ingestion client for high-performance, - insert-only operations. Unlock peak time series data ingestion." +slug: /connect/clients/rust +title: Rust client for QuestDB +sidebar_label: Rust +description: "QuestDB Rust client for high-throughput data ingestion over the QWP binary protocol (WebSocket)." --- -import { ILPClientsTable } from "@theme/ILPClientsTable" +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" -QuestDB offers a Rust client designed for high-performance data ingestion. These -are some of the highlights: +The QuestDB Rust client connects to QuestDB over the +[QWP — QuestDB Wire Protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) — a +columnar binary protocol carried over WebSocket. +It supports high-throughput, column-oriented batched writes with automatic table +creation, schema evolution, multi-host failover, and optional store-and-forward +durability. 
-- **Creates tables automatically**: no need to define your schema up-front -- **Concurrent schema changes**: seamlessly handle multiple data streams that - modify the table schema on the fly -- **Optimized batching**: buffer the data and send many rows in one go -- **Health checks and feedback**: built-in health monitoring ensures the health - of your system +:::tip Legacy transports - +The client also supports ILP ingestion over HTTP and TCP for backward +compatibility. This page documents the recommended WebSocket (QWP) path. For +ILP transport details, see the +[ILP overview](/docs/connect/compatibility/ilp/overview/). + +::: :::info -This page focuses on our high-performance ingestion client, which is optimized -for **writing** data to QuestDB. For retrieving data, we recommend using a -[PostgreSQL-compatible Rust library](/docs/query/pgwire/rust/) or our -[HTTP query endpoint](/docs/query/overview/#rest-http-api). +This page focuses on ingestion. For querying QuestDB from Rust, see the +[PGWire Rust client](/docs/connect/compatibility/pgwire/rust/) or the +[REST API](/docs/connect/compatibility/rest-api/). ::: -If you don't have a QuestDB server yet, follow the -[Quick Start](/docs/getting-started/quick-start/) section to set it up. +## Quick start -## Add the client crate to your project - -QuestDB clients requires Rust 1.40 or later. Add its crate to your project using -the command line: +Add the dependency: ```bash cargo add questdb-rs ``` -## Authenticate +Then ingest data: + +```rust +use questdb::{ + Result, + ingress::{Sender, TimestampNanos}, +}; + +fn main() -> Result<()> { + let mut sender = Sender::from_conf("ws::addr=localhost:9000;")?; + let mut buffer = sender.new_buffer(); + buffer + .table("trades")? + .symbol("symbol", "ETH-USD")? + .symbol("side", "sell")? + .column_f64("price", 2615.54)? + .column_f64("amount", 0.00044)? 
+ .at(TimestampNanos::now())?; + sender.flush(&mut buffer)?; + sender.close_drain()?; + Ok(()) +} +``` + +The four steps are: + +1. Get a `Sender` via `Sender::from_conf` or the builder. +2. Populate a `Buffer` with one or more rows. +3. Call `sender.flush(&mut buffer)` to publish. +4. Call `sender.close_drain()` before the sender is dropped so already-published + frames complete on the wire. + +## Authentication and TLS -This is how you authenticate using the HTTP Basic authentication: +Authentication happens at the HTTP level during the WebSocket upgrade, before +any binary frames are exchanged. + +### HTTP basic auth ```rust let mut sender = Sender::from_conf( - "https::addr=localhost:9000;username=admin;password=quest;" + "wss::addr=db.example.com:9000;username=admin;password=quest;" )?; ``` -You can also pass the connection configuration via the `QDB_CLIENT_CONF` -environment variable: +### Token auth (Enterprise, recommended) -```bash -export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" +Token authentication avoids the per-request overhead of basic auth and is +the recommended path for Enterprise deployments. + +```rust +let mut sender = Sender::from_conf( + "wss::addr=db.example.com:9000;token=your_bearer_token;" +)?; ``` -Then you use it like this: +### TLS -```rust -let mut sender = Sender::from_env()?; +Use the `wss` schema for TLS. Select where root certificates come from with +`tls_ca`: + +```text +wss::addr=db.example.com:9000;tls_ca=webpki_roots; ``` -When using QuestDB Enterprise, you can authenticate via a REST token. Please -check the [RBAC docs](/docs/security/rbac/#authentication) for more info. +Supported values: -## Insert data +| Key | Description | +|-----|-------------| +| `tls_ca=webpki_roots` | Use the [`webpki-roots`](https://crates.io/crates/webpki-roots) crate. | +| `tls_ca=os_roots` | Use the OS certificate store. | +| `tls_ca=webpki_and_os_roots` | Combine both. 
| +| `tls_roots=/path/to/root-ca.pem` | Load roots from a PEM file. Useful for self-signed certs during testing. | +| `tls_verify=unsafe_off` | Disable verification. Never use in production. | -This snippet connects to QuestDB and inserts one row of data: +### Authentication timeout -```rust -use questdb::{ - Result, - ingress::{ - Sender, - Buffer, - TimestampNanos}}; +`auth_timeout_ms` (default 15000) controls how long the client waits for the +WebSocket upgrade to complete. `auth_timeout` is also accepted for +compatibility with the HTTP transport's spelling. -fn main() -> Result<()> { - let mut sender = Sender::from_conf("http::addr=localhost:9000;")?; - let mut buffer = Buffer::new(); - buffer - .table("trades")? - .symbol("symbol", "ETH-USD")? - .symbol("side", "sell")? - .column_f64("price", 2615.54)? - .column_f64("amount", 0.00044)? - .at(TimestampNanos::now())?; - sender.flush(&mut buffer)?; - Ok(()) -} -``` +## Creating the client -These are the main steps it takes: +### From a connect string + +The connect string format is `<schema>::<key>=<value>;<key>=<value>;...` + +```rust +let mut sender = Sender::from_conf("ws::addr=localhost:9000;")?; +``` -- Use `Sender::from_conf()` to get the `sender` object -- Populate a `Buffer` with one or more rows of data -- Send the buffer using `sender.flush()`(`Sender::flush`) +Use `ws` (plain) or `wss` (TLS). `qwpws` / `qwpwss` are accepted as aliases. +The default port is `9000`. -In this case, the designated timestamp will be the one at execution time. +For the full list of connect-string keys, see the +[connect string reference](/docs/connect/clients/connect-string/). -Let's see now an example with timestamps using Chrono, custom timeout, and basic -auth. +### From an environment variable -You need to enable the `chrono_timestamp` feature to the QuestDB crate and add -the Chrono crate. 
+Set `QDB_CLIENT_CONF` to keep credentials out of source code: ```bash -cargo add questdb-rs --features chrono_timestamp -cargo add chrono +export QDB_CLIENT_CONF="wss::addr=db.example.com:9000;username=admin;password=quest;" ``` ```rust -use questdb::{ - Result, - ingress::{ - Sender, - Buffer, - TimestampNanos - }, -}; -use chrono::Utc; +let mut sender = Sender::from_env()?; +``` -fn main() -> Result<()> { - let mut sender = Sender::from_conf( - "http::addr=localhost:9000;username=admin;password=quest;retry_timeout=20000;" - )?; - let mut buffer = Buffer::new(); - let current_datetime = Utc::now(); +### Using the builder API - buffer - .table("trades")? - .symbol("symbol", "ETH-USD")? - .symbol("side", "sell")? - .column_f64("price", 2615.54)? - .column_f64("amount", 0.00044)? - .at(TimestampNanos::from_datetime(current_datetime)?)?; +The builder exposes the same options as the connect string, with Rust-typed +signatures (e.g., `sf_append_deadline_millis` becomes +`sf_append_deadline(Duration::from_secs(30))`). For the full list of keys, see +the [connect string reference](/docs/connect/clients/connect-string/). - sender.flush(&mut buffer)?; - Ok(()) -} +```rust +use questdb::ingress::{Protocol, SenderBuilder, QwpWsProgress}; +use std::time::Duration; + +let mut sender = SenderBuilder::new(Protocol::QwpWs, "localhost", 9000) + .qwp_ws_progress(QwpWsProgress::Background)? + .reconnect_max_duration(Duration::from_secs(300))? + .qwp_ws_error_handler(|err| { + eprintln!("QWP error: {err:?}"); + })? + .build()?; ``` -:::note +Most QWP/WebSocket settings are configured through the connect string. The +builder exposes typed setters for the most common runtime knobs: error handler, +progress mode, reconnect timing, and `initial_connect_retry`. -Avoid using `at_now()` instead of `at(some_timestamp)`. 
This removes the ability -to deduplicate rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +## Data ingestion -::: +### Concurrency -## Ingest arrays +`Sender` is single-owner: every publishing method takes `&mut self`, so +only one caller can use it at a time. For concurrent producers, create +one `Sender` per thread, or hand rows to a single owner over a channel. -The `Sender::column_arr` method supports efficient ingestion of N-dimensional -arrays using several convenient types: +`Buffer` is decoupled from `Sender`. Build buffers on any thread, then +call `sender.flush(&mut buffer)` once you have the sender in scope. This +lets worker threads encode rows in parallel and serialises only the +publish step. -- native Rust arrays and slices (up to 3-dimensional) -- native Rust vectors (up to 3-dimensional) -- arrays from the [ndarray](https://docs.rs/ndarray) crate, or other types that - support the `questdb::ingress::NdArrayView` trait. +When several `Sender` instances share an `sf_dir`, give each a distinct +`sender_id` — slots are exclusive (see +[Store-and-forward](#store-and-forward)). -:::note -Arrays are supported from QuestDB version 9.0.0, and require updated -client libraries. -::: +### General usage pattern -In this example, we insert some FX order book data. -* `bids` and `asks`: 2D arrays of L2 order book depth. Each level contains price and volume. -* `bids_exec_probs` and `asks_exec_probs`: 1D arrays of calculated execution probabilities for the next minute. +1. Call `buffer.table(name)?` to select a table. +2. Call typed column setters to add values (see + [Column setters](#column-setters) below). +3. Call `at(timestamp)?` or `at_now()?` to finalize the row. +4. Repeat from step 1, or call `sender.flush(&mut buffer)?` to send. -:::note +Tables and columns are created automatically if they do not exist. -You must use protocol version 2 to ingest arrays. 
HTTP transport will -automatically enable it as long as you're connecting to an up-to-date QuestDB -server (version 9.0.0 or later), but with TCP you must explicitly specify it in -the configuration string: `protocol_version=2;` See [below](#protocol-version) -for more details on protocol versions. +### Column setters -::: +Every typed setter has an `_opt` variant taking `Option<T>` that writes +a null when the value is `None`: + +```rust +buffer.column_f64_opt("price", None)?; // writes null +buffer.column_f64_opt("price", Some(2615.54))?; // equivalent to column_f64 +``` + +| QuestDB type | Setter | NULL variant | +| --- | --- | --- | +| `SYMBOL` | `symbol(name, &str)` | `symbol_opt(name, Option<&str>)` | +| `BOOLEAN` | `column_bool(name, bool)` | `column_bool_opt(name, Option<bool>)` | +| `BYTE` | `column_i8(name, i8)` | `column_i8_opt(name, Option<i8>)` | +| `SHORT` | `column_i16(name, i16)` | `column_i16_opt(name, Option<i16>)` | +| `INT` | `column_i32(name, i32)` | `column_i32_opt(name, Option<i32>)` | +| `LONG` | `column_i64(name, i64)` | `column_i64_opt(name, Option<i64>)` | +| `FLOAT` | `column_f32(name, f32)` | `column_f32_opt(name, Option<f32>)` | +| `DOUBLE` | `column_f64(name, f64)` | `column_f64_opt(name, Option<f64>)` | +| `CHAR` | `column_char(name, u16)` (UTF-16 code unit) | `column_char_opt(name, Option<u16>)` | +| `VARCHAR` | `column_str(name, &str)` | `column_str_opt(name, Option<&str>)` | +| `BINARY` | `column_binary(name, &[u8])` | `column_binary_opt(name, Option<&[u8]>)` | +| `UUID` | `column_uuid(name, lo: u64, hi: u64)` | `column_uuid_opt(name, Option<(u64, u64)>)` | +| `LONG256` | `column_long256(name, &[u8; 32])` (4 LE limbs) | `column_long256_opt(name, Option<&[u8; 32]>)` | +| `DATE` | `column_date(name, millis: i64)` | `column_date_opt(name, Option<i64>)` | +| `TIMESTAMP` / `timestamp_ns` (non-designated) | `column_ts(name, TimestampMicros / TimestampNanos)` | `column_ts_opt(name, Option<…>)` | +| `GEOHASH` | `column_geohash(name, bits: u64, precision_bits: u8)` (1–60 bits) | 
`column_geohash_opt(name, Option<(u64, u8)>)` | +| `DECIMAL` (up to 256-bit) | `column_dec(name, &str / rust_decimal / bigdecimal)` | `column_dec_opt(name, …)` | +| `DECIMAL64` | `column_dec64(name, …)` | `column_dec64_opt(name, …)` | +| `DECIMAL128` | `column_dec128(name, …)` | `column_dec128_opt(name, …)` | +| `DOUBLE[]` (arrays) | `column_arr(name, &view)` — slices, vecs up to 3D, [`ndarray`](https://docs.rs/ndarray) views | `column_arr_opt(name, Option<&view>)` | +| `IPv4` † | `column_ipv4(name, std::net::Ipv4Addr)` | `column_ipv4_opt(name, Option<Ipv4Addr>)` | +| `LONG[]` (i64 arrays) † | `column_arr` with `i64` element type | `column_arr_opt` with `i64` element type | + +† **Spec-only — currently rejected by the server.** QWP v1 defines these +wire types and the client encodes them correctly, but server-side ingest +does not yet accept them. Batches using them will be rejected with a +descriptive error. Application code written against these setters today +will start working once the server adds support; no client change is +needed. + +For exact signatures and accepted parameter conversions, see the +[crate docs](https://docs.rs/questdb-rs/latest/questdb/ingress/struct.Buffer.html). + +### Ingest arrays + +`Buffer::column_arr` accepts native Rust arrays/slices/vectors (up to 3D) and +[`ndarray`](https://docs.rs/ndarray) arrays for higher dimensions: ```rust -use questdb::{Result, ingress::{SenderBuilder, TimestampNanos}}; +use questdb::{Result, ingress::{Sender, TimestampNanos}}; use ndarray::arr2; fn main() -> Result<()> { - // or `tcp::addr=127.0.0.1:9009;protocol_version=2;` - let mut sender = SenderBuilder::from_conf("http::addr=127.0.0.1:9000;")? - .build()?; - + let mut sender = Sender::from_conf("ws::addr=127.0.0.1:9000;")?; let mut buffer = sender.new_buffer(); buffer - .table("fx_order_book")? - .symbol("symbol", "EUR/USD")? + .table("fx_order_book")? + .symbol("symbol", "EURUSD")?
.column_arr("bids", &vec![ vec![1.0850, 600000.0], vec![1.0849, 300000.0], @@ -201,202 +267,580 @@ fn main() -> Result<()> { [1.0853, 500000.0], [1.0854, 250000.0], [1.0855, 125000.0]]).view())? - .column_arr("bids_exec_probs", - &[0.85, 0.50, 0.25])? - .column_arr("asks_exec_probs", - &vec![0.90, 0.55, 0.20])? .at(TimestampNanos::now())?; - sender.flush(&mut buffer)?; + sender.close_drain()?; Ok(()) } ``` - +### Handler callback -## Configuration options +Install a handler on the builder. It runs synchronously from sender API calls +such as `flush()`. The handler must not call back into the same sender. -The easiest way to configure the line sender is the configuration string. The -general structure is: +```rust +use questdb::ingress::{Protocol, SenderBuilder}; -```plain -::addr=host:port;param1=val1;param2=val2;... +let mut sender = SenderBuilder::new(Protocol::QwpWs, "localhost", 9000) + .qwp_ws_error_handler(|err| { + eprintln!("QWP error: {err:?}"); + })? + .build()?; ``` -`transport` can be `http`, `https`, `tcp`, or `tcps`. Go to the client's -[crate documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) for the -full details on configuration. +### `QwpWsSenderError` fields -Alternatively, for breakdown of available params, see the -[Configuration string](/docs/ingestion/clients/configuration-string/) page. +| Field | Meaning | +|-------|---------| +| `category` | `SchemaMismatch`, `ParseError`, `InternalError`, `SecurityError`, `WriteError`, `ProtocolViolation`, `Unknown`. Use for programmatic dispatch. | +| `applied_policy` | `DropAndContinue` (batch dropped, sender continues) or `Halt` (sender latched terminal). | +| `status` | Raw QWP status byte. `None` for WebSocket protocol violations. | +| `message` | Human-readable error text from the server, or a client-synthesized close reason for WebSocket protocol violations. See [Message stability](#message-stability) and [PII safety](#message-pii) below. 
| +| `message_sequence` | Server's per-frame QWP wire sequence for the error frame. Resets on reconnect — only meaningful within one connection. | +| `from_fsn` / `to_fsn` | Inclusive FSN span of the affected frame(s), client-side. | -## Don't forget to flush +`Sender::qwp_ws_errors_dropped()` reports how many diagnostics were lost +because the bounded log overflowed (typically due to a lagging poll cursor). -The sender and buffer objects are entirely decoupled. This means that the sender -won't get access to the data in the buffer until you explicitly call -`sender.flush(&mut buffer)` or a variant. This may lead to a pitfall where you -drop a buffer that still has some data in it, resulting in permanent data loss. +#### Message stability {#message-stability} -A common technique is to flush periodically on a timer and/or once the buffer -exceeds a certain size. You can check the buffer's size by calling -`buffer.len()`. +`message` is a human-readable diagnostic — **not a stable contract.** Its +text varies across server versions and across provenance: -The default `flush()` method clears the buffer after sending its data. If you -want to preserve its contents (for example, to send the same data to multiple -QuestDB instances), call `sender.flush_and_keep(&mut buffer)` instead. +- **QWP error frames** carry a server-supplied UTF-8 string capped at + 1024 bytes by the wire spec. +- **WebSocket protocol violations** are client-synthesized as + `"ws-close[<code>]: <reason>"`. +- The server-supplied text mirrors QuestDB's normal SQL error formatting, + which has historically been reworded across releases. +- The field may be empty. -## Transactional flush +Use `category` and `status` for programmatic dispatch. Never pattern-match +on `message`. -As described in -[ILP overview](/docs/ingestion/ilp/overview#http-transaction-semantics), the -HTTP transport has some support for transactions.
+#### PII / secret safety {#message-pii} -In order to ensure in advance that a flush will not affect more than one table, -call `sender.flush_and_keep_with_flags(&mut buffer, true)`. This call will -refuse to flush a buffer if the flush wouldn't be data-transactional. +`message` may include fragments of the client's own payload — for +example, an offending column value quoted back by a schema or parse +rejection — or a server-supplied WebSocket close reason that the +operator did not control. **Treat `message` as potentially containing +PII or secrets.** -## Error handling +Log it at the same trust level as the data being sent, and sanitize +before forwarding to external error trackers (Sentry, Datadog, end-user +UIs). The other fields on `QwpWsSenderError` are safe to forward as-is — +they carry only structural metadata. -The two supported transport modes, HTTP and TCP, handle errors very differently. -In a nutshell, HTTP is much better at error handling. +#### Correlating with server-side logs -### HTTP +The protocol does not currently surface a server-issued request or +connection identifier in the WebSocket upgrade response. The closest +correlation tuple is `(message_sequence, from_fsn, to_fsn)`: -HTTP distinguishes between recoverable and non-recoverable errors. For -recoverable ones, it enters a retry loop with exponential backoff, and reports -the error to the caller only after it has exhausted the retry time budget -(configuration parameter: `retry_timeout`). +- `message_sequence` — per-connection QWP wire sequence the server + attached to the error frame. Resets on reconnect. +- `from_fsn` / `to_fsn` — client-side FSN span of the affected frames. + Not generally indexed by server-side logs. -`sender.flush()` and variant methods communicate the error in the `Result` -return value. The category of the error is signalled through the `ErrorCode` -enum, and it's accompanied with an error message. 
+When opening a bug report, supply: -After the sender has signalled an error, it remains usable. You can handle the -error as appropriate and continue using it. +1. The connection start time (from your application logs). +2. The client's `X-QWP-Client-Id` header value, if your application sets one. +3. The `(message_sequence, from_fsn, to_fsn)` triple. -### TCP +There is no globally unique handle. -TCP doesn't report errors at all to the sender; instead, the server quietly -disconnects and you'll have to inspect the server logs to get more information -on the reason. When this has happened, the sender transitions into an error -state, and it is permanently unusable. You must drop it and create a new sender. -You can inspect the sender's error state by calling `sender.must_close()`. +After a `Halt` policy fires, the sender is terminal. Drop it and create a new +one. `Sender::must_close()` reports whether the sender has entered a terminal +state. -For more details about the HTTP and TCP transports, please refer to the -[ILP overview](/docs/ingestion/ilp/overview#transport-selection). +`DropAndContinue` errors do not halt the sender. The affected batch is +discarded; subsequent frames are unaffected and the I/O loop keeps running. -## Protocol Version +## Progress modes -To enhance data ingestion performance, QuestDB introduced an upgrade to the -text-based InfluxDB Line Protocol which encodes arrays and `f64` values in -binary form. Arrays are supported only in this upgraded protocol version. +The client drives the WebSocket loop in one of two modes: -You can select the protocol version with the `protocol_version` setting in the -configuration string. +| Mode | Behaviour | +|------|-----------| +| `QwpWsProgress::Background` (default) | A sender-owned thread sends frames, receives ACKs, reconnects, and replays. Right choice for most callers. | +| `QwpWsProgress::Manual` | No background thread. 
The caller drives progress with `Sender::drive_once()` or `Sender::await_acked_fsn()`. | -HTTP transport automatically negotiates the protocol version by default. In order -to avoid the slight latency cost at connection time, you can explicitly configure -the protocol version by setting `protocol_version=2|1;`. +```rust +use questdb::ingress::{Protocol, SenderBuilder, QwpWsProgress}; + +let mut sender = SenderBuilder::new(Protocol::QwpWs, "localhost", 9000) + .qwp_ws_progress(QwpWsProgress::Manual)? + .build()?; + +loop { + // ... publish frames ... + sender.flush(&mut buffer)?; + // Drive until idle so the I/O loop catches up. + while sender.drive_once()? {} +} +``` + +`drive_once()` performs at most one unit of work per call (send one frame, +drain ready responses, do one storage-maintenance step). Call it in a loop +until it returns `false` before parking. + +## Failover and high availability + +:::note Enterprise + +Multi-host failover with automatic reconnect is most useful with QuestDB +Enterprise primary-replica replication. + +::: + +### Multiple endpoints -TCP transport does not negotiate the protocol version and uses version 1 by -default. You must explicitly set `protocol_version=2;` in order to ingest -arrays, as in this example: +Specify a comma-separated address list (or repeat `addr=`): ```text -tcp::addr=localhost:9009;protocol_version=2; +ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; ``` -Protocol Version 2 along with its support for arrays is available from QuestDB -version 9.0.0. +The client picks an endpoint, connects, and walks the list to find the next +healthy peer when the current connection breaks. + +:::tip Strongly recommend sf_dir for multi-host deployments + +Without `sf_dir`, `flush()` blocks when the connection is down and the +in-memory queue fills up. After `sf_append_deadline_millis` (default 30s), +it returns `SubmitTimedOut`. 
With `sf_dir`, `flush()` writes to disk and +returns quickly while the reconnect loop replays to the new primary in the +background. For any deployment where failover may take more than a few +seconds, `sf_dir` is strongly recommended. + +::: + +### Reconnect knobs + +| Key | Default | Description | +|-----|---------|-------------| +| `reconnect_max_duration_millis` | 300000 | Total outage budget before giving up. | +| `reconnect_initial_backoff_millis` | 100 | First post-failure sleep. | +| `reconnect_max_backoff_millis` | 5000 | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry on first connect. Values: `off`, `on` / `true` / `sync` (synchronous retry), `async` (background retry), `false` (alias for `off`). | + +By default the first connect fails fast; subsequent disconnects use the +reconnect policy. Set `initial_connect_retry=on` to apply the same policy to +the initial connect. + +The Rust client is zone-blind on ingress: the `zone=` key is accepted but +ignored, so connect strings shared with future zone-aware egress clients work +unchanged. + +The Rust client does not currently expose connection-state event callbacks +(the equivalent of Java's `SenderConnectionListener`). Connection lifecycle is +observable through `log` crate output and through error notifications +delivered to the polling API or the `qwp_ws_error_handler` callback. To see +reconnect events, enable logging for the `questdb` target: + +```rust +// e.g., with the env_logger crate +env_logger::Builder::from_env( + env_logger::Env::default().default_filter_or("questdb=info") +).init(); +``` + +### Error classification + +- **Authentication errors** (`401`/`403`): terminal across all endpoints. The + reconnect loop stops immediately. +- **Role reject** (`421 + X-QuestDB-Role`): transient if the role is + `PRIMARY_CATCHUP`, topology-level otherwise. +- **Version mismatch at upgrade**: per-endpoint, not terminal. The client + tries the next endpoint. 
+- **All other errors** (TCP/TLS failures, `404`, `503`, mid-stream errors): + transient, fed into the reconnect loop. + +## Closing the sender + +Call `Sender::close_drain()` before dropping the sender: + +```rust +sender.close_drain()?; +drop(sender); +``` + +`close_drain()` stops accepting new publications and waits up to +`close_flush_timeout_millis` (default 5000) for already-published frames to +ACK. Dropping the sender without `close_drain` may discard unacknowledged +in-memory frames; SF mode persists them to disk so a later sender can replay +them. + +## Configuration reference + +For the full list of connect-string keys and their defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). + +Common WebSocket-specific options: + +| Key | Default | Description | +|-----|---------|-------------| +| `addr` | required | One or more `host:port` entries. | +| `username` / `password` | unset | HTTP basic auth. | +| `token` | unset | Bearer token auth (Enterprise). | +| `auth_timeout_ms` | 15000 | WebSocket upgrade timeout. | +| `tls_ca` / `tls_roots` / `tls_verify` | webpki | TLS configuration (`wss`/`qwpwss` only). | +| `auto_flush` | required `off` if set | Auto-flush is not supported. `auto_flush_rows` and `auto_flush_bytes` are rejected. | +| `sf_dir` | unset | Enable disk-backed store-and-forward. | +| `sender_id` | `default` | SF slot identity. | +| `sf_durability` | `memory` | Only `memory` is currently accepted (see [SF tuning keys](#sf-tuning-keys)). | +| `request_durable_ack` | `off` | Wait for durable upload before ACK (Enterprise). | +| `reconnect_max_duration_millis` | 300000 | Per-outage reconnect budget. | +| `initial_connect_retry` | `off` | Apply reconnect policy to the first connect. | +| `close_flush_timeout_millis` | 5000 | Bound on `close_drain` wait. | +| `qwp_ws_progress` | `background` | `background` or `manual`. | +| `max_in_flight` | 128 | Max unacknowledged frames in flight on a connection. 
Acts as the backpressure window: publishers block locally once the window is full. | ## Crate features -The QuestDB client crate supports some optional features, mostly related to -additional library dependencies. +The QuestDB Rust client uses Cargo features to gate optional dependencies and +transports. ### Default-enabled features -- `tls-webpki-certs`: supports using the `webpki-roots` crate for TLS - certificate verification. +- `sync-sender`: enables all sync sender transports (TCP, HTTP, QWP/UDP, + QWP/WebSocket). +- `tls-webpki-certs`: TLS verification using `webpki-roots`. +- `ring-crypto`: TLS crypto via the `ring` crate. ### Optional features -These features are opt-in: +- `sync-sender-qwp-ws`: QWP/WebSocket transport only (subset of `sync-sender`). +- `chrono_timestamp`: build timestamps from `chrono::DateTime`. +- `ndarray`: ingest arrays from the [ndarray](https://docs.rs/ndarray) crate. +- `rust_decimal` / `bigdecimal`: ingest decimals from those crates. +- `tls-native-certs`: validate TLS against the OS certificate store. +- `insecure-skip-verify`: disable TLS verification (testing only). -- `ilp-over-http`: Enables ILP/HTTP support using the `ureq` crate. -- `chrono_timestamp`: Allows specifying timestamps as `chrono::Datetime` - objects. -- `tls-native-certs`: Supports validating TLS certificates against the OS's - certificates store. -- `insecure-skip-verify`: Allows skipping server certificate validation in TLS - (this compromises security). -- `ndarray`: Enables ingestion of arrays from the - [ndarray](https://docs.rs/ndarray) crate. +## Migration from ILP (HTTP/TCP) -## Next steps +The buffer API is unchanged. 
To switch a sender to QWP/WebSocket: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +|--------|-----------|-----------------| +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` | +| Batch trigger | Row/time-based auto-flush (defaults: 75000 rows, 1000 ms) | Explicit `flush()` only | +| Error model | Synchronous on `flush()` | Async via `poll_qwp_ws_error` / handler | +| Completion tracking | Implicit per request | Explicit FSN watermarks | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | Not available | Built in (comma-separated `addr`) | +| Shutdown | `drop` | `close_drain()` then `drop` | + +To migrate an existing sender, change the connect string from `http::` to +`ws::` (or `https::` to `wss::`), drop any `auto_flush_*` keys, install a +`qwp_ws_error_handler` or poll `poll_qwp_ws_error()`, and call `close_drain()` +before dropping the sender. + +## Full example: multi-host ingestion with failover -Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table -and column auto-creation. +This example shows a production ingestion loop with store-and-forward, +multi-host failover, and proper error handling including the retry pattern +around `flush()`. + +```rust +use questdb::ingress::{Sender, TimestampNanos}; +use std::{thread, time::Duration}; + +fn main() -> questdb::Result<()> { + // Multi-host with store-and-forward for failover durability. + // Without sf_dir, flush() blocks during an outage and times out + // after sf_append_deadline_millis (default 30s). With sf_dir, + // flush() writes to disk and returns quickly while the reconnect + // loop replays to the new primary in the background. + // Enterprise: wss with a multi-host addr list and token auth. + // tls_verify=unsafe_off is for testing only; remove it in production. + let mut sender = Sender::from_conf( + "wss::addr=db-primary:9000,db-replica:9000;\ + token=your_bearer_token;\ + tls_verify=unsafe_off;\
+ sf_dir=/var/lib/myapp/qdb-sf;\ + sender_id=ingest-1;\ + reconnect_max_duration_millis=300000;" + )?; + + let mut buffer = sender.new_buffer(); + + loop { + buffer + .table("book")? + .symbol("ticker", "EURUSD")? + .column_f64("price", 1.0842)? + .column_f64("size", 100_000.0)? + .at(TimestampNanos::now())?; + + // flush() can still return SubmitTimedOut if the SF queue + // fills to sf_max_total_bytes during a prolonged outage. + // The buffer is retained on error; retry on the next pass. + match sender.flush(&mut buffer) { + Ok(()) => {} + Err(e) => { + eprintln!("flush error: {e}"); + // Check if the sender is terminal (auth failure, + // reconnect budget exhausted). If so, recreate it. + if sender.must_close() { + eprintln!("sender is terminal, exiting"); + break; + } + // Otherwise the buffer still holds the rows; + // the next flush() retries them. + } + } + + thread::sleep(Duration::from_millis(500)); + } + + sender.close_drain()?; + Ok(()) +} + +// Without store-and-forward (sf_dir not set), the same code works for +// short outages but flush() will return SubmitTimedOut if the in-memory +// queue fills before the reconnect loop succeeds. For any multi-host +// deployment where failover may take more than a few seconds, sf_dir +// is strongly recommended. +``` + +## Next steps -Explore the full capabilities of the Rust client via the -[Crate API page](https://docs.rs/questdb-rs/latest/questdb/). +Explore the full API on the +[crate docs](https://docs.rs/questdb-rs/latest/questdb/ingress/). -With data flowing into QuestDB, now it's time for analysis. +For querying QuestDB from Rust, see the +[PGWire Rust client](/docs/connect/compatibility/pgwire/rust/) or the +[REST API](/docs/connect/compatibility/rest-api/). -To learn _The Way_ of QuestDB SQL, see the -[Query & SQL Overview](/docs/query/overview/). +With data flowing into QuestDB, the next step is querying. See the +[Query overview](/docs/query/overview/) to learn QuestDB SQL. -Alone? Stuck? Want help? 
Visit us in our +Need help? Visit the [Community Forum](https://community.questdb.com/). diff --git a/documentation/ingestion/ilp/advanced-settings.md b/documentation/ingestion/ilp/advanced-settings.md index 0381fe575..1d75c1ddc 100644 --- a/documentation/ingestion/ilp/advanced-settings.md +++ b/documentation/ingestion/ilp/advanced-settings.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/ilp/advanced-settings title: Advanced InfluxDB Line Protocol settings sidebar_label: Advanced settings description: @@ -9,11 +10,23 @@ description: This documentation provides aid for those venturing outside of the path laid down by their language clients. +:::tip Consider QWP first + +This page is for operators tuning the legacy ILP transport. For new +deployments, prefer the +[QuestDB Wire Protocol (QWP)](/docs/connect/wire-protocols/qwp-ingress-websocket/) — +the native binary protocol with multi-host failover, store-and-forward, +and the full QuestDB type system built in. See the +[ingestion overview](/docs/connect/overview/) for languages with +native QWP support. + +::: + For the introductory InfluxDB Line Protocol materials, including authentication, -see the [ILP overview](/docs/ingestion/ilp/overview/). +see the [ILP overview](/docs/connect/compatibility/ilp/overview/). For the the basics of ingestion, instead consult the -[Ingestion overview](/docs/ingestion/overview/). +[Ingestion overview](/docs/connect/overview/). ## Syntax @@ -88,7 +101,7 @@ as the row timestamp. See `cairo.timestamp.locale` and `line.tcp.timestamp` :::caution - While - [`columnset` timestamp type units](/docs/ingestion/ilp/columnset-types/#timestamp) + [`columnset` timestamp type units](/docs/connect/compatibility/ilp/columnset-types/#timestamp) are microseconds, the designated timestamp units are nanoseconds by default, and can be overridden via the `line.tcp.timestamp` configuration property. @@ -123,7 +136,7 @@ We recommend populating designated timestamp via trailing value syntax above. 
::: It is also possible to populate designated timestamp via `columnset`. Please see -[mixed timestamp](/docs/ingestion/ilp/columnset-types/#timestamp) reference. +[mixed timestamp](/docs/connect/compatibility/ilp/columnset-types/#timestamp) reference. ## Irregularly-structured data @@ -239,11 +252,11 @@ of new column or mapping strategy when column already exists. These types are limited by existing InfluxDB Line Protocol specification. Wider QuestDB type system is available by creating table via SQL upfront. The following are supported value types: -[Integer](/docs/ingestion/ilp/columnset-types/#integer), -[Long256](/docs/ingestion/ilp/columnset-types/#long256), -[Float](/docs/ingestion/ilp/columnset-types/#float), -[String](/docs/ingestion/ilp/columnset-types/#string) and -[Timestamp](/docs/ingestion/ilp/columnset-types/#timestamp) +[Integer](/docs/connect/compatibility/ilp/columnset-types/#integer), +[Long256](/docs/connect/compatibility/ilp/columnset-types/#long256), +[Float](/docs/connect/compatibility/ilp/columnset-types/#float), +[String](/docs/connect/compatibility/ilp/columnset-types/#string) and +[Timestamp](/docs/connect/compatibility/ilp/columnset-types/#timestamp) ## Inserting NULL values @@ -296,7 +309,7 @@ means that when sending data you should be aware of the performed conversions. 
See: - [QuestDB Types in SQL](/docs/query/datatypes/overview/) -- [InfluxDB Line Protocol types and cast conversion tables](/docs/ingestion/ilp/columnset-types/) +- [InfluxDB Line Protocol types and cast conversion tables](/docs/connect/compatibility/ilp/columnset-types/) ## Constructing well-formed messages diff --git a/documentation/ingestion/ilp/columnset-types.md b/documentation/ingestion/ilp/columnset-types.md index 404800bb3..fc4093eb8 100644 --- a/documentation/ingestion/ilp/columnset-types.md +++ b/documentation/ingestion/ilp/columnset-types.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/ilp/columnset-types title: InfluxDB Line Protocol Columnset Value Types sidebar_label: Columnset value types description: @@ -8,6 +9,19 @@ description: This page lists the supported InfluxDB Line Protocol columnset value types and details about type casting. +:::tip QWP exposes the full type system natively + +This page documents InfluxDB line-protocol type extensions QuestDB layers +on top of the InfluxDB type model (the `i`, `t`, `n`, etc. suffixes and the +cast tables below). The native +[QuestDB Wire Protocol (QWP)](/docs/connect/wire-protocols/qwp-ingress-websocket/) +supports the entire QuestDB type system directly — no suffix encoding, no +implicit casts — and is the recommended choice for new clients. See the +[ingestion overview](/docs/connect/overview/) for languages with native +QWP support. + +::: + If a target column does not exist, QuestDB will create a column using the same type that the ILP client sends. diff --git a/documentation/ingestion/ilp/overview.md b/documentation/ingestion/ilp/overview.md index a0db5c1dd..2c71dff56 100644 --- a/documentation/ingestion/ilp/overview.md +++ b/documentation/ingestion/ilp/overview.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/ilp/overview title: InfluxDB Line Protocol Overview sidebar_label: Overview description: InfluxDB line protocol reference documentation. 
@@ -18,6 +19,21 @@ import { Clients } from "../../../src/components/Clients" QuestDB implements the InfluxDB Line Protocol to ingest data. +:::tip Use QWP for new clients + +ILP is now a **compatibility protocol** in QuestDB. It exists for users +coming from InfluxDB, Telegraf, or Kafka / Flink pipelines that already +emit ILP. New deployments should prefer the +[QuestDB Wire Protocol (QWP)](/docs/connect/wire-protocols/qwp-ingress-websocket/) — +binary on the wire, type-rich (full QuestDB type system, no suffix +encoding), faster, and with multi-host failover and store-and-forward +built into the client. See the +[ingestion overview](/docs/connect/overview/) for a side-by-side +comparison and the list of languages with native QWP clients available +today. + +::: + The InfluxDB Line Protocol is for **data ingestion only**. For building queries, see the @@ -28,18 +44,18 @@ Each ILP client library also has its own language-specific documentation set. This supporting document thus provides an overview to aid in client selection and initial configuration: -1. [Client libraries](/docs/ingestion/ilp/overview/#client-libraries) -2. [Server-Side configuration](/docs/ingestion/ilp/overview/#server-side-configuration) -3. [Transport selection](/docs/ingestion/ilp/overview/#transport-selection) -4. [Client-Side configuration](/docs/ingestion/ilp/overview/#client-side-configuration) -5. [Error handling](/docs/ingestion/ilp/overview/#error-handling) -6. [Authentication](/docs/ingestion/ilp/overview/#authentication) -7. [Table and column auto-creation](/docs/ingestion/ilp/overview/#table-and-column-auto-creation) -8. [Timestamp column name](/docs/ingestion/ilp/overview/#timestamp-column-name) -9. [HTTP Transaction semantics](/docs/ingestion/ilp/overview/#http-transaction-semantics) -10. [Exactly-once delivery](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery) -11. 
[Multiple URLs for High Availability](/docs/ingestion/ilp/overview/#multiple-urls-for-high-availability) -12. [Health Check](/docs/ingestion/ilp/overview/#health-check) +1. [Client libraries](/docs/connect/compatibility/ilp/overview/#client-libraries) +2. [Server-Side configuration](/docs/connect/compatibility/ilp/overview/#server-side-configuration) +3. [Transport selection](/docs/connect/compatibility/ilp/overview/#transport-selection) +4. [Client-Side configuration](/docs/connect/compatibility/ilp/overview/#client-side-configuration) +5. [Error handling](/docs/connect/compatibility/ilp/overview/#error-handling) +6. [Authentication](/docs/connect/compatibility/ilp/overview/#authentication) +7. [Table and column auto-creation](/docs/connect/compatibility/ilp/overview/#table-and-column-auto-creation) +8. [Timestamp column name](/docs/connect/compatibility/ilp/overview/#timestamp-column-name) +9. [HTTP Transaction semantics](/docs/connect/compatibility/ilp/overview/#http-transaction-semantics) +10. [Exactly-once delivery](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery) +11. [Multiple URLs for High Availability](/docs/connect/compatibility/ilp/overview/#multiple-urls-for-high-availability) +12. [Health Check](/docs/connect/compatibility/ilp/overview/#health-check) ## Client libraries @@ -104,7 +120,7 @@ schema::key1=value1;key2=value2;key3=value3; It is made up of the following parts: - **Schema**: One of the specified schemas in the - [core parameters](/docs/ingestion/ilp/overview/#core-parameters) section + [core parameters](/docs/connect/compatibility/ilp/overview/#core-parameters) section below - **Key=Value**: Each key-value pair sets a specific parameter for the client - **Terminating semicolon**: A semicolon must follow the last key-value pair @@ -179,7 +195,7 @@ completeness and for users who have specific requirements. 
::: -_See the [Authentication](/docs/ingestion/ilp/overview/#authentication) +_See the [Authentication](/docs/connect/compatibility/ilp/overview/#authentication) section below for configuration._ - **auth_timeout**: Timeout for TCP authentication with QuestDB server, in @@ -281,9 +297,9 @@ provides holistic security out-of-the-box. ::: InfluxDB Line Protocol supports authentication via HTTP Basic Authentication, -using [the HTTP Parameters](/docs/ingestion/ilp/overview/#http-parameters), +using [the HTTP Parameters](/docs/connect/compatibility/ilp/overview/#http-parameters), or via token when using the TCP transport, using -[the TCP Parameters](/docs/ingestion/ilp/overview/#tcp-parameters). +[the TCP Parameters](/docs/connect/compatibility/ilp/overview/#tcp-parameters). A similar pattern is used across all client libraries. If you want to use a TCP token, you need to configure your QuestDB server. This document will break down diff --git a/documentation/ingestion/import-csv.md b/documentation/ingestion/import-csv.md index fad81f382..acc464e99 100644 --- a/documentation/ingestion/import-csv.md +++ b/documentation/ingestion/import-csv.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/import-csv title: CSV Import sidebar_label: CSV Import description: @@ -9,7 +10,7 @@ description: :::tip CSV import is for bulk/batch loading. For streaming data, use -[InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) instead. +[InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead. ::: There are three methods for CSV import: @@ -623,7 +624,7 @@ The REST API is better suited when the following conditions are true: For database migrations, or uploading one large CSV file into QuestDB, users may consider using the `COPY` SQL command. 
See [COPY command documentation](/docs/query/sql/copy/) and -[Guide on CSV import](/docs/ingestion/import-csv/#import-csv-via-copy-sql) for more +[Guide on CSV import](/docs/connect/compatibility/import-csv/#import-csv-via-copy-sql) for more details. ::: @@ -772,4 +773,4 @@ curl -F data=@weather.csv 'http://localhost:9000/imp' ``` For more information on the `/imp` entry point, refer to the -[REST API documentation](/docs/query/rest-api/#imp---import-data). +[REST API documentation](/docs/connect/compatibility/rest-api/#imp---import-data). diff --git a/documentation/ingestion/java-embedded.md b/documentation/ingestion/java-embedded.md index 2b2453578..77f1d7b2c 100644 --- a/documentation/ingestion/java-embedded.md +++ b/documentation/ingestion/java-embedded.md @@ -1,4 +1,5 @@ --- +slug: /connect/java-embedded title: Java (embedded) description: Java embedded API reference documentation. --- @@ -449,5 +450,5 @@ caching of records pointless. ## InfluxDB Line Protocol client library -We have [Java Client Library](/docs/ingestion/clients/java/) to allow fast data +We have [Java Client Library](/docs/connect/clients/java/) to allow fast data ingestion. diff --git a/documentation/ingestion/message-brokers/flink.md b/documentation/ingestion/message-brokers/flink.md index 5e2d8e22e..48b57f64c 100644 --- a/documentation/ingestion/message-brokers/flink.md +++ b/documentation/ingestion/message-brokers/flink.md @@ -1,4 +1,5 @@ --- +slug: /connect/message-brokers/flink title: QuestDB Flink connector sidebar_label: Flink description: diff --git a/documentation/ingestion/message-brokers/kafka.md b/documentation/ingestion/message-brokers/kafka.md index 449746dba..256ca56f6 100644 --- a/documentation/ingestion/message-brokers/kafka.md +++ b/documentation/ingestion/message-brokers/kafka.md @@ -1,4 +1,5 @@ --- +slug: /connect/message-brokers/kafka title: Ingestion from Kafka Overview sidebar_label: Kafka description: Apache Kafka and QuestDB Kafka Connector overview and guide. 
Thorough explanations and examples. @@ -29,7 +30,7 @@ automatically, and requires minimal configuration. The [QuestDB Kafka connector](https://github.com/questdb/kafka-questdb-connector) is built on the [Kafka Connect framework](https://docs.confluent.io/platform/current/connect/index.html) and uses InfluxDB Line Protocol for high-performance data transfer. It works -with Kafka-compatible systems like [Redpanda](/docs/ingestion/message-brokers/redpanda/). +with Kafka-compatible systems like [Redpanda](/docs/connect/message-brokers/redpanda/). ### Quick start @@ -204,7 +205,7 @@ client.conf.string=https::addr=questdb.example.com:9000;retry_timeout=60000; client.conf.string=http::addr=localhost:9000;token=${QUESTDB_TOKEN}; ``` -See the [Java Client configuration guide](/docs/ingestion/clients/java) for all +See the [Java Client configuration guide](/docs/connect/clients/java) for all available client options. :::danger @@ -602,7 +603,7 @@ Additional examples are available on GitHub: [Apache Flink](https://flink.apache.org/) provide rich APIs for data transformation, enrichment, and filtering with built-in fault tolerance. -QuestDB offers a [Flink connector](/docs/ingestion/message-brokers/flink/) for +QuestDB offers a [Flink connector](/docs/connect/message-brokers/flink/) for users who need complex transformations while ingesting from Kafka. **Use stream processing when you need:** diff --git a/documentation/ingestion/message-brokers/redpanda.md b/documentation/ingestion/message-brokers/redpanda.md index 2eb2a5bc1..454e07ee0 100644 --- a/documentation/ingestion/message-brokers/redpanda.md +++ b/documentation/ingestion/message-brokers/redpanda.md @@ -1,4 +1,5 @@ --- +slug: /connect/message-brokers/redpanda title: Redpanda description: Guide for using Redpanda with QuestDB via the QuestDB Kafka connector. 
Also @@ -9,7 +10,7 @@ description: [Redpanda](https://redpanda.com/) is an open-source, Kafka-compatible streaming platform that uses C++ and Raft to replace Java and Zookeeper. Since it is Kafka compatible, it can be used with the -[QuestDB Kafka connector](/docs/ingestion/message-brokers/kafka/#questdb-kafka-connect-connector), +[QuestDB Kafka connector](/docs/connect/message-brokers/kafka/#questdb-kafka-connect-connector), providing an alternative data [streaming](/glossary/stream-processing) option. This guide also covers [Redpanda Connect](#redpanda-connect), a stream processing @@ -253,7 +254,7 @@ from the Kafka message metadata. The connector can be also configured to use a custom timestamp field from the Kafka message. See the -[QuestDB Kafka Connector reference manual](/docs/ingestion/message-brokers/kafka/#designated-timestamps) +[QuestDB Kafka Connector reference manual](/docs/connect/message-brokers/kafka/#designated-timestamps) for details. A possible improvement could be to explicitly create the target table in QuestDB @@ -261,17 +262,17 @@ instead of relying on the connector to create it implicitly. This way, you can control the schema, [partitioning](/glossary/database-partitioning/) and data types of the table. It also enables QuestDB's native [deduplication feature](/docs/concepts/deduplication). Deduplication is required -for [Exactly-Once](/docs/ingestion/message-brokers/kafka/#fault-tolerance) +for [Exactly-Once](/docs/connect/message-brokers/kafka/#fault-tolerance) processing semantics. ## See also -- [QuestDB Kafka Connector reference manual](/docs/ingestion/message-brokers/kafka/) +- [QuestDB Kafka Connector reference manual](/docs/connect/message-brokers/kafka/) ## Redpanda Connect Redpanda Connect is a stream processing tool that can be used to build data pipelines. -It's a lightweight alternative to [Apache Kafka Connect](/docs/ingestion/message-brokers/kafka/#questdb-kafka-connect-connector). 
+It's a lightweight alternative to [Apache Kafka Connect](/docs/connect/message-brokers/kafka/#questdb-kafka-connect-connector). This guide shows the steps to use the Redpanda Connect to write JSON data as rows into a QuestDB table. diff --git a/documentation/ingestion/message-brokers/telegraf.md b/documentation/ingestion/message-brokers/telegraf.md index b05bf5d63..86f8e7a42 100644 --- a/documentation/ingestion/message-brokers/telegraf.md +++ b/documentation/ingestion/message-brokers/telegraf.md @@ -1,4 +1,5 @@ --- +slug: /connect/message-brokers/telegraf title: Telegraf description: Learn how to use Telegraf to collect system metrics and send this data to diff --git a/documentation/ingestion/overview.md b/documentation/ingestion/overview.md index f050359b8..7e0760163 100644 --- a/documentation/ingestion/overview.md +++ b/documentation/ingestion/overview.md @@ -1,124 +1,104 @@ --- -title: Ingestion overview +slug: /connect/overview +title: Connect to QuestDB +sidebar_label: Overview description: - Learn how to ingest data into QuestDB, whether through the InfluxDB Line - Protocol, PostgreSQL Wire Protocol, or through a service like Apache Kafka, - Apache Spark, and more. + How to send data to QuestDB and run queries. Choose between native client + libraries, compatibility protocols (ILP, PGWire, REST), or the wire-protocol + specifications. --- -import Screenshot from "@theme/Screenshot" - import { Clients } from "../../src/components/Clients" -For high-throughput data ingestion, use our **first-party clients** with the -**InfluxDB Line Protocol (ILP)**. This is the recommended method for production -workloads. - -## First-party clients - -Our first-party clients are **the fastest way to insert data**. They excel -with high-throughput, low-latency data streaming and are the recommended choice -for production deployments. - -To start quickly, select your language: +QuestDB exposes several ways for applications to send data and run queries. 
+Pick the path that matches your environment. + +## Choose your path + +| Your situation | Use | +| ----------------------------------------------------------------------- | ---------------------------------------------------------------- | +| Greenfield app — want the best throughput, durability, and feature set | [**Client Libraries**](#client-libraries) | +| Existing InfluxDB collectors, Telegraf, or Kafka / Flink pipelines | [Compatibility → ILP](/docs/connect/compatibility/ilp/overview/) | +| Postgres-shaped data layer, BI tools, ORMs | [Compatibility → PGWire](/docs/connect/compatibility/pgwire/overview/) | +| HTTP scripts, ad-hoc `curl`, or CSV imports | [Compatibility → REST API](/docs/connect/compatibility/rest-api/) | +| Building a new QuestDB client library (QWP spec) | [Wire Protocols](/docs/connect/wire-protocols/overview/) | + +## Client Libraries + +The first-party libraries for **Java, Python, Go, Rust, Node.js, C & C++, and +.NET** are the recommended way to talk to QuestDB. They speak the +**QuestDB Wire Protocol (QWP)** and unify ingest and query under one +configuration and one connection. + +### QWP support + +QWP ships in the libraries below. The remaining language clients are being +updated — until they ship a QWP build, they continue to use ILP for ingestion +and PGWire for queries. + +| Language | QWP support | +| --------- | ----------- | +| Java | ✓ | +| C & C++ | ✓ | +| Rust | ✓ | +| Go | ✓ | +| .NET | ✓ | +| Python | Planned | +| Node.js | Planned | + +Highlights: + +- **Binary on the wire** — roughly half the size of ILP or HTTP. +- **Streaming both directions** — sustained 800 MiB/s ingress, up to + 2.5 GiB/s egress on a single connection. +- **Automatic failover** — ingress and egress fail over without application + intervention. +- **Store-and-forward** — survives server outages, including full server + destruction. Sub-200 ns offload latency. 
+- **One configuration** — a single + [connect string](/docs/connect/clients/connect-string/) drives every + option, portable across all languages. +- **Schema-flexible** — automatic table creation and on-the-fly column + additions. + +Pick a language: -Our clients utitilize the InfluxDB Line Protocol (ILP) which is an insert-only -protocol that bypasses SQL `INSERT` statements, thus achieving significantly -higher throughput. It also provides some key benefits: - -- **Automatic table creation**: No need to define your schema upfront. -- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions - -An example of "data-in" - via the line - appears as: - -```shell -trades,symbol=ETH-USD,side=sell price=2615.54,amount=0.00044 1646762637609765000\n -trades,symbol=BTC-USD,side=sell price=39269.98,amount=0.001 1646762637710419000\n -trades,symbol=ETH-USD,side=buy price=2615.4,amount=0.002 1646762637764098000\n -``` - -Once inside of QuestDB, it's yours to manipulate and query via extended SQL. Please note that table and column names -must follow the QuestDB [naming rules](/docs/query/sql/create-table/#table-name). - -### Ingestion characteristics - -QuestDB is optimized for both throughput and latency. Send data when you have -it - there's no need to artificially batch on the client side. - -| Mode | Throughput (per connection) | -|------|----------------------------| -| Batched writes | ~400k rows/sec | -| Single-row writes | ~60-80k rows/sec | - -Clients control batching via explicit `flush()` calls. Each flush ends a batch -and sends it to the server. If your data arrives one row at a time, send it one -row at a time - QuestDB handles this efficiently. 
If data arrives in bursts, -batch it naturally and flush when ready. - -Server-side, WAL processing is asynchronous. Transactions are grouped into -segments that roll based on size or row count, requiring no client-side tuning. - -## Message brokers and queues - -If you already have Kafka, Flink, or another streaming platform in your stack, -QuestDB integrates seamlessly. - -See our integration guides: - -- [Flink](/docs/ingestion/message-brokers/flink) -- [Kafka](/docs/ingestion/message-brokers/kafka) -- [Redpanda](/docs/ingestion/message-brokers/redpanda) -- [Telegraf](/docs/ingestion/message-brokers/telegraf) - -## CSV import - -For bulk imports or one-time data loads, use the -[Import CSV tab](/docs/getting-started/web-console/import-csv) in the [Web Console](/docs/getting-started/web-console/overview/): - - - -For all CSV import methods, including using the APIs directly, see the -[CSV Import Guide](/docs/ingestion/import-csv/). - -## Create new data +## Compatibility protocols -No data yet? Just starting? No worries. We've got you covered. +Use these if you have existing tooling that speaks them, or if a native client +library isn't a fit for your environment. -There are several quick scaffolding options: +- **[InfluxDB Line Protocol (ILP)](/docs/connect/compatibility/ilp/overview/)** — the + text-based ingest protocol used by InfluxDB. Works with Telegraf, Kafka, + Redpanda, Flink, and any collector that already emits ILP. +- **[PostgreSQL Wire Protocol (PGWire)](/docs/connect/compatibility/pgwire/overview/)** — query + QuestDB from any Postgres-compatible driver (psycopg, JDBC, pgx, …), BI + tools (Tableau, Grafana, Metabase), and ORMs. +- **[REST API](/docs/connect/compatibility/rest-api/)** — HTTP / JSON endpoints for ad-hoc + queries, scripting, and bulk [CSV import](/docs/connect/compatibility/import-csv/). -1. [QuestDB demo instance](https://demo.questdb.io): Hosted, fully loaded and - ready to go. 
Quickly explore the [Web Console](/docs/getting-started/web-console/overview/) and SQL syntax. -2. [Create my first data set guide](/docs/getting-started/create-database/): Create - tables, use `rnd_` functions and make your own data. -3. [Sample dataset repos](https://github.com/questdb/sample-datasets): IoT, - e-commerce, finance or git logs? Check them out! -4. [Quick start repos](https://github.com/questdb/questdb-quickstart): - Code-based quick starts that cover ingestion, querying and data visualization - using common programming languages and use cases. Also, a cat in a tracksuit. -5. [Time series streaming analytics template](https://github.com/questdb/time-series-streaming-analytics-template): - A handy template for near real-time analytics using open source technologies. +These remain fully supported. They are grouped as *compatibility* because they +predate QWP and exist primarily to integrate with tooling that already speaks +them. -## Next step - queries +## Wire protocols -Depending on your infrastructure, it should now be apparent which ingestion -method is worth pursuing. +The byte-on-the-wire specifications for the **QuestDB Wire Protocol (QWP)**, +including WebSocket variants for ingress and egress and a UDP variant for +fire-and-forget metrics. Read these if you are **building a new QuestDB +client library** in a language we don't yet support, or embedding QuestDB +connectivity into an existing framework. -Of course, ingestion (data-in) is only half the battle. +See the [Wire Protocols reference](/docs/connect/wire-protocols/overview/). -> **Your next best step? Learn how to query and explore data-out from the -> [Query & SQL Overview](/docs/query/overview/).** +## Next steps -It might also be a solid bet to review -[timestamp basics](/docs/concepts/timestamps-timezones/). +- Pick a language above and follow its quick-start. +- For SQL syntax, functions, and operators, see the + [SQL Reference](/docs/query/overview/). +- New to QuestDB? 
Try the [demo instance](https://demo.questdb.io), or follow + the [first-data-set guide](/docs/getting-started/create-database/). +- Background on time-series fundamentals: + [timestamp basics](/docs/concepts/timestamps-timezones/). diff --git a/documentation/integrations/data-processing/pandas.md b/documentation/integrations/data-processing/pandas.md index eef59e225..7b5eef1e1 100644 --- a/documentation/integrations/data-processing/pandas.md +++ b/documentation/integrations/data-processing/pandas.md @@ -23,7 +23,7 @@ Protocol. ## Querying vs. Ingestion This page focuses on ingestion, which is the process of inserting data into -QuestDB. For querying data, see [PGWire client guide](/docs/query/pgwire/python/#integration-with-pandas). +QuestDB. For querying data, see [PGWire client guide](/docs/connect/compatibility/pgwire/python/#integration-with-pandas). ## Overview diff --git a/documentation/integrations/data-processing/polars.md b/documentation/integrations/data-processing/polars.md index e725b101c..ee2dc68bd 100644 --- a/documentation/integrations/data-processing/polars.md +++ b/documentation/integrations/data-processing/polars.md @@ -54,9 +54,9 @@ it makes ConnectorX to avoid using features not supported by QuestDB. ## Ingestion vs Querying This guides deals with querying data from QuestDB using Polars. For ingestion to QuestDB we recommend using the -[QuestDB Python client](/docs/ingestion/clients/python/). +[QuestDB Python client](/docs/connect/clients/python/). 
## Additional Resources - [Integration with Pandas](/docs/integrations/data-processing/pandas/) -- [QuestDB Client for fast ingestion](/docs/ingestion/clients/python/) -- [Python clients guide](/docs/query/pgwire/python/) \ No newline at end of file +- [QuestDB Client for fast ingestion](/docs/connect/clients/python/) +- [Python clients guide](/docs/connect/compatibility/pgwire/python/) \ No newline at end of file diff --git a/documentation/integrations/other/airbyte.md b/documentation/integrations/other/airbyte.md index 1f32ea6c5..2e4872ab7 100644 --- a/documentation/integrations/other/airbyte.md +++ b/documentation/integrations/other/airbyte.md @@ -189,7 +189,7 @@ SELECT * FROM cpu; ``` 3. **Inspect the Results**: - After running the query, the results should display columns for each metric collected by the [Telegraf](/docs/ingestion/message-brokers/telegraf/) plugin for monitoring CPU usage, such as: + After running the query, the results should display columns for each metric collected by the [Telegraf](/docs/connect/message-brokers/telegraf/) plugin for monitoring CPU usage, such as: - `usage_idle` - `usage_user` diff --git a/documentation/integrations/other/databento.md b/documentation/integrations/other/databento.md index 6b53c3471..c2de8d5d1 100644 --- a/documentation/integrations/other/databento.md +++ b/documentation/integrations/other/databento.md @@ -16,7 +16,7 @@ For a deeper dive, see our [Databento & QuestDB blog](/blog/ingesting-live-marke - [QuestDB](/download/) - [Databento Python client](https://pypi.org/project/databento/) -- [QuestDB Python client](/docs/ingestion/clients/python/) +- [QuestDB Python client](/docs/connect/clients/python/) - [Grafana](/docs/integrations/visualization/grafana/) (Optional) Install the required Python libraries: diff --git a/documentation/integrations/other/mindsdb.md b/documentation/integrations/other/mindsdb.md index 503af5d55..65abdda26 100644 --- a/documentation/integrations/other/mindsdb.md +++ 
b/documentation/integrations/other/mindsdb.md @@ -104,7 +104,7 @@ ports: ### Adding data to QuestDB -There are different ways to [insert data to QuestDB](/docs/ingestion/overview/). +There are different ways to [insert data to QuestDB](/docs/connect/overview/). #### SQL diff --git a/documentation/integrations/overview.md b/documentation/integrations/overview.md index a781a802b..84c22b556 100644 --- a/documentation/integrations/overview.md +++ b/documentation/integrations/overview.md @@ -29,13 +29,13 @@ platforms: Ingest, store, and process high-throughput and real-time data streams with these integrations: -- **[Apache Kafka](/docs/ingestion/message-brokers/kafka):** A distributed +- **[Apache Kafka](/docs/connect/message-brokers/kafka):** A distributed event streaming platform for high-throughput data pipelines. -- [Telegraf](/docs/ingestion/message-brokers/telegraf/): Collect and report metrics from +- [Telegraf](/docs/connect/message-brokers/telegraf/): Collect and report metrics from various sources. -- **[Redpanda](/docs/ingestion/message-brokers/redpanda/):** A Kafka-compatible streaming +- **[Redpanda](/docs/connect/message-brokers/redpanda/):** A Kafka-compatible streaming platform for simplified data pipelines. -- [Apache Flink](/docs/ingestion/message-brokers/flink/): Process real-time data streams +- [Apache Flink](/docs/connect/message-brokers/flink/): Process real-time data streams efficiently. tasks at scale. 
diff --git a/documentation/integrations/visualization/powerbi.md b/documentation/integrations/visualization/powerbi.md index 99c896aea..b032ef9ea 100644 --- a/documentation/integrations/visualization/powerbi.md +++ b/documentation/integrations/visualization/powerbi.md @@ -131,5 +131,5 @@ If you'd like QuestDB to support this feature, please add a 👍 to [this GitHub ## Further Reading -- [QuestDB PGWire](/docs/query/pgwire/overview/) +- [QuestDB PGWire](/docs/connect/compatibility/pgwire/overview/) - [PowerBI Documentation](https://docs.microsoft.com/en-us/power-bi/) \ No newline at end of file diff --git a/documentation/integrations/visualization/qstudio.md b/documentation/integrations/visualization/qstudio.md index 1c50f3129..b16590349 100644 --- a/documentation/integrations/visualization/qstudio.md +++ b/documentation/integrations/visualization/qstudio.md @@ -52,4 +52,4 @@ export const screenshotTitle = ## See also -- [QuestDB Postgres wire protocol](/docs/query/pgwire/overview/) +- [QuestDB Postgres wire protocol](/docs/connect/compatibility/pgwire/overview/) diff --git a/documentation/introduction.md b/documentation/introduction.md index f67073df6..0376b1672 100644 --- a/documentation/introduction.md +++ b/documentation/introduction.md @@ -45,7 +45,7 @@ automated backups, and multi-tier storage with seamless object storage integrati 1. **[Quick start](/docs/getting-started/quick-start/)** - Install and run QuestDB 2. **[Schema design](/docs/schema-design-essentials/)** - Design your tables -3. **[Ingest data](/docs/ingestion/overview/)** - Bring your data using QuestDB clients +3. **[Ingest data](/docs/connect/overview/)** - Bring your data using QuestDB clients 4. 
**[Query data](/docs/query/overview/)** - Analyze with SQL ## Guides diff --git a/documentation/operations/monitoring-alerting.md b/documentation/operations/monitoring-alerting.md index 940fb43d6..2e7bc597c 100644 --- a/documentation/operations/monitoring-alerting.md +++ b/documentation/operations/monitoring-alerting.md @@ -200,7 +200,7 @@ WHERE walEnabled **Resolution:** -- Use the [official client libraries](/docs/ingestion/overview/#first-party-clients) +- Use the [official client libraries](/docs/connect/overview/#client-libraries) which handle batching automatically - For custom ILP clients, configure auto-flush by row count or time interval rather than flushing after each row diff --git a/documentation/operations/task-automation.md b/documentation/operations/task-automation.md index 7fff2da87..643dedace 100644 --- a/documentation/operations/task-automation.md +++ b/documentation/operations/task-automation.md @@ -5,7 +5,7 @@ description: Learn how to automate QuestDB tasks using the REST HTTP API, or one of our recommended workflow orchestrators. --- -QuestDB provides a simple [HTTP API](/docs/query/rest-api/) that allows you to interact with the database using SQL queries. +QuestDB provides a simple [HTTP API](/docs/connect/compatibility/rest-api/) that allows you to interact with the database using SQL queries. This API can be leveraged for automation using Bash scripts and scheduled execution via cron jobs. This is a lightweight approach that requires minimal dependencies. diff --git a/documentation/partials/_curl.imp.insert.partial.mdx b/documentation/partials/_curl.imp.insert.partial.mdx index 27afb2ee8..15f3cbd9d 100644 --- a/documentation/partials/_curl.imp.insert.partial.mdx +++ b/documentation/partials/_curl.imp.insert.partial.mdx @@ -7,7 +7,7 @@ curl -F data=@data.csv http://localhost:9000/imp?name=table_name This example overwrites an existing table and specifies a timestamp format and a designated timestamp column. 
For more information on the optional parameters to specify timestamp formats, partitioning and renaming tables, see the -[REST API documentation](/docs/query/rest-api/#examples). +[REST API documentation](/docs/connect/compatibility/rest-api/#examples). ```bash title="Providing a user-defined schema" curl \ diff --git a/documentation/partials/_qwp.message-header.partial.mdx b/documentation/partials/_qwp.message-header.partial.mdx new file mode 100644 index 000000000..7f8187bbd --- /dev/null +++ b/documentation/partials/_qwp.message-header.partial.mdx @@ -0,0 +1,11 @@ +```text +Offset Size Type Field Description +------ ---- ------ ------------- -------------------------------- +0 4 int32 magic "QWP1" (0x31505751) +4 1 uint8 version Protocol version (0x01) +5 1 uint8 flags Encoding flags +6 2 uint16 table_count Number of table blocks +8 4 uint32 payload_length Payload size in bytes +``` + +**Total message size** = 12 + payload_length. diff --git a/documentation/partials/_sf-dedup-warning.partial.mdx b/documentation/partials/_sf-dedup-warning.partial.mdx new file mode 100644 index 000000000..d01c79f1f --- /dev/null +++ b/documentation/partials/_sf-dedup-warning.partial.mdx @@ -0,0 +1,11 @@ +:::caution Replay is at-least-once — enable DEDUP + +After a reconnect or a sender restart, the client replays frames the server +may have accepted but not yet acknowledged. Without +[DEDUP](/docs/concepts/deduplication/) on the target table, replay produces +duplicate rows. Tables ingested over a reconnecting or multi-host connection +**must** declare `DEDUP UPSERT KEYS(...)` covering row identity. See +[Delivery semantics](/docs/concepts/delivery-semantics/) for the full +at-least-once / exactly-once model. 
+ +::: diff --git a/documentation/protocols/overview.md b/documentation/protocols/overview.md new file mode 100644 index 000000000..a8d10a667 --- /dev/null +++ b/documentation/protocols/overview.md @@ -0,0 +1,54 @@ +--- +slug: /connect/wire-protocols/overview +title: Wire protocols overview +description: + QuestDB's wire-protocol specifications for client implementers. +--- + +:::note Page in draft + +This is the day-one skeleton for the Protocols section. Content is being +filled in. + +::: + +:::info Audience + +This section documents QuestDB's wire protocols at the byte-on-the-wire +level for **client implementers** — engineers building a new QuestDB client +from scratch. End users should see the +[language client guides](/docs/connect/overview) and the +[connect string reference](/docs/connect/clients/connect-string). + +::: + +## QWP — QuestDB Wire Protocol + +QWP is QuestDB's native wire protocol for both ingest and query traffic. The +specifications below are normative — if a client's behaviour conflicts with +a spec, the spec wins. + +| Protocol | Transport | Purpose | +| --- | --- | --- | +| [QWP Ingress (WebSocket)](/docs/connect/wire-protocols/qwp-ingress-websocket) | WebSocket | Columnar binary ingest with optional store-and-forward | +| [QWP Egress (WebSocket)](/docs/connect/wire-protocols/qwp-egress-websocket) | WebSocket | Streaming SQL query results | + +## Versioning + + + +## Reference implementation + +The reference client implementation is the Java client +([`java-questdb-client`](https://github.com/questdb/java-questdb-client)). +Each protocol page below pins the reference-implementation commit that +matches the documented version. + + + +## Source specifications + +The canonical specs live in the QuestDB Enterprise repository under +`docs/qwp/`. The pages in this section are the public expression of those +specs; the specs themselves remain the source of truth. 
diff --git a/documentation/protocols/qwp-egress-websocket.md b/documentation/protocols/qwp-egress-websocket.md new file mode 100644 index 000000000..73174786b --- /dev/null +++ b/documentation/protocols/qwp-egress-websocket.md @@ -0,0 +1,1005 @@ +--- +slug: /connect/wire-protocols/qwp-egress-websocket +title: QWP egress (WebSocket) +description: + Wire-protocol specification for QuestDB's WebSocket-based streaming + query-result protocol. +--- + +import QwpMessageHeader from "../partials/_qwp.message-header.partial.mdx" + +:::info Audience + +This is a **wire-protocol specification** for client implementers building a +new QuestDB query client from scratch. End users should see the +[language client guides](/docs/query/overview) and the +[connect string reference](/docs/connect/clients/connect-string). + +::: + +QWP egress streams SQL query results to clients over +[WebSocket](https://datatracker.ietf.org/doc/html/rfc6455), reusing the same +columnar binary encoding as +[QWP ingress](/docs/connect/wire-protocols/qwp-ingress-websocket/). The column types, null +handling, and per-column data encodings are identical, with one exception: +`DATE` columns carry an encoding flag on egress (see +[Flags byte](#flags-byte)). Egress adds a message +kind byte at the start of each payload, nine new message kinds (eight for the +request/response lifecycle, plus the v2-only `SERVER_INFO`), and byte-credit +flow control. + +For data ingestion, see +[QWP ingress (WebSocket)](/docs/connect/wire-protocols/qwp-ingress-websocket/). + +## Why implement a QWP query client + +If your language already has a QuestDB client, use it — the +[language client guides](/docs/query/overview) list what's available. The +rest of this section is for implementers writing a new one (e.g., to bring +QWP query support to JavaScript, Rust, .NET, or runtimes that the existing +clients don't cover).
+ +Compared with the row-oriented HTTP `/exec` JSON endpoint, QWP egress uses +a denser binary encoding that delivers higher throughput and lower CPU use on +both ends: + +- **Columnar result batches.** Each batch is a single QWP table block — the + same shape QuestDB uses on disk. No per-row type tags, no JSON parsing. +- **Server-driven schemas.** After the first batch carries the schema in + full mode, subsequent batches reference it by integer ID. No repeated + column metadata on the wire. +- **Per-connection symbol dictionary.** Repeated queries on the same + connection (BI dashboards refreshing identical SELECTs) reuse prior + symbol IDs without retransmitting strings. +- **Byte-credit flow control.** The client tells the server how many bytes + it's ready to receive; the server pauses production when the window is + exhausted. Bounded memory for arbitrarily large result sets. +- **zstd compression (optional).** Negotiated at the upgrade, + applied per-batch when it shrinks the payload. +- **Bind parameters.** Typed binds prevent SQL injection and let the + server reuse plans without re-parsing. +- **Multi-host failover (Enterprise).** Connect strings can list multiple + endpoints with role/zone preferences; clients reconnect and replay + on transport failure. + +A minimum-viable client that supports SELECTs with the common column types +(BOOLEAN, LONG, DOUBLE, TIMESTAMP, VARCHAR, SYMBOL) plus simple binds is +roughly 600 lines in a typed language, plus a WebSocket library +and (optionally) a zstd dependency. + +The authoritative reference implementation is +[`java-questdb-client`](https://github.com/questdb/java-questdb-client). It's +worth keeping open in a tab as you read this page. + +## Overview + +Key properties: + +- **Columnar result batches.** Each batch is a single QWP table block (schema + section followed by per-column data with null bitmaps). The decoder is the + same code path as ingress.
+- **Server-driven schemas.** The server assigns connection-scoped schema IDs. + Full mode (0x00) on the first batch of a query; reference mode (0x01) on + subsequent batches with the same column set. +- **Per-connection symbol dictionary.** The server accumulates symbol entries + across all queries on the connection. Repeated queries reuse prior IDs + without retransmitting the strings. +- **Byte-credit flow control.** The client grants the server permission to + send up to N bytes of result data. The server pauses once the credit window + is exhausted. A row floor guarantees forward progress. +- **One result set per request.** One `QUERY_REQUEST` produces zero or more + `RESULT_BATCH` frames followed by exactly one terminator (`RESULT_END`, + `EXEC_DONE`, or `QUERY_ERROR`). + +## Transport and versioning + +### Endpoint + +Egress uses a dedicated endpoint, separate from ingress: + +```text +GET /read/v1 +``` + +This separation lets operators route, scale, and authorize ingest and query +workloads independently. Mixed-mode clients open one connection per direction. + +### Version negotiation + +Version and compression are negotiated at the HTTP upgrade: + +**Client request headers:** + +| Header | Required | Description | +|-------------------------|----------|-----------------------------------------------------------------------------| +| `X-QWP-Max-Version` | No | Maximum QWP version the client supports. Defaults to 1 if absent. | +| `X-QWP-Client-Id` | No | Free-form client identifier (e.g., `java-egress/1.0.0`). | +| `X-QWP-Accept-Encoding` | No | Comma-separated list of acceptable result batch body encodings (see below). | +| `X-QWP-Max-Batch-Rows` | No | Client-preferred per-batch row cap; the server clamps to its own hard limit, so this only ever asks for *smaller* batches (lower latency to first row, more per-batch overhead). `0` or absent = server default. 
| + +**Server response headers:** + +| Header | Description | +|--------------------------|--------------------------------------------------------------------------| +| `X-QWP-Version` | Negotiated version = `min(clientMax, serverMax)`. | +| `X-QWP-Content-Encoding` | Server's selected encoding from the client's accept list. Absent = raw. | + +The connection-level contract from the ingress spec applies: every message's +header version byte must equal the negotiated version. + +### Authentication + +Authentication is handled at the HTTP level during the WebSocket upgrade, +identical to ingress. See the +[ingress authentication section](/docs/connect/wire-protocols/qwp-ingress-websocket/#authentication) +for supported methods. + +### Batch body compression + +`X-QWP-Accept-Encoding` is a comma-separated list of tokens. First match wins. + +| Token | Description | +|----------|---------------------------------------------------------------------------------| +| `raw` | No compression (also accepted as `identity`). | +| `zstd` | Whole-batch zstd compression. Optional `level=N` hint; server clamps to [1,9]. | + +When `zstd` is negotiated, individual `RESULT_BATCH` frames set `FLAG_ZSTD` +on a per-batch basis. A batch whose compressed form is larger than raw ships +uncompressed. The region before the payload (msg_kind + request_id + +batch_seq) is never compressed so the client can dispatch frames without +decompressing first. + +Absent `X-QWP-Accept-Encoding`, the server defaults to `raw`. + +### Current version + +Version 1 is the initial egress release. Version 2 adds an unsolicited +`SERVER_INFO` frame (see [SERVER_INFO](#server_info-0x18)) delivered as the first +WebSocket frame after the upgrade. A v1 client never sees it. + +## Client lifecycle + +The end-to-end shape of a QWP query client session, before the encoding +details: + +1. 
**Open WebSocket to `/read/v1`.** Standard `Upgrade: websocket` headers, + plus: + - `X-QWP-Max-Version: 2` — request v2 to receive `SERVER_INFO`; the + server downgrades to v1 if it doesn't support v2. + - `X-QWP-Client-Id: <name>/<version>` — recommended (e.g., + `java-egress/1.0.0`). + - `X-QWP-Accept-Encoding: zstd, raw` — optional; opt into compression. + - `X-QWP-Max-Batch-Rows: <N>` — optional; request smaller batches than + the server default (for lower latency to first row). + - Authentication header (`Authorization: Basic …` or `Authorization: Bearer …`). +2. **Verify the upgrade.** On `101 Switching Protocols`: + - `X-QWP-Version` is the negotiated version. Every server-to-client + message header must carry it in its `version` byte; client-to-server + frames carry no header. + - `X-QWP-Content-Encoding` is the server's chosen compression (absent + means `raw`). +3. **(v2 only) Read `SERVER_INFO`.** The first WebSocket binary frame + carries the server's role, cluster/node identity, and zone (if + advertised). Apply your `target=` / `zone=` filter before sending a + `QUERY_REQUEST`; if the role doesn't match, close and try the next + endpoint. +4. **Send `QUERY_REQUEST`.** Assign a fresh `request_id` (client-owned, + unique within the connection), include SQL text, bind parameters, and + `initial_credit` (`0` for unbounded streaming). The WebSocket binary + frame body starts directly with `msg_kind = 0x10` — see + [Message structure](#message-structure); client-to-server frames carry + no 12-byte QWP header. +5. **Drain frames demuxed by `request_id`.** The server streams + `RESULT_BATCH(seq=0, schema mode 0x00)`, then + `RESULT_BATCH(seq=1+, schema mode 0x01)`, until a terminator: + - `RESULT_END` — cursor exhausted, success. + - `EXEC_DONE` — non-SELECT statement, no rows; carries `rows_affected`. + - `QUERY_ERROR` — failure at any point in the lifecycle; terminal. + The server may interpose a `CACHE_RESET` between a terminator and the + next query's first frame; clients must process it before assuming + schema-ID or symbol-dict continuity. +6.
**Flow control.** If you set a non-zero `initial_credit`, send + `CREDIT(request_id, additional_bytes)` frames to keep the byte window + open. The server pauses production when the budget reaches zero (with + a one-batch row floor to guarantee progress). +7. **Cancel (optional).** Send `CANCEL(request_id)` to abort. Continue + draining in-flight `RESULT_BATCH` frames until the terminator + (`QUERY_ERROR(CANCELLED)` or, if it raced, `RESULT_END`). +8. **Close.** Send a WebSocket `Close` frame after the last expected + terminator has been drained. + +Reconnects reset connection-scoped state on both sides: schema registry, +symbol dictionary, and `batch_seq` (which restarts at `0` for any replayed +query on the new connection). + +## Message structure + +Egress framing is **asymmetric**: + +- **Server → client** frames carry the full 12-byte QWP header followed + by the payload. The header is byte-identical to the + [ingress header](/docs/connect/wire-protocols/qwp-ingress-websocket/#message-structure): + + <QwpMessageHeader /> + +- **Client → server** frames carry **only the payload**, starting directly + with `msg_kind`. There is no 12-byte QWP header on outbound client frames. + +```text ++------------------------------------------+ +| WebSocket frame body, server -> client: | +| Header (12 bytes) | +| Payload | +| msg_kind: uint8 | +| (kind-specific body) | ++------------------------------------------+ + ++------------------------------------------+ +| WebSocket frame body, client -> server: | +| Payload | +| msg_kind: uint8 | +| (kind-specific body) | ++------------------------------------------+ +``` + +:::warning Asymmetric framing — common stumbling block + +If you copy the ingress framing (which is symmetric — header on both +directions) into an egress client, the server reads the QWP magic's first +byte (`0x51`, the ASCII `Q`) as an unknown `msg_kind` and drops the +connection, which WebSocket libraries report as close code 1006 (abnormal +closure). Client frames must start directly with +`msg_kind`.
+ +The header is retained server-to-client because `RESULT_BATCH` uses the +header's `flags` byte (Gorilla, delta dict, zstd) and `payload_length`. +Client-to-server frames have no analogous needs: version is fixed from the +upgrade, `table_count` doesn't apply to control kinds, and the WebSocket +frame already carries the payload length. + +::: + +### Flags byte + +For `RESULT_BATCH` frames, the flags byte uses the ingress bit definitions +plus one egress-specific bit: + +| Bit | Name | Description | +|--------|--------------------------|-----------------------------------------------------------------------| +| `0x04` | `FLAG_GORILLA` | Gorilla delta-of-delta encoding on timestamp columns. | +| `0x08` | `FLAG_DELTA_SYMBOL_DICT` | Connection-scoped delta symbol dictionary section present. | +| `0x10` | `FLAG_ZSTD` | Payload after msg_kind/request_id/batch_seq is zstd-compressed. | + +`FLAG_GORILLA` and `FLAG_DELTA_SYMBOL_DICT` are always set on `RESULT_BATCH` +frames in the current implementation. When `FLAG_GORILLA` is set, every +TIMESTAMP, TIMESTAMP_NANOS, and DATE column carries a 1-byte encoding flag +before its value region: `0x00` = raw int64 values, `0x01` = Gorilla +bitstream. The server picks Gorilla when the column has at least three +non-null values and the delta-of-delta bitstream is smaller than +`nonNullCount * 8` bytes; unordered or jumpy columns fall back to raw. + +:::warning DATE is timestamp-ish on egress only — opposite of ingress + +The encoding flag (plus optional Gorilla) applies to `DATE` (`0x0B`) +**only on the egress wire**. On the **ingress** wire `DATE` is a plain +`int64` column written exactly like `LONG`: no encoding flag, never +Gorilla-encoded, even under `FLAG_GORILLA`. See the DATE asymmetry warning +in the [QWP ingress protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/). 
+ +A codec that reuses its egress DATE path for ingress (or vice-versa) +shifts every DATE value by one byte (a clean ×256) and breaks Gorilla DATE +entirely. + +::: + +## Message kinds + +| Code | Name | Direction | Description | +|--------|---------------|-----------|-----------------------------------------| +| `0x10` | QUERY_REQUEST | C -> S | SQL query plus bind parameters | +| `0x11` | RESULT_BATCH | S -> C | One table block of result rows | +| `0x12` | RESULT_END | S -> C | Cursor exhausted (success) | +| `0x13` | QUERY_ERROR | S -> C | Mid-stream or parse-time error | +| `0x14` | CANCEL | C -> S | Stop a running query | +| `0x15` | CREDIT | C -> S | Extend the byte-credit window | +| `0x16` | EXEC_DONE | S -> C | Non-SELECT statement acknowledgement | +| `0x17` | CACHE_RESET | S -> C | Clear connection-scoped caches | +| `0x18` | SERVER_INFO | S -> C | Server role and identity (v2 only) | + +Codes `0x00` and `0x01` are the ingress DATA_BATCH and RESPONSE kinds +(not used on the egress endpoint). Codes `0x19` through `0x1F` are reserved +for future egress kinds. `0x20+` is reserved for protocol extensions. + +## QUERY_REQUEST (0x10) + +Client to server. Initiates a new query cursor. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x10 | +| request_id: int64 Client-assigned, unique | +| within the connection | +| sql_length: varint UTF-8 byte length | +| sql_bytes: bytes SQL text | +| initial_credit: varint Bytes; 0 = unbounded | +| bind_count: varint Number of bind parameters | +| For each bind parameter (in declaration order): | +| type_code: uint8 Column type code | +| bind_block: column_data Ingress column encoding | +| with row_count = 1 | ++----------------------------------------------------------+ +``` + +### request_id + +64-bit client-assigned identifier. It is echoed back by every server-to-client +frame related to the query (`RESULT_BATCH`, `RESULT_END`, `QUERY_ERROR`, +`EXEC_DONE`).
The +client may reuse a `request_id` only after observing the terminator for the +previous use. + +### Bind parameters + +A bind parameter is encoded exactly as a one-row column under the +[ingress column data encoding](/docs/connect/wire-protocols/qwp-ingress-websocket/#column-data-encoding). +Each block begins with a `type_code` (uint8), followed by the standard +`null_flag` byte and either zero or one value. + +A NULL bind parameter is: `type_code` + `null_flag = 0x01` + bitmap byte +`0x01`, with no value bytes following. + +DECIMAL binds carry the 1-byte scale prefix. ARRAY binds carry the per-row +dimension header. Symbol bind parameters are encoded as VARCHAR (no dictionary +for a single value). + +:::note Server leniency + +The current server decoder accepts a SYMBOL wire type code for a bind +parameter and treats it identically to VARCHAR. Compliant clients should still +send VARCHAR. A future revision may reject SYMBOL bind type codes. + +::: + +### Concurrency + +:::note Phase 1 limitation + +The current implementation supports a single in-flight query per connection. +The server rejects a second `QUERY_REQUEST` before the active query terminates. +The wire protocol allows multiple in-flight queries (demultiplexed by +`request_id`); multi-query support is planned for a future release. + +::: + +## RESULT_BATCH (0x11) + +Server to client. Carries one table block of result rows. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x11 | +| request_id: int64 From the originating | +| QUERY_REQUEST | +| batch_seq: varint Monotonic per request, | +| starting at 0 | +| (rest of payload: optional delta symbol dictionary, | +| then exactly one table block) | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `1`. The table block format is identical to +ingress: schema section followed by per-column data. 
The table name is empty +(`name_length = 0`); result sets have no table name. + +**Schema handling:** + +- First batch for a query: schema mode 0x00 (full) with a server-assigned + schema_id. +- Subsequent batches with the same columns: schema mode 0x01 (reference). + +If the result set is empty, the server still sends one `RESULT_BATCH` with +`row_count = 0` so the client receives the schema, followed by `RESULT_END`. + +## RESULT_END (0x12) + +Server to client. Signals successful end of stream. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x12 | +| request_id: int64 | +| final_seq: varint Sequence of last RESULT_BATCH | +| (or 0 if none) | +| total_rows: varint Total rows produced; 0 if not | +| tracked by the server | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `0`. After `RESULT_END`, the server has no +further state for this `request_id` and the client may reuse it. + +## QUERY_ERROR (0x13) + +Server to client. Signals failure at any point in the lifecycle: before any +`RESULT_BATCH` (parse or security failure) or mid-stream (storage failure, +cancellation, server shutdown). + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x13 | +| request_id: int64 | +| status: uint8 See Status codes below | +| msg_length: uint16 UTF-8 byte length | +| msg_bytes: bytes Human-readable error message | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `0`. `QUERY_ERROR` is terminal: the client must +not expect any further frames for this `request_id`. + +## CANCEL (0x14) + +Client to server. Requests termination of a running query. 
+ +```text ++---------------------------+ +| msg_kind: uint8 0x14 | +| request_id: int64 | ++---------------------------+ +``` + +The server acknowledges by emitting either `RESULT_END` (if the cursor +finished first) or `QUERY_ERROR` with status `CANCELLED`. The client must +continue to drain any in-flight `RESULT_BATCH` frames the server sent before +processing the cancel; the terminator is the synchronization point. + +If `request_id` does not refer to an active query, the server silently drops +the cancel. + +## CREDIT (0x15) + +Client to server. Extends the byte-credit window for a specific query. + +```text ++----------------------------------------------+ +| msg_kind: uint8 0x15 | +| request_id: int64 | +| additional_bytes: varint Bytes to add | ++----------------------------------------------+ +``` + +See [Flow control](#flow-control) for the credit model. + +## EXEC_DONE (0x16) + +Server to client. Terminates a non-SELECT `QUERY_REQUEST` (DDL, INSERT, +UPDATE, ALTER, DROP, TRUNCATE, CREATE TABLE, CREATE MATERIALIZED VIEW). No +`RESULT_BATCH` frames are sent for these statements. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x16 | +| request_id: int64 | +| op_type: uint8 Statement type discriminator | +| rows_affected: varint Row count for INSERT/UPDATE; | +| 0 for DDL | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `0`. `EXEC_DONE` is terminal: the client must +not expect any further frames for this `request_id`. If the statement fails, +the server sends `QUERY_ERROR` instead. + +## CACHE_RESET (0x17) + +Server to client. Instructs the client to clear one or both connection-scoped +caches: the symbol delta dictionary and the schema registry. Emitted at a +query boundary (between the previous query's terminator and the next query's +first `RESULT_BATCH` or `EXEC_DONE`); never mid-stream. 
+ +```text ++----------------------------------------------+ +| msg_kind: uint8 0x17 | +| reset_mask: uint8 Bit 0 = symbol dict | +| Bit 1 = schema cache | +| Bits 2-7 reserved (0) | ++----------------------------------------------+ +``` + +The header's `table_count` is `0`. No `request_id`: the frame targets +connection state, not a specific query. + +**Semantics by bit:** + +- **Bit 0 (RESET_MASK_DICT)**: clear the connection-scoped symbol dictionary. + After the reset, the dictionary is empty. The next `RESULT_BATCH` with + `FLAG_DELTA_SYMBOL_DICT` must start its delta section at `deltaStart = 0`. +- **Bit 1 (RESET_MASK_SCHEMAS)**: clear the connection-scoped schema + registry. All previously assigned schema IDs are discarded. The next + `RESULT_BATCH` must use full schema mode (0x00) with freshly allocated IDs. + +Both bits may be set in the same frame. Clients must ignore unknown reserved +bits. + +**Default soft caps:** + +| Cap | Default | Triggers | +|----------------------------------|-----------|--------------------| +| Symbol dict entries | 100,000 | `RESET_MASK_DICT` | +| Symbol dict UTF-8 heap bytes | 8 MiB | `RESET_MASK_DICT` | +| Distinct registered schemas | 4,096 | `RESET_MASK_SCHEMAS` | + +Actual cap values are implementation-defined. Clients must accept any cap +policy and must be prepared to receive `CACHE_RESET` after any query +terminator. + +**Why never mid-stream:** resetting the dictionary or schema registry while a +`RESULT_BATCH` is in flight would invalidate IDs already referenced in that +batch's payload. The server postpones the reset until a natural query +boundary. Under a saturating workload, the server may temporarily exceed its +soft caps for the duration of a single query; the caps are self-healing and +bounded by any one query's distinct symbol/schema footprint. + +**Wire-level example:** + +```text +client -> QUERY_REQUEST(request_id=42, ...) 
+server -> CACHE_RESET(reset_mask=0x01) # dict bit only +server -> RESULT_BATCH(request_id=42, batch_seq=0, deltaStart=0, ...) +server -> RESULT_BATCH(request_id=42, batch_seq=1, ...) +server -> RESULT_END(request_id=42, ...) +``` + +If the schema cache is also over cap, the server emits a single +`CACHE_RESET(reset_mask=0x03)` and the client clears both caches in one hop. + +## SERVER_INFO (0x18) + +Server to client. Unsolicited frame delivered as the first WebSocket frame +after the HTTP upgrade, only when the negotiated version is 2 or above. A v1 +client never sees it. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x18 | +| role: uint8 See role table | +| epoch: uint64 Monotonic role epoch | +| capabilities: uint32 Bitfield | +| server_wall_ns: int64 Server wall-clock (ns since | +| Unix epoch) | +| cluster_id_len: uint16 UTF-8 byte length | +| cluster_id: bytes Cluster identifier | +| node_id_len: uint16 UTF-8 byte length | +| node_id: bytes Node identifier | +| (if capabilities & 0x01): | +| zone_id_len: uint16 UTF-8 byte length | +| zone_id: bytes Geographic/logical zone | ++----------------------------------------------------------+ +``` + +**Role values:** + +| Value | Role | Description | +|--------|------------------|----------------------------------------------------------| +| `0x00` | STANDALONE | No replication configured. Behaves like a primary. | +| `0x01` | PRIMARY | Authoritative write node; reads see latest commits. | +| `0x02` | REPLICA | Read-only replica; reads may lag the primary. | +| `0x03` | PRIMARY_CATCHUP | Promotion in flight; behaves like a primary. | + +**Capabilities:** + +| Bit | Name | Description | +|--------------|----------|----------------------------------------------------------| +| `0x00000001` | CAP_ZONE | `zone_id` fields are appended after `node_id`. | + +Clients encountering unknown capability bits must ignore them. 
Trailing fields +gated by unset bits are absent from the frame. + +**epoch:** monotonic across role transitions on the same node (e.g., replica +promoted to primary). Clients tracking a specific primary can use it to refuse +a stale reconnect that lands on a node which no longer holds the primary role +at the current cluster epoch. The field is 0 on releases where fencing has not +been wired up yet; clients may treat it as a hint. + +**Delivery timing:** `SERVER_INFO` is included in the same TCP send buffer as +the 101 upgrade response, so on a healthy connection the frame is already in +the client's kernel recv buffer by the time the client parses the upgrade. If +the server negotiates v1, it omits the frame entirely and clients fall back to +treating the server as `STANDALONE`. + +### Client routing + +Egress clients that support v2 can accept multiple endpoints plus role and +zone preferences on the connect string: + +```text +ws::addr=db-a:9000,db-b:9000,db-c:9000;target=any;zone=eu-west-1a;failover=on; +``` + +| Key | Values | Default | Description | +|------------|---------------------------|---------|-----------------------------------------------| +| `target` | `any`, `primary`, `replica` | `any` | Role filter applied per endpoint after reading `SERVER_INFO`. | +| `zone` | free-form string | | Compared case-insensitively against `zone_id` from `SERVER_INFO`. | +| `failover` | `on`, `off` | `on` | Master switch for per-query reconnect loop. `off` surfaces transport errors directly. | + +When `target=primary`, zone preference is still recorded but every host's zone +tier is treated as equivalent (the primary must be followed across zones). + +The `421 + X-QuestDB-Role` (and optional `X-QuestDB-Zone`) upgrade-reject +convention is shared with ingress: the server returns HTTP 421 when the +connecting client's role filter does not match, allowing the client to try the +next endpoint without completing the WebSocket handshake. 
+ +## Null sentinel conventions + +Egress inherits QuestDB's internal null sentinel conventions. When the server +writes a null value into the dense values array, it uses the type's sentinel +and also sets the corresponding null bitmap bit. Clients consuming egress +results should treat these sentinels as indistinguishable from explicit NULL: + +| Type | Null sentinel | +|----------------------------------------------|---------------------| +| INT, IPv4 | `Integer.MIN_VALUE` (INT); `0` (IPv4) | +| LONG, DATE, TIMESTAMP, TIMESTAMP_NANOS, DECIMAL64 | `Long.MIN_VALUE` | +| FLOAT | any `NaN` (incl. `0.0f / 0.0f`) | +| DOUBLE | any `NaN` (incl. `0.0 / 0.0`) | +| GEOHASH (all widths) | All-ones (`-1`) | +| UUID | Both halves `Long.MIN_VALUE` | +| LONG256 | All four longs `Long.MIN_VALUE` | +| BOOLEAN, BYTE, SHORT, CHAR | No null sentinel; these types cannot carry NULL in QuestDB | + +A consequence of reusing in-engine sentinels on the wire is that some bit +patterns cannot be expressed as non-null: + +- **IPv4 `0.0.0.0`** is the IPv4 null sentinel; a non-null `0.0.0.0` cannot be + round-tripped and decodes as NULL. +- **GEOHASH "all ones"** is the geohash null sentinel; a geohash whose bit + pattern is all-ones cannot be round-tripped and decodes as NULL. +- **FLOAT / DOUBLE `NaN`** of any bit pattern (including non-canonical NaNs + like `0.0 / 0.0`) decodes as NULL. There is no separate "QWP NaN". + +### Array element nulls + +Array columns (`DOUBLE_ARRAY`, `LONG_ARRAY`) have no per-element null bitmap. +Element-level NULL uses the element type's row-level sentinel: + +- `DOUBLE_ARRAY` element: `NaN` (a non-null `NaN` is indistinguishable from NULL) +- `LONG_ARRAY` element: `Long.MIN_VALUE` (cannot be represented as non-null) + +The row-level null bitmap bit signals "the array itself is NULL", distinct +from "an array of zero or more elements where some may be element-NULL." 
+ +## Schema and symbol dictionary scope + +### Schema registry + +The server maintains a per-connection schema registry. The first +`RESULT_BATCH` for a query registers a new schema in full mode (0x00); +subsequent batches with the same column set use reference mode (0x01). + +Connections that accumulate many distinct column shapes may cross the server's +schema soft cap. When that happens, the server emits `CACHE_RESET` with +`RESET_MASK_SCHEMAS` at a query boundary and both sides clear the registry. +Schema IDs after the reset may collide with previously used values. + +On disconnect, both sides reset the registry. + +### Symbol dictionary + +Egress uses a connection-scoped delta dictionary (the same +`FLAG_DELTA_SYMBOL_DICT` mechanic as ingress). The server maintains a global +mapping of symbol strings to sequential integer IDs starting at 0, shared +across every query on the connection. Each `RESULT_BATCH` carries a delta +section listing newly added symbols. + +Per-connection scope benefits repeated queries (e.g. BI dashboards refreshing +the same SELECTs). The server enforces soft caps on entry count and heap bytes. +When either cap is crossed, the server emits `CACHE_RESET` with +`RESET_MASK_DICT` and both sides clear the dictionary; the next delta section +starts at `deltaStart = 0`. + +On disconnect, both sides reset the dictionary. 
+ +## Cursor lifecycle + +```text + QUERY_REQUEST + client ---------------------------------> server + | + (parse, plan, + open cursor) + | + client <---------- RESULT_BATCH(seq=0) ----- schema mode 0x00 + client <---------- RESULT_BATCH(seq=1) ----- schema mode 0x01 + client <---------- RESULT_BATCH(seq=N) ----- + | + client <----------- RESULT_END -------------- +``` + +**Error path:** + +```text + client <---------- RESULT_BATCH(seq=K) ----- + client <----------- QUERY_ERROR ------------- (terminal) +``` + +**Cancel path:** + +```text + client ----------- CANCEL ------------------> + client <--- (any in-flight RESULT_BATCH) ---- + client <----------- QUERY_ERROR ------------- status = CANCELLED + (or RESULT_END if it raced) +``` + +**Non-SELECT path:** + +```text + QUERY_REQUEST (DDL/INSERT/UPDATE) + client ---------------------------------> server + client <----------- EXEC_DONE --------------- +``` + +**Cache reset at query boundary:** + +```text + client <----------- RESULT_END -------------- (query N) + client <----------- CACHE_RESET ------------- (optional) + QUERY_REQUEST + client ---------------------------------> server (query N+1) + client <---------- RESULT_BATCH(seq=0) ----- deltaStart=0 after reset +``` + +A connection-level error (malformed header, authentication failure) closes the +WebSocket. The server's last frame before close should be a `QUERY_ERROR` with +`request_id = -1` if the failure is not attributable to a specific request. + +## Failover and high availability + +Egress clients can drive a per-query reconnect loop across multiple endpoints. +When a transport error occurs mid-stream, the client reconnects to the next +healthy endpoint, reads `SERVER_INFO` to verify the role filter, and replays +the query. `batch_seq` restarts at 0 on the new connection. 
+ +The connect-string keys that control egress failover +(`failover_max_attempts`, `failover_backoff_initial_ms`, +`failover_backoff_max_ms`, `failover_max_duration_ms`) are documented in the +[reconnect and failover](/docs/connect/clients/connect-string#reconnect-keys) +section of the connect string reference. The shared failover primitives +(host-health model, backoff, role filter, error classification) are covered in +[multi-host failover](/docs/connect/clients/connect-string#failover-keys). + +Key behaviors: + +- Authentication errors are terminal at any host; the reconnect loop does not + continue past them. +- A `CANCEL` acknowledged with `QUERY_ERROR(CANCELLED)` routes through the + normal error path, not the transport-error path, so it never triggers + failover. +- An upgrade-time version mismatch is per-endpoint, not terminal. A host + whose upgrade response advertises a QWP version outside the client's + supported range is recorded as a transport error and the walk continues. + +:::note Enterprise + +Multi-host failover with automatic reconnect requires QuestDB Enterprise. + +::: + +## Flow control + +:::note Byte credits + +Egress uses byte-credit flow control to prevent the server from overwhelming +the client with result data. The client tells the server how many bytes it is +willing to receive, and the server pauses when the budget is exhausted. + +::: + +### Initial credit + +The client sets `initial_credit` in `QUERY_REQUEST`. A value of `0` means +unbounded: the server streams without waiting for credit. A nonzero value is +the byte budget the server may emit before pausing. + +### Granting more credit + +The client sends `CREDIT` frames to extend the window. The server adds +`additional_bytes` to the remaining budget. There is no upper bound on a +single grant. + +### Accounting + +The server decrements the budget by the total wire length of each +`RESULT_BATCH` (header + payload). 
When the budget would go non-positive, the +server pauses production for that `request_id`. + +### Row floor + +To prevent deadlock on rows larger than the remaining window, the server may +send one additional `RESULT_BATCH` of at least one row even if doing so drives +the budget negative. The next batch will not be sent until credit returns to a +positive value. + +This guarantees forward progress for any well-formed query regardless of +credit size. Clients should size buffers to absorb up to one extra batch. + +### Independence per request + +Each `request_id` has its own credit accounting. Granting credit on one +request does not unblock another. + +## Status codes + +`QUERY_ERROR` reuses the ingress status code namespace and adds two +egress-specific codes: + +| Code | Hex | Name | Description | +|------|--------|-----------------|---------------------------------------------------| +| 3 | `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | +| 5 | `0x05` | PARSE_ERROR | Malformed message or SQL syntax error | +| 6 | `0x06` | INTERNAL_ERROR | Server-side execution failure | +| 8 | `0x08` | SECURITY_ERROR | Authorization failure | +| 10 | `0x0A` | CANCELLED | Query terminated in response to CANCEL | +| 11 | `0x0B` | LIMIT_EXCEEDED | A protocol limit was hit (see Protocol limits) | + +OK (0x00) is not used in egress; success terminates with `RESULT_END` or +`EXEC_DONE`. + +## Protocol limits + +| Limit | Default value | Notes | +|----------------------------------|---------------|----------------------------------------------------| +| Max in-flight queries | 1 | Per connection. Wire protocol allows more; Phase 1 enforces 1. | +| Max SQL text length | 1 MiB | UTF-8 bytes. | +| Max bind parameters | 1,024 | Per QUERY_REQUEST. | +| Max RESULT_BATCH wire size | 16 MiB | Same as ingress batch ceiling. | +| Symbol dict soft cap (entries) | 100,000 | Per connection. Exceeding triggers CACHE_RESET. 
| +| Symbol dict soft cap (heap) | 8 MiB | Per connection, UTF-8 bytes. | +| Schema registry soft cap | 4,096 | Per connection. Exceeding triggers CACHE_RESET. | + +Soft caps are implementation-defined and may be tuned by the server operator. + +### Practical WebSocket frame cap + +The 16 MiB `RESULT_BATCH` limit and 1 MiB SQL limit are **QWP protocol +ceilings**, not effective server-side caps. The HTTP receive buffer for the +`/read/v1` endpoint applies to **client → server** frames (`QUERY_REQUEST`, +`CANCEL`, `CREDIT`) and is checked before the QWP parser sees the payload: + +| Server config key | Default | Effect | +|-------------------------|---------|--------------------------------------------------------------------------------------------| +| `http.recv.buffer.size` | 2 MiB | Maximum WebSocket frame the server will accept on `/read/v1`. | + +A client-side frame larger than this is rejected with WebSocket close code +`1009 MESSAGE_TOO_BIG` and the connection is dropped — the client observes an +abrupt disconnect (`ECANCELED`, `EPIPE`, or similar) before any +`QUERY_ERROR` arrives. + +**For client implementers:** a `QUERY_REQUEST` carries SQL text plus all bind +parameter values. Keep the total under `http.recv.buffer.size` minus +WebSocket frame overhead (≤ 14 bytes). With the default 2 MiB recv buffer, +~1.9 MiB of SQL + binds is a safe ceiling. Long SQL or large array binds are +the realistic triggers. + +`RESULT_BATCH` frames (server → client) are bounded by the server's own +producer-side configuration; sizing the client's WebSocket library to handle +up to 16 MiB receive frames covers any well-configured server. + +## Examples + +### Simple unbounded query + +Client sends `SELECT id, value FROM sensors LIMIT 2` with no bind parameters +and unbounded credit. 
+ +```text +QUERY_REQUEST (client -> server; WebSocket binary frame body + — no QWP header, see "Message structure" above): + + 10 # msg_kind = QUERY_REQUEST + 01 00 00 00 00 00 00 00 # request_id = 1 + 25 # sql_length = 37 + 53 45 4C 45 43 54 20 69 # "SELECT i" + 64 2C 20 76 61 6C 75 65 # "d, value" + 20 46 52 4F 4D 20 73 65 # " FROM se" + 6E 73 6F 72 73 20 4C 49 # "nsors LI" + 4D 49 54 20 32 # "MIT 2" + 00 # initial_credit = 0 (unbounded) + 00 # bind_count = 0 +``` + +Server responds with one result batch and end-of-stream: + +```text +RESULT_BATCH (seq=0): + Header: + 51 57 50 31 # Magic: "QWP1" + 01 # Version: 1 + 00 # Flags + 01 00 # table_count = 1 + XX XX XX XX # payload_length + + Payload: + 11 # msg_kind = RESULT_BATCH + 01 00 00 00 00 00 00 00 # request_id = 1 + 00 # batch_seq = 0 + + Table block: + 00 # name_length = 0 (anonymous) + 02 # row_count = 2 + 02 # column_count = 2 + + Schema (full mode): + 00 # schema_mode = FULL + 00 # schema_id = 0 + 02 69 64 05 # "id" : LONG + 05 76 61 6C 75 65 07 # "value" : DOUBLE + + Column 0 (LONG): + 00 # null_flag = 0 + 01 00 00 00 00 00 00 00 # 1 + 02 00 00 00 00 00 00 00 # 2 + + Column 1 (DOUBLE): + 00 # null_flag = 0 + CD CC CC CC CC CC F4 3F # 1.3 + 9A 99 99 99 99 99 01 40 # 2.2 + +RESULT_END: + Header: + 51 57 50 31 01 00 00 00 XX XX XX XX + + Payload: + 12 # msg_kind = RESULT_END + 01 00 00 00 00 00 00 00 # request_id = 1 + 00 # final_seq = 0 + 02 # total_rows = 2 +``` + +### Bind parameter + +A LONG bind parameter with value `42`: + +```text +05 # type_code = LONG +00 # null_flag = 0 (no nulls) +2A 00 00 00 00 00 00 00 # value = 42 +``` + +A NULL LONG bind parameter: + +```text +05 # type_code = LONG +01 # null_flag = nonzero (bitmap follows) +01 # bitmap byte: bit 0 set = NULL + # (no value bytes) +``` + +### Credit-controlled streaming + +Client opens a query with a 64 KiB initial credit: + +```text +QUERY_REQUEST: initial_credit = 65536, request_id = 7 +``` + +Server emits `RESULT_BATCH` frames totaling 60
KiB, then pauses. Client +grants more credit: + +```text +CREDIT: + 15 # msg_kind = CREDIT + 07 00 00 00 00 00 00 00 # request_id = 7 + 80 80 04 # additional_bytes = 65536 +``` + +Server resumes streaming. + +## Reference implementation + +The reference client implementation is +[`java-questdb-client`](https://github.com/questdb/java-questdb-client) +at commit +[`67bb5e4`](https://github.com/questdb/java-questdb-client/commit/67bb5e49feea7e63b813ea08189c23ea11486131). + +The server-side egress handler lives in the QuestDB server repository. + +## Version history + +| Version | Description | +|------------|------------------------------------------------------------| +| 1 (`0x01`) | Initial egress release. | +| 2 (`0x02`) | Adds unsolicited SERVER_INFO frame after upgrade (v2 only).| diff --git a/documentation/protocols/qwp-ingress-websocket.md b/documentation/protocols/qwp-ingress-websocket.md new file mode 100644 index 000000000..92c3b7919 --- /dev/null +++ b/documentation/protocols/qwp-ingress-websocket.md @@ -0,0 +1,1233 @@ +--- +slug: /connect/wire-protocols/qwp-ingress-websocket +title: QWP ingress (WebSocket) +description: + Wire-protocol specification for QuestDB's WebSocket-based columnar binary + ingest protocol. +--- + +import QwpMessageHeader from "../partials/_qwp.message-header.partial.mdx" + +:::info Audience + +This is a **wire-protocol specification** for client implementers building a +new QuestDB ingest client from scratch. End users should see the +[language client guides](/docs/connect/overview) and the +[connect string reference](/docs/connect/clients/connect-string). + +::: + +QuestDB Wire Protocol (QWP) is QuestDB's columnar binary protocol for +high-throughput data ingestion over WebSocket. Each message carries one or more +table blocks, where every column's values are stored contiguously. Batched +messages, schema references, and Gorilla-compressed timestamps reduce wire +overhead for sustained streaming workloads. 
+ +This page covers WebSocket ingress only. For streaming query results back to +clients, see [QWP egress (WebSocket)](/docs/connect/wire-protocols/qwp-egress-websocket/). + +## Why implement a QWP client + +If your language already has a QuestDB client, use it — the +[language client guides](/docs/connect/overview) list what's available. The +rest of this section is for implementers writing a new one (e.g., to bring +QWP to JavaScript, Rust, Ruby, .NET, or an embedded runtime that the existing +clients don't cover). + +Compared with the line-oriented ILP protocols (`http`, `https`, `tcp`), +QWP trades a denser binary encoding for higher throughput and lower CPU on +both ends: + +- **One schema, many batches.** After the first message defines a table's + columns, subsequent messages reference the schema by an integer ID — no + per-row type tags, no per-batch column names. +- **Columnar wire format.** Each column's values are contiguous in the + message, so the server commits them column-at-a-time without row-by-row + parsing. This is the same shape QuestDB uses on disk. +- **Gorilla timestamps.** Steady-cadence timestamps collapse from 8 bytes to + as little as 1 bit each via delta-of-delta encoding. +- **Global symbol delta dictionary.** Low-cardinality string columns send + each distinct value once per connection, then reference it by varint ID. +- **Multi-table batches.** A single WebSocket frame can carry rows for many + tables in one trip across the wire. +- **Server-acknowledged commits.** Every batch gets an OK frame carrying the + per-table sequencer transaction it landed in, so the client knows + precisely what's durable. An optional `X-QWP-Request-Durable-Ack` opt-in + on the upgrade extends this to cluster-durable acks (Enterprise only). + +A minimum-viable client that supports BOOLEAN, LONG, DOUBLE, TIMESTAMP, and +VARCHAR — the five types that cover most real workloads — is on the order of +~500 lines in a typed language, plus a WebSocket library. 
Adding the +remaining ~20 types is mostly extending switch statements; the framing, +schema registry, and ack loop stay the same. + +The authoritative reference implementation is +[`java-questdb-client`](https://github.com/questdb/java-questdb-client). It's +worth keeping open in a tab as you read this page. + +## Overview + +QWP encodes data in a column-major layout: all values for a single column are +packed together before the next column begins. This allows the server to +decompress and commit each column independently, avoiding row-by-row +deserialization. + +Design goals: + +- **Column-oriented**: values for each column are contiguous in the message. +- **Batch-oriented**: a single message can carry rows for multiple tables. +- **Schema-referencing**: after the first batch, subsequent batches reference a + previously sent schema by numeric ID, avoiding redundant column definitions. +- **Timestamp compression**: designated timestamp columns can use + Gorilla delta-of-delta encoding, reducing 8 bytes per timestamp to as + little as 1 bit for steady-rate streams. + +Every QWP message begins with a 4-byte magic: + +| Magic | Hex value | Description | +|--------|----------------|-----------------------| +| `QWP1` | `0x31505751` | Standard data message | + +## Transport and versioning + +### WebSocket endpoints + +The client initiates an HTTP GET request to either `/write/v4` or `/api/v4/write` +with standard [WebSocket](https://datatracker.ietf.org/doc/html/rfc6455) upgrade +headers. After the server responds with `101 Switching Protocols`, all +communication uses binary WebSocket frames. + +### Version negotiation + +During the HTTP upgrade, the client and server negotiate the protocol version +using custom headers. 
+ +**Client request headers:** + +| Header | Required | Description | +|---------------------|----------|--------------------------------------------------------------------------------------| +| `X-QWP-Max-Version` | No | Maximum QWP version the client supports (positive integer). Defaults to 1 if absent. | +| `X-QWP-Client-Id` | No | Free-form client identifier (e.g., `java/1.0.2`, `zig/0.1.0`). | + +**Server response headers:** + +| Header | Description | +|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `X-QWP-Version` | The QWP version selected for this connection. | +| `X-QWP-Max-Batch-Size` | Server's effective per-message payload cap in bytes, computed as `min(http.recv.buffer.size − 14, 16 MiB)` (the protocol ceiling clamped by the actual WebSocket recv buffer minus the worst-case frame header). Clients should size batches to stay under this value. Absent on servers older than the introduction of this header — clients fall back to their locally configured byte budget. | +| `X-QWP-Durable-Ack` | `enabled` when the connection will emit `STATUS_DURABLE_ACK` frames. Sent only when the client opted in via `X-QWP-Request-Durable-Ack: true` *and* the server has durable-ack support configured. Absent in every other case. | + +The server selects the version as `min(clientMax, serverMax)`. The selected +version is never higher than either side's maximum. The server may also +consider the `X-QWP-Client-Id` when selecting the version. + +### Connection-level contract + +All messages on a connection must carry the negotiated version in the version +byte (offset 4) of the message header. The server validates every incoming +message against the negotiated version and rejects mismatches with a parse +error. 
+ +### Current version + +Ingress is pinned to version 1. No v2 ingest semantics exist. Ingress +clients advertise `X-QWP-Max-Version: 1`. + +## Authentication + +Authentication is handled at the HTTP level during the WebSocket upgrade +handshake, before any QWP binary frames are exchanged. + +Supported methods: + +- **HTTP basic auth** (OSS and Enterprise): see + [Authentication in QuestDB Open Source](/docs/connect/compatibility/rest-api/#authentication-in-questdb-open-source). +- **Token-based auth** (Enterprise only): see + [Authentication (RBAC)](/docs/connect/compatibility/rest-api/#authentication-rbac). +- **OIDC** (Enterprise only): see [OpenID Connect](/docs/security/oidc/). + +A failed authentication results in a `401` or `403` HTTP response before the +WebSocket connection is established. No QWP-level auth handshake exists. + +## Client lifecycle + +The end-to-end shape of a QWP client session, before the encoding details: + +1. **Open WebSocket.** Issue an HTTP `GET` to `/write/v4` (or `/api/v4/write`) + with the standard `Upgrade: websocket` headers, plus: + - `X-QWP-Max-Version: 1` — highest version supported. + - `X-QWP-Client-Id: <name>/<version>` (e.g., `java/1.0.2`) — recommended, helps server-side + diagnostics and version negotiation. + - Authentication header (`Authorization: Basic …` or `Authorization: Bearer …`). + - `X-QWP-Request-Durable-Ack: true` — optional, opt-in for cluster-durable + acks (Enterprise). +2. **Verify the upgrade.** On `101 Switching Protocols`, read the response + headers: + - `X-QWP-Version` — the version the connection runs on. Use it for the + `version` byte in every outgoing message header. Reject the connection + if it's outside the range your client supports. + - `X-QWP-Durable-Ack: enabled` — confirms durable-ack frames will follow, + iff you opted in. If you opted in and this header is absent, fail the + connection (don't silently wait for acks the server will never send).
+ - `X-QWP-Max-Batch-Size` (optional, older servers omit it) — server's + effective per-message payload cap in bytes. Clients should clamp their + batch-size triggers to fit under this value (a safety margin of ~10% + absorbs encoding overhead such as schema and dict-delta bytes). When + absent, fall back to a locally configured budget or a conservative + default such as 1.9 MiB to stay under the typical 2 MiB recv buffer. +3. **Send binary frames.** Each frame is one QWP message: + `12-byte header` + payload (`Delta Symbol Dictionary` if any, then one or + more `Table Block`s). The first frame for a given table carries a full + schema; subsequent frames for the same column set reference it by + schema ID. +4. **Drain server responses.** The server sends an OK (or error) binary frame + per request, in send order. Match responses to requests by their position + in your in-flight queue — the server-assigned `sequence` field in each + response is the authoritative confirmation. If you opted in to durable + ack, you'll also receive periodic `STATUS_DURABLE_ACK` frames carrying + cumulative per-table watermarks. +5. **Close.** Send a WebSocket `Close` frame after the last expected OK has + been drained. + +Every reconnect resets connection-scoped state on both sides: schema IDs, +symbol dictionary, and sequence counter. Clients that want sender-restart +durability layer a store-and-forward buffer on top — see the +[connect string reference](/docs/connect/clients/connect-string#sf-keys). + +## Encoding primitives + +### Byte ordering + +All multi-byte numeric values are **little-endian**. Variable-length integers +use unsigned LEB128 (see below). + +### Variable-length integer encoding (varint) + +:::note LEB128 + +LEB128 (Little Endian Base 128) is a variable-length integer encoding from the +[DWARF debugging format](https://en.wikipedia.org/wiki/LEB128), also used by +Protocol Buffers and WebAssembly. It encodes small values in fewer bytes than +fixed-width integers. 
+ +::: + +QWP uses **unsigned LEB128** for variable-length integers. Values are split into +7-bit groups, least significant first. The high bit of each byte is a +continuation flag: set (1) means more bytes follow, clear (0) means this is the +last byte. A 64-bit value requires at most 10 bytes. + +**Encoding:** + +```python +while (value & ~0x7F) != 0: + output_byte((value & 0x7F) | 0x80) + value >>= 7 +output_byte(value) +``` + +**Decoding:** + +```python +result = 0 +shift = 0 +while True: + b = read_byte() + result |= (b & 0x7F) << shift + shift += 7 + if (b & 0x80) == 0: + break +return result +``` + +**Examples:** + +| Value | Encoded bytes | +|-------|--------------------| +| 0 | `0x00` | +| 1 | `0x01` | +| 127 | `0x7F` | +| 128 | `0x80 0x01` | +| 255 | `0xFF 0x01` | +| 300 | `0xAC 0x02` | +| 16384 | `0x80 0x80 0x01` | + +### ZigZag encoding + +:::note ZigZag encoding + +ZigZag encoding maps signed integers to unsigned integers so that values with +small absolute values produce small varints. It was popularized by +[Protocol Buffers](https://protobuf.dev/programming-guides/encoding/#signed-ints). 
+ +::: + +```python +def zigzag_encode(n): + return (n << 1) ^ (n >> 63) + +def zigzag_decode(n): + return (n >> 1) ^ -(n & 1) +``` + +| Signed | Unsigned | +|--------|----------| +| 0 | 0 | +| -1 | 1 | +| 1 | 2 | +| -2 | 3 | +| 2 | 4 | + +## Message structure + +### Message header (12 bytes, fixed) + + + +### Flags byte + +| Bit | Mask | Name | Description | +|-----|--------|----------------------------|-------------------------------------------------------| +| 0-1 | | Reserved | Must be 0 | +| 2 | `0x04` | `FLAG_GORILLA` | Gorilla delta-of-delta encoding for timestamp columns | +| 3 | `0x08` | `FLAG_DELTA_SYMBOL_DICT` | Delta symbol dictionary mode enabled | +| 4-7 | | Reserved | Must be 0 | + +### Complete message layout + +```text ++---------------------------------------------+ +| Message Header (12 bytes) | ++---------------------------------------------+ +| Payload (variable) | +| +- [Delta Symbol Dictionary] (if 0x08) | +| +- Table Block 0 | +| +- Table Block 1 | +| +- ... Table Block N-1 | ++---------------------------------------------+ +``` + +### Delta symbol dictionary + +Present only when `FLAG_DELTA_SYMBOL_DICT` (0x08) is set. Appears at the start +of the payload, before any table blocks. + +```text ++------------------------------------------------------------+ +| delta_start: varint Starting global ID for this delta | +| delta_count: varint Number of new entries | +| For each new entry: | +| name_length: varint UTF-8 byte length | +| name_bytes: bytes UTF-8 encoded symbol string | ++------------------------------------------------------------+ +``` + +The client maintains a global symbol dictionary mapping symbol strings to +sequential integer IDs starting from 0. On each batch, only newly added +symbols (the "delta") are transmitted. The server accumulates these entries +across batches for the lifetime of the connection. + +WebSocket clients set `FLAG_DELTA_SYMBOL_DICT` on every message and use global +delta dictionaries exclusively. 
Symbol columns then contain varint-encoded +global IDs instead of per-column dictionaries. + +On connection loss, both sides reset the dictionary. + +## Table blocks + +Each table block contains data for a single table. + +```text ++----------------------------------+ +| Table Header (variable) | ++----------------------------------+ +| Schema Section (variable) | ++----------------------------------+ +| Column Data (variable) | +| +- Column 0 data | +| +- Column 1 data | +| +- ... Column N-1 data | ++----------------------------------+ +``` + +### Table header + +| Field | Type | Description | +|--------------|--------|------------------------------------| +| name_length | varint | Table name length in bytes | +| name | UTF-8 | Table name (max 127 bytes) | +| row_count | varint | Number of rows in this block | +| column_count | varint | Number of columns | + +## Schema definition + +The schema section immediately follows the table header and defines the columns +in the block. + +### Schema mode byte + +| Value | Mode | Description | +|--------|-----------|------------------------------------------------| +| `0x00` | Full | Schema ID + complete column definitions inline | +| `0x01` | Reference | Schema ID only (lookup from registry) | + +### Full schema mode (0x00) + +Sent the first time a table's schema appears on a connection, or whenever the +column set changes. + +```text ++----------------------------------+ +| mode_byte: 0x00 | ++----------------------------------+ +| schema_id: varint | ++----------------------------------+ +| Column Definition 0 | +| +- name_length: varint | +| +- name: UTF-8 bytes | +| +- type_code: uint8 | ++----------------------------------+ +| Column Definition 1 ... | ++----------------------------------+ +``` + +Schema IDs are non-negative integers assigned by the client and scoped to the +lifetime of a single connection. They are global across all tables on the +connection (not per-table). 
Clients typically assign them sequentially starting +at 0, but the server does not require any particular ordering. + +A column with an **empty name** (length 0) and type TIMESTAMP denotes the +[designated timestamp](/docs/concepts/designated-timestamp/) column, the +per-table column that QuestDB uses for time-based partitioning and ordering. + +### Reference schema mode (0x01) + +Used for subsequent batches when the server has already registered the schema. + +```text ++-------------------------+ +| mode_byte: 0x01 | ++-------------------------+ +| schema_id: varint | ++-------------------------+ +``` + +The server looks up the schema by its ID in the per-connection schema registry. + +### Schema registry lifecycle + +1. First batch for a table: full schema mode with a new schema ID. +2. Subsequent batches with the same columns: reference mode with the same ID. +3. When a table gains a column, the client assigns a new schema ID and sends + it in full mode. +4. Full-mode schemas may re-register an existing ID; the server accepts any ID + within the per-connection schema-ID limit. +5. On reconnect, both sides reset: the client reassigns IDs from 0 and the + server clears its registry. 
+ +## Column types + +| Code | Hex | Type | Size | Description | +|------|--------|-----------------|---------|------------------------------------| +| 1 | `0x01` | BOOLEAN | 1 bit | Bit-packed boolean | +| 2 | `0x02` | BYTE | 1 | Signed 8-bit integer | +| 3 | `0x03` | SHORT | 2 | Signed 16-bit integer | +| 4 | `0x04` | INT | 4 | Signed 32-bit integer | +| 5 | `0x05` | LONG | 8 | Signed 64-bit integer | +| 6 | `0x06` | FLOAT | 4 | IEEE 754 single precision | +| 7 | `0x07` | DOUBLE | 8 | IEEE 754 double precision | +| 9 | `0x09` | SYMBOL | var | Dictionary-encoded string | +| 10 | `0x0A` | TIMESTAMP | 8 | Microseconds since Unix epoch | +| 11 | `0x0B` | DATE | 8 | Milliseconds since Unix epoch | +| 12 | `0x0C` | UUID | 16 | RFC 4122 UUID | +| 13 | `0x0D` | LONG256 | 32 | 256-bit integer | +| 14 | `0x0E` | GEOHASH | var | Geospatial hash | +| 15 | `0x0F` | VARCHAR | var | Length-prefixed UTF-8 | +| 16 | `0x10` | TIMESTAMP_NANOS | 8 | Nanoseconds since Unix epoch | +| 17 | `0x11` | DOUBLE_ARRAY | var | N-dimensional double array | +| 18 | `0x12` | LONG_ARRAY | var | N-dimensional long array | +| 19 | `0x13` | DECIMAL64 | 8 | Decimal (18 digits precision) | +| 20 | `0x14` | DECIMAL128 | 16 | Decimal (38 digits precision) | +| 21 | `0x15` | DECIMAL256 | 32 | Decimal (77 digits precision) | +| 22 | `0x16` | CHAR | 2 | Single UTF-16 code unit | +| 23 | `0x17` | BINARY | var | Length-prefixed opaque bytes | +| 24 | `0x18` | IPv4 | 4 | 32-bit IPv4 address | + +Code `0x08` is unassigned. It was previously STRING, which has been removed. +Use VARCHAR (`0x0F`) for text columns. + +TIMESTAMP and TIMESTAMP_NANOS may use Gorilla encoding when `FLAG_GORILLA` is +set. See [Timestamp encoding](#timestamp-encoding) below. 
+ +:::warning DATE is not timestamp-ish on ingress — and this is deliberately asymmetric with egress + +On the **ingress** wire, `DATE` (`0x0B`) is a plain fixed-width `int64` +column: written exactly like `LONG`, with **no** per-column encoding-flag +byte and **never** Gorilla-encoded, even when `FLAG_GORILLA` is set. Only +`TIMESTAMP` (`0x0A`) and `TIMESTAMP_NANOS` (`0x10`) carry the encoding flag +on ingress. + +On the **egress** wire it is the opposite: `DATE` *is* grouped with +`TIMESTAMP` / `TIMESTAMP_NANOS` and *does* carry the 1-byte encoding flag +(plus optional Gorilla). See the `FLAG_GORILLA` description in the +[QWP egress protocol](/docs/connect/wire-protocols/qwp-egress-websocket/). + +Reusing one direction's DATE rule for the other shifts every DATE value by +one byte (a clean ×256) and breaks Gorilla-encoded DATE entirely. Treat +`DATE` as a generic fixed-width `int64` column on ingress, the same as +`LONG`. + +::: + +## Null handling + +Each column's data section begins with a 1-byte **null flag**. The flag tells +the decoder how nulls are represented in the data that follows. + +### Sentinel mode (null flag = 0x00) + +No bitmap follows. The column data contains one value per row (`row_count` +values total). Null rows are represented by a reserved marker value (a +"sentinel") that falls outside the column's valid range. For example, `0x00` +for BYTE or `0x0000` for SHORT. The decoder recognizes these values as null +rather than as real data. + +Sentinel mode requires the type to have a dedicated null representation. Types +whose full value range is meaningful payload (e.g., VARCHAR, SYMBOL) cannot use +sentinel mode. + +### Bitmap mode (null flag != 0x00) + +A null bitmap follows immediately after the flag byte. The column data then +contains only non-null values, densely packed +(`value_count = row_count - null_count`). 
+ +**Bitmap format:** + +- **Size**: `ceil(row_count / 8)` bytes +- **Bit order**: LSB first within each byte +- **Semantics**: bit = 1 means the row is NULL, bit = 0 means the row has a value + +```text +Byte 0: [row7][row6][row5][row4][row3][row2][row1][row0] +Byte 1: [row15][row14][row13][row12][row11][row10][row9][row8] +... +``` + +**Accessing null status:** + +```python +byte_index = row_index // 8 +bit_index = row_index % 8 +is_null = (bitmap[byte_index] & (1 << bit_index)) != 0 +``` + +**Example:** 10 rows where rows 0, 2, and 9 are null: + +```text +Byte 0: 0b00000101 = 0x05 (bits 0 and 2 set) +Byte 1: 0b00000010 = 0x02 (bit 1 set = row 9) +``` + +### Complete column data layout + +```text ++------------------------------------------------------------+ +| null_flag: uint8 0 = sentinel, nonzero = bitmap | +| [null bitmap: ceil(row_count/8) bytes if flag != 0] | +| Column values: | +| flag == 0 : row_count entries (null rows use sentinels) | +| flag != 0 : value_count non-null entries, densely packed | +| (value_count = row_count - null_count) | ++------------------------------------------------------------+ +``` + +The encoder chooses the strategy per column. The decoder must support both. + +### Sentinel values + +When the reference implementation emits sentinel mode (null flag = 0x00), null +rows are encoded as: + +| Type | Sentinel | +|---------|---------------------------------------------------------------------------------------------------------------------------------------| +| BOOLEAN | bit `0` (false) | +| BYTE | `0x00` | +| SHORT | `0x0000` | +| CHAR | `0x0000` | +| GEOHASH | All-ones (`0xFF...FF`), truncated to `ceil(precision_bits / 8)` bytes | +| IPv4 | `0x00 0x00 0x00 0x00` (the bit pattern 0, i.e. 
address `0.0.0.0`) | +| UUID | two little-endian int64 halves, each equal to `Long.MIN_VALUE` (every byte `0x00` except the MSB of each half, which is `0x80`); 16 B | +| LONG256 | four little-endian int64 words, each equal to `Long.MIN_VALUE`; 32 B (the UUID pattern repeated) | + +Alternative implementations may freely choose sentinel mode for GEOHASH, +IPv4, UUID, or LONG256: the server's decoder recognizes these byte +patterns as null in sentinel mode regardless of which client emitted them. +The reference Java client itself currently encodes these types in bitmap +mode whenever the column contains any null rows. + +### Reference implementation null strategy + +The reference Java client uses these strategies per type: + +| Strategy | Types | +|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------| +| Sentinel | BOOLEAN, BYTE, SHORT, CHAR | +| Bitmap | INT, LONG, FLOAT, DOUBLE, VARCHAR, BINARY, SYMBOL, TIMESTAMP, TIMESTAMP_NANOS, DATE, UUID, LONG256, IPv4, GEOHASH, DECIMAL64, DECIMAL128, DECIMAL256, DOUBLE_ARRAY, LONG_ARRAY | + +Alternative implementations may make different per-column choices as long as +the null flag accurately describes the data that follows. A column with no null +rows produces identical output under either strategy (null flag = 0x00, +`row_count` values). + +## Column data encoding + +### Fixed-width types + +For BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DATE, CHAR, and IPv4: values are +written as contiguous arrays of their respective sizes in little-endian byte +order. + +```text ++------------------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++------------------------------------------------------+ +| Values: | +| value[0], value[1], ... 
value[N-1] | +| N = row_count if null_flag == 0 | +| N = row_count - null_count if null_flag != 0 | ++------------------------------------------------------+ +``` + +### Boolean + +Values are bit-packed, 8 per byte, LSB-first. `ceil(N/8)` bytes are written +where `N = row_count` in sentinel mode or `N = row_count - null_count` in +bitmap mode. The reference implementation uses sentinel mode for BOOLEAN: null +rows appear as bit `0` (false). + +```text +Values [true, false, true, true, false, false, false, true]: + 0b10001101 = 0x8D +``` + +### VARCHAR and BINARY + +VARCHAR and BINARY share the same wire format: + +```text ++--------------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++--------------------------------------------------+ +| Offset array: (value_count + 1) x uint32 LE | +| offset[0] = 0 | +| offset[i+1] = end of value[i] | ++--------------------------------------------------+ +| Data: concatenated bytes | ++--------------------------------------------------+ +``` + +- `value_count = row_count - null_count` +- Offsets are uint32, little-endian (all multi-byte numeric values in QWP are + little-endian — restated here because the diagram is often skimmed). +- Value `i` spans bytes `[offset[i], offset[i+1])` +- For VARCHAR, the bytes are valid UTF-8. For BINARY, the bytes are opaque. +- The uint32 offsets bound individual values to 2^31 - 1 bytes. + +### Symbol + +Dictionary-encoded strings for low-cardinality columns. + +:::info WebSocket uses global delta dictionaries only + +WebSocket clients set `FLAG_DELTA_SYMBOL_DICT` (`0x08`) on every message +and use the global delta dictionary mode **exclusively**. The per-table +dictionary mode used by UDP datagrams is not covered here. + +::: + +The dictionary entries themselves are sent in the message-level +[delta symbol dictionary](#delta-symbol-dictionary) section.
Column data for a +SYMBOL column is then just a sequence of varint-encoded global IDs, one per +non-null row: + +```text ++--------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++--------------------------------------------+ +| For each non-null row: | +| global_id: varint Global symbol ID | ++--------------------------------------------+ +``` + +The client owns the global ID assignment. Each new string gets the next +sequential integer, starting from `0` on a fresh connection. Only the new +entries since the previous message are transmitted; the server accumulates the +dictionary for the lifetime of the connection. + +### Timestamp encoding + +:::warning Applies to TIMESTAMP (`0x0A`) and TIMESTAMP_NANOS (`0x10`) only — DATE is excluded + +Everything in this section applies **only** to `TIMESTAMP` (`0x0A`) and +`TIMESTAMP_NANOS` (`0x10`). Despite "milliseconds since epoch" looking +timestamp-like, `DATE` (`0x0B`) is a plain `int64` column on the ingress +wire (written like `LONG`: no encoding flag, never Gorilla), regardless of +`FLAG_GORILLA`. Do **not** apply the rules below to `DATE`. This is the +opposite of the egress wire, where `DATE` *is* timestamp-ish — see the DATE +asymmetry warning in [Column types](#column-types) above. + +::: + +:::note Gorilla compression + +Gorilla is a time-series compression scheme from the +[Facebook/Meta Gorilla paper](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf) +(Pelkonen et al., VLDB 2015). It exploits the regularity of timestamps in +time-series data by encoding the delta-of-deltas between consecutive values, +which are often zero or very small. 
+ +::: + +When `FLAG_GORILLA` (0x04) is **not** set, `TIMESTAMP` and `TIMESTAMP_NANOS` +columns are written as plain int64 arrays with no encoding flag (`DATE` is +always written this way regardless of `FLAG_GORILLA`): + +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| Timestamp values (non-null only): | +| value_count x int64 | ++----------------------------------------------+ +``` + +When `FLAG_GORILLA` (0x04) **is** set, a 1-byte encoding flag follows the null +handling section: + +| Flag | Mode | Description | +|--------|--------------|------------------------------------------------| +| `0x00` | Uncompressed | Array of int64 values (non-null only) | +| `0x01` | Gorilla | Delta-of-delta compressed | + +**Uncompressed mode (0x00):** + +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| encoding_flag: uint8 (0x00) | ++----------------------------------------------+ +| Timestamp values (non-null only): | +| value_count x int64 | ++----------------------------------------------+ +``` + +**Gorilla mode (0x01):** + +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| encoding_flag: uint8 (0x01) | ++----------------------------------------------+ +| first_timestamp: int64 | ++----------------------------------------------+ +| second_timestamp: int64 | ++----------------------------------------------+ +| Bit-packed delta-of-deltas: | +| For timestamps 3..N | ++----------------------------------------------+ +``` + +#### Gorilla delta-of-delta algorithm + +The first two timestamps are written in full as int64 values. 
Starting from +the third timestamp (index `i = 2`), each subsequent value is encoded as a +delta-of-deltas: + +```python +delta_i = t[i] - t[i - 1] +dod_i = delta_i - delta_{i-1} # delta_{i-1} = t[i-1] - t[i-2] +``` + +The very first encoded DoD applies at `i = 2`, where `delta_{i-1} = t[1] - t[0]`. +There is no implicit zero-delta anchor before that. + +Encoding buckets (bits are written LSB-first): + +| Condition | Prefix | Value bits | Total bits | +|----------------------|--------|-------------|------------| +| DoD == 0 | `0` | 0 | 1 | +| DoD in [-64, 63] | `10` | 7 (signed) | 9 | +| DoD in [-256, 255] | `110` | 9 (signed) | 12 | +| DoD in [-2048, 2047] | `1110` | 12 (signed) | 16 | +| Otherwise | `1111` | 32 (signed) | 36 | + +The bit stream is padded to a byte boundary at the end. If any DoD value +exceeds the 32-bit signed integer range, the encoder falls back to +uncompressed mode. + +### UUID + +16 bytes per value: 8 bytes for the low 64 bits, then 8 bytes for the high +64 bits, both little-endian. + +### LONG256 + +32 bytes per value: four int64 values, least significant first, all +little-endian. + +### GeoHash + +```text ++------------------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++------------------------------------------------------+ +| precision_bits: varint (1-60) | ++------------------------------------------------------+ +| Packed geohash values: | +| bytes_per_value = ceil(precision_bits / 8) | +| total = bytes_per_value x N | +| N = row_count if null_flag == 0 | +| N = row_count - null_count if null_flag != 0 | ++------------------------------------------------------+ +``` + +In sentinel mode, GEOHASH null rows are encoded as all-ones truncated to +`bytes_per_value`. The reference implementation currently encodes GEOHASH in +bitmap mode whenever the column contains any null rows. 
+ +### Array types (DOUBLE_ARRAY, LONG_ARRAY) + +N-dimensional arrays, row-major order: + +```text ++------------------------------------------------------+ +| For each non-null row: | +| n_dims: uint8 Number of dimensions | +| dim_lengths: n_dims x int32 Length per dimension | +| values: product(dims) x element | +| (float64 for DOUBLE_ARRAY, | +| int64 for LONG_ARRAY) | ++------------------------------------------------------+ +``` + +### Decimal types (DECIMAL64, DECIMAL128, DECIMAL256) + +Decimal values are stored as two's complement integers. A 1-byte scale prefix +is shared by all values in the column. The scale is the number of decimal +digits to the right of the decimal point — i.e., the real value is reconstructed +as: + +```text +value = unscaled_int / 10^scale +``` + +For example, with `scale = 3` an unscaled int64 of `12345` decodes to `12.345`. +The scale is base-10, not base-2. + +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| scale: uint8 | ++----------------------------------------------+ +| Unscaled values: | +| DECIMAL64: 8 bytes x value_count | +| DECIMAL128: 16 bytes x value_count | +| DECIMAL256: 32 bytes x value_count | ++----------------------------------------------+ +``` + +| Type | Value size | Precision | +|-------------|------------|------------| +| DECIMAL64 | 8 bytes | 18 digits | +| DECIMAL128 | 16 bytes | 38 digits | +| DECIMAL256 | 32 bytes | 76 digits | + +## Server responses + +Every response starts with a 1-byte status code. OK and error responses include +an 8-byte sequence number that correlates the response with the original +request. + +### Sequence numbering + +The QWP wire encoder does **not** put a sequence number into the request +header — the message header at offset 0 ends at offset 12 with `payload_length`, +and that is the entire client-side framing. 
The server assigns the sequence +number itself: it counts inbound binary frames on the connection (starting at +`0`) and echoes the assigned `wireSeq` in the `sequence` field of every OK and +error frame. + +Two consequences for client implementers: + +- **Frames must be sent in strict order.** The server assumes "the Nth frame + received is wireSeq = N", so any reordering by the client breaks the mapping + between requests and responses. +- **Match responses by send order.** The client tracks an ordered list of + outstanding messages; the next OK/error response always corresponds to the + oldest unacknowledged message, and the `sequence` field is the server's + authoritative confirmation of which one. + +On a fresh connection both sides start at `0`. On reconnect both sides reset. + +### OK response + +```text ++------------------------------------------------------+ +| status: uint8 (0x00) | +| sequence: int64 Request sequence number | +| tableCount: uint16 Number of table entries | +| Repeated tableCount times: | +| nameLen: uint16 Table name length | +| name: bytes UTF-8 table name | +| seqTxn: int64 Sequencer txn for table | ++------------------------------------------------------+ +``` + +The per-table entries report the +[sequencer transaction](/docs/query/functions/meta/#wal_tables) assigned to each +table that committed data in the acknowledged batch. `tableCount` is 0 when no +[WAL](/docs/concepts/write-ahead-log/) (Write-Ahead Log) tables committed +(e.g., non-WAL tables or empty batches). 
+ +### Error response + +```text ++-----------------------------------------------------+ +| status: uint8 Status code | +| sequence: int64 Request sequence number | +| msg_len: uint16 Error message length | +| msg_bytes: bytes UTF-8 error message | ++-----------------------------------------------------+ +``` + +### Status codes + +| Code | Hex | Name | Description | +|------|--------|-----------------|--------------------------------------------------| +| 0 | `0x00` | OK | Batch accepted (written to WAL) | +| 2 | `0x02` | DURABLE_ACK | Batch WAL uploaded to object store (Enterprise) | +| 3 | `0x03` | SCHEMA_MISMATCH | Column type incompatible with existing table | +| 5 | `0x05` | PARSE_ERROR | Malformed message | +| 6 | `0x06` | INTERNAL_ERROR | Server-side error | +| 8 | `0x08` | SECURITY_ERROR | Authorization failure | +| 9 | `0x09` | WRITE_ERROR | Write failure (e.g., table not accepting writes) | + +### Durable acknowledgement + +:::note Enterprise + +Durable acknowledgement (status code 0x02) is available in QuestDB Enterprise +with primary replication configured. Open source QuestDB returns OK (0x00) or +error responses only. + +::: + +A standard OK confirms the batch was committed to the server's local WAL. To +receive a second acknowledgement after the WAL has been durably uploaded to the +configured object store, include `X-QWP-Request-Durable-Ack: true` +(case-insensitive) in the WebSocket upgrade request. + +If the server accepts the opt-in, it echoes `X-QWP-Durable-Ack: enabled` in +the 101 response. Clients that opt in **must** verify this header is present +and fail the connect attempt if it is absent. 
+ +**Durable-ack response format:** + +```text ++------------------------------------------------------+ +| status: uint8 (0x02) | +| tableCount: uint16 Number of table entries | +| Repeated tableCount times: | +| nameLen: uint16 Table name length | +| name: bytes UTF-8 table name | +| seqTxn: int64 Durably-uploaded seqTxn | ++------------------------------------------------------+ +``` + +The durable-ack has no sequence field. It carries cumulative per-table +watermarks that advance as uploads complete. Only tables whose durable +watermark advanced since the last durable-ack are included. + +The durable-ack watermark always trails the regular OK watermark. Empty +messages (those that produced no WAL commit, for example messages that only +reference materialized views) are trivially durable; their sequence advances +the durable watermark as soon as all preceding messages are durable. + +Reconnects discard any in-flight durable-ack tracking. The new connection +re-OKs replayed batches and the server re-emits cumulative durable-ack +watermarks from scratch, so the client's trim watermark must restart against +the new connection's wire sequencing. + +Servers without replication silently ignore the request header and never emit +durable-ack frames. There is no durable-failure status; persistent upload +failures surface only as absence of a durable-ack frame. + +## Protocol limits + +| Limit | Default value | +|-------------------------------|---------------| +| Max batch size | 16 MB | +| Max tables per connection | 10,000 | +| Max rows per table block | 1,000,000 | +| Max columns per table | 2,048 | +| Max table name length | 127 bytes | +| Max column name length | 127 bytes | +| Max in-flight batches | 128 | +| Max symbol dictionary entries | 1,000,000 | + +The header's `table_count` field is a uint16, so the protocol ceiling for +tables per message is 65,535 regardless of the configured limit. 
Individual +string values have no dedicated length limit; they are bounded only by the max +batch size. + +The symbol dictionary limit applies per column in per-table dictionary mode and +per connection in global delta dictionary mode. Exceeding it causes the server +to reject the message with `PARSE_ERROR`. + +### Practical WebSocket frame cap + +The 16 MB max batch is a **QWP protocol ceiling**, not an effective server-side +cap. The HTTP receive buffer used by the WebSocket plumbing is typically +smaller, and it is checked **before** the QWP parser ever sees the payload: + +| Server config key | Default | Effect | +|-------------------------|---------|---------------------------------------------------------------------| +| `http.recv.buffer.size` | 2 MiB | Maximum WebSocket frame the server will accept on `/write/v4`. | + +A WebSocket binary frame larger than this is rejected immediately with close +code `1009 MESSAGE_TOO_BIG` and the connection is dropped — the client will +observe an abrupt disconnect (`ECANCELED`, `EPIPE`, or similar depending on the +WebSocket library) partway through the send. + +The effective per-message size limit is therefore +`min(http.recv.buffer.size − 14, 16 MiB)` — the server folds the 14-byte +worst-case WebSocket frame header into the value it advertises in the +[`X-QWP-Max-Batch-Size`](#version-negotiation) response header on the 101 +upgrade. Clients that parse the header can size batches against that exact +value rather than guessing the operator's `http.recv.buffer.size`. + +**Recommendation for client implementers:** read `X-QWP-Max-Batch-Size` from +the 101 response and clamp the batch-size trigger to ~90% of it. The 10% +margin absorbs schema / dict-delta / framing overhead that wire bytes can +carry beyond raw column-buffer bytes. Older servers omit the header — fall +back to ~1.9 MiB (safe against the default 2 MiB recv buffer). 
Operators who +want larger batches must raise `http.recv.buffer.size` on the server (e.g., +`http.recv.buffer.size=17m` to use the full QWP 16 MB headroom); the next +handshake will advertise the new ceiling automatically. + +## Client operation + +This section describes the high-level batching and I/O behavior a client +implements. The full client-side substrate (on-disk store-and-forward, frame +sequence numbers, ACK-driven trim, reconnect/replay semantics) is specified in +the [connect string reference](/docs/connect/clients/connect-string). + +### Double-buffered async I/O + +The client uses double-buffered microbatches: + +1. The user thread writes rows to the **active** buffer. +2. When a buffer reaches its threshold (row count, byte size, or age), the + client seals it and enqueues it for sending. +3. A dedicated I/O thread sends batches over the WebSocket. +4. The client swaps to the other buffer so writing can continue without + blocking. + +### Auto-flush triggers + +| Trigger | Default | +|----------------------|------------| +| Row count | 1,000 rows | +| Byte size | disabled | +| Time since first row | 100 ms | + +### Failover and high availability + +Ingress senders use a reconnect loop regardless of whether store-and-forward +is configured. The two storage modes share identical failover semantics; they +differ only in where unacknowledged data lives: + +- **`sf_dir` set** (store-and-forward): segments are memory-mapped files under + `sf_dir`. Unacknowledged data survives sender restarts and is replayed by + the next sender bound to the same slot. +- **`sf_dir` unset** (memory mode): segments are allocated in process memory. + Unacknowledged data is lost if the sender process dies. The reconnect loop + still spans transient server outages such as rolling upgrades, but the RAM + buffer caps how much data can accumulate during the outage. 
+ +Connect-string keys that control ingress failover are documented in the +[reconnect and failover](/docs/connect/clients/connect-string#reconnect-keys) +section of the connect string reference: + +| Key | Default | Description | +|----------------------------------|-----------|-------------------------------------------| +| `reconnect_max_duration_millis` | `300000` | Total outage budget before giving up. | +| `reconnect_initial_backoff_millis` | `100` | First post-failure sleep. | +| `reconnect_max_backoff_millis` | `5000` | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry on first connect (`on`, `sync`, `async`). | + +Key behaviors: + +- **Ingress is zone-blind.** It pins QWP v1 and never reads `SERVER_INFO`, so + every host's zone tier is equivalent and selection is based on health state + only. The `zone=` connect-string key is accepted but silently ignored, so a + connect string shared with egress clients works unchanged on ingress. +- **Authentication errors are terminal** at any host (`401`/`403`). The + reconnect loop does not continue past them. +- **`421 + X-QuestDB-Role`** is a role reject: transient if the role is + `PRIMARY_CATCHUP`, topology-level otherwise. +- **All other upgrade errors are transient** and feed into the reconnect loop, + including `404`, `426`, `503`, generic 4xx/5xx, TCP/TLS failures, + mid-stream send/recv errors, and an upgrade response that advertises a QWP + version outside the client's supported range (per-endpoint, so a host on a + rolling upgrade does not lock the client out of compatible peers). + +:::note Enterprise + +Multi-host failover with automatic reconnect requires QuestDB Enterprise. + +::: + +## Examples + +### Single table with three columns + +Table `sensors`, 2 rows, 3 columns: `id` (LONG), `value` (DOUBLE), `ts` +(TIMESTAMP). No nulls, no Gorilla compression, no delta symbol dictionary. 
+ +```text +# Header (12 bytes) +51 57 50 31 # Magic: "QWP1" +01 # Version: 1 +00 # Flags: none +01 00 # Table count: 1 +XX XX XX XX # Payload length + +# Table Block +07 # Table name length: 7 +73 65 6E 73 6F 72 73 # "sensors" UTF-8 +02 # Row count: 2 +03 # Column count: 3 + +# Schema (full mode) +00 # Schema mode: full +00 # Schema ID: 0 + +# Column 0: id (LONG) +02 # Name length: 2 +69 64 # "id" UTF-8 +05 # Type: LONG + +# Column 1: value (DOUBLE) +05 # Name length: 5 +76 61 6C 75 65 # "value" UTF-8 +07 # Type: DOUBLE + +# Column 2: ts (TIMESTAMP, designated) +00 # Name length: 0 (designated timestamp) +0A # Type: TIMESTAMP + +# Column 0 data (LONG, 2 values) +00 # null_flag: 0x00 (no bitmap) +01 00 00 00 00 00 00 00 # id = 1 +02 00 00 00 00 00 00 00 # id = 2 + +# Column 1 data (DOUBLE, 2 values) +00 # null_flag: 0x00 (no bitmap) +CD CC CC CC CC CC F4 3F # value = 1.3 +9A 99 99 99 99 99 01 40 # value = 2.2 + +# Column 2 data (TIMESTAMP, uncompressed, 2 values) +00 # null_flag: 0x00 (no bitmap) +00 E4 0B 54 02 00 00 00 # ts = 10000000000 microseconds +80 1A 06 00 00 00 00 00 # ts = 400000 microseconds +``` + +### Nullable VARCHAR column + +4 rows where row 1 is null: + +```text +# Null flag + bitmap +01 # null_flag: nonzero = bitmap follows +02 # 0b00000010 (bit 1 set = row 1 is null) + +# Offset array (3 non-null values = 4 offsets) +00 00 00 00 # offset[0] = 0 (start of "foo") +03 00 00 00 # offset[1] = 3 (end of "foo") +06 00 00 00 # offset[2] = 6 (end of "bar") +09 00 00 00 # offset[3] = 9 (end of "baz") + +# String data (concatenated UTF-8) +66 6F 6F # "foo" (row 0) +62 61 72 # "bar" (row 2) +62 61 7A # "baz" (row 3) +``` + +### Gorilla timestamps with delta symbol dictionary + +Table `sensors`, 2 rows, 3 columns: `host` (SYMBOL), `temp` (DOUBLE), +designated TIMESTAMP. Both `FLAG_GORILLA` and `FLAG_DELTA_SYMBOL_DICT` are set. 
+ +```text +# Header (12 bytes) +51 57 50 31 # Magic: "QWP1" +01 # Version: 1 +0C # Flags: 0x04 (Gorilla) | 0x08 (Delta Symbol Dict) +01 00 # Table count: 1 +XX XX XX XX # Payload length + +# Delta Symbol Dictionary +00 # delta_start = 0 +02 # delta_count = 2 +07 73 65 72 76 65 72 31 # "server1" (length = 7) +07 73 65 72 76 65 72 32 # "server2" (length = 7) + +# Table Block +07 73 65 6E 73 6F 72 73 # Table name "sensors" (length = 7) +02 # row_count = 2 +03 # column_count = 3 + +# Schema (full mode) +00 # schema_mode = FULL +00 # schema_id = 0 +04 68 6F 73 74 09 # "host" : SYMBOL +04 74 65 6D 70 07 # "temp" : DOUBLE +00 0A # "" : TIMESTAMP (designated) + +# Column 0 (SYMBOL, global delta IDs) +00 # null_flag: no nulls +00 # Row 0: global ID 0 +01 # Row 1: global ID 1 + +# Column 1 (DOUBLE, 2 values) +00 # null_flag: no nulls +66 66 66 66 66 E6 56 40 # 91.6 +9A 99 99 99 99 19 57 40 # 92.4 + +# Column 2 (TIMESTAMP, Gorilla) +00 # null_flag: no nulls +01 # encoding = Gorilla +[8 bytes: first timestamp] +[8 bytes: second timestamp] +# (only 2 values, so no delta-of-delta bit stream follows) +``` + +## Reference implementation + +The reference client implementation is +[`java-questdb-client`](https://github.com/questdb/java-questdb-client) +at commit +[`67bb5e4`](https://github.com/questdb/java-questdb-client/commit/67bb5e49feea7e63b813ea08189c23ea11486131). + +The server-side protocol parser lives in the QuestDB server repository under +`core/src/main/java/io/questdb/cutlass/qwp/protocol/`. 
+ +## Version history + +| Version | Description | +|------------|---------------------------------| +| 1 (`0x01`) | Initial binary protocol release | diff --git a/documentation/query/datatypes/geohashes.md b/documentation/query/datatypes/geohashes.md index c5ea9698c..2f8cfc6b9 100644 --- a/documentation/query/datatypes/geohashes.md +++ b/documentation/query/datatypes/geohashes.md @@ -418,7 +418,7 @@ Invoking the method above will return one of the following: - `ColumnType.GEOLONG` For more information and detailed examples of using table readers and writers, -see the [Java API documentation](/docs/ingestion/java-embedded/). +see the [Java API documentation](/docs/connect/java-embedded/). ## InfluxDB Line Protocol @@ -461,7 +461,7 @@ geo_data geohash="46swgj10" ## CSV import Geohashes may also be inserted via -[REST API](/docs/ingestion/import-csv/#import-csv-via-rest). In order to perform +[REST API](/docs/connect/compatibility/import-csv/#import-csv-via-rest). In order to perform inserts in this way; 1. Create a table with columns of geohash type beforehand: diff --git a/documentation/query/datatypes/overview.md b/documentation/query/datatypes/overview.md index a645b2598..506509aa4 100644 --- a/documentation/query/datatypes/overview.md +++ b/documentation/query/datatypes/overview.md @@ -160,7 +160,7 @@ INSERT INTO my_table VALUES ('a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'); SELECT * FROM my_table WHERE id = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'; ``` -If you use the [PostgreSQL Wire Protocol](/docs/query/pgwire/overview/) then +If you use the [PostgreSQL Wire Protocol](/docs/connect/compatibility/pgwire/overview/) then you can use the `uuid` type in your queries. 
The JDBC API does not distinguish the UUID type, but the Postgres JDBC driver supports it in prepared statements: @@ -170,7 +170,7 @@ PreparedStatement ps = connection.prepareStatement("INSERT INTO my_table VALUES ps.setObject(1, uuid); ``` -[QuestDB Client Libraries](/docs/ingestion/overview/#first-party-clients) can +[QuestDB Client Libraries](/docs/connect/overview/#client-libraries) can send `UUIDs` as `strings` to be converted to UUIDs by the server. ## IPv4 diff --git a/documentation/query/export-parquet.md b/documentation/query/export-parquet.md index fc5c7c3d2..ca8e94b2d 100644 --- a/documentation/query/export-parquet.md +++ b/documentation/query/export-parquet.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/export-parquet title: Parquet Export sidebar_label: Parquet Export description: @@ -35,11 +36,11 @@ The `/exp` REST API endpoint executes a query and streams the result as a Parque :::tip -See also the [/exp documentation](/docs/query/rest-api/#exp---export-data). +See also the [/exp documentation](/docs/connect/compatibility/rest-api/#exp---export-data). ::: -You can use the same parameters as when doing a [CSV export](/docs/query/rest-api/#exp---export-data), but passing `parquet` as the `fmt` parameter value. +You can use the same parameters as when doing a [CSV export](/docs/connect/compatibility/rest-api/#exp---export-data), but passing `parquet` as the `fmt` parameter value. ```bash curl -G \ @@ -71,7 +72,7 @@ See also the [COPY-TO documentation](/docs/query/sql/copy). ::: You can use the `COPY` command from the web console, from any pgwire-compliant client, -or using the [`exec` endpoint](/docs/query/rest-api/#exec---execute-queries) of the REST API. +or using the [`exec` endpoint](/docs/connect/compatibility/rest-api/#exec---execute-queries) of the REST API. 
You can export a query: @@ -301,7 +302,7 @@ Bloom filters can be enabled per-column via the `BLOOM_FILTER` keyword in or per-export via `bloom_filter_columns` in [`CONVERT PARTITION`](#bloom-filters-for-in-place-conversion), [`COPY TO`](/docs/query/sql/copy/), and the -[REST `/exp` endpoint](/docs/query/rest-api/#parquet-export-parameters). +[REST `/exp` endpoint](/docs/connect/compatibility/rest-api/#parquet-export-parameters). The false positive probability (FPP) determines the trade-off between filter size and accuracy. It is configured globally: diff --git a/documentation/query/functions/date-time.md b/documentation/query/functions/date-time.md index 54c3d98c9..587bad735 100644 --- a/documentation/query/functions/date-time.md +++ b/documentation/query/functions/date-time.md @@ -46,7 +46,7 @@ SELECT CAST(ts_column AS TIMESTAMP_NS) FROM my_table; To convert language-specific datetime objects (Python `datetime`, Java `Instant`, etc.) into QuestDB timestamps, see the -[Date to Timestamp conversion](/docs/ingestion/clients/date-to-timestamp-conversion) +[Date to Timestamp conversion](/docs/connect/clients/date-to-timestamp-conversion) reference for Python, Go, Java, JavaScript, C/C++, Rust, and C#/.NET. --- diff --git a/documentation/query/functions/meta.md b/documentation/query/functions/meta.md index e02f30f6b..704a7d3e0 100644 --- a/documentation/query/functions/meta.md +++ b/documentation/query/functions/meta.md @@ -446,7 +446,7 @@ Returns a table with the following columns: attached (`name` of the partition will contain the `.attachable` extension) - `hasParquetGenerated` - _BOOLEAN_, true if a Parquet copy of the partition has been produced alongside the native files. Set by either - [manual Parquet conversion](/docs/query/export-parquet/#in-place-conversion) + [manual Parquet conversion](/docs/connect/compatibility/export-parquet/#in-place-conversion) (`ALTER TABLE ... 
CONVERT PARTITION TO PARQUET`) or by a [storage policy](/docs/concepts/storage-policy/)'s `TO PARQUET` stage (Enterprise). The partition is still served from native storage until it is diff --git a/documentation/query/functions/parquet.md b/documentation/query/functions/parquet.md index 34894534c..0fbea7a85 100644 --- a/documentation/query/functions/parquet.md +++ b/documentation/query/functions/parquet.md @@ -6,7 +6,7 @@ description: QuestDB Apache Parquet functions reference documentation. QuestDB can read and query external [Apache Parquet](/glossary/apache-parquet/) files using SQL. -To export data as Parquet, see [Parquet Export](/docs/query/export-parquet/). +To export data as Parquet, see [Parquet Export](/docs/connect/compatibility/export-parquet/). :::info Apache Parquet support is in **beta**. Please report issues via [email](mailto:support@questdb.io), [Slack](https://slack.questdb.com/), or [Discourse](https://community.questdb.com/). diff --git a/documentation/query/overview.md b/documentation/query/overview.md index 3669722c1..a4c684bb7 100644 --- a/documentation/query/overview.md +++ b/documentation/query/overview.md @@ -58,7 +58,7 @@ heavily by third-party tools to provide visualizations, such as within libraries like [Polars](/docs/integrations/data-processing/polars/). > Need to ingest data first? Checkout our -> [Ingestion overview](/docs/ingestion/overview/). +> [Ingestion overview](/docs/connect/overview/). ## QuestDB Web Console @@ -97,7 +97,7 @@ against the demo instance. Query QuestDB using the PostgreSQL endpoint via the default port `8812`. -See [PGWire Client overview](/docs/query/pgwire/overview/) for details on how to +See [PGWire Client overview](/docs/connect/compatibility/pgwire/overview/) for details on how to connect to QuestDB using PostgreSQL clients. Brief examples in multiple languages are shown below. @@ -144,7 +144,7 @@ Brief examples in multiple languages are shown below. 
#### Further Reading -See the [PGWire Client overview](/docs/query/pgwire/overview/) for more details on how to use PostgreSQL +See the [PGWire Client overview](/docs/connect/compatibility/pgwire/overview/) for more details on how to use PostgreSQL clients to connect to QuestDB. ## REST HTTP API @@ -156,12 +156,12 @@ The REST API is accessible on port `9000` and has the following query-capable entrypoints: For details such as content type, query parameters and more, refer to the -[REST HTTP API](/docs/query/rest-api/) reference. +[REST HTTP API](/docs/connect/compatibility/rest-api/) reference. | Entrypoint | HTTP Method | Description | REST HTTP API Reference | | :------------------------------------------ | :---------- | :-------------------------------------- | :------------------------------------------------------------ | -| [`/exp?query=..`](#exp-sql-query-to-csv) | GET | Export SQL Query as CSV | [Reference](/docs/query/rest-api/#exp---export-data) | -| [`/exec?query=..`](#exec-sql-query-to-json) | GET | Run SQL Query returning JSON result set | [Reference](/docs/query/rest-api/#exec---execute-queries) | +| [`/exp?query=..`](#exp-sql-query-to-csv) | GET | Export SQL Query as CSV | [Reference](/docs/connect/compatibility/rest-api/#exp---export-data) | +| [`/exec?query=..`](#exec-sql-query-to-json) | GET | Run SQL Query returning JSON result set | [Reference](/docs/connect/compatibility/rest-api/#exec---execute-queries) | #### `/exp`: SQL Query to CSV diff --git a/documentation/query/pgwire/c-and-cpp.md b/documentation/query/pgwire/c-and-cpp.md index f5b10f45e..de6e5160b 100644 --- a/documentation/query/pgwire/c-and-cpp.md +++ b/documentation/query/pgwire/c-and-cpp.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/c-and-cpp title: C/C++ PGWire Guide description: C and C++ clients for QuestDB over the PostgreSQL wire protocol (PGWire). 
@@ -28,7 +29,7 @@ a client that doesn’t work, please :::tip For **data ingestion**, use QuestDB’s high-throughput -[InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) instead of PGWire. +[InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire is best for **querying**. ::: diff --git a/documentation/query/pgwire/dotnet.md b/documentation/query/pgwire/dotnet.md index f3cd93117..e744fec6d 100644 --- a/documentation/query/pgwire/dotnet.md +++ b/documentation/query/pgwire/dotnet.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/dotnet title: .NET PGWire Guide description: .NET/C# clients for QuestDB PGWire protocol. Learn how to use the PGWire @@ -28,9 +29,9 @@ C#, we recommend using Npgsql with connection pooling. :::tip -For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) +For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. QuestDB provides an -official [.NET client](https://questdb.com/docs/ingestion/clients/dotnet/) for data ingestion using ILP. +official [.NET client](https://questdb.com/docs/connect/clients/dotnet/) for data ingestion using ILP. ::: @@ -812,7 +813,7 @@ the guidelines in this documentation, you can effectively query time-series data various .NET applications. For data ingestion, remember that QuestDB provides an -official [.NET client](https://questdb.com/docs/ingestion/clients/dotnet/) that uses the InfluxDB Line Protocol (ILP) for +official [.NET client](https://questdb.com/docs/connect/clients/dotnet/) that uses the InfluxDB Line Protocol (ILP) for high-throughput data insertion. For optimal performance, use this client for data ingestion and Npgsql for querying. 
QuestDB's SQL extensions for time-series data, such as `SAMPLE BY` and `LATEST ON`, provide powerful tools for analyzing diff --git a/documentation/query/pgwire/go.md b/documentation/query/pgwire/go.md index eff1bd263..d92dfc11b 100644 --- a/documentation/query/pgwire/go.md +++ b/documentation/query/pgwire/go.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/go title: Go PGWire Guide description: Go clients for QuestDB PGWire protocol. Learn how to use the PGWire @@ -27,9 +28,9 @@ for performance. For best performance when querying data from QuestDB with Go, w :::tip -For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) +For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. QuestDB provides an -official [Go client](/docs/ingestion/clients/go/) for data ingestion using ILP. +official [Go client](/docs/connect/clients/go/) for data ingestion using ILP. ::: diff --git a/documentation/query/pgwire/java.md b/documentation/query/pgwire/java.md index 7c66a3cba..8a63efc8e 100644 --- a/documentation/query/pgwire/java.md +++ b/documentation/query/pgwire/java.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/java title: Java PGWire Guide description: Java clients for QuestDB PGWire protocol. Learn how to use the PGWire @@ -29,9 +30,9 @@ driver with connection pooling. :::tip -For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) +For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. 
QuestDB provides an -official [Java client](/docs/ingestion/clients/java/) for data ingestion using ILP. +official [Java client](/docs/connect/clients/java/) for data ingestion using ILP. ::: @@ -216,7 +217,7 @@ QuestDB, via the PostgreSQL wire protocol, supports array data types, including :::tip Inserting large amounts of data using the JDBC driver can be inefficient. For high-throughput ingestion, consider using -QuestDB's [Java ILP client](/docs/ingestion/clients/java/) or the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/). +QuestDB's [Java ILP client](/docs/connect/clients/java/) or the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/). ::: When you need to insert multiple rows containing array data, such as a series of order book snapshots, diff --git a/documentation/query/pgwire/large-result-sets.md b/documentation/query/pgwire/large-result-sets.md index a463558ff..f38913b5d 100644 --- a/documentation/query/pgwire/large-result-sets.md +++ b/documentation/query/pgwire/large-result-sets.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/large-result-sets title: Handling Large Result Sets sidebar_label: Large Result Sets description: @@ -365,6 +366,6 @@ for row in cursor: # Streams in batches ## See also -- [PostgreSQL Wire Protocol Overview](/docs/query/pgwire/overview/) +- [PostgreSQL Wire Protocol Overview](/docs/connect/compatibility/pgwire/overview/) - [Query & SQL Overview](/docs/query/overview/) - [Capacity Planning](/docs/getting-started/capacity-planning/) diff --git a/documentation/query/pgwire/nodejs.md b/documentation/query/pgwire/nodejs.md index ec31ccf4d..ab69931e5 100644 --- a/documentation/query/pgwire/nodejs.md +++ b/documentation/query/pgwire/nodejs.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/nodejs title: JavaScript PGWire Guide description: JavaScript clients for QuestDB PGWire protocol. Learn how to use the PGWire @@ -29,8 +30,8 @@ for performance. 
Our recommendation is to use the `pg` client for most use cases :::tip For data ingestion, we recommend using QuestDB's first-party clients with -the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) instead of PGWire. PGWire should primarily be used for -querying data in QuestDB. QuestDB provides an official [JavaScript client](/docs/ingestion/clients/nodejs/) for data +the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for +querying data in QuestDB. QuestDB provides an official [JavaScript client](/docs/connect/clients/nodejs/) for data ingestion using ILP. ::: diff --git a/documentation/query/pgwire/overview.md b/documentation/query/pgwire/overview.md index b1636db3b..3d1a6a874 100644 --- a/documentation/query/pgwire/overview.md +++ b/documentation/query/pgwire/overview.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/overview title: PostgreSQL Wire Protocol description: QuestDB supports the PostgreSQL wire protocol (PGWire), allowing you to connect @@ -22,7 +23,7 @@ tools. PGWire also supports [INSERT statements](#insert-examples) for lower-volume data ingestion. For high-throughput ingestion, use the -[QuestDB clients](/docs/ingestion/overview/) instead. +[QuestDB clients](/docs/connect/overview/) instead. ## Query examples @@ -66,7 +67,7 @@ slow performance. **Solution:** Use cursor-based fetching to retrieve rows in batches. -See [Handling Large Result Sets](/docs/query/pgwire/large-result-sets/) for +See [Handling Large Result Sets](/docs/connect/compatibility/pgwire/large-result-sets/) for per-language examples. ### Timestamp handling diff --git a/documentation/query/pgwire/php.md b/documentation/query/pgwire/php.md index 100a39788..9df820cda 100644 --- a/documentation/query/pgwire/php.md +++ b/documentation/query/pgwire/php.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/php title: PHP PGWire Guide description: PHP clients for QuestDB PGWire protocol. 
Learn how to use the PGWire @@ -29,7 +30,7 @@ pooling. :::tip For data ingestion, we recommend using QuestDB's first-party clients with -the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) instead of PGWire. +the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. ::: diff --git a/documentation/query/pgwire/python.md b/documentation/query/pgwire/python.md index 2e30d2be2..bb5f092cb 100644 --- a/documentation/query/pgwire/python.md +++ b/documentation/query/pgwire/python.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/python title: Python PGWire Guide description: Python clients for QuestDB PGWire protocol. Learn how to use the PGWire @@ -30,9 +31,9 @@ for the best performance when querying data. :::tip -For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) +For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. QuestDB provides an -official [Python client](/docs/ingestion/clients/python/) for data ingestion using ILP. +official [Python client](/docs/connect/clients/python/) for data ingestion using ILP. ::: @@ -102,7 +103,7 @@ asyncio.run(connect_to_questdb()) To set the timezone to UTC, you can set the `TZ` environment variable before running your script. This is important for time-series data to ensure consistent timestamps. -See the [Timestamp Handling](/docs/query/pgwire/overview#timestamp-handling) chapter for additional context on how +See the [Timestamp Handling](/docs/connect/compatibility/pgwire/overview#timestamp-handling) chapter for additional context on how QuestDB handles timezones.
::: @@ -300,7 +301,7 @@ asyncio.run(parameterized_query()) ``` ### Batch Inserts with `executemany()` -While we recommend using the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) for ingestion, you can also use +While we recommend using the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) for ingestion, you can also use the `executemany()` method to insert multiple rows in a single query. It is highly efficient for executing the same parameterized statements multiple times with different sets of data. This method is significantly faster than executing individual statements in a loop because it reduces network round-trips and allows for potential batching optimizations @@ -715,7 +716,7 @@ When you need to insert multiple rows containing array data, such as a series of offers a more performant way to do so compared to inserting row by row with execute(). :::tip -For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) +For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. 
If you cannot use ILP for some reason, you should prefer [asyncpg](#inserting-arrays) over psycopg3 for performance @@ -1197,4 +1198,4 @@ QuestDB's first-party clients with the InfluxDB Line Protocol (ILP) for maximum ## Additional Resources - [Polars Integration with QuestDB](/docs/integrations/data-processing/polars) -- [QuestDB Client for fast ingestion](/docs/ingestion/clients/python/) +- [QuestDB Client for fast ingestion](/docs/connect/clients/python/) diff --git a/documentation/query/pgwire/r.md b/documentation/query/pgwire/r.md index d0c80fe90..3ca02b3a2 100644 --- a/documentation/query/pgwire/r.md +++ b/documentation/query/pgwire/r.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/r title: R PGwire Guide description: R clients for QuestDB PGWire protocol. Learn how to use the PGWire @@ -29,7 +30,7 @@ interface. :::tip For data ingestion, we recommend using QuestDB's first-party clients with -the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) instead of PGWire. PGWire should primarily be used for +the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. ::: diff --git a/documentation/query/pgwire/rust.md b/documentation/query/pgwire/rust.md index 261d8b157..f54732443 100644 --- a/documentation/query/pgwire/rust.md +++ b/documentation/query/pgwire/rust.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/pgwire/rust title: Rust PGWire Guide description: Rust clients for QuestDB PGWire protocol. Learn how to use the PGWire @@ -28,9 +29,9 @@ connection pooling. :::tip -For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/ingestion/overview/) +For data ingestion, we recommend using QuestDB's first-party clients with the [InfluxDB Line Protocol (ILP)](/docs/connect/overview/) instead of PGWire. PGWire should primarily be used for querying data in QuestDB. 
QuestDB provides an -official [Rust client](/docs/ingestion/clients/rust/) for data ingestion using ILP. +official [Rust client](/docs/connect/clients/rust/) for data ingestion using ILP. ::: diff --git a/documentation/query/rest-api.md b/documentation/query/rest-api.md index 3d3ee40e0..3b9714f9b 100644 --- a/documentation/query/rest-api.md +++ b/documentation/query/rest-api.md @@ -1,4 +1,5 @@ --- +slug: /connect/compatibility/rest-api title: REST API sidebar_label: REST HTTP API description: REST API reference documentation. @@ -36,11 +37,11 @@ insert-capable entrypoints: | Entrypoint | HTTP Method | Description | API Docs | | :----------------------------------------- | :---------- | :-------------------------------------- | :-------------------------------------------------------- | -| [`/imp`](#imp-uploading-tabular-data) | POST | Import CSV data | [Reference](/docs/query/rest-api/#imp---import-data) | -| [`/exec?query=..`](#exec-sql-insert-query) | GET | Run SQL Query returning JSON result set | [Reference](/docs/query/rest-api/#exec---execute-queries) | +| [`/imp`](#imp-uploading-tabular-data) | POST | Import CSV data | [Reference](/docs/connect/compatibility/rest-api/#imp---import-data) | +| [`/exec?query=..`](#exec-sql-insert-query) | GET | Run SQL Query returning JSON result set | [Reference](/docs/connect/compatibility/rest-api/#exec---execute-queries) | For details such as content type, query parameters and more, refer to the -[REST API](/docs/query/rest-api/) docs. +[REST API](/docs/connect/compatibility/rest-api/) docs. ### `/imp`: Uploading Tabular Data @@ -143,7 +144,7 @@ need high-performance inserts. :::tip For a complete guide including text loader configuration and troubleshooting, -see [CSV Import](/docs/ingestion/import-csv/#import-csv-via-rest). +see [CSV Import](/docs/connect/compatibility/import-csv/#import-csv-via-rest). ::: `/imp` streams tabular text data directly into a table. 
It supports CSV, TAB and @@ -177,7 +178,7 @@ Content-Type with following optional URL parameters which must be URL encoded: | `delimiter` | No | | URL encoded delimiter character. When set, import will try to detect the delimiter automatically. Since automatic delimiter detection requires at least two lines (rows) to be present in the file, this parameter may be used to allow single line file import. | | `fmt` | No | `tabular` | Can be set to `json` to get the response formatted as such. | | `forceHeader` | No | `false` | `true` or `false`. When `false`, QuestDB will try to infer if the first line of the file is the header line. When set to `true`, QuestDB will expect that line to be the header line. | -| `name` | No | Name of the file | Name of the table to create, [see below](/docs/query/rest-api/#names). | +| `name` | No | Name of the file | Name of the table to create, [see below](/docs/connect/compatibility/rest-api/#names). | | `overwrite` | No | `false` | `true` or `false`. When set to true, any existing data or structure will be overwritten. | | `partitionBy` | No | `NONE` | See [partitions](/docs/concepts/partitions/#creating-partitioned-tables). | | `o3MaxLag` | No | | Sets upper limit on the created table to be used for the in-memory out-of-order buffer. Can be also set globally via the `cairo.o3.max.lag` configuration property. | diff --git a/documentation/query/sql/alter-table-alter-column-set-parquet.md b/documentation/query/sql/alter-table-alter-column-set-parquet.md index de610b94d..b57fb0c9b 100644 --- a/documentation/query/sql/alter-table-alter-column-set-parquet.md +++ b/documentation/query/sql/alter-table-alter-column-set-parquet.md @@ -12,7 +12,7 @@ values, significantly speeding up equality and `IN` queries on large Parquet partitions. 
These settings only affect -[Parquet partitions](/docs/query/export-parquet/#in-place-conversion) and are +[Parquet partitions](/docs/connect/compatibility/export-parquet/#in-place-conversion) and are ignored for native partitions. ## Syntax diff --git a/documentation/query/sql/copy.md b/documentation/query/sql/copy.md index b3e2ab8aa..27751ad1f 100644 --- a/documentation/query/sql/copy.md +++ b/documentation/query/sql/copy.md @@ -176,7 +176,7 @@ These options are provided as key-value pairs after the `WITH` keyword. ### Examples For more details on parallel import, please also see -[Importing data in bulk via CSV](/docs/ingestion/import-csv/#import-csv-via-copy-sql). +[Importing data in bulk via CSV](/docs/connect/compatibility/import-csv/#import-csv-via-copy-sql). ```questdb-sql title="COPY" COPY weather FROM 'weather.csv' WITH HEADER true FORMAT 'yyyy-MM-ddTHH:mm:ss.SSSUUUZ' ON ERROR SKIP_ROW; diff --git a/documentation/query/sql/create-table.md b/documentation/query/sql/create-table.md index 22048cb33..789c790d2 100644 --- a/documentation/query/sql/create-table.md +++ b/documentation/query/sql/create-table.md @@ -489,7 +489,7 @@ PARQUET(encoding [, compression[(level)]]) Column definitions may include an optional `PARQUET(encoding [, compression[(level)]] [, BLOOM_FILTER])` clause. These settings only affect -[Parquet partitions](/docs/query/export-parquet/#in-place-conversion) and are +[Parquet partitions](/docs/connect/compatibility/export-parquet/#in-place-conversion) and are ignored for native partitions. Encoding, compression, and bloom filter are all optional — use `default` for the encoding when specifying compression only. @@ -617,7 +617,7 @@ configuration options. 
:::note When converting partitions with an explicit `bloom_filter_columns` option in -[`CONVERT PARTITION`](/docs/query/export-parquet/#bloom-filters-for-in-place-conversion), +[`CONVERT PARTITION`](/docs/connect/compatibility/export-parquet/#bloom-filters-for-in-place-conversion), the explicit list overrides per-column `BLOOM_FILTER` metadata. ::: diff --git a/documentation/schema-design-essentials.md b/documentation/schema-design-essentials.md index b88a6c929..438d4b639 100644 --- a/documentation/schema-design-essentials.md +++ b/documentation/schema-design-essentials.md @@ -400,7 +400,7 @@ permissions for access control. ## PostgreSQL compatibility -QuestDB supports the [PostgreSQL wire protocol](/docs/query/pgwire/overview/), +QuestDB supports the [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/), so most PostgreSQL client libraries work. However, QuestDB is not PostgreSQL: - No `PRIMARY KEY`, `FOREIGN KEY`, or `NOT NULL` constraints @@ -502,11 +502,11 @@ CREATE TABLE metrics ( For schema migrations, QuestDB supports [Flyway](https://documentation.red-gate.com/fd/questdb-305791448.html). You can also use ILP auto-creation for dynamic schemas, though this applies -default settings. See [ILP Overview](/docs/ingestion/ilp/overview/) for details. +default settings. See [ILP Overview](/docs/connect/compatibility/ilp/overview/) for details. 
## Next steps - [Quick Start](/docs/getting-started/quick-start/) — Create your first table and run queries - [Capacity Planning](/docs/getting-started/capacity-planning/) — Size your deployment for production -- [Connect & Ingest](/docs/ingestion/overview/) — Load data into QuestDB +- [Connect & Ingest](/docs/connect/overview/) — Load data into QuestDB - [Materialized Views](/docs/concepts/materialized-views/) — Pre-compute aggregations for fast dashboards diff --git a/documentation/security/tls.md b/documentation/security/tls.md index 2df11eba7..820a94e67 100644 --- a/documentation/security/tls.md +++ b/documentation/security/tls.md @@ -14,9 +14,9 @@ import { EnterpriseNote } from "@site/src/components/EnterpriseNote" Transport Layer Security (TLS) encryption is available on all supported network interfaces and protocols: -- [InfluxDB Line Protocol over TCP](/docs/ingestion/ilp/overview/) -- [PGWire](/docs/query/pgwire/overview/) -- [HTTP](/docs/query/rest-api/) (REST API) +- [InfluxDB Line Protocol over TCP](/docs/connect/compatibility/ilp/overview/) +- [PGWire](/docs/connect/compatibility/pgwire/overview/) +- [HTTP](/docs/connect/compatibility/rest-api/) (REST API) It's possible to configure encryption on all interfaces at once or individually. 
diff --git a/documentation/sidebars.js b/documentation/sidebars.js index 9adf887bb..385acae1f 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -45,11 +45,11 @@ module.exports = { }, // =================== - // INGESTION REFERENCE + // CONNECT // =================== { type: "category", - label: "Ingestion Reference", + label: "Connect", items: [ { id: "ingestion/overview", @@ -58,13 +58,17 @@ module.exports = { }, { type: "category", - label: "Language Clients", - collapsed: true, + label: "Client Libraries", items: [ { - id: "ingestion/clients/configuration-string", + id: "client-configuration/connect-string", + type: "doc", + label: "Connect string", + }, + { + id: "ingestion/clients/java", type: "doc", - label: "Configuration String", + label: "Java", }, { id: "ingestion/clients/python", @@ -76,11 +80,6 @@ module.exports = { type: "doc", label: "Go", }, - { - id: "ingestion/clients/java", - type: "doc", - label: "Java", - }, { id: "ingestion/clients/rust", type: "doc", @@ -108,6 +107,11 @@ module.exports = { }, ], }, + { + id: "connect/agents", + type: "doc", + label: "Agents", + }, { type: "category", label: "Message Brokers", @@ -121,12 +125,12 @@ module.exports = { }, { type: "category", - label: "Protocols", - collapsed: true, + label: "Compatibility Protocols", items: [ { type: "category", label: "InfluxDB Line Protocol (ILP)", + collapsed: true, items: [ { id: "ingestion/ilp/overview", @@ -146,88 +150,117 @@ module.exports = { ], }, { - id: "ingestion/java-embedded", - type: "doc", - label: "Java Embedded", + type: "category", + label: "PostgreSQL Wire Protocol (PGWire)", + collapsed: true, + items: [ + { + id: "query/pgwire/overview", + type: "doc", + label: "Overview", + }, + { + id: "query/pgwire/large-result-sets", + type: "doc", + label: "Large Result Sets", + }, + { + id: "query/pgwire/python", + type: "doc", + label: "Python", + }, + { + id: "query/pgwire/go", + type: "doc", + label: "Go", + }, + { + id: "query/pgwire/java", 
+ type: "doc", + label: "Java", + }, + { + id: "query/pgwire/rust", + type: "doc", + label: "Rust", + }, + { + id: "query/pgwire/nodejs", + type: "doc", + label: "Node.js", + }, + { + id: "query/pgwire/dotnet", + type: "doc", + label: ".NET", + }, + { + id: "query/pgwire/php", + type: "doc", + label: "PHP", + }, + { + id: "query/pgwire/r", + type: "doc", + label: "R", + }, + { + id: "query/pgwire/c-and-cpp", + type: "doc", + label: "C/C++", + }, + ], + }, + { + type: "category", + label: "REST API", + collapsed: true, + items: [ + "query/rest-api", + "ingestion/import-csv", + "query/export-parquet", + ], }, ], }, - "ingestion/import-csv", - ], - }, - - // =================== - // QUERY & SQL REFERENCE - // =================== - { - type: "category", - label: "Query & SQL Reference", - items: [ - "query/overview", { + id: "ingestion/java-embedded", + type: "doc", + label: "Java Embedded", + }, + { + label: "Wire Protocols", type: "category", - label: "PostgreSQL Wire Protocol", collapsed: true, items: [ { - id: "query/pgwire/overview", + id: "protocols/overview", type: "doc", label: "Overview", }, { - id: "query/pgwire/large-result-sets", - type: "doc", - label: "Large Result Sets", - }, - { - id: "query/pgwire/python", - type: "doc", - label: "Python", - }, - { - id: "query/pgwire/go", - type: "doc", - label: "Go", - }, - { - id: "query/pgwire/java", - type: "doc", - label: "Java", - }, - { - id: "query/pgwire/rust", - type: "doc", - label: "Rust", - }, - { - id: "query/pgwire/nodejs", - type: "doc", - label: "Node.js", - }, - { - id: "query/pgwire/dotnet", - type: "doc", - label: ".NET", - }, - { - id: "query/pgwire/php", - type: "doc", - label: "PHP", - }, - { - id: "query/pgwire/r", + id: "protocols/qwp-ingress-websocket", type: "doc", - label: "R", + label: "QWP Ingress (WebSocket)", }, { - id: "query/pgwire/c-and-cpp", + id: "protocols/qwp-egress-websocket", type: "doc", - label: "C/C++", + label: "QWP Egress (WebSocket)", }, ], }, - "query/rest-api", - 
"query/export-parquet", + ], + }, + + // =================== + // SQL REFERENCE + // =================== + { + type: "category", + label: "SQL Reference", + items: [ + "query/overview", { type: "category", label: "Data Types", @@ -533,6 +566,7 @@ module.exports = { label: "Materialized Views", }, "concepts/deduplication", + "concepts/delivery-semantics", "concepts/ttl", "concepts/storage-policy", "concepts/write-ahead-log", @@ -595,6 +629,7 @@ module.exports = { "configuration/oidc", "configuration/parallel-sql-execution", "configuration/postgres-wire-protocol", + "configuration/qwp", "configuration/database-replication", "configuration/shared-workers", "configuration/storage-policy", @@ -641,19 +676,31 @@ module.exports = { label: "Overview", }, { - id: "high-availability/setup", - type: "doc", - label: "Setup Guide", + type: "category", + label: "Replication", + items: [ + "high-availability/setup", + "high-availability/tuning", + "high-availability/wal-cleanup", + ], }, { - id: "high-availability/tuning", - type: "doc", - label: "Tuning", + type: "category", + label: "Client Failover", + items: [ + "high-availability/client-failover/concepts", + "high-availability/client-failover/configuration", + ], }, { - id: "high-availability/wal-cleanup", - type: "doc", - label: "WAL Cleanup", + type: "category", + label: "Store-and-Forward", + items: [ + "high-availability/store-and-forward/concepts", + "high-availability/store-and-forward/when-to-use", + "high-availability/store-and-forward/operating-and-tuning", + "high-availability/store-and-forward/configuration", + ], }, ], }, diff --git a/documentation/troubleshooting/faq.md b/documentation/troubleshooting/faq.md index 175f523f9..814635857 100644 --- a/documentation/troubleshooting/faq.md +++ b/documentation/troubleshooting/faq.md @@ -181,7 +181,7 @@ you cannot navigate backwards or access result positions as you might with scrollable cursors in PostgreSQL. 
For more information and for tips to work around, see the -[PostgreSQL compatability section](/docs/query/pgwire/overview/#forward-only-cursors) +[PostgreSQL compatibility section](/docs/connect/compatibility/pgwire/overview/#forward-only-cursors) in our Query & SQL overview. ## My table has corrupted WAL data due to a previous full disk or kernel limits error. What do I do? diff --git a/documentation/tutorials/influxdb-migration.md b/documentation/tutorials/influxdb-migration.md index 1d087d406..00984ab2c 100644 --- a/documentation/tutorials/influxdb-migration.md +++ b/documentation/tutorials/influxdb-migration.md @@ -14,7 +14,7 @@ As a result, the InfluxDB clients work with QuestDB. This guide demonstrates how to use **InfluxDB's clients** with QuestDB. If you don't use the InfluxDB clients and want to use the QuestDB clients, see -the [ingestion overview](/docs/ingestion/overview/). +the [ingestion overview](/docs/connect/overview/). While InfluxDB clients are supported, we recommend using the QuestDB clients. @@ -223,7 +223,7 @@ group: You can use the [INSERT](/docs/query/sql/insert/) keyword to output the processed result into a new table.
-Also, if you leverage [Telegraf](/docs/ingestion/message-brokers/telegraf/), you can do +Also, if you leverage [Telegraf](/docs/connect/message-brokers/telegraf/), you can do this with a small configuration change: ```shell @@ -251,6 +251,6 @@ deeper into customization options for ILP or HTTP, learn more about the protocol, or compare QuestDB with Influx: - [ILP `server.conf` configuration](/docs/configuration/ingestion/) -- [ILP API Overview](/docs/ingestion/ilp/overview/) +- [ILP API Overview](/docs/connect/compatibility/ilp/overview/) - [Comparing TimescaleDB and QuestDB performance and architecture blog](/blog/timescaledb-vs-questdb-comparison/) - [Deep dive of QuestDB vs InfluxDB internals](/blog/2024/02/26/questdb-versus-influxdb/) diff --git a/shared/clients.json b/shared/clients.json index 69c757658..d06a94630 100644 --- a/shared/clients.json +++ b/shared/clients.json @@ -1,6 +1,6 @@ [ { - "href": "/docs/ingestion/clients/c-and-cpp", + "href": "/docs/connect/clients/c-and-cpp", "name": "C & C++", "description": "High-performance client for systems programming and embedded applications.", @@ -8,7 +8,7 @@ "protocol": "ILP" }, { - "href": "/docs/ingestion/clients/dotnet", + "href": "/docs/connect/clients/dotnet", "name": ".NET", "description": "Cross-platform client for building applications with .NET technologies.", @@ -16,7 +16,7 @@ "protocol": "ILP" }, { - "href": "/docs/query/pgwire/dotnet", + "href": "/docs/connect/compatibility/pgwire/dotnet", "name": ".NET", "description": "Query QuestDB using Npgsql or other .NET PostgreSQL drivers.", @@ -24,7 +24,7 @@ "protocol": "PGWire" }, { - "href": "/docs/ingestion/clients/go", + "href": "/docs/connect/clients/go", "name": "Go", "description": "An open-source programming language supported by Google with built-in concurrency.", @@ -32,7 +32,7 @@ "protocol": "ILP" }, { - "href": "/docs/query/pgwire/go", + "href": "/docs/connect/compatibility/pgwire/go", "name": "Go", "description": "Query QuestDB using pgx or 
other Go PostgreSQL drivers.", @@ -40,7 +40,7 @@ "protocol": "PGWire" }, { - "href": "/docs/ingestion/clients/java", + "href": "/docs/connect/clients/java", "name": "Java", "description": "Platform-independent client for enterprise applications and Android development.", @@ -48,7 +48,7 @@ "protocol": "ILP" }, { - "href": "/docs/query/pgwire/java", + "href": "/docs/connect/compatibility/pgwire/java", "name": "Java", "description": "Query QuestDB using JDBC with any PostgreSQL-compatible driver.", @@ -56,7 +56,7 @@ "protocol": "PGWire" }, { - "href": "/docs/ingestion/clients/nodejs", + "href": "/docs/connect/clients/nodejs", "name": "Node.js", "description": "Node.js® is an open-source, cross-platform JavaScript runtime environment.", @@ -64,7 +64,7 @@ "protocol": "ILP" }, { - "href": "/docs/query/pgwire/nodejs", + "href": "/docs/connect/compatibility/pgwire/nodejs", "name": "Node.js", "description": "Query QuestDB using pg or other Node.js PostgreSQL clients.", @@ -72,7 +72,7 @@ "protocol": "PGWire" }, { - "href": "/docs/ingestion/clients/python", + "href": "/docs/connect/clients/python", "name": "Python", "description": "Python is a programming language that lets you work quickly and integrate systems more effectively.", @@ -80,7 +80,7 @@ "protocol": "ILP" }, { - "href": "/docs/query/pgwire/python", + "href": "/docs/connect/compatibility/pgwire/python", "name": "Python", "description": "Query QuestDB using psycopg, asyncpg, or other Python drivers.", @@ -88,7 +88,7 @@ "protocol": "PGWire" }, { - "href": "/docs/ingestion/clients/rust", + "href": "/docs/connect/clients/rust", "name": "Rust", "description": "Systems programming language focused on safety, speed, and concurrency.", @@ -96,7 +96,7 @@ "protocol": "ILP" }, { - "href": "/docs/query/pgwire/rust", + "href": "/docs/connect/compatibility/pgwire/rust", "name": "Rust", "description": "Query QuestDB using tokio-postgres or other Rust PostgreSQL crates.", @@ -104,7 +104,7 @@ "protocol": "PGWire" }, { - "href": 
"/docs/query/pgwire/php", + "href": "/docs/connect/compatibility/pgwire/php", "name": "PHP", "description": "Query QuestDB using PDO or other PHP PostgreSQL extensions.", @@ -112,7 +112,7 @@ "protocol": "PGWire" }, { - "href": "/docs/query/pgwire/r", + "href": "/docs/connect/compatibility/pgwire/r", "name": "R", "description": "Query QuestDB using RPostgres or other R database packages.", diff --git a/shared/ilp_clients.json b/shared/ilp_clients.json index 3180875f5..713e944c5 100644 --- a/shared/ilp_clients.json +++ b/shared/ilp_clients.json @@ -19,7 +19,7 @@ }, { "label": "Java", - "docsUrl": "/docs/ingestion/clients/java/", + "docsUrl": "/docs/connect/clients/java/", "logoSrc": "/images/logos/java.svg" }, { diff --git a/src/components/Resources/index.tsx b/src/components/Resources/index.tsx index e2fa9f143..4fa8931a0 100644 --- a/src/components/Resources/index.tsx +++ b/src/components/Resources/index.tsx @@ -41,7 +41,7 @@ const resources: Array = [ }, }, { - href: '/docs/ingestion/overview/#first-party-clients', + href: '/docs/connect/overview/#client-libraries', name: 'Language clients', description: 'Explore our language clients and how to use them to ingest data into QuestDB.', diff --git a/src/modules/integration/index.tsx b/src/modules/integration/index.tsx index 546a6c4cb..1475347e2 100644 --- a/src/modules/integration/index.tsx +++ b/src/modules/integration/index.tsx @@ -29,12 +29,12 @@ const integrations: Array<{ { logo: { ...logos.redpanda, width: 90, svg: RedpandaLogo }, label: "Redpanda", - src: "/docs/ingestion/message-brokers/redpanda/", + src: "/docs/connect/message-brokers/redpanda/", }, { logo: { ...logos.postgres, svg: PostgresLogo }, label: "Postgres", - src: "/docs/query/pgwire/overview/", + src: "/docs/connect/compatibility/pgwire/overview/", }, { logo: { ...logos.grafana, svg: GrafanaLogo }, @@ -44,7 +44,7 @@ const integrations: Array<{ { logo: { ...logos.kafka, svg: KafkaLogo }, label: "Kafka", - src: 
"/docs/ingestion/message-brokers/kafka/", + src: "/docs/connect/message-brokers/kafka/", }, { logo: { ...logos.python, svg: PythonLogo }, @@ -69,7 +69,7 @@ const integrations: Array<{ { logo: { ...logos.telegraf, svg: TelegrafLogo }, label: "Telegraf", - src: "/docs/ingestion/message-brokers/telegraf/", + src: "/docs/connect/message-brokers/telegraf/", }, { logo: { ...logos.mindsDB, svg: MindsDBLogo }, diff --git a/src/theme/RemoteRepoExample/index.tsx b/src/theme/RemoteRepoExample/index.tsx index 220ed28e2..edff50102 100644 --- a/src/theme/RemoteRepoExample/index.tsx +++ b/src/theme/RemoteRepoExample/index.tsx @@ -76,8 +76,23 @@ export const RemoteRepoExample = ({ } } - const example: Example = repoExample[id] - const headerMd = example?.header + const example: Example | undefined = repoExample[id] + + if (example === undefined) { + // The example is not in the remote-repo-example plugin data: its + // source file has not landed in the client repo yet, or the manifest + // entry is missing. The plugin (plugins/remote-repo-example) already + // skips absent files with a warning rather than failing the build; + // mirror that here instead of crashing the entire static-site + // generation. The block renders once the example lands upstream. + console.warn( + `[RemoteRepoExample] no example "${id}" in remote-repo-example ` + + `data; skipping. It will render once it lands in the source repo.`, + ) + return null + } + + const headerMd = example.header let code = example.code ?? "" const valueReplaceMap = [