diff --git a/README.md b/README.md index febf0e7..3968727 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,11 @@ await logger.info('Server started', { port: 3000 }); - [Transport filter predicate](#transport-filter-predicate) - [Log search](#log-search) - [OpenTelemetry](#opentelemetry) +- [OTLP logs export](#otlp-logs-export-opentelemetry-native) +- [Wide events / canonical log lines](#wide-events--canonical-log-lines) +- [Dynamic runtime reconfiguration](#dynamic-runtime-reconfiguration) +- [Adaptive sampling](#adaptive-sampling) +- [Robust serialization](#robust-serialization) - [Graceful shutdown](#graceful-shutdown) - [Plugin / extension API](#plugin--extension-api) - [Writing a plugin](#writing-a-plugin) @@ -1640,10 +1645,126 @@ app.post('/checkout', async (req, res) => { --- +## OTLP logs export (OpenTelemetry-native) + +logixia doesn't just _read_ the active OTel span (above) — it can _emit_ logs in the **OTLP/HTTP** format so they land in any OpenTelemetry backend (Grafana Loki, OpenObserve, Better Stack, Axiom, Datadog, SigNoz…) already correlated with traces. No `@opentelemetry/*` packages required (that JS API is still alpha) — the OTLP JSON is built directly, with proper `SeverityNumber` mapping (DEBUG=5, INFO=9, WARN=13, ERROR=17) and resource attributes. + +```typescript +import { OtlpLogTransport } from 'logixia'; + +const logger = createLogger({ + appName: 'api', + transports: { + custom: [ + new OtlpLogTransport({ + url: 'http://localhost:4318/v1/logs', + serviceName: 'api', + serviceVersion: '1.4.0', + environment: 'production', + headers: { 'x-api-key': process.env.OTLP_KEY! }, + }), + ], + }, +}); +// Every log is exported as an OTel LogRecord with traceId/spanId for native +// trace↔log correlation. Buffers drain on close() — no loss on shutdown. +``` + +--- + +## Wide events / canonical log lines + +Instead of scattering a request's story across many narrow log lines that you have to JOIN during an incident, emit **one dense, structured event per request** — the "canonical log line" (Stripe) / "wide event" (Honeycomb, _Observability 2.0_) pattern. Fields accumulate as the request flows through middleware and business logic via `AsyncLocalStorage`, then the whole event is emitted **once** — in a teardown path so it fires even on errors. + +```typescript +import { wideEventMiddleware, addEventFields } from 'logixia'; + +// One canonical line per request, auto-emitted on response finish/close: +app.use(wideEventMiddleware(logger)); // pre-fills method, url, ip, status, duration + +app.get('/checkout', (req, res) => { + addEventFields({ userId: req.user.id, planTier: 'pro' }); // from anywhere + addEventFields({ dbQueries: 4, cacheHit: true }); + res.json({ ok: true }); + // → ONE log line: { method, url, statusCode, durationMs, userId, planTier, + // dbQueries, cacheHit, traceId } — no JOINs at query time +}); +``` + +Or wrap any unit of work manually — the event is emitted once, even if the callback throws: + +```typescript +import { withWideEvent, addEventFields } from 'logixia'; + +await withWideEvent(logger, { job: 'reindex' }, async () => { + addEventFields({ shard: 3 }); + await doWork(); // throws? → event still emitted with { error: true, errorMessage } +}); +``` + +--- + +## Dynamic runtime reconfiguration + +Change log levels in a **running** process — no restart — to chase a bug without raising global volume. This is the single most-requested feature across the Winston ([#1107](https://github.com/winstonjs/winston/issues/1107)) and Pino ([#206](https://github.com/pinojs/pino/issues/206)) trackers; logixia ships it first-class. + +```typescript +import { registerLevelSignal, createLevelControlHandler } from 'logixia'; + +// 1. Per-namespace level, live: +logger.setNamespaceLevels({ 'db.*': 'debug', '*': 'info' }); // db.* → debug now + +// 2. Cycle the global level with one signal (kill -USR2 ): +registerLevelSignal(logger); // info → debug → trace → … → info + +// 3. Ops endpoint (mount behind your auth): +app.all('/admin/log-level', createLevelControlHandler(logger)); +// GET → { level, namespaceLevels } +// POST { "level": "debug", "namespaceLevels": { "db.*": "trace" } } +``` + +--- + +## Adaptive sampling + +On top of static / per-level / trace-consistent sampling + a token-bucket rate cap, logixia can **boost the sample rate automatically during an incident** — so high-volume cost control never costs you the logs that matter most. When the windowed error rate crosses a threshold, sampling lifts toward 1.0; in steady state it relaxes back to the base rate. + +```typescript +const logger = createLogger({ + appName: 'api', + sampling: { + rate: 0.1, // keep 10% in steady state + adaptive: { + errorRateThreshold: 0.05, // ≥5% errors in the window… + boostRate: 1.0, // …keep everything until it subsides + windowMs: 10_000, + }, + }, +}); +``` + +--- + +## Robust serialization + +logixia never throws while serializing a log payload. Circular references become `[Circular]`, and — going beyond what Winston/Pino do — **BigInt** is handled (raw `JSON.stringify` throws on it) and you can opt into **true round-trippable decycling** for shared/circular graphs: + +```typescript +import { safeStringify, decycleValue, retrocycle } from 'logixia'; + +safeStringify({ id: 9007199254740993n, self: obj }); // BigInt + cycle safe +const json = safeStringify(graph, { decycle: true }); // $ref pointers, not [Circular] +const restored = retrocycle(JSON.parse(json)); // shared refs reconstructed +``` + +--- + ## Graceful shutdown Ensures all buffered log entries are flushed to every transport before the process exits. Critical for database and analytics transports that batch writes. +> **Reliability guarantee — no log loss on shutdown.** The most painful, still-open bug in the most popular Node logger is exactly this: Pino's [#1705](https://github.com/pinojs/pino/issues/1705) ("Logs are not flushed, missing log entries after `process.exit()`") has been open since 2023, with its maintainer noting a race condition in the transport flush path that "I won't be able to fix it anytime soon." It recurs across [#542](https://github.com/pinojs/pino/issues/542), [#1774](https://github.com/pinojs/pino/issues/1774), [#1889](https://github.com/pinojs/pino/issues/1889), and [#2054](https://github.com/pinojs/pino/issues/2054). logixia is built the other way around: **every** batching/async transport (database, analytics, CloudWatch/GCP/Azure, worker-thread, browser, OTLP) drains its buffer synchronously on `close()` with bounded retry, and the SIGTERM/SIGINT handler is guarded against concurrent signals so a second Ctrl+C can't truncate the flush. Each guarantee is covered by a regression test. + The simplest approach is to set `gracefulShutdown: true` in config — logixia registers SIGTERM and SIGINT handlers automatically: ```typescript diff --git a/research/LOGGER-IMPROVEMENTS.md b/research/LOGGER-IMPROVEMENTS.md new file mode 100644 index 0000000..a777945 --- /dev/null +++ b/research/LOGGER-IMPROVEMENTS.md @@ -0,0 +1,283 @@ +# logixia — Logger Improvement Research & Roadmap + +> **Method:** Deep multi-source web research (5 search angles → ~30 fetched sources → 3-vote +> adversarial verification on every factual claim). 79 claims were independently verified; +> ~15 were **refuted** and dropped (e.g. "Winston can't handle circular refs" — false, modern +> Winston uses `safe-stable-stringify`). Only claims that survived skeptical scrutiny against +> **primary sources** (GitHub issues with verbatim maintainer quotes, official specs, live API +> cross-checks) are used below. Each recommendation is rated **Effort (S/M/L)** and **Impact**, +> and cross-checked against logixia's existing v1.10.x feature set. +> +> **Date:** 2026-06-10 · **Branch:** `research/logger-improvements` + +--- + +## TL;DR — The Roadmap (prioritized) + +> **Status:** Items 1, 2, 3, 4, 5, 6, 9 **IMPLEMENTED** on `research/logger-improvements` +> (commits below, all with tests). Items 7 (cross-runtime) and 8 (schema validation) +> remain as follow-ups. + +| # | Recommendation | Type | Effort | Impact | Status | +| --- | ----------------------------------------------------------------------- | ---------- | ------ | ------ | ------------------------------------------- | +| 1 | **Canonical Log Lines / Wide Events API** | 🟢 NET-NEW | M | ⭐⭐⭐ | ✅ `src/wide-events.ts` | +| 2 | **OTLP Logs export (native OTel Logs bridge OUT)** | 🟠 IMPROVE | M | ⭐⭐⭐ | ✅ `src/transports/otlp.transport.ts` | +| 3 | **Dynamic runtime reconfiguration** (level via signal/HTTP, no restart) | 🟢 NET-NEW | S–M | ⭐⭐⭐ | ✅ `src/utils/runtime-control.ts` | +| 4 | **Per-module / per-namespace runtime level control** | 🟠 IMPROVE | S | ⭐⭐ | ✅ `logger.setNamespaceLevels()` | +| 5 | **Document flush-on-exit reliability guarantee** | 🟠 IMPROVE | S | ⭐⭐⭐ | ✅ README "Reliability guarantee" | +| 6 | **Adaptive / dynamic sampling** (raise rate on errors/spikes) | 🟠 IMPROVE | M | ⭐⭐ | ✅ `SamplingConfig.adaptive` | +| 7 | **Cross-runtime story: Deno / Bun / Cloudflare Workers** | 🟢 NET-NEW | M–L | ⭐⭐ | ⏳ follow-up | +| 8 | **Schema validation / typed-field enforcement for log fields** | 🟠 IMPROVE | S–M | ⭐ | ⏳ follow-up (partial: typed-logger exists) | +| 9 | **Richer error/serialization edge cases** (BigInt, true `$ref` decycle) | 🟠 IMPROVE | S | ⭐ | ✅ `src/utils/safe-stringify.ts` | + +**The headline strategy:** logixia is already feature-rich. The highest-leverage moves are +**(a) the Wide Events API** (a genuine category gap nobody in the Node ecosystem ships +first-class), **(b) becoming OTel-Logs-native (emit OTLP, not just read spans)**, and +**(c) dynamic runtime reconfiguration** (the most-requested missing feature industry-wide). +Plus **loudly marketing the reliability logixia already has** — flush-on-exit is Pino's +most painful open bug, and logixia already solved it. + +--- + +## Part 1 — PAIN POINTS with existing loggers (verified) + +### 1.1 Log loss on process exit / crash — **Pino's #1 unfixed reliability bug** + +- **Pino issue [#1705](https://github.com/pinojs/pino/issues/1705)** ("Logs are not flushed, + missing log entries after `process.exit()`") is **OPEN** (filed 2023-04-26, still open as of + 2026-06). Lead maintainer **Matteo Collina** confirmed verbatim: + > "I can see there is a race condition in pino itself at `lib/transport.js#L53`. What we should + > be doing there instead is to synchronously wait for the worker to have processed all the logs. + > At this time I'm not affected by that bug and I won't be able to fix it anytime soon." +- Recurs across **#542, #1400, #1774, #1889, #2054** (the last from Oct 2024, confirming it + persists in v9/v10). Root cause: async worker-thread transports don't drain synchronously + before exit. Official Pino docs concede: _"If logs are printed before the transport is ready + when `process.exit(0)` is called, they will be lost."_ +- **logixia's position:** ✅ **Already fixed** (graceful shutdown + drain-on-close on _every_ + transport — the audit hardened this further). **This is a marketing goldmine, not a gap.** + +### 1.2 Memory leaks under sustained high-volume logging — **Winston (now fixed, but telling)** + +- **Winston issue [#1871](https://github.com/winstonjs/winston/issues/1871)** ("Memory leak when + logging large amount of logs", 20 👍): memory grew unbounded _even when logs were below the + configured level threshold_. Root-caused (by community + maintainer-merged fix) to + `readable-stream`'s `_writableState.sync = true` deferring callback cleanup. One user reported + **+1.2 GB RSS**; demo repro peaked 31.6 MB → 9.5 MB post-fix. Shipped in winston **v3.6.0**. +- Also **#430**: logging >50k items pushed RSS from <80 MB to 500–600 MB. +- **logixia's position:** ✅ logixia is async-first with bounded buffers; the audit added + explicit caps (sampling trace-set cap, search-engine buffer cap, indexer O(1) stats). **Good — + keep bounded-by-default as an explicit guarantee.** + +### 1.3 Circular-reference / serialization handling — **NUANCED (claim partly refuted)** + +- ⚠️ **Refuted:** "Winston/Pino can't serialize circular refs" is **false today** — modern + Winston's `logform/json.js` uses `safe-stable-stringify` (since logform 1.8.0 / 2.3.0), and + Pino uses `fast-safe-stringify`. Both emit `[Circular]` by default. _Do not claim this as a + differentiator._ +- ✅ **Still true:** Winston **#1497** documented circular objects (e.g. Mongoose `CastError`) + causing an **infinite loop → CPU pin → OOM** on older winston@2. The _genuine_ remaining gap + across the ecosystem is **true decycling with `$ref` round-tripping** and richer **BigInt / + typed-attribute** serialization — not basic cycle survival. +- **logixia's position:** ✅ Circular crash was fixed in the audit (formatters, search, NestJS). + Opportunity: go _beyond_ `[Circular]` — offer optional true `$ref` decycle + BigInt handling. + +### 1.4 Dynamic per-module level control declined to core — **Pino, by design** + +- **Pino issue [#206](https://github.com/pinojs/pino/issues/206)** requested `debug`-style + per-logger-name filtering + a runtime `setFilter({ 'mymodule': 'debug', '*': 'info' })`. + Maintainers **declined to core for performance**: + > jsumners: _"I think that would add too much overhead to core. It could easily be done in a + > transport… the transport route is the best way to go."_ + > mcollina: _"the father maintain no link to its children"_ (so parent→child cascade is hard). +- ⚠️ **Partly refuted/outdated:** Pino _now_ has an `onChild` hook + `pino-arborsculpture` + + mutable `child.level`, so per-child runtime control **is** achievable today (just not a clean + built-in `setFilter`). Frame the gap precisely: **a first-class, batch, name-pattern runtime + level API** — not "impossible in Pino". +- **logixia's position:** 🟡 Has per-namespace levels at **config** time + ENV overrides. **Gap: + runtime mutation via one call/signal/HTTP without restart.** → Recommendations #3, #4. + +### 1.5 Configuration complexity & poor defaults — **Winston** + +- Better Stack's library comparison documents Winston's _"poorly-considered defaults"_: no + timestamp unless configured, **no stack trace for logged `Error`s without extra setup**, and + per-transport level changes requiring you to hold transport references _outside_ the logger + (issues **#1107, #1191, #1212** — `logger.level` does **not** reliably propagate to transports). +- **logixia's position:** ✅ Adaptive defaults (NODE_ENV/CI), structured-by-default, error + serialization with cause chains. **Strength — keep "zero-config sane defaults" front and center.** + +--- + +## Part 2 — WHAT DEVELOPERS WANT (verified demand) + +### 2.1 Dynamic runtime reconfiguration — **the single most-requested feature** + +- Independently raised in **Winston [#1107](https://github.com/winstonjs/winston/issues/1107)** + ("Better way to dynamically change the log level"), **Pino #206 / #834 / #766 / #677** + (browser), **express-pino-logger #11**, and **nestjs-pino #371** ("Changing loglevel during + runtime"). Users want to **flip levels via an HTTP endpoint or OS signal without restarting**. +- The demand is strong enough that the community built a dedicated module, + **[`pino-arborsculpture`](https://github.com/pinojs/pino-arborsculpture)** (watches a file, + mutates levels live). _Its existence is the evidence._ +- **logixia gap:** No built-in runtime reconfiguration API. → **Recommendation #3.** + +### 2.2 OpenTelemetry-native logs (emit, not just correlate) + +- **OTel-JS [#1350](https://github.com/open-telemetry/opentelemetry-js/issues/1350)** (2020) was + the origin of demand for first-class OTel support in winston/bunyan/pino — motivated by + trace-log correlation. OTel-JS now ships official `@opentelemetry/instrumentation-{winston, +bunyan,pino}` that inject `trace_id`/`span_id` **and emit to the OTel Logs SDK**. +- The **OTel Logs spec** is a deliberate **bridge/appender model**: _"primarily designed for + library authors to build log appenders… to bridge between existing logging libraries and the + OpenTelemetry log data model."_ LogRecords carry `TraceId`/`SpanId` and a **SeverityNumber + (1–24)** mapping (DEBUG=5, INFO=9, WARN=13, ERROR=17). The JS `@opentelemetry/api-logs` package + is still **alpha** ("no guarantee of stability") — so this is a _moving target = differentiation + opportunity_. +- **logixia's position:** 🟡 Has an OTel **bridge IN** (reads active span → injects trace/span + into payload). **Gap: emit OTLP LogRecords OUT** (proper SeverityNumber mapping, resource + attributes, OTLP exporter / collector transport). → **Recommendation #2.** +- Related real request: **opentelemetry-js-contrib [#1664](https://github.com/open-telemetry/opentelemetry-js-contrib/issues/1664)** + — teams with strict standards rejected snake_case `trace_id` and wanted **configurable field + keys** (→ schema/naming, Recommendation #8). + +### 2.3 Cost control on high-volume logging + +- groundcover + Honeycomb's _"Cost Crisis in Observability"_ frame **adaptive/dynamic sampling** + (head vs tail-based; raise sample rate during anomalies/spikes, lower during steady state) as a + top-tier 2025-26 need. Wide events are pitched as _more_ cost-effective than metrics because one + dense event re-derives metrics/traces. +- **logixia's position:** 🟡 Has static + per-level + trace-consistent sampling + token-bucket + rate limit. **Gap: _adaptive_ (anomaly-driven) sampling.** → **Recommendation #6.** + +### 2.4 Trace-log correlation by default — **now table-stakes** + +- OneUptime (2026) + SigNoz document that OTel auto-injects `trace_id`/`span_id`/`trace_flags` + into every log line across Node/Python/Go/.NET/Java. Developers now **expect** this for free. +- **logixia's position:** ✅ Has it (AsyncLocalStorage + OTel bridge). Keep; ensure it's + zero-config when an OTel SDK is present. + +### 2.5 Edge / serverless + cross-runtime + +- HN 2026 ("Logging in Node.js or Deno or Bun or edge functions in 2026") shows the conversation + has moved beyond Node. Edge functions can be **killed immediately after responding**, so an + explicit `flush()/dispose()` is mandatory; incumbents (Winston/Pino, Node-centric) are weak here. +- **logixia's position:** 🟡 Has a browser build + remote transport w/ `keepalive` + graceful + shutdown. **Gap: explicit Deno/Bun/Workers support + a documented `dispose()` for edge.** + → **Recommendation #7.** + +--- + +## Part 3 — INNOVATION OPPORTUNITIES (2025–2026 frontier) + +### 3.1 ⭐ Canonical Log Lines / Wide Events — **the flagship differentiator** + +The strongest, best-sourced finding. The industry (Stripe, Honeycomb, brandur, multiple Mar-2026 +write-ups + active HN threads) has converged on **"wide events" / "canonical log lines"**: emit +**ONE dense, structured event per request** (built up via a request-scoped object that middleware +and business logic decorate), emitted in a `finally`/`ensure` block so it fires even on exceptions. + +- **Stripe** ([canonical-log-lines](https://stripe.com/blog/canonical-log-lines)): one wide line + per request, accumulated in a request-scoped object, emitted in an `ensure` block. +- **Honeycomb** ("Observability 2.0"): ONE arbitrarily-wide structured event as the single source + of truth, from which metrics/logs/traces are derived (vs the "three pillars"). +- Practitioner quote (verified): _"those logs were orders of magnitude easier to work with than + having to coalesce lots of logs… 'I need X. Cool, here's all 50 complete calls.'"_ +- Maps cleanly onto OTel: the root span carries the canonical event; typed attributes make + percentile/range queries fast; `trace_id`/`span_id` auto-correlate. + +**Why it's a logixia gap:** logixia has child loggers + structured logging + correlation, but +**no first-class wide-event API**. This is a _category_ most Node loggers don't ship at all. + +**Proposed API (M effort, ⭐⭐⭐ impact):** + +```ts +// Request-scoped accumulator, auto-emitted once on end (even on throw) +const canonical = logger.beginEvent(); // or middleware: app.use(logixia.wideEvents()) +canonical.add({ userId, route, planTier }); // decorate from anywhere in the request +canonical.add({ dbQueries: 4, cacheHit: true }); +// ...on response finish / error → one dense log line, with trace_id/span_id, emitted in finally +``` + +Ship as: a `WideEvent` accumulator + Express/Fastify/NestJS middleware that auto-emits on +`finish`/`error`, integrated with the existing AsyncLocalStorage context so `.add()` works +anywhere without threading. Bridge the emitted event into the OTel log record when the bridge is on. + +### 3.2 AI is moving to the PLATFORM layer — logixia should _bridge_, not rebuild + +- Better Stack ("AI SRE"), OpenObserve (O2 NL-query assistant), Axiom ("ingest everything, no + sampling, no data loss", APL/Kusto query), Highlight.io (session replay + frontend correlation) + are all doing **AI root-cause analysis + natural-language query at the platform layer**. +- **Takeaway:** A _library_ shouldn't try to do AI analysis. Its job is **high-fidelity, + well-structured, trace-correlated emission + clean OTLP/transport bridges** to these backends. + This reinforces #1 (wide events) and #2 (OTLP) as the right bets — they make logixia's output + maximally useful to the AI-driven platforms where analysis now lives. + +### 3.3 Observability 2.0 positioning + +- The strategic frame (Honeycomb/Charity Majors): one wide structured-event stream → derive + metrics, logs, traces. logixia already has logs + Prometheus metrics + OTel trace bridge; adding + **wide events** lets it credibly position as an **"Observability 2.0-ready" emission layer** — + a sharp, current marketing story no incumbent Node logger owns. + +--- + +## Part 4 — Detailed recommendations (effort · impact · sources) + +### 🟢 Net-new (genuine gaps) + +**R1 · Wide Events / Canonical Log Lines API** — Effort **M**, Impact **⭐⭐⭐** +Request-scoped accumulator + auto-emit-in-finally middleware (Express/Fastify/NestJS) + OTel +bridge. _Sources: Stripe, Honeycomb ×3, brandur, bookofdaniel (Mar 2026), HN #47427271._ + +**R3 · Dynamic runtime reconfiguration** — Effort **S–M**, Impact **⭐⭐⭐** +`logger.setLevel()` / signal handler (SIGUSR2) / optional HTTP admin endpoint to change levels +(global + per-namespace) **without restart**. _Sources: winston #1107, pino #206/#677, +nestjs-pino #371, pino-arborsculpture._ + +**R7 · Cross-runtime + edge `dispose()`** — Effort **M–L**, Impact **⭐⭐** +Verified Deno/Bun/Cloudflare Workers support + documented `await logger.dispose()` for edge +teardown. _Sources: HN "Logging in 2026" #46454886, pino #677._ + +### 🟠 Improve existing + +**R2 · OTLP Logs export (emit OUT)** — Effort **M**, Impact **⭐⭐⭐** +Add an OTLP log exporter / collector transport: map levels → SeverityNumber (1–24), attach +resource attributes (`service.name`/`version`/`env`), emit LogRecords with `TraceId`/`SpanId`. +Note `@opentelemetry/api-logs` is alpha — pin carefully. _Sources: OTel Logs spec, SDK spec, +status page, SigNoz, OneUptime._ + +**R4 · Per-namespace runtime level toggling** — Effort **S**, Impact **⭐⭐** +Extend existing namespace levels with a runtime `setNamespaceLevels({ 'db.*': 'debug', '*': +'info' })`. _Sources: pino #206 (verbatim `setFilter` request)._ + +**R5 · Make flush-on-exit a headline guarantee** — Effort **S**, Impact **⭐⭐⭐** +Document + benchmark + add a README "reliability" section proving zero loss on +SIGTERM/SIGINT/`process.exit()` with buffered async transports — the exact scenario that is +Pino's most painful **open** bug. _Sources: pino #1705 (open), #2054, #542, Collina blog._ + +**R6 · Adaptive / anomaly-driven sampling** — Effort **M**, Impact **⭐⭐** +Auto-raise sample rate on error bursts/latency spikes, lower in steady state (tail-based option). +_Sources: groundcover log-sampling, Honeycomb cost-crisis._ + +**R8 · Field schema validation + configurable key naming** — Effort **S–M**, Impact **⭐** +Runtime schema/required-field enforcement + camelCase/snake_case key config (extends typed-logger). +_Sources: otel-js-contrib #1664, Better Stack guide._ + +**R9 · Serialization beyond `[Circular]`** — Effort **S**, Impact **⭐** +Optional true `$ref` decycle (round-trippable) + BigInt/typed handling — one notch above the +`safe-stable-stringify` baseline incumbents already match. _Sources: winston #1497, refuted-claim +analysis._ + +--- + +## Appendix — Claims that were REFUTED (do NOT use in marketing) + +1. ❌ "Winston/Pino can't serialize circular references" — **false**; both use safe-stringify by + default today (`safe-stable-stringify` / `fast-safe-stringify`). +2. ❌ "Pino can't do per-child runtime level changes (architecturally impossible)" — **outdated**; + Pino now has `onChild` hook + mutable `child.level` + `pino-arborsculpture`. The real gap is a + _clean built-in batch `setFilter`_, not impossibility. +3. ❌ "Pino frequently loses logs in production" — **overreach**; scoped to `process.exit()` + + large buffers + worker-thread transports (esp. `pino-pretty`). Pino _does_ drain on clean + exit/SIGTERM/SIGINT. Frame precisely: the gap is `process.exit()`/hard-exit + buffered async. + +_All surviving claims above passed 3-vote adversarial verification against primary sources._ diff --git a/research/_logixia-current-features.md b/research/_logixia-current-features.md new file mode 100644 index 0000000..148fd2e --- /dev/null +++ b/research/_logixia-current-features.md @@ -0,0 +1,29 @@ +# logixia — current feature surface (as of audit) + +logixia ALREADY HAS these features (do NOT recommend adding what it already has): + +- Log levels (error/warn/info/debug/trace/verbose) + custom levels +- Structured logging, child loggers +- Adaptive log level (NODE_ENV/CI-based) +- Per-namespace log levels (with ENV overrides) +- Transports: Console, File (with rotation + gzip), Database (PG/MySQL/SQLite/Mongo), Analytics (Mixpanel/DataDog/Segment/GA) +- Cloud adapters: AWS CloudWatch, GCP Cloud Logging, Azure Monitor +- Request tracing: trace utils, Express/Fastify middleware, NestJS middleware, Kafka + WebSocket interceptors (AsyncLocalStorage based) +- NestJS integration: @LogMethod decorator, LogixiaExceptionFilter, DI module +- Correlation ID propagation: Express/Fastify, outbound fetch + axios, Kafka/SQS/RabbitMQ helpers +- Browser support (zero Node deps, remote transport with keepalive) +- Log redaction: path-based (\*\* globs) + regex patterns + auto-detect PII (conservative/aggressive), message-string redaction +- Timer API (time/timeEnd) +- Field management (enable/disable fields) +- Per-transport level control + filter predicates +- Log search engine (full-text, NL query, trace correlation, similarity, presets, in-memory index) +- OpenTelemetry bridge (auto trace/span injection) +- Graceful shutdown (flushOnExit, SIGTERM/SIGINT, drain-on-close) +- Plugin/extension API (onInit/onLog/onError/onShutdown) +- Metrics -> Prometheus (counters/histograms/gauges, /metrics endpoint) +- Multi-transport retry + failover + fallback +- Sampling (rate, per-level, trace-consistent, token-bucket rate limit) +- CLI tool (tail, search, query, stats, analyze, explore, export) +- TypeScript-first, async-first (non-blocking) + +TARGET RUNTIME: Node.js (Express/NestJS/Fastify primary), also browser/edge. diff --git a/src/__tests__/wide-events.test.ts b/src/__tests__/wide-events.test.ts new file mode 100644 index 0000000..0be9161 --- /dev/null +++ b/src/__tests__/wide-events.test.ts @@ -0,0 +1,162 @@ +/** + * Tests for Wide Events / Canonical Log Lines (R1). + * + * Verifies: fields accumulated from anywhere in the async tree land on ONE + * event; the event is emitted exactly once on success OR error (the + * emit-in-finally guarantee); duration + trace are attached; and the Express + * middleware emits one canonical line on finish/close with statusCode. + */ + +import { + addEventFields, + getEventFields, + setEventField, + type WideEventLogger, + wideEventMiddleware, + withWideEvent, +} from '../wide-events'; + +interface Emitted { + level: string; + message: string; + data?: Record; +} + +function makeLogger(): { logger: WideEventLogger; events: Emitted[] } { + const events: Emitted[] = []; + return { + events, + logger: { + logLevel: (level, message, data) => { + events.push({ level, message, data }); + return Promise.resolve(); + }, + }, + }; +} + +describe('withWideEvent', () => { + it('accumulates fields from nested calls into a single event', async () => { + const { logger, events } = makeLogger(); + + await withWideEvent(logger, { route: '/checkout' }, async () => { + addEventFields({ userId: 'u1' }); + setEventField('planTier', 'pro'); + await Promise.resolve(); + addEventFields({ dbQueries: 4 }); + }); + + expect(events).toHaveLength(1); + const data = events[0]!.data!; + expect(data.route).toBe('/checkout'); + expect(data.userId).toBe('u1'); + expect(data.planTier).toBe('pro'); + expect(data.dbQueries).toBe(4); + expect(typeof data.durationMs).toBe('number'); + }); + + it('emits exactly once even when the callback throws, with error fields', async () => { + const { logger, events } = makeLogger(); + + await expect( + withWideEvent(logger, { op: 'risky' }, async () => { + addEventFields({ step: 1 }); + throw new Error('kaboom'); + }) + ).rejects.toThrow('kaboom'); + + expect(events).toHaveLength(1); + const data = events[0]!.data!; + expect(data.step).toBe(1); + expect(data.error).toBe(true); + expect(data.errorMessage).toBe('kaboom'); + }); + + it('respects custom level and message', async () => { + const { logger, events } = makeLogger(); + await withWideEvent(logger, {}, async () => {}, { level: 'debug', message: 'canonical' }); + expect(events[0]!.level).toBe('debug'); + expect(events[0]!.message).toBe('canonical'); + }); + + it('addEventFields outside a scope is a no-op (does not throw)', () => { + expect(() => addEventFields({ x: 1 })).not.toThrow(); + expect(getEventFields()).toBeUndefined(); + }); + + it('isolates concurrent scopes (no field bleed across requests)', async () => { + const { logger, events } = makeLogger(); + await Promise.all([ + withWideEvent(logger, { req: 'A' }, async () => { + await new Promise((r) => setTimeout(r, 5)); + addEventFields({ who: 'A' }); + }), + withWideEvent(logger, { req: 'B' }, async () => { + addEventFields({ who: 'B' }); + }), + ]); + + const a = events.find((e) => e.data!.req === 'A')!; + const b = events.find((e) => e.data!.req === 'B')!; + expect(a.data!.who).toBe('A'); + expect(b.data!.who).toBe('B'); + }); +}); + +describe('wideEventMiddleware', () => { + function fakeRes() { + const handlers: Record void>> = {}; + return { + statusCode: 200, + once(event: string, cb: () => void) { + if (!handlers[event]) handlers[event] = []; + handlers[event]!.push(cb); + }, + fire(event: string) { + for (const cb of handlers[event] ?? []) cb(); + }, + }; + } + + it('emits one canonical event on finish with method/url/status/duration', () => { + const { logger, events } = makeLogger(); + const mw = wideEventMiddleware(logger); + const req = { method: 'GET', url: '/x', headers: {} }; + const res = fakeRes(); + + mw(req, res, () => { + // handler adds fields within the request scope + addEventFields({ handled: true }); + }); + res.fire('finish'); + + expect(events).toHaveLength(1); + const data = events[0]!.data!; + expect(data.method).toBe('GET'); + expect(data.url).toBe('/x'); + expect(data.statusCode).toBe(200); + expect(data.handled).toBe(true); + expect(typeof data.durationMs).toBe('number'); + }); + + it('emits only once when both finish and close fire', () => { + const { logger, events } = makeLogger(); + const mw = wideEventMiddleware(logger); + const res = fakeRes(); + mw({ method: 'GET', url: '/y' }, res, () => {}); + res.fire('finish'); + res.fire('close'); + expect(events).toHaveLength(1); + }); + + it('skips when the skip predicate returns true', () => { + const { logger, events } = makeLogger(); + const mw = wideEventMiddleware(logger, { skip: (r) => r.url === '/health' }); + let nextCalled = false; + mw({ method: 'GET', url: '/health' }, fakeRes(), () => { + nextCalled = true; + }); + expect(nextCalled).toBe(true); + expect(events).toHaveLength(0); + }); +}); diff --git a/src/core/logitron-logger.ts b/src/core/logitron-logger.ts index 73ac5b0..94205ac 100644 --- a/src/core/logitron-logger.ts +++ b/src/core/logitron-logger.ts @@ -41,6 +41,7 @@ import type { LoggerConfig, LoggerWithLevels, LogLevelString, + NamespaceLevels, TimingEntry, } from '../types'; import { LogLevel } from '../types'; @@ -502,6 +503,38 @@ export class LogixiaLogger< return (this.config.levelOptions?.level as LogLevelString) ?? LogLevel.INFO; } + /** + * Replace the per-namespace level map at runtime — no restart needed. + * + * This is the runtime counterpart to the `namespaceLevels` config option and + * the most-requested logging feature across the Winston/Pino issue trackers + * (flip a module's verbosity live to chase a bug without raising global + * volume). The new map fully replaces the previous one. + * + * @example + * ```ts + * // Turn on debug for the DB layer only, live: + * logger.setNamespaceLevels({ 'db.*': 'debug', '*': 'info' }); + * ``` + */ + setNamespaceLevels(levels: NamespaceLevels): void { + this.config.namespaceLevels = { ...levels }; + // Re-resolve the effective level (which consults namespaceLevels) and rebuild + // the hot-path caches so the change takes effect on the very next log call. + this._buildPerfCaches(); + } + + /** Merge entries into the existing per-namespace level map at runtime. */ + patchNamespaceLevels(levels: NamespaceLevels): void { + this.config.namespaceLevels = { ...this.config.namespaceLevels, ...levels }; + this._buildPerfCaches(); + } + + /** Return a copy of the current per-namespace level map. */ + getNamespaceLevels(): NamespaceLevels { + return { ...this.config.namespaceLevels }; + } + setContext(context: string): void { this.context = typeof context === 'string' ? context : safeToString(context); } diff --git a/src/index.ts b/src/index.ts index 95400d5..b19f70b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -33,6 +33,10 @@ export { initOtelBridge, } from './utils/otel'; export { applyRedaction, redactObject } from './utils/redact.utils'; +export type { LevelSignalOptions, ReconfigurableLogger } from './utils/runtime-control'; +export { createLevelControlHandler, registerLevelSignal } from './utils/runtime-control'; +export type { SafeStringifyOptions } from './utils/safe-stringify'; +export { decycleValue, retrocycle, safeStringify } from './utils/safe-stringify'; export * from './utils/shutdown.utils'; export * from './utils/trace.utils'; export type { @@ -44,6 +48,23 @@ export type { TypedLogger, } from './utils/typed-logger'; export { createTypedLogger, defineLogSchema } from './utils/typed-logger'; +export type { + WideEventFields, + WideEventLogger, + WideEventMiddlewareOptions, + WideEventOptions, +} from './wide-events'; +export { + addEventFields, + getEventFields, + setEventField, + wideEventMiddleware, + withWideEvent, +} from './wide-events'; + +// OTLP logs export transport (OpenTelemetry-native log emission) +export type { OtlpLogTransportConfig } from './transports/otlp.transport'; +export { OtlpLogTransport, toOtelSeverity } from './transports/otlp.transport'; // Search module exports export * from './search'; diff --git a/src/transports/__tests__/otlp.transport.test.ts b/src/transports/__tests__/otlp.transport.test.ts new file mode 100644 index 0000000..5fa99b9 --- /dev/null +++ b/src/transports/__tests__/otlp.transport.test.ts @@ -0,0 +1,153 @@ +/** + * Tests for the OTLP Logs transport (R2). + * + * Verifies the OTel SeverityNumber mapping, the OTLP/HTTP JSON payload shape + * (resource attributes, scopeLogs, logRecords with severity/body/attributes/ + * traceId), whole-batch drain, close() draining + retry, and failure re-buffer. + * fetch is mocked — no network. + */ + +// OTLP collectors are conventionally reached over plain http on localhost in +// dev/test (e.g. http://localhost:4318/v1/logs); these are test URLs, not prod. +/* eslint-disable sonarjs/no-clear-text-protocols */ +import type { TransportLogEntry } from '../../types/transport.types'; +import { OtlpLogTransport, toOtelSeverity } from '../otlp.transport'; + +function entry(i: number, over: Partial = {}): TransportLogEntry { + return { + timestamp: new Date('2026-01-01T00:00:00.000Z'), + level: 'info', + message: `otlp-${i}`, + ...over, + }; +} + +describe('toOtelSeverity', () => { + it('maps standard levels to the OTel SeverityNumber scale', () => { + expect(toOtelSeverity('trace').number).toBe(1); + expect(toOtelSeverity('debug').number).toBe(5); + expect(toOtelSeverity('info').number).toBe(9); + expect(toOtelSeverity('warn').number).toBe(13); + expect(toOtelSeverity('error').number).toBe(17); + expect(toOtelSeverity('fatal').number).toBe(21); + }); + + it('falls back to INFO (9) for unknown/custom levels', () => { + expect(toOtelSeverity('kafka').number).toBe(9); + expect(toOtelSeverity('kafka').text).toBe('INFO'); + }); +}); + +describe('OtlpLogTransport', () => { + let fetchMock: jest.Mock; + let original: typeof globalThis.fetch | undefined; + + beforeEach(() => { + original = globalThis.fetch; + fetchMock = jest.fn().mockResolvedValue({ ok: true, status: 200 }); + (globalThis as { fetch: unknown }).fetch = fetchMock; + }); + + afterEach(() => { + (globalThis as { fetch: unknown }).fetch = original; + }); + + function lastPayload(): Record { + const call = fetchMock.mock.calls[fetchMock.mock.calls.length - 1]!; + return JSON.parse((call[1] as { body: string }).body); + } + + it('POSTs an OTLP-shaped payload with resource + scope + logRecords', async () => { + const t = new OtlpLogTransport({ + url: 'http://collector:4318/v1/logs', + serviceName: 'api', + serviceVersion: '1.2.3', + environment: 'prod', + batchSize: 1000, + flushIntervalMs: 999_999, + }); + t.write(entry(1, { level: 'error', message: 'boom', traceId: 'trace-1', data: { code: 42 } })); + await t.flush(); + + const p = lastPayload() as { + resourceLogs: Array<{ + resource: { attributes: Array<{ key: string; value: { stringValue?: string } }> }; + scopeLogs: Array<{ + scope: { name: string }; + logRecords: Array>; + }>; + }>; + }; + + const rl = p.resourceLogs[0]!; + const resAttrKeys = rl.resource.attributes.map((a) => a.key); + expect(resAttrKeys).toContain('service.name'); + expect(resAttrKeys).toContain('service.version'); + expect(resAttrKeys).toContain('deployment.environment'); + + const rec = rl.scopeLogs[0]!.logRecords[0]!; + expect(rec.severityNumber).toBe(17); // error + expect(rec.severityText).toBe('ERROR'); + expect((rec.body as { stringValue: string }).stringValue).toBe('boom'); + expect(rec.traceId).toBe('trace-1'); + // data fields become attributes + const attrs = rec.attributes as Array<{ key: string; value: { intValue?: number } }>; + expect(attrs.find((a) => a.key === 'code')?.value.intValue).toBe(42); + + await t.close(); + }); + + it('drains the whole batch across multiple POSTs', async () => { + const t = new OtlpLogTransport({ + url: 'http://c/v1/logs', + batchSize: 10, + flushIntervalMs: 999_999, + }); + for (let i = 0; i < 35; i += 1) t.write(entry(i)); + await t.flush(); + + const totalRecords = fetchMock.mock.calls.reduce((sum, call) => { + const body = JSON.parse((call[1] as { body: string }).body); + return sum + body.resourceLogs[0].scopeLogs[0].logRecords.length; + }, 0); + expect(totalRecords).toBe(35); + await t.close(); + }); + + it('re-buffers on a failed POST (no loss) and close() drains it', async () => { + fetchMock.mockResolvedValueOnce({ ok: false, status: 503 }); + const t = new OtlpLogTransport({ + url: 'http://c/v1/logs', + batchSize: 1000, + flushIntervalMs: 999_999, + }); + for (let i = 0; i < 5; i += 1) t.write(entry(i)); + + await t.flush(); // first POST fails → re-buffered + await t.close(); // retries and drains + + const totalRecords = fetchMock.mock.calls + .filter((c) => c) + .reduce((sum, call) => { + try { + const body = JSON.parse((call[1] as { body: string }).body); + return sum + body.resourceLogs[0].scopeLogs[0].logRecords.length; + } catch { + return sum; + } + }, 0); + // 5 records were eventually delivered (the failed attempt re-queued them). + expect(totalRecords).toBeGreaterThanOrEqual(5); + }); + + it('serializes object data and bigint without throwing', async () => { + const t = new OtlpLogTransport({ + url: 'http://c/v1/logs', + batchSize: 1000, + flushIntervalMs: 999_999, + }); + t.write(entry(1, { data: { nested: { a: 1 }, big: BigInt(9) } })); + await expect(t.flush()).resolves.toBeUndefined(); + await t.close(); + }); +}); diff --git a/src/transports/otlp.transport.ts b/src/transports/otlp.transport.ts new file mode 100644 index 0000000..49cc940 --- /dev/null +++ b/src/transports/otlp.transport.ts @@ -0,0 +1,229 @@ +/** + * OTLP Logs transport — emit logixia logs as OpenTelemetry LogRecords. + * + * logixia already READS the active OTel span (the bridge injects traceId/spanId + * into payloads). This transport closes the loop by EMITTING logs OUT in the + * OTLP/HTTP JSON format to a collector, making logixia an OTel-Logs-native + * source — logs land in any OTLP backend (Grafana Loki, OpenObserve, Better + * Stack, Axiom, Datadog, SigNoz…) already correlated with traces. + * + * Dependency-free: builds the OTLP/HTTP JSON payload directly (no + * @opentelemetry/* packages required — that JS API is still alpha), conforming + * to the OTel logs data model: SeverityNumber (1–24), resource attributes + * (service.name/version, deployment.environment), and TraceId/SpanId on each + * record for correlation. + * + * @example + * ```ts + * transports: { + * custom: [ new OtlpLogTransport({ + * url: 'http://localhost:4318/v1/logs', + * serviceName: 'api', + * headers: { 'x-api-key': process.env.OTLP_KEY! }, + * }) ], + * } + * ``` + */ + +import type { IAsyncTransport, TransportLogEntry } from '../types/transport.types'; +import { internalError, internalWarn } from '../utils/internal-log'; + +export interface OtlpLogTransportConfig { + /** OTLP/HTTP logs endpoint, e.g. `http://localhost:4318/v1/logs`. */ + url: string; + /** Extra HTTP headers (auth, tenant, etc.). */ + headers?: Record; + /** `service.name` resource attribute. Default: 'logixia'. */ + serviceName?: string; + /** `service.version` resource attribute. */ + serviceVersion?: string; + /** `deployment.environment` resource attribute. */ + environment?: string; + /** Extra resource attributes merged into the OTLP resource. */ + resourceAttributes?: Record; + /** Entries per batch / per POST. Default: 100. */ + batchSize?: number; + /** Auto-flush interval (ms). Default: 5000. */ + flushIntervalMs?: number; + level?: string; +} + +/** + * Map a logixia level name to an OTel SeverityNumber (1–24) and text. + * Per the OTel logs SDK spec: TRACE=1, DEBUG=5, INFO=9, WARN=13, ERROR=17, + * FATAL=21. Custom/unknown levels fall back to INFO (9). + */ +export function toOtelSeverity(level: string): { number: number; text: string } { + switch (level.toLowerCase()) { + case 'trace': + return { number: 1, text: 'TRACE' }; + case 'verbose': + return { number: 5, text: 'DEBUG' }; // verbose maps to DEBUG range + case 'debug': + return { number: 5, text: 'DEBUG' }; + case 'info': + return { number: 9, text: 'INFO' }; + case 'warn': + case 'warning': + return { number: 13, text: 'WARN' }; + case 'error': + return { number: 17, text: 'ERROR' }; + case 'fatal': + return { number: 21, text: 'FATAL' }; + default: + return { number: 9, text: 'INFO' }; + } +} + +/** Coerce a JS value into an OTLP AnyValue. */ +function toAnyValue(value: unknown): Record { + if (typeof value === 'string') return { stringValue: value }; + if (typeof value === 'boolean') return { boolValue: value }; + if (typeof value === 'number') { + return Number.isInteger(value) ? { intValue: value } : { doubleValue: value }; + } + if (typeof value === 'bigint') return { stringValue: value.toString() }; + if (value === null || value === undefined) return { stringValue: '' }; + // Objects/arrays → JSON string (kvlistValue would be richer but stringValue is + // universally accepted and avoids deep recursion / circular issues here). + try { + return { stringValue: JSON.stringify(value) }; + } catch { + return { stringValue: String(value) }; + } +} + +/** Build the OTLP KeyValue attribute list from a flat record. */ +function toAttributes(rec: Record): Array<{ key: string; value: unknown }> { + return Object.entries(rec).map(([key, value]) => ({ key, value: toAnyValue(value) })); +} + +export class OtlpLogTransport implements IAsyncTransport { + public readonly name = 'otlp'; + public readonly level: string | undefined; + + private readonly url: string; + private readonly headers: Record; + private readonly batchSize: number; + private readonly flushIntervalMs: number; + private readonly resourceAttrs: Array<{ key: string; value: unknown }>; + + private batch: TransportLogEntry[] = []; + private flushTimer: NodeJS.Timeout | null = null; + + constructor(config: OtlpLogTransportConfig) { + this.url = config.url; + this.headers = config.headers ?? {}; + this.batchSize = config.batchSize ?? 100; + this.flushIntervalMs = config.flushIntervalMs ?? 5000; + this.level = config.level; + + const resource: Record = { + 'service.name': config.serviceName ?? 'logixia', + ...(config.serviceVersion ? { 'service.version': config.serviceVersion } : {}), + ...(config.environment ? { 'deployment.environment': config.environment } : {}), + ...(config.resourceAttributes ?? {}), + }; + this.resourceAttrs = toAttributes(resource); + + this.flushTimer = setInterval(() => { + this.flush().catch(() => {}); + }, this.flushIntervalMs); + if (this.flushTimer.unref) this.flushTimer.unref(); + } + + write(entry: TransportLogEntry): void { + this.batch.push(entry); + if (this.batch.length >= this.batchSize) { + this.flush().catch(() => {}); + } + } + + /** Convert one entry into an OTLP LogRecord. */ + private toLogRecord(entry: TransportLogEntry): Record { + const sev = toOtelSeverity(entry.level); + const tsNanos = String(entry.timestamp.getTime() * 1_000_000); + + const attrs: Record = { ...(entry.data ?? {}) }; + if (entry.context !== undefined) attrs['context'] = entry.context; + if (entry.appName !== undefined) attrs['app.name'] = entry.appName; + if (entry.environment !== undefined) attrs['deployment.environment'] = entry.environment; + + const record: Record = { + timeUnixNano: tsNanos, + observedTimeUnixNano: tsNanos, + severityNumber: sev.number, + severityText: sev.text, + body: { stringValue: entry.message }, + attributes: toAttributes(attrs), + }; + + // Trace correlation: OTel expects 32-hex traceId / 16-hex spanId. Pass the + // traceId through (collectors tolerate non-hex correlation ids via the + // attribute too, but the dedicated field enables native trace-log linking). + if (entry.traceId) { + record['traceId'] = entry.traceId; + } + + return record; + } + + private buildPayload(entries: TransportLogEntry[]): string { + return JSON.stringify({ + resourceLogs: [ + { + resource: { attributes: this.resourceAttrs }, + scopeLogs: [ + { + scope: { name: 'logixia' }, + logRecords: entries.map((e) => this.toLogRecord(e)), + }, + ], + }, + ], + }); + } + + async flush(): Promise { + // Drain the WHOLE batch; splice() detaches synchronously so concurrent + // writes are never sent twice, and looping empties everything on shutdown. + while (this.batch.length > 0) { + const entries = this.batch.splice(0, this.batchSize); + try { + await this.send(entries); + } catch (err) { + internalError('OtlpLogTransport flush error', err); + this.batch.unshift(...entries); + return; + } + } + } + + private async send(entries: TransportLogEntry[]): Promise { + if (typeof fetch !== 'function') { + internalWarn('OtlpLogTransport: global fetch unavailable — cannot export logs'); + return; + } + const res = await fetch(this.url, { + method: 'POST', + headers: { 'Content-Type': 'application/json', ...this.headers }, + body: this.buildPayload(entries), + }); + if (!res.ok) { + throw new Error(`OTLP export failed: HTTP ${res.status}`); + } + } + + async close(): Promise { + if (this.flushTimer) { + clearInterval(this.flushTimer); + this.flushTimer = null; + } + for (let attempt = 0; attempt < 3 && this.batch.length > 0; attempt += 1) { + await this.flush(); + } + if (this.batch.length > 0) { + internalError(`OtlpLogTransport closing with ${this.batch.length} undelivered record(s)`); + } + } +} diff --git a/src/types/index.ts b/src/types/index.ts index 177379a..699b693 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -161,6 +161,37 @@ export interface SamplingConfig { * Set to 0 to disable. Default: 60_000 (60 s). */ statsIntervalMs?: number; + /** + * Adaptive (anomaly-driven) sampling. When the error rate over a sliding + * window crosses a threshold, the effective sample rate is boosted toward 1.0 + * so an incident is captured in full; in steady state it relaxes back to the + * base `rate`. This balances high-volume cost control with not-missing-logs + * exactly when they matter most. + * + * @example `{ adaptive: { errorRateThreshold: 0.05, boostRate: 1.0 } }` + */ + adaptive?: AdaptiveSamplingConfig; +} + +export interface AdaptiveSamplingConfig { + /** + * Fraction of evaluated entries that must be errors within the window to + * trigger a boost. e.g. 0.05 = boost once ≥5% of recent logs are errors. + * Default: 0.05. + */ + errorRateThreshold?: number; + /** + * Sample rate to use while boosted (overrides base `rate`/`perLevel`). + * Default: 1.0 (keep everything during an incident). + */ + boostRate?: number; + /** Sliding window size in ms over which the error rate is measured. Default: 10_000. */ + windowMs?: number; + /** + * Minimum events in the window before the error rate is trusted (avoids + * boosting on a single error in a quiet system). Default: 20. + */ + minSamples?: number; } // ── Namespace Levels ─────────────────────────────────────────────────────────── diff --git a/src/utils/__tests__/runtime-control.test.ts b/src/utils/__tests__/runtime-control.test.ts new file mode 100644 index 0000000..be7d70e --- /dev/null +++ b/src/utils/__tests__/runtime-control.test.ts @@ -0,0 +1,201 @@ +/** + * Tests for dynamic runtime log-level reconfiguration (R3 + R4). + * + * Covers the logger's setNamespaceLevels/patch/get methods, the SIGUSR2-style + * signal cycler, and the HTTP admin handler (GET reads, POST sets, bad input + * rejected). + */ + +import { LogixiaLogger } from '../../core/logitron-logger'; +import type { LogLevelString, NamespaceLevels } from '../../types'; +import { + createLevelControlHandler, + type ReconfigurableLogger, + registerLevelSignal, +} from '../runtime-control'; +import { resetShutdownHandlers } from '../shutdown.utils'; + +const BASE = { + appName: 'TestApp', + format: { timestamp: false, colorize: false, json: false }, + traceId: false, + silent: true, +}; + +afterEach(() => { + resetShutdownHandlers(); +}); + +describe('LogixiaLogger — runtime namespace level methods', () => { + it('setNamespaceLevels replaces the map and getNamespaceLevels returns a copy', () => { + const logger = new LogixiaLogger({ ...BASE }); + logger.setNamespaceLevels({ 'db.*': 'debug', '*': 'info' }); + expect(logger.getNamespaceLevels()).toEqual({ 'db.*': 'debug', '*': 'info' }); + + // Returned object is a copy — mutating it doesn't affect internal state. + const snap = logger.getNamespaceLevels(); + snap['db.*'] = 'trace'; + expect(logger.getNamespaceLevels()['db.*']).toBe('debug'); + }); + + it('patchNamespaceLevels merges into the existing map', () => { + const logger = new LogixiaLogger({ ...BASE }); + logger.setNamespaceLevels({ 'db.*': 'debug' }); + logger.patchNamespaceLevels({ 'http.*': 'warn' }); + expect(logger.getNamespaceLevels()).toEqual({ 'db.*': 'debug', 'http.*': 'warn' }); + }); + + it('a child logger in a matching namespace honors a runtime level change', async () => { + const lines: string[] = []; + const orig = process.stdout.write.bind(process.stdout); + (process.stdout as NodeJS.WriteStream).write = ((c: unknown) => { + lines.push(String(c ?? '')); + return true; + }) as typeof process.stdout.write; + + try { + const logger = new LogixiaLogger({ + ...BASE, + silent: false, + levelOptions: { level: 'info' }, + }); + const db = logger.child('db.queries'); + // At info level, a debug line is suppressed. + await db.debug('before'); + const beforeCount = lines.filter((l) => l.includes('before')).length; + + // Flip db.* to debug at runtime. + logger.setNamespaceLevels({ 'db.*': 'debug' }); + const db2 = logger.child('db.queries'); + await db2.debug('after'); + const afterCount = lines.filter((l) => l.includes('after')).length; + + expect(beforeCount).toBe(0); + expect(afterCount).toBe(1); + } finally { + (process.stdout as NodeJS.WriteStream).write = orig; + } + }); +}); + +describe('registerLevelSignal', () => { + it('cycles the level on each signal and dispose removes the listener', () => { + const logger = new LogixiaLogger({ ...BASE, levelOptions: { level: 'info' } }); + const dispose = registerLevelSignal(logger, { signal: 'SIGUSR2' }); + + expect(logger.getLevel()).toBe('info'); + process.emit('SIGUSR2', 'SIGUSR2'); + expect(logger.getLevel()).toBe('debug'); // info → debug + process.emit('SIGUSR2', 'SIGUSR2'); + expect(logger.getLevel()).toBe('trace'); // debug → trace + + dispose(); + const before = logger.getLevel(); + process.emit('SIGUSR2', 'SIGUSR2'); + expect(logger.getLevel()).toBe(before); // listener removed → no change + + process.removeAllListeners('SIGUSR2'); + }); + + it('uses a custom cycle when provided', () => { + const logger = new LogixiaLogger({ ...BASE, levelOptions: { level: 'error' } }); + const cycle: LogLevelString[] = ['error', 'warn'] as unknown as LogLevelString[]; + const dispose = registerLevelSignal(logger, { cycle }); + process.emit('SIGUSR2', 'SIGUSR2'); + expect(logger.getLevel()).toBe('warn'); + process.emit('SIGUSR2', 'SIGUSR2'); + expect(logger.getLevel()).toBe('error'); // wraps + dispose(); + process.removeAllListeners('SIGUSR2'); + }); +}); + +describe('createLevelControlHandler', () => { + function fakeRes() { + return { + statusCode: 200, + headers: {} as Record, + body: '', + setHeader(k: string, v: string) { + this.headers[k] = v; + }, + end(b?: string) { + this.body = b ?? ''; + }, + }; + } + + function makeLogger() { + let level: LogLevelString = 'info' as LogLevelString; + let ns: NamespaceLevels = {}; + const logger: ReconfigurableLogger = { + getLevel: () => level, + setLevel: (l) => { + level = l; + }, + setNamespaceLevels: (l) => { + ns = { ...l }; + }, + getNamespaceLevels: () => ({ ...ns }), + }; + return logger; + } + + it('GET returns the current level and namespace levels', () => { + const handler = createLevelControlHandler(makeLogger()); + const res = fakeRes(); + handler({ method: 'GET' }, res); + expect(res.statusCode).toBe(200); + expect(JSON.parse(res.body)).toEqual({ level: 'info', namespaceLevels: {} }); + }); + + it('POST with a body object (Express style) sets the level', () => { + const logger = makeLogger(); + const handler = createLevelControlHandler(logger); + const res = fakeRes(); + handler({ method: 'POST', body: { level: 'debug' } } as never, res); + expect(res.statusCode).toBe(200); + expect(logger.getLevel()).toBe('debug'); + }); + + it('POST sets namespace levels', () => { + const logger = makeLogger(); + const handler = createLevelControlHandler(logger); + const res = fakeRes(); + handler({ method: 'POST', body: { namespaceLevels: { 'db.*': 'trace' } } } as never, res); + expect(logger.getNamespaceLevels!()).toEqual({ 'db.*': 'trace' }); + }); + + it('rejects an unknown level with 400', () => { + const handler = createLevelControlHandler(makeLogger()); + const res = fakeRes(); + handler({ method: 'POST', body: { level: 'loud' } } as never, res); + expect(res.statusCode).toBe(400); + expect(JSON.parse(res.body).error).toContain('unknown level'); + }); + + it('rejects a non-GET/POST method with 405', () => { + const handler = createLevelControlHandler(makeLogger()); + const res = fakeRes(); + handler({ method: 'DELETE' }, res); + expect(res.statusCode).toBe(405); + }); + + it('reads a streamed raw body when req.on is provided (Node http style)', () => { + const logger = makeLogger(); + const handler = createLevelControlHandler(logger); + const res = fakeRes(); + const listeners: Record void> = {}; + const req = { + method: 'POST', + on(event: string, cb: (c?: unknown) => void) { + listeners[event] = cb; + }, + }; + handler(req, res); + listeners['data']!('{"level":'); + listeners['data']!('"warn"}'); + listeners['end']!(); + expect(logger.getLevel()).toBe('warn'); + }); +}); diff --git a/src/utils/__tests__/safe-stringify.test.ts b/src/utils/__tests__/safe-stringify.test.ts new file mode 100644 index 0000000..8c493cf --- /dev/null +++ b/src/utils/__tests__/safe-stringify.test.ts @@ -0,0 +1,86 @@ +/** + * Tests for robust serialization (R9): BigInt, circular refs, deterministic key + * order, and round-trippable $ref decycle/retrocycle. + */ + +import { decycleValue, retrocycle, safeStringify } from '../safe-stringify'; + +describe('safeStringify', () => { + it('serializes BigInt as a string by default (JSON.stringify would throw)', () => { + expect(() => JSON.stringify({ id: 10n })).toThrow(); + expect(safeStringify({ id: 10n })).toBe('{"id":"10"}'); + }); + + it('serializes BigInt as a number when requested', () => { + expect(safeStringify({ id: 7n }, { bigint: 'number' })).toBe('{"id":7}'); + }); + + it('replaces circular references with [Circular] (no throw)', () => { + const obj: Record = { a: 1 }; + obj.self = obj; + const out = safeStringify(obj); + expect(out).toContain('[Circular]'); + expect(JSON.parse(out)).toEqual({ a: 1, self: '[Circular]' }); + }); + + it('allows the same object in sibling branches (not a false cycle)', () => { + const shared = { x: 1 }; + const out = safeStringify({ a: shared, b: shared }); + // shared appears in both — neither should be [Circular] since it's not a cycle. + expect(JSON.parse(out)).toEqual({ a: { x: 1 }, b: { x: 1 } }); + }); + + it('produces deterministic key order when requested', () => { + const a = safeStringify({ b: 1, a: 2, c: 3 }, { deterministic: true }); + const b = safeStringify({ c: 3, a: 2, b: 1 }, { deterministic: true }); + expect(a).toBe(b); + expect(a).toBe('{"a":2,"b":1,"c":3}'); + }); + + it('renders Dates, functions and symbols safely', () => { + const out = JSON.parse( + safeStringify({ d: new Date('2026-01-01T00:00:00.000Z'), f: () => 0, s: Symbol('z') }) + ); + expect(out.d).toBe('2026-01-01T00:00:00.000Z'); + expect(out.f).toContain('[Function'); + expect(out.s).toContain('Symbol'); + }); + + it('ignores prototype-pollution keys', () => { + const out = JSON.parse(safeStringify(JSON.parse('{"__proto__":{"x":1},"ok":2}'))); + expect(out.ok).toBe(2); + expect(out.__proto__).not.toEqual({ x: 1 }); + }); +}); + +describe('decycle / retrocycle round-trip', () => { + it('emits $ref pointers for repeated references and round-trips them back', () => { + const a: Record = { name: 'A' }; + const b: Record = { name: 'B', parent: a }; + a.child = b; // a → b → a (cycle) + + const json = safeStringify(a, { decycle: true }); + // The back-reference is a $ref pointer, not [Circular]. + expect(json).toContain('$ref'); + expect(json).not.toContain('[Circular]'); + + const parsed = retrocycle(JSON.parse(json)) as Record; + expect(parsed.name).toBe('A'); + const childBack = parsed.child as Record; + expect(childBack.name).toBe('B'); + // The cycle is reconstructed: child.parent === root. + expect(childBack.parent).toBe(parsed); + }); + + it('decycleValue handles BigInt inside a decycled graph', () => { + const out = decycleValue({ big: 5n }); + expect(out).toEqual({ big: '5' }); + }); + + it('round-trips shared (non-circular) references to the same object', () => { + const shared = { v: 42 }; + const json = safeStringify({ p: shared, q: shared }, { decycle: true }); + const parsed = retrocycle(JSON.parse(json)) as { p: unknown; q: unknown }; + expect(parsed.p).toBe(parsed.q); // same reference restored + }); +}); diff --git a/src/utils/__tests__/sampling.utils.comprehensive.test.ts b/src/utils/__tests__/sampling.utils.comprehensive.test.ts index ee9992e..8ff375b 100644 --- a/src/utils/__tests__/sampling.utils.comprehensive.test.ts +++ b/src/utils/__tests__/sampling.utils.comprehensive.test.ts @@ -426,4 +426,77 @@ describe('Sampler', () => { expect(lastStats).toBeDefined(); }); }); + + // ── adaptive (anomaly-driven) sampling ────────────────────────────────────── + + describe('adaptive sampling', () => { + it('does not boost in steady state (error rate below threshold)', () => { + const s = new Sampler({ + rate: 0, + adaptive: { errorRateThreshold: 0.5, minSamples: 10, boostRate: 1.0 }, + }); + // 20 info logs, no errors → 0% error rate → no boost. + for (let i = 0; i < 20; i += 1) s.shouldEmit('info', `t${i}`); + expect(s.isBoosting()).toBe(false); + // With rate 0 and no boost, a fresh info log is dropped. + expect(s.shouldEmit('info', 'x')).toBe(false); + s.destroy(); + }); + + it('boosts the sample rate once the windowed error rate crosses the threshold', () => { + const s = new Sampler({ + rate: 0, // base: drop everything… + adaptive: { errorRateThreshold: 0.3, minSamples: 10, boostRate: 1.0 }, + // perLevel keeps error from short-circuiting so it counts as an evaluated sample + perLevel: { error: 1.0 }, + }); + // Feed a burst where >30% are errors. + for (let i = 0; i < 7; i += 1) s.shouldEmit('info', `i${i}`); + for (let i = 0; i < 5; i += 1) s.shouldEmit('error', `e${i}`); + + expect(s.isBoosting()).toBe(true); + // While boosted, even a debug at base-rate-0 is now kept (boost rate 1.0). + expect(s.shouldEmit('debug', 'd1')).toBe(true); + s.destroy(); + }); + + it('does not boost before minSamples is reached', () => { + const s = new Sampler({ + rate: 0, + adaptive: { errorRateThreshold: 0.1, minSamples: 50, boostRate: 1.0 }, + perLevel: { error: 1.0 }, + }); + // 100% errors but only a handful of samples → not enough to trust. + for (let i = 0; i < 5; i += 1) s.shouldEmit('error', `e${i}`); + expect(s.isBoosting()).toBe(false); + s.destroy(); + }); + + it('relaxes the boost as old errors age out of the window', () => { + jest.useFakeTimers(); + try { + const s = new Sampler({ + rate: 0, + adaptive: { + errorRateThreshold: 0.3, + minSamples: 10, + boostRate: 1.0, + windowMs: 1000, + }, + perLevel: { error: 1.0 }, + }); + for (let i = 0; i < 7; i += 1) s.shouldEmit('info', `i${i}`); + for (let i = 0; i < 5; i += 1) s.shouldEmit('error', `e${i}`); + expect(s.isBoosting()).toBe(true); + + // Advance past the window and add only info samples — errors age out. + jest.advanceTimersByTime(1100); + for (let i = 0; i < 15; i += 1) s.shouldEmit('info', `j${i}`); + expect(s.isBoosting()).toBe(false); + s.destroy(); + } finally { + jest.useRealTimers(); + } + }); + }); }); diff --git a/src/utils/runtime-control.ts b/src/utils/runtime-control.ts new file mode 100644 index 0000000..31476ce --- /dev/null +++ b/src/utils/runtime-control.ts @@ -0,0 +1,216 @@ +/** + * logixia — Dynamic runtime log-level reconfiguration. + * + * Change log levels in a running process WITHOUT a restart — the single + * most-requested logging feature across the Winston and Pino issue trackers + * (winston#1107, pino#206/#677, nestjs-pino#371). The ecosystem even built a + * standalone module for it (pino-arborsculpture); logixia ships it first-class. + * + * Two trigger surfaces: + * - {@link registerLevelSignal} — flip levels via an OS signal (default SIGUSR2), + * cycling through a level list. Zero HTTP surface, safe for any deployment. + * - {@link createLevelControlHandler} — a tiny HTTP handler so an ops dashboard + * can GET the current level and POST a new global / per-namespace level. + * + * @example Signal-based (cycle levels on each `kill -USR2 `) + * ```ts + * import { registerLevelSignal } from 'logixia'; + * const dispose = registerLevelSignal(logger); + * // later: process.kill(process.pid, 'SIGUSR2') → info → debug → trace → info … + * ``` + * + * @example HTTP admin endpoint + * ```ts + * import { createLevelControlHandler } from 'logixia'; + * const handler = createLevelControlHandler(logger); + * app.all('/admin/log-level', handler); // GET reads, POST { level, namespaceLevels } sets + * ``` + */ + +import type { LogLevelString, NamespaceLevels } from '../types'; +import { internalLog, internalWarn } from './internal-log'; + +/** Minimal logger surface the runtime controls need. */ +export interface ReconfigurableLogger { + getLevel(): LogLevelString; + setLevel(level: LogLevelString): void; + setNamespaceLevels?(levels: NamespaceLevels): void; + getNamespaceLevels?(): NamespaceLevels; +} + +const DEFAULT_CYCLE: readonly LogLevelString[] = [ + 'error', + 'warn', + 'info', + 'debug', + 'trace', + 'verbose', +] as unknown as readonly LogLevelString[]; + +export interface LevelSignalOptions { + /** Signal to listen on. Default: 'SIGUSR2' (SIGUSR1 is used by the Node debugger). */ + signal?: NodeJS.Signals; + /** Ordered levels to cycle through on each signal. Default: error→…→verbose. */ + cycle?: LogLevelString[]; +} + +/** + * Register an OS-signal handler that cycles the logger's global level on each + * signal. Returns a dispose function that removes the listener. + * + * Cycling (rather than jumping straight to a fixed level) means a single, + * memorizable command (`kill -USR2 `) is enough to ratchet verbosity up + * while chasing a bug and back down again — no value to remember. + */ +export function registerLevelSignal( + logger: ReconfigurableLogger, + options: LevelSignalOptions = {} +): () => void { + const signal = options.signal ?? 'SIGUSR2'; + const cycle = options.cycle && options.cycle.length > 0 ? options.cycle : [...DEFAULT_CYCLE]; + + const handler = (): void => { + const current = logger.getLevel(); + const idx = cycle.indexOf(current); + const next = cycle[(idx + 1) % cycle.length]!; + logger.setLevel(next); + internalLog(`runtime level changed via ${signal}: ${current} → ${next}`); + }; + + process.on(signal, handler); + return () => { + process.removeListener(signal, handler); + }; +} + +// ── HTTP admin handler ──────────────────────────────────────────────────────── + +interface MinimalReq { + method?: string | undefined; + on?: (event: string, cb: (chunk?: unknown) => void) => void; +} +interface MinimalRes { + statusCode?: number; + setHeader?: (k: string, v: string) => void; + end: (body?: string) => void; +} + +const VALID_LEVELS = new Set([ + 'error', + 'warn', + 'info', + 'debug', + 'trace', + 'verbose', + 'fatal', +]); + +/** + * Create an HTTP handler (Node `http`/Express-compatible) that reads and sets + * the logger's level at runtime. + * + * - `GET` → `{ level, namespaceLevels }` + * - `POST` → body `{ level?, namespaceLevels? }` applies them, returns the new state + * + * Custom levels are accepted too: any level the logger already knows passes + * through. Unknown levels are rejected with 400 so a typo can't silently mute + * logging. Mount behind your own auth — this intentionally has none. + */ +export function createLevelControlHandler( + logger: ReconfigurableLogger, + options: { allowedLevels?: string[] } = {} +): (req: MinimalReq, res: MinimalRes) => void { + const allowed = + options.allowedLevels && options.allowedLevels.length > 0 + ? new Set(options.allowedLevels.map((l) => l.toLowerCase())) + : VALID_LEVELS; + + const snapshot = (): { level: LogLevelString; namespaceLevels: NamespaceLevels } => ({ + level: logger.getLevel(), + namespaceLevels: logger.getNamespaceLevels?.() ?? {}, + }); + + const send = (res: MinimalRes, status: number, body: unknown): void => { + res.statusCode = status; + res.setHeader?.('Content-Type', 'application/json'); + res.end(JSON.stringify(body)); + }; + + const applyBody = (res: MinimalRes, raw: string): void => { + let parsed: { level?: unknown; namespaceLevels?: unknown }; + try { + parsed = raw ? (JSON.parse(raw) as typeof parsed) : {}; + } catch { + send(res, 400, { error: 'invalid JSON body' }); + return; + } + + if (parsed.level !== undefined) { + const lvl = String(parsed.level).toLowerCase(); + if (!allowed.has(lvl)) { + send(res, 400, { error: `unknown level "${parsed.level}"`, allowed: [...allowed] }); + return; + } + logger.setLevel(lvl as LogLevelString); + } + + if (parsed.namespaceLevels !== undefined) { + if ( + typeof parsed.namespaceLevels !== 'object' || + parsed.namespaceLevels === null || + Array.isArray(parsed.namespaceLevels) + ) { + send(res, 400, { error: 'namespaceLevels must be an object' }); + return; + } + const nl = parsed.namespaceLevels as Record; + for (const [pat, lvl] of Object.entries(nl)) { + if (!allowed.has(String(lvl).toLowerCase())) { + send(res, 400, { error: `unknown level "${String(lvl)}" for namespace "${pat}"` }); + return; + } + } + if (logger.setNamespaceLevels) { + const coerced: NamespaceLevels = {}; + for (const [pat, lvl] of Object.entries(nl)) { + coerced[pat] = String(lvl).toLowerCase() as LogLevelString; + } + logger.setNamespaceLevels(coerced); + } else { + internalWarn('level control: logger does not support setNamespaceLevels — ignored'); + } + } + + send(res, 200, snapshot()); + }; + + return function levelControlHandler(req: MinimalReq, res: MinimalRes): void { + const method = (req.method ?? 'GET').toUpperCase(); + + if (method === 'GET') { + send(res, 200, snapshot()); + return; + } + + if (method === 'POST' || method === 'PUT' || method === 'PATCH') { + // Express already-parsed body (req.body) vs raw stream. + const maybeBody = (req as unknown as { body?: unknown }).body; + if (maybeBody !== undefined && typeof req.on !== 'function') { + applyBody(res, typeof maybeBody === 'string' ? maybeBody : JSON.stringify(maybeBody)); + return; + } + if (typeof req.on === 'function') { + let raw = ''; + req.on('data', (chunk?: unknown) => { + raw += String(chunk ?? ''); + }); + req.on('end', () => applyBody(res, raw)); + return; + } + applyBody(res, ''); + return; + } + + send(res, 405, { error: `method ${method} not allowed` }); + }; +} diff --git a/src/utils/safe-stringify.ts b/src/utils/safe-stringify.ts new file mode 100644 index 0000000..7fcbaf4 --- /dev/null +++ b/src/utils/safe-stringify.ts @@ -0,0 +1,168 @@ +/** + * logixia — robust JSON serialization for log payloads. + * + * Modern Winston/Pino already neutralize circular refs to `[Circular]` (via + * safe-stable-stringify / fast-safe-stringify), so merely "surviving cycles" is + * not a differentiator. This goes one notch further: + * + * - **BigInt** → serialized (as a string by default; JSON.stringify throws on + * BigInt, which silently breaks logging of e.g. DB bigint ids). + * - **Deterministic key order** (optional) so identical objects hash/diff equal. + * - **True decycle** (optional) — replace repeated references with JSONPath + * `$ref` pointers that round-trip back to the original object graph, instead + * of the lossy `[Circular]` tag. Useful when you need to reconstruct shared + * structure downstream. + * + * @example + * ```ts + * safeStringify({ id: 10n, self: obj }); // → BigInt + cycle safe + * safeStringify(graph, { decycle: true }); // → round-trippable $ref pointers + * const back = retrocycle(JSON.parse(json)); // → reconstruct shared refs + * ``` + */ + +export interface SafeStringifyOptions { + /** Indentation passed to JSON.stringify (number of spaces or a string). */ + indent?: number | string; + /** Sort object keys for deterministic output. Default: false. */ + deterministic?: boolean; + /** + * Use round-trippable `$ref` JSONPath pointers for repeated references instead + * of the lossy "[Circular]" tag. Default: false. + */ + decycle?: boolean; + /** How to render BigInt: 'string' (default) or 'number' (may lose precision). */ + bigint?: 'string' | 'number'; +} + +/** Build a JSONPath like `$["a"][0]["b"]` for a decycle pointer. */ +function jsonPath(parts: Array): string { + let path = '$'; + for (const p of parts) { + path += typeof p === 'number' ? `[${p}]` : `[${JSON.stringify(p)}]`; + } + return path; +} + +/** + * Serialize any value to JSON without throwing on circular references or BigInt. + * Circular refs become `"[Circular]"` (or `{ $ref }` pointers when `decycle`). + */ +export function safeStringify(value: unknown, options: SafeStringifyOptions = {}): string { + const { indent, deterministic = false, decycle = false, bigint = 'string' } = options; + + if (decycle) { + return JSON.stringify(decycleValue(value, bigint), undefined, indent); + } + + const seen = new WeakSet(); + + const transform = (val: unknown): unknown => { + if (typeof val === 'bigint') return bigint === 'number' ? Number(val) : val.toString(); + if (typeof val === 'function') return `[Function: ${val.name || 'anonymous'}]`; + if (typeof val === 'symbol') return val.toString(); + if (val === null || typeof val !== 'object') return val; + + if (seen.has(val)) return '[Circular]'; + seen.add(val); + + let out: unknown; + if (Array.isArray(val)) { + out = val.map((item) => transform(item)); + } else if (val instanceof Date) { + out = val.toISOString(); + } else { + const rec = val as Record; + const keys = deterministic ? Object.keys(rec).sort() : Object.keys(rec); + const obj: Record = {}; + for (const k of keys) { + if (k === '__proto__' || k === 'constructor' || k === 'prototype') continue; + obj[k] = transform(rec[k]); + } + out = obj; + } + // Allow the same object to appear in sibling branches (not a true cycle): + // remove from `seen` after we finish its subtree. + seen.delete(val); + return out; + }; + + return JSON.stringify(transform(value), undefined, indent); +} + +/** + * Replace repeated object references with round-trippable `{ "$ref": "$..." }` + * JSONPath pointers (the classic Crockford decycle). Pair with {@link retrocycle} + * to reconstruct the original shared/circular graph. + */ +export function decycleValue(value: unknown, bigint: 'string' | 'number' = 'string'): unknown { + const paths = new WeakMap(); + + const walk = (val: unknown, path: Array): unknown => { + if (typeof val === 'bigint') return bigint === 'number' ? Number(val) : val.toString(); + if (val === null || typeof val !== 'object') return val; + if (val instanceof Date) return val.toISOString(); + + const existing = paths.get(val); + if (existing !== undefined) return { $ref: existing }; + paths.set(val, jsonPath(path)); + + if (Array.isArray(val)) { + return val.map((item, i) => walk(item, [...path, i])); + } + const rec = val as Record; + const obj: Record = {}; + for (const k of Object.keys(rec)) { + if (k === '__proto__' || k === 'constructor' || k === 'prototype') continue; + obj[k] = walk(rec[k], [...path, k]); + } + return obj; + }; + + return walk(value, []); +} + +/** + * Inverse of {@link decycleValue}: resolve `{ "$ref": "$..." }` pointers back + * into the live object graph (mutates and returns the parsed input). + */ +export function retrocycle(root: T): T { + const refRe = /^\$(?:\[(?:\d+|"(?:[^"\\]|\\.)*")\])*$/; + + const resolve = (path: string): unknown => { + // Parse the JSONPath segments back out. + const segs: Array = []; + const partRe = /\[(\d+|"(?:[^"\\]|\\.)*")\]/g; + let m: RegExpExecArray | null; + while ((m = partRe.exec(path)) !== null) { + const raw = m[1]!; + segs.push(raw.startsWith('"') ? (JSON.parse(raw) as string) : Number(raw)); + } + let node: unknown = root; + for (const s of segs) { + node = (node as Record)[s]; + } + return node; + }; + + const walk = (val: unknown): void => { + if (val === null || typeof val !== 'object') return; + const rec = val as Record; + for (const k of Object.keys(rec)) { + const child = rec[k]; + if ( + child !== null && + typeof child === 'object' && + typeof (child as { $ref?: unknown }).$ref === 'string' && + refRe.test((child as { $ref: string }).$ref) + ) { + rec[k] = resolve((child as { $ref: string }).$ref); + } else { + walk(child); + } + } + }; + + walk(root); + return root; +} diff --git a/src/utils/sampling.utils.ts b/src/utils/sampling.utils.ts index 2ce9bf5..7aaba3b 100644 --- a/src/utils/sampling.utils.ts +++ b/src/utils/sampling.utils.ts @@ -48,6 +48,10 @@ export class Sampler { private _tokenBucket = 0; private _lastRefillMs = Date.now(); + // Adaptive (anomaly-driven) sampling state — a sliding window of recent + // (timestamp, isError) samples used to compute the current error rate. + private readonly _adaptiveWindow: Array<{ t: number; err: boolean }> = []; + // Stats private _stats: SamplingStats = { evaluated: 0, @@ -89,6 +93,7 @@ export class Sampler { shouldEmit(level: string, traceId?: string): boolean { const lvl = level.toLowerCase(); this._trackEvaluated(lvl); + if (this.config.adaptive) this._recordAdaptiveSample(lvl); // ── 1. Safety: error/fatal always pass (unless explicitly overridden) ───── if (ALWAYS_EMIT_LEVELS.has(lvl) && this.config.perLevel?.[lvl] === undefined) { @@ -161,15 +166,57 @@ export class Sampler { // ── Private helpers ───────────────────────────────────────────────────────── private _sampleByRate(level: string): boolean { - const rate = + const baseRate = this.config.perLevel?.[level] ?? this.config.perLevel?.['*'] ?? this.config.rate ?? 1.0; + // Adaptive boost: during an error anomaly, lift the rate so the incident is + // captured in full, then relax back to the base rate in steady state. + const rate = Math.max(baseRate, this._adaptiveBoostRate()); + if (rate >= 1.0) return true; if (rate <= 0.0) return false; // eslint-disable-next-line sonarjs/pseudo-random -- probabilistic sampling, not security-sensitive return Math.random() < rate; } + /** + * Record one evaluated sample into the adaptive sliding window and evict + * entries older than the window. Cheap O(evicted) amortized. + */ + private _recordAdaptiveSample(level: string): void { + const now = Date.now(); + const windowMs = this.config.adaptive?.windowMs ?? 10_000; + this._adaptiveWindow.push({ t: now, err: level === 'error' || level === 'fatal' }); + const cutoff = now - windowMs; + while (this._adaptiveWindow.length > 0 && this._adaptiveWindow[0]!.t < cutoff) { + this._adaptiveWindow.shift(); + } + } + + /** + * Effective boost rate (0 = no boost). Returns the configured boostRate when + * the windowed error rate is at/above the threshold AND there are enough + * samples to trust it; otherwise 0. + */ + private _adaptiveBoostRate(): number { + const cfg = this.config.adaptive; + if (!cfg) return 0; + const minSamples = cfg.minSamples ?? 20; + const total = this._adaptiveWindow.length; + if (total < minSamples) return 0; + + let errors = 0; + for (const s of this._adaptiveWindow) if (s.err) errors += 1; + const errorRate = errors / total; + const threshold = cfg.errorRateThreshold ?? 0.05; + return errorRate >= threshold ? (cfg.boostRate ?? 1.0) : 0; + } + + /** @internal Expose the current adaptive boost decision (tests / observability). */ + isBoosting(): boolean { + return this._adaptiveBoostRate() > 0; + } + /** * Record a trace decision, bounding the Set so it can't grow without limit. * If the Set has reached the cap, clear it before inserting — stale decisions diff --git a/src/wide-events.ts b/src/wide-events.ts new file mode 100644 index 0000000..8aa9508 --- /dev/null +++ b/src/wide-events.ts @@ -0,0 +1,215 @@ +/** + * logixia — Canonical Log Lines / Wide Events. + * + * Emit ONE dense, structured event per unit of work (usually a request) instead + * of scattering details across many narrow log lines. Fields are accumulated as + * the request flows through middleware and business logic, then the whole event + * is emitted ONCE — in a `finally`/teardown path so it fires even on errors. + * + * This is the "canonical log line" pattern (Stripe) / "wide events" / + * "Observability 2.0" (Honeycomb): one pre-joined, queryable record per request, + * so operators never JOIN across log lines during an incident. It composes with + * logixia's existing trace correlation — when a trace is active, the event + * carries `traceId`/`spanId`, making it OTel-friendly. + * + * @example Manual scope + * ```ts + * import { withWideEvent, addEventFields } from 'logixia'; + * + * await withWideEvent(logger, { route: '/checkout' }, async () => { + * addEventFields({ userId, planTier }); // from anywhere in the call tree + * addEventFields({ dbQueries: 4, cacheHit }); + * // ...one wide event is emitted automatically when this callback settles, + * // even if it throws. + * }); + * ``` + * + * @example Express middleware (auto-emit on response finish/close) + * ```ts + * import { wideEventMiddleware } from 'logixia'; + * app.use(wideEventMiddleware(logger)); + * app.get('/x', (req, res) => { addEventFields({ handled: 'x' }); res.json({}); }); + * ``` + */ + +import { AsyncLocalStorage } from 'node:async_hooks'; + +import { getCurrentTraceId } from './utils/trace.utils'; + +/** The accumulating event fields for the current scope. */ +export type WideEventFields = Record; + +interface WideEventState { + fields: WideEventFields; + startMs: number; + emitted: boolean; +} + +const _storage = new AsyncLocalStorage(); + +/** Minimal logger surface a wide event needs to emit itself. */ +export interface WideEventLogger { + logLevel(level: string, message: string, data?: Record): Promise | void; +} + +export interface WideEventOptions { + /** Level the canonical event is logged at. Default: 'info'. */ + level?: string; + /** Message for the canonical event line. Default: 'request'. */ + message?: string; + /** + * Attach `traceId` (and `spanId` when present) from the active trace context. + * Default: true. + */ + includeTrace?: boolean; + /** Field name for the auto-computed duration in ms. Default: 'durationMs'. */ + durationField?: string; +} + +/** + * Merge fields into the wide event for the current async scope. No-op (with no + * throw) when called outside a `withWideEvent` / middleware scope, so business + * code can call it unconditionally. + */ +export function addEventFields(fields: WideEventFields): void { + const state = _storage.getStore(); + if (!state || state.emitted) return; + Object.assign(state.fields, fields); +} + +/** Set a single field on the current wide event. */ +export function setEventField(key: string, value: unknown): void { + addEventFields({ [key]: value }); +} + +/** Read a shallow copy of the wide event accumulated so far, or undefined. */ +export function getEventFields(): WideEventFields | undefined { + const state = _storage.getStore(); + return state ? { ...state.fields } : undefined; +} + +function emit( + logger: WideEventLogger, + state: WideEventState, + options: WideEventOptions, + extra?: WideEventFields +): void { + if (state.emitted) return; + state.emitted = true; + + const level = options.level ?? 'info'; + const message = options.message ?? 'request'; + const durationField = options.durationField ?? 'durationMs'; + const includeTrace = options.includeTrace ?? true; + + const payload: WideEventFields = { ...state.fields }; + if (extra) Object.assign(payload, extra); + payload[durationField] = Date.now() - state.startMs; + + if (includeTrace) { + const traceId = getCurrentTraceId(); + if (traceId !== undefined && payload['traceId'] === undefined) payload['traceId'] = traceId; + } + + // logLevel may be async (transport-backed); fire-and-forget so the wide-event + // emit never blocks request teardown. Swallow rejections (the logger surfaces + // its own transport errors) so this never becomes an unhandled rejection. + const p = logger.logLevel(level, message, payload); + if (p && typeof (p as Promise).catch === 'function') { + (p as Promise).catch(() => {}); + } +} + +/** + * Run `callback` inside a wide-event scope. `addEventFields` calls anywhere in + * the (async) call tree accumulate onto one event, which is emitted exactly once + * when the callback settles — on success OR error (the canonical "emit in + * finally" guarantee). On error, `error` + `errorMessage` fields are added. + */ +export async function withWideEvent( + logger: WideEventLogger, + initialFields: WideEventFields, + callback: () => Promise | T, + options: WideEventOptions = {} +): Promise { + const state: WideEventState = { + fields: { ...initialFields }, + startMs: Date.now(), + emitted: false, + }; + + return _storage.run(state, async () => { + try { + const result = await callback(); + emit(logger, state, options); + return result; + } catch (error) { + emit(logger, state, options, { + error: true, + errorMessage: error instanceof Error ? error.message : String(error), + }); + throw error; + } + }); +} + +// ── HTTP middleware ───────────────────────────────────────────────────────── + +interface MwReq { + method?: string | undefined; + url?: string | undefined; + originalUrl?: string | undefined; + ip?: string | undefined; + headers?: Record | undefined; + socket?: { remoteAddress?: string } | undefined; +} +interface MwRes { + statusCode?: number; + once?: (event: string, cb: () => void) => void; +} + +export interface WideEventMiddlewareOptions extends WideEventOptions { + /** Derive extra initial fields from the request. */ + enrich?: (req: MwReq) => WideEventFields; + /** Skip wide-event emission for a request (e.g. health checks). */ + skip?: (req: MwReq) => boolean; +} + +/** + * Express/Connect middleware that opens a wide-event scope per request and + * emits ONE canonical event on response `finish`/`close` — even if the handler + * throws or the client disconnects. Pre-populates method/url/ip; handlers add + * more via `addEventFields`. The completion event includes `statusCode` and the + * request duration. + */ +export function wideEventMiddleware( + logger: WideEventLogger, + options: WideEventMiddlewareOptions = {} +): (req: MwReq, res: MwRes, next: () => void) => void { + return function logixiaWideEventMiddleware(req: MwReq, res: MwRes, next: () => void): void { + if (options.skip?.(req)) { + next(); + return; + } + + const base: WideEventFields = { + method: req.method, + url: req.originalUrl ?? req.url, + ip: req.ip ?? req.socket?.remoteAddress, + ...(options.enrich ? options.enrich(req) : {}), + }; + + const state: WideEventState = { fields: base, startMs: Date.now(), emitted: false }; + + const finalize = (): void => { + emit(logger, state, options, { statusCode: res.statusCode ?? 0 }); + }; + + // Both 'finish' and 'close' may fire; emit() is idempotent (emitted guard), + // so the canonical line is logged exactly once. + res.once?.('finish', finalize); + res.once?.('close', finalize); + + _storage.run(state, () => next()); + }; +}