diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..b353dd9 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,84 @@ +name: Docs + +on: + push: + branches: [main] + paths: + - 'book/**' + - 'docs/**' + - '.github/workflows/docs.yml' + - 'CHANGELOG.md' + - 'CONTRIBUTING.md' + - 'README.md' + pull_request: + paths: + - 'book/**' + - 'docs/**' + - '.github/workflows/docs.yml' + - 'CHANGELOG.md' + - 'CONTRIBUTING.md' + - 'README.md' + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: docs-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + # Bump cache key when any tool version in the install step changes + - name: Cache mdbook binaries + id: cache-mdbook + uses: actions/cache@v5 + with: + path: ~/.cargo/bin/mdbook* + key: mdbook-v2-${{ hashFiles('.github/workflows/docs.yml') }} + + - name: Install mdbook and plugins + if: steps.cache-mdbook.outputs.cache-hit != 'true' + run: | + cargo install \ + mdbook@0.4.40 \ + mdbook-linkcheck@0.7.7 \ + mdbook-toc@0.14.2 \ + mdbook-admonish@1.18.0 \ + mdbook-mermaid@0.14.1 + + - name: Build book + run: mdbook build book + + - name: Upload Pages artifact + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' + uses: actions/upload-pages-artifact@v4 + with: + path: target/book/html + + - name: Verify build (PR) + if: github.event_name == 'pull_request' + run: | + test -f target/book/html/index.html + test -s target/book/html/index.html + echo "Build OK" + + deploy: + needs: build + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: 
actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 416c1dd..4aa5334 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,9 @@ result-* # Ruff cache (leftover from Python tooling) .ruff_cache/ +# mdBook build output +/target/book/ + # integration test artifacts tests/integration/level1/fixtures/*.flow tests/integration/level1/out/ diff --git a/CHANGELOG.md b/CHANGELOG.md index ea1c55c..2d855a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,3 @@ -# Changelog - All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 81fa319..d50a310 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,3 @@ -# Contributing — Local Testing Guide - This document covers how to run the three test tracks locally. ## Prerequisites diff --git a/Cargo.toml b/Cargo.toml index 7f3d875..045a156 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ exclude = [ "docs/demo.mp4", ".github/**", "scripts/**", "flake.nix", "flake.lock", ".envrc", ".direnv/**", ".sisyphus/**", ".ruff_cache/**", + "book/**", "docs/**", ] [[bin]] diff --git a/README.md b/README.md index ab7bffe..1ac5ef1 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ A Rust rewrite of [mitmproxy2swagger](https://github.com/alufers/mitmproxy2swagg [![Crates.io](https://img.shields.io/crates/v/mitm2openapi.svg)](https://crates.io/crates/mitm2openapi) [![Downloads](https://img.shields.io/crates/d/mitm2openapi.svg)](https://crates.io/crates/mitm2openapi) [![docs.rs](https://img.shields.io/docsrs/mitm2openapi)](https://docs.rs/mitm2openapi) +[![docs](https://img.shields.io/badge/docs-arkptz.github.io-blue)](https://arkptz.github.io/mitm2openapi/) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) Demo: capture → discover → generate → browse Swagger UI @@ -39,17 +40,13 @@ Credit to [@alufers](https://github.com/alufers) for 
the original tool that pion ## Installation -### From binary releases - -Download a pre-built binary from [GitHub Releases](https://github.com/Arkptz/mitm2openapi/releases). - -### From source - ```bash -cargo install --git https://github.com/Arkptz/mitm2openapi +cargo install mitm2openapi ``` -## Quick Start +Or download a pre-built binary from [GitHub Releases](https://github.com/Arkptz/mitm2openapi/releases). + +## Quick start ```bash # 1. Capture traffic with mitmproxy @@ -64,206 +61,13 @@ mitm2openapi discover -i capture.flow -o templates.yaml -p "https://api.example. mitm2openapi generate -i capture.flow -t templates.yaml -o openapi.yaml -p "https://api.example.com" ``` -### Skip the manual edit - -If you know which paths you care about up front, use `--exclude-patterns` -and `--include-patterns` to let `discover` do the curation: - -```bash -mitm2openapi discover \ - -i capture.flow -o templates.yaml -p "https://api.example.com" \ - --exclude-patterns '/static/**,/images/**,*.css,*.js,*.svg' \ - --include-patterns '/api/**,/v2/**' - -mitm2openapi generate \ - -i capture.flow -t templates.yaml -o openapi.yaml -p "https://api.example.com" -``` - -Paths matching `--include-patterns` are auto-activated (emitted without -the `ignore:` prefix). Paths matching `--exclude-patterns` are dropped -entirely. Everything else still gets `ignore:` for manual review. - -
-CLI Reference (click to expand) - -### `discover` - -Scan captured traffic and produce a templates file listing all observed endpoints. - -``` -mitm2openapi discover [OPTIONS] -i -o -p -``` - -| Option | Description | -|--------|-------------| -| `-i, --input ` | Input file (flow dump or HAR) | -| `-o, --output ` | Output YAML templates file | -| `-p, --prefix ` | API prefix URL to filter requests | -| `--format ` | Input format: `auto`, `har`, `mitmproxy` (default: `auto`) | -| `--exclude-patterns ` | Comma-separated globs; matching paths are dropped entirely. `*` = single segment, `**` = any subtree. E.g. `/static/**,*.css` | -| `--include-patterns ` | Comma-separated globs; matching paths are emitted without `ignore:` (auto-activated for `generate`) | -| `--max-input-size ` | Maximum input file size (default: `2GiB`). Accepts suffixes: `KiB`, `MiB`, `GiB` | -| `--allow-symlinks` | Allow symlinked input files (default: rejected for safety) | -| `--strict` | Treat warnings as errors; exit code 2 if any cap fires, flow is rejected, or parse error occurs | -| `--report ` | Write a structured JSON processing report to the given path | - -### `generate` - -Generate an OpenAPI 3.0 spec from captured traffic using a curated templates file. 
- -``` -mitm2openapi generate [OPTIONS] -i -t -o -p -``` - -| Option | Description | -|--------|-------------| -| `-i, --input ` | Input file (flow dump or HAR) | -| `-t, --templates ` | Templates YAML file (from `discover`) | -| `-o, --output ` | Output OpenAPI YAML file | -| `-p, --prefix ` | API prefix URL | -| `--format ` | Input format: `auto`, `har`, `mitmproxy` (default: `auto`) | -| `--openapi-title ` | Custom title for the spec | -| `--openapi-version <VER>` | Custom spec version (default: `1.0.0`) | -| `--exclude-headers <LIST>` | Comma-separated headers to exclude | -| `--exclude-cookies <LIST>` | Comma-separated cookies to exclude | -| `--include-headers` | Include headers in the spec | -| `--ignore-images` | Ignore image content types | -| `--suppress-params` | Suppress parameter suggestions | -| `--tags-overrides <JSON>` | JSON string for tag overrides | -| `--max-input-size <BYTES>` | Maximum input file size (default: `2GiB`). Accepts suffixes: `KiB`, `MiB`, `GiB` | -| `--max-payload-size <BYTES>` | Maximum tnetstring payload size (default: `256MiB`) | -| `--max-depth <N>` | Maximum tnetstring nesting depth (default: `256`) | -| `--max-body-size <BYTES>` | Maximum request/response body size (default: `64MiB`) | -| `--allow-symlinks` | Allow symlinked input files (default: rejected for safety) | -| `--strict` | Treat warnings as errors; exit code 2 if any cap fires, flow is rejected, or parse error occurs | -| `--report <PATH>` | Write a structured JSON processing report to the given path | - -</details> - -## Resource Limits - -To prevent denial-of-service when processing untrusted captures, `mitm2openapi` -enforces several configurable limits: - -| Flag | Default | Purpose | -|------|---------|---------| -| `--max-input-size` | 2 GiB | Reject files larger than this before reading | -| `--max-payload-size` | 256 MiB | Cap on individual tnetstring payload allocation | -| `--max-depth` | 256 | Recursion depth limit for nested tnetstring structures | -| 
`--max-body-size` | 64 MiB | Maximum request/response body considered during schema inference | -| `--allow-symlinks` | off | By default, symlinked inputs are rejected to prevent path-traversal on shared CI runners | - -In addition to the configurable limits above, the following per-field caps are -applied unconditionally to prevent data corruption: - -| Field | Cap | Behaviour | -|-------|-----|-----------| -| Header name | 8 KiB | Dropped (other headers still processed) | -| Header value | 64 KiB | Truncated to cap | -| Form fields per request | 1 000 | Excess fields ignored | -| URL scheme | `http` / `https` only | Non-HTTP flows silently skipped | -| Port number | 1–65 535 | Out-of-range port drops the request | -| HTTP status code | 100–599 | Invalid codes treated as no response | - -Identity fields (scheme, host, path, method, header names) require valid UTF-8. -Flows with non-UTF-8 identity bytes are skipped to prevent data aliasing through -replacement-character collisions. Control characters in paths are stripped -automatically. - -Increase `--max-input-size` if you work with captures larger than 2 GiB (e.g. -`--max-input-size 8GiB`). The other limits rarely need tuning. - -Both mitmproxy flow files and HAR files are processed incrementally — memory usage -stays bounded regardless of input size. - -## Diagnostics - -When the tnetstring parser encounters corruption in a mitmproxy flow file, it -halts and emits a warn-level log with the byte offset, number of successfully -parsed entries, and an error classification. No resync is attempted — binary -payloads can contain bytes that mimic valid tnetstring length prefixes, so -scanning forward would produce phantom flows. - -### Structured report (`--report`) - -Pass `--report <PATH>` to either `discover` or `generate` to write a JSON -processing summary. This is useful for CI pipelines that need structured data -instead of log scraping. 
- -```json -{ - "report_version": 1, - "tool_version": "0.2.3", - "input": { - "path": "capture.flow", - "format": "Auto", - "size_bytes": 102400 - }, - "result": { - "flows_read": 150, - "flows_emitted": 148, - "paths_in_spec": 12 - }, - "events": { - "parse_error": { - "TNetString parse error at byte 98304: unexpected end of input": 1 - } - } -} -``` - -### Strict mode +## Documentation -Pass `--strict` to either `discover` or `generate` to treat any warning-level -event as a hard failure. The process exits with code 2 if any resource cap -fired, a flow was rejected, or a parse error was encountered. - -This is designed for CI gates where silent degradation is unacceptable: - -```bash -mitm2openapi discover -i capture.flow -o templates.yaml -p https://api.example.com --strict \ - || echo "FAIL: corrupt or over-limit flows detected" -``` - -Without `--strict`, the same conditions are logged at warn level and processing -continues (exit code 0). - -## Supported Formats - -| Format | Versions | Extension | -|--------|----------|-----------| -| mitmproxy flow dumps | v19, v20, v21 | `.flow` | -| HAR (HTTP Archive) | 1.2 (incrementally parsed) | `.har` | - -Format is auto-detected from file content. Use `--format` to override. - -## Migration from Python mitmproxy2swagger - -| Python (`mitmproxy2swagger`) | Rust (`mitm2openapi`) | -|-----|-----| -| `pip install mitmproxy2swagger` | Single binary, no runtime | -| `mitmproxy2swagger -i <file> -o <spec> -p <prefix>` | Two-step: `discover` then `generate` | -| Edits spec file in-place | Separate templates file for curation | -| Requires Python 3.x + mitmproxy | Standalone binary | -| Supports mitmproxy only | Supports mitmproxy flow dumps + HAR | - -### Key differences - -- **Two-step workflow**: `discover` produces a templates file; you curate it; `generate` produces the final spec. This separates endpoint selection from spec generation. -- **Templates file**: Discovered endpoints are prefixed with `ignore:`. 
Remove the prefix to include an endpoint. This replaces editing the output spec directly. -- **No Python dependency**: Ships as a single static binary for Linux, macOS, and Windows. -- **HAR support**: Process HAR exports from browser DevTools or other HTTP tools. +Full documentation at **[arkptz.github.io/mitm2openapi](https://arkptz.github.io/mitm2openapi/)** — covers installation, traffic capture setup, the full discover → curate → generate pipeline, CLI reference, resource limits, filtering, strict mode, format details, benchmarks, and security model. ## Benchmarks -Automated CI benchmark runs weekly against the Python original -([`mitmproxy2swagger`](https://github.com/alufers/mitmproxy2swagger)). See -[docs/benchmarks.md](docs/benchmarks.md) for the latest timing and memory -comparison on a ~80 MB synthetic capture, or -trigger a fresh run via -[Actions → Benchmark](../../actions/workflows/bench.yml). - -Reproduce locally with the commands documented in the workflow file. +Automated CI benchmarks run weekly against the Python original. See [docs/benchmarks.md](docs/benchmarks.md) for the latest comparison on a ~80 MB synthetic capture. 
## Contributing diff --git a/book/book.toml b/book/book.toml new file mode 100644 index 0000000..5567b70 --- /dev/null +++ b/book/book.toml @@ -0,0 +1,58 @@ +[book] +title = "mitm2openapi" +authors = ["Arkptz"] +description = "Convert mitmproxy flow dumps and HAR files to OpenAPI 3.0 specs" +src = "src" +language = "en" + +[build] +build-dir = "../target/book" +create-missing = false + +[output.html] +git-repository-url = "https://github.com/Arkptz/mitm2openapi" +edit-url-template = "https://github.com/Arkptz/mitm2openapi/edit/main/book/{path}" +default-theme = "ayu" +preferred-dark-theme = "ayu" +site-url = "/mitm2openapi/" +additional-css = ["./mdbook-admonish.css"] + +[output.html.fold] +enable = true +level = 1 + +[output.html.search] +enable = true +limit-results = 20 +teaser-word-count = 30 +use-boolean-and = true +boost-title = 2 +boost-hierarchy = 1 +boost-paragraph = 1 +expand = true +heading-split-level = 3 + +[output.linkcheck] +warning-policy = "error" +follow-web-links = false +exclude = [ + # CHANGELOG: [Unreleased], [skip ci], [0.1.0] + '^Unreleased$', + '^skip ci$', + '^\d+\.\d+(\.\d+)?$', + # Benchmark table units: Mean [s], Min [s], Max [s] + '^s$', + # Benchmark workflow path inside included docs/benchmarks.md + '\.github/workflows/bench\.yml', +] + +[preprocessor.toc] +command = "mdbook-toc" +renderer = ["html"] + +[preprocessor.admonish] +command = "mdbook-admonish" +assets_version = "3.0.2" # do not edit: managed by `mdbook-admonish install` + +[preprocessor.mermaid] +command = "mdbook-mermaid" diff --git a/book/mdbook-admonish.css b/book/mdbook-admonish.css new file mode 100644 index 0000000..45aeff0 --- /dev/null +++ b/book/mdbook-admonish.css @@ -0,0 +1,348 @@ +@charset "UTF-8"; +:is(.admonition) { + display: flow-root; + margin: 1.5625em 0; + padding: 0 1.2rem; + color: var(--fg); + page-break-inside: avoid; + background-color: var(--bg); + border: 0 solid black; + border-inline-start-width: 0.4rem; + border-radius: 0.2rem; + box-shadow: 0 
0.2rem 1rem rgba(0, 0, 0, 0.05), 0 0 0.1rem rgba(0, 0, 0, 0.1); +} +@media print { + :is(.admonition) { + box-shadow: none; + } +} +:is(.admonition) > * { + box-sizing: border-box; +} +:is(.admonition) :is(.admonition) { + margin-top: 1em; + margin-bottom: 1em; +} +:is(.admonition) > .tabbed-set:only-child { + margin-top: 0; +} +html :is(.admonition) > :last-child { + margin-bottom: 1.2rem; +} + +a.admonition-anchor-link { + display: none; + position: absolute; + left: -1.2rem; + padding-right: 1rem; +} +a.admonition-anchor-link:link, a.admonition-anchor-link:visited { + color: var(--fg); +} +a.admonition-anchor-link:link:hover, a.admonition-anchor-link:visited:hover { + text-decoration: none; +} +a.admonition-anchor-link::before { + content: "§"; +} + +:is(.admonition-title, summary.admonition-title) { + position: relative; + min-height: 4rem; + margin-block: 0; + margin-inline: -1.6rem -1.2rem; + padding-block: 0.8rem; + padding-inline: 4.4rem 1.2rem; + font-weight: 700; + background-color: rgba(68, 138, 255, 0.1); + print-color-adjust: exact; + -webkit-print-color-adjust: exact; + display: flex; +} +:is(.admonition-title, summary.admonition-title) p { + margin: 0; +} +html :is(.admonition-title, summary.admonition-title):last-child { + margin-bottom: 0; +} +:is(.admonition-title, summary.admonition-title)::before { + position: absolute; + top: 0.625em; + inset-inline-start: 1.6rem; + width: 2rem; + height: 2rem; + background-color: #448aff; + print-color-adjust: exact; + -webkit-print-color-adjust: exact; + mask-image: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"></svg>'); + -webkit-mask-image: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"></svg>'); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-size: contain; + content: ""; +} +:is(.admonition-title, summary.admonition-title):hover a.admonition-anchor-link { 
+ display: initial; +} + +details.admonition > summary.admonition-title::after { + position: absolute; + top: 0.625em; + inset-inline-end: 1.6rem; + height: 2rem; + width: 2rem; + background-color: currentcolor; + mask-image: var(--md-details-icon); + -webkit-mask-image: var(--md-details-icon); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-size: contain; + content: ""; + transform: rotate(0deg); + transition: transform 0.25s; +} +details[open].admonition > summary.admonition-title::after { + transform: rotate(90deg); +} + +:root { + --md-details-icon: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M8.59 16.58 13.17 12 8.59 7.41 10 6l6 6-6 6-1.41-1.42Z'/></svg>"); +} + +:root { + --md-admonition-icon--admonish-note: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M20.71 7.04c.39-.39.39-1.04 0-1.41l-2.34-2.34c-.37-.39-1.02-.39-1.41 0l-1.84 1.83 3.75 3.75M3 17.25V21h3.75L17.81 9.93l-3.75-3.75L3 17.25z'/></svg>"); + --md-admonition-icon--admonish-abstract: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M17 9H7V7h10m0 6H7v-2h10m-3 6H7v-2h7M12 3a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m7 0h-4.18C14.4 1.84 13.3 1 12 1c-1.3 0-2.4.84-2.82 2H5a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2V5a2 2 0 0 0-2-2z'/></svg>"); + --md-admonition-icon--admonish-info: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M13 9h-2V7h2m0 10h-2v-6h2m-1-9A10 10 0 0 0 2 12a10 10 0 0 0 10 10 10 10 0 0 0 10-10A10 10 0 0 0 12 2z'/></svg>"); + --md-admonition-icon--admonish-tip: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M17.66 11.2c-.23-.3-.51-.56-.77-.82-.67-.6-1.43-1.03-2.07-1.66C13.33 7.26 13 4.85 13.95 3c-.95.23-1.78.75-2.49 1.32-2.59 
2.08-3.61 5.75-2.39 8.9.04.1.08.2.08.33 0 .22-.15.42-.35.5-.23.1-.47.04-.66-.12a.58.58 0 0 1-.14-.17c-1.13-1.43-1.31-3.48-.55-5.12C5.78 10 4.87 12.3 5 14.47c.06.5.12 1 .29 1.5.14.6.41 1.2.71 1.73 1.08 1.73 2.95 2.97 4.96 3.22 2.14.27 4.43-.12 6.07-1.6 1.83-1.66 2.47-4.32 1.53-6.6l-.13-.26c-.21-.46-.77-1.26-.77-1.26m-3.16 6.3c-.28.24-.74.5-1.1.6-1.12.4-2.24-.16-2.9-.82 1.19-.28 1.9-1.16 2.11-2.05.17-.8-.15-1.46-.28-2.23-.12-.74-.1-1.37.17-2.06.19.38.39.76.63 1.06.77 1 1.98 1.44 2.24 2.8.04.14.06.28.06.43.03.82-.33 1.72-.93 2.27z'/></svg>"); + --md-admonition-icon--admonish-success: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='m9 20.42-6.21-6.21 2.83-2.83L9 14.77l9.88-9.89 2.83 2.83L9 20.42z'/></svg>"); + --md-admonition-icon--admonish-question: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='m15.07 11.25-.9.92C13.45 12.89 13 13.5 13 15h-2v-.5c0-1.11.45-2.11 1.17-2.83l1.24-1.26c.37-.36.59-.86.59-1.41a2 2 0 0 0-2-2 2 2 0 0 0-2 2H8a4 4 0 0 1 4-4 4 4 0 0 1 4 4 3.2 3.2 0 0 1-.93 2.25M13 19h-2v-2h2M12 2A10 10 0 0 0 2 12a10 10 0 0 0 10 10 10 10 0 0 0 10-10c0-5.53-4.5-10-10-10z'/></svg>"); + --md-admonition-icon--admonish-warning: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M13 14h-2V9h2m0 9h-2v-2h2M1 21h22L12 2 1 21z'/></svg>"); + --md-admonition-icon--admonish-failure: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M20 6.91 17.09 4 12 9.09 6.91 4 4 6.91 9.09 12 4 17.09 6.91 20 12 14.91 17.09 20 20 17.09 14.91 12 20 6.91z'/></svg>"); + --md-admonition-icon--admonish-danger: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M11 15H6l7-14v8h5l-7 14v-8z'/></svg>"); + --md-admonition-icon--admonish-bug: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' 
viewBox='0 0 24 24'><path d='M14 12h-4v-2h4m0 6h-4v-2h4m6-6h-2.81a5.985 5.985 0 0 0-1.82-1.96L17 4.41 15.59 3l-2.17 2.17a6.002 6.002 0 0 0-2.83 0L8.41 3 7 4.41l1.62 1.63C7.88 6.55 7.26 7.22 6.81 8H4v2h2.09c-.05.33-.09.66-.09 1v1H4v2h2v1c0 .34.04.67.09 1H4v2h2.81c1.04 1.79 2.97 3 5.19 3s4.15-1.21 5.19-3H20v-2h-2.09c.05-.33.09-.66.09-1v-1h2v-2h-2v-1c0-.34-.04-.67-.09-1H20V8z'/></svg>"); + --md-admonition-icon--admonish-example: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M7 13v-2h14v2H7m0 6v-2h14v2H7M7 7V5h14v2H7M3 8V5H2V4h2v4H3m-1 9v-1h3v4H2v-1h2v-.5H3v-1h1V17H2m2.25-7a.75.75 0 0 1 .75.75c0 .2-.08.39-.21.52L3.12 13H5v1H2v-.92L4 11H2v-1h2.25z'/></svg>"); + --md-admonition-icon--admonish-quote: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M14 17h3l2-4V7h-6v6h3M6 17h3l2-4V7H5v6h3l-2 4z'/></svg>"); +} + +:is(.admonition):is(.admonish-note) { + border-color: #448aff; +} + +:is(.admonish-note) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(68, 138, 255, 0.1); +} +:is(.admonish-note) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #448aff; + mask-image: var(--md-admonition-icon--admonish-note); + -webkit-mask-image: var(--md-admonition-icon--admonish-note); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-abstract, .admonish-summary, .admonish-tldr) { + border-color: #00b0ff; +} + +:is(.admonish-abstract, .admonish-summary, .admonish-tldr) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(0, 176, 255, 0.1); +} +:is(.admonish-abstract, .admonish-summary, .admonish-tldr) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #00b0ff; + mask-image: var(--md-admonition-icon--admonish-abstract); + -webkit-mask-image: 
var(--md-admonition-icon--admonish-abstract); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-info, .admonish-todo) { + border-color: #00b8d4; +} + +:is(.admonish-info, .admonish-todo) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(0, 184, 212, 0.1); +} +:is(.admonish-info, .admonish-todo) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #00b8d4; + mask-image: var(--md-admonition-icon--admonish-info); + -webkit-mask-image: var(--md-admonition-icon--admonish-info); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-tip, .admonish-hint, .admonish-important) { + border-color: #00bfa5; +} + +:is(.admonish-tip, .admonish-hint, .admonish-important) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(0, 191, 165, 0.1); +} +:is(.admonish-tip, .admonish-hint, .admonish-important) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #00bfa5; + mask-image: var(--md-admonition-icon--admonish-tip); + -webkit-mask-image: var(--md-admonition-icon--admonish-tip); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-success, .admonish-check, .admonish-done) { + border-color: #00c853; +} + +:is(.admonish-success, .admonish-check, .admonish-done) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(0, 200, 83, 0.1); +} +:is(.admonish-success, .admonish-check, .admonish-done) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #00c853; + mask-image: var(--md-admonition-icon--admonish-success); + -webkit-mask-image: var(--md-admonition-icon--admonish-success); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; 
+ mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-question, .admonish-help, .admonish-faq) { + border-color: #64dd17; +} + +:is(.admonish-question, .admonish-help, .admonish-faq) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(100, 221, 23, 0.1); +} +:is(.admonish-question, .admonish-help, .admonish-faq) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #64dd17; + mask-image: var(--md-admonition-icon--admonish-question); + -webkit-mask-image: var(--md-admonition-icon--admonish-question); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-warning, .admonish-caution, .admonish-attention) { + border-color: #ff9100; +} + +:is(.admonish-warning, .admonish-caution, .admonish-attention) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(255, 145, 0, 0.1); +} +:is(.admonish-warning, .admonish-caution, .admonish-attention) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #ff9100; + mask-image: var(--md-admonition-icon--admonish-warning); + -webkit-mask-image: var(--md-admonition-icon--admonish-warning); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-failure, .admonish-fail, .admonish-missing) { + border-color: #ff5252; +} + +:is(.admonish-failure, .admonish-fail, .admonish-missing) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(255, 82, 82, 0.1); +} +:is(.admonish-failure, .admonish-fail, .admonish-missing) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #ff5252; + mask-image: var(--md-admonition-icon--admonish-failure); + -webkit-mask-image: var(--md-admonition-icon--admonish-failure); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + 
mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-danger, .admonish-error) { + border-color: #ff1744; +} + +:is(.admonish-danger, .admonish-error) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(255, 23, 68, 0.1); +} +:is(.admonish-danger, .admonish-error) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #ff1744; + mask-image: var(--md-admonition-icon--admonish-danger); + -webkit-mask-image: var(--md-admonition-icon--admonish-danger); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-bug) { + border-color: #f50057; +} + +:is(.admonish-bug) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(245, 0, 87, 0.1); +} +:is(.admonish-bug) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #f50057; + mask-image: var(--md-admonition-icon--admonish-bug); + -webkit-mask-image: var(--md-admonition-icon--admonish-bug); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-example) { + border-color: #7c4dff; +} + +:is(.admonish-example) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(124, 77, 255, 0.1); +} +:is(.admonish-example) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #7c4dff; + mask-image: var(--md-admonition-icon--admonish-example); + -webkit-mask-image: var(--md-admonition-icon--admonish-example); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +:is(.admonition):is(.admonish-quote, .admonish-cite) { + border-color: #9e9e9e; +} + +:is(.admonish-quote, .admonish-cite) > :is(.admonition-title, summary.admonition-title) { + background-color: rgba(158, 158, 158, 0.1); +} 
+:is(.admonish-quote, .admonish-cite) > :is(.admonition-title, summary.admonition-title)::before { + background-color: #9e9e9e; + mask-image: var(--md-admonition-icon--admonish-quote); + -webkit-mask-image: var(--md-admonition-icon--admonish-quote); + mask-repeat: no-repeat; + -webkit-mask-repeat: no-repeat; + mask-size: contain; + -webkit-mask-repeat: no-repeat; +} + +.navy :is(.admonition) { + background-color: var(--sidebar-bg); +} + +.ayu :is(.admonition), +.coal :is(.admonition) { + background-color: var(--theme-hover); +} + +.rust :is(.admonition) { + background-color: var(--sidebar-bg); + color: var(--sidebar-fg); +} +.rust .admonition-anchor-link:link, .rust .admonition-anchor-link:visited { + color: var(--sidebar-fg); +} diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md new file mode 100644 index 0000000..4acbded --- /dev/null +++ b/book/src/SUMMARY.md @@ -0,0 +1,30 @@ +# Summary + +[Introduction](./introduction.md) + +# Getting started +- [Installation](./getting-started/installation.md) +- [Quick start](./getting-started/quick-start.md) +- [Capturing traffic](./getting-started/capturing.md) + +# Usage +- [Discover, curate, generate](./usage/pipeline.md) +- [Filtering endpoints](./usage/filtering.md) +- [Resource limits](./usage/resource-limits.md) +- [Strict mode](./usage/strict-mode.md) +- [Processing reports](./usage/reports.md) +- [CLI reference](./usage/cli-reference.md) + +# Formats +- [mitmproxy flow dumps](./formats/mitmproxy.md) +- [HAR files](./formats/har.md) + +# Reference +- [Performance & benchmarks](./reference/benchmarks.md) +- [Security model](./reference/security.md) +- [Diagnostics](./reference/diagnostics.md) + +--- + +[Changelog](./changelog.md) +[Contributing](./contributing.md) diff --git a/book/src/changelog.md b/book/src/changelog.md new file mode 100644 index 0000000..9b9ae8b --- /dev/null +++ b/book/src/changelog.md @@ -0,0 +1,7 @@ +# Changelog + +{{#include ../../CHANGELOG.md}} + +[Unreleased]: 
https://github.com/Arkptz/mitm2openapi/commits/main +[skip ci]: #changelog +[0.1.0]: https://github.com/Arkptz/mitm2openapi/releases/tag/v0.1.0 diff --git a/book/src/contributing.md b/book/src/contributing.md new file mode 100644 index 0000000..2913f2f --- /dev/null +++ b/book/src/contributing.md @@ -0,0 +1,3 @@ +# Contributing + +{{#include ../../CONTRIBUTING.md}} diff --git a/book/src/formats/har.md b/book/src/formats/har.md new file mode 100644 index 0000000..8d2ddce --- /dev/null +++ b/book/src/formats/har.md @@ -0,0 +1,77 @@ +# HAR files + +`mitm2openapi` reads [HAR (HTTP Archive)](https://w3c.github.io/web-performance/specs/HAR/Overview.html) +files — the standard format for exporting browser network traffic. HAR version 1.2 is supported. + +## Producing HAR files + +### Browser DevTools + +All modern browsers export HAR from their Network tab: + +- **Chrome/Chromium**: DevTools → Network → right-click → "Save all as HAR with content" +- **Firefox**: DevTools → Network → gear icon → "Save All As HAR" +- **Safari**: Web Inspector → Network → Export button + +### HTTP proxies + +Several proxy tools export HAR: + +- [Charles Proxy](https://www.charlesproxy.com/) — File → Export Session → HAR +- [Fiddler](https://www.telerik.com/fiddler) — File → Export Sessions → HTTPArchive +- [Proxyman](https://proxyman.io/) — Export as HAR + +### Programmatic generation + +Libraries like [`puppeteer`](https://pptr.dev/) and [`playwright`](https://playwright.dev/) +can produce HAR files from automated browser sessions: + +```javascript +// Playwright example +const context = await browser.newContext({ + recordHar: { path: 'capture.har' } +}); +// ... run your test +await context.close(); // HAR is written on close +``` + +## Usage + +```bash +mitm2openapi discover \ + -i capture.har \ + -o templates.yaml \ + -p "https://api.example.com" +``` + +Format is auto-detected. Use `--format har` to force HAR parsing if auto-detection fails. 
+ +## HAR vs mitmproxy flows + +| Aspect | mitmproxy flow | HAR | +|--------|---------------|-----| +| Source | mitmproxy proxy | Browser DevTools, HTTP proxies | +| Format | Binary (tnetstring) | JSON | +| Response bodies | Always present | Sometimes base64-encoded | +| HTTPS | Decrypted by proxy | Decrypted by browser | +| File size | Compact binary | Larger (JSON overhead) | +| Streaming | Native | Incremental JSON parsing | + +Both formats produce equivalent OpenAPI specs. Choose based on your capture workflow: + +- **mitmproxy flows** for server-side proxying, CI pipelines, and automated captures +- **HAR files** for browser-based testing, manual exploration, and when you already have DevTools open + +## Incremental parsing + +HAR files are parsed incrementally — the entire JSON is not loaded into memory at once. +This means memory usage stays bounded even for large HAR exports (hundreds of megabytes). + +## Known limitations + +- **Base64-encoded bodies** — some HAR exporters base64-encode response bodies. Decode + failures are logged as warnings and the body is skipped (not silently dropped). +- **Compressed content** — if the HAR exporter did not decompress response bodies, + `mitm2openapi` sees the compressed bytes. Most browser DevTools decompress automatically. +- **Timing data** — HAR timing information (DNS, connect, TLS) is ignored; only request and + response data is used for spec generation. diff --git a/book/src/formats/mitmproxy.md b/book/src/formats/mitmproxy.md new file mode 100644 index 0000000..46a2b65 --- /dev/null +++ b/book/src/formats/mitmproxy.md @@ -0,0 +1,58 @@ +# mitmproxy flow dumps + +`mitm2openapi` reads mitmproxy's native binary flow format. This is the recommended input +format — it captures the richest data and is produced directly by `mitmdump` and `mitmweb`. 
+ +## Supported versions + +| Flow format version | mitmproxy version | Status | +|---|---|---| +| v19 | mitmproxy 8.x | Supported | +| v20 | mitmproxy 9.x | Supported | +| v21 | mitmproxy 10.x | Supported | + +The flow format is auto-detected from file content. No version flag is needed. + +## How flow files work + +Flow files use the [tnetstring](https://tnetstrings.info/) serialization format. Each flow +is a sequence of key-value pairs representing a complete HTTP request-response cycle. + +A typical flow contains: + +- **Request**: method, URL (scheme, host, port, path), headers, body +- **Response**: status code, headers, body +- **Metadata**: timestamps, flow ID, client/server addresses + +`mitm2openapi` extracts the request and response data relevant to OpenAPI spec generation +and discards metadata. + +## Capturing flow files + +```bash +# Record all traffic through the proxy +mitmdump -w capture.flow + +# Record only traffic to a specific host +mitmdump -w capture.flow --set flow_detail=0 \ + --set save_stream_filter='~d api.example.com' +``` + +See [capturing traffic](../getting-started/capturing.md) for full setup instructions. + +## Directory input + +If you pass a directory path to `-i`, `mitm2openapi` reads all `.flow` files in that +directory (non-recursive). This is useful when you have traffic split across multiple +capture sessions. + +## Known limitations + +- **No WebSocket frames** — WebSocket upgrade requests are captured, but frame-level data + is not used for spec generation +- **No gRPC** — binary protocol buffers inside HTTP/2 frames are not decoded +- **Corrupt files** — when the tnetstring parser hits corruption, it stops and reports the + byte offset. No resync is attempted because binary payloads can contain bytes that mimic + valid tnetstring length prefixes. See [diagnostics](../reference/diagnostics.md) for details. 
+- **Large payloads** — individual tnetstring payloads are capped at 256 MiB by default + (adjustable via `--max-payload-size`) diff --git a/book/src/getting-started/capturing.md b/book/src/getting-started/capturing.md new file mode 100644 index 0000000..999ac39 --- /dev/null +++ b/book/src/getting-started/capturing.md @@ -0,0 +1,121 @@ +# Capturing traffic + +Before you can generate an OpenAPI spec, you need a captured traffic file. This chapter +covers the most common ways to capture HTTP traffic. + +## Option 1: mitmproxy (recommended) + +[mitmproxy](https://mitmproxy.org/) is a free, open-source HTTPS proxy. It captures traffic +in its own binary flow format that `mitm2openapi` reads natively. + +### Install mitmproxy + +```bash +# macOS +brew install mitmproxy + +# Linux (pip) +pip install mitmproxy + +# Or download from https://mitmproxy.org/ +``` + +See the [mitmproxy installation docs](https://docs.mitmproxy.org/stable/overview-installation/) +for platform-specific instructions. + +### Capture with mitmdump + +`mitmdump` is the non-interactive version of mitmproxy, ideal for scripted captures: + +```bash +# Start the proxy and write all traffic to a flow file +mitmdump -w capture.flow + +# In another terminal, route your HTTP client through the proxy: +curl --proxy http://localhost:8080 https://api.example.com/users +``` + +The default proxy port is 8080. Use `-p` to change it: + +```bash +mitmdump -w capture.flow -p 9090 +``` + +### Capture with mitmweb + +`mitmweb` provides a browser-based UI for inspecting traffic in real time: + +```bash +mitmweb -w capture.flow +# Open http://localhost:8081 in your browser to inspect traffic +``` + +### HTTPS traffic + +For HTTPS, you need to install the mitmproxy CA certificate on the client machine. +After starting mitmproxy, navigate to `http://mitm.it` from the proxied client to +download and install the certificate. 
+ +See the [mitmproxy certificate docs](https://docs.mitmproxy.org/stable/concepts-certificates/) +for detailed instructions. + +### Tips + +- Use `mitmdump --set flow_detail=0` for minimal console output during long captures +- Combine with `--set save_stream_filter` to capture only specific hosts +- The flow format is versioned (v19/v20/v21) — `mitm2openapi` supports all three + +## Option 2: Browser DevTools (HAR export) + +All modern browsers can export captured network traffic as HAR (HTTP Archive) files. + +### Chrome / Chromium + +1. Open DevTools (`F12` or `Ctrl+Shift+I`) +2. Switch to the **Network** tab +3. Ensure recording is active (red circle icon) +4. Perform the actions you want to capture +5. Right-click in the request list → **Save all as HAR with content** + +### Firefox + +1. Open DevTools (`F12`) +2. Switch to the **Network** tab +3. Perform the actions you want to capture +4. Click the gear icon → **Save All As HAR** + +### Safari + +1. Enable the Develop menu in Preferences → Advanced +2. Open Web Inspector (`Cmd+Option+I`) +3. Switch to the **Network** tab +4. Perform the actions +5. Click **Export** in the toolbar + +```admonish note +HAR files from browser DevTools contain the full request and response bodies. Sensitive data +(cookies, tokens, passwords) will be present in the export. Sanitize before sharing. 
+``` + +## Option 3: Other HTTP proxies + +Any tool that produces HAR 1.2 output works with `mitm2openapi`: + +- [Charles Proxy](https://www.charlesproxy.com/) — export sessions as HAR via File → Export +- [Fiddler](https://www.telerik.com/fiddler) — File → Export Sessions → HTTPArchive +- [Proxyman](https://proxyman.io/) — export as HAR from the session menu + +## What to capture + +For the best OpenAPI spec, capture diverse traffic: + +- **Multiple endpoints** — the more paths covered, the more complete the spec +- **Different HTTP methods** — GET, POST, PUT, DELETE on the same resource +- **Various response codes** — 200, 400, 404, 500 responses produce richer schemas +- **Query parameters** — include requests with different query strings +- **Request bodies** — POST/PUT with different payloads improve body schema inference + +## Next steps + +Once you have a capture file, proceed to the [quick start](./quick-start.md) or +learn about the full [discover → curate → generate pipeline](../usage/pipeline.md). diff --git a/book/src/getting-started/installation.md b/book/src/getting-started/installation.md new file mode 100644 index 0000000..fc8a69c --- /dev/null +++ b/book/src/getting-started/installation.md @@ -0,0 +1,42 @@ +# Installation + +## From binary releases + +Download a pre-built binary for your platform from +[GitHub Releases](https://github.com/Arkptz/mitm2openapi/releases). + +Binaries are available for Linux (x86_64, aarch64), macOS (x86_64, aarch64), and +Windows (x86_64). + +```bash +# Example: Linux x86_64 — replace <VERSION> with the release tag (e.g. 
v0.5.1) +curl -L "https://github.com/Arkptz/mitm2openapi/releases/download/<VERSION>/mitm2openapi-<VERSION>-x86_64-unknown-linux-gnu.tar.gz" \ + | tar xz +sudo mv mitm2openapi /usr/local/bin/ +``` + +## From source (via Cargo) + +If you have a Rust toolchain installed: + +```bash +cargo install --git https://github.com/Arkptz/mitm2openapi +``` + +Or from [crates.io](https://crates.io/crates/mitm2openapi): + +```bash +cargo install mitm2openapi +``` + +## Verify installation + +```bash +mitm2openapi --version +``` + +## Shell completions + +`mitm2openapi` uses [clap](https://docs.rs/clap) for argument parsing. Shell completions +are not yet bundled, but you can generate them for most shells via `clap_complete` if building +from source. diff --git a/book/src/getting-started/quick-start.md b/book/src/getting-started/quick-start.md new file mode 100644 index 0000000..1f0db12 --- /dev/null +++ b/book/src/getting-started/quick-start.md @@ -0,0 +1,98 @@ +# Quick start + +This walkthrough takes you from a traffic capture to a complete OpenAPI spec in under a minute. + +## Prerequisites + +- `mitm2openapi` installed ([see installation](./installation.md)) +- A captured traffic file — either a mitmproxy `.flow` dump or a `.har` export from browser DevTools + +If you do not have a capture yet, see [capturing traffic](./capturing.md) for setup instructions. + +## Step 1: Discover endpoints + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" +``` + +This scans every request in `capture.flow` that matches the prefix `https://api.example.com` +and writes a templates file listing all observed URL paths. + +## Step 2: Curate the templates + +Open `templates.yaml`. 
Each path is prefixed with `ignore:` by default: + +```yaml +x-path-templates: +- ignore:/api/users +- ignore:/api/users/{id} +- ignore:/api/products +- ignore:/static/bundle.js +``` + +Remove the `ignore:` prefix from paths you want in the final spec: + +```yaml +x-path-templates: +- /api/users +- /api/users/{id} +- /api/products +- ignore:/static/bundle.js +``` + +Paths still prefixed with `ignore:` are excluded from the generated spec. + +## Step 3: Generate the OpenAPI spec + +```bash +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" +``` + +The resulting `openapi.yaml` contains a valid OpenAPI 3.0 spec with paths, methods, +parameters, request bodies, and response schemas inferred from the captured traffic. + +## Skip the manual edit + +If you already know which paths matter, use glob filters to automate curation: + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --exclude-patterns '/static/**,/images/**,*.css,*.js,*.svg' \ + --include-patterns '/api/**,/v2/**' + +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" +``` + +Paths matching `--include-patterns` are auto-activated (no `ignore:` prefix). Paths matching +`--exclude-patterns` are dropped entirely. Everything else still gets `ignore:` for manual +review. + +See [filtering endpoints](../usage/filtering.md) for the full glob syntax reference. + +## HAR files + +The same workflow works with HAR files — just point `-i` at a `.har` file. The format is +auto-detected: + +```bash +mitm2openapi discover \ + -i capture.har \ + -o templates.yaml \ + -p "https://api.example.com" +``` + +See [HAR files](../formats/har.md) for details on exporting HARs from browser DevTools. 
diff --git a/book/src/introduction.md b/book/src/introduction.md new file mode 100644 index 0000000..eb62e18 --- /dev/null +++ b/book/src/introduction.md @@ -0,0 +1,51 @@ +# Introduction + +**mitm2openapi** converts [mitmproxy](https://mitmproxy.org/) flow dumps and HAR files into +[OpenAPI 3.0](https://spec.openapis.org/oas/v3.0.3) specifications. It ships as a single +static binary — no Python, no virtual environment, no runtime dependencies. + +It is a Rust rewrite of [mitmproxy2swagger](https://github.com/alufers/mitmproxy2swagger) by +[@alufers](https://github.com/alufers), who pioneered the "capture traffic, extract API spec" +workflow. Credit to the original project for the idea and reference implementation. + +## Why? + +The Python original works well but requires Python, `pip`, and `mitmproxy` installed in the +environment. For CI pipelines, slim Docker images, security audits, and one-off usage, that +dependency chain is friction. + +`mitm2openapi` ships as a single ~5 MB static binary. Drop it into any environment and run. +Same OpenAPI 3.0 output, plus first-class HAR support and glob-based filters for fully +unattended pipelines. 
+ +## Features + +- **Fast** — pure Rust, ~17× faster than the Python original ([benchmarks](./reference/benchmarks.md)) +- **Single static binary** — no Python, no venv, no pip, no runtime dependencies +- **Two-format support** — mitmproxy flow dumps (v19/v20/v21) and HAR 1.2 +- **Three-step workflow** — `discover` finds endpoints, you curate, `generate` emits OpenAPI 3.0 +- **Glob filters** — `--exclude-patterns` and `--include-patterns` for automated pipelines +- **Error recovery** — skips corrupt flows, continues processing +- **Auto-detection** — heuristic format detection from file content +- **Resource limits** — configurable caps prevent denial-of-service on untrusted input +- **Strict mode** — treat warnings as errors for CI gates +- **Structured reports** — `--report` outputs machine-readable JSON processing summaries +- **Battle-tested** — integration tests against Swagger Petstore and OWASP crAPI +- **Cross-platform** — Linux, macOS, Windows pre-built binaries + +## How it works + +The tool uses a three-step workflow: + +1. **Discover** — scan captured traffic and list all observed API endpoints +2. **Curate** — review the list and select which endpoints to include +3. **Generate** — produce a clean OpenAPI 3.0 spec from the selected endpoints + +This separates endpoint selection from spec generation, giving you full control over +what ends up in the final spec. + +## Next steps + +- [Install mitm2openapi](./getting-started/installation.md) +- [Run through the quick start](./getting-started/quick-start.md) +- [Learn about the full pipeline](./usage/pipeline.md) diff --git a/book/src/reference/benchmarks.md b/book/src/reference/benchmarks.md new file mode 100644 index 0000000..427d527 --- /dev/null +++ b/book/src/reference/benchmarks.md @@ -0,0 +1,7 @@ +# Performance & Benchmarks + +Results are regenerated weekly by the [benchmark workflow](https://github.com/Arkptz/mitm2openapi/blob/main/.github/workflows/bench.yml). 
See the workflow for the reproducible methodology. + +{{#include ../../../docs/benchmarks.md}} + +[s]: #timing diff --git a/book/src/reference/diagnostics.md b/book/src/reference/diagnostics.md new file mode 100644 index 0000000..753f3c6 --- /dev/null +++ b/book/src/reference/diagnostics.md @@ -0,0 +1,129 @@ +# Diagnostics + +<!-- toc --> + +`mitm2openapi` uses structured logging to report issues during processing. This chapter +covers how to interpret warnings, errors, and the structured report output. + +## Log levels + +Control verbosity with the `RUST_LOG` environment variable: + +```bash +# Default: warnings only +mitm2openapi discover -i capture.flow -o templates.yaml -p "https://api.example.com" + +# More detail +RUST_LOG=info mitm2openapi discover -i capture.flow -o templates.yaml -p "https://api.example.com" + +# Full debug output +RUST_LOG=debug mitm2openapi discover -i capture.flow -o templates.yaml -p "https://api.example.com" +``` + +## Common warnings + +### Parse errors (tnetstring) + +``` +WARN TNetString parse error at byte 98304: unexpected end of input (148 flows parsed successfully) +``` + +This means the mitmproxy flow file contains corrupt data starting at byte 98,304. The +parser halts immediately and the remaining bytes in the file are **not** processed. The +148 flows parsed before the corruption are still emitted. + +**No resync is attempted.** Binary payloads can contain bytes that mimic valid tnetstring +length prefixes, so scanning forward would produce phantom flows with fabricated data. 
+ +**What to do:** +- If the file was truncated during transfer, re-capture or re-download +- The 148 successfully parsed flows are still usable +- Use `--report` to capture the exact byte offset for debugging + +### Cap-fired events + +``` +WARN body size 68157440 exceeds cap 67108864, truncating +WARN header name exceeds 8192 bytes, dropping +WARN form field count 1247 exceeds cap 1000, ignoring excess +``` + +These indicate that a specific field in a flow exceeded the built-in or configured limit. +The affected field is truncated or dropped, but processing continues. + +**What to do:** +- Usually safe to ignore — the caps exist to prevent abuse, not normal traffic +- If you need the full data, increase the relevant `--max-*` flag +- Use `--strict` to fail on these if you need guaranteed completeness + +### Flow rejection events + +``` +WARN skipping flow: scheme "javascript" not in whitelist [http, https] +WARN skipping flow: invalid UTF-8 in host field +WARN skipping flow: port 0 out of valid range 1-65535 +``` + +These mean an entire flow was skipped because it failed validation. + +**What to do:** +- Non-HTTP flows (WebSocket upgrades, CONNECT tunnels) are expected to be skipped +- UTF-8 errors suggest the capture contains binary protocol data, not HTTP traffic +- Invalid port/status usually indicates corrupt flow data + +## Structured reports + +For machine-readable diagnostics, use `--report`: + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --report report.json +``` + +See [processing reports](../usage/reports.md) for the full JSON schema. 
+ +### Event categories in reports + +| Category | Examples | +|----------|---------| +| `parse_error` | Tnetstring corruption, HAR JSON syntax errors | +| `cap_fired` | Body too large, depth exceeded, form field count exceeded | +| `rejected` | Invalid scheme, non-UTF-8 identity fields, bad port/status | + +### Using reports in CI + +```bash +# Fail if any parse errors occurred +if jq -e '.events.parse_error | length > 0' report.json > /dev/null 2>&1; then + echo "Parse errors detected" + exit 1 +fi + +# Check flows-read vs flows-emitted ratio +RATIO=$(jq '.result.flows_emitted / .result.flows_read' report.json) +if (( $(echo "$RATIO < 0.9" | bc -l) )); then + echo "Warning: more than 10% of flows were dropped" +fi +``` + +## Strict mode interaction + +With `--strict`, any warning-level event causes exit code 2. This converts the +"informational" diagnostics above into hard failures: + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --strict \ + --report report.json + +# Exit code 2 if ANY warning was emitted +# report.json still written for post-mortem +``` + +See [strict mode](../usage/strict-mode.md) for details. diff --git a/book/src/reference/security.md b/book/src/reference/security.md new file mode 100644 index 0000000..7417ef4 --- /dev/null +++ b/book/src/reference/security.md @@ -0,0 +1,96 @@ +# Security model + +<!-- toc --> + +`mitm2openapi` processes untrusted binary input (traffic captures from unknown sources). +The security model is designed to prevent denial-of-service, data corruption, and +information leakage when handling adversarial input. + +## Threat model + +The primary threat is a **malicious capture file** — a `.flow` or `.har` file crafted to +exploit the parser. 
Scenarios include: + +- CI pipelines processing captures from untrusted contributors +- Shared analysis servers where multiple users submit captures +- Automated pipelines where the capture source is not fully controlled + +## Input validation layers + +### File-level checks + +Before reading any content: + +1. **File type** — only regular files are accepted. Symlinks, FIFOs, device files, and + directories are rejected unless `--allow-symlinks` is explicitly set. +2. **File size** — files exceeding `--max-input-size` (default 2 GiB) are rejected before + any bytes are read. +3. **TOCTOU caveat** — file metadata is checked via the path before reading to reject + symlinks, non-regular files, and oversized inputs. There is a small TOCTOU window + between the metadata check and the file open; mitigation via fd-based recheck after + open is a future enhancement. + +### Parser-level caps + +During parsing: + +| Cap | Default | Purpose | +|-----|---------|---------| +| Payload size | 256 MiB | Prevents OOM from oversized tnetstring values | +| Nesting depth | 256 | Prevents stack overflow from deeply nested structures | +| JSON depth | 64 | Prevents stack overflow in schema inference | +| Body size | 64 MiB | Limits memory for individual request/response bodies | + +These caps trigger `warn`-level events and skip the affected data. Use `--strict` to +treat them as hard errors. + +### Field-level validation + +For every flow: + +- **Scheme whitelist** — only `http` and `https` are accepted. Other schemes (e.g., + `javascript:`, `data:`) are silently skipped. +- **UTF-8 strictness** — identity fields (method, scheme, host, path, header names) must be + valid UTF-8. Invalid bytes cause the flow to be skipped, preventing data aliasing through + replacement-character collisions. +- **Port range** — port numbers must be 1--65,535. Out-of-range values drop the request. +- **Status code range** — HTTP status codes must be 100--599. 
+- **Control character stripping** — `0x00`--`0x1F` and `0x7F` in URL paths are removed. +- **Header caps** — header names over 8 KiB are dropped; values over 64 KiB are truncated. +- **Form field count** — at most 1,000 form fields per request are processed. + +### Output safety + +- **Atomic writes** — output files are written via a temporary file and renamed. If the write + fails (disk full, permission denied), the target path is left untouched. +- **No resync on corruption** — when the tnetstring parser encounters corrupt data, it halts + immediately. It does not scan forward looking for the next valid frame, because binary + payloads can contain bytes that look like valid length prefixes. + +## Streaming architecture + +Both mitmproxy and HAR inputs are processed incrementally. At no point is the entire capture +loaded into memory. This bounds peak RSS to the size of the largest single flow, regardless +of total file size. + +## Glob pattern safety + +The `--exclude-patterns` and `--include-patterns` flags use the +[globset](https://docs.rs/globset) crate, which compiles patterns into a DFA. This eliminates +exponential backtracking that was possible with the original recursive glob matcher. + +## Recommendations + +For processing untrusted captures: + +1. Do not use `--allow-symlinks` unless you control the filesystem +2. Keep `--max-input-size` at the default (2 GiB) or lower +3. Run with `--strict` to fail fast on any anomaly +4. Use `--report` to capture processing diagnostics for audit trails +5. 
Run in a sandboxed environment (container, VM) when processing captures from unknown sources + +## Related + +- [Resource limits](../usage/resource-limits.md) — configuring the caps +- [Strict mode](../usage/strict-mode.md) — CI enforcement +- [Diagnostics](./diagnostics.md) — interpreting warnings and errors diff --git a/book/src/usage/cli-reference.md b/book/src/usage/cli-reference.md new file mode 100644 index 0000000..5b1ca06 --- /dev/null +++ b/book/src/usage/cli-reference.md @@ -0,0 +1,120 @@ +# CLI reference + +<!-- toc --> + +```admonish warning +This reference was last synced with `mitm2openapi --help` at version 0.5.1. +If you notice a flag missing from your local `--help` output, the tool may be ahead of these +docs. [Open an issue](https://github.com/Arkptz/mitm2openapi/issues/new) to prompt an update. +``` + +## `mitm2openapi discover` + +Scan captured traffic and produce a templates file listing all observed endpoints. + +``` +mitm2openapi discover [OPTIONS] -i <INPUT> -o <OUTPUT> -p <PREFIX> +``` + +### Required arguments + +| Option | Description | +|--------|-------------| +| `-i, --input <PATH>` | Input file (flow dump or HAR) | +| `-o, --output <PATH>` | Output YAML templates file | +| `-p, --prefix <URL>` | API prefix URL to filter requests | + +### Optional arguments + +| Option | Default | Description | +|--------|---------|-------------| +| `--format <FORMAT>` | `auto` | Input format: `auto`, `har`, `mitmproxy` | +| `--exclude-patterns <GLOBS>` | | Comma-separated globs; matching paths are dropped entirely | +| `--include-patterns <GLOBS>` | | Comma-separated globs; matching paths are auto-activated | +| `--max-input-size <BYTES>` | `2GiB` | Maximum input file size. 
Accepts `KiB`, `MiB`, `GiB` suffixes | +| `--allow-symlinks` | off | Allow symlinked input files | +| `--strict` | off | Treat warnings as errors (exit code 2) | +| `--report <PATH>` | | Write structured JSON processing report | + +## `mitm2openapi generate` + +Generate an OpenAPI 3.0 spec from captured traffic using a curated templates file. + +``` +mitm2openapi generate [OPTIONS] -i <INPUT> -t <TEMPLATES> -o <OUTPUT> -p <PREFIX> +``` + +### Required arguments + +| Option | Description | +|--------|-------------| +| `-i, --input <PATH>` | Input file (flow dump or HAR) | +| `-t, --templates <PATH>` | Templates YAML file (from `discover`) | +| `-o, --output <PATH>` | Output OpenAPI YAML file | +| `-p, --prefix <URL>` | API prefix URL | + +### Optional arguments + +| Option | Default | Description | +|--------|---------|-------------| +| `--format <FORMAT>` | `auto` | Input format: `auto`, `har`, `mitmproxy` | +| `--openapi-title <TITLE>` | | Custom title for the spec | +| `--openapi-version <VER>` | `1.0.0` | Custom spec version | +| `--exclude-headers <LIST>` | | Comma-separated headers to exclude from spec | +| `--exclude-cookies <LIST>` | | Comma-separated cookies to exclude from spec | +| `--include-headers` | off | Include request headers in the spec | +| `--ignore-images` | off | Ignore image content types | +| `--suppress-params` | off | Suppress parameter suggestions | +| `--tags-overrides <JSON>` | | JSON string for tag overrides | +| `--max-input-size <BYTES>` | `2GiB` | Maximum input file size | +| `--max-payload-size <BYTES>` | `256MiB` | Maximum tnetstring payload size | +| `--max-depth <N>` | `256` | Maximum tnetstring nesting depth | +| `--max-body-size <BYTES>` | `64MiB` | Maximum request/response body size | +| `--allow-symlinks` | off | Allow symlinked input files | +| `--strict` | off | Treat warnings as errors (exit code 2) | +| `--report <PATH>` | | Write structured JSON processing report | + +## Common flag details + +### `--format` + +By 
default, the input format is auto-detected from a combination of file extension and +content sniffing: +- `.flow` extension or content starting with a tnetstring length prefix → mitmproxy format +- `.har` extension or content starting with `{` → HAR format + +Use `--format mitmproxy` or `--format har` to override auto-detection. + +### `--prefix` + +The prefix URL filters which requests are processed. Only requests whose URL starts with +the prefix are included. The prefix is stripped from paths in the generated spec. + +Example: with `--prefix https://api.example.com`, a request to +`https://api.example.com/users/42` produces path `/users/42` in the spec. + +### `--strict` + +See [strict mode](./strict-mode.md) for details on exit codes and CI usage. + +### `--report` + +See [processing reports](./reports.md) for the JSON schema and CI integration examples. + +## Exit codes + +| Code | Meaning | +|------|---------| +| 0 | Success | +| 1 | Fatal error (I/O failure, missing arguments, invalid input) | +| 2 | Strict mode violation (warnings with `--strict` enabled) | + +## Environment variables + +| Variable | Description | +|----------|-------------| +| `RUST_LOG` | Controls log verbosity. Default: `warn`. Set to `info` or `debug` for more output. | + +```bash +RUST_LOG=info mitm2openapi discover -i capture.flow -o templates.yaml -p "https://api.example.com" +``` diff --git a/book/src/usage/filtering.md b/book/src/usage/filtering.md new file mode 100644 index 0000000..266b76e --- /dev/null +++ b/book/src/usage/filtering.md @@ -0,0 +1,96 @@ +# Filtering endpoints + +<!-- toc --> + +The `discover` command supports glob-based filters to automate endpoint curation. +This is useful for CI pipelines or large captures where manual editing is impractical. 
+ +## Glob syntax + +Filters use git-style glob patterns (powered by the [`globset`](https://docs.rs/globset) crate): + +| Pattern | Matches | Does not match | +|---------|---------|----------------| +| `*` | Single path segment | Segments with `/` | +| `**` | Any number of path segments | (matches everything) | +| `?` | Any single character | | +| `[abc]` | Character class | | +| `{a,b}` | Alternation | | + +## `--exclude-patterns` + +Paths matching any exclude glob are **dropped entirely** — they do not appear in the +templates file at all. + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --exclude-patterns '/static/**,/images/**,*.css,*.js,*.svg,*.png' +``` + +Multiple patterns are comma-separated. A path is excluded if it matches **any** pattern. + +## `--include-patterns` + +Paths matching any include glob are emitted **without the `ignore:` prefix** — they are +auto-activated for the `generate` step. + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --include-patterns '/api/**,/v2/**' +``` + +## Combining filters + +When both are specified: + +1. **Exclude runs first** — matching paths are dropped entirely +2. **Include runs second** — matching paths among the survivors are auto-activated +3. 
**Everything else** gets the `ignore:` prefix for manual review + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --exclude-patterns '/static/**,*.css,*.js' \ + --include-patterns '/api/**' +``` + +Result: +- `/static/bundle.js` — excluded (dropped) +- `/api/users` — included (auto-activated) +- `/dashboard` — neither matched (gets `ignore:` prefix) + +## Examples + +### API-only spec + +```bash +--include-patterns '/api/**' \ +--exclude-patterns '/api/internal/**,/api/debug/**' +``` + +### Strip static assets + +```bash +--exclude-patterns '/static/**,/assets/**,*.css,*.js,*.svg,*.png,*.jpg,*.gif,*.ico,*.woff,*.woff2' +``` + +### Multiple API versions + +```bash +--include-patterns '/v1/**,/v2/**,/v3/**' +``` + +## Pattern tips + +- Patterns match against the **URL path only** (after the prefix is stripped), not the full URL +- Leading `/` is recommended for clarity but not required +- Patterns are case-sensitive +- Use `**` sparingly — it matches everything, including deeply nested paths diff --git a/book/src/usage/pipeline.md b/book/src/usage/pipeline.md new file mode 100644 index 0000000..a50b0bd --- /dev/null +++ b/book/src/usage/pipeline.md @@ -0,0 +1,200 @@ +# Discover, curate, generate + +<!-- toc --> + +`mitm2openapi` uses a three-step pipeline to convert captured HTTP traffic into an OpenAPI +specification. This chapter explains each step in detail. + +## Overview + +```mermaid +graph LR + A[Traffic capture] --> B[discover] + B --> C[Templates file] + C --> D[Curate] + D --> E[generate] + E --> F[OpenAPI 3.0 spec] +``` + +The pipeline separates **endpoint discovery** from **spec generation**, giving you an explicit +curation step where you choose which endpoints appear in the final spec. + +## Step 1: Discover + +The `discover` command scans a traffic capture and extracts all unique URL paths that match +a given prefix. 
+ +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" +``` + +### What happens internally + +1. The input file is read incrementally (streaming — memory usage stays bounded) +2. Each request's URL is checked against the `--prefix` filter +3. Matching paths are collected and deduplicated +4. Path segments that look like IDs (UUIDs, numeric strings) are replaced with + `{id}` placeholders (or `{id1}`, `{id2}`, ... when a path has multiple parameters) +5. The result is written to the templates file + +### Templates file format + +The output is a YAML file with path templates under an `x-path-templates` key: + +```yaml +x-path-templates: +- ignore:/api/users +- ignore:/api/users/{id} +- ignore:/api/products +- ignore:/api/products/{id}/reviews +- ignore:/static/bundle.js +``` + +Every path is prefixed with `ignore:` by default. This is intentional — it forces you to +explicitly opt in to each endpoint. + +### Automatic parameterization + +The discover step detects path segments that vary across requests and replaces them with +named parameters: + +| Observed paths | Template | +|---|---| +| `/api/users/42`, `/api/users/99` | `/api/users/{id}` | +| `/api/orders/abc-def-123` | `/api/orders/{id}` | + +UUID-like and numeric segments are detected automatically. More complex patterns require +manual editing of the templates file. + +## Step 2: Curate + +Open the templates file in any text editor. For each path: + +- **Remove `ignore:`** to include the endpoint in the generated spec +- **Leave `ignore:`** to exclude it +- **Delete the line** to exclude it permanently + +```yaml +# Before curation +x-path-templates: +- ignore:/api/users +- ignore:/api/users/{id} +- ignore:/static/bundle.js + +# After curation +x-path-templates: +- /api/users +- /api/users/{id} +- ignore:/static/bundle.js +``` + +You can also edit parameter names. 
The default `{id}` placeholder can be renamed to +something more descriptive like `{userId}`: + +```yaml +- /api/users/{userId} +``` + +### Automating curation with glob filters + +For CI pipelines or large captures, manual curation is impractical. Use `--include-patterns` +and `--exclude-patterns` during the `discover` step instead: + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --include-patterns '/api/**' \ + --exclude-patterns '/static/**,*.css,*.js' +``` + +Paths matching `--include-patterns` are emitted without the `ignore:` prefix (auto-activated). +Paths matching `--exclude-patterns` are dropped entirely. Everything else gets `ignore:` for +manual review. + +See [filtering endpoints](./filtering.md) for the full glob syntax. + +## Step 3: Generate + +The `generate` command re-reads the traffic capture and produces an OpenAPI spec using the +curated templates as a guide: + +```bash +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" +``` + +### What happens internally + +1. The templates file is loaded and the `ignore:` entries are filtered out +2. Each template path is compiled into a regex for matching +3. The traffic capture is streamed again, matching each request against the templates +4. For each matched request: + - Path parameters are extracted + - Query parameters are collected + - Request body schema is inferred (JSON, form data) + - Response status code and body schema are recorded +5. When multiple requests match the same template, their schemas are merged: + - Different status codes (200, 400, 404) produce separate response entries + - Request body is taken from the first observation; subsequent same-endpoint + observations only contribute response schemas +6. 
The final OpenAPI 3.0 document is written as YAML + +### Customizing output + +The `generate` command accepts several options to tune the output: + +```bash +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" \ + --openapi-title "My API" \ + --openapi-version "2.0.0" \ + --exclude-headers "authorization,cookie" \ + --ignore-images +``` + +See the [CLI reference](./cli-reference.md) for all available options. + +## Worked example + +Starting from a mitmproxy capture of a pet store API: + +```bash +# Discover all endpoints under the API prefix +mitm2openapi discover \ + -i petstore.flow \ + -o templates.yaml \ + -p "http://petstore:8080" \ + --exclude-patterns '/static/**' \ + --include-patterns '/api/**' + +# Templates file now has API paths auto-activated: +# - /api/v3/pet +# - /api/v3/pet/{id} +# - /api/v3/pet/findByStatus +# - /api/v3/store/inventory +# - ignore:/static/swagger-ui.css + +# Generate the spec +mitm2openapi generate \ + -i petstore.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "http://petstore:8080" + +# Result: openapi.yaml with paths, methods, schemas +``` + +The generated `openapi.yaml` is a valid OpenAPI 3.0 document that can be opened in +[Swagger UI](https://github.com/swagger-api/swagger-ui), imported into Postman, or used +as a contract for API testing. diff --git a/book/src/usage/reports.md b/book/src/usage/reports.md new file mode 100644 index 0000000..15a3bab --- /dev/null +++ b/book/src/usage/reports.md @@ -0,0 +1,98 @@ +# Processing reports + +Pass `--report <PATH>` to either `discover` or `generate` to write a JSON processing +summary. This is useful for CI pipelines that need structured data instead of log scraping. 
+ +## Usage + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --report report.json +``` + +## Report schema + +```json +{ + "report_version": 1, + "tool_version": "0.5.1", + "input": { + "path": "capture.flow", + "format": "Auto", + "size_bytes": 102400 + }, + "result": { + "flows_read": 150, + "flows_emitted": 148, + "paths_in_spec": 12 + }, + "events": { + "parse_error": { + "TNetString parse error at byte 98304: unexpected end of input": 1 + } + } +} +``` + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| `report_version` | integer | Schema version (currently `1`) | +| `tool_version` | string | `mitm2openapi` version that produced the report | +| `input.path` | string | Input file path | +| `input.format` | string | Detected or specified format (`Auto`, `Mitmproxy`, `Har`) | +| `input.size_bytes` | integer | Input file size in bytes | +| `result.flows_read` | integer | Total flows/entries parsed from input | +| `result.flows_emitted` | integer | Flows that passed all filters and were processed | +| `result.paths_in_spec` | integer | Unique paths in the output (for `generate`) | +| `events` | object | Map of event categories to message counts | + +### Event categories + +| Category | Meaning | Status | +|----------|---------|--------| +| `parse_error` | Corrupt data encountered (tnetstring errors, malformed HAR entries) | Populated | +| `cap_fired` | A resource limit was triggered (body too large, depth exceeded) | Reserved — not yet populated at runtime | +| `rejected` | A flow was skipped (invalid UTF-8, unsupported scheme, bad port/status) | Reserved — not yet populated at runtime | + +The `cap_fired` and `rejected` categories are present in the report schema and will be +connected to the reader pipelines in a future release. Currently, only `parse_error` +events are counted. 
+ +## CI integration + +Parse the report in CI to make decisions based on processing quality: + +```bash +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" \ + --report report.json + +# Check if any events occurred +if jq -e '.events | length > 0' report.json > /dev/null 2>&1; then + echo "Warning: processing had events" + jq '.events' report.json +fi +``` + +## Report with strict mode + +The report is written even when `--strict` causes a non-zero exit code. This lets you +capture full diagnostics while still failing the CI job: + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --strict \ + --report report.json \ + || { jq '.' report.json; exit 1; } +``` diff --git a/book/src/usage/resource-limits.md b/book/src/usage/resource-limits.md new file mode 100644 index 0000000..e11985e --- /dev/null +++ b/book/src/usage/resource-limits.md @@ -0,0 +1,91 @@ +# Resource limits + +<!-- toc --> + +To prevent denial-of-service when processing untrusted captures, `mitm2openapi` enforces +several configurable and fixed limits. 
+ +## Configurable limits + +These limits can be adjusted via CLI flags: + +| Flag | Default | Purpose | +|------|---------|---------| +| `--max-input-size` | 2 GiB | Reject files larger than this before reading | +| `--max-payload-size` | 256 MiB | Cap on individual tnetstring payload allocation | +| `--max-depth` | 256 | Recursion depth limit for nested tnetstring structures | +| `--max-body-size` | 64 MiB | Maximum request/response body considered during schema inference | +| `--allow-symlinks` | off | By default, symlinked inputs are rejected | + +### Adjusting limits + +Increase `--max-input-size` if you work with captures larger than 2 GiB: + +```bash +mitm2openapi discover \ + -i large-capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --max-input-size 8GiB +``` + +Size suffixes are supported: `KiB`, `MiB`, `GiB`. + +The other limits rarely need tuning. The defaults are designed to handle real-world +captures while rejecting pathological inputs. + +### Symlink rejection + +By default, symlinked input files are rejected to prevent path-traversal attacks on shared +CI runners. 
If you need to process a symlinked file: + +```bash +mitm2openapi discover \ + -i /path/to/symlinked-capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --allow-symlinks +``` + +## Fixed per-field limits + +These limits are applied unconditionally and cannot be changed via CLI flags: + +| Field | Cap | Behaviour when exceeded | +|-------|-----|------------------------| +| Header name | 8 KiB | Header dropped (other headers still processed) | +| Header value | 64 KiB | Value truncated to cap | +| Form fields per request | 1,000 | Excess fields ignored | +| URL scheme | `http` / `https` only | Non-HTTP flows silently skipped | +| Port number | 1 -- 65,535 | Out-of-range port drops the request | +| HTTP status code | 100 -- 599 | Invalid codes treated as no response | + +## UTF-8 validation + +Identity fields (scheme, host, path, method, header names) require valid UTF-8. Flows +with non-UTF-8 identity bytes are skipped to prevent data aliasing through +replacement-character collisions. + +Control characters (`0x00`--`0x1F`, `0x7F`) in paths are stripped automatically. + +## Streaming and memory + +Both mitmproxy flow files and HAR files are processed incrementally. Memory usage stays +bounded regardless of input size — there is no need to load the entire capture into memory. + +Peak RSS is proportional to the size of the **largest single flow** in the capture, not the +total file size. For typical captures, expect 5--15 MB of memory usage. + +## When limits fire + +When a per-field limit is exceeded (header too large, body too large, form fields over cap), +the affected field is skipped or truncated and processing continues with the remaining data. + +When a tnetstring parse error occurs, the iterator halts and the rest of the file is not +processed — valid flows parsed before the error are still emitted. There is no resync +because binary payloads can contain bytes that mimic valid length prefixes. 
+ +In both cases a `warn`-level log message is emitted with details. + +Use [strict mode](./strict-mode.md) to treat these warnings as errors, or +[processing reports](./reports.md) to capture them as structured data. diff --git a/book/src/usage/strict-mode.md b/book/src/usage/strict-mode.md new file mode 100644 index 0000000..475c3fd --- /dev/null +++ b/book/src/usage/strict-mode.md @@ -0,0 +1,79 @@ +# Strict mode + +Pass `--strict` to either `discover` or `generate` to treat warning-level events as +hard failures. The process exits with code **2** if the processing report records any +counted events. + +Currently, the only event counter populated at runtime is `parse_error` — triggered when +flows cannot be deserialized (corrupt tnetstring data, malformed HAR JSON). The +`cap_fired` and `rejected` counters exist in the report schema but are not yet wired to +the reader pipelines; they will be connected in a future release. + +In practice, `--strict` today catches: + +- Parse errors during flow deserialization (tnetstring or HAR) +- Errors counted by the streaming iterator wrapper in `discover` mode + +## Usage + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --strict +``` + +```bash +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" \ + --strict +``` + +## CI usage pattern + +Strict mode is designed for CI gates where silent degradation is unacceptable: + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --strict \ + || { echo "FAIL: corrupt or over-limit flows detected"; exit 1; } +``` + +## Without `--strict` + +Without the flag, parse errors are logged at `warn` level and processing continues with +exit code 0. Affected flows are skipped, but the output file is still produced. Other +warning-level events (cap fires, scheme rejections, etc.) 
are always logged but do not +currently increment the report counters that `--strict` checks. + +## Exit codes + +| Code | Meaning | +|------|---------| +| 0 | Success (no warnings, or `--strict` not set) | +| 1 | Fatal error (I/O failure, missing required arguments) | +| 2 | Strict mode violation (warnings detected with `--strict`) | + +## Combining with reports + +For CI pipelines that need both strict enforcement and structured diagnostics: + +```bash +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" \ + --strict \ + --report report.json +``` + +The [report](./reports.md) is written even when `--strict` causes a non-zero exit, capturing +the full details of what went wrong. diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 1a53a08..7830a65 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -1,8 +1,6 @@ -# Benchmarks - Generated by the [benchmark workflow](.github/workflows/bench.yml). -# Benchmark results +## Benchmark results _Run: 2026-04-22 22:31 UTC, commit `22ef2faa`, runner: Linux 6.17.0-1011-azure_ diff --git a/src/har_reader.rs b/src/har_reader.rs index 64d5f30..980209f 100644 --- a/src/har_reader.rs +++ b/src/har_reader.rs @@ -10,6 +10,9 @@ use crate::error::{Error, Result}; use crate::types::CapturedRequest; use crate::MAX_BODY_SIZE; +const MAX_HEADER_NAME_SIZE: usize = 8 * 1024; +const MAX_HEADER_VALUE_SIZE: usize = 64 * 1024; + #[derive(Deserialize)] struct StreamingHarEntry { request: StreamingHarRequest, @@ -112,27 +115,50 @@ impl HarFlowWrapper { Some(Self { url: entry.request.url, method: entry.request.method, - request_headers: entry - .request - .headers - .into_iter() - .map(|h| (h.name, h.value)) - .collect(), + request_headers: cap_headers(entry.request.headers), request_body, response_status, response_reason: entry.response.status_text, - response_headers: entry - .response - .headers - .into_iter() - .map(|h| (h.name, h.value)) - .collect(), + 
response_headers: cap_headers(entry.response.headers), response_body, response_content_type, }) } } +fn cap_headers(headers: Vec<StreamingHarHeader>) -> Vec<(String, String)> { + headers + .into_iter() + .filter_map(|h| { + if h.name.len() > MAX_HEADER_NAME_SIZE { + warn!( + event = "header_name_too_large", + size = h.name.len(), + max = MAX_HEADER_NAME_SIZE, + "dropping HAR header with oversized name" + ); + return None; + } + let value = if h.value.len() > MAX_HEADER_VALUE_SIZE { + warn!( + event = "header_value_too_large", + size = h.value.len(), + max = MAX_HEADER_VALUE_SIZE, + name = %h.name, + "truncating oversized HAR header value" + ); + h.value + .get(..MAX_HEADER_VALUE_SIZE) + .unwrap_or(&h.value) + .to_string() + } else { + h.value + }; + Some((h.name, value)) + }) + .collect() +} + fn cap_body(body: Vec<u8>) -> Vec<u8> { if body.len() > MAX_BODY_SIZE { warn!( @@ -432,6 +458,14 @@ pub fn stream_har_file(path: &Path) -> Result<RequestIter> { } fn stream_har_dir(path: &Path) -> Result<RequestIter> { + stream_har_dir_inner(path, false) +} + +pub fn stream_har_dir_no_symlinks(path: &Path) -> Result<RequestIter> { + stream_har_dir_inner(path, true) +} + +fn stream_har_dir_inner(path: &Path, reject_symlinks: bool) -> Result<RequestIter> { let mut dir_entries: Vec<_> = std::fs::read_dir(path)? 
.filter_map(|e| match e { Ok(entry) => Some(entry), @@ -449,6 +483,23 @@ fn stream_har_dir(path: &Path) -> Result<RequestIter> { .extension() .is_some_and(|ext| ext.eq_ignore_ascii_case("har")) }) + .filter(|e| { + if reject_symlinks { + match e.path().symlink_metadata() { + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + event = "symlink_rejected", + path = %e.path().display(), + "skipping symlinked HAR directory entry" + ); + false + } + _ => true, + } + } else { + true + } + }) .collect(); dir_entries.sort_by_key(|e| e.path()); diff --git a/src/main.rs b/src/main.rs index d4e80f1..a607332 100644 --- a/src/main.rs +++ b/src/main.rs @@ -274,16 +274,32 @@ fn stream_input( max_input_size: u64, allow_symlinks: bool, ) -> Result<RequestIter> { + // Check symlink-ness before is_dir(), since is_dir() follows symlinks. + if !allow_symlinks { + if let Ok(meta) = path.symlink_metadata() { + if meta.file_type().is_symlink() { + return Err(mitm2openapi::error::Error::SymlinkRejected { + path: path.to_path_buf(), + } + .into()); + } + } + } if !path.is_dir() { mitm2openapi::validate_input_path(path, max_input_size, allow_symlinks) .context("input file validation failed")?; } + let reject_symlinks = !allow_symlinks; match format { InputFormat::Mitmproxy => { debug!(path = %path.display(), "Streaming as mitmproxy format"); if path.is_dir() { - mitmproxy_reader::stream_mitmproxy_dir(path) - .context("failed to stream mitmproxy directory") + if reject_symlinks { + mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(path) + } else { + mitmproxy_reader::stream_mitmproxy_dir(path) + } + .context("failed to stream mitmproxy directory") } else { let iter = mitmproxy_reader::stream_mitmproxy_file(path) .context("failed to stream mitmproxy file")?; @@ -298,8 +314,16 @@ fn stream_input( InputFormat::Auto => { if path.is_dir() { debug!(path = %path.display(), "Auto-detecting format for directory"); - let mitmproxy_result = mitmproxy_reader::stream_mitmproxy_dir(path); - let 
har_result = har_reader::stream_har_file(path); + let mitmproxy_result = if reject_symlinks { + mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(path) + } else { + mitmproxy_reader::stream_mitmproxy_dir(path) + }; + let har_result = if reject_symlinks { + har_reader::stream_har_dir_no_symlinks(path) + } else { + har_reader::stream_har_file(path) + }; match (mitmproxy_result, har_result) { (Ok(m_iter), Ok(h_iter)) => { diff --git a/src/mitmproxy_reader.rs b/src/mitmproxy_reader.rs index 7076e67..c0cc7d6 100644 --- a/src/mitmproxy_reader.rs +++ b/src/mitmproxy_reader.rs @@ -361,6 +361,14 @@ pub fn stream_mitmproxy_file( } pub fn stream_mitmproxy_dir(path: &Path) -> Result<RequestIter> { + stream_mitmproxy_dir_inner(path, false) +} + +pub fn stream_mitmproxy_dir_no_symlinks(path: &Path) -> Result<RequestIter> { + stream_mitmproxy_dir_inner(path, true) +} + +fn stream_mitmproxy_dir_inner(path: &Path, reject_symlinks: bool) -> Result<RequestIter> { let mut entries: Vec<_> = std::fs::read_dir(path)? 
.filter_map(|e| match e { Ok(entry) => Some(entry), @@ -378,6 +386,23 @@ pub fn stream_mitmproxy_dir(path: &Path) -> Result<RequestIter> { .extension() .is_some_and(|ext| ext.eq_ignore_ascii_case("flow")) }) + .filter(|e| { + if reject_symlinks { + match e.path().symlink_metadata() { + Ok(meta) if meta.file_type().is_symlink() => { + warn!( + event = "symlink_rejected", + path = %e.path().display(), + "skipping symlinked directory entry" + ); + false + } + _ => true, + } + } else { + true + } + }) .collect(); entries.sort_by_key(|e| e.path()); diff --git a/tests/security.rs b/tests/security.rs index 1d9248c..a897b19 100644 --- a/tests/security.rs +++ b/tests/security.rs @@ -79,3 +79,87 @@ fn normal_file_passes_validation() { let result = mitm2openapi::validate_input_path(&path, mitm2openapi::MAX_INPUT_SIZE, false); assert!(result.is_ok(), "normal file should pass: {result:?}"); } + +#[cfg(unix)] +#[test] +fn symlink_to_directory_rejected() { + use std::os::unix::fs as unix_fs; + + let dir = TempDir::new().unwrap(); + let real_dir = dir.path().join("real_dir"); + std::fs::create_dir(&real_dir).unwrap(); + std::fs::write(real_dir.join("test.flow"), b"1:X,").unwrap(); + + let link = dir.path().join("link_dir"); + unix_fs::symlink(&real_dir, &link).unwrap(); + + assert!(link.is_dir(), "symlink should resolve to directory"); + + let err = mitm2openapi::validate_input_path(&link, mitm2openapi::MAX_INPUT_SIZE, false); + assert!( + matches!(err, Err(mitm2openapi::error::Error::SymlinkRejected { .. 
})), + "symlink to directory should be rejected, got {err:?}" + ); +} + +#[cfg(unix)] +#[test] +fn symlink_dir_entry_rejected_in_mitmproxy() { + use std::os::unix::fs as unix_fs; + + let dir = TempDir::new().unwrap(); + let src = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("testdata") + .join("flows") + .join("simple_get.flow"); + let real_file = dir.path().join("real.flow"); + std::fs::copy(&src, &real_file).unwrap(); + + let link_file = dir.path().join("linked.flow"); + unix_fs::symlink(&real_file, &link_file).unwrap(); + + let iter = mitm2openapi::mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(dir.path()); + assert!(iter.is_ok(), "should open directory"); + let results: Vec<_> = iter.unwrap().filter_map(|r| r.ok()).collect(); + + assert!( + !results.is_empty(), + "real file should produce at least one flow" + ); + + let all_results: Vec<_> = mitm2openapi::mitmproxy_reader::stream_mitmproxy_dir(dir.path()) + .unwrap() + .filter_map(|r| r.ok()) + .collect(); + assert!( + all_results.len() > results.len(), + "without symlink rejection, both files should be processed" + ); +} + +#[cfg(unix)] +#[test] +fn symlink_dir_entry_rejected_in_har() { + use std::os::unix::fs as unix_fs; + + let dir = TempDir::new().unwrap(); + let src = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("testdata") + .join("har") + .join("simple.har"); + let real_file = dir.path().join("real.har"); + std::fs::copy(&src, &real_file).unwrap(); + + let link_file = dir.path().join("linked.har"); + unix_fs::symlink(&real_file, &link_file).unwrap(); + + let iter = mitm2openapi::har_reader::stream_har_dir_no_symlinks(dir.path()); + assert!(iter.is_ok(), "should open directory"); + let results: Vec<_> = iter.unwrap().filter_map(|r| r.ok()).collect(); + + assert_eq!( + results.len(), + 1, + "only the real HAR file should be processed, symlinked entry skipped" + ); +}