From bc8a90394a0f6367b36dbe6fe61bc1ad87c07198 Mon Sep 17 00:00:00 2001
From: yfe404
Date: Tue, 14 Apr 2026 21:27:09 +0200
Subject: [PATCH] refactor(humanizer): route to cloakbrowser-patched Playwright methods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The custom Bezier/Fitts/WPM/bigram/typo code in
src/humanizer/{path,timing}.ts duplicated cloakbrowser's own
`humanize: true` layer and bypassed it by calling low-level
`page.keyboard.press` — which dropped uppercase and symbol case.

Engine now calls `page.click` / `page.mouse.*` / `page.keyboard.type`
directly; cloakbrowser patches all of them with Bezier paths, realistic
typing cadence, and CDP-trusted Shift handling
(`Input.dispatchKeyEvent`, `isTrusted=true`).

Smoke test (scripts/humanizer-case-test.ts) confirms
"Hello World! ABC @#$%" round-trips through a textarea unchanged.

Tool param surface:
- humanizer_click: `move_duration_ms` → `timeout_ms`
- humanizer_type: `wpm`+`error_rate` → `delay_ms` (optional passthrough)
- humanizer_scroll: `duration_ms` removed (single wheel event)
- humanizer_move: `duration_ms` removed

Deletes 463 LOC of dead timing code. Bumps to 2.2.0.
--- CHANGELOG.md | 16 +++ package-lock.json | 4 +- package.json | 2 +- scripts/humanizer-case-test.ts | 41 ++++++ src/humanizer/engine.ts | 227 +++++++++----------------------- src/humanizer/path.ts | 232 --------------------------------- src/humanizer/timing.ts | 231 -------------------------------- src/tools/humanizer.ts | 60 ++++----- 8 files changed, 144 insertions(+), 669 deletions(-) create mode 100644 scripts/humanizer-case-test.ts delete mode 100644 src/humanizer/path.ts delete mode 100644 src/humanizer/timing.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 9595c24..5356fdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## 2.2.0 + +### Breaking Changes + +- **Humanizer layer rewritten as thin wrapper over cloakbrowser-patched Playwright.** The custom Bezier/Fitts/WPM/bigram/typo code was duplicating (and fighting with) cloakbrowser's own `humanize: true` layer, and bypassed it by calling low-level `page.keyboard.press` — which dropped uppercase and symbol case. Engine now routes to `page.click`/`page.mouse.*`/`page.keyboard.type`, all patched by cloakbrowser with CDP-trusted Shift handling. +- **Tool params changed:** + - `humanizer_click`: `move_duration_ms` removed; `timeout_ms` added (default 15000). + - `humanizer_type`: `wpm` and `error_rate` removed; `delay_ms` added (optional passthrough to `keyboard.type`). + - `humanizer_scroll`: `duration_ms` removed (single wheel event). + - `humanizer_move`: `duration_ms` removed. +- `src/humanizer/path.ts` and `src/humanizer/timing.ts` deleted. + +### Fixes + +- **Uppercase and symbol typing now works.** The old `page.keyboard.press("Shift+a")` path produced lowercase output for some targets; cloakbrowser's patched `page.keyboard.type` uses CDP `Input.dispatchKeyEvent` with `isTrusted=true` and correct Shift framing. 
+ ## 2.1.0 ### New Features diff --git a/package-lock.json b/package-lock.json index 025dd22..dfea356 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "proxy-mcp", - "version": "2.1.0", + "version": "2.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "proxy-mcp", - "version": "2.1.0", + "version": "2.2.0", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", "cloakbrowser": "^0.3.24", diff --git a/package.json b/package.json index e2a0f23..caeee0f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "proxy-mcp", - "version": "2.1.0", + "version": "2.2.0", "description": "MCP server for HTTP/HTTPS MITM proxy via mockttp", "type": "module", "engines": { diff --git a/scripts/humanizer-case-test.ts b/scripts/humanizer-case-test.ts new file mode 100644 index 0000000..6b3a4b5 --- /dev/null +++ b/scripts/humanizer-case-test.ts @@ -0,0 +1,41 @@ +/** + * Smoke test: verify humanizer_type preserves uppercase + symbols. + * Launches cloakbrowser, navigates to data: URL with a textarea, types + * "Hello World! ABC @#$%" and reads the value back. + */ +import { launchContext } from "cloakbrowser"; + +const HTML = ` + +`; + +const TEST_TEXT = "Hello World! ABC @#$%"; + +async function main() { + const context = await launchContext({ headless: true, humanize: true }); + const browser = context.browser(); + try { + const page = await context.newPage(); + await page.goto(`data:text/html;base64,${Buffer.from(HTML).toString("base64")}`); + await page.focus("#t"); + + const start = Date.now(); + await page.keyboard.type(TEST_TEXT); + const elapsed = Date.now() - start; + + const value = await page.$eval("#t", (el) => (el as HTMLTextAreaElement).value); + const match = value === TEST_TEXT; + console.log(JSON.stringify({ + expected: TEST_TEXT, + actual: value, + match, + elapsed_ms: elapsed, + }, null, 2)); + process.exit(match ? 
0 : 1); + } finally { + await context.close().catch(() => {}); + await browser?.close().catch(() => {}); + } +} + +main().catch((e) => { console.error(e); process.exit(1); }); diff --git a/src/humanizer/engine.ts b/src/humanizer/engine.ts index 2a82cc3..429a8e2 100644 --- a/src/humanizer/engine.ts +++ b/src/humanizer/engine.ts @@ -1,22 +1,16 @@ /** - * Playwright-backed humanizer engine. + * Humanizer engine — thin wrappers over cloakbrowser-patched Playwright. * - * Replaces the former CDP-based engine. Uses the cloakbrowser-launched - * Playwright Page for each target. cloakbrowser's `humanize: true` already - * patches input dispatch at the C++ layer; this engine layers custom per-call - * timing profiles (WPM + bigram + typo, Bezier paths, eased scroll) on top. - * - * humanizer_click supports locator-first targeting (selector | role+name | - * text | label) so callers no longer need to guess pixel coordinates — the - * locator auto-waits for visible+enabled+stable+in-view before dispatching. + * cloakbrowser's `humanize: true` already patches page.click / page.mouse.move / + * page.mouse.click / page.keyboard.type / page.hover / page.type with Bezier + * paths, realistic typing, and CDP-trusted Shift handling. This engine just + * routes tool calls to those patched methods — no duplicate timing code. 
*/ import type { Page, Locator } from "playwright-core"; import { getPageForTarget } from "../browser/session.js"; -import { generatePath, addRandomOffset, type Point } from "./path.js"; -import { calculateKeyDelays, calculateScrollSteps, type TypingProfile } from "./timing.js"; -// ── Helpers ────────────────────────────────────────────────────────── +interface Point { x: number; y: number } function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); @@ -26,17 +20,9 @@ function rand(min: number, max: number): number { return min + Math.random() * (max - min); } -function isUpperCase(ch: string): boolean { - return ch !== ch.toLowerCase() && ch === ch.toUpperCase(); -} - -// ── Mouse position tracking ────────────────────────────────────────── - -interface MouseState { - x: number; - y: number; -} +// ── Mouse position tracking (for coord-based idle jitter) ──────────── +interface MouseState { x: number; y: number } const mouseStates = new Map(); function getMouseState(targetId: string): MouseState { @@ -67,7 +53,6 @@ export interface ClickTarget { function resolveLocator(page: Page, opts: ClickTarget): Locator | null { if (opts.selector) return page.locator(opts.selector); if (opts.role) { - // Playwright requires role to be a known AriaRole; we accept any string. // eslint-disable-next-line @typescript-eslint/no-explicit-any return page.getByRole(opts.role as any, opts.name ? 
{ name: opts.name } : undefined); } @@ -76,197 +61,105 @@ function resolveLocator(page: Page, opts: ClickTarget): Locator | null { return null; } -async function resolveCenter(locator: Locator): Promise<{ center: Point; box: { width: number; height: number } }> { - await locator.waitFor({ state: "visible", timeout: 15_000 }); - await locator.scrollIntoViewIfNeeded({ timeout: 5_000 }).catch(() => { /* non-fatal */ }); - const box = await locator.boundingBox({ timeout: 5_000 }); - if (!box) { - throw new Error("Locator has no bounding box (element not rendered or zero-size)."); - } - return { - center: { x: box.x + box.width / 2, y: box.y + box.height / 2 }, - box: { width: box.width, height: box.height }, - }; +function resolvedByLabel(opts: ClickTarget): string { + if (opts.selector) return "selector"; + if (opts.role) return "role"; + if (opts.text) return "text"; + if (opts.label) return "label"; + return "coords"; } // ── Engine ─────────────────────────────────────────────────────────── class HumanizerEngine { - /** Drop tracked mouse state when a target is closed. */ closeSession(targetId: string): void { clearMouseState(targetId); } - // ── Mouse movement ───────────────────────────────────────────── - async moveMouse( targetId: string, x: number, y: number, - durationMs?: number, ): Promise<{ totalMs: number; eventsDispatched: number }> { const page = getPageForTarget(targetId); + const start = Date.now(); + await page.mouse.move(x, y); const state = getMouseState(targetId); - const from: Point = { x: state.x, y: state.y }; - const to: Point = { x, y }; - - const path = generatePath(from, to, { baseDurationMs: durationMs ?? 
600 }); - - let eventsDispatched = 0; - for (let i = 0; i < path.points.length; i++) { - const pt = path.points[i]; - if (i > 0) { - const delay = path.timestamps[i] - path.timestamps[i - 1]; - if (delay > 0) await sleep(delay); - } - await page.mouse.move(pt.x, pt.y); - eventsDispatched++; - } - - const last = path.points[path.points.length - 1]; - state.x = last.x; - state.y = last.y; - - return { totalMs: path.totalMs, eventsDispatched }; + state.x = x; + state.y = y; + return { totalMs: Date.now() - start, eventsDispatched: 1 }; } - // ── Click ────────────────────────────────────────────────────── - async click( targetId: string, opts: ClickTarget & { button?: "left" | "right" | "middle"; clickCount?: number; - moveDurationMs?: number; + timeoutMs?: number; } = {}, ): Promise<{ totalMs: number; eventsDispatched: number; clickedAt: Point; resolvedBy: string }> { const page = getPageForTarget(targetId); const button = opts.button ?? "left"; const clickCount = opts.clickCount ?? 1; - - let targetX: number; - let targetY: number; - let resolvedBy: string; + const timeout = opts.timeoutMs ?? 15_000; + const start = Date.now(); + const resolvedBy = resolvedByLabel(opts); const locator = resolveLocator(page, opts); if (locator) { - const { center, box } = await resolveCenter(locator); - const offset = addRandomOffset(center, box); - targetX = offset.x; - targetY = offset.y; - resolvedBy = opts.selector ? "selector" - : opts.role ? "role" - : opts.text ? "text" - : "label"; - } else if (opts.x !== undefined && opts.y !== undefined) { - targetX = opts.x; - targetY = opts.y; - resolvedBy = "coords"; - } else { - throw new Error("Provide one of: selector, role (+ name), text, label, or x+y coordinates."); + await locator.click({ button, clickCount, timeout }); + const box = await locator.boundingBox({ timeout: 5_000 }).catch(() => null); + const center: Point = box + ? 
{ x: box.x + box.width / 2, y: box.y + box.height / 2 } + : { x: 0, y: 0 }; + const state = getMouseState(targetId); + state.x = center.x; + state.y = center.y; + return { totalMs: Date.now() - start, eventsDispatched: 1, clickedAt: center, resolvedBy }; } - const moveResult = await this.moveMouse(targetId, targetX, targetY, opts.moveDurationMs); - let eventsDispatched = moveResult.eventsDispatched; - let totalMs = moveResult.totalMs; - - const preClickDelay = Math.round(rand(30, 80)); - await sleep(preClickDelay); - totalMs += preClickDelay; - - for (let c = 0; c < clickCount; c++) { - await page.mouse.down({ button }); - eventsDispatched++; - - const holdMs = Math.round(rand(40, 100)); - await sleep(holdMs); - totalMs += holdMs; - - await page.mouse.up({ button }); - eventsDispatched++; - - if (c < clickCount - 1) { - const interClickMs = Math.round(rand(50, 120)); - await sleep(interClickMs); - totalMs += interClickMs; - } + if (opts.x !== undefined && opts.y !== undefined) { + await page.mouse.click(opts.x, opts.y, { button, clickCount }); + const state = getMouseState(targetId); + state.x = opts.x; + state.y = opts.y; + return { + totalMs: Date.now() - start, + eventsDispatched: 1, + clickedAt: { x: opts.x, y: opts.y }, + resolvedBy, + }; } - return { totalMs, eventsDispatched, clickedAt: { x: targetX, y: targetY }, resolvedBy }; + throw new Error("Provide one of: selector, role (+ name), text, label, or x+y coordinates."); } - // ── Typing ───────────────────────────────────────────────────── - async typeText( targetId: string, text: string, - profile: TypingProfile = {}, + opts: { delayMs?: number } = {}, ): Promise<{ totalMs: number; eventsDispatched: number; charsTyped: number }> { const page = getPageForTarget(targetId); - const keyDelays = calculateKeyDelays(text, profile); - - let totalMs = 0; - let eventsDispatched = 0; - - for (const { key, delayMs } of keyDelays) { - await sleep(delayMs); - totalMs += delayMs; - - if (key === "Backspace") { - await 
page.keyboard.press("Backspace"); - eventsDispatched++; - } else if (key === " ") { - await page.keyboard.press("Space"); - eventsDispatched++; - } else if (key.length === 1) { - // Shift is handled automatically by Playwright's keyboard.type for single chars. - if (isUpperCase(key)) { - await page.keyboard.press(`Shift+${key.toLowerCase()}`); - } else { - await page.keyboard.press(key); - } - eventsDispatched++; - } else { - // Named key (Tab, Enter, etc.) - await page.keyboard.press(key); - eventsDispatched++; - } - - const holdMs = Math.round(rand(20, 60)); - await sleep(holdMs); - totalMs += holdMs; - } - - return { totalMs, eventsDispatched, charsTyped: text.length }; + const start = Date.now(); + await page.keyboard.type(text, opts.delayMs !== undefined ? { delay: opts.delayMs } : undefined); + return { + totalMs: Date.now() - start, + eventsDispatched: text.length, + charsTyped: text.length, + }; } - // ── Scroll ───────────────────────────────────────────────────── - async scroll( targetId: string, deltaY: number, deltaX?: number, - durationMs?: number, ): Promise<{ totalMs: number; eventsDispatched: number }> { const page = getPageForTarget(targetId); - const steps = calculateScrollSteps({ deltaY, deltaX, durationMs: durationMs ?? 400 }); - - let totalMs = 0; - let eventsDispatched = 0; - - for (const step of steps) { - await sleep(step.delayMs); - totalMs += step.delayMs; - - await page.mouse.wheel(step.deltaX, step.deltaY); - eventsDispatched++; - } - - return { totalMs, eventsDispatched }; + const start = Date.now(); + await page.mouse.wheel(deltaX ?? 0, deltaY); + return { totalMs: Date.now() - start, eventsDispatched: 1 }; } - // ── Idle simulation ──────────────────────────────────────────── - async idle( targetId: string, durationMs: number, @@ -281,15 +174,13 @@ class HumanizerEngine { const scrollChance = intensity === "subtle" ? 0.05 : 0.15; const actionInterval = intensity === "subtle" ? 
rand(400, 1200) : rand(200, 600); - let elapsed = 0; - while (elapsed < durationMs) { + while (Date.now() - start < durationMs) { const waitMs = Math.min( Math.round(rand(actionInterval * 0.7, actionInterval * 1.3)), - durationMs - elapsed, + durationMs - (Date.now() - start), ); - await sleep(waitMs); - elapsed = Date.now() - start; - if (elapsed >= durationMs) break; + if (waitMs > 0) await sleep(waitMs); + if (Date.now() - start >= durationMs) break; if (Math.random() < scrollChance) { const microDelta = Math.round(rand(-20, 20)); diff --git a/src/humanizer/path.ts b/src/humanizer/path.ts deleted file mode 100644 index 6f539d9..0000000 --- a/src/humanizer/path.ts +++ /dev/null @@ -1,232 +0,0 @@ -/** - * Bezier curve mouse path generation for human-like mouse movement. - * - * Generates curved paths with randomized control points, Fitts's law velocity - * scaling, and optional overshoot+correction for long distances. - */ - -export interface Point { - x: number; - y: number; -} - -export interface PathOptions { - /** Target width in pixels for Fitts's law (default: 20). */ - targetWidth?: number; - /** Base duration in ms before Fitts scaling (default: 600). */ - baseDurationMs?: number; - /** Enable overshoot+correction for distances > 200px (default: true). */ - overshoot?: boolean; - /** Time step in ms between path points (default: 8 ~= 120Hz). */ - stepMs?: number; -} - -export interface PathResult { - points: Point[]; - totalMs: number; - timestamps: number[]; -} - -// ── Math helpers ───────────────────────────────────────────────────── - -function rand(min: number, max: number): number { - return min + Math.random() * (max - min); -} - -function distance(a: Point, b: Point): number { - return Math.hypot(b.x - a.x, b.y - a.y); -} - -function easeInOutCubic(t: number): number { - return t < 0.5 - ? 4 * t * t * t - : 1 - Math.pow(-2 * t + 2, 3) / 2; -} - -/** Cubic Bezier interpolation at parameter t ∈ [0,1]. 
*/ -function cubicBezier(p0: Point, p1: Point, p2: Point, p3: Point, t: number): Point { - const u = 1 - t; - const uu = u * u; - const uuu = uu * u; - const tt = t * t; - const ttt = tt * t; - return { - x: uuu * p0.x + 3 * uu * t * p1.x + 3 * u * tt * p2.x + ttt * p3.x, - y: uuu * p0.y + 3 * uu * t * p1.y + 3 * u * tt * p2.y + ttt * p3.y, - }; -} - -/** - * Build randomized control points for a cubic Bezier between `from` and `to`. - * Both control points are placed on the same side of the direct path to - * produce a natural arc (no S-curves). - */ -function randomControlPoints(from: Point, to: Point): [Point, Point] { - const dx = to.x - from.x; - const dy = to.y - from.y; - const dist = Math.hypot(dx, dy) || 1; - - // Perpendicular direction - const px = -dy / dist; - const py = dx / dist; - - // Same side: both offsets share the same sign - const side = Math.random() < 0.5 ? 1 : -1; - const spread1 = rand(0.15, 0.45) * dist * side; - const spread2 = rand(0.15, 0.45) * dist * side; - - const cp1: Point = { - x: from.x + dx * rand(0.2, 0.4) + px * spread1, - y: from.y + dy * rand(0.2, 0.4) + py * spread1, - }; - - const cp2: Point = { - x: from.x + dx * rand(0.6, 0.8) + px * spread2, - y: from.y + dy * rand(0.6, 0.8) + py * spread2, - }; - - return [cp1, cp2]; -} - -/** - * Fitts's law duration scaling: totalDuration *= log2(distance / targetWidth + 1) - */ -function fittsDuration(dist: number, targetWidth: number, baseDuration: number): number { - if (dist < 1) return baseDuration * 0.1; - return baseDuration * Math.log2(dist / targetWidth + 1); -} - -/** - * Discretize a Bezier curve to integer pixel coordinates with eased timing. - * Deduplicates consecutive identical points. 
- */ -function discretizePath( - from: Point, - cp1: Point, - cp2: Point, - to: Point, - totalMs: number, - stepMs: number, -): { points: Point[]; timestamps: number[] } { - const steps = Math.max(1, Math.ceil(totalMs / stepMs)); - const points: Point[] = []; - const timestamps: number[] = []; - - let lastX = -Infinity; - let lastY = -Infinity; - - for (let i = 0; i <= steps; i++) { - const linearT = i / steps; - const easedT = easeInOutCubic(linearT); - const pt = cubicBezier(from, cp1, cp2, to, easedT); - - const ix = Math.round(pt.x); - const iy = Math.round(pt.y); - - // Deduplicate consecutive identical points - if (ix === lastX && iy === lastY) continue; - lastX = ix; - lastY = iy; - - points.push({ x: ix, y: iy }); - timestamps.push(Math.round(linearT * totalMs)); - } - - // Ensure final point is exact destination - const last = points[points.length - 1]; - const destX = Math.round(to.x); - const destY = Math.round(to.y); - if (!last || last.x !== destX || last.y !== destY) { - points.push({ x: destX, y: destY }); - timestamps.push(Math.round(totalMs)); - } - - return { points, timestamps }; -} - -// ── Public API ─────────────────────────────────────────────────────── - -/** - * Generate a human-like mouse path from `from` to `to`. - * - * Uses cubic Bezier curves with randomized control points and eased timing. - * For long distances (>200px), adds an overshoot-and-correct sub-path. - */ -export function generatePath(from: Point, to: Point, opts: PathOptions = {}): PathResult { - const targetWidth = opts.targetWidth ?? 20; - const baseDurationMs = opts.baseDurationMs ?? 600; - const enableOvershoot = opts.overshoot ?? true; - const stepMs = opts.stepMs ?? 
8; - - const dist = distance(from, to); - - // For very short distances, just return start → end - if (dist < 2) { - return { - points: [{ x: Math.round(from.x), y: Math.round(from.y) }, { x: Math.round(to.x), y: Math.round(to.y) }], - totalMs: Math.round(baseDurationMs * 0.1), - timestamps: [0, Math.round(baseDurationMs * 0.1)], - }; - } - - // Main curve - const totalMs = Math.round(fittsDuration(dist, targetWidth, baseDurationMs)); - const [cp1, cp2] = randomControlPoints(from, to); - - if (!enableOvershoot || dist <= 200) { - const { points, timestamps } = discretizePath(from, cp1, cp2, to, totalMs, stepMs); - return { points, totalMs, timestamps }; - } - - // Overshoot: go past target by 5-15px, then correct - const dx = to.x - from.x; - const dy = to.y - from.y; - const overshootDist = rand(5, 15); - const angle = Math.atan2(dy, dx) + rand(-0.2, 0.2); - const overshootPt: Point = { - x: to.x + Math.cos(angle) * overshootDist, - y: to.y + Math.sin(angle) * overshootDist, - }; - - // Phase 1: from → overshoot (80% of time) - const phase1Ms = Math.round(totalMs * 0.8); - const phase1 = discretizePath(from, cp1, cp2, overshootPt, phase1Ms, stepMs); - - // Phase 2: overshoot → target (20% of time, small correction) - const phase2Ms = totalMs - phase1Ms; - const corrCp1: Point = { - x: overshootPt.x + rand(-2, 2), - y: overshootPt.y + rand(-2, 2), - }; - const corrCp2: Point = { - x: to.x + rand(-1, 1), - y: to.y + rand(-1, 1), - }; - const phase2 = discretizePath(overshootPt, corrCp1, corrCp2, to, phase2Ms, stepMs); - - // Concatenate, adjusting phase2 timestamps - const points = [...phase1.points, ...phase2.points.slice(1)]; - const timestamps = [ - ...phase1.timestamps, - ...phase2.timestamps.slice(1).map((t) => t + phase1Ms), - ]; - - return { points, totalMs, timestamps }; -} - -/** - * Add a random offset to a center point, staying within bounds. - * Used for randomizing click targets within an element's bounding box. 
- */ -export function addRandomOffset( - center: Point, - bounds: { width: number; height: number }, -): Point { - // Stay within inner 60% of the element - const rx = rand(-0.3, 0.3) * bounds.width; - const ry = rand(-0.3, 0.3) * bounds.height; - return { - x: Math.round(center.x + rx), - y: Math.round(center.y + ry), - }; -} diff --git a/src/humanizer/timing.ts b/src/humanizer/timing.ts deleted file mode 100644 index a87c64c..0000000 --- a/src/humanizer/timing.ts +++ /dev/null @@ -1,231 +0,0 @@ -/** - * Typing & scroll timing models for human-like input simulation. - * - * Provides keystroke delay calculation with bigram frequency modifiers, - * typo simulation with QWERTY neighbor mapping, and scroll velocity - * distribution with easeInOutQuad profiles. - */ - -// ── Typing models ──────────────────────────────────────────────────── - -export interface TypingProfile { - /** Words per minute (default: 40). */ - wpm?: number; - /** Probability of a typo per character, 0-1 (default: 0). */ - errorRate?: number; -} - -export interface KeyDelay { - /** The key to press (single char or special like "Backspace"). */ - key: string; - /** Delay in ms before pressing this key. */ - delayMs: number; -} - -/** Top 30 English bigrams — typed faster due to muscle memory. */ -const FAST_BIGRAMS = new Set([ - "th", "he", "in", "er", "an", "re", "on", "at", "en", "nd", - "ti", "es", "or", "te", "of", "ed", "is", "it", "al", "ar", - "st", "to", "nt", "ng", "se", "ha", "as", "ou", "io", "le", -]); - -/** QWERTY neighbor-key map for realistic typo targets. 
*/ -const QWERTY_NEIGHBORS: Record = { - q: ["w", "a"], - w: ["q", "e", "a", "s"], - e: ["w", "r", "s", "d"], - r: ["e", "t", "d", "f"], - t: ["r", "y", "f", "g"], - y: ["t", "u", "g", "h"], - u: ["y", "i", "h", "j"], - i: ["u", "o", "j", "k"], - o: ["i", "p", "k", "l"], - p: ["o", "l"], - a: ["q", "w", "s", "z"], - s: ["a", "w", "e", "d", "z", "x"], - d: ["s", "e", "r", "f", "x", "c"], - f: ["d", "r", "t", "g", "c", "v"], - g: ["f", "t", "y", "h", "v", "b"], - h: ["g", "y", "u", "j", "b", "n"], - j: ["h", "u", "i", "k", "n", "m"], - k: ["j", "i", "o", "l", "m"], - l: ["k", "o", "p"], - z: ["a", "s", "x"], - x: ["z", "s", "d", "c"], - c: ["x", "d", "f", "v"], - v: ["c", "f", "g", "b"], - b: ["v", "g", "h", "n"], - n: ["b", "h", "j", "m"], - m: ["n", "j", "k"], -}; - -function rand(min: number, max: number): number { - return min + Math.random() * (max - min); -} - -function jitter(base: number): number { - return base * rand(0.85, 1.15); -} - -function pickRandom(arr: T[]): T { - return arr[Math.floor(Math.random() * arr.length)]; -} - -function isUpperCase(ch: string): boolean { - return ch !== ch.toLowerCase() && ch === ch.toUpperCase(); -} - -/** - * Calculate keystroke delays for a text string with human-like timing. - * - * Models: - * - Base delay from WPM (assuming 5 chars per word) - * - Bigram frequency modifier: common bigrams → 0.8x delay - * - Shift penalty: uppercase letters → +50ms - * - Word boundary pause: spaces → +20-60ms - * - Random jitter: ±15% on each delay - * - Optional typo injection with backspace correction - */ -export function calculateKeyDelays(text: string, profile: TypingProfile = {}): KeyDelay[] { - const wpm = profile.wpm ?? 40; - const errorRate = Math.max(0, Math.min(1, profile.errorRate ?? 
0)); - - // Base delay: WPM → ms per character (5 chars per word) - const baseDelayMs = (60_000 / wpm) / 5; - - const result: KeyDelay[] = []; - let prevChar = ""; - - for (let i = 0; i < text.length; i++) { - const ch = text[i]; - let delay = baseDelayMs; - - // Bigram modifier - const bigram = (prevChar + ch).toLowerCase(); - if (bigram.length === 2 && FAST_BIGRAMS.has(bigram)) { - delay *= 0.8; - } - - // Shift penalty for uppercase - if (isUpperCase(ch)) { - delay += 50; - } - - // Word boundary pause - if (ch === " ") { - delay += rand(20, 60); - } - - // Apply jitter - delay = jitter(delay); - - // Typo simulation - if (errorRate > 0 && Math.random() < errorRate) { - const lower = ch.toLowerCase(); - const neighbors = QWERTY_NEIGHBORS[lower]; - if (neighbors && neighbors.length > 0) { - // Type wrong key - let wrongKey = pickRandom(neighbors); - if (isUpperCase(ch)) wrongKey = wrongKey.toUpperCase(); - result.push({ key: wrongKey, delayMs: Math.round(delay) }); - - // Pause before noticing the error - result.push({ key: "Backspace", delayMs: Math.round(rand(80, 200)) }); - - // Retype correct key with slight hesitation - result.push({ key: ch, delayMs: Math.round(rand(50, 120)) }); - - prevChar = ch; - continue; - } - } - - result.push({ key: ch, delayMs: Math.round(delay) }); - prevChar = ch; - } - - return result; -} - -// ── Scroll models ──────────────────────────────────────────────────── - -export interface ScrollOptions { - /** Total vertical scroll delta in pixels. */ - deltaY: number; - /** Total horizontal scroll delta in pixels (default: 0). */ - deltaX?: number; - /** Total scroll duration in ms (default: 400). */ - durationMs?: number; - /** Time step in ms between scroll events (default: 16 ~= 60Hz). */ - stepMs?: number; -} - -export interface ScrollStep { - /** Vertical delta for this step. */ - deltaY: number; - /** Horizontal delta for this step. */ - deltaX: number; - /** Delay in ms before dispatching this step. 
*/ - delayMs: number; -} - -function easeInOutQuad(t: number): number { - return t < 0.5 - ? 2 * t * t - : 1 - Math.pow(-2 * t + 2, 2) / 2; -} - -/** - * Calculate scroll steps with natural acceleration/deceleration. - * - * Uses easeInOutQuad velocity distribution across steps. - * Rounds deltas to integers with sum correction to ensure - * total scroll matches the requested amount exactly. - */ -export function calculateScrollSteps(opts: ScrollOptions): ScrollStep[] { - const totalDy = opts.deltaY; - const totalDx = opts.deltaX ?? 0; - const durationMs = opts.durationMs ?? 400; - const stepMs = opts.stepMs ?? 16; - - const steps = Math.max(1, Math.ceil(durationMs / stepMs)); - - // Calculate velocity weights via eased positions - const easedPositions: number[] = []; - for (let i = 0; i <= steps; i++) { - easedPositions.push(easeInOutQuad(i / steps)); - } - - // Deltas between consecutive eased positions (velocity-proportional) - const rawWeights: number[] = []; - for (let i = 0; i < steps; i++) { - rawWeights.push(easedPositions[i + 1] - easedPositions[i]); - } - - // Distribute total delta proportionally with integer rounding - const result: ScrollStep[] = []; - let accDy = 0; - let accDx = 0; - const delayPerStep = Math.round(durationMs / steps); - - for (let i = 0; i < steps; i++) { - const targetDy = Math.round(totalDy * easedPositions[i + 1]); - const targetDx = Math.round(totalDx * easedPositions[i + 1]); - - const dy = targetDy - accDy; - const dx = targetDx - accDx; - accDy = targetDy; - accDx = targetDx; - - // Skip zero-delta steps (except first and last) - if (dy === 0 && dx === 0 && i > 0 && i < steps - 1) continue; - - result.push({ - deltaY: dy, - deltaX: dx, - delayMs: delayPerStep, - }); - } - - return result; -} diff --git a/src/tools/humanizer.ts b/src/tools/humanizer.ts index 7c97fee..d89eb9f 100644 --- a/src/tools/humanizer.ts +++ b/src/tools/humanizer.ts @@ -1,9 +1,9 @@ /** - * Humanizer MCP tools — human-like browser input via Playwright. 
+ * Humanizer MCP tools — thin wrappers over cloakbrowser-patched Playwright. * - * Bound to a browser interceptor target_id (from interceptor_browser_launch). - * humanizer_click supports locator-first targeting (selector | role+name | - * text | label) so callers no longer need to guess pixel coordinates. + * cloakbrowser's `humanize: true` (on by default) already provides Bezier + * mouse paths, realistic typing with CDP-trusted Shift handling, and smooth + * scrolling. These tools just expose the patched methods to MCP callers. */ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; @@ -21,18 +21,16 @@ export function registerHumanizerTools(server: McpServer): void { server.tool( "humanizer_move", - "Move mouse along a human-like Bezier curve to target coordinates. " + - "Uses Fitts's law velocity scaling and eased timing profile.", + "Move mouse to target coordinates. cloakbrowser's humanize patches " + + "page.mouse.move with a Bezier-curved path.", { target_id: z.string().describe("Browser target ID from interceptor_browser_launch"), x: z.number().describe("Destination X coordinate"), y: z.number().describe("Destination Y coordinate"), - duration_ms: z.number().optional().default(600) - .describe("Base duration in ms before Fitts scaling (default: 600)"), }, - async ({ target_id, x, y, duration_ms }) => { + async ({ target_id, x, y }) => { try { - const result = await humanizerEngine.moveMouse(target_id, x, y, duration_ms); + const result = await humanizerEngine.moveMouse(target_id, x, y); return { content: [{ type: "text", @@ -55,9 +53,9 @@ export function registerHumanizerTools(server: McpServer): void { server.tool( "humanizer_click", - "Click an element using Playwright locators — no need to guess pixel coordinates. " + - "Auto-waits for visible + enabled + stable + in-view before clicking. Pass one of: " + - "selector (CSS/XPath), role + optional name, text, label, or raw x+y coords as fallback.", + "Click an element. 
Pass one of: selector (CSS/XPath), role + optional name, " + + "text, label, or raw x+y coords as fallback. cloakbrowser's humanize handles " + + "the Bezier path and click timing; locator-based calls auto-wait for visible.", { target_id: z.string().describe("Browser target ID from interceptor_browser_launch"), selector: z.string().optional().describe("CSS or XPath selector (e.g. 'button.submit', '//button[@id=\"go\"]')"), @@ -71,10 +69,10 @@ export function registerHumanizerTools(server: McpServer): void { .describe("Mouse button (default: left)"), click_count: z.number().optional().default(1) .describe("Number of clicks (default: 1, use 2 for double-click)"), - move_duration_ms: z.number().optional().default(600) - .describe("Base duration for mouse movement (default: 600)"), + timeout_ms: z.number().optional().default(15000) + .describe("Max ms to wait for locator to be visible + actionable (default: 15000)"), }, - async ({ target_id, selector, role, name, text, label, x, y, button, click_count, move_duration_ms }) => { + async ({ target_id, selector, role, name, text, label, x, y, button, click_count, timeout_ms }) => { try { const result = await humanizerEngine.click(target_id, { selector, @@ -86,7 +84,7 @@ export function registerHumanizerTools(server: McpServer): void { y, button, clickCount: click_count, - moveDurationMs: move_duration_ms, + timeoutMs: timeout_ms, }); return { content: [{ @@ -113,20 +111,18 @@ export function registerHumanizerTools(server: McpServer): void { server.tool( "humanizer_type", - "Type text with human-like keystroke timing. " + - "Models per-character delays based on WPM, bigram frequency, shift penalty, " + - "word boundary pauses, and optional typo injection with backspace correction.", + "Type text into the focused element. 
cloakbrowser's humanize patches " + + "page.keyboard.type with realistic per-char timing and CDP-trusted Shift " + + "handling (uppercase + symbols preserved).", { target_id: z.string().describe("Browser target ID from interceptor_browser_launch"), text: z.string().describe("Text to type"), - wpm: z.number().optional().default(40) - .describe("Typing speed in words per minute (default: 40)"), - error_rate: z.number().optional().default(0) - .describe("Typo probability per character, 0-1 (default: 0)"), + delay_ms: z.number().optional() + .describe("Extra delay per character in ms. Omit to let cloakbrowser pick its own humanized cadence."), }, - async ({ target_id, text, wpm, error_rate }) => { + async ({ target_id, text, delay_ms }) => { try { - const result = await humanizerEngine.typeText(target_id, text, { wpm, errorRate: error_rate }); + const result = await humanizerEngine.typeText(target_id, text, { delayMs: delay_ms }); return { content: [{ type: "text", @@ -139,9 +135,6 @@ export function registerHumanizerTools(server: McpServer): void { total_ms: result.totalMs, events_dispatched: result.eventsDispatched, chars_typed: result.charsTyped, - effective_wpm: text.length > 0 - ? Math.round((text.length / 5) / (result.totalMs / 60_000)) - : 0, }, }), }], @@ -156,19 +149,16 @@ export function registerHumanizerTools(server: McpServer): void { server.tool( "humanizer_scroll", - "Scroll with natural acceleration/deceleration using easeInOutQuad velocity distribution. " + - "Dispatches multiple wheel events to simulate human scroll behavior.", + "Dispatch a wheel event. 
Raw page.mouse.wheel — single event, not multi-step.", { target_id: z.string().describe("Browser target ID from interceptor_browser_launch"), delta_y: z.number().describe("Vertical scroll delta in pixels (positive = scroll down)"), delta_x: z.number().optional().default(0) .describe("Horizontal scroll delta in pixels (default: 0)"), - duration_ms: z.number().optional().default(400) - .describe("Total scroll duration in ms (default: 400)"), }, - async ({ target_id, delta_y, delta_x, duration_ms }) => { + async ({ target_id, delta_y, delta_x }) => { try { - const result = await humanizerEngine.scroll(target_id, delta_y, delta_x, duration_ms); + const result = await humanizerEngine.scroll(target_id, delta_y, delta_x); return { content: [{ type: "text",