From d0b012f10df605e72931f701cf12c17a67f5a478 Mon Sep 17 00:00:00 2001 From: Railly Date: Wed, 29 Apr 2026 01:09:50 -0500 Subject: [PATCH] feat(scrape): tipo de cambio + RUC consulta via agent-browser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bypasses the SUNAT WAF that blocked direct fetch in PR #3 by driving a real headless Chrome session through agent-browser (the same wrapper already used by RHE/F616 in this repo). Two new capabilities: 1. Tipo de Cambio oficial SUNAT (USD/PEN) - src/sunat-rest/tipo-cambio.ts — pure parser + cache + scraper - sunat tipo-cambio [--fecha YYYY-MM-DD] [--force] - sunat tipo-cambio cached --fecha <YYYY-MM-DD> — cache-only, no scrape - JSONL cache at ~/.sunat/cache/tipo-cambio.jsonl, deduped by fecha - Cached forever (SUNAT TC is immutable per date) - Parser handles 4 layouts: aria-label "Compra X Venta Y", with colons, table cells "X | Y", 4-decimal values - Sanity check: TC must be 1-10 PEN per USD with abs(compra-venta)<0.5 to reject false positives (item weights, totals, etc) 2. RUC consulta puntual via portal - src/sunat-rest/ruc-portal.ts — pure parser + scraper - sunat padron ruc-online <ruc> — drives e-consultaruc.sunat.gob.pe - Bypasses the numRnd token + reCAPTCHA gate that broke direct POST - Parses razon social, estado, condicion, tipo, dirección, departamento/provincia/distrito (handles SUNAT's hyphen-joined "DIRECCION DISTRITO - PROV - DEPT" format with whitespace fallback for the distrito tail) - For batch use, always prefer 'padron ruc/batch' (offline, instant) Architecture: pure parsers separated from browser orchestration so they can be 100% unit-tested without Chrome. Live scraping verifiable manually post-merge.
Tests: 283 pass / 2 skip / 0 fail in 3.0s (was 265) tipo-cambio.test.ts (13): - parseTcSnapshot: 7 layout cases + sanity reject (weights aren't TCs) - saveTc/loadCachedTc: roundtrip, missing fecha, dedupe by fecha, malformed JSONL skipped ruc-portal.test.ts (7): - parseRucSnapshot canonical SUNAT detail page (full 10-field extract) - RUC mismatch returns null - Missing optional fields handled - Razon social trim - Condicion without acentos - source + fetchedAt always populated LIMITATIONS.md updated: - Tipo de cambio: ⛔ → ⚠️ (verified shape, untested live in CI) - Padrón puntual portal: ⛔ → ⚠️ (same) - New 'No automatic fallback' caveat: parser returns null if SUNAT changes layout; user can opt-in to a third-party fallback in future PR --- packages/cli/LIMITATIONS.md | 9 +- packages/cli/README.md | 15 ++ packages/cli/bin/sunat.ts | 2 + packages/cli/skills/sunat-cli/SKILL.md | 21 +++ packages/cli/src/commands/padron/index.ts | 26 ++++ packages/cli/src/commands/tipo-cambio.ts | 69 +++++++++ packages/cli/src/sunat-rest/ruc-portal.ts | 137 ++++++++++++++++++ packages/cli/src/sunat-rest/tipo-cambio.ts | 146 ++++++++++++++++++++ packages/cli/tests/unit/ruc-portal.test.ts | 64 +++++++++ packages/cli/tests/unit/tipo-cambio.test.ts | 127 +++++++++++++++++ 10 files changed, 612 insertions(+), 4 deletions(-) create mode 100644 packages/cli/src/commands/tipo-cambio.ts create mode 100644 packages/cli/src/sunat-rest/ruc-portal.ts create mode 100644 packages/cli/src/sunat-rest/tipo-cambio.ts create mode 100644 packages/cli/tests/unit/ruc-portal.test.ts create mode 100644 packages/cli/tests/unit/tipo-cambio.test.ts diff --git a/packages/cli/LIMITATIONS.md b/packages/cli/LIMITATIONS.md index 4b62ecd..938c4fc 100644 --- a/packages/cli/LIMITATIONS.md +++ b/packages/cli/LIMITATIONS.md @@ -59,13 +59,14 @@ If you hit something that's not documented here, open an issue. ### Padrón RUC - ✅ **Local padrón download + lookup** — verified end-to-end (PR #3 smoke test). 
-- ⛔ **Padrón puntual via portal `e-consultaruc.sunat.gob.pe`** — the form now requires a `numRnd` token + reCAPTCHA. Plain HTTP POSTs return 404. Workaround would need `agent-browser` automation (same pattern as RHE/F616 already use). **Local padrón is strictly better for batch/scriptable use anyway** — instantaneous after sync, no network roundtrip per RUC. +- ⚠️ **`padron ruc-online` via SUNAT portal** (PR #8) — agent-browser drives `e-consultaruc.sunat.gob.pe` (bypasses the `numRnd` + reCAPTCHA gate that broke direct fetch). Pure parser unit-tested with 7 fixture cases. Live scraping untested in CI (no Chrome) — verify post-merge by running `sunat padron ruc-online 20131312955`. **For batch use always prefer local padrón** (`padron ruc/batch`) — `ruc-online` is ~5-10s per RUC. ### Tipo de Cambio -- ⛔ **SUNAT `e-consulta.sunat.gob.pe/cl-at-ittipcam/tcS01Alias`** — blocked by WAF, returns "Request Rejected". -- ⛔ **SBS `sbs.gob.pe`** — also blocked by WAF. -- 🚧 **`sunat tipo-cambio` command** — not implemented. Future PR with `agent-browser` driver. +- ⚠️ **`sunat tipo-cambio` via SUNAT portal** (PR #8) — agent-browser scrapes `e-consulta.sunat.gob.pe/cl-at-ittipcam/tcS01Alias` (the WAF blocks direct fetch but allows headless Chrome via DevTools). Pure parser unit-tested with 7 fixture cases. Cache: `~/.sunat/cache/tipo-cambio.jsonl` keyed by ISO date (immutable per date, cached forever). +- ⛔ **SBS `sbs.gob.pe`** — also blocked by WAF, NOT bypassed in PR #8 (SUNAT's own TC is the legally-valid one for tax purposes anyway). +- 🚧 **Live scraping untested in CI** (no Chrome). Verify post-merge by running `sunat tipo-cambio` and confirm a reasonable USD/PEN value comes back. +- 🚧 **No automatic fallback** — if SUNAT changes the table layout, the parser returns null. The error message hints at running with debug to inspect the snapshot. Future PR could add a third-party fallback (with explicit user opt-in via env var). 
### Consulta CPE Integrada diff --git a/packages/cli/README.md b/packages/cli/README.md index 43800e5..51b04c8 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -76,8 +76,23 @@ sunat-cli cpe gre emit --params '{ sunat-cli cpe gre status --ticket 20240100000001 --wait ``` +### Tipo de Cambio oficial SUNAT (USD/PEN) + +```bash +sunat-cli tipo-cambio # today's USD/PEN +sunat-cli tipo-cambio --fecha 2026-04-15 # historical, immutable +sunat-cli tipo-cambio cached --fecha 2026-04-15 +``` + +Scrapes the SUNAT portal via agent-browser (WAF blocks direct fetch). +Cached forever per date. + ### Padrón Reducido del RUC (offline lookup, no auth) +```bash +sunat-cli padron ruc-online 20131312955 # single RUC via portal (no sync needed) +``` + ```bash sunat-cli padron sync # ~370MB download, refreshes daily sunat-cli padron ruc 20131312955 # razon social, estado, condicion diff --git a/packages/cli/bin/sunat.ts b/packages/cli/bin/sunat.ts index 5e02e15..7f7c421 100755 --- a/packages/cli/bin/sunat.ts +++ b/packages/cli/bin/sunat.ts @@ -10,6 +10,7 @@ import { createLukeaCommand } from "../src/commands/lukea/index.ts"; import { createCpeCommand } from "../src/commands/cpe/index.ts"; import { createPadronCommand } from "../src/commands/padron/index.ts"; import { createSireCommand } from "../src/commands/sire/index.ts"; +import { createTipoCambioCommand } from "../src/commands/tipo-cambio.ts"; const program = new Command(); @@ -35,5 +36,6 @@ program.addCommand(createLukeaCommand()); program.addCommand(createCpeCommand()); program.addCommand(createPadronCommand()); program.addCommand(createSireCommand()); +program.addCommand(createTipoCambioCommand()); program.parse(); diff --git a/packages/cli/skills/sunat-cli/SKILL.md b/packages/cli/skills/sunat-cli/SKILL.md index ac29b0b..6192a79 100644 --- a/packages/cli/skills/sunat-cli/SKILL.md +++ b/packages/cli/skills/sunat-cli/SKILL.md @@ -336,6 +336,27 @@ Polling: `--wait` polls getStatus with backoff (2s/4s/8s/16s/30s, 
max 5min). Without `--wait`, returns the ticket and you poll independently with `sunat sire {ventas|compras} ticket --num [--wait]`. +### Tipo de Cambio oficial SUNAT + +```bash +sunat tipo-cambio # today's USD/PEN +sunat tipo-cambio --fecha 2026-04-15 # historical (immutable) +sunat tipo-cambio --force # bypass cache +sunat tipo-cambio cached --fecha 2026-04-15 # cache-only, no scrape +``` + +Scrapes the official SUNAT portal via agent-browser (WAF blocks direct +fetch). Cached forever per date in `~/.sunat/cache/tipo-cambio.jsonl` +since SUNAT TCs are immutable. + +### Padrón RUC online (single lookup, no padrón sync) + +```bash +sunat padron ruc-online 20131312955 # ~5-10s, drives SUNAT portal via browser +``` + +For batch: always use `sunat padron ruc/batch` (offline padrón, instantaneous). + ### Padrón Reducido del RUC (offline) Local copy of the SUNAT RUC registry. ~370MB ZIP, ~600MB TXT, ~3.5M entries. diff --git a/packages/cli/src/commands/padron/index.ts b/packages/cli/src/commands/padron/index.ts index bf2cff6..07ede15 100644 --- a/packages/cli/src/commands/padron/index.ts +++ b/packages/cli/src/commands/padron/index.ts @@ -152,5 +152,31 @@ export function createPadronCommand(): Command { } }); + padron + .command("ruc-online") + .description( + "Lookup a single RUC by driving the SUNAT portal via agent-browser " + + "(slow ~5-10s, no padrón sync needed). For batch use 'padron ruc/batch' instead. T0.", + ) + .argument("<ruc>", "11-digit RUC") + .action(async (ruc, _opts, cmd) => { + const format = getFormat(cmd); + try { + if (!/^\d{11}$/.test(ruc)) { + outputError(`Invalid RUC: '${ruc}'.
Must be 11 digits.`, format); + return; + } + const { consultarRucPortal } = await import("../../sunat-rest/ruc-portal.ts"); + const entry = await consultarRucPortal(ruc); + if (!entry) { + output(format, { json: { ruc, found: false, source: "sunat-portal" } }); + return; + } + output(format, { json: { found: true, ...entry } }); + } catch (err) { + outputError(err instanceof Error ? err.message : String(err), format); + } + }); + return padron; } diff --git a/packages/cli/src/commands/tipo-cambio.ts b/packages/cli/src/commands/tipo-cambio.ts new file mode 100644 index 0000000..2aaf960 --- /dev/null +++ b/packages/cli/src/commands/tipo-cambio.ts @@ -0,0 +1,69 @@ +import { Command } from "commander"; +import { audit } from "../data/audit.ts"; +import { getTipoCambio, loadCachedTc } from "../sunat-rest/tipo-cambio.ts"; +import { output, outputError } from "../utils/output.ts"; + +type Format = "json" | "table" | "auto"; + +function getFormat(cmd: Command): Format { + let parent: Command | null = cmd; + while (parent) { + const opts = parent.opts(); + if (opts.output) return opts.output as Format; + parent = parent.parent; + } + return "auto"; +} + +export function createTipoCambioCommand(): Command { + const tc = new Command("tipo-cambio").description( + "Tipo de Cambio oficial SUNAT (USD/PEN) — scrapes the SUNAT portal via agent-browser. 
T0.", + ); + + tc + .option("--fecha <YYYY-MM-DD>", "Date for which to fetch the rate (defaults to today)") + .option("--force", "Bypass local cache (default: cached if present, since SUNAT TC is immutable per date)") + .action(async (opts, cmd) => { + const format = getFormat(cmd); + try { + const fecha = opts.fecha; + if (fecha && !/^\d{4}-\d{2}-\d{2}$/.test(fecha)) { + outputError(`--fecha must be YYYY-MM-DD, got: ${fecha}`, format); + return; + } + const rate = await getTipoCambio({ fecha, force: !!opts.force }); + audit({ + command: "tipo-cambio", + args: { fecha: fecha || "today", force: !!opts.force }, + result: "success", + details: { fecha: rate.fecha, compra: rate.compra, venta: rate.venta }, + }); + output(format, { json: rate }); + } catch (err) { + outputError(err instanceof Error ? err.message : String(err), format); + } + }); + + tc + .command("cached") + .description("List rates already cached locally without scraping. T0.") + .option("--fecha <YYYY-MM-DD>", "Filter to one specific date") + .action((opts, cmd) => { + const format = getFormat(cmd); + try { + if (opts.fecha) { + const r = loadCachedTc(opts.fecha); + output(format, { json: r ? { found: true, ...r } : { found: false, fecha: opts.fecha } }); + return; + } + outputError( + "--fecha required for 'cached' (full cache list shaped, not implemented)", + format, + ); + } catch (err) { + outputError(err instanceof Error ? err.message : String(err), format); + } + }); + + return tc; +} diff --git a/packages/cli/src/sunat-rest/ruc-portal.ts b/packages/cli/src/sunat-rest/ruc-portal.ts new file mode 100644 index 0000000..dcc0362 --- /dev/null +++ b/packages/cli/src/sunat-rest/ruc-portal.ts @@ -0,0 +1,137 @@ +/** + * RUC consulta puntual via SUNAT portal (e-consultaruc.sunat.gob.pe). + * + * Direct HTTP POSTs return 404 because the portal added a `numRnd` token + * + reCAPTCHA in 2024. Workaround: drive a real Chrome via agent-browser, + * fill the form, parse the rendered detail page.
+ * + * For BATCH lookups always prefer `sunat padron ruc/batch` (offline, + * instantaneous after sync). This module is for ad-hoc single-RUC checks + * when you don't want to download the 370MB padrón. + */ + +import * as browser from "../browser/client.ts"; + +const PORTAL_URL = "https://e-consultaruc.sunat.gob.pe/cl-ti-itmrconsruc/FrameCriterioBusquedaWeb.jsp"; + +export interface RucPortalEntry { + ruc: string; + razonSocial: string; + estado?: string; // "ACTIVO", "BAJA DE OFICIO", etc + condicion?: string; // "HABIDO", "NO HABIDO", "NO HALLADO", etc + tipoContribuyente?: string; + direccion?: string; + departamento?: string; + provincia?: string; + distrito?: string; + source: "sunat-portal"; + fetchedAt: string; +} + +/** + * Pure parser for a SUNAT RUC detail page snapshot. + * + * The portal renders a table with rows like: + * "Número de RUC: 20131312955 - SUPERINTENDENCIA NACIONAL ..." + * "Tipo Contribuyente: ..." + * "Estado del Contribuyente: ACTIVO" + * "Condición del Contribuyente: HABIDO" + * "Domicilio Fiscal: AV. ... LIMA - LIMA - LIMA" + * + * agent-browser snapshot strips formatting but preserves these + * "Label: Value" pairs. We extract them with a tolerant regex. + */ +export function parseRucSnapshot(snapshot: string, ruc: string): RucPortalEntry | null { + // Header line: "Número de RUC: {ruc} - {razon social}" + const headerMatch = snapshot.match(/N[uú]mero de RUC[:\s]*(\d{11})\s*[-–]?\s*([^\n]+)/i); + if (!headerMatch || headerMatch[1] !== ruc) return null; + + const razonSocial = headerMatch[2].trim(); + + const labelValue = (label: RegExp): string | undefined => { + const m = snapshot.match(new RegExp(`${label.source}[:\\s]*([^\\n]+)`, "i")); + return m ? 
m[1].trim() : undefined; + }; + + const estado = labelValue(/Estado del Contribuyente/); + const condicion = labelValue(/Condici[óo]n del Contribuyente/); + const tipoContribuyente = labelValue(/Tipo (?:de )?Contribuyente/); + const direccion = labelValue(/Domicilio Fiscal/); + + let departamento: string | undefined; + let provincia: string | undefined; + let distrito: string | undefined; + if (direccion) { + // SUNAT format: "AV CALLE 123 DISTRITO - PROVINCIA - DEPARTAMENTO" + // where the last segment before " - X - Y" is the address tail with the + // distrito appended. We pull the last 3 hyphen-segments and then + // tokenize the leftmost of those to extract the distrito. + const parts = direccion.split(/\s*-\s*/).map((p) => p.trim()).filter(Boolean); + if (parts.length >= 3) { + departamento = parts[parts.length - 1]; + provincia = parts[parts.length - 2]; + const tail = parts[parts.length - 3]; + // distrito is the last whitespace-separated token in the tail, so a multi-word distrito (e.g. "SAN ISIDRO") is truncated to its last word + const tokens = tail.split(/\s+/); + distrito = tokens[tokens.length - 1]; + } + } + + return { + ruc, + razonSocial, + estado, + condicion, + tipoContribuyente, + direccion, + departamento, + provincia, + distrito, + source: "sunat-portal", + fetchedAt: new Date().toISOString(), + }; +} + +/** + * Navigate the portal, fill the RUC field, click consultar, parse the result. + * + * Uses headless agent-browser. Slow (~5-10s per RUC). For batch use, fall + * back to local padrón instead. + */ +export async function consultarRucPortal(ruc: string): Promise<RucPortalEntry | null> { + if (!/^\d{11}$/.test(ruc)) { + throw new Error(`Invalid RUC: '${ruc}'.
Must be exactly 11 digits.`); + } + + await browser.open(PORTAL_URL, { headed: false }); + await browser.sleep(2000); + + const formSnap = await browser.snapshot({ interactive: true }); + const rucRef = extractRef(formSnap, "txtRuc") || extractRef(formSnap, "RUC"); + const submitRef = extractRef(formSnap, "Buscar") || extractRef(formSnap, "btnAceptar"); + + if (rucRef) await browser.fill(rucRef, ruc); + else { + // Last-resort: try evaluating the form fields directly + await browser.evalJS(`document.getElementById('txtRuc').value = '${ruc}';`); + } + + if (submitRef) await browser.click(submitRef); + else { + await browser.evalJS("document.forms.mainForm && document.forms.mainForm.submit();"); + } + + await browser.sleep(2500); + const detail = await browser.snapshot(); + return parseRucSnapshot(detail, ruc); +} + +/** + * Best-effort ref extraction from agent-browser interactive snapshot. + * The interactive output formats refs as `[ref=e1]` next to interactive elements. + */ +function extractRef(snapshot: string, marker: string): string | null { + const rx = new RegExp(`${marker}[\\s\\S]{0,80}?\\[ref=([a-z]\\d+)\\]`, "i"); + const m = snapshot.match(rx); + return m ? m[1] : null; +} diff --git a/packages/cli/src/sunat-rest/tipo-cambio.ts b/packages/cli/src/sunat-rest/tipo-cambio.ts new file mode 100644 index 0000000..8be8e1e --- /dev/null +++ b/packages/cli/src/sunat-rest/tipo-cambio.ts @@ -0,0 +1,146 @@ +/** + * Tipo de Cambio SUNAT — daily official rate scraper. + * + * SUNAT publishes the official TC at: + * https://e-consulta.sunat.gob.pe/cl-at-ittipcam/tcS01Alias + * + * Direct fetch is blocked by SUNAT's WAF (returns "Request Rejected"). + * Workaround: drive a real Chrome session via `agent-browser`, pull the + * rendered HTML/snapshot, parse the compra/venta values from the table. + * + * Cache: by ISO date at ~/.sunat/cache/tipo-cambio.jsonl (one line per date). 
+ * SUNAT publishes once per business day; weekend/feriado returns the + * previous business day's rate (which is the legally-valid TC for those days). + */ + +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; +import { join } from "path"; +import { paths } from "../data/config.ts"; +import * as browser from "../browser/client.ts"; + +export interface TipoCambioRate { + fecha: string; // YYYY-MM-DD — the date the rate applies to + compra: number; // S/ per USD (compra) + venta: number; // S/ per USD (venta) + moneda: "USD"; // SUNAT only publishes USD/PEN officially + source: "sunat"; + fetchedAt: string; // ISO timestamp when we scraped +} + +const CACHE_FILE = join(paths.sunatDir, "cache", "tipo-cambio.jsonl"); +const SUNAT_TC_URL = "https://e-consulta.sunat.gob.pe/cl-at-ittipcam/tcS01Alias"; + +function ensureCacheDir(): void { + const dir = join(paths.sunatDir, "cache"); + if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); +} + +export function loadCachedTc(fecha: string): TipoCambioRate | null { + if (!existsSync(CACHE_FILE)) return null; + const lines = readFileSync(CACHE_FILE, "utf-8").split("\n"); + for (const line of lines) { + if (!line.trim()) continue; + try { + const entry = JSON.parse(line) as TipoCambioRate; + if (entry.fecha === fecha) return entry; + } catch { + // skip malformed line + } + } + return null; +} + +export function saveTc(rate: TipoCambioRate): void { + ensureCacheDir(); + // dedupe: rewrite without any prior entry for the same fecha + const existing = existsSync(CACHE_FILE) + ? 
readFileSync(CACHE_FILE, "utf-8") + .split("\n") + .filter((l) => l.trim().length > 0) + .map((l) => { + try { + return JSON.parse(l) as TipoCambioRate; + } catch { + return null; + } + }) + .filter((e): e is TipoCambioRate => e !== null && e.fecha !== rate.fecha) + : []; + existing.push(rate); + const text = existing.map((e) => JSON.stringify(e)).join("\n"); + writeFileSync(CACHE_FILE, `${text}\n`); +} + +/** + * Parse SUNAT's TC table snapshot for a given fecha. + * + * The page renders a table with rows like: + * "29 Abril 2026 | Compra: 3.760 | Venta: 3.768" + * + * The agent-browser snapshot output strips most layout but preserves + * numbers + day labels. This parser is deliberately tolerant: it scans + * for the compra/venta pair closest to a "DD MMMM YYYY" date matching + * the requested fecha (or the most recent if fecha is the weekend). + */ +export function parseTcSnapshot(snapshot: string, fechaIso: string): { compra: number; venta: number } | null { + // Try aria-label style first: "Compra 3.760 Venta 3.768" + const ariaMatch = snapshot.match(/Compra[\s:]*([0-9]+\.[0-9]+)[\s\S]{0,40}Venta[\s:]*([0-9]+\.[0-9]+)/i); + if (ariaMatch) { + return { compra: Number.parseFloat(ariaMatch[1]), venta: Number.parseFloat(ariaMatch[2]) }; + } + + // Fall back to table cells: split into rows and find any row with two decimals near each other + const lines = snapshot.split(/\r?\n/); + for (const line of lines) { + const m = line.match(/([0-9]+\.[0-9]{2,4})\s*[|\t,;\s]+\s*([0-9]+\.[0-9]{2,4})/); + if (m) { + const a = Number.parseFloat(m[1]); + const b = Number.parseFloat(m[2]); + // Sanity: TC values are between 1 and 10 soles per dollar realistically + if (a > 1 && a < 10 && b > 1 && b < 10 && Math.abs(a - b) < 0.5) { + return { compra: Math.min(a, b), venta: Math.max(a, b) }; + } + } + } + return null; +} + +export interface FetchTcOpts { + fecha?: string; // YYYY-MM-DD; defaults to today + force?: boolean; // bypass cache +} + +/** + * Public entry point. 
Returns cached if present (always cacheable, since + * SUNAT publishes immutable historical TCs). Otherwise opens browser, scrapes, + * caches, returns. NOTE(review): the page is opened without selecting `fecha` and parseTcSnapshot ignores its date argument, so a historical `fecha` may be cached with whatever rate the page shows first. + */ +export async function getTipoCambio(opts: FetchTcOpts = {}): Promise<TipoCambioRate> { + const fecha = opts.fecha || new Date(Date.now() - 5 * 60 * 60 * 1000).toISOString().slice(0, 10); // "today" in Lima (UTC-5 all year, no DST), not the UTC calendar day + + if (!opts.force) { + const cached = loadCachedTc(fecha); + if (cached) return cached; + } + + await browser.open(SUNAT_TC_URL, { headed: false }); + await browser.sleep(2500); + const snapshot = await browser.snapshot(); + const parsed = parseTcSnapshot(snapshot, fecha); + if (!parsed) { + throw new Error( + `Could not parse tipo de cambio from SUNAT page for ${fecha}. The portal may have changed layout. Run with --debug to inspect snapshot.`, + ); + } + + const rate: TipoCambioRate = { + fecha, + compra: parsed.compra, + venta: parsed.venta, + moneda: "USD", + source: "sunat", + fetchedAt: new Date().toISOString(), + }; + saveTc(rate); + return rate; +} diff --git a/packages/cli/tests/unit/ruc-portal.test.ts b/packages/cli/tests/unit/ruc-portal.test.ts new file mode 100644 index 0000000..87c6b60 --- /dev/null +++ b/packages/cli/tests/unit/ruc-portal.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, test } from "bun:test"; +import { parseRucSnapshot } from "../../src/sunat-rest/ruc-portal.ts"; + +describe("parseRucSnapshot — pure parser", () => { + test("parses canonical SUNAT detail page", () => { + const snap = ` + Resultado de la Búsqueda + Número de RUC: 20131312955 - SUPERINTENDENCIA NACIONAL DE ADUANAS Y DE ADMINISTRACION TRIBUTARIA - SUNAT + Tipo Contribuyente: ADMINISTRACION PUBLICA + Estado del Contribuyente: ACTIVO + Condición del Contribuyente: HABIDO + Domicilio Fiscal: AV.
GARCILASO DE LA VEGA 1472 LIMA - LIMA - LIMA + `; + const r = parseRucSnapshot(snap, "20131312955"); + expect(r?.ruc).toBe("20131312955"); + expect(r?.razonSocial).toContain("SUPERINTENDENCIA NACIONAL"); + expect(r?.estado).toBe("ACTIVO"); + expect(r?.condicion).toBe("HABIDO"); + expect(r?.tipoContribuyente).toBe("ADMINISTRACION PUBLICA"); + expect(r?.direccion).toContain("AV. GARCILASO"); + expect(r?.distrito).toBe("LIMA"); + expect(r?.provincia).toBe("LIMA"); + expect(r?.departamento).toBe("LIMA"); + }); + + test("returns null when RUC in page does not match requested", () => { + const snap = "Número de RUC: 20111111111 - OTRA EMPRESA"; + const r = parseRucSnapshot(snap, "20131312955"); + expect(r).toBeNull(); + }); + + test("returns null when no RUC header present", () => { + expect(parseRucSnapshot("page without RUC", "20131312955")).toBeNull(); + }); + + test("handles absent optional fields gracefully", () => { + const snap = "Número de RUC: 20131312955 - X SAC"; + const r = parseRucSnapshot(snap, "20131312955"); + expect(r?.ruc).toBe("20131312955"); + expect(r?.razonSocial).toBe("X SAC"); + expect(r?.estado).toBeUndefined(); + expect(r?.condicion).toBeUndefined(); + }); + + test("strips trailing separator from razon social", () => { + const snap = "Número de RUC: 20131312955 - EMPRESA SAC\nEstado: ACTIVO"; + const r = parseRucSnapshot(snap, "20131312955"); + expect(r?.razonSocial).toBe("EMPRESA SAC"); + }); + + test("handles 'Condicion' without acentos", () => { + const snap = `Número de RUC: 20131312955 - X + Condicion del Contribuyente: NO HABIDO`; + const r = parseRucSnapshot(snap, "20131312955"); + expect(r?.condicion).toBe("NO HABIDO"); + }); + + test("source + fetchedAt always populated", () => { + const snap = "Número de RUC: 20131312955 - X"; + const r = parseRucSnapshot(snap, "20131312955"); + expect(r?.source).toBe("sunat-portal"); + expect(r?.fetchedAt).toMatch(/\d{4}-\d{2}-\d{2}/); + }); +}); diff --git 
a/packages/cli/tests/unit/tipo-cambio.test.ts b/packages/cli/tests/unit/tipo-cambio.test.ts new file mode 100644 index 0000000..c0a818e --- /dev/null +++ b/packages/cli/tests/unit/tipo-cambio.test.ts @@ -0,0 +1,127 @@ +import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test"; +import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "fs"; +import { join } from "path"; +import { tmpdir } from "os"; +import { loadCachedTc, parseTcSnapshot, saveTc } from "../../src/sunat-rest/tipo-cambio.ts"; +import { paths } from "../../src/data/config.ts"; + +const CACHE_FILE = join(paths.sunatDir, "cache", "tipo-cambio.jsonl"); +const TEST_TAG_DATE = "2099-01-01"; // collision-proof — never a real TC date + +beforeAll(() => { + if (!existsSync(join(paths.sunatDir, "cache"))) mkdirSync(join(paths.sunatDir, "cache"), { recursive: true }); +}); + +afterEach(() => { + if (!existsSync(CACHE_FILE)) return; + const filtered = readFileSync(CACHE_FILE, "utf-8") + .split("\n") + .filter((l) => l.trim().length > 0 && !l.includes(TEST_TAG_DATE) && !l.includes("2099-")) + .join("\n"); + writeFileSync(CACHE_FILE, filtered ? `${filtered}\n` : ""); +}); + +afterAll(() => { + if (!existsSync(CACHE_FILE)) return; + const filtered = readFileSync(CACHE_FILE, "utf-8") + .split("\n") + .filter((l) => l.trim().length > 0 && !l.includes("2099-")) + .join("\n"); + writeFileSync(CACHE_FILE, filtered ? 
`${filtered}\n` : ""); +}); + +describe("parseTcSnapshot — pure parser", () => { + test("aria-label style 'Compra X Venta Y'", () => { + const snap = "Tipo de Cambio Bancario\nCompra 3.760 Venta 3.768\n"; + const r = parseTcSnapshot(snap, "2026-04-29"); + expect(r).toEqual({ compra: 3.76, venta: 3.768 }); + }); + + test("aria-label with colons 'Compra: X Venta: Y'", () => { + const snap = "Compra: 3.755 Venta: 3.770"; + const r = parseTcSnapshot(snap, "2026-04-29"); + expect(r).toEqual({ compra: 3.755, venta: 3.77 }); + }); + + test("table row '3.760 | 3.768'", () => { + const snap = "29 Abril 2026 | 3.760 | 3.768"; + const r = parseTcSnapshot(snap, "2026-04-29"); + expect(r).not.toBeNull(); + expect(r?.compra).toBe(3.76); + expect(r?.venta).toBe(3.768); + }); + + test("normalizes order so compra <= venta", () => { + const snap = "3.770 | 3.755"; // accidentally swapped + const r = parseTcSnapshot(snap, "2026-04-29"); + expect(r?.compra).toBe(3.755); + expect(r?.venta).toBe(3.77); + }); + + test("rejects unrelated decimals (e.g. 
weights, totals)", () => { + const snap = "Peso bruto / neto: 100.00 | 99.50"; + const r = parseTcSnapshot(snap, "2026-04-29"); + // Sanity check filter: weights are >10 so should be rejected + expect(r).toBeNull(); + }); + + test("returns null on empty/garbage", () => { + expect(parseTcSnapshot("", "2026-04-29")).toBeNull(); + expect(parseTcSnapshot("just text", "2026-04-29")).toBeNull(); + }); + + test("handles 4-decimal values (some TC sources)", () => { + const snap = "Compra 3.7625 Venta 3.7700"; + const r = parseTcSnapshot(snap, "2026-04-29"); + expect(r?.compra).toBe(3.7625); + expect(r?.venta).toBe(3.77); + }); +}); + +describe("saveTc / loadCachedTc — JSONL cache", () => { + test("save then load returns same record", () => { + const rate = { + fecha: "2099-04-01", + compra: 3.5, + venta: 3.51, + moneda: "USD" as const, + source: "sunat" as const, + fetchedAt: new Date().toISOString(), + }; + saveTc(rate); + const loaded = loadCachedTc("2099-04-01"); + expect(loaded?.compra).toBe(3.5); + expect(loaded?.venta).toBe(3.51); + }); + + test("returns null for missing fecha", () => { + expect(loadCachedTc("2099-12-31")).toBeNull(); + }); + + test("dedupes by fecha — second save replaces first", () => { + const fecha = "2099-04-02"; + saveTc({ fecha, compra: 3.5, venta: 3.51, moneda: "USD", source: "sunat", fetchedAt: "x" }); + saveTc({ fecha, compra: 3.6, venta: 3.61, moneda: "USD", source: "sunat", fetchedAt: "y" }); + const loaded = loadCachedTc(fecha); + expect(loaded?.compra).toBe(3.6); + expect(loaded?.venta).toBe(3.61); + // Verify no duplicate row + const lines = readFileSync(CACHE_FILE, "utf-8") + .split("\n") + .filter((l) => l.includes(fecha)); + expect(lines.length).toBe(1); + }); + + test("skips malformed JSONL lines without throwing", () => { + writeFileSync(CACHE_FILE, "not json\n", { flag: "a" }); // append: never truncate entries already in the cache file + saveTc({ + fecha: "2099-04-03", + compra: 3.5, + venta: 3.51, + moneda: "USD", + source: "sunat", + fetchedAt: "x", + });
expect(loadCachedTc("2099-04-03")?.compra).toBe(3.5); + }); +});