From 3d1581ba47e1b45781906410030e20543c812bb7 Mon Sep 17 00:00:00 2001 From: sudev5 Date: Wed, 29 Apr 2026 11:02:14 +0800 Subject: [PATCH 1/2] fix(douban): read search results from page data --- clis/douban/utils.js | 49 ++++++++++++++++++++++++++++++++++++--- clis/douban/utils.test.js | 48 ++++++++++++++++++++++++++++++++++++++ src/runtime.test.ts | 28 ++++++++++++++++++++++ src/runtime.ts | 3 ++- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 src/runtime.test.ts diff --git a/clis/douban/utils.js b/clis/douban/utils.js index 9bcaea877..c0389999f 100644 --- a/clis/douban/utils.js +++ b/clis/douban/utils.js @@ -570,6 +570,42 @@ export async function searchDouban(page, type, keyword, limit) { .map((item) => [String(item?.id || '').trim(), item]) .filter(([id]) => id), ); + const normalizeRawRating = (item) => { + const rating = item?.rating; + if (typeof rating === 'number') return Number.isFinite(rating) ? rating : 0; + if (typeof rating === 'string') return parseFloat(rating) || 0; + if (rating && typeof rating === 'object') { + return parseFloat(String(rating.value || rating.rating || rating.average || '0')) || 0; + } + return 0; + }; + const normalizeRawUrl = (item, id) => { + const rawUrl = normalize(item?.url || item?.uri || item?.link); + if (rawUrl) return rawUrl.startsWith('http') ? rawUrl : new URL(rawUrl, location.origin).toString(); + if (!id) return ''; + const domain = type === 'book' ? 'book.douban.com' : type === 'music' ? 'music.douban.com' : 'movie.douban.com'; + return 'https://' + domain + '/subject/' + id + '/'; + }; + const normalizeRawAbstract = (item) => normalize(item?.abstract || item?.abstract_2 || item?.description || ''); + const normalizeRawCover = (item) => normalize(item?.cover_url || item?.cover || item?.pic?.normal || item?.pic?.large || ''); + const appendRawItemResult = (item) => { + const id = String(item?.id || '').trim(); + const title = normalize(item?.title || item?.name); + const url = normalizeRawUrl(item, id); + if (!title || !url || !url.includes('/subject/') || seen.has(url)) return; + seen.add(url); + const abstract = normalizeRawAbstract(item); + results.push({ + rank: results.length + 1, + id: id || (url.match(/subject\\/(\\d+)/)?.[1] || ''), + type: inferDoubanSearchResultType(type, item), + title, + rating: normalizeRawRating(item), + abstract: abstract.slice(0, 100) + (abstract.length > 100 ? '...' : ''), + url, + cover: normalizeRawCover(item), + }); + }; for (let i = 0; i < 20; i += 1) { if (document.querySelector('.item-root .title-text, .item-root .title a')) break; @@ -593,18 +629,25 @@ export async function searchDouban(page, type, keyword, limit) { const abstract = normalize( el.querySelector('.meta.abstract, .meta, .abstract, .subject-abstract, p')?.textContent, ); + const effectiveAbstract = abstract || normalizeRawAbstract(rawItem); results.push({ rank: results.length + 1, id, type: inferDoubanSearchResultType(type, rawItem), title, - rating: ratingText.includes('.') ? parseFloat(ratingText) : 0, - abstract: abstract.slice(0, 100) + (abstract.length > 100 ? '...' : ''), + rating: ratingText.includes('.') ? parseFloat(ratingText) : normalizeRawRating(rawItem), + abstract: effectiveAbstract.slice(0, 100) + (effectiveAbstract.length > 100 ? '...' : ''), url, - cover: el.querySelector('img')?.getAttribute('src') || '', + cover: el.querySelector('img')?.getAttribute('src') || normalizeRawCover(rawItem), }); if (results.length >= ${safeLimit}) break; } + if (results.length === 0) { + for (const rawItem of rawItems) { + appendRawItemResult(rawItem); + if (results.length >= ${safeLimit}) break; + } + } return results; })() `); diff --git a/clis/douban/utils.test.js b/clis/douban/utils.test.js index 10111579e..2c7aa2c26 100644 --- a/clis/douban/utils.test.js +++ b/clis/douban/utils.test.js @@ -193,6 +193,54 @@ describe('douban utils', () => { ]); }); + it('falls back to window data items when search result DOM is not rendered', async () => { + const rawItems = [ + { + id: 2026281, + title: '大棋局 : 美国的首要地位及其地缘战略', + url: 'https://book.douban.com/subject/2026281/', + abstract: '兹比格纽·布热津斯基 / 中国国际问题研究所 / 上海人民出版社 / 2007-1 / 23.00元', + cover_url: 'https://img1.doubanio.com/view/subject/m/public/s2552669.jpg', + rating: { count: 5563, value: 8.7 }, + tpl_name: 'search_subject', + }, + { + id: 35284951, + title: '大棋局 : 美国的首要地位及其地缘战略', + url: 'https://book.douban.com/subject/35284951/', + abstract: '[美]兹比格纽•布热津斯基 著 / 上海人民出版社 / 2021-1 / 52', + cover_url: 'https://img2.doubanio.com/view/subject/m/public/s33779181.jpg', + rating: { count: 462, value: 8.5 }, + tpl_name: 'search_subject', + }, + ]; + const page = { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn() + .mockResolvedValueOnce({ blocked: false, title: '大棋局 - 读书 - 豆瓣搜索', href: 'https://search.douban.com/book/subject_search?search_text=%E5%A4%A7%E6%A3%8B%E5%B1%80&cat=1001' }) + .mockImplementationOnce((script) => runSearchEvaluate(script, rawItems, [])), + }; + + await expect(searchDouban(page, 'book', '大棋局', 3)).resolves.toMatchObject([ + { + rank: 1, + id: '2026281', + type: 'book', + title: '大棋局 : 美国的首要地位及其地缘战略', + rating: 8.7, + url: 'https://book.douban.com/subject/2026281/', + cover: 'https://img1.doubanio.com/view/subject/m/public/s2552669.jpg', + }, + { + rank: 2, + id: '35284951', + type: 'book', + rating: 8.5, + }, + ]); + }); + it('normalizes douban book subject raw data into structured fields', () => { const normalized = normalizeDoubanBookSubject({ id: '2567698', diff --git a/src/runtime.test.ts b/src/runtime.test.ts new file mode 100644 index 000000000..693445e9f --- /dev/null +++ b/src/runtime.test.ts @@ -0,0 +1,28 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import { BrowserBridge, CDPBridge } from './browser/index.js'; +import { getBrowserFactory } from './runtime.js'; + +describe('runtime browser factory', () => { + afterEach(() => { + vi.unstubAllEnvs(); + }); + + it('uses CDPBridge when OPENCLI_CDP_ENDPOINT is configured', () => { + vi.stubEnv('OPENCLI_CDP_ENDPOINT', 'http://127.0.0.1:9222'); + + expect(getBrowserFactory('douban')).toBe(CDPBridge); + }); + + it('uses CDPBridge for registered Electron apps', () => { + vi.stubEnv('OPENCLI_CDP_ENDPOINT', ''); + + expect(getBrowserFactory('cursor')).toBe(CDPBridge); + }); + + it('uses BrowserBridge for browser-backed sites by default', () => { + vi.stubEnv('OPENCLI_CDP_ENDPOINT', ''); + + expect(getBrowserFactory('douban')).toBe(BrowserBridge); + }); +}); diff --git a/src/runtime.ts b/src/runtime.ts index 0ea88a6a5..ab3109109 100644 --- a/src/runtime.ts +++ b/src/runtime.ts @@ -6,9 +6,10 @@ import { log } from './logger.js'; /** * Returns the appropriate browser factory based on site type. - * Uses CDPBridge for registered Electron apps, otherwise BrowserBridge. + * Uses CDPBridge when explicitly configured, or for registered Electron apps. */ export function getBrowserFactory(site?: string): new () => IBrowserFactory { + if (process.env.OPENCLI_CDP_ENDPOINT) return CDPBridge; if (site && isElectronApp(site)) return CDPBridge; return BrowserBridge; } From e61936413722f9b6db63179642da3dd724cf9f14 Mon Sep 17 00:00:00 2001 From: sudev5 Date: Wed, 29 Apr 2026 13:29:24 +0800 Subject: [PATCH 2/2] fix(browser): reuse named CDP tab per workspace --- src/browser/cdp.test.ts | 54 ++++++++++++++- src/browser/cdp.ts | 142 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 185 insertions(+), 11 deletions(-) diff --git a/src/browser/cdp.test.ts b/src/browser/cdp.test.ts index 5da4b71c4..836269c57 100644 --- a/src/browser/cdp.test.ts +++ b/src/browser/cdp.test.ts @@ -36,7 +36,7 @@ vi.mock('ws', () => ({ WebSocket: MockWebSocket, })); -import { CDPBridge } from './cdp.js'; +import { CDPBridge, __test__ } from './cdp.js'; describe('CDPBridge cookies', () => { beforeEach(() => { @@ -64,3 +64,55 @@ describe('CDPBridge cookies', () => { ]); }); }); + +describe('CDP target reuse', () => { + beforeEach(() => { + vi.unstubAllEnvs(); + }); + + it('derives a stable tab name from the browser workspace', () => { + expect(__test__.buildCDPTabName('site:douban', {})).toBe('opencli:site:douban'); + expect(__test__.buildCDPTabName(undefined, {})).toBe('opencli:default'); + }); + + it('allows explicit tab names and opt-out via environment', () => { + expect(__test__.buildCDPTabName('site:douban', { OPENCLI_CDP_TAB_NAME: 'douban-fixed' })).toBe('douban-fixed'); + expect(__test__.buildCDPTabName('site:douban', { OPENCLI_CDP_REUSE_TAB: 'false' })).toBeUndefined(); + }); + + it('selects an existing CDP target by persistent window.name', async () => { + const targets = [ + { + id: 'a', + type: 'page', + title: '普通标签页', + url: 'https://www.douban.com/', + webSocketDebuggerUrl: 'ws://127.0.0.1/a', + }, + { + id: 'b', + type: 'page', + title: '大棋局 - 读书 - 豆瓣搜索', + url: 'https://search.douban.com/book/subject_search?search_text=x', + webSocketDebuggerUrl: 'ws://127.0.0.1/b', + }, + ]; + + const selected = await __test__.selectNamedCDPTarget( + targets, + 'opencli:site:douban', + async (target) => target.id === 'b' ? 'opencli:site:douban' : '', + ); + + expect(selected?.id).toBe('b'); + }); + + it('does not pick Chrome internal popup targets', () => { + expect(__test__.scoreCDPTarget({ + type: 'page', + title: 'Omnibox Popup', + url: 'chrome://omnibox-popup.top-chrome/', + webSocketDebuggerUrl: 'ws://127.0.0.1/omnibox', + })).toBe(Number.NEGATIVE_INFINITY); + }); +}); diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index 37c92d8b1..859d76062 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -21,6 +21,7 @@ import { getAllElectronApps } from '../electron-apps.js'; import { BasePage } from './base-page.js'; export interface CDPTarget { + id?: string; type?: string; url?: string; title?: string; @@ -60,9 +61,13 @@ export class CDPBridge implements IBrowserFactory { if (!endpoint) throw new Error('CDP endpoint not provided (pass cdpEndpoint or set OPENCLI_CDP_ENDPOINT)'); let wsUrl = endpoint; + const tabName = endpoint.startsWith('http') ? buildCDPTabName(opts?.workspace) : undefined; if (endpoint.startsWith('http')) { - const targets = await fetchJsonDirect(`${endpoint.replace(/\/$/, '')}/json`) as CDPTarget[]; - const target = selectCDPTarget(targets); + const baseEndpoint = endpoint.replace(/\/$/, ''); + const targets = await fetchJsonDirect(`${baseEndpoint}/json`) as CDPTarget[]; + const target = (tabName ? await selectNamedCDPTarget(targets, tabName) : undefined) + ?? selectCDPTarget(targets) + ?? await createCDPTarget(baseEndpoint); if (!target || !target.webSocketDebuggerUrl) { throw new Error('No inspectable targets found at CDP endpoint'); } @@ -84,12 +89,15 @@ export class CDPBridge implements IBrowserFactory { try { await this.send('Page.enable'); await this.send('Page.addScriptToEvaluateOnNewDocument', { source: generateStealthJs() }); + if (tabName) { + await setCDPWindowName(this, tabName); + } } catch (err) { ws.close(); reject(err instanceof Error ? err : new Error(String(err))); return; } - resolve(new CDPPage(this)); + resolve(new CDPPage(this, tabName)); }); ws.on('error', (err: Error) => { @@ -202,10 +210,20 @@ class CDPPage extends BasePage { private _consoleMessages: Array<{ type: string; text: string; timestamp: number }> = []; private _consoleCapturing = false; - constructor(private bridge: CDPBridge) { + constructor(private bridge: CDPBridge, private readonly tabName?: string) { super(); } + private async markReusableTab(): Promise { + if (!this.tabName) return; + await setCDPWindowName(this.bridge, this.tabName).catch((error) => { + if (process.env.OPENCLI_VERBOSE) { + // eslint-disable-next-line no-console + console.error('[cdp] Failed to mark reusable tab:', error instanceof Error ? error.message : error); + } + }); + } + async goto(url: string, options?: { waitUntil?: 'load' | 'none'; settleMs?: number; allowBoundNavigation?: boolean }): Promise { if (!this._pageEnabled) { await this.bridge.send('Page.enable'); @@ -215,6 +233,7 @@ class CDPPage extends BasePage { await this.bridge.send('Page.navigate', { url }); await loadPromise; this._lastUrl = url; + await this.markReusableTab(); if (options?.waitUntil !== 'none') { const maxMs = options?.settleMs ?? 1000; await this.evaluate(waitForDomStableJs(maxMs, Math.min(500, maxMs))); @@ -382,17 +401,30 @@ function matchesCookieDomain(cookieDomain: string, targetDomain: string): boolea } function selectCDPTarget(targets: CDPTarget[]): CDPTarget | undefined { - const preferredPattern = compilePreferredPattern(process.env.OPENCLI_CDP_TARGET); + return rankCDPTargets(targets)[0]?.target; +} - const ranked = targets +async function selectNamedCDPTarget( + targets: CDPTarget[], + tabName: string, + readWindowName: (target: CDPTarget) => Promise = readTargetWindowName, +): Promise { + for (const { target } of rankCDPTargets(targets)) { + const name = await readWindowName(target).catch(() => undefined); + if (name === tabName) return target; + } + return undefined; +} + +function rankCDPTargets(targets: CDPTarget[]) { + const preferredPattern = compilePreferredPattern(process.env.OPENCLI_CDP_TARGET); + return targets .map((target, index) => ({ target, index, score: scoreCDPTarget(target, preferredPattern) })) .filter(({ score }) => Number.isFinite(score)) .sort((a, b) => { if (b.score !== a.score) return b.score - a.score; return a.index - b.index; }); - - return ranked[0]?.target; } function scoreCDPTarget(target: CDPTarget, preferredPattern?: RegExp): number { @@ -406,6 +438,7 @@ function scoreCDPTarget(target: CDPTarget, preferredPattern?: RegExp): number { if (!haystack.trim() && !type) return Number.NEGATIVE_INFINITY; if (haystack.includes('devtools')) return Number.NEGATIVE_INFINITY; if (type === 'background_page' || type === 'service_worker') return Number.NEGATIVE_INFINITY; + if (url.startsWith('chrome://')) return Number.NEGATIVE_INFINITY; let score = 0; @@ -447,14 +480,103 @@ function escapeRegExp(value: string): string { } export const __test__ = { + buildCDPTabName, selectCDPTarget, + selectNamedCDPTarget, scoreCDPTarget, }; -function fetchJsonDirect(url: string): Promise { +function buildCDPTabName( + workspace?: string, + env: Record = process.env, +): string | undefined { + const reuseFlag = env.OPENCLI_CDP_REUSE_TAB?.trim().toLowerCase(); + if (reuseFlag && ['0', 'false', 'no', 'off'].includes(reuseFlag)) return undefined; + + const explicitName = env.OPENCLI_CDP_TAB_NAME?.trim(); + if (explicitName) return explicitName; + + const suffix = workspace?.trim() || 'default'; + return `opencli:${suffix}`; +} + +async function createCDPTarget(baseEndpoint: string): Promise { + const result = await fetchJsonDirect(`${baseEndpoint}/json/new?about:blank`, 'PUT') as CDPTarget; + return result; +} + +async function readTargetWindowName(target: CDPTarget): Promise { + if (!target.webSocketDebuggerUrl) return undefined; + const value = await evaluateTargetExpression(target.webSocketDebuggerUrl, 'window.name', 2_000); + return typeof value === 'string' ? value : undefined; +} + +async function setCDPWindowName(bridge: CDPBridge, tabName: string): Promise { + await bridge.send('Runtime.evaluate', { + expression: buildSetWindowNameExpression(tabName), + returnByValue: true, + awaitPromise: true, + }); +} + +function buildSetWindowNameExpression(tabName: string): string { + return `try { window.name = ${JSON.stringify(tabName)}; } catch (_) {}`; +} + +function evaluateTargetExpression(wsUrl: string, expression: string, timeoutMs: number): Promise { + return new Promise((resolve, reject) => { + const ws = new WebSocket(wsUrl); + const timer = setTimeout(() => { + ws.close(); + reject(new Error(`Timed out reading CDP target state after ${timeoutMs / 1000}s`)); + }, timeoutMs); + + const finish = (callback: () => void) => { + clearTimeout(timer); + ws.close(); + callback(); + }; + + ws.on('open', () => { + ws.send(JSON.stringify({ + id: 1, + method: 'Runtime.evaluate', + params: { + expression, + returnByValue: true, + awaitPromise: true, + }, + })); + }); + + ws.on('error', (error: Error) => { + finish(() => reject(error)); + }); + + ws.on('message', (data: RawData) => { + try { + const msg = JSON.parse(data.toString()); + if (msg.id !== 1) return; + if (msg.error) { + finish(() => reject(new Error(msg.error.message || 'CDP Runtime.evaluate failed'))); + return; + } + if (msg.result?.exceptionDetails) { + finish(() => resolve(undefined)); + return; + } + finish(() => resolve(msg.result?.result?.value)); + } catch (error) { + finish(() => reject(error instanceof Error ? error : new Error(String(error)))); + } + }); + }); +} + +function fetchJsonDirect(url: string, method: 'GET' | 'PUT' = 'GET'): Promise { return new Promise((resolve, reject) => { const parsed = new URL(url); - const request = (parsed.protocol === 'https:' ? httpsRequest : httpRequest)(parsed, (res) => { + const request = (parsed.protocol === 'https:' ? httpsRequest : httpRequest)(parsed, { method }, (res) => { const statusCode = res.statusCode ?? 0; if (statusCode < 200 || statusCode >= 300) { res.resume();