From 58ae0faeef1dbbc6e57b45f8b24a97cbae15d18d Mon Sep 17 00:00:00 2001
From: c_w_xiaohei <1641233466@qq.com>
Date: Sat, 2 May 2026 19:04:45 +0800
Subject: [PATCH] feat(weixin): add Sogou article search

---
Notes (below the separator, so not part of the commit message):
* Adds the `weixin search` command in clis/weixin/search.js. It opens
  https://weixin.sogou.com/weixin with type=2 and reads `.news-list li`
  entries inside the browser page.
* `--page` falls back to 1 and `--limit` falls back to 10 when the value is
  not a positive integer; `--limit` is capped at 10.
* `rank` is computed as (page - 1) * 10 + index + 1.
* Registers the command in cli-manifest.json, adds tests, and updates docs.

 cli-manifest.json               | 42 +++++++++++++++
 clis/weixin/drafts.test.js      |  2 +
 clis/weixin/search.js           | 94 +++++++++++++++++++++++++++++++++
 clis/weixin/search.test.js      | 81 ++++++++++++++++++++++++++++
 docs/adapters/browser/weixin.md | 19 ++++++-
 docs/adapters/index.md          |  2 +-
 6 files changed, 238 insertions(+), 2 deletions(-)
 create mode 100644 clis/weixin/search.js
 create mode 100644 clis/weixin/search.test.js

diff --git a/cli-manifest.json b/cli-manifest.json
index cc33bf214..b555e7d38 100644
--- a/cli-manifest.json
+++ b/cli-manifest.json
@@ -17646,6 +17646,48 @@
     "sourceFile": "weixin/drafts.js",
     "navigateBefore": false
   },
+  {
+    "site": "weixin",
+    "name": "search",
+    "description": "使用搜狗微信搜索公众号文章;如需导出正文 Markdown,请使用 weixin download 处理公众号文章链接",
+    "domain": "weixin.sogou.com",
+    "strategy": "public",
+    "browser": true,
+    "args": [
+      {
+        "name": "query",
+        "type": "str",
+        "required": true,
+        "positional": true,
+        "help": "搜索关键词;如需正文 Markdown,请使用 weixin download 处理公众号文章链接"
+      },
+      {
+        "name": "page",
+        "type": "int",
+        "default": 1,
+        "required": false,
+        "help": "结果页码,从 1 开始"
+      },
+      {
+        "name": "limit",
+        "type": "int",
+        "default": 10,
+        "required": false,
+        "help": "返回条数,最大 10"
+      }
+    ],
+    "columns": [
+      "rank",
+      "page",
+      "title",
+      "url",
+      "summary",
+      "publish_time"
+    ],
+    "type": "js",
+    "modulePath": "weixin/search.js",
+    "sourceFile": "weixin/search.js"
+  },
   {
     "site": "weread",
     "name": "ai-outline",
diff --git a/clis/weixin/drafts.test.js b/clis/weixin/drafts.test.js
index d2d472e24..8cbdbd191 100644
--- a/clis/weixin/drafts.test.js
+++ b/clis/weixin/drafts.test.js
@@ -3,6 +3,7 @@ import { AuthRequiredError, EmptyResultError } from '@jackwener/opencli/errors';
 import { getRegistry } from '@jackwener/opencli/registry';
 import './create-draft.js';
 import './drafts.js';
+import './search.js';
 
 function createPageMock(overrides = {}) {
     return {
@@ -19,6 +20,7 @@ describe('weixin command registration', () => {
         const values = [...registry.values()];
         expect(values.find(c => c.site === 'weixin' && c.name === 'create-draft')).toBeDefined();
         expect(values.find(c => c.site === 'weixin' && c.name === 'drafts')).toBeDefined();
+        expect(values.find(c => c.site === 'weixin' && c.name === 'search')).toBeDefined();
     });
 });
 
diff --git a/clis/weixin/search.js b/clis/weixin/search.js
new file mode 100644
index 000000000..1d374959a
--- /dev/null
+++ b/clis/weixin/search.js
@@ -0,0 +1,94 @@
+import { ArgumentError, EmptyResultError } from '@jackwener/opencli/errors';
+import { cli, Strategy } from '@jackwener/opencli/registry';
+
+const SOGOU_WEIXIN_DOMAIN = 'weixin.sogou.com';
+
+function normalizePage(page) {
+    const parsed = Number.parseInt(String(page ?? ''), 10);
+    if (!Number.isFinite(parsed) || parsed < 1)
+        return 1;
+    return parsed;
+}
+
+function normalizeLimit(limit) {
+    const parsed = Number.parseInt(String(limit ?? ''), 10);
+    if (!Number.isFinite(parsed) || parsed < 1)
+        return 10;
+    return Math.min(parsed, 10);
+}
+
+cli({
+    site: 'weixin',
+    name: 'search',
+    description: '使用搜狗微信搜索公众号文章;如需导出正文 Markdown,请使用 weixin download 处理公众号文章链接',
+    domain: SOGOU_WEIXIN_DOMAIN,
+    strategy: Strategy.PUBLIC,
+    browser: true,
+    args: [
+        { name: 'query', positional: true, required: true, help: '搜索关键词;如需正文 Markdown,请使用 weixin download 处理公众号文章链接' },
+        { name: 'page', type: 'int', default: 1, help: '结果页码,从 1 开始' },
+        { name: 'limit', type: 'int', default: 10, help: '返回条数,最大 10' },
+    ],
+    columns: ['rank', 'page', 'title', 'url', 'summary', 'publish_time'],
+    func: async (page, kwargs) => {
+        const query = String(kwargs.query ?? '').trim();
+        if (!query) {
+            throw new ArgumentError('A search query is required.', 'Pass a non-empty keyword to search Weixin articles via Sogou.');
+        }
+
+        const pageNo = normalizePage(kwargs.page);
+        const limit = normalizeLimit(kwargs.limit);
+        const searchUrl = new URL('https://weixin.sogou.com/weixin');
+        searchUrl.searchParams.set('query', query);
+        searchUrl.searchParams.set('type', '2');
+        searchUrl.searchParams.set('page', String(pageNo));
+        searchUrl.searchParams.set('ie', 'utf8');
+
+        await page.goto(searchUrl.toString());
+        await page.wait(2);
+
+        const rows = await page.evaluate(String.raw`(() => {
+            const clean = (value) => {
+                return (value || '')
+                    .replace(/\s+/g, ' ')
+                    .replace(/<!--red_beg-->|<!--red_end-->/g, '')
+                    .replace(/document\.write\(timeConvert\('\d+'\)\)/g, '')
+                    .trim();
+            };
+
+            const absolutize = (href) => {
+                if (!href) return '';
+                try {
+                    return new URL(href, window.location.origin).toString();
+                } catch {
+                    return href;
+                }
+            };
+
+            return Array.from(document.querySelectorAll('.news-list li')).map((item) => {
+                const linkEl = item.querySelector('h3 a[href]');
+                const summaryEl = item.querySelector('p.txt-info');
+                const timeEl = item.querySelector('.s-p .s2');
+                return {
+                    title: clean(linkEl && linkEl.textContent),
+                    url: absolutize(linkEl && linkEl.getAttribute('href')),
+                    summary: clean(summaryEl && summaryEl.textContent),
+                    publish_time: clean(timeEl && timeEl.textContent),
+                };
+            }).filter((row) => row.title && row.url);
+        })()`);
+
+        if (!Array.isArray(rows) || rows.length === 0) {
+            throw new EmptyResultError('weixin search', 'Try a different keyword or a different page number.');
+        }
+
+        return rows.slice(0, limit).map((row, index) => ({
+            rank: (pageNo - 1) * 10 + index + 1,
+            page: pageNo,
+            title: row.title,
+            url: row.url,
+            summary: row.summary,
+            publish_time: row.publish_time,
+        }));
+    },
+});
diff --git a/clis/weixin/search.test.js b/clis/weixin/search.test.js
new file mode 100644
index 000000000..79d08f59a
--- /dev/null
+++ b/clis/weixin/search.test.js
@@ -0,0 +1,81 @@
+import { describe, expect, it, vi } from 'vitest';
+import { getRegistry } from '@jackwener/opencli/registry';
+import './search.js';
+
+describe('weixin search command', () => {
+    const command = getRegistry().get('weixin/search');
+
+    it('registers as a public browser command', () => {
+        expect(command).toBeDefined();
+        expect(command.site).toBe('weixin');
+        expect(command.strategy).toBe('public');
+        expect(command.browser).toBe(true);
+    });
+
+    it('rejects empty queries before browser navigation', async () => {
+        const page = { goto: vi.fn() };
+
+        await expect(command.func(page, { query: ' ' })).rejects.toMatchObject({
+            name: 'ArgumentError',
+            code: 'ARGUMENT',
+        });
+
+        expect(page.goto).not.toHaveBeenCalled();
+    });
+
+    it('uses page and limit while preserving per-page ranking', async () => {
+        const page = {
+            goto: vi.fn().mockResolvedValue(undefined),
+            wait: vi.fn().mockResolvedValue(undefined),
+            evaluate: vi.fn().mockResolvedValue([
+                {
+                    title: 'First article',
+                    url: 'https://weixin.sogou.com/link?url=abc',
+                    summary: 'First summary',
+                    publish_time: '2小时前',
+                },
+                {
+                    title: 'Second article',
+                    url: 'https://weixin.sogou.com/link?url=def',
+                    summary: 'Second summary',
+                    publish_time: '1小时前',
+                },
+            ]),
+        };
+
+        const result = await command.func(page, { query: 'AI', page: 2, limit: 1 });
+
+        expect(page.goto).toHaveBeenCalledWith('https://weixin.sogou.com/weixin?query=AI&type=2&page=2&ie=utf8');
+        expect(result).toEqual([
+            {
+                rank: 11,
+                page: 2,
+                title: 'First article',
+                url: 'https://weixin.sogou.com/link?url=abc',
+                summary: 'First summary',
+                publish_time: '2小时前',
+            },
+        ]);
+    });
+
+    it('preserves browser-side cleanup regex escapes', async () => {
+        const page = {
+            goto: vi.fn().mockResolvedValue(undefined),
+            wait: vi.fn().mockResolvedValue(undefined),
+            evaluate: vi.fn().mockResolvedValue([
+                {
+                    title: 'Article',
+                    url: 'https://weixin.sogou.com/link?url=abc',
+                    summary: 'Summary',
+                    publish_time: '2024-4-28',
+                },
+            ]),
+        };
+
+        await command.func(page, { query: 'AI' });
+
+        const script = page.evaluate.mock.calls[0][0];
+        expect(script).toContain(".replace(/\\s+/g, ' ')");
+        expect(script).toContain(".replace(/document\\.write\\(timeConvert\\('\\d+'\\)\\)/g, '')");
+    });
+});
diff --git a/docs/adapters/browser/weixin.md b/docs/adapters/browser/weixin.md
index 0f0dd201d..7a8bb2856 100644
--- a/docs/adapters/browser/weixin.md
+++ b/docs/adapters/browser/weixin.md
@@ -1,11 +1,12 @@
 # WeChat (微信公众号)
 
-**Mode**: 🔐 Browser · **Domain**: `mp.weixin.qq.com`
+**Mode**: 🌐 / 🔐 Browser · **Domains**: `weixin.sogou.com`, `mp.weixin.qq.com`
 
 ## Commands
 
 | Command | Description |
 |---------|-------------|
+| `opencli weixin search` | 使用搜狗微信搜索公众号文章,返回标题、链接、摘要和发布时间 |
 | `opencli weixin download` | 下载微信公众号文章为 Markdown 格式 |
 | `opencli weixin drafts` | 列出公众号后台草稿箱中的图文草稿 |
 | `opencli weixin create-draft` | 在公众号后台创建新的图文草稿 |
@@ -13,6 +14,12 @@
 ## Usage Examples
 
 ```bash
+# Search Official Account articles through Sogou Weixin
+opencli weixin search "AI" --page 1 --limit 5
+
+# Export the corresponding WeChat article URL to Markdown
+opencli weixin download --url "https://mp.weixin.qq.com/s/xxx" --output ./weixin
+
 # Export article to Markdown
 opencli weixin download --url "https://mp.weixin.qq.com/s/xxx" --output ./weixin
 
@@ -34,6 +41,16 @@ opencli weixin create-draft --title "封面示例" --cover-image ./cover.png "
 
 ## Output
 
+`search` returns one row per Sogou Weixin result:
+- `rank` — overall result rank based on the requested page
+- `page` — Sogou result page number
+- `title` — article title
+- `url` — Sogou result link for the article
+- `summary` — result-page snippet, when available
+- `publish_time` — time text rendered by Sogou, such as `27分钟前` or `2小时前`
+
+Use `weixin download` with the corresponding `mp.weixin.qq.com` article URL when you need Markdown content extraction.
+
 Downloads to `<output>/<article-title>/`:
 - `<article-title>.md` — Markdown with frontmatter (title, author, publish time, source URL)
 - `images/` — Downloaded images (if `--download-images` is enabled, default: true)
diff --git a/docs/adapters/index.md b/docs/adapters/index.md
index 542a4bc11..22cfa3e6a 100644
--- a/docs/adapters/index.md
+++ b/docs/adapters/index.md
@@ -58,7 +58,7 @@ Run `opencli list` for the live registry.
 | **[1688](./browser/1688.md)** | `search` `item` `assets` `download` `store` | 🔐 Browser |
 | **[gitee](./browser/gitee.md)** | `trending` `search` `user` | 🌐 / 🔐 |
 | **[web](./browser/web.md)** | `read` | 🔐 Browser |
-| **[weixin](./browser/weixin.md)** | `download` `drafts` `create-draft` | 🔐 Browser |
+| **[weixin](./browser/weixin.md)** | `search` `download` `drafts` `create-draft` | 🌐 / 🔐 |
 | **[36kr](./browser/36kr.md)** | `news` `hot` `search` `article` | 🌐 / 🔐 |
 | **[producthunt](./browser/producthunt.md)** | `posts` `today` `hot` `browse` | 🌐 / 🔐 |
 | **[ones](./browser/ones.md)** | `login` `me` `token-info` `tasks` `my-tasks` `task` `worklog` `logout` | 🔐 Browser Bridge + `ONES_BASE_URL` |