From e9d94c645508a4e61ddec06e14f695d0b5a15299 Mon Sep 17 00:00:00 2001 From: jean Date: Wed, 29 Apr 2026 00:20:15 +0800 Subject: [PATCH 1/2] feat(zlibrary): add search and info commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Z-Library adapter with two browser-based commands: - `search` — Search books by title, author, or ISBN. Navigates to /s/<query> and extracts results from <z-bookcard> shadow DOM custom elements. - `info` — Get book details and available download formats from a book page URL. Uses Strategy.COOKIE with browser automation to bypass Cloudflare protection. The adapter reuses the user's existing Z-Library login cookies from system Chrome. Known limitation: actual file downloading requires Playwright's download event handling (page.on('download')). OpenCLI's browser automation does not currently intercept file downloads. Users needing to download files should use Playwright to navigate to the book URLs discovered by this adapter. --- cli-manifest.json | 61 +++++++++++++++++++++ clis/zlibrary/info.js | 51 ++++++++++++++++++ clis/zlibrary/search.js | 47 ++++++++++++++++ clis/zlibrary/utils.js | 117 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 276 insertions(+) create mode 100644 clis/zlibrary/info.js create mode 100644 clis/zlibrary/search.js create mode 100644 clis/zlibrary/utils.js diff --git a/cli-manifest.json b/cli-manifest.json index 6eaea636..d2156b5c 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -20668,6 +20668,67 @@ "sourceFile": "zhihu/search.js", "navigateBefore": "https://www.zhihu.com" }, + { + "site": "zlibrary", + "name": "info", + "description": "Get book details and available download formats from a Z-Library book page", + "domain": "z-library.im", + "strategy": "cookie", + "browser": true, + "args": [ + { + "name": "url", + "type": "str", + "required": true, + "positional": true, + "help": "Z-Library book page URL (e.g. 
https://z-library.im/book/...)"
+      }
+    ],
+    "columns": [
+      "title",
+      "pdf",
+      "epub",
+      "url"
+    ],
+    "type": "js",
+    "modulePath": "zlibrary/info.js",
+    "sourceFile": "zlibrary/info.js",
+    "navigateBefore": false
+  },
+  {
+    "site": "zlibrary",
+    "name": "search",
+    "description": "Search Z-Library for books by title, author, ISBN, or keyword",
+    "domain": "z-library.im",
+    "strategy": "cookie",
+    "browser": true,
+    "args": [
+      {
+        "name": "query",
+        "type": "str",
+        "required": true,
+        "positional": true,
+        "help": "Search keyword (title, author, ISBN, etc.)"
+      },
+      {
+        "name": "limit",
+        "type": "int",
+        "default": 10,
+        "required": false,
+        "help": "Max results (1–25)"
+      }
+    ],
+    "columns": [
+      "rank",
+      "title",
+      "author",
+      "url"
+    ],
+    "type": "js",
+    "modulePath": "zlibrary/search.js",
+    "sourceFile": "zlibrary/search.js",
+    "navigateBefore": false
+  },
   {
     "site": "zsxq",
     "name": "dynamics",
diff --git a/clis/zlibrary/info.js b/clis/zlibrary/info.js
new file mode 100644
index 00000000..7e5bd002
--- /dev/null
+++ b/clis/zlibrary/info.js
@@ -0,0 +1,51 @@
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { CliError } from '@jackwener/opencli/errors';
+import { ZLIBRARY_DOMAIN, extractBookTitle, extractFormats } from './utils.js';
+
+cli({
+  site: 'zlibrary',
+  name: 'info',
+  description: 'Get book details and available download formats from a Z-Library book page',
+  domain: ZLIBRARY_DOMAIN,
+  strategy: Strategy.COOKIE,
+  browser: true,
+  navigateBefore: false,
+  args: [
+    {
+      name: 'url',
+      positional: true,
+      required: true,
+      help: 'Z-Library book page URL (e.g. https://z-library.im/book/...)',
+    },
+  ],
+  columns: ['title', 'pdf', 'epub', 'url'],
+  func: async (page, args) => {
+    const url = String(args.url || '').trim();
+    if (!/^https?:\/\//i.test(url)) {
+      throw new CliError('INVALID_ARG', 'URL must start with http', 'Provide the full Z-Library book page URL');
+    }
+    // Open the book page, then pause before scraping so the page can finish rendering.
+    await page.goto(url, { waitUntil: 'load', settleMs: 3000 });
+    await page.wait({ time: 5 });
+
+    const title = await extractBookTitle(page);
+    const formats = await extractFormats(page);
+    // extractBookTitle() yields 'Unknown' when it could not read any title.
+    if (!title || title === 'Unknown') {
+      throw new CliError(
+        'NOT_FOUND',
+        'Could not extract book information',
+        'Check the URL and that you are logged into Z-Library'
+      );
+    }
+    // One row; a format that was not found is reported as an empty string.
+    return [
+      {
+        title,
+        pdf: formats.pdf || '',
+        epub: formats.epub || '',
+        url,
+      },
+    ];
+  },
+});
diff --git a/clis/zlibrary/search.js b/clis/zlibrary/search.js
new file mode 100644
index 00000000..9e4e32db
--- /dev/null
+++ b/clis/zlibrary/search.js
@@ -0,0 +1,47 @@
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { CliError } from '@jackwener/opencli/errors';
+import { ZLIBRARY_DOMAIN, buildSearchUrl, extractSearchResults } from './utils.js';
+
+cli({
+  site: 'zlibrary',
+  name: 'search',
+  description: 'Search Z-Library for books by title, author, ISBN, or keyword',
+  domain: ZLIBRARY_DOMAIN,
+  strategy: Strategy.COOKIE,
+  browser: true,
+  navigateBefore: false,
+  args: [
+    {
+      name: 'query',
+      positional: true,
+      required: true,
+      help: 'Search keyword (title, author, ISBN, etc.)',
+    },
+    {
+      name: 'limit',
+      type: 'int',
+      default: 10,
+      help: 'Max results (1–25)',
+    },
+  ],
+  columns: ['rank', 'title', 'author', 'url'],
+  func: async (page, args) => {
+    const limit = Math.max(1, Math.min(Number(args.limit) || 10, 25));
+    const searchUrl = buildSearchUrl(args.query);
+    // Open the results page, then pause before scraping so the cards can render.
+    await page.goto(searchUrl, { waitUntil: 'load', settleMs: 3000 });
+    await page.wait({ time: 5 });
+
+    const results = await extractSearchResults(page, limit);
+    // An empty list is reported as an error so the user gets the login hint.
+    if (!results.length) {
+      throw new CliError(
+        'NOT_FOUND',
+        'No books found',
+        'Try a different keyword or check that you are logged into Z-Library'
+      );
+    }
+
+    return results;
+  },
+});
diff --git a/clis/zlibrary/utils.js b/clis/zlibrary/utils.js
new file mode 100644
index 00000000..998dd7e4
--- /dev/null
+++ b/clis/zlibrary/utils.js
@@ -0,0 +1,117 @@
+/**
+ * Z-Library adapter utilities.
+ */
+
+const ZLIBRARY_DOMAIN = 'z-library.im';
+const ZLIBRARY_ORIGIN = `https://${ZLIBRARY_DOMAIN}`;
+
+/**
+ * Build a Z-Library search URL.
+ * Z-Library uses /s/<query> for search; the query is percent-encoded into the path.
+ */
+export function buildSearchUrl(query) {
+  return `${ZLIBRARY_ORIGIN}/s/${encodeURIComponent(query)}`;
+}
+
+/**
+ * Extract book title from page context; resolves to 'Unknown' if nothing can be read.
+ * Tries z-bookcard shadow DOM first, then document.title minus a trailing " - x" / " | x" suffix.
+ */
+export async function extractBookTitle(page) {
+  try {
+    const title = await page.evaluate(`
+      (() => {
+        const card = document.querySelector('z-bookcard');
+        if (card && card.shadowRoot) {
+          const el = card.shadowRoot.querySelector('[class*="title"], h1, a');
+          if (el) return el.textContent.trim().split('\\n')[0].trim();
+        }
+        return document.title.replace(/\\s+[-|]\\s+.*$/, '').trim();
+      })()
+    `);
+    return title || 'Unknown';
+  } catch {
+    return 'Unknown';
+  }
+}
+
+/**
+ * Extract available download formats from book page.
+ * Clicks the three-dot menu to reveal download options.
+ * NOTE: Z-Library download links redirect through /dl/ URLs.
+ * These require browser cookies and may not produce direct file downloads
+ * in OpenCLI's browser automation. For actual file downloading,
+ * consider using Playwright's download event handling instead.
+ */
+export async function extractFormats(page) {
+  try {
+    // Best effort: open the three-dot menu (if one is found) so its download links are in the DOM.
+    await page.evaluate(`
+      (() => {
+        const btn = document.querySelector(
+          'button[aria-label*="more" i], [class*="dots" i], [class*="more" i]'
+        );
+        if (btn) btn.click();
+      })()
+    `);
+    // NOTE(review): `time` looks like seconds (info.js/search.js pass `time: 5`), so wait 3 s, not 3000.
+    await page.wait({ time: 3 });
+
+    const formats = await page.evaluate(`
+      JSON.stringify((() => {
+        const res = { pdf: '', epub: '' };
+        document.querySelectorAll('a[href]').forEach(a => {
+          const h = a.href || '';
+          const t = (a.textContent || '').toUpperCase();
+          if (h.includes('/dl/') && t.includes('PDF')) res.pdf = h;
+          if (h.includes('/dl/') && t.includes('EPUB')) res.epub = h;
+        });
+        return res;
+      })())
+    `);
+    return JSON.parse(formats);
+  } catch {
+    return { pdf: '', epub: '' }; // best effort: report "no formats" instead of failing the command
+  }
+}
+
+/**
+ * Extract book cards from search results page.
+ *
+ * Z-Library renders search results as <z-bookcard> custom elements.
+ * Each card contains the book title, author, and a link to the book page.
+ * The link is inside a shadow DOM that can be queried with card.shadowRoot.
+ *
+ * This approach was validated on 2026-04-28 against z-library.im.
+ */
+export async function extractSearchResults(page, limit) {
+  // Coerce the cap before interpolating it into the page script; a non-numeric
+  // value becomes `undefined`, which makes slice() keep every card.
+  const max = Number.isFinite(Number(limit)) ? Math.trunc(Number(limit)) : undefined;
+  // Runs in the page: one row per usable z-bookcard, serialized as JSON for transport.
+  const raw = await page.evaluate(`
+    JSON.stringify(
+      Array.from(document.querySelectorAll('z-bookcard'))
+        .map((card) => {
+          const lines = card.textContent.trim().split('\\n').map(l => l.trim()).filter(Boolean);
+          // The book link is looked up inside the card's shadow root.
+          const link = card.shadowRoot ? card.shadowRoot.querySelector('a') : null;
+          return { title: lines[0] || '', author: lines[1] || '', url: (link && link.href) || '' };
+        })
+        // Drop incomplete cards BEFORE capping and ranking, so the caller gets up
+        // to the requested number of rows with contiguous ranks (1, 2, 3, ...).
+        .filter(item => item.url && item.title)
+        .slice(0, ${max})
+        .map((item, index) => ({ rank: index + 1, ...item }))
+    )
+  `);
+
+  try {
+    return JSON.parse(raw);
+  } catch {
+    // evaluate() gave back something that is not JSON: treat it as "no results".
+    return [];
+  }
+}
+
+export { ZLIBRARY_DOMAIN, ZLIBRARY_ORIGIN };

From 40f1ae47f184bf7eb08025bf1303b4e055139452 Mon Sep 17 00:00:00 2001
From: jean
Date: Sun, 3 May 2026 14:20:28 +0800
Subject: [PATCH 2/2] feat(weibo): add statuses command to fetch user timeline

Add `opencli weibo statuses <id>` command to fetch a user's Weibo
statuses/timeline via /ajax/statuses/mymblog API.

Supports:
- uid (numeric) or screen_name as positional arg
- --limit (max 50, default 15)
- --page for pagination
- Returns: id, mblogid, text, isLongText, created_at, reposts, comments,
  likes, pic_num, url
- Includes retweeted content and page_info when available

Closes gap where opencli weibo had user/profile and single post fetch,
but no way to list a specific user's statuses.
--- cli-manifest.json | 47 +++++++++++++++++++++ clis/weibo/statuses.js | 95 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 clis/weibo/statuses.js diff --git a/cli-manifest.json b/cli-manifest.json index d2156b5c..777f3c66 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -17428,6 +17428,53 @@ "sourceFile": "weibo/search.js", "navigateBefore": "https://weibo.com" }, + { + "site": "weibo", + "name": "statuses", + "description": "Fetch a user's Weibo statuses/timeline", + "domain": "weibo.com", + "strategy": "cookie", + "browser": true, + "args": [ + { + "name": "id", + "type": "str", + "required": true, + "positional": true, + "help": "User ID (numeric uid) or screen name" + }, + { + "name": "limit", + "type": "int", + "default": 15, + "required": false, + "help": "Number of posts (max 50)" + }, + { + "name": "page", + "type": "int", + "default": 1, + "required": false, + "help": "Page number" + } + ], + "columns": [ + "id", + "mblogid", + "text", + "isLongText", + "created_at", + "reposts", + "comments", + "likes", + "pic_num", + "url" + ], + "type": "js", + "modulePath": "weibo/statuses.js", + "sourceFile": "weibo/statuses.js", + "navigateBefore": "https://weibo.com" + }, { "site": "weibo", "name": "user", diff --git a/clis/weibo/statuses.js b/clis/weibo/statuses.js new file mode 100644 index 00000000..3788fa91 --- /dev/null +++ b/clis/weibo/statuses.js @@ -0,0 +1,95 @@ +/** + * Weibo statuses — fetch a user's public timeline / blog posts. 
+ */
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { CommandExecutionError } from '@jackwener/opencli/errors';
+
+cli({
+  site: 'weibo',
+  name: 'statuses',
+  description: "Fetch a user's Weibo statuses/timeline",
+  domain: 'weibo.com',
+  strategy: Strategy.COOKIE,
+  args: [
+    { name: 'id', required: true, positional: true, help: 'User ID (numeric uid) or screen name' },
+    { name: 'limit', type: 'int', default: 15, help: 'Number of posts (max 50)' },
+    { name: 'page', type: 'int', default: 1, help: 'Page number' },
+  ],
+  columns: ['id', 'mblogid', 'text', 'isLongText', 'created_at', 'reposts', 'comments', 'likes', 'pic_num', 'url'],
+  func: async (page, kwargs) => {
+    // Clamp to integers (limit 1..50, page >= 1): both are interpolated into the page script below.
+    const count = Math.max(1, Math.min(Math.trunc(Number(kwargs.limit)) || 15, 50));
+    const pageNum = Math.max(1, Math.trunc(Number(kwargs.page)) || 1);
+    const id = String(kwargs.id);
+
+    await page.goto('https://weibo.com');
+    await page.wait(2);
+
+    // An all-digit id is used as the uid directly; anything else is looked up as a screen name.
+    const isUid = /^\d+$/.test(id);
+    let uid = id;
+    if (!isUid) {
+      const profileResp = await page.evaluate(`
+        (async () => {
+          const resp = await fetch('/ajax/profile/info?screen_name=' + encodeURIComponent(${JSON.stringify(id)}), { credentials: 'include' });
+          if (!resp.ok) return { error: 'HTTP ' + resp.status };
+          const data = await resp.json();
+          if (!data.ok || !data.data?.user) return { error: 'User not found' };
+          return { uid: data.data.user.id };
+        })()
+      `);
+      if (!profileResp || profileResp.error) {
+        throw new CommandExecutionError(String(profileResp?.error || 'User not found'));
+      }
+      uid = String(profileResp.uid);
+    }
+
+    const data = await page.evaluate(`
+      (async () => {
+        const uid = ${JSON.stringify(uid)};
+        const count = ${count};
+        const page = ${pageNum};
+        // Remove tags, then decode the four common entities in ONE pass so an escaped ampersand is never decoded twice.
+        const strip = (html) => (html || '').replace(/<[^>]+>/g, '').replace(/&(nbsp|lt|gt|amp);/g, (m, n) => ({ nbsp: ' ', lt: '<', gt: '>', amp: '&' })[n]).trim();
+
+        const resp = await fetch('/ajax/statuses/mymblog?uid=' + uid + '&page=' + page + '&feature=0', { credentials: 'include' });
+        if (!resp.ok) return { error: 'HTTP ' + resp.status };
+        const data = await resp.json();
+        if (!data.ok) return { error: 'API error: ' + (data.msg || 'unknown') };
+
+        return (data.data?.list || []).slice(0, count).map(s => {
+          const u = s.user || {};
+          const item = {
+            id: s.idstr || '',
+            mblogid: s.mblogid || '',
+            text: (s.text_raw || strip(s.text || '')).substring(0, 500),
+            isLongText: s.isLongText || false,
+            created_at: s.created_at || '',
+            reposts: s.reposts_count || 0,
+            comments: s.comments_count || 0,
+            likes: s.attitudes_count || 0,
+            pic_num: s.pic_num || 0,
+            url: 'https://weibo.com/' + (u.id || uid) + '/' + (s.mblogid || ''),
+          };
+          if (s.retweeted_status) {
+            const rt = s.retweeted_status;
+            item.retweeted = (rt.user?.screen_name || '[deleted]') + ': ' + (rt.text_raw || strip(rt.text || '')).substring(0, 200);
+          }
+          if (s.page_info) {
+            item.page_title = s.page_info.title || '';
+            item.page_type = s.page_info.type || '';
+            item.page_url = s.page_info.page_url || '';
+          }
+          return item;
+        });
+      })()
+    `);
+
+    // The page script resolves to an array of rows, or to { error } on failure.
+    if (Array.isArray(data)) return data;
+    if (data && data.error) {
+      throw new CommandExecutionError(String(data.error));
+    }
+    return [];
+  },
+});