From 57d6e30d375be82f1e553e3b04be02ac50e981b0 Mon Sep 17 00:00:00 2001 From: travertexg Date: Fri, 13 Jun 2025 13:58:05 +0000 Subject: [PATCH 1/9] feat: Refactor web search settings Group web search related settings under a single webSearchSettings object for better organization. Additionally, it introduces a new setting urlFetchBackend to control whether Jina or a local parser is used for fetching URL content. --- src/components/chat-view/ChatView.tsx | 6 +-- .../Markdown/MarkdownSearchWebBlock.tsx | 4 +- src/lang/locale/en.ts | 6 ++- src/lang/locale/zh-cn.ts | 6 ++- src/settings/SettingTab.tsx | 43 ++++++++++++++++--- src/types/settings.test.ts | 21 ++++++--- src/types/settings.ts | 19 ++++++-- src/utils/web-search.ts | 36 ++++++++++------ 8 files changed, 103 insertions(+), 38 deletions(-) diff --git a/src/components/chat-view/ChatView.tsx b/src/components/chat-view/ChatView.tsx index 9db67ad..cc29e0d 100644 --- a/src/components/chat-view/ChatView.tsx +++ b/src/components/chat-view/ChatView.tsx @@ -740,9 +740,7 @@ const Chat = forwardRef((props, ref) => { } else if (toolArgs.type === 'search_web') { const results = await webSearch( toolArgs.query, - settings.serperApiKey, - settings.serperSearchEngine, - settings.jinaApiKey, + settings.webSearchSettings, (await getRAGEngine()) ) const formattedContent = `[search_web for '${toolArgs.query}'] Result:\n${results}\n`; @@ -760,7 +758,7 @@ const Chat = forwardRef((props, ref) => { } } } else if (toolArgs.type === 'fetch_urls_content') { - const results = await fetchUrlsContent(toolArgs.urls, settings.jinaApiKey) + const results = await fetchUrlsContent(toolArgs.urls, settings.webSearchSettings) const formattedContent = `[ fetch_urls_content ] Result:\n${results}\n`; return { type: 'fetch_urls_content', diff --git a/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx b/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx index 9ded9f5..c3abb3c 100644 --- 
a/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx +++ b/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx @@ -20,9 +20,9 @@ export default function MarkdownWebSearchBlock({ const { settings } = useSettings() const handleClick = () => { - if (settings.serperSearchEngine === 'google') { + if (settings.webSearchSettings.serperSearchEngine === 'google') { window.open(`https://www.google.com/search?q=${query}`, '_blank') - } else if (settings.serperSearchEngine === 'bing') { + } else if (settings.webSearchSettings.serperSearchEngine === 'bing') { window.open(`https://www.bing.com/search?q=${query}`, '_blank') } else { window.open(`https://duckduckgo.com/?q=${query}`, '_blank') diff --git a/src/lang/locale/en.ts b/src/lang/locale/en.ts index 53f7d96..a0081ce 100644 --- a/src/lang/locale/en.ts +++ b/src/lang/locale/en.ts @@ -376,8 +376,12 @@ export default { google: 'Google', duckDuckGo: 'DuckDuckGo', bing: 'Bing', + urlFetchBackend: 'URL content fetch backend', + urlFetchBackendDescription: 'Choose the backend for URL content fetching.', + jina: 'Jina', + local: 'Local', jinaApiKey: 'Jina API key (Optional)', - jinaApiKeyDescription: 'API key for parsing web pages into markdown format. If not provided, local parsing will be used. Get your key from', + jinaApiKeyDescription: 'API key for higher rate limits. 
Get your key from', }, // RAG Section diff --git a/src/lang/locale/zh-cn.ts b/src/lang/locale/zh-cn.ts index db55eab..03adeb5 100644 --- a/src/lang/locale/zh-cn.ts +++ b/src/lang/locale/zh-cn.ts @@ -377,8 +377,12 @@ export default { google: 'Google', duckDuckGo: 'DuckDuckGo', bing: 'Bing', + urlFetchBackend: 'URL 检索后端', + urlFetchBackendDescription: '选择 URL 检索的后端。', + jina: 'Jina', + local: '本地', jinaApiKey: 'Jina API 密钥(可选)', - jinaApiKeyDescription: '用于将网页解析为 Markdown 格式的 API 密钥。如果未提供,将使用本地解析。请从此处获取您的密钥', + jinaApiKeyDescription: '使用 API 密钥以获得更高速率限制。请从此处获取您的密钥', }, // RAG 部分 diff --git a/src/settings/SettingTab.tsx b/src/settings/SettingTab.tsx index b32ece2..07e76e0 100644 --- a/src/settings/SettingTab.tsx +++ b/src/settings/SettingTab.tsx @@ -267,11 +267,14 @@ export class InfioSettingTab extends PluginSettingTab { .setClass('setting-item-heading-smaller') .addText((text) => { const t = text - .setValue(this.plugin.settings.serperApiKey) + .setValue(this.plugin.settings.webSearchSettings.serperApiKey) .onChange(async (value) => { await this.plugin.setSettings({ ...this.plugin.settings, - serperApiKey: value, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + serperApiKey: value, + }, }) }); if (t.inputEl) { @@ -288,12 +291,35 @@ export class InfioSettingTab extends PluginSettingTab { .addOption('google', t('settings.WebSearch.google')) .addOption('duckduckgo', t('settings.WebSearch.duckDuckGo')) .addOption('bing', t('settings.WebSearch.bing')) - .setValue(this.plugin.settings.serperSearchEngine) + .setValue(this.plugin.settings.webSearchSettings.serperSearchEngine) + .onChange(async (value) => { + await this.plugin.setSettings({ + ...this.plugin.settings, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + // @ts-ignore + serperSearchEngine: value, + }, + }) + }), + ) + + new Setting(containerEl) + .setName(t('settings.WebSearch.urlFetchBackend')) + .setDesc(t('settings.WebSearch.urlFetchBackendDescription')) + 
.addDropdown((dropdown) => + dropdown + .addOption('jina', t('settings.WebSearch.jina')) + .addOption('local', t('settings.WebSearch.local')) + .setValue(this.plugin.settings.webSearchSettings.urlFetchBackend) .onChange(async (value) => { await this.plugin.setSettings({ ...this.plugin.settings, - // @ts-ignore - serperSearchEngine: value, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + // @ts-ignore + urlFetchBackend: value, + }, }) }), ) @@ -312,11 +338,14 @@ export class InfioSettingTab extends PluginSettingTab { .setClass('setting-item-heading-smaller') .addText((text) => { const t = text - .setValue(this.plugin.settings.jinaApiKey) + .setValue(this.plugin.settings.webSearchSettings.jinaApiKey) .onChange(async (value) => { await this.plugin.setSettings({ ...this.plugin.settings, - jinaApiKey: value, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + jinaApiKey: value, + }, }) }); if (t.inputEl) { diff --git a/src/types/settings.test.ts b/src/types/settings.test.ts index 28afc1a..3bb27b6 100644 --- a/src/types/settings.test.ts +++ b/src/types/settings.test.ts @@ -140,15 +140,19 @@ describe('parseSmartCopilotSettings', () => { userMessageTemplate: '{{prefix}}{{suffix}}', chainOfThoughRemovalRegex: '(.|\\n)*ANSWER:', dontIncludeDataviews: true, - jinaApiKey: '', maxPrefixCharLimit: 4000, maxSuffixCharLimit: 4000, mode: 'ask', defaultMention: 'none', removeDuplicateMathBlockIndicator: true, removeDuplicateCodeBlockIndicator: true, - serperApiKey: '', - serperSearchEngine: 'google', + webSearchSettings: { + webSearchBackend: 'serper', + serperApiKey: '', + serperSearchEngine: 'google', + urlFetchBackend: 'jina', + jinaApiKey: '', + }, ignoredFilePatterns: '**/secret/**\n', ignoredTags: '', cacheSuggestions: true, @@ -382,15 +386,20 @@ describe('settings migration', () => { userMessageTemplate: '{{prefix}}{{suffix}}', chainOfThoughRemovalRegex: '(.|\\n)*ANSWER:', dontIncludeDataviews: true, - jinaApiKey: '', + 
maxPrefixCharLimit: 4000, maxSuffixCharLimit: 4000, mode: 'ask', defaultMention: 'none', removeDuplicateMathBlockIndicator: true, removeDuplicateCodeBlockIndicator: true, - serperApiKey: '', - serperSearchEngine: 'google', + webSearchSettings: { + webSearchBackend: 'serper', + serperApiKey: '', + serperSearchEngine: 'google', + urlFetchBackend: 'jina', + jinaApiKey: '', + }, ignoredFilePatterns: '**/secret/**\n', ignoredTags: '', cacheSuggestions: true, diff --git a/src/types/settings.ts b/src/types/settings.ts index e9f19fe..c7a0ce2 100644 --- a/src/types/settings.ts +++ b/src/types/settings.ts @@ -242,6 +242,20 @@ export const triggerSchema = z.object({ } }); +const WebSearchSettingsSchema = z.object({ + webSearchBackend: z.enum(['local', 'serper']).catch('serper'), + serperApiKey: z.string().catch(''), + serperSearchEngine: z.enum(['google', 'duckduckgo', 'bing']).catch('google'), + urlFetchBackend: z.enum(['local', 'jina']).catch('jina'), + jinaApiKey: z.string().catch(''), +}).catch({ + webSearchBackend: 'serper', + serperApiKey: '', + serperSearchEngine: 'google', + urlFetchBackend: 'jina', + jinaApiKey: '', +}); + const FilesSearchSettingsSchema = z.object({ method: z.enum(['match', 'regex', 'semantic', 'auto']).catch('auto'), regexBackend: z.enum(['coreplugin', 'ripgrep']).catch('coreplugin'), @@ -336,9 +350,7 @@ export const InfioSettingsSchema = z.object({ defaultMention: z.enum(['none', 'current-file', 'vault']).catch('none'), // web search - serperApiKey: z.string().catch(''), - serperSearchEngine: z.enum(['google', 'duckduckgo', 'bing']).catch('google'), - jinaApiKey: z.string().catch(''), + webSearchSettings: WebSearchSettingsSchema, // Files Search filesSearchSettings: FilesSearchSettingsSchema, @@ -445,6 +457,7 @@ export const InfioSettingsSchema = z.object({ }) export type InfioSettings = z.infer +export type WebSearchSettings = z.infer export type FilesSearchSettings = z.infer type Migration = { diff --git a/src/utils/web-search.ts 
b/src/utils/web-search.ts index ac65ffc..0146e05 100644 --- a/src/utils/web-search.ts +++ b/src/utils/web-search.ts @@ -4,6 +4,7 @@ import { htmlToMarkdown, requestUrl } from 'obsidian'; import { JINA_BASE_URL, SERPER_BASE_URL } from '../constants'; import { RAGEngine } from '../core/rag/rag-engine'; +import { WebSearchSettings } from '../types/settings'; import { isVideoUrl, getVideoProvider } from './video-detector'; import { YoutubeTranscript, isYoutubeUrl } from './youtube-transcript'; @@ -96,9 +97,11 @@ function cosineSimilarity(vecA: number[], vecB: number[]): number { return dotProduct / (magnitudeA * magnitudeB); } -async function serperSearch(query: string, serperApiKey: string, serperSearchEngine: string): Promise { +async function serperSearch(query: string, searchSettings: WebSearchSettings): Promise { return new Promise((resolve, reject) => { - const url = `${SERPER_BASE_URL}?q=${encodeURIComponent(query)}&engine=${serperSearchEngine}&api_key=${serperApiKey}&num=20`; + const apiKey = searchSettings.serperApiKey; + const searchEngine = searchSettings.serperSearchEngine; + const url = `${SERPER_BASE_URL}?q=${encodeURIComponent(query)}&engine=${searchEngine}&api_key=${apiKey}&num=20`; https.get(url, (res: any) => { let data = ''; @@ -139,6 +142,10 @@ async function serperSearch(query: string, serperApiKey: string, serperSearchEng }); } +async function search(query: string, searchSettings: WebSearchSettings): Promise { + return serperSearch(query, searchSettings); +} + async function filterByEmbedding(query: string, results: SearchResult[], ragEngine: RAGEngine): Promise { // 如果没有结果,直接返回空数组 @@ -211,8 +218,9 @@ async function fetchByJina(url: string, apiKey: string): Promise { return new Promise((resolve) => { const jinaUrl = `${JINA_BASE_URL}/${url}`; + const validJinaKey = apiKey && apiKey !== ''; const jinaHeaders = { - 'Authorization': `Bearer ${apiKey}`, + 'Authorization': validJinaKey && `Bearer ${apiKey}`, 'X-No-Cache': 'true', }; @@ -254,17 +262,18 
@@ async function fetchByJina(url: string, apiKey: string): Promise { }); } -export async function fetchUrlContent(url: string, apiKey: string): Promise { +export async function fetchUrlContent(url: string, searchSettings: WebSearchSettings): Promise { try { // 如果是视频内容,直接使用本地工具处理 if (isVideoUrl(url)) { return await fetchByLocalTool(url); } let content: string | null = null; - const validJinaKey = apiKey && apiKey !== ''; - if (validJinaKey) { + + const fetchBackend = searchSettings.urlFetchBackend; + if (fetchBackend === 'jina') { try { - content = await fetchByJina(url, apiKey); + content = await fetchByJina(url, searchSettings.jinaApiKey); } catch (error) { console.error(`Failed to fetch URL by jina: ${url}`, error); content = await fetchByLocalTool(url); @@ -272,6 +281,7 @@ export async function fetchUrlContent(url: string, apiKey: string): Promise { try { - const results = await serperSearch(query, serperApiKey, serperSearchEngine); + const results = await search(query, searchSettings); const filteredResults = await filterByEmbedding(query, results, ragEngine); const filteredResultsWithContent = await Promise.all(filteredResults.map(async (result) => { - let content = await fetchUrlContent(result.link, jinaApiKey); + let content = await fetchUrlContent(result.link, searchSettings); if (content.length === 0) { content = result.snippet; } @@ -303,11 +311,11 @@ export async function webSearch( } } -export async function fetchUrlsContent(urls: string[], apiKey: string): Promise { +export async function fetchUrlsContent(urls: string[], searchSettings: WebSearchSettings): Promise { return new Promise((resolve) => { const results = urls.map(async (url) => { try { - const content = await fetchUrlContent(url, apiKey); + const content = await fetchUrlContent(url, searchSettings); return `\n${content}\n`; } catch (error) { console.error(`Failed to fetch URL content: ${url}`, error); From 3ea4d01260d4b90970503df4d67006b2fff144a2 Mon Sep 17 00:00:00 2001 From: travertexg 
Date: Fri, 13 Jun 2025 10:04:03 +0000 Subject: [PATCH 2/9] refactor: Improve search result processing and display - Reworked coreplugin-match.ts to more reliably extract multi-line match contexts from Obsidian's core search plugin. - Simplified omnisearch-match.ts to better utilize the excerpts provided by the Omnisearch API, leading to more accurate context display. - Updated ripgrep-regex.ts to align with the new search result structure. - Improved line and context handling in search-common.ts. This includes: - A better truncateLine function that preserves the context around the match. - Performance improvements by using binary search (findLineIndexBS) for line lookups. - Updated SearchResult interface to better support multi-line matches. --- .../file-search/match/coreplugin-match.ts | 227 ++++++++++++------ .../file-search/match/omnisearch-match.ts | 65 +++-- src/core/file-search/regex/ripgrep-regex.ts | 92 +++---- src/core/file-search/search-common.ts | 131 +++++++--- 4 files changed, 322 insertions(+), 193 deletions(-) diff --git a/src/core/file-search/match/coreplugin-match.ts b/src/core/file-search/match/coreplugin-match.ts index a82f73b..dfc3290 100644 --- a/src/core/file-search/match/coreplugin-match.ts +++ b/src/core/file-search/match/coreplugin-match.ts @@ -1,12 +1,47 @@ -import { App, TFile } from "obsidian"; +import { App, TFile, View } from "obsidian"; import { - MAX_RESULTS, - truncateLine, - findLineDetails, - SearchResult, - formatResults, + MAX_RESULTS, + truncateLine, + buildLineIndexs, + lineIndex, + findLineIndexBS, + SearchResult, + formatResults, } from '../search-common'; +// A tuple representing the [start, end] character offsets of a match. 
+type MatchOffsetTuple = [number, number]; + +interface FileSearchResult { + app: App + children: any[] + childrenEl: HTMLElement + collapseEl: HTMLElement + collapsed: boolean + collapsible: boolean + containerEl: HTMLElement + content: string + dom: any + el: HTMLElement + extraContext: () => boolean + file: TFile + info: any + onMatchRender: any + pusherEl: HTMLElement + result: { + filename?: MatchOffsetTuple[] + content?: MatchOffsetTuple[] + } +} + +interface SearchDOM { + resultDomLookup: Map; +} + +interface SearchView extends View { + dom: SearchDOM; +} + /** * Searches using Obsidian's core search plugin and builds context for each match. * @@ -15,79 +50,115 @@ import { * @returns A promise that resolves to a formatted string of search results. */ export async function matchSearchUsingCorePlugin( - query: string, - app: App, + query: string, + app: App, ): Promise { - try { - const searchPlugin = (app as any).internalPlugins.plugins['global-search']?.instance; - if (!searchPlugin) { - throw new Error("Core search plugin is not available."); - } - - // This function opens the search pane and executes the search. - // It does not return the results directly. - searchPlugin.openGlobalSearch(query); - - const searchLeaf = app.workspace.getLeavesOfType('search')[0]; - if (!searchLeaf) { - throw new Error("No active search pane found after triggering search."); - } - - // Ensure the view is fully loaded before we try to access its properties. - const view = await searchLeaf.open(searchLeaf.view); - const searchResultsMap = await new Promise>(resolve => { - setTimeout(() => { - // @ts-ignore - const results = (view as any).dom?.resultDomLookup; - resolve(results || new Map()); - }, 10000) - }); - - if (!searchResultsMap || searchResultsMap.size === 0) { - console.error("No results found or search results map is not available."); - return "No results found." 
- } - - const results: SearchResult[] = []; - const vault = app.vault; - - for (const [file, fileMatches] of searchResultsMap.entries()) { - if (results.length >= MAX_RESULTS) { - break; - } - - let content = await vault.cachedRead(file as TFile); - // 清理null字节,防止PostgreSQL UTF8编码错误 - content = content.replace(/\0/g, ''); - const lines = content.split('\n'); - - // `fileMatches.result.content` holds an array of matches for the file. - // Each match is an array: [matched_text, start_offset] - for (const match of fileMatches.result.content) { - if (results.length >= MAX_RESULTS) break; - - const startOffset = match[1]; - const { lineNumber, columnNumber, lineContent } = findLineDetails(lines, startOffset); - - if (lineNumber === -1) continue; - - results.push({ - file: file.path, - line: lineNumber + 1, // ripgrep is 1-based, so we adjust - column: columnNumber + 1, - match: truncateLine(lineContent.trimEnd()), - beforeContext: lineNumber > 0 ? [truncateLine(lines[lineNumber - 1].trimEnd())] : [], - afterContext: - lineNumber < lines.length - 1 - ? [truncateLine(lines[lineNumber + 1].trimEnd())] - : [], - }); - } - } - - return formatResults(results, ".\\"); - } catch (error) { + try { + // @ts-ignore + const searchPlugin = app.internalPlugins.plugins['global-search']?.instance; + if (!searchPlugin) { + throw new Error("Core search plugin is not available."); + } + + // This function opens the search pane and executes the search. + // It does not return the results directly. 
+ searchPlugin.openGlobalSearch(query); + + const getSearchResults = (): Map | null => { + const searchLeaf = app.workspace.getLeavesOfType('search')[0]; + if (!searchLeaf) { + return null; + } + + const searchView = searchLeaf.view as SearchView; + if (searchView.dom?.resultDomLookup && searchView.dom.resultDomLookup.size > 0) { + return searchView.dom.resultDomLookup; + } + return null; + }; + + const searchResultsMap = await new Promise>(resolve => { + setTimeout(() => { + const results = getSearchResults(); + resolve(results || new Map()); + }, 10000) + }); + + if (!searchResultsMap) { + const searchLeaf = app.workspace.getLeavesOfType('search')[0]; + if (searchLeaf) { + // @ts-ignore + const searchInput = searchLeaf.view.searchQuery?.inputEl?.value; + if (searchInput === query) { + return "No results found."; + } + } + throw new Error("Could not retrieve search results within the time limit."); + } + + const results: SearchResult[] = []; + for (const [file, fileMatches] of searchResultsMap.entries()) { + if (results.length >= MAX_RESULTS) { + break; + } + + if ( + !file || !(file instanceof TFile) || + !fileMatches.content || fileMatches.content.length === 0 + ) { + continue; + } + const lines = fileMatches.content.split('\n'); + const indexs = buildLineIndexs(lines); + + for (const [startOffset, endOffset] of fileMatches.result.content) { + if (results.length >= MAX_RESULTS) { + break; + } + + const lineIndexs: [lineIndex, lineIndex] = [ + findLineIndexBS(indexs, startOffset), + findLineIndexBS(indexs, endOffset), + ]; + if ( + lineIndexs[0].line === -1 || lineIndexs[1].line === -1 || + lineIndexs[1].line < lineIndexs[0].line + ) { + continue; + } + + const match = lines.slice(lineIndexs[0].line, lineIndexs[1].line + 1).join('\n').trimEnd(); + const columnStart = lineIndexs[0].column; + const columnEnd = lineIndexs[1].column + (indexs[lineIndexs[1].line] - indexs[lineIndexs[0].line]); + + const finalLines = + truncateLine(match, columnStart, 
Math.min(columnEnd, match.length - 1)).split('\n'); + finalLines.forEach((line, index) => { + finalLines.splice(index, 1, line.trimEnd()); + }); + + results.push({ + file: file.path, + match: finalLines, + precedingContext: + lineIndexs[0].line > 0 + ? [truncateLine(lines[lineIndexs[0].line - 1].trimEnd(), 0)] + : [], + succeedingContext: + lineIndexs[1].line < lines.length - 1 + ? [truncateLine(lines[lineIndexs[1].line + 1].trimEnd(), 0)] + : [], + }); + } + } + + if (results.length === 0) { + return "No results found."; + } + + return formatResults(results); + } catch (error) { console.error("Error during core plugin processing:", error); - return "An error occurred during the search."; + return `An error occurred during the search: ${error}`; } } diff --git a/src/core/file-search/match/omnisearch-match.ts b/src/core/file-search/match/omnisearch-match.ts index 7865600..74a1447 100644 --- a/src/core/file-search/match/omnisearch-match.ts +++ b/src/core/file-search/match/omnisearch-match.ts @@ -1,8 +1,9 @@ -import { App } from "obsidian"; +import { App, TFile } from "obsidian"; import { MAX_RESULTS, - truncateLine, - findLineDetails, + //truncateLine, + //buildLineIndexs, + //findLineIndexBS, SearchResult, formatResults, } from '../search-common'; @@ -24,7 +25,6 @@ type ResultNoteApi = { type OmnisearchApi = { search: (query: string) => Promise; - // ... other API methods }; declare global { @@ -61,52 +61,41 @@ export async function matchSearchUsingOmnisearch( // Omnisearch is not a regex engine. // The `query` will be treated as a keyword/fuzzy search by the plugin. const apiResults = await window.omnisearch.search(query); - if (!apiResults || apiResults.length === 0) { - console.error("No results found."); - return "No results found." 
+ if (!apiResults) { + throw new Error("Search results are not available."); + } + if (apiResults.length === 0) { + return "No results found."; } const results: SearchResult[] = []; - for (const result of apiResults) { + for (const noteResult of apiResults) { if (results.length >= MAX_RESULTS) { - break; // Stop processing new files if we have enough results + break; + } + if (!noteResult.matches || noteResult.matches.length === 0) { + continue; } - if (!result.matches || result.matches.length === 0) continue; - - const fileContent = await app.vault.adapter.read(result.path); - const lines = fileContent.split("\n"); - - for (const match of result.matches) { - if (results.length >= MAX_RESULTS) { - break; // Stop processing matches if we have enough results - } - const { lineNumber, columnNumber, lineContent } = findLineDetails( - lines, - match.offset - ); + const lines = noteResult.excerpt.split('\n'); + lines.forEach((line, index) => { + lines.splice(index, 1, line.trimEnd()); + }); - if (lineNumber === -1) continue; + results.push({ + file: noteResult.path, + match: lines, + }); + } - const searchResult: SearchResult = { - file: result.path, - line: lineNumber + 1, // ripgrep is 1-based, so we adjust - column: columnNumber + 1, - match: truncateLine(lineContent.trimEnd()), - beforeContext: lineNumber > 0 ? [truncateLine(lines[lineNumber - 1].trimEnd())] : [], - afterContext: - lineNumber < lines.length - 1 - ? 
[truncateLine(lines[lineNumber + 1].trimEnd())] - : [], - }; - results.push(searchResult); - } + if (results.length === 0) { + return "No results found."; } - return formatResults(results, ".\\"); + return formatResults(results); } catch (error) { console.error("Error during Omnisearch processing:", error); - return "An error occurred during the search."; + return `An error occurred during the search: ${error}`; } } \ No newline at end of file diff --git a/src/core/file-search/regex/ripgrep-regex.ts b/src/core/file-search/regex/ripgrep-regex.ts index 0974164..d70c5d3 100644 --- a/src/core/file-search/regex/ripgrep-regex.ts +++ b/src/core/file-search/regex/ripgrep-regex.ts @@ -55,13 +55,13 @@ async function execRipgrep(bin: string, args: string[]): Promise { }) rl.on("close", () => { if (errorOutput) { - reject(new Error(`ripgrep process error: ${errorOutput}`)) + reject(new Error(`(ripgrep process error) ${errorOutput}`)) } else { resolve(output) } }) rgProcess.on("error", (error) => { - reject(new Error(`ripgrep process error: ${error.message}`)) + reject(new Error(`(ripgrep process error) ${error.message}`)) }) }) } @@ -71,38 +71,32 @@ export async function regexSearchUsingRipgrep( regex: string, ripgrepPath: string, ): Promise { - const rgPath = await getBinPath(ripgrepPath) - - if (!rgPath) { - throw new Error("Could not find ripgrep binary") - } - - // use --glob param to exclude .obsidian directory - const args = [ - "--json", - "-e", - regex, - "--glob", - "!.obsidian/**", // exclude .obsidian directory and all its subdirectories - "--glob", - "!.git/**", - "--context", - "1", - directoryPath - ] - - let output: string try { - output = await execRipgrep(rgPath, args) - } catch (error) { - console.error("Error executing ripgrep:", error) - return "No results found." 
- } - const results: SearchResult[] = [] - let currentResult: Partial | null = null + const rgPath = await getBinPath(ripgrepPath); + if (!rgPath) { + throw new Error("Could not find ripgrep binary"); + } - output.split("\n").forEach((line) => { - if (line) { + // use --glob param to exclude .obsidian directory + const args = [ + "--json", + "-e", + regex, + "--glob", + "!.obsidian/**", // exclude .obsidian directory and all its subdirectories + "--glob", + "!.git/**", + "--context", + "1", + directoryPath + ] + + let output = await execRipgrep(rgPath, args); + + const results: SearchResult[] = [] + let currentResult: Partial | null = null + + output.split("\n").forEach((line) => { if (line) { try { const parsed = JSON.parse(line) if (parsed.type === "match") { @@ -111,37 +105,43 @@ export async function regexSearchUsingRipgrep( } // Safety check: truncate extremely long lines to prevent excessive output - const matchText = parsed.data.lines.text - const truncatedMatch = truncateLine(matchText) + const matchText = parsed.data.lines.text; + const truncatedMatch = truncateLine(matchText, 0); currentResult = { file: parsed.data.path.text, + match: [truncatedMatch], line: parsed.data.line_number, column: parsed.data.submatches[0].start, - match: truncatedMatch, - beforeContext: [], - afterContext: [], } } else if (parsed.type === "context" && currentResult) { // Apply the same truncation logic to context lines const contextText = parsed.data.lines.text - const truncatedContext = truncateLine(contextText) if (parsed.data.line_number < currentResult.line!) 
{ - currentResult.beforeContext!.push(truncatedContext) + const truncatedContext = truncateLine(contextText, 0) + currentResult.precedingContext!.push(truncatedContext) } else { - currentResult.afterContext!.push(truncatedContext) + const truncatedContext = truncateLine(contextText, 0) + currentResult.succeedingContext!.push(truncatedContext) } } } catch (error) { - console.error("Error parsing ripgrep output:", error) + throw new Error("Error parsing ripgrep output:", error); } + }}) + + if (currentResult) { + results.push(currentResult as SearchResult) } - }) - if (currentResult) { - results.push(currentResult as SearchResult) + if (results.length === 0) { + return "No results found."; + } + + return formatResults(results, directoryPath); + } catch (error) { + console.error("Error during ripgrep processing:", error); + return `An error occurred during the search: ${error}`; } - - return formatResults(results, directoryPath) } diff --git a/src/core/file-search/search-common.ts b/src/core/file-search/search-common.ts index e16c1b8..c6fb5ef 100644 --- a/src/core/file-search/search-common.ts +++ b/src/core/file-search/search-common.ts @@ -5,63 +5,124 @@ export const MAX_RESULTS = 300 export const MAX_LINE_LENGTH = 500 /** - * Truncates a line if it exceeds the maximum length - * @param line The line to truncate - * @param maxLength The maximum allowed length (defaults to MAX_LINE_LENGTH) - * @returns The truncated line, or the original line if it's shorter than maxLength + * Truncates a line around a specific matched area if it exceeds the maximum length. + * This attempts to keep the characters around the startOffset and endOffset visible. + * + * @param line The line to truncate. + * @param startOffset The starting character index of the matched area. + * @param endOffset The ending character index of the matched area (defaults to startOffset). + * @param maxLength The maximum allowed length (defaults to MAX_LINE_LENGTH). 
+ * @returns The truncated line, with truncation indicators if necessary, or the original line if it's shorter than maxLength or maxLength is less than 2. */ -export function truncateLine(line: string, maxLength: number = MAX_LINE_LENGTH): string { - return line.length > maxLength ? line.substring(0, maxLength) + " [truncated...]" : line +export function truncateLine( + line: string, + startOffset: number, + endOffset?: number, + maxLength: number = MAX_LINE_LENGTH +): string { + if (line.length <= maxLength || maxLength < 2) { + return line; + } + + const focusStart = Math.clamp(startOffset, 0, line.length); + const focusEnd = Math.clamp(endOffset ?? focusStart, focusStart, line.length - 1); + const focusCentre = Math.floor((focusEnd - focusStart + 1) / 2); + + let sliceStart = Math.max(0, focusCentre - Math.floor(maxLength / 2)) + sliceStart = Math.min(sliceStart, focusStart, line.length - maxLength); + const sliceEnd = Math.min(sliceStart + maxLength, line.length); + + return ( + (sliceStart > 0 ? "[...truncated] " : "") + + line.substring(sliceStart, sliceEnd) + + (sliceEnd < line.length ? " [truncated...]" : "") + ); } /** - * Finds the line number and content for a given character offset within a file's content. - * @param lines All lines in the file. - * @param offset The character offset of the match. - * @returns An object with line number, column number, and the full line content. + * Builds an array of character offsets representing the start of each line in a given array of lines. + * This is used to quickly determine the line number and column number of a character offset within the text. + * + * @param lines An array of strings, where each string is a line of text. + * @returns An array of numbers, where each number is the starting character offset of the corresponding line. 
*/ -export function findLineDetails( - lines: string[], - offset: number -): { lineNumber: number; columnNumber: number; lineContent: string } { - let charCount = 0; +export function buildLineIndexs(lines: string[]): number[] { + const indexs: number[] = []; + let currentOffset = 0; for (let i = 0; i < lines.length; i++) { const line = lines[i]; - // The line ending length (1 for \n, 2 for \r\n) can vary. - // A simple +1 is a reasonable approximation for this calculation. - const lineEndOffset = charCount + line.length + 1; + indexs.push(currentOffset); + + currentOffset += line.length + 1; + } + return indexs; +} + +export interface lineIndex { + line: number; + column: number; +} - if (offset < lineEndOffset) { - const columnNumber = offset - charCount; - return { lineNumber: i, columnNumber, lineContent: line }; +/** + * Finds the line index for a given character offset using binary search. + * This is significantly more efficient than linear scanning for each match. + * + * @param offset The character offset of the match. + * @param indexs An array where each element is the starting character offset of a line. + * @returns The index of the line containing the offset. 
+ */ +export function findLineIndexBS(indexs: number[], offset: number): lineIndex { + let low = 0; + let high = indexs.length - 1; + let bestGuess = -1; + while (low <= high) { + const mid = Math.floor(low + (high - low) / 2); + const midOffset = indexs[mid]; + + if (midOffset <= offset) { + bestGuess = mid; + low = mid + 1; + } else { + high = mid - 1; } - charCount = lineEndOffset; } - return { lineNumber: -1, columnNumber: -1, lineContent: "" }; + + const column = offset - indexs[bestGuess]; + return { + line: bestGuess, + column, + }; } export interface SearchResult { file: string - line: number + match: string[] + line?: number column?: number - match?: string - beforeContext: string[] - afterContext: string[] + precedingContext?: string[] + succeedingContext?: string[] } -export function formatResults(results: SearchResult[], cwd: string): string { +/** + * Formats an array of search results into a LLM-friendly string output, grouped by file. + * + * @param results - An array of SearchResult objects. + * @param cwd - The current working directory, used to make file paths relative. + * @returns A formatted string representing the search results. + */ +export function formatResults(results: SearchResult[], cwd?: string): string { const groupedResults: { [key: string]: SearchResult[] } = {} let output = "" if (results.length >= MAX_RESULTS) { - output += `Showing first ${MAX_RESULTS} of ${MAX_RESULTS}+ results. Use a more specific search if necessary.\n\n` + output += `Showing first ${MAX_RESULTS.toLocaleString()} of ${results.length.toLocaleString()} results. Use a more specific search if necessary.\n\n` } else { output += `Found ${results.length === 1 ? "1 result" : `${results.length.toLocaleString()} results`}.\n\n` } // Group results by file name results.slice(0, MAX_RESULTS).forEach((result) => { - const relativeFilePath = path.relative(cwd, result.file) + const relativeFilePath = cwd ? 
path.relative(cwd, result.file) : result.file; if (!groupedResults[relativeFilePath]) { groupedResults[relativeFilePath] = [] } @@ -72,7 +133,15 @@ export function formatResults(results: SearchResult[], cwd: string): string { output += `${filePath.toPosix()}\n│----\n` fileResults.forEach((result, index) => { - const allLines = [...result.beforeContext, result.match, ...result.afterContext] + const allLines: string[] = []; + if (result.precedingContext) { + allLines.push(...result.precedingContext); + } + allLines.push(...result.match); + if (result.succeedingContext) { + allLines.push(...result.succeedingContext); + } + allLines.forEach((line) => { output += `│${line?.trimEnd() ?? ""}\n` }) From f314a5cc04f7da621a0287d5e9b3b3c479010416 Mon Sep 17 00:00:00 2001 From: travertexg Date: Sun, 15 Jun 2025 14:10:56 +0000 Subject: [PATCH 3/9] refactor: Remove unused App instance from Omnisearch file search functions --- src/components/chat-view/ChatView.tsx | 2 +- .../file-search/match/coreplugin-match.ts | 15 ++++++++------ .../file-search/match/omnisearch-match.ts | 20 +++++++------------ 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/components/chat-view/ChatView.tsx b/src/components/chat-view/ChatView.tsx index cc29e0d..f0e2ddb 100644 --- a/src/components/chat-view/ChatView.tsx +++ b/src/components/chat-view/ChatView.tsx @@ -660,7 +660,7 @@ const Chat = forwardRef((props, ref) => { const searchBackend = settings.filesSearchSettings.matchBackend let results: string; if (searchBackend === 'omnisearch') { - results = await matchSearchUsingOmnisearch(toolArgs.query, app) + results = await matchSearchUsingOmnisearch(toolArgs.query) } else { results = await matchSearchUsingCorePlugin(toolArgs.query, app) } diff --git a/src/core/file-search/match/coreplugin-match.ts b/src/core/file-search/match/coreplugin-match.ts index dfc3290..f5d2584 100644 --- a/src/core/file-search/match/coreplugin-match.ts +++ b/src/core/file-search/match/coreplugin-match.ts @@ 
-50,8 +50,8 @@ interface SearchView extends View { * @returns A promise that resolves to a formatted string of search results. */ export async function matchSearchUsingCorePlugin( - query: string, - app: App, + query: string, + app: App, ): Promise { try { // @ts-ignore @@ -134,7 +134,8 @@ export async function matchSearchUsingCorePlugin( const finalLines = truncateLine(match, columnStart, Math.min(columnEnd, match.length - 1)).split('\n'); finalLines.forEach((line, index) => { - finalLines.splice(index, 1, line.trimEnd()); + // Clean up null bytes to prevent PostgreSQL UTF8 encoding errors + finalLines.splice(index, 1, line.replace(/\0/g, '').trimEnd()); }); results.push({ @@ -142,18 +143,20 @@ export async function matchSearchUsingCorePlugin( match: finalLines, precedingContext: lineIndexs[0].line > 0 - ? [truncateLine(lines[lineIndexs[0].line - 1].trimEnd(), 0)] + // Clean up null bytes to prevent PostgreSQL UTF8 encoding errors + ? [truncateLine(lines[lineIndexs[0].line - 1].replace(/\0/g, '').trimEnd(), 0)] : [], succeedingContext: lineIndexs[1].line < lines.length - 1 - ? [truncateLine(lines[lineIndexs[1].line + 1].trimEnd(), 0)] + // Clean up null bytes to prevent PostgreSQL UTF8 encoding errors + ? [truncateLine(lines[lineIndexs[1].line + 1].replace(/\0/g, '').trimEnd(), 0)] : [], }); } } if (results.length === 0) { - return "No results found."; + return "No results found."; } return formatResults(results); diff --git a/src/core/file-search/match/omnisearch-match.ts b/src/core/file-search/match/omnisearch-match.ts index 74a1447..2932ecf 100644 --- a/src/core/file-search/match/omnisearch-match.ts +++ b/src/core/file-search/match/omnisearch-match.ts @@ -1,9 +1,5 @@ -import { App, TFile } from "obsidian"; import { MAX_RESULTS, - //truncateLine, - //buildLineIndexs, - //findLineIndexBS, SearchResult, formatResults, } from '../search-common'; @@ -47,10 +43,7 @@ function isOmnisearchAvailable(): boolean { * @param app The Obsidian App instance. 
* @returns A formatted string of search results. */ -export async function matchSearchUsingOmnisearch( - query: string, - app: App, -): Promise { +export async function matchSearchUsingOmnisearch(query: string): Promise { try { if (!isOmnisearchAvailable()) { throw new Error( @@ -78,10 +71,11 @@ export async function matchSearchUsingOmnisearch( continue; } - const lines = noteResult.excerpt.split('\n'); - lines.forEach((line, index) => { - lines.splice(index, 1, line.trimEnd()); - }); + const lines = noteResult.excerpt.split('\n'); + lines.forEach((line, index) => { + // Clean up null bytes to prevent PostgreSQL UTF8 encoding errors + lines.splice(index, 1, line.replace(/\0/g, '').trimEnd()); + }); results.push({ file: noteResult.path, @@ -98,4 +92,4 @@ export async function matchSearchUsingOmnisearch( console.error("Error during Omnisearch processing:", error); return `An error occurred during the search: ${error}`; } -} \ No newline at end of file +} From 0dbbe35ab808eba8959e1d3f7f603ddc50444de3 Mon Sep 17 00:00:00 2001 From: travertexg Date: Mon, 16 Jun 2025 10:08:57 +0000 Subject: [PATCH 4/9] feat: Add multiple web search backends This commit introduces support for various web search backends, providing users with more options within the plugin. Key Changes: - Added multiple new search backends - Modified the settings tab to include dropdown menus for selecting the preferred web search backends, along with corresponding API key input fields where necessary. 
Current Supported Search Backends: - SerpAPI - Scrapingdog - Serper - Jina - DuckDuckGo, - Brave --- package.json | 3 +- pnpm-lock.yaml | 33 +- .../Markdown/MarkdownSearchWebBlock.tsx | 23 +- src/constants.ts | 10 +- src/lang/locale/en.ts | 32 +- src/lang/locale/zh-cn.ts | 32 +- src/settings/SettingTab.tsx | 169 ++++++++- src/types/settings.test.ts | 28 +- src/types/settings.ts | 22 +- src/utils/web-search.ts | 330 +++++++++++++++++- 10 files changed, 605 insertions(+), 77 deletions(-) diff --git a/package.json b/package.json index a97432d..2b2758f 100644 --- a/package.json +++ b/package.json @@ -82,7 +82,8 @@ "delay": "^6.0.0", "diff": "^7.0.0", "diff-match-patch": "^1.0.5", - "drizzle-orm": "^0.35.2", + "drizzle-orm": "^0.35.3", + "duck-duck-scrape": "^2.2.7", "esbuild-plugin-inline-worker": "^0.1.1", "exponential-backoff": "^3.1.1", "fast-deep-equal": "^3.1.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 33d1241..f01869f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -97,8 +97,11 @@ importers: specifier: ^1.0.5 version: 1.0.5 drizzle-orm: - specifier: ^0.35.2 + specifier: ^0.35.3 version: 0.35.3(@electric-sql/pglite@0.2.14)(@libsql/client-wasm@0.15.9)(@types/react@18.3.23)(react@18.3.1) + duck-duck-scrape: + specifier: ^2.2.7 + version: 2.2.7 esbuild-plugin-inline-worker: specifier: ^0.1.1 version: 0.1.1 @@ -3635,6 +3638,9 @@ packages: sqlite3: optional: true + duck-duck-scrape@2.2.7: + resolution: {integrity: sha512-BEcglwnfx5puJl90KQfX+Q2q5vCguqyMpZcSRPBWk8OY55qWwV93+E+7DbIkrGDW4qkqPfUvtOUdi0lXz6lEMQ==} + dunder-proto@1.0.1: resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} engines: {node: '>= 0.4'} @@ -4281,6 +4287,9 @@ packages: resolution: {integrity: sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==} engines: {node: '>=12'} + html-entities@2.6.0: + resolution: {integrity: 
sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==} + html-escaper@2.0.2: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} @@ -5280,6 +5289,11 @@ packages: natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} + needle@3.3.1: + resolution: {integrity: sha512-6k0YULvhpw+RoLNiQCRKOl09Rv1dPLr8hHnVjHqdolKwDrdNyk+Hmrthi4lIGPPz3r39dLx0hsF5s40sZ3Us4Q==} + engines: {node: '>= 4.4.x'} + hasBin: true + negotiator@1.0.0: resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==} engines: {node: '>= 0.6'} @@ -6014,6 +6028,9 @@ packages: sanitize-basename@2.0.2: resolution: {integrity: sha512-zaOQiK4PPsUQZ0Nx3KCrT9p0oEx1dTef14qflYq7TdxPRI2RY9faLYuExCiF8LiJdeKVYb5o/KdF0fGEvvEZUA==} + sax@1.4.1: + resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==} + saxes@6.0.0: resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==} engines: {node: '>=v12.22.7'} @@ -10196,6 +10213,11 @@ snapshots: '@types/react': 18.3.23 react: 18.3.1 + duck-duck-scrape@2.2.7: + dependencies: + html-entities: 2.6.0 + needle: 3.3.1 + dunder-proto@1.0.1: dependencies: call-bind-apply-helpers: 1.0.2 @@ -11159,6 +11181,8 @@ snapshots: dependencies: whatwg-encoding: 2.0.0 + html-entities@2.6.0: {} + html-escaper@2.0.2: {} html-tags@3.3.1: {} @@ -12510,6 +12534,11 @@ snapshots: natural-compare@1.4.0: {} + needle@3.3.1: + dependencies: + iconv-lite: 0.6.3 + sax: 1.4.1 + negotiator@1.0.0: {} neo-async@2.6.2: {} @@ -13329,6 +13358,8 @@ snapshots: sanitize-basename@2.0.2: {} + sax@1.4.1: {} + saxes@6.0.0: dependencies: xmlchars: 2.2.0 diff --git a/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx 
b/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx index c3abb3c..419f299 100644 --- a/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx +++ b/src/components/chat-view/Markdown/MarkdownSearchWebBlock.tsx @@ -19,14 +19,29 @@ export default function MarkdownWebSearchBlock({ const { settings } = useSettings() + const backend = settings.webSearchSettings.webSearchBackend; const handleClick = () => { - if (settings.webSearchSettings.serperSearchEngine === 'google') { + if ( + (backend === 'serpapi' && settings.webSearchSettings.serpapiSearchEngine === 'google') || + (backend === 'scrapingdog' && settings.webSearchSettings.scrapingdogSearchEngine === 'google') || + (backend === 'serper') + ) { window.open(`https://www.google.com/search?q=${query}`, '_blank') - } else if (settings.webSearchSettings.serperSearchEngine === 'bing') { + } else if ( + (backend === 'serpapi' && settings.webSearchSettings.serpapiSearchEngine === 'bing') || + (backend === 'scrapingdog' && settings.webSearchSettings.scrapingdogSearchEngine === 'bing') + ) { window.open(`https://www.bing.com/search?q=${query}`, '_blank') - } else { + } else if ( + (backend === 'serpapi' && settings.webSearchSettings.serpapiSearchEngine === 'duckduckgo') || + (backend === 'duckduckgo') + ) { window.open(`https://duckduckgo.com/?q=${query}`, '_blank') - } + } else if (backend === 'brave') { + window.open(`https://search.brave.com/search?q=${query}`, '_blank') + } else { + window.open(`https://www.google.com/search?q=${query}`, '_blank') + } } React.useEffect(() => { diff --git a/src/constants.ts b/src/constants.ts index 9d53df3..d09b88e 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -34,8 +34,14 @@ export const GROK_BASE_URL = 'https://api.x.ai/v1' export const SILICONFLOW_BASE_URL = 'https://api.siliconflow.cn/v1' export const ALIBABA_QWEN_BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1' export const INFIO_BASE_URL = 'https://api.infio.app' -export const JINA_BASE_URL = 
'https://r.jina.ai' -export const SERPER_BASE_URL = 'https://serpapi.com/search' +export const SERPAPI_BASE_URL = 'https://serpapi.com/search' +export const JINA_SEARCH_BASE_URL = 'https://s.jina.ai' +export const JINA_FETCH_BASE_URL = 'https://r.jina.ai' +export const SCRAPINGDOG_BASE_URL = 'https://api.scrapingdog.com' +// This is confusing since Serper and SerpAPI are not the same platform +//export const SERPER_BASE_URL = 'https://serpapi.com/search' +export const SERPER_BASE_URL = 'https://google.serper.dev/search' +export const BRAVE_BASE_URL = 'https://api.search.brave.com/res/v1/web/search' // Pricing in dollars per million tokens type ModelPricing = { input: number diff --git a/src/lang/locale/en.ts b/src/lang/locale/en.ts index a0081ce..7a54f65 100644 --- a/src/lang/locale/en.ts +++ b/src/lang/locale/en.ts @@ -369,19 +369,33 @@ export default { // Deep Research Section WebSearch: { title: 'Web search', - serperApiKey: 'Serper API key', - serperApiKeyDescription: 'API key for web search functionality. Serper allows the plugin to search the internet for information, similar to a search engine. Get your key from', - searchEngine: 'Serper search engine', - searchEngineDescription: 'Choose the search engine to use for web search.', + webSearchBackend: 'Web search backend', + webSearchBackendDescription: 'Choose the backend for web search functionality.', + urlFetchBackend: 'URL content fetch backend', + urlFetchBackendDescription: 'Choose the backend for URL content fetching.', + serpapiApiKey: 'SerpAPI API key', + serpapiApiKeyDescription: 'API key for SerpAPI web search. SerpAPI allows the plugin to search the internet for information, similar to a search engine. Get your key from', + serpapiSearchEngine: 'SerpAPI search engine', + serpapiSearchEngineDescription: 'Choose the search engine to use for SerpAPI web search.', + scrapingdogApiKey: 'Scrapingdog API key', + scrapingdogApiKeyDescription: 'API key for Scrapingdog web search. 
Scrapingdog allows the plugin to search the internet for information, similar to a search engine. Get your key from', + scrapingdogSearchEngine: 'Scrapingdog search engine', + scrapingdogSearchEngineDescription: 'Choose the search engine to use for Scrapingdog web search.', + scrperApiKey: 'Scrper API key', + scrperApiKeyDescription: 'API key for Scrper web search. Scrper allows the plugin to search the internet for information, similar to a search engine. Get your key from', + jinaApiKey: 'Jina API key', + jinaApiKeyDescription: 'API key for Jina web search and URL content fetch. The key is optional for URL content fetch but required for web search. Get your key from', + braveApiKey: 'Brave API key', + braveApiKeyDescription: 'API key for Brave web search. Brave allows the plugin to search the internet for information, similar to a search engine. Get your key from', google: 'Google', - duckDuckGo: 'DuckDuckGo', + duckduckgo: 'DuckDuckGo', + brave: 'Brave', bing: 'Bing', - urlFetchBackend: 'URL content fetch backend', - urlFetchBackendDescription: 'Choose the backend for URL content fetching.', + serpapi: 'SerpAPI', + scrapingdog: 'Scrapingdog', + serper: 'Google Serper', jina: 'Jina', local: 'Local', - jinaApiKey: 'Jina API key (Optional)', - jinaApiKeyDescription: 'API key for higher rate limits. 
Get your key from', }, // RAG Section diff --git a/src/lang/locale/zh-cn.ts b/src/lang/locale/zh-cn.ts index 03adeb5..257a6b1 100644 --- a/src/lang/locale/zh-cn.ts +++ b/src/lang/locale/zh-cn.ts @@ -370,19 +370,33 @@ export default { // 网页搜索部分 WebSearch: { title: '网页搜索', - serperApiKey: 'Serper API 密钥', - serperApiKeyDescription: '用于网页搜索功能的 API 密钥。Serper 允许插件在互联网上搜索信息,类似于搜索引擎。请从此处获取您的密钥', - searchEngine: 'Serper 搜索引擎', - searchEngineDescription: '选择用于网页搜索的搜索引擎。', + webSearchBackend: '网页搜索后端', + webSearchBackendDescription: '选择网页搜索功能的后端。', + urlFetchBackend: 'URL 内容抓取后端', + urlFetchBackendDescription: '选择用于抓取 URL 内容的后端。', + serpapiApiKey: 'SerpAPI API 密钥', + serpapiApiKeyDescription: 'SerpAPI 网页搜索的 API 密钥。SerpAPI 允许插件搜索互联网信息,类似于搜索引擎。从此处获取您的密钥:', + serpapiSearchEngine: 'SerpAPI 搜索引擎', + serpapiSearchEngineDescription: '选择用于 SerpAPI 网页搜索的搜索引擎。', + scrapingdogApiKey: 'Scrapingdog API 密钥', + scrapingdogApiKeyDescription: 'Scrapingdog 网页搜索的 API 密钥。Scrapingdog 允许插件搜索互联网信息,类似于搜索引擎。从此处获取您的密钥:', + scrapingdogSearchEngine: 'Scrapingdog 搜索引擎', + scrapingdogSearchEngineDescription: '选择用于 Scrapingdog 网页搜索的搜索引擎。', + scrperApiKey: 'Scrper API 密钥', + scrperApiKeyDescription: 'Scrper 网页搜索的 API 密钥。Scrper 允许插件搜索互联网信息,类似于搜索引擎。从此处获取您的密钥:', + jinaApiKey: 'Jina API 密钥', + jinaApiKeyDescription: 'Jina 网页搜索和 URL 内容抓取的 API 密钥。URL 内容抓取不需要此密钥,但 Web 搜索需要。在此获取您的密钥:', + braveApiKey: 'Brave API 密钥', + braveApiKeyDescription: 'Brave 网页搜索的 API 密钥。Brave 允许插件搜索互联网信息,类似于搜索引擎。从此处获取您的密钥:', google: 'Google', - duckDuckGo: 'DuckDuckGo', + duckduckgo: 'DuckDuckGo', + brave: 'Brave', bing: 'Bing', - urlFetchBackend: 'URL 检索后端', - urlFetchBackendDescription: '选择 URL 检索的后端。', + serpapi: 'SerpAPI', + scrapingdog: 'Scrapingdog', + serper: 'Google Serper', jina: 'Jina', local: '本地', - jinaApiKey: 'Jina API 密钥(可选)', - jinaApiKeyDescription: '使用 API 密钥以获得更高速率限制。请从此处获取您的密钥', }, // RAG 部分 diff --git a/src/settings/SettingTab.tsx b/src/settings/SettingTab.tsx index 07e76e0..1d0627b 100644 --- 
a/src/settings/SettingTab.tsx +++ b/src/settings/SettingTab.tsx @@ -253,10 +253,55 @@ export class InfioSettingTab extends PluginSettingTab { .setHeading() .setName(t('settings.WebSearch.title')) + new Setting(containerEl) + .setName(t('settings.WebSearch.webSearchBackend')) + .setDesc(t('settings.WebSearch.webSearchBackendDescription')) + .addDropdown((dropdown) => + dropdown + .addOption('serpapi', t('settings.WebSearch.serpapi')) + .addOption('scrapingdog', t('settings.WebSearch.scrapingdog')) + .addOption('serper', t('settings.WebSearch.serper')) + .addOption('jina', t('settings.WebSearch.jina')) + .addOption('duckduckgo', t('settings.WebSearch.duckduckgo')) + .addOption('brave', t('settings.WebSearch.brave')) + .setValue(this.plugin.settings.webSearchSettings.webSearchBackend) + .onChange(async (value) => { + await this.plugin.setSettings({ + ...this.plugin.settings, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + // @ts-ignore + webSearchBackend: value, + }, + }) + }), + ) + + new Setting(containerEl) + .setName(t('settings.WebSearch.urlFetchBackend')) + .setDesc(t('settings.WebSearch.urlFetchBackendDescription')) + .addDropdown((dropdown) => + dropdown + .addOption('jina', t('settings.WebSearch.jina')) + .addOption('local', t('settings.WebSearch.local')) + .setValue(this.plugin.settings.webSearchSettings.urlFetchBackend) + .onChange(async (value) => { + await this.plugin.setSettings({ + ...this.plugin.settings, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + // @ts-ignore + urlFetchBackend: value, + }, + }) + }), + ) + + // SerpAPI new Setting(containerEl) - .setName(t('settings.WebSearch.serperApiKey')) + .setName(t('settings.WebSearch.serpapiApiKey')) .setDesc(createFragment(el => { - el.appendText(t('settings.WebSearch.serperApiKeyDescription') + ' '); + el.appendText(t('settings.WebSearch.serpapiApiKeyDescription') + ' '); const a = el.createEl('a', { href: 'https://serpapi.com/manage-api-key', text: 
'https://serpapi.com/manage-api-key' @@ -267,13 +312,13 @@ export class InfioSettingTab extends PluginSettingTab { .setClass('setting-item-heading-smaller') .addText((text) => { const t = text - .setValue(this.plugin.settings.webSearchSettings.serperApiKey) + .setValue(this.plugin.settings.webSearchSettings.serpapiApiKey) .onChange(async (value) => { await this.plugin.setSettings({ ...this.plugin.settings, webSearchSettings: { ...this.plugin.settings.webSearchSettings, - serperApiKey: value, + serpapiApiKey: value, }, }) }); @@ -284,46 +329,109 @@ export class InfioSettingTab extends PluginSettingTab { }) new Setting(containerEl) - .setName(t('settings.WebSearch.searchEngine')) - .setDesc(t('settings.WebSearch.searchEngineDescription')) + .setName(t('settings.WebSearch.serpapiSearchEngine')) + .setDesc(t('settings.WebSearch.serpapiSearchEngineDescription')) .addDropdown((dropdown) => dropdown .addOption('google', t('settings.WebSearch.google')) - .addOption('duckduckgo', t('settings.WebSearch.duckDuckGo')) + .addOption('duckduckgo', t('settings.WebSearch.duckduckgo')) .addOption('bing', t('settings.WebSearch.bing')) - .setValue(this.plugin.settings.webSearchSettings.serperSearchEngine) + .setValue(this.plugin.settings.webSearchSettings.serpapiSearchEngine) .onChange(async (value) => { await this.plugin.setSettings({ ...this.plugin.settings, webSearchSettings: { ...this.plugin.settings.webSearchSettings, // @ts-ignore - serperSearchEngine: value, + serpapiSearchEngine: value, }, }) }), ) + // Scrapingdog new Setting(containerEl) - .setName(t('settings.WebSearch.urlFetchBackend')) - .setDesc(t('settings.WebSearch.urlFetchBackendDescription')) + .setName(t('settings.WebSearch.scrapingdogApiKey')) + .setDesc(createFragment(el => { + el.appendText(t('settings.WebSearch.scrapingdogApiKeyDescription') + ' '); + const a = el.createEl('a', { + href: 'https://www.scrapingdog.com/', + text: 'https://www.scrapingdog.com/' + }); + a.setAttr('target', '_blank'); + 
a.setAttr('rel', 'noopener'); + })) + .setClass('setting-item-heading-smaller') + .addText((text) => { + const t = text + .setValue(this.plugin.settings.webSearchSettings.scrapingdogApiKey) + .onChange(async (value) => { + await this.plugin.setSettings({ + ...this.plugin.settings, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + scrapingdogApiKey: value, + }, + }) + }); + if (t.inputEl) { + t.inputEl.type = "password"; + } + return t; + }) + + new Setting(containerEl) + .setName(t('settings.WebSearch.scrapingdogSearchEngine')) + .setDesc(t('settings.WebSearch.scrapingdogSearchEngineDescription')) .addDropdown((dropdown) => dropdown - .addOption('jina', t('settings.WebSearch.jina')) - .addOption('local', t('settings.WebSearch.local')) - .setValue(this.plugin.settings.webSearchSettings.urlFetchBackend) + .addOption('google', t('settings.WebSearch.google')) + .addOption('bing', t('settings.WebSearch.bing')) + .setValue(this.plugin.settings.webSearchSettings.scrapingdogSearchEngine) .onChange(async (value) => { await this.plugin.setSettings({ ...this.plugin.settings, webSearchSettings: { ...this.plugin.settings.webSearchSettings, // @ts-ignore - urlFetchBackend: value, + scrapingdogSearchEngine: value, }, }) }), ) + // Serper + new Setting(containerEl) + .setName(t('settings.WebSearch.scrperApiKey')) + .setDesc(createFragment(el => { + el.appendText(t('settings.WebSearch.scrperApiKeyDescription') + ' '); + const a = el.createEl('a', { + href: 'https://serper.dev/', + text: 'https://serper.dev/' + }); + a.setAttr('target', '_blank'); + a.setAttr('rel', 'noopener'); + })) + .setClass('setting-item-heading-smaller') + .addText((text) => { + const t = text + .setValue(this.plugin.settings.webSearchSettings.serperApiKey) + .onChange(async (value) => { + await this.plugin.setSettings({ + ...this.plugin.settings, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + serperApiKey: value, + }, + }) + }); + if (t.inputEl) { + t.inputEl.type = 
"password"; + } + return t; + }) + + // Jina new Setting(containerEl) .setName(t('settings.WebSearch.jinaApiKey')) .setDesc(createFragment(el => { @@ -353,6 +461,37 @@ export class InfioSettingTab extends PluginSettingTab { } return t; }) + + // Brave + new Setting(containerEl) + .setName(t('settings.WebSearch.braveApiKey')) + .setDesc(createFragment(el => { + el.appendText(t('settings.WebSearch.braveApiKeyDescription') + ' '); + const a = el.createEl('a', { + href: 'https://brave.com/search/api/', + text: 'https://brave.com/search/api/' + }); + a.setAttr('target', '_blank'); + a.setAttr('rel', 'noopener'); + })) + .setClass('setting-item-heading-smaller') + .addText((text) => { + const t = text + .setValue(this.plugin.settings.webSearchSettings.braveApiKey) + .onChange(async (value) => { + await this.plugin.setSettings({ + ...this.plugin.settings, + webSearchSettings: { + ...this.plugin.settings.webSearchSettings, + braveApiKey: value, + }, + }) + }); + if (t.inputEl) { + t.inputEl.type = "password"; + } + return t; + }) } renderRAGSection(containerEl: HTMLElement): void { diff --git a/src/types/settings.test.ts b/src/types/settings.test.ts index 3bb27b6..68295d1 100644 --- a/src/types/settings.test.ts +++ b/src/types/settings.test.ts @@ -147,11 +147,15 @@ describe('parseSmartCopilotSettings', () => { removeDuplicateMathBlockIndicator: true, removeDuplicateCodeBlockIndicator: true, webSearchSettings: { - webSearchBackend: 'serper', - serperApiKey: '', - serperSearchEngine: 'google', - urlFetchBackend: 'jina', - jinaApiKey: '', + webSearchBackend: 'serper', + urlFetchBackend: 'jina', + serpapiApiKey: '', + serpapiSearchEngine: 'google', + scrapingdogApiKey: '', + scrapingdogSearchEngine: 'google', + serperApiKey: '', + jinaApiKey: '', + braveApiKey: '', }, ignoredFilePatterns: '**/secret/**\n', ignoredTags: '', @@ -394,11 +398,15 @@ describe('settings migration', () => { removeDuplicateMathBlockIndicator: true, removeDuplicateCodeBlockIndicator: true, 
webSearchSettings: { - webSearchBackend: 'serper', - serperApiKey: '', - serperSearchEngine: 'google', - urlFetchBackend: 'jina', - jinaApiKey: '', + webSearchBackend: 'serper', + urlFetchBackend: 'jina', + serpapiApiKey: '', + serpapiSearchEngine: 'google', + scrapingdogApiKey: '', + scrapingdogSearchEngine: 'google', + serperApiKey: '', + jinaApiKey: '', + braveApiKey: '', }, ignoredFilePatterns: '**/secret/**\n', ignoredTags: '', diff --git a/src/types/settings.ts b/src/types/settings.ts index c7a0ce2..9cec01b 100644 --- a/src/types/settings.ts +++ b/src/types/settings.ts @@ -243,17 +243,25 @@ export const triggerSchema = z.object({ }); const WebSearchSettingsSchema = z.object({ - webSearchBackend: z.enum(['local', 'serper']).catch('serper'), + webSearchBackend: z.enum(['serpapi', 'scrapingdog', 'serper', 'jina', 'duckduckgo', 'brave']).catch('serpapi'), + urlFetchBackend: z.enum(['local', 'jina']).catch('jina'), + serpapiApiKey: z.string().catch(''), + serpapiSearchEngine: z.enum(['google', 'duckduckgo', 'bing']).catch('google'), + scrapingdogApiKey: z.string().catch(''), + scrapingdogSearchEngine: z.enum(['google', 'bing']).catch('google'), serperApiKey: z.string().catch(''), - serperSearchEngine: z.enum(['google', 'duckduckgo', 'bing']).catch('google'), - urlFetchBackend: z.enum(['local', 'jina']).catch('jina'), - jinaApiKey: z.string().catch(''), + jinaApiKey: z.string().catch(''), + braveApiKey: z.string().catch(''), }).catch({ webSearchBackend: 'serper', - serperApiKey: '', - serperSearchEngine: 'google', urlFetchBackend: 'jina', - jinaApiKey: '', + serpapiApiKey: '', + serpapiSearchEngine: 'google', + scrapingdogApiKey: '', + scrapingdogSearchEngine: 'google', + serperApiKey: '', + jinaApiKey: '', + braveApiKey: '', }); const FilesSearchSettingsSchema = z.object({ diff --git a/src/utils/web-search.ts b/src/utils/web-search.ts index 0146e05..ed31f05 100644 --- a/src/utils/web-search.ts +++ b/src/utils/web-search.ts @@ -1,8 +1,19 @@ import https from 
'https'; import { htmlToMarkdown, requestUrl } from 'obsidian'; - -import { JINA_BASE_URL, SERPER_BASE_URL } from '../constants'; +import { + search as ddgSearch, + SearchResult as DDGSearchResult, +} from 'duck-duck-scrape'; + +import { + SERPAPI_BASE_URL, + JINA_SEARCH_BASE_URL, + SCRAPINGDOG_BASE_URL, + SERPER_BASE_URL, + JINA_FETCH_BASE_URL, + BRAVE_BASE_URL, +} from '../constants'; import { RAGEngine } from '../core/rag/rag-engine'; import { WebSearchSettings } from '../types/settings'; @@ -18,11 +29,6 @@ interface SearchResult { content?: string; } -interface SearchResponse { - organic_results?: SearchResult[]; -} - - export interface EventProps { [key: string]: string | number | boolean } @@ -97,11 +103,11 @@ function cosineSimilarity(vecA: number[], vecB: number[]): number { return dotProduct / (magnitudeA * magnitudeB); } -async function serperSearch(query: string, searchSettings: WebSearchSettings): Promise { +async function serpapiSearch(query: string, searchSettings: WebSearchSettings): Promise { return new Promise((resolve, reject) => { - const apiKey = searchSettings.serperApiKey; - const searchEngine = searchSettings.serperSearchEngine; - const url = `${SERPER_BASE_URL}?q=${encodeURIComponent(query)}&engine=${searchEngine}&api_key=${apiKey}&num=20`; + const apiKey = searchSettings.serpapiApiKey; + const searchEngine = searchSettings.serpapiSearchEngine; + const url = `${SERPAPI_BASE_URL}?q=${encodeURIComponent(query)}&engine=${searchEngine}&api_key=${apiKey}&num=20`; https.get(url, (res: any) => { let data = ''; @@ -111,7 +117,9 @@ async function serperSearch(query: string, searchSettings: WebSearchSettings): P res.on('end', () => { try { - let parsedData: SearchResponse; + let parsedData: { + organic_results?: SearchResult[]; + }; try { parsedData = JSON.parse(data); } catch { @@ -136,14 +144,298 @@ async function serperSearch(query: string, searchSettings: WebSearchSettings): P } }); }).on('error', (error: Error) => { - console.error("serper search 
error: ", error) + console.error("SerpAPI search error: ", error) + reject(error); + }); + }); +} + +// This function is untested since I don't have Scrapingdog API lol +async function scrapingdogSearch(query: string, searchSettings: WebSearchSettings): Promise { + return new Promise((resolve, reject) => { + const apiKey = searchSettings.scrapingdogApiKey; + const searchEngine = searchSettings.scrapingdogSearchEngine; + + let url: string; + if (searchEngine === 'google') { + url = `${SCRAPINGDOG_BASE_URL}/google/api_key=${apiKey}&query=${encodeURIComponent(query)}&results=20`; + } else if (searchEngine === 'bing') { + url = `${SCRAPINGDOG_BASE_URL}/bing/search/api_key=${apiKey}&query=${encodeURIComponent(query)}&results=20`; + } else { + throw new Error(`Unsupported search engine: ${searchEngine}`); + } + https.get(url, (res: any) => { + let data = ''; + + res.on('data', (chunk: Buffer) => { + data += chunk.toString(); + }); + + res.on('end', () => { + try { + let parsedData: { + organic_data?: SearchResult[]; + bing_data?: SearchResult[]; + }; + try { + parsedData = JSON.parse(data); + } catch { + parsedData = { }; + } + let results: SearchResult[]; + if (searchEngine === 'google') { + results = parsedData?.organic_data; + } else if (searchEngine === 'bing') { + results = parsedData?.bing_data; + } + + if (!results) { + resolve([]); + return; + } + + resolve(results); + + // const formattedResults = results.map((item: SearchResult) => { + // return `title: ${item.title}\nurl: ${item.link}\nsnippet: ${item.snippet}\n`; + // }).join('\n\n'); + + // resolve(formattedResults); + } catch (error) { + reject(error); + } + }); + }).on('error', (error: Error) => { + console.error("Scrapingdog search error: ", error) + reject(error); + }); + }); +} + +// This one is also untested :) +async function serperSearch(query: string, searchSettings: WebSearchSettings): Promise { + return new Promise((resolve, reject) => { + const apiKey = searchSettings.serperApiKey; + const url = 
`${SERPER_BASE_URL}?q=${encodeURIComponent(query)}`; + const headers = { + 'Content-Type': 'application/json', + 'X-API-KEY': `${apiKey}`, + }; + const options: https.RequestOptions = { + headers: headers, + }; + + https.get(url, options, (res: any) => { + let data = ''; + + res.on('data', (chunk: Buffer) => { + data += chunk.toString(); + }); + + res.on('end', () => { + try { + let parsedData: { + organic?: SearchResult[]; + }; + try { + parsedData = JSON.parse(data); + } catch { + parsedData = { organic: undefined }; + } + const results = parsedData?.organic?.slice(0, 20); // 'organic' may be absent (parse failure or error payload): yield undefined so the guard below resolves [] + + if (!results) { + resolve([]); + return; + } + + resolve(results); + + // const formattedResults = results.map((item: SearchResult) => { + // return `title: ${item.title}\nurl: ${item.link}\nsnippet: ${item.snippet}\n`; + // }).join('\n\n'); + + // resolve(formattedResults); + } catch (error) { + reject(error); + } + }); + }).on('error', (error: Error) => { + console.error("Serper search error: ", error) + reject(error); + }); + }); +} + +// Including this one ;) +async function jinaSearch(query: string, searchSettings: WebSearchSettings): Promise<SearchResult[]> { // Queries the Jina search endpoint with the configured API key; rejects when the key is unset, otherwise resolves with at most 20 results + return new Promise((resolve, reject) => { + const apiKey = searchSettings.jinaApiKey; + if (!apiKey || apiKey === '') { + reject('Jina API key is not set'); + return; + } + const url = `${JINA_SEARCH_BASE_URL}/?q=${encodeURIComponent(query)}`; + const headers = { + 'Accept': 'application/json', + 'Authorization': `Bearer ${apiKey}`, + 'X-Respond-With': 'no-content', + }; + const options: https.RequestOptions = { + headers: headers, + }; + + https.get(url, options, (res) => { + let data = ''; + + res.on('data', (chunk) => { + data += chunk; + }); + + res.on('end', () => { + try { + let parsedData: { + data?: SearchResult[]; + }; + try { + parsedData = JSON.parse(data); + } catch { + parsedData = { data: undefined }; + } + const results = parsedData?.data?.slice(0, 20); // 'data' may be absent (parse failure or error payload): yield undefined so the guard below resolves [] + + if (!results) { + resolve([]); + return; + } + + resolve(results); + + // 
const formattedResults = results.map((item: SearchResult) => { + // return `title: ${item.title}\nurl: ${item.link}\nsnippet: ${item.snippet}\n`; + // }).join('\n\n'); + + // resolve(formattedResults); + } catch (error) { + reject(error); + } + }); + }).on('error', (error: Error) => { + console.error(`Jina search error: ${error.message}`); + reject(error); + }); + }); +} + +async function duckduckgoSearch(query: string): Promise<SearchResult[]> { // Maps up to 20 ddgSearch hits to SearchResult entries with an empty snippet_embedding + return new Promise(async (resolve, reject) => { + try { + const data = await ddgSearch(query); + + const results: SearchResult[] = []; // must be initialised: push() on a declared-but-unassigned variable throws a TypeError on the first hit + data.results?.slice(0, 20).forEach((result: DDGSearchResult) => { + results.push({ + title: result.title, + link: result.url, + snippet: result.description, + snippet_embedding: [], + }); + }); + + if (!results) { + resolve([]); + return; + } + + resolve(results); + } catch(error) { + console.error(`DuckDuckGo search error: ${error.message}`); + reject(error); + } + }); +} + +// Including this one ;) +async function braveSearch(query: string, searchSettings: WebSearchSettings): Promise<SearchResult[]> { // Queries BRAVE_BASE_URL with the configured subscription token; rejects when the key is unset, otherwise resolves with the parsed JSON body as-is + return new Promise((resolve, reject) => { + const apiKey = searchSettings.braveApiKey; + if (!apiKey || apiKey === '') { + reject('Brave API key is not set'); + return; + } + const url = `${BRAVE_BASE_URL}/?q=${encodeURIComponent(query)}`; + const headers = { + 'Accept': 'application/json', + 'X-Subscription-Token': `${apiKey}`, + }; + const options: https.RequestOptions = { + headers: headers, + }; + + https.get(url, options, (res) => { + let data = ''; + + res.on('data', (chunk) => { + data += chunk; + }); + + res.on('end', () => { + try { + let parsedData: SearchResult[]; + try { + parsedData = JSON.parse(data); + } catch { + parsedData = []; + } + const results = parsedData; + + if (!results) { + resolve([]); + return; + } + + resolve(results); + + // const formattedResults = results.map((item: SearchResult) => { + // return `title: ${item.title}\nurl: ${item.link}\nsnippet: ${item.snippet}\n`; + // }).join('\n\n'); + + 
// resolve(formattedResults); + } catch (error) { + reject(error); + } + }); + }).on('error', (error: Error) => { + console.error(`Brave search error: ${error.message}`); reject(error); }); }); } async function search(query: string, searchSettings: WebSearchSettings): Promise { - return serperSearch(query, searchSettings); + return new Promise((resolve, reject) => { + if (searchSettings.webSearchBackend === 'serpapi') { + resolve(serpapiSearch(query, searchSettings)); + return; + } else if (searchSettings.webSearchBackend === 'jina') { + resolve(jinaSearch(query, searchSettings)); + return; + } else if (searchSettings.webSearchBackend === 'scrapingdog') { + resolve(scrapingdogSearch(query, searchSettings)); + return; + } else if (searchSettings.webSearchBackend === 'serper') { + resolve(serperSearch(query, searchSettings)); + return; + } else if (searchSettings.webSearchBackend === 'duckduckgo') { + resolve(duckduckgoSearch(query)); + return; + } else if (searchSettings.webSearchBackend === 'brave') { + resolve(braveSearch(query, searchSettings)); + return; + } + + reject(`Unsupported web search backend: ${searchSettings.webSearchBackend}`); + }); } async function filterByEmbedding(query: string, results: SearchResult[], ragEngine: RAGEngine): Promise { @@ -216,7 +508,7 @@ Note: This is a video content. 
Please use specialized video processing tools for async function fetchByJina(url: string, apiKey: string): Promise { return new Promise((resolve) => { - const jinaUrl = `${JINA_BASE_URL}/${url}`; + const jinaFetchUrl = `${JINA_FETCH_BASE_URL}/${url}`; const validJinaKey = apiKey && apiKey !== ''; const jinaHeaders = { @@ -229,7 +521,7 @@ async function fetchByJina(url: string, apiKey: string): Promise { headers: jinaHeaders, }; - const req = https.request(jinaUrl, jinaOptions, (res) => { + const req = https.request(jinaFetchUrl, jinaOptions, (res) => { let data = ''; res.on('data', (chunk) => { @@ -241,8 +533,8 @@ async function fetchByJina(url: string, apiKey: string): Promise { // check if there is an error response const response = JSON.parse(data); if (response.code && response.message) { - console.error(`JINA API error: ${response.message}`); - resolve(`fetch jina content error: ${response.message}`); + console.error(`Jina API error: ${response.message}`); + resolve(`Fetch Jina content error: ${response.message}`); return; } resolve(data); @@ -255,7 +547,7 @@ async function fetchByJina(url: string, apiKey: string): Promise { req.on('error', (e) => { console.error(`Error: ${e.message}`); - resolve(`fetch jina error: ${e.message}`); + resolve(`Fetch Jina error: ${e.message}`); }); req.end(); From 7592db2eca2dd150a9b6ec93c7d9e2cf69cce433 Mon Sep 17 00:00:00 2001 From: travertexg Date: Tue, 17 Jun 2025 09:36:07 +0000 Subject: [PATCH 5/9] Refactor: Major changes on settings schema and settings migration This commit introduces a major refactoring of the settings schema. Key changes: - Renamed `filesSearchSettings` to `fileSearchSettings` - `migrateSettings` function is now moved to `migration.ts`. It can correctly handle schema migrations from older versions to the current one. - Older, unused setting fields are now explicitly marked as deprecated within the settings schema to maintain backward compatibility while facilitating cleaner code. 
- `DEFAULT_SETTINGS` has been updated to reflect the new structure, with some common schemas moved or reorganized. - Tests for settings parsing have been updated to validate the new schema and migration logic. --- src/components/chat-view/ChatView.tsx | 6 +- src/components/chat-view/CustomModeView.tsx | 2 +- src/core/prompts/system.ts | 6 +- src/core/prompts/tools/index.ts | 4 +- src/core/prompts/tools/types.ts | 4 +- src/settings/SettingTab.tsx | 24 +- .../components/ModelParametersSettings.tsx | 9 +- src/settings/versions/migration.ts | 315 ++++++++- src/settings/versions/shared.ts | 17 - src/settings/versions/v0/v0.ts | 39 +- src/settings/versions/v1/v1.ts | 79 +-- src/types/settings.test.ts | 611 ++++++++++-------- src/types/settings.ts | 296 +++++---- src/utils/auto-complete.ts | 3 + src/utils/prompt-generator.ts | 4 +- 15 files changed, 881 insertions(+), 538 deletions(-) diff --git a/src/components/chat-view/ChatView.tsx b/src/components/chat-view/ChatView.tsx index f0e2ddb..5a0af1e 100644 --- a/src/components/chat-view/ChatView.tsx +++ b/src/components/chat-view/ChatView.tsx @@ -657,7 +657,7 @@ const Chat = forwardRef((props, ref) => { } } } else if (toolArgs.type === 'match_search_files') { - const searchBackend = settings.filesSearchSettings.matchBackend + const searchBackend = settings.fileSearchSettings.matchBackend let results: string; if (searchBackend === 'omnisearch') { results = await matchSearchUsingOmnisearch(toolArgs.query) @@ -679,7 +679,7 @@ const Chat = forwardRef((props, ref) => { } } } else if (toolArgs.type === 'regex_search_files') { - const searchBackend = settings.filesSearchSettings.regexBackend + const searchBackend = settings.fileSearchSettings.regexBackend let results: string; if (searchBackend === 'coreplugin') { results = await regexSearchUsingCorePlugin(toolArgs.regex, app) @@ -687,7 +687,7 @@ const Chat = forwardRef((props, ref) => { // @ts-expect-error Obsidian API type mismatch const baseVaultPath = 
String(app.vault.adapter.getBasePath()) const absolutePath = path.join(baseVaultPath, toolArgs.filepath) - const ripgrepPath = settings.filesSearchSettings.ripgrepPath + const ripgrepPath = settings.fileSearchSettings.ripgrepPath results = await regexSearchUsingRipgrep(absolutePath, toolArgs.regex, ripgrepPath) } const formattedContent = `[regex_search_files for '${toolArgs.filepath}'] Result:\n${results}\n`; diff --git a/src/components/chat-view/CustomModeView.tsx b/src/components/chat-view/CustomModeView.tsx index 20135c6..d387e59 100644 --- a/src/components/chat-view/CustomModeView.tsx +++ b/src/components/chat-view/CustomModeView.tsx @@ -367,7 +367,7 @@ const CustomModeView = () => { {t('prompt.overrideWarning')}