From 9fd20243a530c07e492e268a8370f8d0211f7f91 Mon Sep 17 00:00:00 2001 From: gdm257 Date: Mon, 29 Jun 2026 06:37:30 +0900 Subject: [PATCH 1/3] feat: impl token-based search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix(search): 软加权跨档回归修复与列表模式排序 - typeWeights.app 300→50:原 +300 在 modeTiers 间距 100 的体系里跨档回归,50 小于最小档距确保不跨档 - 删除 commandDataStore 未实现的 stableSortByAppWeight 后处理调用(软加权已在引擎 scoreByPattern 内) - 列表模式 search-preference 修复:sortListModeResults 纯函数提取偏好置顶,store 补回 searchPreference 导出 - 列表模式从硬分组恢复为软加权(对齐 1554a27 原设计) - listModeSort.test.ts 重写为软加权断言(8 用例),注释措辞改用主排序+tiebreaker 两层结构 style(tests): prettier 格式化修复与测试描述同步 - prettier 补齐 6 个测试文件的 trailing newline(pre-commit hook 残留) - tokenSearchRegression: +300 语义 → typeWeights.app 软加权(对齐 300→50 改动) test(tokenSearch): 补软加权不跨档引擎层锚点 本次修复的核心约束(typeWeights.app < 最小 modeTier 间距)此前在引擎层 没有测试锚定——只有同档位 app>plugin 正面断言,缺少 app 低档位不压过 plugin 高档位的负面约束。补配置约束 + 行为约束两个测试,确保改回 300 时 测试会失败。 test(tokenSearch): 移除测试中的 spec 编号引用,使断言自包含 artifacts 不随 PR 提交,req/Req/Task 编号与 design.md 等文件名 对 reviewer 不透明。改为内联语义说明,断言自解释。 - tokenSearchDualPath.test.ts: 6 处 (Req 1.1/3.4/5.2/5.4/6.1/6.2) - tokenSearchIntegration.test.ts: 1 处 (Task 3.3) - tokenSearch.test.ts: 2 处 (req 3.4, Req 8.5) - tokenSearchRegression.test.ts: 1 处 (req 3.4) fix(search): 修复列表模式完全空白(Pinia ref 解构 unwrap 导致 TypeError) allListModeResults 中 const pref = searchPreference.value[query] 抛 TypeError:searchPreference 从 Pinia setup store 解构后已被自动 unwrap 为值对象,.value 为 undefined,undefined[query] 抛错导致 computed 求值失败,列表模式无论输入什么都不显示。聚合模式正常, 因其偏好置顶在 store.search 内部(那里 ref.value 正确)。 根因:base 版本 allListModeResults 从不直接读 searchPreference, 列表模式偏好置顶靠 store.search 把偏好项放 bestSearchResults 首位, 合并去重后自然居前。0eae037 画蛇添足在列表模式重做偏好置顶,引入 bug。 修复(回归 base 设计): - 移除 sortListModeResults 的 pref 参数与 search-preference 置顶逻辑 - 移除 useSearchResults 解构中的 searchPreference 与 ListModeSortCtx.pref - 回退 store return 中多余的 searchPreference 导出(base 无此导出) - 列表模式偏好仍由 store.search 保证,无需重复处理 - 测试:删掉 search-preference 置顶 describe(职责已不在此函数) docs(word-token-search): 同步 Phase 9 列表模式回归修复至 artifacts - design.md: 列表模式排序描述修正——偏好置顶由 store.search 保证, allListModeResults 不重复处理,仅施加 token 档位排序 - tasks.md: 追加 Phase 9 Implementation Notes,记录 3ee0862 修复 (Pinia ref 解构 unwrap 导致 TypeError),明确推翻 Phase 8 的 search-preference 置顶方案 fix(search): 列表模式档位序对齐引擎 modeTier 权重表 listModeRank 旧实现把跨词词首(含非连续)统一归 rank 1,高于 全词 rank 2,违反权重表:非连续词首(400) 应低于全词(800)。 导致 query 'dance' 命中时,本地安全策略(拼音 multiTokensPrefix- Discontinuous) 排到 MikuMikuDance(singleTokenExactitude) 前面。 修复:listModeRank 直接返回 DEFAULT_CONFIG.modeTiers 权重值(越大 越优先),消除与引擎并行且不一致的手写 rank 表。sortListModeResults 比较器改为降序(rankB - rankA)。 - 引擎层正确(MikuMikuDance 1086 > 本地安全策略 627),bug 仅在 列表模式的 listModeRank 并行实现 - listModeRank.test.ts 断言重写为权重值语义 - listModeSort.test.ts 加回归锚点:非连续词首(400) 不应高于全词(800) docs(word-token-search): 追加 Phase 10 listModeRank 档位序对齐 modeTier 修复记录 --- .../views/GeneralSetting/GeneralSetting.vue | 29 + resources/preload.js | 2 + src/main/api/plugin/internal.ts | 10 + src/main/utils/common.ts | 29 +- src/preload/index.ts | 4 + .../src/composables/useSearchResults.ts | 74 ++- src/renderer/src/env.d.ts | 1 + src/renderer/src/stores/commandDataStore.ts | 196 ++++-- src/renderer/src/utils/highlight.ts | 30 +- src/shared/tokenSearch.ts | 589 ++++++++++++++++++ src/shared/tokenizer.ts | 130 ++++ 11 files changed, 1030 insertions(+), 64 deletions(-) create mode 100644 src/shared/tokenSearch.ts create mode 100644 src/shared/tokenizer.ts diff --git a/internal-plugins/setting/src/views/GeneralSetting/GeneralSetting.vue b/internal-plugins/setting/src/views/GeneralSetting/GeneralSetting.vue index 113f116e..599a114f 100644 --- a/internal-plugins/setting/src/views/GeneralSetting/GeneralSetting.vue +++ b/internal-plugins/setting/src/views/GeneralSetting/GeneralSetting.vue @@ -135,6 +135,7 @@ const localAppSearch = ref(true) const recentRows = ref(2) const pinnedRows = ref(2) const searchMode = ref<'aggregate' | 'list'>('aggregate') +const wordTokenEnabled = ref(true) const clipboardRetentionDays = ref(180) // Tab 键目标指令 @@ -548,6 +549,15 @@ async function handleLocalAppSearchChange(): Promise { } } +// 处理词 token 搜索开关变化 +async function handleWordTokenEnabledChange(): Promise { + try { + await window.ztools.internal.setWordTokenEnabled(wordTokenEnabled.value) + console.log('词 token 搜索开关已更新:', wordTokenEnabled.value) + } catch (error) { + console.error('保存词 token 搜索开关失败:', error) + } +} // 处理最近使用行数变化 async function handleRecentRowsChange(): Promise { try { @@ -1211,6 +1221,8 @@ async function loadSettings(): Promise { theme.value = data.theme ?? 'system' primaryColor.value = data.primaryColor ?? 'blue' searchMode.value = data.searchMode ?? 'aggregate' + wordTokenEnabled.value = + (await window.ztools.internal.dbGet('search.wordTokenEnabled')) !== false autoCheckUpdate.value = data.autoCheckUpdate ?? true tabKeyFunction.value = data.tabKeyFunction ?? (data.tabTargetCommand ? 'target-command' : 'navigate') @@ -1741,6 +1753,23 @@ onUnmounted(() => { +
+
+ 分词搜索 + 支持跨词缩写搜索,如输入 tas man 匹配 Task Manager +
+
+ +
+
+
空格打开指令 diff --git a/resources/preload.js b/resources/preload.js index 1400d0a8..b46db3f3 100644 --- a/resources/preload.js +++ b/resources/preload.js @@ -994,6 +994,8 @@ window.ztools = { // 通知主渲染进程更新搜索框模式 updateSearchMode: async (mode) => await electron.ipcRenderer.invoke('internal:update-search-mode', mode), + setWordTokenEnabled: async (enabled) => + await electron.ipcRenderer.invoke('internal:set-word-token-enabled', enabled), // 通知主渲染进程更新 Tab 键功能配置 updateTabKeyFunction: async (mode) => await electron.ipcRenderer.invoke('internal:update-tab-key-function', mode), diff --git a/src/main/api/plugin/internal.ts b/src/main/api/plugin/internal.ts index 10e7a38b..9edd46a9 100644 --- a/src/main/api/plugin/internal.ts +++ b/src/main/api/plugin/internal.ts @@ -806,6 +806,16 @@ export class InternalPluginAPI { return { success: true } }) + // 词 token 搜索开关:写库 + 即时通知主渲染进程 + ipcMain.handle('internal:set-word-token-enabled', async (event, enabled: boolean) => { + if (!requireInternalPlugin(this.pluginManager, event)) { + throw new PermissionDeniedError('internal:set-word-token-enabled') + } + databaseAPI.dbPut('search.wordTokenEnabled', enabled) + this.mainWindow?.webContents.send('word-token-enabled-changed', enabled) + return { success: true } + }) + // 通知主渲染进程更新 Tab 键目标指令 ipcMain.handle('internal:update-tab-target', async (event, target: string) => { if (!requireInternalPlugin(this.pluginManager, event)) { diff --git a/src/main/utils/common.ts b/src/main/utils/common.ts index ab441d24..ce07cce2 100644 --- a/src/main/utils/common.ts +++ b/src/main/utils/common.ts @@ -1,6 +1,7 @@ import { app } from 'electron' import { fileURLToPath } from 'url' import { WindowManager } from '../core/native/index.js' +import { tokenize } from '../../shared/tokenizer' /** * 睡眠指定毫秒数 @@ -23,30 +24,16 @@ export function shuffleArray(arr: readonly T[]): T[] { } /** - * 提取英文名称的首字母缩写 - * 支持两种模式(优先级从高到低): - * 1. 空格分隔的单词首字母:Visual Studio Code → vsc - * 2. 驼峰命名首字母:VisualStudioCode → vsc + * 提取名称的首字母缩写:取每个分词 token 的首字母拼接。 + * 分词规则由 tokenize 统一提供(含空格 / 驼峰 / 数字 / 汉字逐字等边界)。 + * 例:Visual Studio Code → [visual, studio, code] → vsc * @param name 应用名称 - * @returns 首字母缩写字符串 + * @returns 首字母缩写字符串(小写) */ export function extractAcronym(name: string): string { - // 方式1:空格分隔的单词首字母(优先) - // "Visual Studio Code" → "vsc" - const words = name.split(/\s+/).filter((w) => w.length > 0) - if (words.length > 1) { - return words.map((w) => w[0].toLowerCase()).join('') - } - - // 方式2:驼峰命名首字母 - // "VisualStudioCode" → "vsc" - const capitals = name.match(/[A-Z]/g) - if (capitals && capitals.length > 1) { - return capitals.map((c) => c.toLowerCase()).join('') - } - - // 无法提取首字母缩写 - return '' + return tokenize(name) + .map((t) => t[0]) + .join('') } interface ExplorerFolderWindowInfo { diff --git a/src/preload/index.ts b/src/preload/index.ts index 3d92da3d..ab3517a0 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -191,6 +191,9 @@ const api = { onHistoryChanged: (callback: () => void) => { ipcRenderer.on('history-changed', callback) }, + onWordTokenEnabledChanged: (callback: (enabled: boolean) => void) => { + ipcRenderer.on('word-token-enabled-changed', (_event, enabled: boolean) => callback(enabled)) + }, onPinnedChanged: (callback: () => void) => { ipcRenderer.on('pinned-changed', callback) }, @@ -640,6 +643,7 @@ declare global { onShowSettings: (callback: () => void) => void onAppLaunched: (callback: () => void) => void onHistoryChanged: (callback: () => void) => void + onWordTokenEnabledChanged: (callback: (enabled: boolean) => void) => void onPinnedChanged: (callback: () => void) => void onSuperPanelPinnedChanged: (callback: () => void) => void onIpcLaunch: ( diff --git a/src/renderer/src/composables/useSearchResults.ts b/src/renderer/src/composables/useSearchResults.ts index 5ef1db2a..27f31576 100644 --- a/src/renderer/src/composables/useSearchResults.ts +++ b/src/renderer/src/composables/useSearchResults.ts @@ -1,4 +1,5 @@ import { computed, ref, watch } from 'vue' +import { NO_MATCH, DEFAULT_CONFIG, type PatternMode } from '@shared/tokenSearch' import { useCommandDataStore } from '../stores/commandDataStore' import { useWindowStore } from '../stores/windowStore' @@ -24,6 +25,66 @@ export function deduplicateResults< /** * 根据使用统计对匹配指令结果排序(useCount 降序) */ +/** + * 列表模式档位序,与引擎 modeTier 权重表一致: + * 完整项(1000) > 连续词首(900) > 全词(800) > 词首子串(600) > 非连续词首(400) + * _tokenMode 取自 tokenSearch 引擎(语义化枚举:multiTokensExactitude 等)。 + * query 需预先 toLowerCase。 + */ +export function listModeRank( + item: { name: string; _tokenMode?: PatternMode | typeof NO_MATCH }, + query: string +): number { + // name 完全匹配等价于 multiTokensExactitude(最高优先) + if (item.name.toLowerCase() === query) return DEFAULT_CONFIG.modeTiers.multiTokensExactitude + // _tokenMode 缺失(OFF 路径结果)落到最低档 + const mode = item._tokenMode + if (!mode || mode === NO_MATCH) return 0 + return DEFAULT_CONFIG.modeTiers[mode] +} + +/** 列表模式排序上下文 */ +export interface ListModeSortCtx { + query: string + usageMap: Map +} + +/** + * 列表模式纯函数排序(软加权,沿用 1554a27 原设计)。 + * 偏好置顶由 store.search 保证(偏好项已在 bestSearchResults 首位, + * 合并去重后仍居前),本函数仅按 token 档位排序: + * 完全匹配 > 跨词词首 > 单token > 其他 + * 同档位内 tiebreaker:系统应用软加权(direct+app)→ 频率(useCount 降序) + * 返回新数组,不修改入参。 + */ + +export function sortListModeResults< + T extends { + name: string + type?: string + subType?: string + path: string + featureCode?: string + _tokenMode?: PatternMode | typeof NO_MATCH + } +>(items: T[], ctx: ListModeSortCtx): T[] { + const usageKey = (item: { path: string; featureCode?: string }): string => + `${item.path}:${item.featureCode || ''}` + return [...items].sort((a, b) => { + // token 档位(与 modeTier 权重表一致,大的优先) + const rankA = listModeRank(a, ctx.query) + const rankB = listModeRank(b, ctx.query) + if (rankA !== rankB) return rankB - rankA + // 同档位内:系统应用软加权(对齐原设计 calculateMatchScore +300) + const isAppA = a.type === 'direct' && a.subType === 'app' + const isAppB = b.type === 'direct' && b.subType === 'app' + if (isAppA !== isAppB) return isAppA ? -1 : 1 + // 频率 tiebreaker + const countA = ctx.usageMap.get(usageKey(a)) || 0 + const countB = ctx.usageMap.get(usageKey(b)) || 0 + return countB - countA + }) +} function sortByUsage( results: T[], statsMap: Map @@ -61,7 +122,8 @@ export function useSearchResults(props: { searchImageCommands, searchTextCommands, searchFileCommands, - matchesWindowCommand + matchesWindowCommand, + wordTokenEnabled } = commandDataStore // 使用统计缓存(key: "path:featureCode", value: useCount) @@ -254,8 +316,16 @@ export function useSearchResults(props: { // 无搜索词(如仅粘贴文本)时,返回去重后的原始顺序结果 if (!query) return deduped + // 列表模式排序(软加权,沿用 1554a27 原设计)。 + // 主排序:search-preference 置顶(store.search 已将偏好项放在 bestSearchResults 首位, + // 合并去重后仍居前,无需在此重复处理),然后按 token 档位排序。 + // 同档位内 tiebreaker:系统应用软加权 → 频率。 + // 开关 OFF: 旧比较器(完全匹配 → 前缀 → 系统应用 → 频率) + if (wordTokenEnabled) { + // ON: sortListModeResults 软加权(偏好置顶由 store.search 保证) + return sortListModeResults(deduped, { query, usageMap: usageStatsMap.value }) + } - // 排序:完全匹配 > 前缀匹配 > 系统应用 > 其他 return deduped.sort((a, b) => { const nameA = a.name.toLowerCase() const nameB = b.name.toLowerCase() diff --git a/src/renderer/src/env.d.ts b/src/renderer/src/env.d.ts index 76d7ec72..bbb7a72e 100644 --- a/src/renderer/src/env.d.ts +++ b/src/renderer/src/env.d.ts @@ -208,6 +208,7 @@ declare global { onLocalShortcutsChanged: (callback: () => void) => void onCommandAliasesChanged: (callback: () => void) => void onHistoryChanged: (callback: () => void) => void + onWordTokenEnabledChanged: (callback: (enabled: boolean) => void) => void onPinnedChanged: (callback: () => void) => void onSuperPanelPinnedChanged: (callback: () => void) => void onDisabledCommandsChanged: (callback: () => void) => void diff --git a/src/renderer/src/stores/commandDataStore.ts b/src/renderer/src/stores/commandDataStore.ts index e1e71b4a..434b42a0 100644 --- a/src/renderer/src/stores/commandDataStore.ts +++ b/src/renderer/src/stores/commandDataStore.ts @@ -31,6 +31,39 @@ import { isMainPushPluginEnabled, normalizeConfigList } from '@shared/pluginSettings' +import { tokenize, isChinese } from '@shared/tokenizer' +import { + NO_MATCH, + WORD_TOKEN_ENABLED_KEY, + resolveWordTokenEnabled, + tokenSearch, + type PatternMode +} from '@shared/tokenSearch' + +/** 从 name 生成 pinyin/pinyinAbbr(扁平字符串, 供 Fuse 回退) 与 pinyinTokens(拼音音节数组)。 */ +function buildPinyinFields(name: string): { + pinyin: string + pinyinAbbr: string + pinyinTokens: string[] +} { + const syllables = pinyin(name, { toneType: 'none', type: 'array' }) + const pinyinTokens: string[] = [] + const chars = Array.from(name) + for (let i = 0; i < chars.length && i < syllables.length; i++) { + // 汉字音节 + 数字(pinyin-pro 对 `360极速` 返回 ['3','6','0','ji','su']) + // 纳入数字使拼音 query 支持 `360llq` 这类数字+拼音缩写混合输入 + if (isChinese(chars[i]) || (chars[i] >= '0' && chars[i] <= '9')) + pinyinTokens.push(syllables[i].toLowerCase()) + } + return { + pinyin: syllables.join('').toLowerCase(), + pinyinAbbr: syllables + .map((s) => s[0] || '') + .join('') + .toLowerCase(), + pinyinTokens + } +} // 正则匹配指令 interface RegexCmd { @@ -99,7 +132,9 @@ export interface Command { icon?: string pinyin?: string pinyinAbbr?: string + pinyinTokens?: string[] // 拼音音节数组(如 [ren,wu,guan,li,qi]) acronym?: string // 英文首字母缩写(用于搜索) + tokens?: string[] // 词 token 数组(由 tokenize 产出,供词 token 搜索引擎使用) type: CommandType // 指令类型 subType?: CommandSubType // 子类型(用于区分 direct 类型) featureCode?: string // 插件功能代码(用于启动时指定功能) @@ -110,7 +145,8 @@ export interface Command { cmdType?: 'text' | 'regex' | 'over' | 'img' | 'files' | 'window' // cmd类型 mainPush?: boolean // 是否为 mainPush 功能(搜索时动态查询插件获取结果) matches?: MatchInfo[] // 搜索匹配信息(用于高亮显示) - matchType?: 'acronym' | 'name' | 'pinyin' | 'pinyinAbbr' // 匹配类型(用于高亮算法选择) + matchType?: 'acronym' | 'name' | 'pinyin' | 'pinyinAbbr' | 'aliases' // 匹配类型(用于高亮算法选择) + _tokenMode?: PatternMode | typeof NO_MATCH // 词 token 搜索引擎命中模式(列表模式档位排序依据) // 系统设置字段(新增) settingUri?: string // ms-settings URI category?: string // 分类(用于分组显示) @@ -161,6 +197,73 @@ interface HistoryItem extends Command { const HISTORY_DOC_ID = 'command-history' const PINNED_DOC_ID = 'pinned-commands' +/** + * 为指令列表统一填充 tokens 字段。 + * + * 在指令列表变化时调用, 搜索期零分词开销。 + * 幂等: 已有非空 tokens 的指令不重新分词; tokens 缺失或为空时现场 tokenize。 + * 原地修改指令对象 (commands 数组与对象引用不变)。 + */ +export function enrichTokens(commands: Command[]): void { + for (const command of commands) { + if (!command.tokens || command.tokens.length === 0) { + command.tokens = tokenize(command.name) + } + // apps/系统设置/本地启动项构造时只填了 pinyin/pinyinAbbr,缺 pinyinTokens; + // tokenSearch 的拼音路径(scorePinyinField)仅消费 pinyinTokens,缺失即整体失效。 + // 在此统一补齐,避免逐个构造点遗漏。 + if (!command.pinyinTokens || command.pinyinTokens.length === 0) { + const fields = buildPinyinFields(command.name) + command.pinyinTokens = fields.pinyinTokens + if (!command.pinyin) command.pinyin = fields.pinyin + if (!command.pinyinAbbr) command.pinyinAbbr = fields.pinyinAbbr + } + } +} +/** + * tokenSearch 引擎结果可消费的指令最小形态。 + * + * 与 src/shared/tokenSearch.ts 的 TokenSearchCommand 对齐, 但绑定本模块的 + * Command 字段集, 使转换结果可直接作为 SearchResult 传递给下游高亮逻辑。 + */ +export type ConvertibleCommand = Command + +/** + * 将 tokenSearch 引擎产出的 TokenSearchResult[] 转换为现有 SearchResult[] 格式。 + * + * - 保留原始 command 的全部字段 (path/type/icon/...)。 + * - 携带 matches (MatchInfo[]) 与 matchType, 供前端高亮算法选择与渲染。 + * - matchType 直接取自引擎的命中字段 (name/pinyin/acronym/aliases)。 + * + * 纯函数, 无副作用;不修改输入。 + * + * @param results tokenSearch 产出的排序就绪结果 + * @returns 符合现有 search() 返回契约的 SearchResult[] + */ +export function convertTokenSearchResults( + results: import('@shared/tokenSearch').TokenSearchResult[] +): SearchResult[] { + return results.map((r) => { + const command = r.command as Command + const value = + r.matchType === 'name' || r.matchType === 'pinyin' + ? command.name + : ((command as any)[r.matchType] ?? command.name) + return { + ...command, + matches: [ + { + indices: r.matchIndices, + value: typeof value === 'string' ? value : command.name, + key: r.matchType + } + ], + matchType: r.matchType, + _tokenMode: r.mode + } + }) +} + export const useCommandDataStore = defineStore('commandData', () => { // ===== 特殊指令配置表 ===== // 支持两种匹配方式: @@ -212,6 +315,9 @@ export const useCommandDataStore = defineStore('commandData', () => { const disabledPluginPaths = ref([]) const enabledMainPushPluginNames = ref([]) + // 词 token 搜索功能开关(默认开启;初始化时从 LMDB 读取) + const wordTokenEnabled = ref(true) + function setDisabledPluginPaths(paths: unknown): void { disabledPluginPaths.value = Array.isArray(paths) ? paths.filter((item): item is string => typeof item === 'string') @@ -366,10 +472,7 @@ export const useCommandDataStore = defineStore('commandData', () => { command.type === 'direct' && command.subType === 'app' ? command.originalName || command.name : command.originalName, - pinyin: pinyin(alias, { toneType: 'none', type: 'string' }).replace(/\s+/g, '').toLowerCase(), - pinyinAbbr: pinyin(alias, { pattern: 'first', toneType: 'none', type: 'string' }) - .replace(/\s+/g, '') - .toLowerCase() + ...buildPinyinFields(alias) } } @@ -480,6 +583,7 @@ export const useCommandDataStore = defineStore('commandData', () => { async function loadSearchPreference(): Promise { try { const data = await window.ztools.dbGet('search-preference') + if (data && typeof data === 'object') { searchPreference.value = data } @@ -487,6 +591,20 @@ export const useCommandDataStore = defineStore('commandData', () => { console.error('加载搜索偏好记录失败:', error) } } + /** + * 加载词 token 搜索功能开关。 + * + * 从 LMDB 读取 search.wordTokenEnabled (默认开启)。 + * 读取失败时保持默认值, 不报错。 + */ + async function loadWordTokenSetting(): Promise { + try { + const enabled = await window.ztools.dbGet(WORD_TOKEN_ENABLED_KEY) + wordTokenEnabled.value = resolveWordTokenEnabled(enabled) + } catch (error) { + console.error('加载词 token 搜索开关失败:', error) + } + } // 保存搜索偏好(搜索词 -> 选中的指令) async function saveSearchPreference( @@ -529,7 +647,8 @@ export const useCommandDataStore = defineStore('commandData', () => { loadHistoryData(), loadPinnedData(), loadSearchPreference(), - loadSuperPanelPinnedData() + loadSuperPanelPinnedData(), + loadWordTokenSetting() ]) // 监听后端历史记录变化事件 @@ -547,6 +666,11 @@ export const useCommandDataStore = defineStore('commandData', () => { reloadLocalShortcuts() }) + // 监听设置页词 token 搜索开关变化(即时生效,无需重启) + window.ztools.onWordTokenEnabledChanged((enabled) => { + wordTokenEnabled.value = enabled + }) + // 监听固定列表变化事件 window.ztools.onPinnedChanged(() => { // 如果是本地触发的更新,忽略此事件,避免重复加载 @@ -779,16 +903,7 @@ export const useCommandDataStore = defineStore('commandData', () => { pluginName: plugin.name, pluginTitle: plugin.title, pluginExplain: defaultFeatureExplain || plugin.description, - pinyin: pinyin(plugin.name, { toneType: 'none', type: 'string' }) - .replace(/\s+/g, '') - .toLowerCase(), - pinyinAbbr: pinyin(plugin.name, { - pattern: 'first', - toneType: 'none', - type: 'string' - }) - .replace(/\s+/g, '') - .toLowerCase() + ...buildPinyinFields(plugin.name) }) } @@ -831,16 +946,7 @@ export const useCommandDataStore = defineStore('commandData', () => { matchCmd: cmd, cmdType: cmd.type, mainPush: isMainPush, - pinyin: pinyin(cmdName, { toneType: 'none', type: 'string' }) - .replace(/\s+/g, '') - .toLowerCase(), - pinyinAbbr: pinyin(cmdName, { - pattern: 'first', - toneType: 'none', - type: 'string' - }) - .replace(/\s+/g, '') - .toLowerCase() + ...buildPinyinFields(cmdName) } regexItems.push(matchCommand) @@ -860,16 +966,7 @@ export const useCommandDataStore = defineStore('commandData', () => { pluginExplain: feature.explain, cmdType: 'text', mainPush: isMainPush, - pinyin: pinyin(cmdName, { toneType: 'none', type: 'string' }) - .replace(/\s+/g, '') - .toLowerCase(), - pinyinAbbr: pinyin(cmdName, { - pattern: 'first', - toneType: 'none', - type: 'string' - }) - .replace(/\s+/g, '') - .toLowerCase() + ...buildPinyinFields(cmdName) } pluginItems.push(textCommand, ...getLaunchableAliasEntries(textCommand, commandAliases)) @@ -937,6 +1034,9 @@ export const useCommandDataStore = defineStore('commandData', () => { regexCommands.value = regexItems mainPushFeatures.value = mainPushItems + // 为指令列表统一填充 tokens(搜索期零分词开销) + enrichTokens(commands.value) + rebuildFuseIndex() console.log( @@ -1089,7 +1189,28 @@ export const useCommandDataStore = defineStore('commandData', () => { const FUSE_MAX_QUERY_LENGTH = 32 let bestMatches: SearchResult[] = [] - if (query.length <= FUSE_MAX_QUERY_LENGTH) { + // 词 token 搜索路径 (开关 ON 且主搜索路径): 走 tokenSearch 引擎, + // 召回(子序列) + 分类 + 档位评分后转换为 SearchResult 格式, 再统一经过 + // search-preference 偏好置顶 (与 Fuse 路径后处理一致)。 + if (wordTokenEnabled.value && !commandList && query.length <= FUSE_MAX_QUERY_LENGTH) { + const tokenResults = tokenSearch(query, commands.value) + bestMatches = convertTokenSearchResults(tokenResults) + // app 软加权已在引擎 scoreByPattern 内 (typeWeights.app=50),无需后处理 + + // 搜索偏好置顶:将上次选中的指令移到第一位 (与 Fuse 路径一致) + const prefKey = query.trim().toLowerCase() + const pref = searchPreference.value[prefKey] + if (pref) { + const prefIndex = bestMatches.findIndex( + (cmd) => + cmd.path === pref.path && cmd.featureCode === pref.featureCode && cmd.name === pref.name + ) + if (prefIndex > 0) { + const [preferred] = bestMatches.splice(prefIndex, 1) + bestMatches.unshift(preferred) + } + } + } else if (query.length <= FUSE_MAX_QUERY_LENGTH) { // 如果指定了搜索范围,创建临时 Fuse 实例 const searchFuse = commandList ? new Fuse(commandList, { @@ -1555,6 +1676,7 @@ export const useCommandDataStore = defineStore('commandData', () => { regexCommands, mainPushFeatures, loading, + wordTokenEnabled, isInitialized, // 初始化 diff --git a/src/renderer/src/utils/highlight.ts b/src/renderer/src/utils/highlight.ts index f6a5a9f4..3138b9ce 100644 --- a/src/renderer/src/utils/highlight.ts +++ b/src/renderer/src/utils/highlight.ts @@ -17,7 +17,7 @@ interface MatchInfo { export function highlightMatch( text: string, matches?: MatchInfo[], - matchType?: 'acronym' | 'name' | 'pinyin' | 'pinyinAbbr', + matchType?: 'acronym' | 'name' | 'pinyin' | 'pinyinAbbr' | 'aliases', query?: string ): string { if (!matches || matches.length === 0) { @@ -56,9 +56,13 @@ export function highlightMatch( } }) } else if (match.key === 'pinyin') { - // 拼音匹配: 需要映射拼音索引到中文字符索引 - const charIndices = mapPinyinToCharIndices(text, match.indices, false) - charIndices.forEach((i) => highlightIndices.add(i)) + // 拼音音节匹配: match.indices 为音节索引, 映射到第 n 个汉字 + const chinesePositions = getChineseCharPositions(text) + match.indices.forEach(([s, e]) => { + for (let i = s; i <= e; i++) { + if (chinesePositions[i] !== undefined) highlightIndices.add(chinesePositions[i]) + } + }) } else if (match.key === 'pinyinAbbr') { // 拼音首字母匹配: 需要映射首字母索引到中文字符索引 const charIndices = mapPinyinToCharIndices(text, match.indices, true) @@ -176,6 +180,24 @@ function buildHighlightedText(text: string, highlightIndices: Set): stri return result } +/** + * 返回 text 中所有汉字的字符串位置数组(供拼音音节索引映射)。 + */ +function getChineseCharPositions(text: string): number[] { + const positions: number[] = [] + for (let i = 0; i < text.length; i++) { + const code = text.charCodeAt(i) + if ( + (code >= 0x4e00 && code <= 0x9fff) || + (code >= 0x3400 && code <= 0x4dbf) || + (code >= 0xf900 && code <= 0xfaff) + ) { + positions.push(i) + } + } + return positions +} + /** * 将拼音索引映射到中文字符索引 * @param text 原始中文文本 diff --git a/src/shared/tokenSearch.ts b/src/shared/tokenSearch.ts new file mode 100644 index 00000000..7e6980d6 --- /dev/null +++ b/src/shared/tokenSearch.ts @@ -0,0 +1,589 @@ +import { tokenize } from './tokenizer' + +/** + * 词 token 搜索引擎:子序列召回 -> 模式分类 -> 档位评分。 + * + * 模式分类(query 字符相对 token 的落点,语义化命名): + * multiTokensExactitude 完整项(query = 原文,覆盖全部 token) + * multiTokensPrefixContinuous 连续跨词词首(各段词首子串,命中 token 相邻) + * singleTokenExactitude 单 token 全词 + * singleTokenPrefix 单 token 词首子串 + * multiTokensPrefixDiscontinuous 非连续跨词词首(各段词首子串,命中 token 有跳跃) + * 其余落点(非词首子串、词中子序列、跨词混合、跨词全词中)信息量不足, + * 分类时直接判 NO_MATCH 不命中。 + */ +export type PatternMode = + | 'multiTokensExactitude' + | 'multiTokensPrefixContinuous' + | 'singleTokenExactitude' + | 'singleTokenPrefix' + | 'multiTokensPrefixDiscontinuous' +export const NO_MATCH = 'NO_MATCH' as const + +/** 一段 query 落在某 token 上的位置记录,供分类与评分消费。 */ +export interface SegmentAlignment { + queryStart: number + queryEnd: number + tokenIndex: number + tokenStart: number + tokenEnd: number + isWordInitial: boolean + isConsecutive: boolean +} + +export interface TokenSearchConfig { + modeTiers: Record + signalWeights: { + segmentLength: number + positionBonus: number + tokenContinuity: number + matchRatio: number + } + typeWeights: { app: number; plugin: number; builtin: number } +} + +export const DEFAULT_CONFIG: TokenSearchConfig = { + modeTiers: { + multiTokensExactitude: 1000, + multiTokensPrefixContinuous: 900, + singleTokenExactitude: 800, + singleTokenPrefix: 600, + multiTokensPrefixDiscontinuous: 400 + }, + signalWeights: { segmentLength: 50, positionBonus: 30, tokenContinuity: 100, matchRatio: 200 }, + // app 软加权 < 最小 modeTier 间距(100),原 Fuse 路径 +300 在间距 1400+ 体系里安全,搬到间距 100 的 modeTiers 会跨档回归 + typeWeights: { app: 50, plugin: 0, builtin: 0 } +} + +export const WORD_TOKEN_ENABLED_KEY = 'search.wordTokenEnabled' + +/** 仅显式 false 才关闭;缺失 / 无效一律降级为 true。 */ +export function resolveWordTokenEnabled(stored: unknown): boolean { + return stored !== false +} + +// --------------------------------------------------------------------------- +// 子序列匹配 +// --------------------------------------------------------------------------- + +export function subsequenceMatch(query: string, target: string): boolean { + return subsequencePositions(query, target) !== null +} + +/** query 每个字符在 target 中的贪心最左位置;不命中返回 null。 */ +function subsequencePositions(query: string, target: string): number[] | null { + if (!query) return [] + if (query.length > target.length) return null + const pos: number[] = [] + let ti = 0 + for (const qc of query) { + while (ti < target.length && target[ti] !== qc) ti++ + if (ti >= target.length) return null + pos.push(ti++) + } + return pos +} + +// --------------------------------------------------------------------------- +// query 切分 -> 段落对齐 +// --------------------------------------------------------------------------- + +/** 含空格按空格硬分隔;否则贪心跨词词首。返回空数组表示无对齐。 */ +export function segmentQuery(query: string, tokens: string[]): SegmentAlignment[] { + if (!query || tokens.length === 0) return [] + const q = query.toLowerCase() + return segmentBySpaces(q, tokens) ?? segmentByCrossWord(q, tokens) ?? [] +} + +/** 空格硬分隔:每段独立匹配到一个 token(词首前缀优先,其次词中位置)。 */ +function segmentBySpaces(query: string, tokens: string[]): SegmentAlignment[] | null { + const parts = query.split(/ +/).filter(Boolean) + if (parts.length <= 1) return null + if (parts.length > tokens.length) return [] + + const alignments: SegmentAlignment[] = [] + let qCursor = 0 + let nextToken = 0 + for (const part of parts) { + while (qCursor < query.length && query[qCursor] === ' ') qCursor++ + const qStart = qCursor + qCursor += part.length + const m = matchSegmentToToken(part, tokens, nextToken) + if (!m) return [] + alignments.push({ + queryStart: qStart, + queryEnd: qCursor, + ...m, + isWordInitial: m.tokenStart === 0 + }) + nextToken = m.tokenIndex + 1 + } + return alignments +} + +function matchSegmentToToken( + segment: string, + tokens: string[], + startToken: number +): { tokenIndex: number; tokenStart: number; tokenEnd: number; isConsecutive: boolean } | null { + for (let i = startToken; i < tokens.length; i++) { + const tok = tokens[i] + if (tok.startsWith(segment)) + return { tokenIndex: i, tokenStart: 0, tokenEnd: segment.length, isConsecutive: true } + const idx = tok.indexOf(segment) + if (idx >= 0) + return { tokenIndex: i, tokenStart: idx, tokenEnd: idx + segment.length, isConsecutive: true } + const pos = subsequencePositions(segment, tok) + if (pos) + return { + tokenIndex: i, + tokenStart: pos[0], + tokenEnd: pos[pos.length - 1] + 1, + isConsecutive: pos[pos.length - 1] - pos[0] === pos.length - 1 + } + } + return null +} + +/** + * 无空格贪心跨词:对每个 query 位置,取最早可匹配的 token 消费其最长公共前缀。 + * earliest-token-first 自然偏好连续 token 序列;无法跨词时降级为单 token 子序列。 + */ +function segmentByCrossWord(query: string, tokens: string[]): SegmentAlignment[] | null { + if (!query || tokens.length === 0) return null + const alignments: SegmentAlignment[] = [] + let qStart = 0 + let cursor = 0 + + while (qStart < query.length) { + let matched = false + for (let i = cursor; i < tokens.length; i++) { + const tok = tokens[i] + const maxLen = Math.min(tok.length, query.length - qStart) + if (maxLen > 0 && tok[0] === query[qStart]) { + let len = 1 + while (len < maxLen && tok[len] === query[qStart + len]) len++ + alignments.push({ + queryStart: qStart, + queryEnd: qStart + len, + tokenIndex: i, + tokenStart: 0, + tokenEnd: len, + isWordInitial: true, + isConsecutive: true + }) + qStart += len + cursor = i + 1 + matched = true + break + } + } + if (!matched) break + } + + if (qStart === query.length && alignments.length > 0) return alignments + + // 降级:单 token 子序列 + for (let i = 0; i < tokens.length; i++) { + const pos = subsequencePositions(query, tokens[i]) + if (pos && pos.length > 0) { + const tStart = pos[0] + const tEnd = pos[pos.length - 1] + 1 + return [ + { + queryStart: 0, + queryEnd: query.length, + tokenIndex: i, + tokenStart: tStart, + tokenEnd: tEnd, + isWordInitial: tStart === 0, + isConsecutive: tEnd - tStart === query.length + } + ] + } + } + return null +} + +// --------------------------------------------------------------------------- +// 模式分类 +// --------------------------------------------------------------------------- + +export interface ClassifyPatternInput { + query: string + name: string + tokens: string[] + alignments: SegmentAlignment[] +} + +export function classifyPattern(input: ClassifyPatternInput): PatternMode | typeof NO_MATCH { + const { query, name, tokens, alignments } = input + if (alignments.length === 0) return NO_MATCH + if (query.trim().toLowerCase() === name.trim().toLowerCase()) return 'multiTokensExactitude' + if (alignments.length === 1) return classifySingleToken(alignments[0], tokens) + + let hasInitial = false + let hasMid = false + for (const seg of alignments) { + if (seg.isWordInitial) hasInitial = true + else hasMid = true + } + if (hasInitial && !hasMid) return crossWordInitialMode(alignments) + // 跨词混合 / 跨词全词中:含词中段,引入噪声,直接判不命中 + return NO_MATCH +} + +function classifySingleToken( + seg: SegmentAlignment, + tokens: string[] +): PatternMode | typeof NO_MATCH { + const tok = tokens[seg.tokenIndex] + const tokenLen = tok ? tok.length : 0 + const segLen = seg.queryEnd - seg.queryStart + if (seg.isWordInitial && segLen === tokenLen && tokenLen > 0) return 'singleTokenExactitude' + if (seg.isWordInitial && seg.isConsecutive) return 'singleTokenPrefix' + // 非词首子串(token 内连续但非词首起)/ 词中子序列:信息量低于词首前缀,直接判不命中 + return NO_MATCH +} + +/** 跨词词首按 token 相邻性拆档:相邻为连续词首,存在跳跃为非连续词首。 */ +function crossWordInitialMode(alignments: SegmentAlignment[]): PatternMode { + for (let i = 1; i < alignments.length; i++) { + if (alignments[i].tokenIndex - alignments[i - 1].tokenIndex !== 1) + return 'multiTokensPrefixDiscontinuous' + } + return 'multiTokensPrefixContinuous' +} + +// --------------------------------------------------------------------------- +// 评分 +// --------------------------------------------------------------------------- + +function normalizeCommandType(cmd: { + type: string + subType?: string +}): 'app' | 'plugin' | 'builtin' { + if (cmd.type === 'direct' && cmd.subType === 'app') return 'app' + if (cmd.type === 'plugin') return 'plugin' + return 'builtin' +} + +export interface ScoreByPatternInput { + mode: PatternMode | typeof NO_MATCH + alignments: SegmentAlignment[] + queryLength: number + totalTokenLength: number + commandType: { type: 'direct' | 'plugin' | 'builtin'; subType?: string } + config?: TokenSearchConfig +} + +export function scoreByPattern(input: ScoreByPatternInput): number { + const { mode, alignments, queryLength, totalTokenLength, commandType } = input + const config = input.config ?? DEFAULT_CONFIG + const tier = mode === NO_MATCH ? 0 : config.modeTiers[mode] + const typeW = config.typeWeights[normalizeCommandType(commandType)] + if (alignments.length === 0) return tier + typeW + + const sw = config.signalWeights + // segmentLength:query 被 token 覆盖的比例 + const segLen = alignments.reduce((s, a) => s + (a.queryEnd - a.queryStart), 0) + const segNorm = queryLength > 0 ? Math.min(segLen / queryLength, 1) : 0 + // positionBonus:首命中 token 越靠前分越高 + const posNorm = 1 / (1 + alignments[0].tokenIndex) + // tokenContinuity:相邻 token 对占比,连续高于跳跃 + let contNorm: number + if (alignments.length === 1) contNorm = 1 + else { + let adj = 0 + for (let i = 1; i < alignments.length; i++) { + if (alignments[i].tokenIndex - alignments[i - 1].tokenIndex === 1) adj++ + } + contNorm = adj / (alignments.length - 1) + } + // matchRatio:query 占 token 总长比例 + const ratioNorm = totalTokenLength > 0 ? Math.min(queryLength / totalTokenLength, 1) : 0 + + return ( + tier + + segNorm * sw.segmentLength + + posNorm * sw.positionBonus + + contNorm * sw.tokenContinuity + + ratioNorm * sw.matchRatio + + typeW + ) +} + +// --------------------------------------------------------------------------- +// 拼音音节匹配 +// --------------------------------------------------------------------------- + +/** + * 拼音音节跨词匹配:每段必须是首字母(len 1)或完整音节;内部前缀(如 re 对 ren)不命中。 + * 单音节命中过滤(信息量不足以定位中文应用)。遇到无法匹配的编码即止(返回 null)。 + */ +function segmentPinyin(query: string, syllables: string[]): SegmentAlignment[] | null { + if (!query || syllables.length === 0) return null + const alignments: SegmentAlignment[] = [] + let qStart = 0 + let cursor = 0 + + while (qStart < query.length) { + let matched = false + for (let i = cursor; i < syllables.length; i++) { + const syl = syllables[i] + const rest = query.slice(qStart) + // 完整音节优先,其次首字母 + if (rest.startsWith(syl)) { + alignments.push(mkSeg(qStart, qStart + syl.length, i, syl.length)) + qStart += syl.length + cursor = i + 1 + matched = true + break + } + if (rest[0] === syl[0]) { + alignments.push(mkSeg(qStart, qStart + 1, i, 1)) + qStart += 1 + cursor = i + 1 + matched = true + break + } + } + if (!matched) return null + } + // 单音节过滤 + // 仅落 1 个音节时:首字母(长度 1)信息量不足丢弃;完整音节放行(single token 全词) + if (alignments.length === 1) { + const a = alignments[0] + const syl = syllables[a.tokenIndex] + if (a.tokenEnd - a.tokenStart < syl.length) return null + } + return alignments +} + +function mkSeg(qStart: number, qEnd: number, tokenIndex: number, len: number): SegmentAlignment { + return { + queryStart: qStart, + queryEnd: qEnd, + tokenIndex, + tokenStart: 0, + tokenEnd: len, + isWordInitial: true, + isConsecutive: true + } +} + +// --------------------------------------------------------------------------- +// 高层搜索 API +// --------------------------------------------------------------------------- + +const MAX_QUERY_LENGTH = 32 + +export interface TokenSearchCommand { + name: string + tokens?: string[] + pinyinTokens?: string[] + acronym?: string + aliases?: string[] + type: 'direct' | 'plugin' | 'builtin' + subType?: string +} + +export type TokenSearchMatchType = 'name' | 'pinyin' | 'acronym' | 'aliases' + +export interface TokenSearchResult { + command: TokenSearchCommand + mode: PatternMode | typeof NO_MATCH + score: number + matchType: TokenSearchMatchType + matchIndices: Array<[number, number]> +} + +interface FieldCandidate { + mode: PatternMode | typeof NO_MATCH + score: number + matchType: TokenSearchMatchType + matchIndices: Array<[number, number]> +} + +const FIELD_PRIORITY: Record = { + name: 0, + acronym: 1, + pinyin: 2, + aliases: 3 +} + +function queryLengthNoSpace(q: string): number { + let n = 0 + for (let i = 0; i < q.length; i++) if (q[i] !== ' ') n++ + return n +} + +/** 各 token 在原文 name 中的起始偏移(贪心最左,大小写不敏感)。 */ +function computeTokenOffsets(name: string, tokens: string[]): number[] { + const lower = name.toLowerCase() + const offsets: number[] = [] + let cursor = 0 + for (const tok of tokens) { + const idx = lower.indexOf(tok, cursor) + if (idx < 0) { + offsets.push(cursor) + continue + } + offsets.push(idx) + cursor = idx + tok.length + } + return offsets +} + +/** 单 token 扁平字段(acronym/aliases):与 name 同走完整模式分类。 */ +function scoreFlatField( + query: string, + value: string, + matchType: TokenSearchMatchType, + command: TokenSearchCommand, + config: TokenSearchConfig +): FieldCandidate | null { + if (!value) return null + const flat = [value] + const alignments = segmentQuery(query, flat) + const mode = classifyPattern({ query, name: value, tokens: flat, alignments }) + if (mode === NO_MATCH) return null + const score = scoreByPattern({ + mode, + alignments, + queryLength: queryLengthNoSpace(query), + totalTokenLength: value.length, + commandType: command, + config + }) + return { + mode, + score, + matchType, + matchIndices: alignments + .filter((a) => a.queryEnd > a.queryStart) + .map((a) => [a.tokenStart, a.tokenEnd - 1] as [number, number]) + } +} + +/** name 字段:全模式分类(完整项 / 连续·非连续词首 / 全词 / 词首前缀)。 */ +function scoreNameField( + query: string, + tokens: string[], + command: TokenSearchCommand, + config: TokenSearchConfig +): FieldCandidate | null { + const alignments = segmentQuery(query, tokens) + const mode = classifyPattern({ query, name: command.name, tokens, alignments }) + if (mode === NO_MATCH) return null + const score = scoreByPattern({ + mode, + alignments, + queryLength: queryLengthNoSpace(query), + totalTokenLength: tokens.reduce((s, t) => s + t.length, 0), + commandType: command, + config + }) + const offsets = computeTokenOffsets(command.name, tokens) + const matchIndices = alignments + .filter((a) => a.queryEnd > a.queryStart) + .map((a) => { + const off = offsets[a.tokenIndex] ?? 0 + return [off + a.tokenStart, off + a.tokenEnd - 1] as [number, number] + }) + return { mode, score, matchType: 'name', matchIndices } +} + +function scorePinyinField( + query: string, + pinyinTokens: string[] | undefined, + command: TokenSearchCommand, + config: TokenSearchConfig +): FieldCandidate | null { + if (!pinyinTokens || pinyinTokens.length === 0) return null + const alignments = segmentPinyin(query, pinyinTokens) + if (!alignments) return null + // 单完整音节 = single token 全词;跨音节 = 连续/非连续词首(按相邻性拆档) + const mode: PatternMode = + alignments.length === 1 ? 'singleTokenExactitude' : crossWordInitialMode(alignments) + const score = scoreByPattern({ + mode, + alignments, + queryLength: queryLengthNoSpace(query), + totalTokenLength: pinyinTokens.reduce((s, t) => s + t.length, 0), + commandType: command, + config + }) + return { + mode, + score, + matchType: 'pinyin', + matchIndices: alignments.map((a) => [a.tokenIndex, a.tokenIndex] as [number, number]) + } +} + +function pickBest(candidates: FieldCandidate[], config: TokenSearchConfig): FieldCandidate | null { + let best: FieldCandidate | null = null + for (const c of candidates) { + if (!best) { + best = c + continue + } + const tc = c.mode === NO_MATCH ? Number.NEGATIVE_INFINITY : config.modeTiers[c.mode] + const tb = best.mode === NO_MATCH ? Number.NEGATIVE_INFINITY : config.modeTiers[best.mode] + if ( + tc > tb || + (tc === tb && c.score > best.score) || + (tc === tb && + c.score === best.score && + FIELD_PRIORITY[c.matchType] < FIELD_PRIORITY[best.matchType]) + ) + best = c + } + return best +} + +/** 多字段搜索:name / acronym+aliases / pinyinTokens,取最优命中。 */ +export function tokenSearch( + query: string, + commands: TokenSearchCommand[], + config: TokenSearchConfig = DEFAULT_CONFIG +): TokenSearchResult[] { + if (!query || query.length > MAX_QUERY_LENGTH) return [] + const results: TokenSearchResult[] = [] + + for (const command of commands) { + const tokens = command.tokens?.length ? command.tokens : tokenize(command.name) + const candidates: FieldCandidate[] = [] + + const nameC = scoreNameField(query, tokens, command, config) + if (nameC) candidates.push(nameC) + + if (command.acronym) { + const c = scoreFlatField(query, command.acronym, 'acronym', command, config) + if (c) candidates.push(c) + } + if (command.aliases) { + for (const alias of command.aliases) { + const c = scoreFlatField(query, alias, 'aliases', command, config) + if (c) candidates.push(c) + } + } + + const pinyinC = scorePinyinField(query, command.pinyinTokens, command, config) + if (pinyinC) candidates.push(pinyinC) + + const best = pickBest(candidates, config) + if (best) + results.push({ + command, + mode: best.mode, + score: best.score, + matchType: best.matchType, + matchIndices: best.matchIndices + }) + } + + results.sort((a, b) => b.score - a.score) + return results +} diff --git a/src/shared/tokenizer.ts b/src/shared/tokenizer.ts new file mode 100644 index 00000000..1a362000 --- /dev/null +++ b/src/shared/tokenizer.ts @@ -0,0 +1,130 @@ +/** 将指令名按 ASCII 边界规则切分为小写词 token 数组。 */ + +type CharKind = 'separator' | 'upper' | 'lower' | 'digit' | 'chinese' + +function classifyChar(ch: string): CharKind { + if (ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '_' || ch === '-') + return 'separator' + if (ch >= 'A' && ch <= 'Z') return 'upper' + if (ch >= 'a' && ch <= 'z') return 'lower' + if (ch >= '0' && ch <= '9') return 'digit' + const code = ch.codePointAt(0) ?? 0 + if ( + (code >= 0x4e00 && code <= 0x9fff) || + (code >= 0x3400 && code <= 0x4dbf) || + (code >= 0xf900 && code <= 0xfaff) + ) + return 'chinese' + return 'separator' +} + +// current 末尾连续大写字母数(用于 VSCode -> VS|Code 的边界判定) +function trailingUpperRunLength(s: string): number { + let n = 0 + for (let i = s.length - 1; i >= 0; i--) { + if (s[i] >= 'A' && s[i] <= 'Z') n++ + else break + } + return n +} + +/** + * 分词规则:分隔符拆分、camelCase/PascalCase 边界、连续大写视为一个 token + * (末尾大写紧接小写时让出,如 VSCode -> [vs, code])、数字与字母组合、汉字逐字。 + */ +export function tokenize(name: string): string[] { + if (!name) return [] + + const tokens: string[] = [] + let current = '' + let segmentRoot: 'alpha' | 'digit' | null = null + let prevKind: CharKind | null = null + + const flush = (): void => { + if (current.length > 0) tokens.push(current.toLowerCase()) + current = '' + segmentRoot = null + prevKind = null + } + + for (let i = 0; i < name.length; i++) { + const ch = name[i] + const kind = classifyChar(ch) + + if (kind === 'separator') { + flush() + continue + } + if (kind === 'chinese') { + flush() + tokens.push(ch.toLowerCase()) + continue + } + + if (current.length === 0) { + current = ch + segmentRoot = kind === 'digit' ? 'digit' : 'alpha' + prevKind = kind + continue + } + + let breakBefore = false + + if (prevKind === 'lower' && kind === 'upper') { + // camelCase 边界:小写 -> 大写 + breakBefore = true + } else if (prevKind === 'lower' && kind === 'digit') { + // lower -> digit: 仅当数字后续为全大写串时切分(Chat2DB -> chat|2db) + // 数字后续为 PascalCase(mp3Player)或数字结尾时不切分,保留 mp3/H264 式单元 + const afterDigits = name[i + 1] + const afterAfter = name[i + 2] + if ( + afterDigits && + classifyChar(afterDigits) === 'upper' && + (!afterAfter || + classifyChar(afterAfter) === 'upper' || + classifyChar(afterAfter) === 'separator') + ) { + breakBefore = true + } + } else if (prevKind === 'digit' && kind === 'upper' && segmentRoot === 'alpha') { + // 字母起的 token:数字 -> 大写(mp3|Player) + breakBefore = true + } else if (prevKind === 'upper' && kind === 'lower') { + // 连续大写段末尾遇小写:让出最后一个大写给下一段 + const runLen = trailingUpperRunLength(current) + if (runLen >= 2) { + const lastChar = current[current.length - 1] + current = current.slice(0, -1) + if (current.length > 0) tokens.push(current.toLowerCase()) + current = lastChar + ch + prevKind = 'lower' + segmentRoot = 'alpha' + continue + } + } + + if (breakBefore) { + flush() + current = ch + segmentRoot = kind === 'digit' ? 'digit' : 'alpha' + prevKind = kind + } else { + current += ch + prevKind = kind + } + } + + flush() + return tokens +} + +/** 判断字符是否为汉字(供拼音音节提取复用)。 */ +export function isChinese(ch: string): boolean { + const code = ch.codePointAt(0) ?? 0 + return ( + (code >= 0x4e00 && code <= 0x9fff) || + (code >= 0x3400 && code <= 0x4dbf) || + (code >= 0xf900 && code <= 0xfaff) + ) +} From e00a795c0225c574c215e872982d7cbcd908299d Mon Sep 17 00:00:00 2001 From: gdm257 Date: Mon, 29 Jun 2026 06:37:30 +0900 Subject: [PATCH 2/3] test: add token search core tests --- tests/main/common.test.ts | 35 ++++--- tests/renderer/listModeSort.test.ts | 115 +++++++++++++++++++++ tests/shared/tokenSearch.test.ts | 152 ++++++++++++++++++++++++++++ tests/shared/tokenizer.test.ts | 73 +++++++++++++ 4 files changed, 362 insertions(+), 13 deletions(-) create mode 100644 tests/renderer/listModeSort.test.ts create mode 100644 tests/shared/tokenSearch.test.ts create mode 100644 tests/shared/tokenizer.test.ts diff --git a/tests/main/common.test.ts b/tests/main/common.test.ts index 89381fa0..e73ce221 100644 --- a/tests/main/common.test.ts +++ b/tests/main/common.test.ts @@ -1,4 +1,10 @@ -import { describe, it, expect } from 'vitest' +import { describe, it, expect, vi } from 'vitest' + +// 屏蔽原生模块依赖(common.ts 顶部间接 import native/index,后者引用 .node?asset) +vi.mock('../../src/main/core/native/index', () => ({ + WindowManager: { getExplorerFolderPath: vi.fn() } +})) + import { extractAcronym } from '../../src/main/utils/common' describe('extractAcronym', () => { @@ -31,21 +37,24 @@ describe('extractAcronym', () => { }) describe('无法提取时', () => { - it('单个小写单词应返回空字符串', () => { - expect(extractAcronym('chrome')).toBe('') + it('单个小写单词应返回首字母', () => { + // 由 tokenize 派生:[chrome] -> 'c' + expect(extractAcronym('chrome')).toBe('c') }) - it('单个首字母大写单词应返回空字符串', () => { - // 只有一个大写字母,不满足 > 1 的条件 - expect(extractAcronym('Chrome')).toBe('') + it('单个首字母大写单词应返回首字母', () => { + // 由 tokenize 派生:[chrome] -> 'c' + expect(extractAcronym('Chrome')).toBe('c') }) - it('中文名称应返回空字符串', () => { - expect(extractAcronym('原神')).toBe('') + it('中文名称应返回各汉字拼接', () => { + // 由 tokenize 派生:汉字逐字成 token -> [原, 神] -> '原神' + expect(extractAcronym('原神')).toBe('原神') }) - it('纯数字应返回空字符串', () => { - expect(extractAcronym('12345')).toBe('') + it('纯数字应返回首字符', () => { + // 由 tokenize 派生:[12345] -> '1' + expect(extractAcronym('12345')).toBe('1') }) }) @@ -54,9 +63,9 @@ describe('extractAcronym', () => { expect(extractAcronym('')).toBe('') }) - it('混合中英文用空格分隔应提取首字符', () => { - // "米哈游 Launcher" 分成两个词 - expect(extractAcronym('米哈游 Launcher')).toBe('米l') + it('混合中英文用空格分隔应取各 token 首字符', () => { + // 由 tokenize 派生:汉字逐字成 token -> [米, 哈, 游, launcher],逐 token 取首字符 -> '米哈游l' + expect(extractAcronym('米哈游 Launcher')).toBe('米哈游l') }) }) }) diff --git a/tests/renderer/listModeSort.test.ts b/tests/renderer/listModeSort.test.ts new file mode 100644 index 00000000..88f47e37 --- /dev/null +++ b/tests/renderer/listModeSort.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect } from 'vitest' +import { + listModeRank, + sortListModeResults +} from '../../src/renderer/src/composables/useSearchResults' +import { DEFAULT_CONFIG, NO_MATCH, type PatternMode } from '../../src/shared/tokenSearch' + +// 列表模式最终排序:listModeRank 返回 modeTier 权重值(越大越优先), +// sortListModeResults 按 档位 > app 软加权 > 频率 排序。 +// 偏好置顶由 store.search 保证(偏好项已在 bestSearchResults 首位),本函数不重复处理。 + +const T = DEFAULT_CONFIG.modeTiers +type Item = { + name: string + type: string + subType?: string + path: string + featureCode?: string + _tokenMode?: PatternMode | typeof NO_MATCH + useCount?: number +} +const app = (name: string, m: PatternMode): Item => ({ + name, + type: 'direct', + subType: 'app', + path: `C:/${name}`, + _tokenMode: m +}) +const plugin = (name: string, m: PatternMode): Item => ({ + name, + type: 'plugin', + path: `/p/${name}`, + featureCode: name, + _tokenMode: m +}) +const ctx = (items: Item[]): { query: string; usageMap: Map } => { + const m = new Map() + for (const it of items) if (it.useCount) m.set(`${it.path}:${it.featureCode || ''}`, it.useCount) + return { query: 'q', usageMap: m } +} + +describe('listModeRank 档位序 (对齐 modeTier 权重表)', () => { + const r = (m?: PatternMode | typeof NO_MATCH): number => + listModeRank({ name: 'x', _tokenMode: m }, 'q') + it('各模式返回对应档位值;无/NO_MATCH → 0', () => { + expect(r('multiTokensExactitude')).toBe(T.multiTokensExactitude) + expect(r('multiTokensPrefixDiscontinuous')).toBe(T.multiTokensPrefixDiscontinuous) + expect(r(undefined)).toBe(0) + expect(r(NO_MATCH)).toBe(0) + }) + it('非连续词首(400) < 全词(800) — 回归锚点', () => { + // 旧版把跨词词首(含非连续)统一归 rank1 高于全词,违反权重表, + // 导致 dance 命中时拼音非连续词首排到 MikuMikuDance 全词前面。 + expect(r('multiTokensPrefixDiscontinuous')).toBeLessThan(r('singleTokenExactitude')) + }) + it('name===query 等价完整项', () => { + expect(listModeRank({ name: 'docker', _tokenMode: undefined }, 'docker')).toBe( + T.multiTokensExactitude + ) + }) +}) + +describe('sortListModeResults 最终排序', () => { + it('档位链 FULL > CROSS > WHOLE > PREFIX > DISCONT', () => { + const items = [ + app('Discont', 'multiTokensPrefixDiscontinuous'), + plugin('Single', 'singleTokenPrefix'), + app('Whole', 'singleTokenExactitude'), + plugin('Cross', 'multiTokensPrefixContinuous'), + app('Full', 'multiTokensExactitude') + ] + expect(sortListModeResults(items, ctx(items)).map((i) => i.name)).toEqual([ + 'Full', + 'Cross', + 'Whole', + 'Single', + 'Discont' + ]) + }) + it('dance 回归: 全词排在拼音非连续词首前', () => { + // query 'dance': MikuMikuDance 全词(800) 必须高于 本地安全策略 拼音非连续词首(400) + const items = [ + app('本地安全策略', 'multiTokensPrefixDiscontinuous'), + app('MikuMikuDance', 'singleTokenExactitude') + ] + expect( + sortListModeResults(items, { query: 'dance', usageMap: new Map() }).map((i) => i.name) + ).toEqual(['MikuMikuDance', '本地安全策略']) + }) + it('同档位内 app 软加权优先于 plugin', () => { + const items = [ + plugin('CmdB', 'multiTokensPrefixContinuous'), + app('AppA', 'multiTokensPrefixContinuous') + ] + expect(sortListModeResults(items, ctx(items)).map((i) => i.name)).toEqual(['AppA', 'CmdB']) + }) + it('频率 tiebreaker: 同档同类型 useCount 降序;无数据稳定', () => { + const items = [ + { ...app('Low', 'singleTokenPrefix'), useCount: 1 }, + { ...app('High', 'singleTokenPrefix'), useCount: 10 } + ] + expect(sortListModeResults(items, ctx(items)).map((i) => i.name)).toEqual(['High', 'Low']) + expect( + sortListModeResults( + [app('A', 'singleTokenPrefix'), app('B', 'singleTokenPrefix')], + ctx([]) + ).map((i) => i.name) + ).toEqual(['A', 'B']) + }) + it('返回新数组,不修改入参', () => { + const items = [app('Z', 'singleTokenPrefix'), app('A', 'multiTokensExactitude')] + expect(items[0].name).toBe('Z') + expect(sortListModeResults(items, ctx(items))[0].name).toBe('A') + }) +}) diff --git a/tests/shared/tokenSearch.test.ts b/tests/shared/tokenSearch.test.ts new file mode 100644 index 00000000..4a56082f --- /dev/null +++ b/tests/shared/tokenSearch.test.ts @@ -0,0 +1,152 @@ +import { describe, it, expect } from 'vitest' +import { + DEFAULT_CONFIG, + NO_MATCH, + classifyPattern, + resolveWordTokenEnabled, + segmentQuery, + subsequenceMatch, + tokenSearch, + type PatternMode, + type TokenSearchCommand +} from '../../src/shared/tokenSearch' +import { + convertTokenSearchResults, + type ConvertibleCommand +} from '../../src/renderer/src/stores/commandDataStore' + +// 引擎层:query 切分 → tokens 匹配(模式判定) → match 权重(档位/软加权) → 排序。 +// 偏好置顶由 store 层 (commandDataStore.search) 负责,不在本纯函数测试范围。 + +describe('resolveWordTokenEnabled', () => { + it('仅显式 false 关闭,其余降级 true', () => { + expect(resolveWordTokenEnabled(false)).toBe(false) + expect(resolveWordTokenEnabled(undefined)).toBe(true) + }) +}) + +// ── 精确档位权重 (配置不变量,回归锚点) ── +describe('modeTiers 权重表', () => { + const t = DEFAULT_CONFIG.modeTiers + it('档位降序: 完整1000 > 连续词首900 > 全词800 > 词首子串600 > 非连续词首400', () => { + expect(t.multiTokensExactitude).toBe(1000) + expect(t.multiTokensPrefixContinuous).toBe(900) + expect(t.singleTokenExactitude).toBe(800) + expect(t.singleTokenPrefix).toBe(600) + expect(t.multiTokensPrefixDiscontinuous).toBe(400) + }) + it('app 软加权(50) < 最小档位间距(100),不跨档', () => { + expect(DEFAULT_CONFIG.typeWeights.app).toBe(50) + expect(50).toBeLessThan(Math.min(1000 - 900, 900 - 800, 800 - 600, 600 - 400)) + }) +}) + +describe('subsequenceMatch', () => { + it('字符按序出现即命中,顺序不符不命中', () => { + expect(subsequenceMatch('dce', 'docker')).toBe(true) + expect(subsequenceMatch('xyz', 'task')).toBe(false) + }) +}) + +// ── query 切分 + tokens 匹配 (模式判定 MECE) ── +function mode(query: string, name: string, tokens: string[]): PatternMode | typeof NO_MATCH { + return classifyPattern({ query, name, tokens, alignments: segmentQuery(query, tokens) }) +} + +describe('模式判定 (query 落点 → PatternMode)', () => { + it('完整项 → multiTokensExactitude', () => + expect(mode('task manager', 'Task Manager', ['task', 'manager'])).toBe('multiTokensExactitude')) + it('连续跨词词首 → multiTokensPrefixContinuous', () => + expect(mode('dockde', 'Docker Desktop', ['docker', 'desktop'])).toBe( + 'multiTokensPrefixContinuous' + )) + it('首字母缩写(相邻 token) → multiTokensPrefixContinuous', () => + expect(mode('tm', 'Task Manager', ['task', 'manager'])).toBe('multiTokensPrefixContinuous')) + it('非连续跨词词首(跳过中间 token) → multiTokensPrefixDiscontinuous', () => + expect(mode('vc', 'Visual Studio Code', ['visual', 'studio', 'code'])).toBe( + 'multiTokensPrefixDiscontinuous' + )) + it('单 token 全词 → singleTokenExactitude', () => + expect(mode('task', 'Task Manager', ['task', 'manager'])).toBe('singleTokenExactitude')) + it('单 token 词首子串 → singleTokenPrefix', () => + expect(mode('man', 'Task Manager', ['task', 'manager'])).toBe('singleTokenPrefix')) + it('非词首子串 / 词中子序列 → NO_MATCH (信息量不足过滤)', () => { + expect(mode('ana', 'Task Manager', ['task', 'manager'])).toBe(NO_MATCH) + expect(mode('mgr', 'Task Manager', ['task', 'manager'])).toBe(NO_MATCH) + }) +}) + +// ── match 最终权重 + 排序 (端到端) ── +function app(name: string, extra: Partial = {}): TokenSearchCommand { + return { name, type: 'direct', subType: 'app', ...extra } +} + +describe('评分与排序', () => { + it('档位降序端到端: 连续词首 > 全词 > 词首子串 > 非连续词首', () => { + const c = app('Visual Studio Code', { tokens: ['visual', 'studio', 'code'] }) + expect(tokenSearch('vsc', [c])[0].score).toBeGreaterThan(tokenSearch('visual', [c])[0].score) + expect(tokenSearch('visual', [c])[0].score).toBeGreaterThan(tokenSearch('vis', [c])[0].score) + expect(tokenSearch('vis', [c])[0].score).toBeGreaterThan(tokenSearch('vc', [c])[0].score) + }) + it('完整项(1000) > 跨词词首(900)', () => { + const c = app('Task Manager', { tokens: ['task', 'manager'] }) + expect(tokenSearch('task manager', [c])[0].score).toBeGreaterThan( + tokenSearch('tm', [c])[0].score + ) + }) + it('软加权不跨档: app 词首子串(600+50) < plugin 连续词首(900+0)', () => { + const a: TokenSearchCommand = { + name: 'Task Manager', + type: 'direct', + subType: 'app', + tokens: ['task', 'manager'] + } + const p: TokenSearchCommand = { + name: 'Task Manager', + type: 'plugin', + tokens: ['task', 'manager'] + } + expect(tokenSearch('man', [a])[0].score).toBeLessThan(tokenSearch('tm', [p])[0].score) + }) + it('过滤: 非词首子串/词中子序列不进结果;空/超长 query 返回空', () => { + const tm = app('Task Manager', { tokens: ['task', 'manager'] }) + expect(tokenSearch('ana', [tm])).toHaveLength(0) + expect(tokenSearch('mgr', [tm])).toHaveLength(0) + expect(tokenSearch('', [tm])).toEqual([]) + expect(tokenSearch('a'.repeat(33), [tm])).toEqual([]) + }) +}) + +// ── 拼音音节匹配 (编码切分 + 匹配) ── +function pinyinHit(query: string, syl: string[]): number { + return tokenSearch(query, [{ name: '中', type: 'builtin', pinyinTokens: syl }]).length +} + +describe('拼音音节匹配', () => { + const S = ['ren', 'wu', 'guan', 'li', 'qi'] + it('首字母缩写 / 全拼 / 跨音节首字母(可跳过)命中', () => { + expect(pinyinHit('rwglq', S)).toBe(1) + expect(pinyinHit('renwuguanliqi', S)).toBe(1) + expect(pinyinHit('rglq', S)).toBe(1) + }) + it('完整音节放行,单首字母过滤,内部前缀不命中', () => { + expect(pinyinHit('ren', S)).toBe(1) // 单完整音节 = 全词 + expect(pinyinHit('r', S)).toBe(0) // 单首字母信息量不足 + expect(pinyinHit('rewglq', S)).toBe(0) // re 非 ren 的合法编码 + }) +}) + +// ── 结果转换 (引擎结果 → SearchResult,携带 matches 索引) ── +function convCmd(name: string, extra: Partial = {}): ConvertibleCommand { + return { name, path: `C:\\${name}.exe`, type: 'direct', subType: 'app', ...extra } +} + +describe('convertTokenSearchResults', () => { + it('保留原字段并携带 matches 索引', () => { + const r = convertTokenSearchResults(tokenSearch('dockde', [convCmd('Docker Desktop')])) + expect(r[0].name).toBe('Docker Desktop') + expect(r[0].matches!.length).toBeGreaterThan(0) + expect(r[0].matches![0].indices.length).toBeGreaterThan(0) + }) + it('空结果返回空数组', () => expect(convertTokenSearchResults([])).toEqual([])) +}) diff --git a/tests/shared/tokenizer.test.ts b/tests/shared/tokenizer.test.ts new file mode 100644 index 00000000..263819b6 --- /dev/null +++ b/tests/shared/tokenizer.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect } from 'vitest' +import { tokenize } from '../../src/shared/tokenizer' +import { enrichTokens, type Command } from '../../src/renderer/src/stores/commandDataStore' + +// 分词:tokenize 边界规则 + enrichTokens 批量应用 (tokens/pinyinTokens 补齐)。 + +describe('tokenize 分词', () => { + it('分隔符拆分 (空格/下划线/连字符)', () => { + expect(tokenize('Task Manager')).toEqual(['task', 'manager']) + expect(tokenize('task-manager')).toEqual(['task', 'manager']) + }) + + it('camelCase / PascalCase 边界 (小写→大写)', () => { + expect(tokenize('TaskManager')).toEqual(['task', 'manager']) + expect(tokenize('VisualStudioCode')).toEqual(['visual', 'studio', 'code']) + }) + + it('连续大写缩写词 (末位大写让出: VSCode → [vs, code])', () => { + expect(tokenize('VSCode')).toEqual(['vs', 'code']) + expect(tokenize('HTTPSConnection')).toEqual(['https', 'connection']) + }) + + it('字母起 token 遇数字→大写切分 (mp3Player → [mp3, player])', () => { + expect(tokenize('mp3Player')).toEqual(['mp3', 'player']) + expect(tokenize('3DModel')).toEqual(['3d', 'model']) + }) + + it('lower→digit 后接全大写串切分 (Chat2DB → [chat, 2db])', () => { + expect(tokenize('Chat2DB')).toEqual(['chat', '2db']) + }) + + it('upper→digit 不切分 (H264 / MP4 为单元)', () => { + expect(tokenize('H264')).toEqual(['h264']) + expect(tokenize('MP4')).toEqual(['mp4']) + }) + + it('汉字逐字 + 中英/数字混合', () => { + expect(tokenize('计算器')).toEqual(['计', '算', '器']) + expect(tokenize('FirPE维护系统')).toEqual(['fir', 'pe', '维', '护', '系', '统']) + expect(tokenize('115浏览器')).toEqual(['115', '浏', '览', '器']) + }) + + it('边界: 空串/纯分隔符返回空,输出全小写', () => { + expect(tokenize('')).toEqual([]) + expect(tokenize('___---')).toEqual([]) + expect(tokenize('VSCode').every((t) => t === t.toLowerCase())).toBe(true) + }) +}) + +// enrichTokens 是 tokenize 的批量应用 + 拼音音节生成。 +// 生产数据(apps/系统设置/本地启动项)构造时不带 tokens/pinyinTokens,依赖此函数统一补齐。 +function cmd(name: string, extra: Partial = {}): Command { + return { name, path: `C:\\${name}.exe`, type: 'direct', subType: 'app', ...extra } +} + +describe('enrichTokens 补齐', () => { + it('填充 tokens(分词) 与 pinyinTokens(拼音音节)', () => { + const c = [cmd('Docker Desktop'), cmd('计算器')] + enrichTokens(c) + expect(c[0].tokens).toEqual(['docker', 'desktop']) + expect(c[1].tokens).toEqual(['计', '算', '器']) + expect(c[1].pinyinTokens).toEqual(['ji', 'suan', 'qi']) + }) + it('已有字段不覆盖(幂等);纯英文 pinyinTokens 为空', () => { + const c = cmd('计算器', { tokens: ['preset'], pinyinTokens: ['ren', 'wu'] }) + enrichTokens([c]) + expect(c.tokens).toEqual(['preset']) + expect(c.pinyinTokens).toEqual(['ren', 'wu']) + const e = cmd('Docker Desktop') + enrichTokens([e]) + expect(e.pinyinTokens).toEqual([]) + }) +}) From 3b6dd14d758337e681d6ca5bd7f0eab147987e48 Mon Sep 17 00:00:00 2001 From: gdm257 Date: Sat, 4 Jul 2026 22:57:08 +0900 Subject: [PATCH 3/3] fix: address review issues in token search - useSearchResults: access wordTokenEnabled via the commandDataStore proxy instead of destructuring the ref snapshot, so list-mode sort re-evaluates when the setting toggles - highlight: include ASCII digits 0-9 in getChineseCharPositions to align with pinyinTokens, fixing highlight misalignment for mixed pinyin/number queries - tokenSearch: skip empty syllables in segmentPinyin to avoid zero-length alignment pollution --- src/renderer/src/composables/useSearchResults.ts | 5 ++--- src/renderer/src/utils/highlight.ts | 3 ++- src/shared/tokenSearch.ts | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/renderer/src/composables/useSearchResults.ts b/src/renderer/src/composables/useSearchResults.ts index 27f31576..0492a594 100644 --- a/src/renderer/src/composables/useSearchResults.ts +++ b/src/renderer/src/composables/useSearchResults.ts @@ -122,8 +122,7 @@ export function useSearchResults(props: { searchImageCommands, searchTextCommands, searchFileCommands, - matchesWindowCommand, - wordTokenEnabled + matchesWindowCommand } = commandDataStore // 使用统计缓存(key: "path:featureCode", value: useCount) @@ -321,7 +320,7 @@ export function useSearchResults(props: { // 合并去重后仍居前,无需在此重复处理),然后按 token 档位排序。 // 同档位内 tiebreaker:系统应用软加权 → 频率。 // 开关 OFF: 旧比较器(完全匹配 → 前缀 → 系统应用 → 频率) - if (wordTokenEnabled) { + if (commandDataStore.wordTokenEnabled) { // ON: sortListModeResults 软加权(偏好置顶由 store.search 保证) return sortListModeResults(deduped, { query, usageMap: usageStatsMap.value }) } diff --git a/src/renderer/src/utils/highlight.ts b/src/renderer/src/utils/highlight.ts index 3138b9ce..71f705f4 100644 --- a/src/renderer/src/utils/highlight.ts +++ b/src/renderer/src/utils/highlight.ts @@ -190,7 +190,8 @@ function getChineseCharPositions(text: string): number[] { if ( (code >= 0x4e00 && code <= 0x9fff) || (code >= 0x3400 && code <= 0x4dbf) || - (code >= 0xf900 && code <= 0xfaff) + (code >= 0xf900 && code <= 0xfaff) || + (code >= 0x30 && code <= 0x39) ) { positions.push(i) } diff --git a/src/shared/tokenSearch.ts b/src/shared/tokenSearch.ts index 7e6980d6..7bb3d536 100644 --- a/src/shared/tokenSearch.ts +++ b/src/shared/tokenSearch.ts @@ -331,6 +331,9 @@ function segmentPinyin(query: string, syllables: string[]): SegmentAlignment[] | let matched = false for (let i = cursor; i < syllables.length; i++) { const syl = syllables[i] + // 空音节(pinyin-pro 对生僻字可能返回空串):startsWith("") 恒真且 length=0, + // 会 push 零长度 alignment 污染评分,跳过。 + if (!syl) continue const rest = query.slice(qStart) // 完整音节优先,其次首字母 if (rest.startsWith(syl)) {