From d5b6c5511cce3e328057fc2dd22f54ea0675852f Mon Sep 17 00:00:00 2001 From: "zizhou teng (n451)" <2020200706@ruc.edu.cn> Date: Sun, 24 May 2026 22:48:53 +0100 Subject: [PATCH 1/2] feat(tags): proper unicode tags --- CHANGELOG.md | 1 + lua/obsidian/api.lua | 2 +- lua/obsidian/completion/tags.lua | 7 +- lua/obsidian/parse/tags.lua | 6 +- lua/obsidian/search/init.lua | 15 +--- lua/obsidian/ui.lua | 30 +++++-- lua/obsidian/util.lua | 2 +- tests/lsp/test_completion.lua | 132 +++++++++++++++++++++++++++++++ tests/test_ui.lua | 27 +++++++ 9 files changed, 195 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b7eeda11..041fa013d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `Note` class can carry a `template` field. - LSP completion replaces completion plugin based completion. - Frontmatter tag completion. +- Unicode/CJK tag support across parsing, completion, and UI highlighting (e.g. `#café`, `#中文`). ### Removed diff --git a/lua/obsidian/api.lua b/lua/obsidian/api.lua index 492a7e712..b2e46ac8c 100644 --- a/lua/obsidian/api.lua +++ b/lua/obsidian/api.lua @@ -151,7 +151,7 @@ M.cursor_link = function() local _, cur_col = unpack(vim.api.nvim_win_get_cursor(0)) cur_col = cur_col + 1 -- 0-indexed column to 1-indexed lua string position - local refs = search.find_refs(line, { exclude = { "Tag" } }) + local refs = search.find_refs(line) local match = iter(refs):find(function(m) local open, close = unpack(m) diff --git a/lua/obsidian/completion/tags.lua b/lua/obsidian/completion/tags.lua index 080461137..44a2f70fd 100644 --- a/lua/obsidian/completion/tags.lua +++ b/lua/obsidian/completion/tags.lua @@ -1,14 +1,13 @@ local Note = require "obsidian.note" -local Patterns = require("obsidian.search").Patterns local M = {} --- TODO: use proper unicode match +local TagCharsOptional = "[%w\128-\244_/-]*" ---@type { pattern: string, offset: integer }[] local TAG_PATTERNS = { - { pattern = "[%s%(]#" .. Patterns.TagCharsOptional .. "$", offset = 2 }, - { pattern = "^#" .. Patterns.TagCharsOptional .. "$", offset = 1 }, + { pattern = "[%s%(]#" .. TagCharsOptional .. "$", offset = 2 }, + { pattern = "^#" .. TagCharsOptional .. "$", offset = 1 }, } ---@param input string diff --git a/lua/obsidian/parse/tags.lua b/lua/obsidian/parse/tags.lua index be155eab8..b9d44210c 100644 --- a/lua/obsidian/parse/tags.lua +++ b/lua/obsidian/parse/tags.lua @@ -44,7 +44,7 @@ local all_tags = Ct(((utf8_char - one_tag) ^ 0 * one_tag) ^ 0) --- UTF-8 indices are 0-based and end-exclusive. --- --- @param line string ---- @return { tag: string, start_idx: integer, end_idx: integer, start_byte: integer, end_byte: integer }[] +--- @return { [1]: integer, [2]: integer, [3]: string }[] M.parse_tags = function(line) if string.find(line, "") ~= nil then return {} @@ -72,9 +72,7 @@ M.parse_tags = function(line) out[#out + 1] = { start_byte_index, end_byte_index, - "Tag", -- TODO: return tag directly - -- vim.str_utfindex(line, start_byte), - -- vim.str_utfindex(line, end_byte), + "Tag", } end end diff --git a/lua/obsidian/search/init.lua b/lua/obsidian/search/init.lua index e830817f3..61a377d83 100644 --- a/lua/obsidian/search/init.lua +++ b/lua/obsidian/search/init.lua @@ -24,10 +24,6 @@ M.build_grep_cmd = Ripgrep.build_grep_cmd M.Patterns = { -- Tags - TagCharsOptional = "[%w\128-\244_/-]*", - TagCharsRequired = "[%w\128-\244_/-]+[%w\128-\244_/-]*[%a\128-\244_/-]+[%w\128-\244_/-]*", - - Tag = "#[%w\128-\244_/-]+[%w\128-\244_/-]*[%a\128-\244_/-]+[%w\128-\244_/-]*", TagCharsRequiredRg = [[[\p{L}\p{N}_/-]+[\p{L}\p{N}_/-]*[\p{L}_/-]+[\p{L}\p{N}_/-]*]], TagCharsOptionalRg = [[[\p{L}\p{N}_/-]*]], @@ -155,7 +151,6 @@ M.find_refs = function(s, opts) "WikiWithAlias", "Wiki", "Markdown", - "Tag", "BlockID", "Highlight", } @@ -523,7 +518,7 @@ M.find_links = function(note) local lines = io.lines(tostring(note.path)) for lnum, line in vim.iter(lines):enumerate() do - for _, ref_match in ipairs(M.find_refs(line, { exclude = { "BlockID", "Tag" } })) do + for _, ref_match in ipairs(M.find_refs(line, { exclude = { "BlockID" } })) do local m_start, m_end = unpack(ref_match) local link = string.sub(line, m_start, m_end) if not found[link] then @@ -919,10 +914,8 @@ M.find_tags_async = function(term, callback, opts) for _, match in ipairs(util.parse_tags(line)) do local m_start, m_end, _ = unpack(match) local tag = string.sub(line, m_start + 1, m_end) - if string.match(tag, "^" .. M.Patterns.TagCharsRequired .. "$") then - add_match(tag, path, note, match_data.line_number, line, m_start, m_end) - n_matches = n_matches + 1 - end + add_match(tag, path, note, match_data.line_number, line, m_start, m_end) + n_matches = n_matches + 1 end -- check for tags in frontmatter @@ -934,7 +927,7 @@ M.find_tags_async = function(term, callback, opts) and (vim.startswith(line, "tags:") or string.match(line, "%s*- ")) then local tag = vim.trim(string.sub(line, 3)) -- HACK: works because we force ' - tag' - if string.match(tag, "^" .. M.Patterns.TagCharsRequired .. "$") and vim.list_contains(note.tags, tag) then + if vim.list_contains(note.tags, tag) then add_match(tag, path, note, match_data.line_number, line) end end diff --git a/lua/obsidian/ui.lua b/lua/obsidian/ui.lua index 6965ab025..ce0759571 100644 --- a/lua/obsidian/ui.lua +++ b/lua/obsidian/ui.lua @@ -373,8 +373,8 @@ local function get_line_ref_extmarks(marks, line, lnum, ui_opts) conceal = is_uri and " " or "", } ) - elseif m_type == "Tag" then - -- A tag is like '#tag' + elseif m_type == "BlockID" then + -- A block ID, like '^hello-world' marks[#marks + 1] = ExtMark.new( nil, lnum, @@ -382,12 +382,29 @@ local function get_line_ref_extmarks(marks, line, lnum, ui_opts) ExtMarkOpts.from_tbl { end_row = lnum, end_col = m_end, - hl_group = ui_opts.tags.hl_group, + hl_group = ui_opts.block_ids.hl_group, spell = false, } ) - elseif m_type == "BlockID" then - -- A block ID, like '^hello-world' + end + end + + local inline_code_blocks = {} + for m_start, m_end in util.gfind(line, "`[^`]*`") do + inline_code_blocks[#inline_code_blocks + 1] = { m_start, m_end } + end + + for _, match in ipairs(util.parse_tags(line)) do + local m_start, m_end = unpack(match) + local inside_code_block = false + for _, code_block_boundary in ipairs(inline_code_blocks) do + if code_block_boundary[1] < m_start and m_end < code_block_boundary[2] then + inside_code_block = true + break + end + end + + if not inside_code_block then marks[#marks + 1] = ExtMark.new( nil, lnum, @@ -395,12 +412,13 @@ local function get_line_ref_extmarks(marks, line, lnum, ui_opts) ExtMarkOpts.from_tbl { end_row = lnum, end_col = m_end, - hl_group = ui_opts.block_ids.hl_group, + hl_group = ui_opts.tags.hl_group, spell = false, } ) end end + return marks end diff --git a/lua/obsidian/util.lua b/lua/obsidian/util.lua index 44f11eb2a..c93c33561 100644 --- a/lua/obsidian/util.lua +++ b/lua/obsidian/util.lua @@ -349,7 +349,7 @@ util.parse_link = function(link, opts) local link_type = opts.link_type if link_type == nil then - for _, match in ipairs(search.find_refs(link, { exclude = { "Tag" } })) do + for _, match in ipairs(search.find_refs(link)) do local _, _, m_type = unpack(match) link_type = m_type break diff --git a/tests/lsp/test_completion.lua b/tests/lsp/test_completion.lua index 4a3d346dc..39f5b0a6a 100644 --- a/tests/lsp/test_completion.lua +++ b/tests/lsp/test_completion.lua @@ -58,6 +58,16 @@ T["refs"]["can_complete should handle wiki links with preceding Unicode text"] = eq(21, insert_end) end +T["tags"] = MiniTest.new_set() + +T["tags"]["find_tags_start should accept in-progress prefixes"] = function() + local completion = require "obsidian.completion.tags" + + eq("202", completion.find_tags_start "#202") + eq("abc", completion.find_tags_start "#abc") + eq("foo", completion.find_tags_start "(#foo") +end + T["completion"] = MiniTest.new_set() T["completion"]["returns items for wiki link trigger"] = function() @@ -161,6 +171,128 @@ tags: eq(true, found) end +T["completion"]["returns items for unicode tag trigger in body"] = function() + h.mock_vault_contents(child.Obsidian.dir, { + ["test.md"] = "#snö", + ["tagged.md"] = [==[ +--- +id: tagged +tags: + - snöw +--- +]==], + }) + + child.cmd("edit " .. tostring(child.Obsidian.dir / "test.md")) + child.api.nvim_win_set_cursor(0, { 1, 5 }) + + run_completion(0, 5) + + local result = child.lua_get [[_G._test_result]] + eq("table", type(result)) + + local found = false + for _, item in ipairs(result.items or {}) do + if item.textEdit and item.textEdit.newText == "#snöw" then + found = true + break + end + end + eq(true, found) +end + +T["completion"]["completes unicode tag inside frontmatter tags: list"] = function() + h.mock_vault_contents(child.Obsidian.dir, { + ["test.md"] = "---\ntags:\n - caf\n---\n", + ["tagged.md"] = [==[ +--- +id: tagged +tags: + - café +--- +]==], + }) + + child.cmd("edit " .. tostring(child.Obsidian.dir / "test.md")) + child.api.nvim_win_set_cursor(0, { 3, 7 }) + + run_completion(2, 7) + + local result = child.lua_get [[_G._test_result]] + eq("table", type(result)) + + local found = false + for _, item in ipairs(result.items or {}) do + if item.textEdit and item.textEdit.newText == "café" then + found = true + break + end + end + eq(true, found) +end + +T["completion"]["returns items for CJK tag trigger in body"] = function() + h.mock_vault_contents(child.Obsidian.dir, { + ["test.md"] = "#中", + ["tagged.md"] = [==[ +--- +id: tagged +tags: + - 中文 +--- +]==], + }) + + child.cmd("edit " .. tostring(child.Obsidian.dir / "test.md")) + -- byte len of "#中" = 1 + 3 + child.api.nvim_win_set_cursor(0, { 1, 4 }) + + run_completion(0, 4) + + local result = child.lua_get [[_G._test_result]] + eq("table", type(result)) + + local found = false + for _, item in ipairs(result.items or {}) do + if item.textEdit and item.textEdit.newText == "#中文" then + found = true + break + end + end + eq(true, found) +end + +T["completion"]["completes CJK tag inside frontmatter tags: list"] = function() + h.mock_vault_contents(child.Obsidian.dir, { + ["test.md"] = "---\ntags:\n - 中\n---\n", + ["tagged.md"] = [==[ +--- +id: tagged +tags: + - 中文 +--- +]==], + }) + + child.cmd("edit " .. tostring(child.Obsidian.dir / "test.md")) + -- byte len of " - 中" = 4 + 3 = 7 + child.api.nvim_win_set_cursor(0, { 3, 7 }) + + run_completion(2, 7) + + local result = child.lua_get [[_G._test_result]] + eq("table", type(result)) + + local found = false + for _, item in ipairs(result.items or {}) do + if item.textEdit and item.textEdit.newText == "中文" then + found = true + break + end + end + eq(true, found) +end + T["completion"]["create_new emits write_note command that writes file"] = function() h.mock_vault_contents(child.Obsidian.dir, { ["test.md"] = "[[brandnewnote", diff --git a/tests/test_ui.lua b/tests/test_ui.lua index 9b3f60b72..7852d704f 100644 --- a/tests/test_ui.lua +++ b/tests/test_ui.lua @@ -52,4 +52,31 @@ T["ExtMark"]["should match == with other ExtMark instances"] = function() eq(m1, m2) end +T["update"] = new_set() + +T["update"]["should not add tag extmarks inside inline code"] = function() + local bufnr = vim.api.nvim_create_buf(false, true) + vim.api.nvim_buf_set_name(bufnr, vim.fn.tempname() .. ".md") + vim.api.nvim_buf_set_lines(bufnr, 0, -1, false, { "`foo #bar` #baz" }) + + Obsidian = { + opts = { + ui = vim.deepcopy(require("obsidian.config.default").ui), + }, + } + + ui.update(bufnr) + + local ns_id = vim.api.nvim_create_namespace "ObsidianUI" + local tag_marks = {} + for _, mark in ipairs(vim.api.nvim_buf_get_extmarks(bufnr, ns_id, 0, -1, { details = true })) do + if mark[4].hl_group == "ObsidianTag" then + tag_marks[#tag_marks + 1] = mark + end + end + + eq(1, #tag_marks) + eq(11, tag_marks[1][3]) +end + return T From cb9d69a208492e23d9979e060809c13b4070ffe4 Mon Sep 17 00:00:00 2001 From: "zizhou teng (n451)" <2020200706@ruc.edu.cn> Date: Sun, 24 May 2026 22:53:11 +0100 Subject: [PATCH 2/2] fix useless "tag" --- lua/obsidian/parse/tags.lua | 3 +-- tests/util/test_parse_tag.lua | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lua/obsidian/parse/tags.lua b/lua/obsidian/parse/tags.lua index b9d44210c..1fa0c1e2a 100644 --- a/lua/obsidian/parse/tags.lua +++ b/lua/obsidian/parse/tags.lua @@ -44,7 +44,7 @@ local all_tags = Ct(((utf8_char - one_tag) ^ 0 * one_tag) ^ 0) --- UTF-8 indices are 0-based and end-exclusive. --- --- @param line string ---- @return { [1]: integer, [2]: integer, [3]: string }[] +--- @return { [1]: integer, [2]: integer }[] M.parse_tags = function(line) if string.find(line, "") ~= nil then return {} @@ -72,7 +72,6 @@ M.parse_tags = function(line) out[#out + 1] = { start_byte_index, end_byte_index, - "Tag", } end end diff --git a/tests/util/test_parse_tag.lua b/tests/util/test_parse_tag.lua index aa1e87e60..27694f773 100644 --- a/tests/util/test_parse_tag.lua +++ b/tests/util/test_parse_tag.lua @@ -5,7 +5,7 @@ local T = new_set() T["should find positions of all tags"] = function() local s = "#TODO I have a #meeting at noon" - eq({ { 1, 5, "Tag" }, { 16, 23, "Tag" } }, M.parse_tags(s)) + eq({ { 1, 5 }, { 16, 23 } }, M.parse_tags(s)) end T["should find four cases"] = function() @@ -22,7 +22,7 @@ end T["should ignore escaped tags"] = function() local s = "I have a #meeting at noon \\#not-a-tag" - eq({ { 10, 17, "Tag" } }, M.parse_tags(s)) + eq({ { 10, 17 } }, M.parse_tags(s)) s = [[\#notatag]] eq({}, M.parse_tags(s)) end @@ -53,7 +53,7 @@ end T["should ignore tags not on word boundaries"] = function() eq({}, M.parse_tags "foobar#notatag") - eq({ { 9, 12, "Tag" } }, M.parse_tags "foo bar #tag") + eq({ { 9, 12 } }, M.parse_tags "foo bar #tag") end T["should ignore tags in markdown links with parentheses"] = function()