From 711118fa92cda6ac7b380d69198dba1b11164035 Mon Sep 17 00:00:00 2001 From: Nathan Trudeau Date: Sun, 31 May 2026 11:55:56 -0400 Subject: [PATCH 1/2] fix: align reload tag offsets and incremental file order Ripgrep byte offsets are converted to JavaScript string offsets before custom-regex normalization so reload scans match editor scans for COBOL and SQL prefix patterns. Detection: - resolve ripgrep submatch byte positions against physical line text - use UTF-8 byte-to-string offset conversion for absolute and local offsets - cover issue #53 COBOL, SQL, bracket, and Unicode prefix cases Tree: - limit tag-order sorting to grouped root tag nodes - keep sibling file nodes sorted by path after incremental document replacement Packaging: - add npm publish ignores for source, tests, workflow tooling, and artifacts - preserve bundled dist runtime while excluding source maps Coverage: - add issue #53 regex matrix coverage for raw ripgrep payloads - add tree behavior coverage for sorted sibling files after updates - add issue #53 microbenchmark summary for scan-large-custom-regex Fixes https://github.com/FanaticPythoner/better-todo-tree/issues/53 --- .npmignore | 26 ++++++ artifacts/perf/issue53-tag-processing.md | 62 +++++++++++++ src/detection.js | 108 ++++++++++++++++++++--- src/tree.js | 2 +- test/detection.regex-matrix.test.js | 106 ++++++++++++++++++++++ test/packaging.ignore.test.js | 26 ++++++ test/tree.behavior.test.js | 40 +++++++++ 7 files changed, 358 insertions(+), 12 deletions(-) create mode 100644 .npmignore create mode 100644 artifacts/perf/issue53-tag-processing.md diff --git a/.npmignore b/.npmignore new file mode 100644 index 00000000..e4e09564 --- /dev/null +++ b/.npmignore @@ -0,0 +1,26 @@ +.vscode/ +.gitignore +node_modules/ +src/ +test/ +test-files/ +webpack.config.js +dist/extension.js.map +artifacts/ +[jJ]ustfile +.github/ +scripts/ +.nvmrc +.travis.yml +TODOS_LISTS/ +TODOS_LISTS/** +.tools/ +.act-artifacts/ +MIGRATION.md +OPEN_VSX_CERTIFICATE_REPORT.md +CHANGELOG.upstream.md +buildCodiconNames.js +old-*.js +*.bak +*~ +*.vsix diff --git a/artifacts/perf/issue53-tag-processing.md b/artifacts/perf/issue53-tag-processing.md new file mode 100644 index 00000000..f348dfb3 --- /dev/null +++ b/artifacts/perf/issue53-tag-processing.md @@ -0,0 +1,62 @@ +# Runtime Benchmarks + +- Baseline ref: `a6f60e0ce830c4649ac34fc05e5a1799ec91d151` +- Current source: working tree +- Node: `v25.2.0` +- Selection mode: `scenario-list` +- Declared suite: `microbenchmark` +- Result-count validation: `1 rows, suite-consistent=true, all-user-flow=false` + +## Machine Profile + +| Category | Field | Value | +| --- | --- | --- | +| Host | Hostname | n00ne-AERO-17-YD | +| Host | OS | Ubuntu 22.04.5 LTS | +| Host | Kernel | 6.8.0-124-generic | +| Host | Architecture | x64 | +| Host | Load Average | 4.11, 4.39, 4.39 | +| Host | Available Parallelism | - | +| CPU | Model | Intel(R) Core(TM) i9-14900HX | +| CPU | Vendor | GenuineIntel | +| CPU | Topology | 16 logical CPU(s), 2 thread(s)/core, 8 core(s)/socket, 1 socket(s), 1 NUMA node(s) | +| CPU | Frequency | 800 MHz to 5,800 MHz | +| CPU | Cache | L1d 384 KiB (8 instances), L1i 256 KiB (8 instances), L2 16 MiB (8 instances), L3 36 MiB (1 instance) | +| Memory | Total RAM | 62.51 GiB (`67,119,767,552 bytes`) | +| Memory | Available At Collection | 8.24 GiB (`8,849,858,560 bytes`) | +| Memory | Online Physical RAM | 66.00 GiB (`70,866,960,384 bytes`) | +| Memory | Swap | total 120 GiB (`128,848,973,824 bytes`); free 93.69 GiB (`100,593,766,400 bytes`) | +| Memory | DMI / SPD | Unavailable: /sys/firmware/dmi/tables/smbios_entry_point: Permission denied /dev/mem: Permission denied | +| Storage | Root Device | nvme1n1 (Samsung SSD 9100 PRO 4TB), 3.64 TiB (`4,000,787,030,016 bytes`), transport nvme, rotational=false, readOnly=false | + +## Scenario Model + +| Scenario | Kind | User flow | Measurement scope | Input model | +| --- | --- | --- | --- | --- | +| scan-large-custom-regex | microbenchmark | - | - | - | + +## Metric Model + +| Table | Value model | Accuracy model | +| --- | --- | --- | +| Latency | Wall-clock elapsed time around each harness flow iteration, summarized as min/p50/p90/p95/max. | Exact for each sampled iteration in this run. | +| Profiled RSS Burst | Difference between the isolated scenario worker RSS measured immediately before the flow and that worker iteration's OS high-water-mark peak RSS. | Exact for the measured worker iteration, using `process.memoryUsage().rss` at flow start and `process.resourceUsage().maxRSS` for the peak. | +| Profiled Peak RSS | Highest process RSS reached by each isolated scenario worker iteration. | Exact worker-process high-water mark from `process.resourceUsage().maxRSS`. | + +## Latency + +| Scenario | Kind | Baseline p50 ms | Current p50 ms | Baseline p90 ms | Current p90 ms | Baseline p95 ms | Current p95 ms | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | +| scan-large-custom-regex | microbenchmark | 6.73 | 10.13 | 7.32 | 11.69 | 8.25 | 11.89 | + +## Profiled RSS Burst + +| Scenario | Kind | Baseline p50 MiB | Current p50 MiB | Baseline p90 MiB | Current p90 MiB | Baseline p95 MiB | Current p95 MiB | Baseline Max MiB | Current Max MiB | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| scan-large-custom-regex | microbenchmark | 0.75 | 0.75 | 1 | 1.63 | 1.13 | 4.5 | 1.13 | 4.5 | + +## Profiled Peak RSS + +| Scenario | Kind | Baseline p50 RSS MiB | Current p50 RSS MiB | Baseline p90 RSS MiB | Current p90 RSS MiB | Baseline p95 RSS MiB | Current p95 RSS MiB | Baseline Max RSS MiB | Current Max RSS MiB | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| scan-large-custom-regex | microbenchmark | 81.6 | 80.79 | 81.85 | 80.86 | 81.85 | 80.93 | 81.85 | 80.93 | diff --git a/src/detection.js b/src/detection.js index 8b76465c..49999c45 100644 --- a/src/detection.js +++ b/src/detection.js @@ -87,6 +87,101 @@ function offsetFromLineAndColumn( text, lineOffsets, line, column ) return Math.min( offset, text.length ); } +function utf8ByteOffsetToStringOffset( text, byteOffset ) +{ + if( typeof ( byteOffset ) !== 'number' || byteOffset <= 0 ) + { + return 0; + } + + var bytesSeen = 0; + var stringOffset = 0; + + while( stringOffset < text.length && bytesSeen < byteOffset ) + { + var codePoint = text.codePointAt( stringOffset ); + var nextStringOffset = stringOffset + ( codePoint > 0xFFFF ? 2 : 1 ); + var nextBytesSeen = bytesSeen + utf8ByteLengthOfCodePoint( codePoint ); + + if( nextBytesSeen > byteOffset ) + { + break; + } + + bytesSeen = nextBytesSeen; + stringOffset = nextStringOffset; + } + + return stringOffset; +} + +function utf8ByteLengthOfCodePoint( codePoint ) +{ + if( codePoint <= 0x7F ) + { + return 1; + } + + if( codePoint <= 0x7FF ) + { + return 2; + } + + if( codePoint <= 0xFFFF ) + { + return 3; + } + + return 4; +} + +function getLineTextAtNumber( text, lineOffsets, lineNumber ) +{ + var lineIndex = Math.min( Math.max( lineNumber - 1, 0 ), lineOffsets.length - 1 ); + var startOffset = lineOffsets[ lineIndex ] || 0; + var endOffset = lineIndex + 1 < lineOffsets.length ? lineOffsets[ lineIndex + 1 ] - 1 : text.length; + + if( endOffset > startOffset && text[ endOffset - 1 ] === '\r' ) + { + endOffset--; + } + + return { + text: text.slice( startOffset, endOffset ), + startOffset: startOffset, + endOffset: endOffset + }; +} + +function resolveRipgrepLocalStringOffset( lineText, byteOffset, column ) +{ + if( typeof ( byteOffset ) === 'number' ) + { + return utf8ByteOffsetToStringOffset( lineText, byteOffset ); + } + + return Math.max( ( column || 1 ) - 1, 0 ); +} + +function resolveRipgrepMatchStartOffset( context, match ) +{ + var firstSubmatch = match.submatches && match.submatches.length > 0 ? match.submatches[ 0 ] : undefined; + var byteOffset = firstSubmatch && typeof ( firstSubmatch.start ) === 'number' ? firstSubmatch.start : undefined; + + if( typeof ( match.line ) === 'number' && match.line >= 1 ) + { + var line = getLineTextAtNumber( context.text, context.lineOffsets, match.line ); + return line.startOffset + resolveRipgrepLocalStringOffset( line.text, byteOffset, match.column ); + } + + if( typeof ( match.absoluteOffset ) === 'number' ) + { + return utf8ByteOffsetToStringOffset( context.text, match.absoluteOffset + ( byteOffset || 0 ) ); + } + + return offsetFromLineAndColumn( context.text, context.lineOffsets, match.line, match.column ); +} + function splitPhysicalLines( text, startOffset ) { var lines = []; @@ -1215,16 +1310,7 @@ function normalizeRipgrepMatch( uri, text, match ) } var context = createScanContext( uri, text ); - var rawStartOffset; - - if( match.absoluteOffset !== undefined && match.submatches && match.submatches.length > 0 ) - { - rawStartOffset = match.absoluteOffset + match.submatches[ 0 ].start; - } - else - { - rawStartOffset = offsetFromLineAndColumn( text, context.lineOffsets, match.line, match.column ); - } + var rawStartOffset = resolveRipgrepMatchStartOffset( context, match ); var exactMatch = findExactRegexExecMatch( context, rawStartOffset ); @@ -1290,7 +1376,7 @@ function normalizeWorkspaceRegexMatch( uri, match, snapshot ) var contextText = typeof match.lines === 'string' && match.lines.length > 0 ? match.lines : ( match.match || "" ); var localMatchText = typeof match.match === 'string' && match.match.length > 0 ? match.match : contextText; var localMatchStart = match.submatches && match.submatches.length > 0 && typeof match.submatches[ 0 ].start === 'number' ? - match.submatches[ 0 ].start : + resolveRipgrepLocalStringOffset( contextText, match.submatches[ 0 ].start, match.column ) : Math.max( ( match.column || 1 ) - 1, 0 ); var resourceConfig = snapshot && typeof ( snapshot.getResourceConfig ) === 'function' ? snapshot.getResourceConfig( uri ) : diff --git a/src/tree.js b/src/tree.js index 9ec46a0a..7668920f 100644 --- a/src/tree.js +++ b/src/tree.js @@ -93,7 +93,7 @@ var sortByLineAndColumn = function( a, b ) var tagSortIndex = function( node ) { - if( node && node.tag !== undefined ) + if( node && node.isRootTagNode === true && node.isGroupNode === true && node.tag !== undefined ) { var tags = config.tags(); var index = tags.indexOf( node.tag ); diff --git a/test/detection.regex-matrix.test.js b/test/detection.regex-matrix.test.js index 27f0a207..e4025b73 100644 --- a/test/detection.regex-matrix.test.js +++ b/test/detection.regex-matrix.test.js @@ -383,6 +383,112 @@ QUnit.module( "detection regex matrix", function() assert.deepEqual( stripCaptureGroupOffsets( reloadResults ), stripCaptureGroupOffsets( openResults ) ); } ); + QUnit.test( "issue #53 raw ripgrep byte offsets match editor normalization", function( assert ) + { + function byteLength( value ) + { + return Buffer.byteLength( value, 'utf8' ); + } + + function resultSnapshot( result ) + { + return { + line: result.line, + column: result.column, + actualTag: result.actualTag, + displayText: result.displayText, + after: result.after, + match: result.match + }; + } + + function createRipgrepMatches( fsPath, text, regex ) + { + var lines = text.split( '\n' ); + var matches = []; + var charOffset = 0; + var lineIndex; + + for( lineIndex = 0; lineIndex < lines.length; lineIndex++ ) + { + var line = lines[ lineIndex ]; + var lineRegex = new RegExp( regex.source, regex.flags.replace( 'g', '' ) ); + var match = lineRegex.exec( line ); + + if( match ) + { + matches.push( { + fsPath: fsPath, + line: lineIndex + 1, + column: match.index + 1, + match: match[ 0 ], + lines: line + '\n', + absoluteOffset: byteLength( text.slice( 0, charOffset ) ), + submatches: [ { + match: match[ 0 ], + start: byteLength( line.slice( 0, match.index ) ), + end: byteLength( line.slice( 0, match.index + match[ 0 ].length ) ) + } ] + } ); + } + + charOffset += line.length + 1; + } + + return matches; + } + + var tagList = [ 'BUG', 'FIXME', 'HACK', 'TODO', '[ ]', '[x]', 'MOMA' ]; + var regexSource = '(//|#|