diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..e4e0956 --- /dev/null +++ b/.npmignore @@ -0,0 +1,26 @@ +.vscode/ +.gitignore +node_modules/ +src/ +test/ +test-files/ +webpack.config.js +dist/extension.js.map +artifacts/ +[jJ]ustfile +.github/ +scripts/ +.nvmrc +.travis.yml +TODOS_LISTS/ +TODOS_LISTS/** +.tools/ +.act-artifacts/ +MIGRATION.md +OPEN_VSX_CERTIFICATE_REPORT.md +CHANGELOG.upstream.md +buildCodiconNames.js +old-*.js +*.bak +*~ +*.vsix diff --git a/artifacts/perf/issue53-tag-processing.md b/artifacts/perf/issue53-tag-processing.md new file mode 100644 index 0000000..f348dfb --- /dev/null +++ b/artifacts/perf/issue53-tag-processing.md @@ -0,0 +1,62 @@ +# Runtime Benchmarks + +- Baseline ref: `a6f60e0ce830c4649ac34fc05e5a1799ec91d151` +- Current source: working tree +- Node: `v25.2.0` +- Selection mode: `scenario-list` +- Declared suite: `microbenchmark` +- Result-count validation: `1 rows, suite-consistent=true, all-user-flow=false` + +## Machine Profile + +| Category | Field | Value | +| --- | --- | --- | +| Host | Hostname | n00ne-AERO-17-YD | +| Host | OS | Ubuntu 22.04.5 LTS | +| Host | Kernel | 6.8.0-124-generic | +| Host | Architecture | x64 | +| Host | Load Average | 4.11, 4.39, 4.39 | +| Host | Available Parallelism | - | +| CPU | Model | Intel(R) Core(TM) i9-14900HX | +| CPU | Vendor | GenuineIntel | +| CPU | Topology | 16 logical CPU(s), 2 thread(s)/core, 8 core(s)/socket, 1 socket(s), 1 NUMA node(s) | +| CPU | Frequency | 800 MHz to 5,800 MHz | +| CPU | Cache | L1d 384 KiB (8 instances), L1i 256 KiB (8 instances), L2 16 MiB (8 instances), L3 36 MiB (1 instance) | +| Memory | Total RAM | 62.51 GiB (`67,119,767,552 bytes`) | +| Memory | Available At Collection | 8.24 GiB (`8,849,858,560 bytes`) | +| Memory | Online Physical RAM | 66.00 GiB (`70,866,960,384 bytes`) | +| Memory | Swap | total 120 GiB (`128,848,973,824 bytes`); free 93.69 GiB (`100,593,766,400 bytes`) | +| Memory | DMI / SPD | Unavailable: /sys/firmware/dmi/tables/smbios_entry_point: Permission denied /dev/mem: Permission denied | +| Storage | Root Device | nvme1n1 (Samsung SSD 9100 PRO 4TB), 3.64 TiB (`4,000,787,030,016 bytes`), transport nvme, rotational=false, readOnly=false | + +## Scenario Model + +| Scenario | Kind | User flow | Measurement scope | Input model | +| --- | --- | --- | --- | --- | +| scan-large-custom-regex | microbenchmark | - | - | - | + +## Metric Model + +| Table | Value model | Accuracy model | +| --- | --- | --- | +| Latency | Wall-clock elapsed time around each harness flow iteration, summarized as min/p50/p90/p95/max. | Exact for each sampled iteration in this run. | +| Profiled RSS Burst | Difference between the isolated scenario worker RSS measured immediately before the flow and that worker iteration's OS high-water-mark peak RSS. | Exact for the measured worker iteration, using `process.memoryUsage().rss` at flow start and `process.resourceUsage().maxRSS` for the peak. | +| Profiled Peak RSS | Highest process RSS reached by each isolated scenario worker iteration. | Exact worker-process high-water mark from `process.resourceUsage().maxRSS`. | + +## Latency + +| Scenario | Kind | Baseline p50 ms | Current p50 ms | Baseline p90 ms | Current p90 ms | Baseline p95 ms | Current p95 ms | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | +| scan-large-custom-regex | microbenchmark | 6.73 | 10.13 | 7.32 | 11.69 | 8.25 | 11.89 | + +## Profiled RSS Burst + +| Scenario | Kind | Baseline p50 MiB | Current p50 MiB | Baseline p90 MiB | Current p90 MiB | Baseline p95 MiB | Current p95 MiB | Baseline Max MiB | Current Max MiB | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| scan-large-custom-regex | microbenchmark | 0.75 | 0.75 | 1 | 1.63 | 1.13 | 4.5 | 1.13 | 4.5 | + +## Profiled Peak RSS + +| Scenario | Kind | Baseline p50 RSS MiB | Current p50 RSS MiB | Baseline p90 RSS MiB | Current p90 RSS MiB | Baseline p95 RSS MiB | Current p95 RSS MiB | Baseline Max RSS MiB | Current Max RSS MiB | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| scan-large-custom-regex | microbenchmark | 81.6 | 80.79 | 81.85 | 80.86 | 81.85 | 80.93 | 81.85 | 80.93 | diff --git a/scripts/evidence/regexRegistryEquivalenceHarness.js b/scripts/evidence/regexRegistryEquivalenceHarness.js index 5c37f93..f126754 100644 --- a/scripts/evidence/regexRegistryEquivalenceHarness.js +++ b/scripts/evidence/regexRegistryEquivalenceHarness.js @@ -102,9 +102,38 @@ function baselineRefCandidates() return candidates; } +function expandFirstParentCandidates( ref ) +{ + if( gitRefExists( ref ) !== true ) + { + return []; + } + + return splitLines( runGit( [ 'rev-list', '--first-parent', ref ] ) ).filter( function( candidate ) + { + return candidate !== ''; + } ); +} + +function expandBaselineRefCandidates( seeds ) +{ + var candidates = []; + + seeds.forEach( function( seed ) + { + expandFirstParentCandidates( seed ).forEach( function( candidate ) + { + pushUniqueCandidate( candidates, candidate ); + } ); + } ); + + return candidates; +} + function resolveBaselineRef( requestedRef ) { var candidates; + var entriesByRef = new Map(); var selected = null; if( requestedRef !== DEFAULT_BASELINE_REF ) @@ -115,10 +144,16 @@ function resolveBaselineRef( requestedRef ) }; } - candidates = baselineRefCandidates().filter( gitRefExists ); + candidates = expandBaselineRefCandidates( baselineRefCandidates() ); candidates.some( function( candidate ) { - var entries = collectBaselineRegexEntries( candidate ); + var entries = entriesByRef.get( candidate ); + + if( entries === undefined ) + { + entries = collectBaselineRegexEntries( candidate ); + entriesByRef.set( candidate, entries ); + } if( entries.length > 0 ) { diff --git a/src/detection.js b/src/detection.js index 8b76465..49999c4 100644 --- a/src/detection.js +++ b/src/detection.js @@ -87,6 +87,101 @@ function offsetFromLineAndColumn( text, lineOffsets, line, column ) return Math.min( offset, text.length ); } +function utf8ByteOffsetToStringOffset( text, byteOffset ) +{ + if( typeof ( byteOffset ) !== 'number' || byteOffset <= 0 ) + { + return 0; + } + + var bytesSeen = 0; + var stringOffset = 0; + + while( stringOffset < text.length && bytesSeen < byteOffset ) + { + var codePoint = text.codePointAt( stringOffset ); + var nextStringOffset = stringOffset + ( codePoint > 0xFFFF ? 2 : 1 ); + var nextBytesSeen = bytesSeen + utf8ByteLengthOfCodePoint( codePoint ); + + if( nextBytesSeen > byteOffset ) + { + break; + } + + bytesSeen = nextBytesSeen; + stringOffset = nextStringOffset; + } + + return stringOffset; +} + +function utf8ByteLengthOfCodePoint( codePoint ) +{ + if( codePoint <= 0x7F ) + { + return 1; + } + + if( codePoint <= 0x7FF ) + { + return 2; + } + + if( codePoint <= 0xFFFF ) + { + return 3; + } + + return 4; +} + +function getLineTextAtNumber( text, lineOffsets, lineNumber ) +{ + var lineIndex = Math.min( Math.max( lineNumber - 1, 0 ), lineOffsets.length - 1 ); + var startOffset = lineOffsets[ lineIndex ] || 0; + var endOffset = lineIndex + 1 < lineOffsets.length ? lineOffsets[ lineIndex + 1 ] - 1 : text.length; + + if( endOffset > startOffset && text[ endOffset - 1 ] === '\r' ) + { + endOffset--; + } + + return { + text: text.slice( startOffset, endOffset ), + startOffset: startOffset, + endOffset: endOffset + }; +} + +function resolveRipgrepLocalStringOffset( lineText, byteOffset, column ) +{ + if( typeof ( byteOffset ) === 'number' ) + { + return utf8ByteOffsetToStringOffset( lineText, byteOffset ); + } + + return Math.max( ( column || 1 ) - 1, 0 ); +} + +function resolveRipgrepMatchStartOffset( context, match ) +{ + var firstSubmatch = match.submatches && match.submatches.length > 0 ? match.submatches[ 0 ] : undefined; + var byteOffset = firstSubmatch && typeof ( firstSubmatch.start ) === 'number' ? firstSubmatch.start : undefined; + + if( typeof ( match.line ) === 'number' && match.line >= 1 ) + { + var line = getLineTextAtNumber( context.text, context.lineOffsets, match.line ); + return line.startOffset + resolveRipgrepLocalStringOffset( line.text, byteOffset, match.column ); + } + + if( typeof ( match.absoluteOffset ) === 'number' ) + { + return utf8ByteOffsetToStringOffset( context.text, match.absoluteOffset + ( byteOffset || 0 ) ); + } + + return offsetFromLineAndColumn( context.text, context.lineOffsets, match.line, match.column ); +} + function splitPhysicalLines( text, startOffset ) { var lines = []; @@ -1215,16 +1310,7 @@ function normalizeRipgrepMatch( uri, text, match ) } var context = createScanContext( uri, text ); - var rawStartOffset; - - if( match.absoluteOffset !== undefined && match.submatches && match.submatches.length > 0 ) - { - rawStartOffset = match.absoluteOffset + match.submatches[ 0 ].start; - } - else - { - rawStartOffset = offsetFromLineAndColumn( text, context.lineOffsets, match.line, match.column ); - } + var rawStartOffset = resolveRipgrepMatchStartOffset( context, match ); var exactMatch = findExactRegexExecMatch( context, rawStartOffset ); @@ -1290,7 +1376,7 @@ function normalizeWorkspaceRegexMatch( uri, match, snapshot ) var contextText = typeof match.lines === 'string' && match.lines.length > 0 ? match.lines : ( match.match || "" ); var localMatchText = typeof match.match === 'string' && match.match.length > 0 ? match.match : contextText; var localMatchStart = match.submatches && match.submatches.length > 0 && typeof match.submatches[ 0 ].start === 'number' ? - match.submatches[ 0 ].start : + resolveRipgrepLocalStringOffset( contextText, match.submatches[ 0 ].start, match.column ) : Math.max( ( match.column || 1 ) - 1, 0 ); var resourceConfig = snapshot && typeof ( snapshot.getResourceConfig ) === 'function' ? snapshot.getResourceConfig( uri ) : diff --git a/src/tree.js b/src/tree.js index 9ec46a0..7668920 100644 --- a/src/tree.js +++ b/src/tree.js @@ -93,7 +93,7 @@ var sortByLineAndColumn = function( a, b ) var tagSortIndex = function( node ) { - if( node && node.tag !== undefined ) + if( node && node.isRootTagNode === true && node.isGroupNode === true && node.tag !== undefined ) { var tags = config.tags(); var index = tags.indexOf( node.tag ); diff --git a/test/detection.regex-matrix.test.js b/test/detection.regex-matrix.test.js index 27f0a20..e4025b7 100644 --- a/test/detection.regex-matrix.test.js +++ b/test/detection.regex-matrix.test.js @@ -383,6 +383,112 @@ QUnit.module( "detection regex matrix", function() assert.deepEqual( stripCaptureGroupOffsets( reloadResults ), stripCaptureGroupOffsets( openResults ) ); } ); + QUnit.test( "issue #53 raw ripgrep byte offsets match editor normalization", function( assert ) + { + function byteLength( value ) + { + return Buffer.byteLength( value, 'utf8' ); + } + + function resultSnapshot( result ) + { + return { + line: result.line, + column: result.column, + actualTag: result.actualTag, + displayText: result.displayText, + after: result.after, + match: result.match + }; + } + + function createRipgrepMatches( fsPath, text, regex ) + { + var lines = text.split( '\n' ); + var matches = []; + var charOffset = 0; + var lineIndex; + + for( lineIndex = 0; lineIndex < lines.length; lineIndex++ ) + { + var line = lines[ lineIndex ]; + var lineRegex = new RegExp( regex.source, regex.flags.replace( 'g', '' ) ); + var match = lineRegex.exec( line ); + + if( match ) + { + matches.push( { + fsPath: fsPath, + line: lineIndex + 1, + column: match.index + 1, + match: match[ 0 ], + lines: line + '\n', + absoluteOffset: byteLength( text.slice( 0, charOffset ) ), + submatches: [ { + match: match[ 0 ], + start: byteLength( line.slice( 0, match.index ) ), + end: byteLength( line.slice( 0, match.index + match[ 0 ].length ) ) + } ] + } ); + } + + charOffset += line.length + 1; + } + + return matches; + } + + var tagList = [ 'BUG', 'FIXME', 'HACK', 'TODO', '[ ]', '[x]', 'MOMA' ]; + var regexSource = '(//|#|