FanaticPythoner · FanaticPythoner · May 31, 2026 · May 31, 2026 · May 31, 2026
@@ -0,0 +1,26 @@
+.vscode/
+.gitignore
+node_modules/
+src/
+test/
+test-files/
+webpack.config.js
+dist/extension.js.map
+artifacts/
+[jJ]ustfile
+.github/
+scripts/
+.nvmrc
+.travis.yml
+TODOS_LISTS/
+TODOS_LISTS/**
+.tools/
+.act-artifacts/
+MIGRATION.md
+OPEN_VSX_CERTIFICATE_REPORT.md
+CHANGELOG.upstream.md
+buildCodiconNames.js
+old-*.js
+*.bak
+*~
+*.vsix
@@ -0,0 +1,62 @@
+# Runtime Benchmarks
+
+- Baseline ref: `a6f60e0ce830c4649ac34fc05e5a1799ec91d151`
+- Current source: working tree
+- Node: `v25.2.0`
+- Selection mode: `scenario-list`
+- Declared suite: `microbenchmark`
+- Result-count validation: `1 row, suite-consistent=true, all-user-flow=false`
+
+## Machine Profile
+
+| Category | Field | Value |
+| --- | --- | --- |
+| Host | Hostname | n00ne-AERO-17-YD |
+| Host | OS | Ubuntu 22.04.5 LTS |
+| Host | Kernel | 6.8.0-124-generic |
+| Host | Architecture | x64 |
+| Host | Load Average | 4.11, 4.39, 4.39 |
+| Host | Available Parallelism | - |
+| CPU | Model | Intel(R) Core(TM) i9-14900HX |
+| CPU | Vendor | GenuineIntel |
+| CPU | Topology | 16 logical CPU(s), 2 thread(s)/core, 8 core(s)/socket, 1 socket(s), 1 NUMA node(s) |
+| CPU | Frequency | 800 MHz to 5,800 MHz |
+| CPU | Cache | L1d 384 KiB (8 instances), L1i 256 KiB (8 instances), L2 16 MiB (8 instances), L3 36 MiB (1 instance) |
+| Memory | Total RAM | 62.51 GiB (`67,119,767,552 bytes`) |
+| Memory | Available At Collection | 8.24 GiB (`8,849,858,560 bytes`) |
+| Memory | Online Physical RAM | 66.00 GiB (`70,866,960,384 bytes`) |
+| Memory | Swap | total 120 GiB (`128,848,973,824 bytes`); free 93.69 GiB (`100,593,766,400 bytes`) |
+| Memory | DMI / SPD | Unavailable: /sys/firmware/dmi/tables/smbios_entry_point: Permission denied /dev/mem: Permission denied |
+| Storage | Root Device | nvme1n1 (Samsung SSD 9100 PRO 4TB), 3.64 TiB (`4,000,787,030,016 bytes`), transport nvme, rotational=false, readOnly=false |
+
+## Scenario Model
+
+| Scenario | Kind | User flow | Measurement scope | Input model |
+| --- | --- | --- | --- | --- |
+| scan-large-custom-regex | microbenchmark | - | - | - |
+
+## Metric Model
+
+| Table | Value model | Accuracy model |
+| --- | --- | --- |
+| Latency | Wall-clock elapsed time around each harness flow iteration, summarized as min/p50/p90/p95/max. | Exact for each sampled iteration in this run. |
+| Profiled RSS Burst | Difference between the isolated scenario worker RSS measured immediately before the flow and that worker iteration's OS high-water-mark peak RSS. | Exact for the measured worker iteration, using `process.memoryUsage().rss` at flow start and `process.resourceUsage().maxRSS` for the peak. |
+| Profiled Peak RSS | Highest process RSS reached by each isolated scenario worker iteration. | Exact worker-process high-water mark from `process.resourceUsage().maxRSS`. |
+
+## Latency
+
+| Scenario | Kind | Baseline p50 ms | Current p50 ms | Baseline p90 ms | Current p90 ms | Baseline p95 ms | Current p95 ms |
+| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |
+| scan-large-custom-regex | microbenchmark | 6.73 | 10.13 | 7.32 | 11.69 | 8.25 | 11.89 |
+
+## Profiled RSS Burst
+
+| Scenario | Kind | Baseline p50 MiB | Current p50 MiB | Baseline p90 MiB | Current p90 MiB | Baseline p95 MiB | Current p95 MiB | Baseline Max MiB | Current Max MiB |
+| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+| scan-large-custom-regex | microbenchmark | 0.75 | 0.75 | 1 | 1.63 | 1.13 | 4.5 | 1.13 | 4.5 |
+
+## Profiled Peak RSS
+
+| Scenario | Kind | Baseline p50 RSS MiB | Current p50 RSS MiB | Baseline p90 RSS MiB | Current p90 RSS MiB | Baseline p95 RSS MiB | Current p95 RSS MiB | Baseline Max RSS MiB | Current Max RSS MiB |
+| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+| scan-large-custom-regex | microbenchmark | 81.6 | 80.79 | 81.85 | 80.86 | 81.85 | 80.93 | 81.85 | 80.93 |
@@ -102,9 +102,38 @@ function baselineRefCandidates()
     return candidates;
 }
 
+function expandFirstParentCandidates( ref ) // Lists the entries printed by `git rev-list --first-parent <ref>`; returns [] when the ref does not exist.
+{
+    if( gitRefExists( ref ) !== true ) // strict check: anything other than literal true is treated as "ref missing"
+    {
+        return [];
+    }
+
+    return splitLines( runGit( [ 'rev-list', '--first-parent', ref ] ) ).filter( function( candidate ) // assumes runGit returns the command output as text - TODO confirm
+    {
+        return candidate !== ''; // drop empty entries produced by splitLines (presumably from a trailing newline)
+    } );
+}
+
+function expandBaselineRefCandidates( seeds ) // Expands every seed ref into its first-parent history and collects the results into one list, in seed order.
+{
+    var candidates = [];
+
+    seeds.forEach( function( seed )
+    {
+        expandFirstParentCandidates( seed ).forEach( function( candidate ) // seeds that do not exist contribute nothing (empty array)
+        {
+            pushUniqueCandidate( candidates, candidate ); // helper defined elsewhere; presumably skips entries already present - verify
+        } );
+    } );
+
+    return candidates;
+}
+
 function resolveBaselineRef( requestedRef )
 {
     var candidates;
+    var entriesByRef = new Map();
     var selected = null;
 
     if( requestedRef !== DEFAULT_BASELINE_REF )
@@ -115,10 +144,16 @@ function resolveBaselineRef( requestedRef )
         };
     }
 
-    candidates = baselineRefCandidates().filter( gitRefExists );
+    candidates = expandBaselineRefCandidates( baselineRefCandidates() );
     candidates.some( function( candidate )
     {
-        var entries = collectBaselineRegexEntries( candidate );
+        var entries = entriesByRef.get( candidate );
+
+        if( entries === undefined )
+        {
+            entries = collectBaselineRegexEntries( candidate );
+            entriesByRef.set( candidate, entries );
+        }
 
         if( entries.length > 0 )
         {

@@ -87,6 +87,101 @@ function offsetFromLineAndColumn( text, lineOffsets, line, column )
     return Math.min( offset, text.length );
 }
 
+function utf8ByteOffsetToStringOffset( text, byteOffset ) // Converts a UTF-8 byte offset into `text` to the matching JavaScript string (UTF-16 code unit) index.
+{
+    if( typeof ( byteOffset ) !== 'number' || byteOffset <= 0 ) // non-numeric, zero or negative offsets map to the start of the text
+    {
+        return 0;
+    }
+
+    var bytesSeen = 0; // UTF-8 bytes consumed so far
+    var stringOffset = 0; // string index reached so far
+
+    while( stringOffset < text.length && bytesSeen < byteOffset )
+    {
+        var codePoint = text.codePointAt( stringOffset );
+        var nextStringOffset = stringOffset + ( codePoint > 0xFFFF ? 2 : 1 ); // code points above U+FFFF occupy two UTF-16 code units
+        var nextBytesSeen = bytesSeen + utf8ByteLengthOfCodePoint( codePoint );
+
+        if( nextBytesSeen > byteOffset ) // byteOffset falls inside this character: stop in front of it
+        {
+            break;
+        }
+
+        bytesSeen = nextBytesSeen;
+        stringOffset = nextStringOffset;
+    }
+
+    return stringOffset; // equals text.length when byteOffset reaches or passes the end of the encoded text
+}
+
+function utf8ByteLengthOfCodePoint( codePoint ) // Returns how many bytes (1 to 4) UTF-8 uses to encode the given code point.
+{
+    if( codePoint <= 0x7F ) // ASCII range
+    {
+        return 1;
+    }
+
+    if( codePoint <= 0x7FF ) // U+0080 to U+07FF
+    {
+        return 2;
+    }
+
+    if( codePoint <= 0xFFFF ) // U+0800 to U+FFFF
+    {
+        return 3;
+    }
+
+    return 4; // everything above U+FFFF
+}
+
+function getLineTextAtNumber( text, lineOffsets, lineNumber ) // Returns { text, startOffset, endOffset } for the 1-based lineNumber, clamped to the lines listed in lineOffsets.
+{
+    var lineIndex = Math.min( Math.max( lineNumber - 1, 0 ), lineOffsets.length - 1 ); // 0-based index, clamped into [0, lineOffsets.length - 1]
+    var startOffset = lineOffsets[ lineIndex ] || 0; // falls back to 0 when the entry is missing (e.g. empty lineOffsets)
+    var endOffset = lineIndex + 1 < lineOffsets.length ? lineOffsets[ lineIndex + 1 ] - 1 : text.length; // one before the next line's start, or end of text for the last line; assumes a one-character terminator - TODO confirm
+
+    if( endOffset > startOffset && text[ endOffset - 1 ] === '\r' ) // also drop a trailing '\r' (as in CRLF line endings)
+    {
+        endOffset--;
+    }
+
+    return {
+        text: text.slice( startOffset, endOffset ), // line content without the stripped terminator characters
+        startOffset: startOffset,
+        endOffset: endOffset
+    };
+}
+
+function resolveRipgrepLocalStringOffset( lineText, byteOffset, column ) // Resolves a match start inside lineText: uses the UTF-8 byteOffset when numeric, otherwise the 1-based column.
+{
+    if( typeof ( byteOffset ) === 'number' )
+    {
+        return utf8ByteOffsetToStringOffset( lineText, byteOffset ); // byte offset is converted to a string index
+    }
+
+    return Math.max( ( column || 1 ) - 1, 0 ); // column is used as-is (no byte conversion); a missing, zero or negative column yields 0
+}
+
+function resolveRipgrepMatchStartOffset( context, match ) // Computes the string offset in context.text at which the given match starts.
+{
+    var firstSubmatch = match.submatches && match.submatches.length > 0 ? match.submatches[ 0 ] : undefined; // only the first submatch is considered
+    var byteOffset = firstSubmatch && typeof ( firstSubmatch.start ) === 'number' ? firstSubmatch.start : undefined; // numeric submatch start, else undefined
+
+    if( typeof ( match.line ) === 'number' && match.line >= 1 ) // preferred path: find the line, then resolve the offset inside that line
+    {
+        var line = getLineTextAtNumber( context.text, context.lineOffsets, match.line );
+        return line.startOffset + resolveRipgrepLocalStringOffset( line.text, byteOffset, match.column );
+    }
+
+    if( typeof ( match.absoluteOffset ) === 'number' ) // no usable line number: treat absoluteOffset plus submatch start as a byte offset into the whole text
+    {
+        return utf8ByteOffsetToStringOffset( context.text, match.absoluteOffset + ( byteOffset || 0 ) );
+    }
+
+    return offsetFromLineAndColumn( context.text, context.lineOffsets, match.line, match.column ); // last resort: line/column lookup
+}
+
 function splitPhysicalLines( text, startOffset )
 {
     var lines = [];
@@ -1215,16 +1310,7 @@ function normalizeRipgrepMatch( uri, text, match )
     }
 
     var context = createScanContext( uri, text );
-    var rawStartOffset;
-
-    if( match.absoluteOffset !== undefined && match.submatches && match.submatches.length > 0 )
-    {
-        rawStartOffset = match.absoluteOffset + match.submatches[ 0 ].start;
-    }
-    else
-    {
-        rawStartOffset = offsetFromLineAndColumn( text, context.lineOffsets, match.line, match.column );
-    }
+    var rawStartOffset = resolveRipgrepMatchStartOffset( context, match );
 
     var exactMatch = findExactRegexExecMatch( context, rawStartOffset );
 
@@ -1290,7 +1376,7 @@ function normalizeWorkspaceRegexMatch( uri, match, snapshot )
     var contextText = typeof match.lines === 'string' && match.lines.length > 0 ? match.lines : ( match.match || "" );
     var localMatchText = typeof match.match === 'string' && match.match.length > 0 ? match.match : contextText;
     var localMatchStart = match.submatches && match.submatches.length > 0 && typeof match.submatches[ 0 ].start === 'number' ?
-        match.submatches[ 0 ].start :
+        resolveRipgrepLocalStringOffset( contextText, match.submatches[ 0 ].start, match.column ) :
         Math.max( ( match.column || 1 ) - 1, 0 );
     var resourceConfig = snapshot && typeof ( snapshot.getResourceConfig ) === 'function' ?
         snapshot.getResourceConfig( uri ) :

@@ -93,7 +93,7 @@ var sortByLineAndColumn = function( a, b )
 
 var tagSortIndex = function( node )
 {
-    if( node && node.tag !== undefined )
+    if( node && node.isRootTagNode === true && node.isGroupNode === true && node.tag !== undefined )
     {
         var tags = config.tags();
         var index = tags.indexOf( node.tag );

@@ -383,6 +383,112 @@ QUnit.module( "detection regex matrix", function()
         assert.deepEqual( stripCaptureGroupOffsets( reloadResults ), stripCaptureGroupOffsets( openResults ) );
     } );
 
+    QUnit.test( "issue #53 raw ripgrep byte offsets match editor normalization", function( assert )
+    {
+        function byteLength( value ) // Size of `value` in bytes when encoded as UTF-8.
+        {
+            return Buffer.byteLength( value, 'utf8' );
+        }
+
+        function resultSnapshot( result ) // Copies only the fields that the test compares between the two scan paths.
+        {
+            return {
+                line: result.line,
+                column: result.column,
+                actualTag: result.actualTag,
+                displayText: result.displayText,
+                after: result.after,
+                match: result.match
+            };
+        }
+
+        function createRipgrepMatches( fsPath, text, regex ) // Builds one synthetic match record per matching line of `text`, with offsets expressed in UTF-8 bytes.
+        {
+            var lines = text.split( '\n' );
+            var matches = [];
+            var charOffset = 0; // string index of the current line's start within text
+            var lineIndex;
+
+            for( lineIndex = 0; lineIndex < lines.length; lineIndex++ )
+            {
+                var line = lines[ lineIndex ];
+                var lineRegex = new RegExp( regex.source, regex.flags.replace( 'g', '' ) ); // fresh regex per line with the 'g' flag removed
+                var match = lineRegex.exec( line ); // single exec: at most one match is recorded per line
+
+                if( match )
+                {
+                    matches.push( {
+                        fsPath: fsPath,
+                        line: lineIndex + 1, // 1-based line number
+                        column: match.index + 1, // 1-based, counted in string units rather than bytes
+                        match: match[ 0 ],
+                        lines: line + '\n',
+                        absoluteOffset: byteLength( text.slice( 0, charOffset ) ), // byte offset of the line start within text
+                        submatches: [ {
+                            match: match[ 0 ],
+                            start: byteLength( line.slice( 0, match.index ) ), // byte offset of the match start within the line
+                            end: byteLength( line.slice( 0, match.index + match[ 0 ].length ) ) // byte offset just past the match within the line
+                        } ]
+                    } );
+                }
+
+                charOffset += line.length + 1; // + 1 for the '\n' removed by split
+            }
+
+            return matches;
+        }
+
+        var tagList = [ 'BUG', 'FIXME', 'HACK', 'TODO', '[ ]', '[x]', 'MOMA' ];
+        var regexSource = '(//|#|<!--|;|/\\*|\\*>|^......\\*|\\-\\-)\\s*($TAGS)';
+        var uri = matrixHelpers.createUri( '/tmp/issue-53.cbl' );
+        var text = [ // fixture: some lines contain accented characters, which take more than one byte in UTF-8
+            '000001* Préfixe accentué',
+            '000002* déjà accès créé',
+            '      *> BUG:   COBOL BUG',
+            '      *> FIXME: COBOL FIXME',
+            '      *> TODO:  COBOL TODO',
+            '      *> MOMA:  COBOL MOMA',
+            '      *> [ ]:   COBOL [ ]',
+            '      *> [x]:   COBOL [x]',
+            '',
+            '       -- BUG:   SQL BUG',
+            '       -- FIXME: SQL FIXME',
+            '       -- TODO:  SQL TODO',
+            '       -- MOMA:  SQL MOMA',
+            '       -- [ ]:   SQL [ ]',
+            '       -- [x]:   SQL [x]',
+            'éé -- TODO: unicode prefix SQL', // multi-byte characters sit before the marker on the same line
+            'cdDM00*> --- Gestion Accès DM'
+        ].join( '\n' );
+        var config = matrixHelpers.createConfig( {
+            tagList: tagList,
+            regexSource: regexSource,
+            shouldBeCaseSensitive: false,
+            subTagRegexString: regexRegistry.pattern( 'subTagPrefix' )
+        } );
+        var expandedRegex = new RegExp( // regexSource with the tag placeholder substituted, case-insensitive
+            regexSource.replace( regexRegistry.TAG_PLACEHOLDER, utils.getTagRegexSource( uri, tagList ) ),
+            'i'
+        );
+
+        utils.init( config );
+
+        var openResults = detection.scanText( uri, text ); // direct scan of the text, used as the expected value below
+        var reloadResults = createRipgrepMatches( uri.fsPath, text, expandedRegex ).map( function( match ) // same text, driven through the synthetic byte-offset matches
+        {
+            return detection.normalizeRegexMatch( uri, text, match );
+        } ).filter( function( result )
+        {
+            return result !== undefined; // matches that normalize to undefined are discarded
+        } );
+
+        assert.deepEqual( reloadResults.map( resultSnapshot ), openResults.map( resultSnapshot ) ); // both paths must agree on every compared field
+        assert.deepEqual(
+            reloadResults.map( function( result ) { return result.line; } ),
+            [ 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16 ] // lines 1, 2, 9 and 17 are expected to yield no result
+        );
+    } );
+
     QUnit.test( "issue 898 punctuation-heavy custom tags normalize through custom regexes", function( assert )
     {
         var results = scanWithConfig( '/tmp/issue-898-punctuation.js', [