-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_strings.ps1
More file actions
108 lines (91 loc) · 3.98 KB
/
extract_strings.ps1
File metadata and controls
108 lines (91 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Extract string literals from C#, Razor, JS, and SQL files
function Extract-AllStrings {
    <#
    .SYNOPSIS
    Collects long quoted string literals from a set of source files.

    .DESCRIPTION
    Each file is read in full and scanned once per quote style:
    double-quoted (including C# verbatim @"..."), single-quoted, and
    backtick-quoted. Every quote style is applied to every file, regardless
    of its extension. Literals whose body is shorter than 25 characters are
    discarded.

    .PARAMETER Paths
    Paths of the files to scan. A file that is missing or empty is skipped;
    a file that raises an error produces a warning and is skipped.

    .OUTPUTS
    One PSCustomObject per literal, with:
      Text - the literal's body without its delimiters (escape sequences are
             left exactly as written in the source file)
      File - the path of the file, as it was passed in
    #>
    param([string[]]$Paths)

    $minLength = 25
    $singleline = [System.Text.RegularExpressions.RegexOptions]::Singleline

    # Each scanner matches WHOLE literals of any length; the length filter is
    # applied afterwards. Putting the minimum length inside the pattern would
    # let the closing quote of a short literal be retried as an opening quote,
    # which captures the code between two literals as if it were a string.
    #
    # The alternatives inside each body are mutually exclusive on their first
    # character, so a failed match cannot backtrack exponentially.
    $scanners = @(
        # Verbatim @"..." ("" is an escaped quote) is tried before the regular
        # "..." form (backslash escapes) so a verbatim literal is consumed as a
        # unit and recorded exactly once.
        [regex]::new('@"(?<body>(?:[^"]|"")*)"|"(?<body>(?:[^"\\]|\\.)*)"', $singleline),
        # Single-quoted literals with backslash escapes (JS and SQL files).
        [regex]::new("'(?<body>(?:[^'\\]|\\.)*)'", $singleline),
        # Backtick-quoted literals with backslash escapes (JS template strings).
        [regex]::new('`(?<body>(?:[^`\\]|\\.)*)`', $singleline)
    )

    # A List avoids re-allocating the whole result array on every append.
    $results = [System.Collections.Generic.List[object]]::new()

    foreach ($file in $Paths) {
        try {
            # -LiteralPath: characters such as [ and ] in a file name must not
            # be interpreted as wildcards.
            $content = Get-Content -LiteralPath $file -Raw -ErrorAction SilentlyContinue
            if (-not $content) { continue }

            foreach ($scanner in $scanners) {
                foreach ($m in $scanner.Matches($content)) {
                    $str = $m.Groups['body'].Value
                    if ($str.Length -ge $minLength) {
                        $results.Add([PSCustomObject]@{ Text = $str; File = $file })
                    }
                }
            }
        } catch {
            # Stay best-effort (one bad file must not abort the run), but do
            # not hide the failure.
            Write-Warning "Extract-AllStrings: skipping '$file': $($_.Exception.Message)"
        }
    }

    return $results.ToArray()
}
# Get all relevant files
# The tree is walked once and the listing is filtered three times; the
# C#/Razor -> JS -> SQL ordering of $allFiles is kept.
$sourceFiles = @(Get-ChildItem -Recurse -File)
$csFiles = @($sourceFiles | Where-Object { $_.Extension -in '.cs', '.razor' })
$jsFiles = @($sourceFiles | Where-Object { $_.Extension -eq '.js' })
$sqlFiles = @($sourceFiles | Where-Object { $_.Extension -eq '.sql' })
$allFiles = $csFiles + $jsFiles + $sqlFiles

# Pass full path strings explicitly: converting FileInfo objects to [string]
# can yield only the bare file name, depending on the PowerShell version.
$allPaths = @($allFiles | ForEach-Object { $_.FullName })
$extracted = Extract-AllStrings -Paths $allPaths

# Filter: at least 8 words split by whitespace
$minWords = 8
$filtered = foreach ($item in $extracted) {
    # Splitting on whitespace leaves empty tokens at the edges; drop them
    # before counting.
    $wordCount = ($item.Text -split '\s+').Where({ $_ -ne '' }).Count
    if ($wordCount -ge $minWords) {
        [PSCustomObject]@{
            Text = $item.Text
            CharCount = $item.Text.Length
            File = $item.File
        }
    }
}

# Group by text
$grouped = $filtered | Group-Object Text | ForEach-Object {
    # File values are already full path strings, so they are used directly.
    # @() keeps .Count meaningful when only one file is involved.
    $files = @($_.Group | Select-Object -ExpandProperty File -Unique)
    [PSCustomObject]@{
        Text = $_.Name
        CharCount = $_.Group[0].CharCount
        TotalOccurrences = $_.Count
        FileCount = $files.Count
        # At most three sample paths per string.
        FilePaths = @($files | Select-Object -First 3)
    }
}

# Sort and output JSON
$grouped | Sort-Object TotalOccurrences, CharCount -Descending | ConvertTo-Json -Depth 4