// Visible-text helpers for scripts/check-site.js, placed after exists(file).
//
// Call-site contract from the same change: the loop over the root-level
// *.html files computes `const visibleText = extractVisibleText(html);` and
// tests every banned pattern against `visibleText` instead of the raw `html`,
// so only rendered text can trigger a "banned legacy term" failure.

// Named character references recognised by decodeHtmlEntities. `nbsp` maps to
// a plain space because the caller collapses whitespace afterwards anyway.
const HTML_NAMED_ENTITIES = new Map([
  ["nbsp", " "],
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", "\""],
  ["apos", "'"],
]);

// Largest value String.fromCodePoint accepts; anything above throws RangeError.
const MAX_UNICODE_CODE_POINT = 0x10ffff;

/**
 * Decodes a small set of HTML character references in a single pass.
 *
 * Handles the named references listed in HTML_NAMED_ENTITIES (matched
 * case-insensitively) plus decimal (`&#39;`) and hexadecimal (`&#x2F;`)
 * numeric references. Unknown names and out-of-range code points are left
 * untouched instead of throwing.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The text with recognised references replaced.
 */
function decodeHtmlEntities(text) {
  // One pass over the input: the output of a replacement is never rescanned,
  // so "&amp;lt;" decodes to the literal text "&lt;" rather than to "<".
  return text.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi,
    (reference, hexDigits, decimalDigits, entityName) => {
      if (entityName !== undefined) {
        const key = entityName.toLowerCase();
        return HTML_NAMED_ENTITIES.has(key) ? HTML_NAMED_ENTITIES.get(key) : reference;
      }

      const codePoint = hexDigits !== undefined
        ? Number.parseInt(hexDigits, 16)
        : Number.parseInt(decimalDigits, 10);

      // Keep invalid references verbatim; fromCodePoint would throw on them.
      return codePoint <= MAX_UNICODE_CODE_POINT
        ? String.fromCodePoint(codePoint)
        : reference;
    }
  );
}

/**
 * Reduces an HTML document to the text a reader would see.
 *
 * Removes comments, whole <script> and <style> elements, and every remaining
 * tag (attributes included), then decodes character references and collapses
 * runs of whitespace to single spaces.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string} Trimmed, whitespace-normalised visible text.
 */
function extractVisibleText(html) {
  const withoutComments = html.replace(/<!--[\s\S]*?-->/g, " ");

  // Script and style bodies are not rendered text, so drop element and content.
  const withoutScriptAndStyle = withoutComments
    .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, " ")
    .replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, " ");

  const withoutTags = withoutScriptAndStyle.replace(/<[^>]+>/g, " ");

  // Decode only after tags are stripped so "&lt;b&gt;" survives as text "<b>".
  const decodedText = decodeHtmlEntities(withoutTags);
  return decodedText.replace(/\s+/g, " ").trim();
}