-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix_all_html.py
More file actions
92 lines (74 loc) · 3.15 KB
/
fix_all_html.py
File metadata and controls
92 lines (74 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3
"""Fix encoding issues in all RedSwitch HTML files."""
import os
# Ordered (corrupted_bytes, replacement_bytes) pairs applied by fix_file().
# Order matters: the multi-character corrupted sequences must be replaced
# before the plain typographic characters near the end of the table, because
# several corrupted sequences contain those characters as a suffix.
_ENCODING_FIXES = (
    # Em dash variants
    (b'\xc3\xa2\xe2\x82\xac\xe2\x80\x9d', b'-'),
    (b'\xc3\xa2\xe2\x82\xac\xe2\x80\x9c', b'-'),
    (b'\xc3\xa2\xe2\x82\xac\xe2\x80\x99', b'-'),
    (b'\xc3\xa2\xe2\x82\xac\xc2\x94', b'-'),
    (b'\xc3\xa2\xc2\x80\xc2\x94', b'-'),
    (b'\xe2\x80\x94', b'-'),  # proper em dash -> hyphen for simplicity
    # Bullet
    (b'\xc3\xa2\xe2\x82\xac\xc2\xa2', b'-'),
    (b'\xc3\xa2\xc2\x80\xc2\xa2', b'-'),
    (b'\xe2\x80\xa2', b'-'),
    # Checkmark - fix corrupted to proper
    (b'\xc3\xa2\xc5\x93\xe2\x80\x9c', b'\xe2\x9c\x93'),
    (b'\xc3\xa2\xc2\x9c\xe2\x80\x9c', b'\xe2\x9c\x93'),
    (b'\xc3\xa2\xc2\x9c\xc2\x93', b'\xe2\x9c\x93'),
    # Arrow
    (b'\xc3\xa2\xe2\x80\xa0\xe2\x80\x99', b'\xe2\x86\x92'),
    (b'\xc3\xa2\xc2\x86\xc2\x92', b'\xe2\x86\x92'),
    # Timer/stopwatch emoji - various corruptions.
    # The replacement ends with the variation selector EF B8 8F. The short
    # patterns below stop at \xc3\xaf (the double-encoded form of EF), so the
    # longer forms come first to also consume the double-encoded B8 / 8F
    # (\xc2\xb8, \xc2\x8f) when present; otherwise those bytes would be left
    # dangling right after the repaired emoji.
    (b'\xc3\xa2\xc2\x8f\xc2\xb1\xc3\xaf\xc2\xb8\xc2\x8f', b'\xe2\x8f\xb1\xef\xb8\x8f'),
    (b'\xc3\xa2\xc2\x8f\xc2\xb1\xc3\xaf\xc2\xb8', b'\xe2\x8f\xb1\xef\xb8\x8f'),
    (b'\xc3\xa2\xc2\xb1\xc3\xaf\xc2\xb8\xc2\x8f', b'\xe2\x8f\xb1\xef\xb8\x8f'),
    (b'\xc3\xa2\xc2\xb1\xc3\xaf\xc2\xb8', b'\xe2\x8f\xb1\xef\xb8\x8f'),
    (b'\xc3\xa2\xc2\x8f\xc2\xb1\xc3\xaf', b'\xe2\x8f\xb1\xef\xb8\x8f'),
    (b'\xc3\xa2\xc2\xb1\xc3\xaf', b'\xe2\x8f\xb1\xef\xb8\x8f'),
    # Money emoji
    (b'\xc3\xb0\xc5\xb8\xe2\x80\x99\xc2\xb8', b'\xf0\x9f\x92\xb8'),
    (b'\xc3\xb0\xc2\x9f\xc2\x92\xc2\xb8', b'\xf0\x9f\x92\xb8'),
    # Zombie emoji
    (b'\xc3\xb0\xc5\xb8\xc2\xa7\xc5\xb8', b'\xf0\x9f\xa7\x9f'),
    (b'\xc3\xb0\xc2\x9f\xc2\xa7\xc2\x9f', b'\xf0\x9f\xa7\x9f'),
    # Robot emoji
    (b'\xc3\xb0\xc5\xb8\xc2\xa4\xe2\x80\x93', b'\xf0\x9f\xa4\x96'),
    (b'\xc3\xb0\xc2\x9f\xc2\xa4\xc2\x96', b'\xf0\x9f\xa4\x96'),
    # Siren emoji
    (b'\xc3\xb0\xc5\xb8\xc5\xa1\xc2\xa8', b'\xf0\x9f\x9a\xa8'),
    (b'\xc3\xb0\xc2\x9f\xc2\x9a\xc2\xa8', b'\xf0\x9f\x9a\xa8'),
    # Right single quote -> apostrophe
    (b'\xc3\xa2\xe2\x82\xac\xe2\x84\xa2', b"'"),
    (b'\xe2\x80\x99', b"'"),
    # Left/right double quotes -> straight quotes
    (b'\xe2\x80\x9c', b'"'),
    (b'\xe2\x80\x9d', b'"'),
)


def fix_file(filepath) -> bool:
    """Repair known corrupted byte sequences in a single file, in place.

    The file is read as raw bytes, every pair in ``_ENCODING_FIXES`` is
    applied in table order, and the file is rewritten only when at least one
    replacement changed the content. Progress is reported on stdout.

    Args:
        filepath: Path of the file to process; passed straight to ``open``.

    Returns:
        True if the file was modified and saved, False if it was left as is.

    Raises:
        OSError: Propagated from ``open`` / ``read`` / ``write``; nothing is
            caught here.
    """
    print(f"Processing: {filepath}")
    with open(filepath, 'rb') as f:
        original = f.read()

    # Work at the bytes level so that sequences which are not valid text in
    # any single encoding can still be matched and replaced.
    content = original
    for old, new in _ENCODING_FIXES:
        if old in content:
            content = content.replace(old, new)
            print(f" Fixed: {old[:20]}...")

    if content == original:
        print(" -> No changes needed")
        return False

    with open(filepath, 'wb') as f:
        f.write(content)
    print(" -> Saved!")
    return True
# Repair every eligible HTML file beneath the current working directory.
# A directory path or file name containing "backup" or "old" (case-insensitive
# substring match) is treated as a backup copy and left untouched.
# NOTE(review): the substring test also matches names such as "folder" or
# "gold" - confirm that skipping those is intended.
fixed_count = 0
for dirpath, _subdirs, names in os.walk('.'):
    lowered_dir = dirpath.lower()
    if 'backup' in lowered_dir or 'old' in lowered_dir:
        continue
    for name in names:
        if not name.endswith('.html'):
            continue
        lowered_name = name.lower()
        if 'old' in lowered_name or 'backup' in lowered_name:
            continue
        # fix_file reports whether it actually rewrote the file.
        if fix_file(os.path.join(dirpath, name)):
            fixed_count += 1

print(f"\nTotal files fixed: {fixed_count}")