-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter.py
More file actions
125 lines (101 loc) · 3.55 KB
/
filter.py
File metadata and controls
125 lines (101 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json
import os
import re
import asyncio
async def PickModule(name):
    """Resolve a translation backend by its short configuration name.

    The backend module is imported lazily, so only the selected one is
    loaded.

    Args:
        name: Backend identifier; ``"gt4"``, ``"dt3"`` and ``"dtlang"`` are
            the recognised values.

    Returns:
        A ``(TransLate, LangDetect)`` pair taken from the chosen backend
        module, or ``(None, None)`` when *name* is not recognised.
    """
    # Guard clause: anything outside the known set has no backend.
    if name not in ("gt4", "dt3", "dtlang"):
        return None, None

    if name == "gt4":
        from translations.mod_gt import TransLate, LangDetect
    elif name == "dt3":
        from translations.mod_dt import TransLate, LangDetect
    else:
        from translations.mod_dt_ld import TransLate, LangDetect

    return TransLate, LangDetect
def CountSentences(text):
    """Return the number of sentences found in *text*.

    The text is split on runs of ``.``, ``!`` and ``?``; every fragment that
    still contains something other than whitespace counts as one sentence.
    """
    fragments = re.split(r"[.!?]+", text)
    return sum(1 for fragment in fragments if fragment.strip())
def CutByLimit(text, max_chars, max_words, max_sent):
    """Trim *text* to the given character, word and sentence limits.

    The limits are applied in this order: characters, then words, then
    sentences. A limit that is ``None``, ``0`` or negative means "no limit".

    Args:
        text: The string to shorten.
        max_chars: Maximum number of characters to keep.
        max_words: Maximum number of whitespace-separated words to keep.
            When words are dropped, the kept ones are re-joined with single
            spaces; otherwise the original whitespace is left untouched.
        max_sent: Maximum number of sentences to keep. A sentence is a
            non-blank run of text followed by one or more of ``.``, ``!``
            or ``?``.

    Returns:
        The shortened text, or *text* unchanged when no limit applies.
    """
    if max_chars and max_chars > 0 and len(text) > max_chars:
        text = text[:max_chars]

    if max_words and max_words > 0:
        words = text.split()
        if len(words) > max_words:
            text = " ".join(words[:max_words])

    if not max_sent or max_sent <= 0:
        return text

    # With a capturing group, re.split returns an odd-length list that
    # alternates sentence bodies (even indices) and terminators (odd indices).
    pieces = re.split(r"([.!?]+)", text)
    kept = []
    complete = 0
    for body, terminator in zip(pieces[0::2], pieces[1::2]):
        kept.append(body)
        kept.append(terminator)
        # A blank body (leading "..." or ". .") is kept in the output but
        # does not use up the sentence budget.
        if body.strip():
            complete += 1
            if complete >= max_sent:
                break
    else:
        # Limit not reached: keep the trailing, unterminated remainder too.
        kept.append(pieces[-1])
    return "".join(kept)
async def main():
    """Translate (part of) a text file as described by ``config.json``.

    Reads ``config.json`` from the current working directory and uses these
    keys: ``input_file``, ``dest_lang``, ``module``, ``output``,
    ``max_chars``, ``max_words`` and ``max_sentences``.

    Steps:
        1. Load the configuration and check that the input file exists.
        2. Read the file, print its size and character, word and sentence
           counts, and print the language reported by the backend.
        3. Cut the text with ``CutByLimit`` and translate the result.
        4. Print the translation when ``output`` is ``"screen"``; otherwise
           write it next to the input file as ``<base>_<dest_lang><ext>``
           (``.txt`` when the input has no extension).

    Every failure is reported with a short message on stdout and ends the
    run; nothing is raised to the caller for ordinary errors.

    Returns:
        None.
    """
    try:
        with open("config.json", "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print("Помилка читання конфігурації")
        return
    if not isinstance(config, dict):
        # Valid JSON that is not an object cannot provide the settings below.
        print("Помилка читання конфігурації")
        return

    input_file = config.get("input_file")
    dest_lang = config.get("dest_lang")
    module = config.get("module")
    output = config.get("output")
    max_chars = config.get("max_chars")
    max_words = config.get("max_words")
    max_sentences = config.get("max_sentences")

    # A missing "input_file" key would make os.path.exists(None) raise.
    if not input_file or not os.path.exists(input_file):
        print("Файл не знайдено")
        return

    # `except Exception` (not a bare `except`) is used at these boundaries so
    # KeyboardInterrupt, SystemExit and asyncio.CancelledError still propagate.
    try:
        size = os.path.getsize(input_file)
        with open(input_file, "r", encoding="utf-8") as f:
            full_text = f.read()
        char_count = len(full_text)
        word_count = len(full_text.split())
        sent_count = CountSentences(full_text)
        TransLate, LangDetect = await PickModule(module)
        if TransLate is None:
            print("Невідомий модуль")
            return
        # NOTE(review): the meaning of the "lang" argument is defined by the
        # backend module, which is not visible here.
        file_lang = await LangDetect(full_text, "lang")
        print("Файл:", input_file)
        print("Розмір:", size, "байт")
        print("Символів:", char_count)
        print("Слів:", word_count)
        print("Речень:", sent_count)
        print("Мова тексту:", file_lang)
    except Exception:
        print("Помилка при читанні файлу")
        return

    try:
        part_text = CutByLimit(full_text, max_chars, max_words, max_sentences)
        translated = await TransLate(part_text, "auto", dest_lang)
    except Exception:
        print("Помилка при перекладі")
        return

    if output == "screen":
        print("Мова призначення:", dest_lang)
        print("Модуль:", module)
        print("Переклад:")
        print(translated)
        return

    base, ext = os.path.splitext(input_file)
    out_name = f"{base}_{dest_lang}{ext if ext else '.txt'}"
    try:
        with open(out_name, "w", encoding="utf-8") as f:
            f.write(translated)
        print("Ok")
    except Exception:
        print("Помилка запису у файл")
# Script entry point: run the async main() coroutine on a fresh event loop,
# but only when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())