-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert_strings.py
More file actions
157 lines (151 loc) · 9.12 KB
/
convert_strings.py
File metadata and controls
157 lines (151 loc) · 9.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
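# Convert the part-of-speech tags assigned by TreeTagger into human-readable Japanese names.
# Then convert those Japanese names into the display strings used with the hinshi font.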
# Lookup table for TreeTagger part-of-speech tags.
# Each entry is a dict with three keys:
#   "PoS"  - the tag string
#   "mean" - the Japanese part-of-speech name shown for that tag
#   "ex"   - example English words for the tag
# The rows are written as (tag, Japanese name, examples) tuples and expanded
# into dicts so that the table stays compact.
ref = [
    {"PoS": tag, "mean": meaning, "ex": example}
    for tag, meaning, example in (
        ("CC", "接続詞", " and, but, or"),
        ("CD", "基数詞", " 1, three"),
        ("DT", "限定詞", " the"),
        ("EX", "副詞", " There is"),
        ("FW", "判定不能", " d'œuvre"),
        ("IN", "前置詞または従位接続詞", " in, of, like, after, whether"),
        ("IN/that", "関係代名詞", " that"),
        ("JJ", "形容詞", " green"),
        ("JJR", "形容詞の比較級", " greener"),
        ("JJS", "形容詞の最上級", " greenest"),
        ("LS", "リスト項目のマーカー", " (1)"),
        ("MD", "助動詞", " could, will"),
        ("NN", "名詞", " table"),
        ("NNS", "名詞", " tables"),
        ("NP", "名詞", " John"),
        ("NPS", "名詞", " Vikings"),
        ("PDT", "前限定詞", " both the boys"),
        ("POS", "所有格語尾", " friend's"),
        ("PP", "代名詞", " I, he, it"),
        ("PP$", "代名詞", " my, his"),
        ("RB", "副詞", " however, usually, here, not"),
        ("RBR", "副詞", " better"),
        ("RBS", "副詞", " best"),
        ("RP", "不変化詞(句動詞を構成する前置詞)", " give up"),
        ("SENT", "文末の句読点記号", " ?, !, ."),
        ("SYM", "記号", " @, +, *, ^, ="),
        ("TO", "to", " to go, to him"),
        ("UH", "間投詞", " uhhuhhuhh"),
        # Forms of "be"
        ("VB", "動詞", " be"),
        ("VBD", "動詞", " was, were"),
        ("VBG", "動詞", " being"),
        ("VBN", "動詞", " been"),
        ("VBZ", "動詞", " is"),
        ("VBP", "動詞", " am, are"),
        # Forms of "do"
        ("VD", "動詞", " do"),
        ("VDD", "動詞", " did"),
        ("VDG", "動詞", " doing"),
        ("VDN", "動詞", " done"),
        ("VDZ", "動詞", " does"),
        ("VDP", "動詞", " do"),
        # Forms of "have"
        ("VH", "動詞", " have"),
        ("VHD", "動詞", " had"),
        ("VHG", "動詞", " having"),
        ("VHN", "動詞", " had"),
        ("VHZ", "動詞", " has"),
        ("VHP", "動詞", " have"),
        # Other verbs
        ("VV", "動詞", " take"),
        ("VVD", "動詞", " took"),
        ("VVG", "動詞", " taking"),
        ("VVN", "動詞", " taken"),
        ("VVP", "動詞", " take"),
        ("VVZ", "動詞", " takes"),
        ("WDT", "Wh限定詞", " which"),
        ("WP", "Wh代名詞", " who, what"),
        ("WP$", "所有関係代名詞", " whose"),
        ("WRB", "副詞", " where, when"),
        (":", "一般結合記号", ";, -, --"),
        ("$", "通貨記号", "$, £"),
    )
]
# The bare string literal below keeps the earlier, more detailed version of the
# table for reference only. Because the text sits inside a string, `ref_o` is
# never actually assigned and nothing in it is used at runtime.
# Its first line reads "keep the original list, just in case".
# NOTE(review): inside this old table the descriptions for VVP (" take") and
# VVZ (" takes") look swapped - VVP is described as third-person singular
# present and VVZ as non-third-person present. Confirm before reusing it.
"""元のリストをいちおう保存しておく
ref_o = [{"PoS": "CC", "mean": "等位接続詞", "ex": " and, but, or"},
{"PoS": "CD", "mean": "基数", "ex": " 1, three"},
{"PoS": "DT", "mean": "限定詞", "ex": " the"},
{"PoS": "EX", "mean": "存在文のthere", "ex": " There is"},
{"PoS": "FW", "mean": "外国語", "ex": " d'œuvre"},
{"PoS": "IN", "mean": "前置詞または従位接続詞", "ex": " in, of, like, after, whether"},
{"PoS": "IN/that", "mean": "補文素", "ex": " that"},
{"PoS": "JJ", "mean": "形容詞", "ex": " green"},
{"PoS": "JJR", "mean": "形容詞の比較級", "ex": " greener"},
{"PoS": "JJS", "mean": "形容詞の最上級", "ex": " greenest"},
{"PoS": "LS", "mean": "リスト項目のマーカー", "ex": " (1)"},
{"PoS": "MD", "mean": "法助動詞", "ex": " could, will"},
{"PoS": "NN", "mean": "名詞", "ex": " table"},
{"PoS": "NNS", "mean": "名詞の複数形", "ex": " tables"},
{"PoS": "NP", "mean": "名詞", "ex": " John"},
{"PoS": "NPS", "mean": "名詞", "ex": " Vikings"},
{"PoS": "PDT", "mean": "前限定詞", "ex": " both the boys"},
{"PoS": "POS", "mean": "所有格語尾", "ex": " friend's"},
{"PoS": "PP", "mean": "人称代名詞", "ex": " I, he, it"},
{"PoS": "PP$", "mean": "所有代名詞", "ex": " my, his"},
{"PoS": "RB", "mean": "副詞", "ex": " however, usually, here, not"},
{"PoS": "RBR", "mean": "副詞の比較級", "ex": " better"},
{"PoS": "RBS", "mean": "副詞の最上級", "ex": " best"},
{"PoS": "RP", "mean": "不変化詞(句動詞を構成する前置詞)", "ex": " give up"},
{"PoS": "SENT", "mean": "文末の句読点記号", "ex": " ?, !, ."},
{"PoS": "SYM", "mean": "記号", "ex": " @, +, *, ^, ="},
{"PoS": "TO", "mean": "to", "ex": " to go, to him"},
{"PoS": "UH", "mean": "間投詞", "ex": " uhhuhhuhh"},
{"PoS": "VB", "mean": "be動詞の原形", "ex": " be"},
{"PoS": "VBD", "mean": "be動詞の過去形", "ex": " was, were"},
{"PoS": "VBG", "mean": "be動詞の動名詞または現在分詞", "ex": " being"},
{"PoS": "VBN", "mean": "be動詞の過去分詞", "ex": " been"},
{"PoS": "VBZ", "mean": "be動詞の三人称単数形現在", "ex": " is"},
{"PoS": "VBP", "mean": "be動詞の三人称単数形以外の現在", "ex": " am, are"},
{"PoS": "VD", "mean": "do動詞の原形", "ex": " do"},
{"PoS": "VDD", "mean": "do動詞の過去形", "ex": " did"},
{"PoS": "VDG", "mean": "do動詞の動名詞または現在分詞", "ex": " doing"},
{"PoS": "VDN", "mean": "do動詞の過去分詞", "ex": " done"},
{"PoS": "VDZ", "mean": "do動詞の三人称単数形現在", "ex": " does"},
{"PoS": "VDP", "mean": "do動詞の三人称単数形以外の現在", "ex": " do"},
{"PoS": "VH", "mean": "have動詞の原形", "ex": " have"},
{"PoS": "VHD", "mean": "have動詞の過去形", "ex": " had"},
{"PoS": "VHG", "mean": "have動詞の動名詞または現在分詞", "ex": " having"},
{"PoS": "VHN", "mean": "have動詞の過去分詞", "ex": " had"},
{"PoS": "VHZ", "mean": "have動詞の三人称単数形現在", "ex": " has"},
{"PoS": "VHP", "mean": "have動詞の三人称単数形以外の現在", "ex": " have"},
{"PoS": "VV", "mean": "動詞の原形", "ex": " take"},
{"PoS": "VVD", "mean": "動詞の過去形", "ex": " took"},
{"PoS": "VVG", "mean": "動詞の動名詞または現在分詞", "ex": " taking"},
{"PoS": "VVN", "mean": "動詞の過去分詞", "ex": " taken"},
{"PoS": "VVP", "mean": "動詞の三人称単数形現在", "ex": " take"},
{"PoS": "VVZ", "mean": "動詞の三人称単数形以外の現在", "ex": " takes"},
{"PoS": "WDT", "mean": "Wh限定詞", "ex": " which"},
{"PoS": "WP", "mean": "代名詞", "ex": " who, what"},
{"PoS": "WP$", "mean": "所有関係代名詞", "ex": " whose"},
{"PoS": "WRB", "mean": "副詞", "ex": " where, when"},
{"PoS": ":", "mean": "一般結合記号", "ex": ";, -, --"},
{"PoS": "$", "mean": "通貨記号", "ex": "$, £"},
]
"""
# Keep the raw TreeTagger tag in 'pos' and add a human-readable Japanese
# part-of-speech name next to it as 'pos_m'.
def pos_to_jpos(wordlist):
    """Annotate every word with the Japanese name of its TreeTagger tag.

    Args:
        wordlist: Iterable of dicts, each holding a TreeTagger tag under
            the key 'pos'.

    Returns:
        The same ``wordlist`` object. Each dict is modified in place and
        gains a 'pos_m' key whose value is the 'mean' text of the first
        ``ref`` entry whose 'PoS' equals the tag, or '' when the tag is
        empty or has no entry in ``ref``.

    Raises:
        KeyError: If a word dict has no 'pos' key.
    """
    # Index the table once per call instead of rescanning it for every word.
    # setdefault keeps the first entry seen for a tag, which matches a
    # front-to-back scan of the table.
    meaning_by_tag = {}
    for entry in ref:
        meaning_by_tag.setdefault(entry['PoS'], entry['mean'])

    for word in wordlist:
        # Tags that are missing from the table (it has no entry for ',' or
        # '(' for example) fall back to '' instead of failing with a
        # TypeError on a None lookup result.
        word['pos_m'] = meaning_by_tag.get(word['pos'], '')
    return wordlist
# Convert the human-readable Japanese part-of-speech name into the
# single-letter code used for display with the hinshi font.
def jpos_to_hinshi(wordlist):
    """Replace each word's 'pos_m' label with its hinshi font letter.

    Args:
        wordlist: Iterable of dicts, each holding a Japanese part-of-speech
            label under the key 'pos_m'.

    Returns:
        The same ``wordlist`` object. Each dict's 'pos_m' is overwritten in
        place with the matching letter, or with '' when the label is not
        in the table below.

    Raises:
        KeyError: If a word dict has no 'pos_m' key.
    """
    # Japanese part-of-speech label -> letter in the hinshi font.
    letter_by_label = {
        '名詞': 'A',
        '動詞': 'B',
        '接続詞': 'C',
        '副詞': 'D',
        '代名詞': 'E',
        '他動詞': 'F',
        '形容詞': 'G',
        '前置詞': 'H',
        '間投詞': 'J',
        '冠詞': 'K',
        '助動詞': 'M',
        '関係代名詞': 'N',
    }
    for word in wordlist:
        # Labels without a letter become '' directly, rather than relying on
        # a TypeError raised by subscripting a None lookup result.
        word['pos_m'] = letter_by_label.get(word['pos_m'], '')
    return wordlist
# Convert a string meant for the hinshi font back into a human-readable string.
# NOTE(review): no such reverse conversion appears in the code visible here;
# only the lookup examples below follow this heading.
# The expression below looks up the description ('mean') for the tag 'CC'.
# NOTE(review): its result is not assigned or printed, so as a module-level
# statement it has no visible effect.
next((x for x in ref if x['PoS'] == 'CC'), None)['mean']
# The line below would print the Japanese description that corresponds to
# the 'pos' value of the first dict in wordlist.
# print(next((x for x in ref if x['PoS'] == wordlist[0]['pos']), None)['mean'])