-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparseTree.py
More file actions
executable file
·155 lines (152 loc) · 3.99 KB
/
parseTree.py
File metadata and controls
executable file
·155 lines (152 loc) · 3.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import string
class Tree(object):
    """Node of a parse tree.

    Every attribute starts out as None; the parsing functions in this
    module fill them in after constructing the node.
    """

    def __init__(self):
        # Label information: the raw colon-separated fields (``data``) and
        # the individual values the parsers extract from them.
        self.data = self.semRole = self.pos = self.word = None
        # Child nodes; the parsers replace this with a list of Tree objects.
        self.children = None
        # Not assigned by any code visible in this module.
        self.position = None
def print_tree(tree):
    """Print *tree* recursively, one line per node plus a closing line.

    Each node is written as ``semRole:pos:word<number of children>(``,
    followed by its children printed the same way, followed by a line
    containing ``)``.  Nothing is printed when *tree* is None.
    """
    if tree is None:
        return
    # A freshly constructed node has ``children`` set to None; treat that
    # as "no children" instead of failing on len(None).
    children = tree.children or []
    header = (str(tree.semRole) + ':' + str(tree.pos) + ':' +
              str(tree.word) + str(len(children)) + '(')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(header)
    for child in children:
        print_tree(child)
    print(')')
def _annotated_node_from_label(label):
    """Create a Tree from a colon-separated node label (children not set).

    The number of fields decides how they are mapped:

    * 1 field  -> ``pos``; ``word`` is an empty list
    * 2 fields -> ``semRole``, ``pos``; ``word`` is an empty list
    * 4 fields -> ``semRole`` is the first field, ``pos`` is the literal
      ``'DUMMY'``, ``word`` is the fourth field
    * 3 fields, or 5 and more -> ``semRole``, ``pos``, ``word`` from the
      first three fields
    """
    node = Tree()
    fields = label.split(':')
    node.data = fields
    count = len(fields)
    if count == 1:
        node.pos = fields[0]
        node.word = []
    elif count == 2:
        node.semRole, node.pos = fields
        node.word = []
    elif count == 4:
        node.semRole = fields[0]
        node.pos = 'DUMMY'
        node.word = fields[3]
    else:
        node.semRole, node.pos, node.word = fields[:3]
    return node


def parseExpr(expr):
    """Parse an annotated bracketed expression into a Tree.

    *expr* is either a bare label (a terminal node) or
    ``label(child|child|...)``, where each child is itself an expression
    and the label is a colon-separated list of fields.

    An expression without an opening parenthesis is always a terminal,
    even if it contains a pipe sign.  Previously such input was sent down
    the non-terminal path, where ``find('(')`` returned -1 and the slices
    silently dropped the last character and re-parsed the label as its
    own child.

    Returns the root Tree node; terminals get an empty ``children`` list.
    """
    open_at = expr.find('(')
    if open_at == -1:
        node = _annotated_node_from_label(expr)
        node.children = []
        return node

    node = _annotated_node_from_label(expr[:open_at])
    # Strip the opening parenthesis and the final character, which is
    # expected to be the matching closing parenthesis.
    inner = expr[open_at + 1:-1]
    node.children = [parseExpr(child) for child in findChildren(inner)]
    return node
def parseExpr_unannotated(expr):
    """Parse an unannotated bracketed expression into a Tree.

    *expr* is either a bare label (a terminal node) or
    ``label(child|child|...)``.  Labels are colon-separated:

    * terminal, 3 fields     -> ``semRole``, ``pos``, ``word``
    * terminal, 2 fields     -> ``pos``, ``word``
    * non-terminal, 2 fields -> ``semRole``, ``pos``
    * non-terminal, 1 field  -> ``pos``

    Any other field count leaves the attributes at None; ``data`` always
    holds the raw field list.

    An expression without an opening parenthesis is always a terminal,
    even if it contains a pipe sign.  Previously such input took the
    non-terminal path, where ``find('(')`` returned -1 and the slices
    dropped the last character and re-parsed the label as its own child.

    Returns the root Tree node; terminals get an empty ``children`` list.
    """
    node = Tree()
    open_at = expr.find('(')

    if open_at == -1:
        # Terminal node.
        fields = expr.split(':')
        node.data = fields
        if len(fields) == 3:
            node.semRole, node.pos, node.word = fields
        elif len(fields) == 2:
            node.pos, node.word = fields
        node.children = []
        return node

    fields = expr[:open_at].split(':')
    node.data = fields
    if len(fields) == 2:
        node.semRole, node.pos = fields
    elif len(fields) == 1:
        node.pos = fields[0]
    # Strip the opening parenthesis and the final character, which is
    # expected to be the matching closing parenthesis.
    inner = expr[open_at + 1:-1]
    node.children = [parseExpr_unannotated(child)
                     for child in findChildren(inner)]
    return node
# Some Chinese characters match the pipe sign and cause problems; as a workaround, the character that follows each pipe sign is checked before splitting on it.
def findChildren(expr):
    """Split *expr* into its top-level children.

    Children are separated by pipe signs that sit outside any
    parentheses.  A pipe only counts as a separator when the character
    right after it has a code point strictly between 32 and 124; any
    other pipe (including one at the very end of *expr*) is kept as part
    of the current child.

    The position of each character comes from ``enumerate``.  The earlier
    hand-maintained counter was not advanced for a non-separator pipe, so
    later look-aheads inspected the wrong character, and the pipe itself
    was dropped from the output.

    Returns a list of child strings; an empty *expr* yields ``['']``.
    """
    depth = 0
    last_index = len(expr) - 1
    current = []
    children = []
    for index, tk in enumerate(expr):
        if tk == '(':
            depth += 1
        elif tk == ')':
            depth -= 1
        elif tk == '|' and depth == 0 and index < last_index:
            # Look at the following character to decide whether this pipe
            # is a real separator.
            if 32 < ord(expr[index + 1]) < 124:
                children.append(''.join(current))
                current = []
                continue
        current.append(tk)
    children.append(''.join(current))
    return children