-
Notifications
You must be signed in to change notification settings - Fork 1
gen2
WooKyoung Noh edited this page Dec 30, 2018
·
1 revision
Ref: https://ko.wikipedia.org/wiki/%EC%82%BC%EA%B5%AD%EC%A7%80_%EC%9D%B8%EB%AC%BC_%EB%AA%A9%EB%A1%9D (Korean Wikipedia page "삼국지 인물 목록")
# Load the saved HTML page (presumably a local copy of the list page; confirm).
s = open(io -> read(io, String), "3.html")
# One match per list entry: `page` is the path segment after /wiki/, `name` is the
# link text, and `ja` is the word that follows "</a></b> " before "</li>".
re = r"""<a href=\"/wiki/(?<page>[\w%_()]*)\" .*>(?<name>[\w]*)</a></b> (?<ja>[\w]*)</li>"""
matches = eachmatch(re, s) |> collect
using URIParser
using JSON2
"""
    getnt(extract)

Pull the name fields out of the summary text `extract`.

Two patterns are applied:

- `extract` must start with word characters/spaces followed by `(` and a second
  run of word characters/spaces (captures `name` and `hanjaname`);
- `extract` must contain `자`, then any run of the characters `(`, `字`, `)`,
  then `는 `, a word, `(` and another word (captures `name` and `hanjaname`).

Returns the named tuple `(名, 字, 이름, 자)`: `名` and `이름` come from the first
pattern (with spaces removed from `이름`), `字` and `자` from the second.

Throws an `ArgumentError` when either pattern does not match.
"""
function getnt(extract)
    leadpattern = r"""^(?<name>[\w ]*)\((?<hanjaname>[\w ]*)"""
    japattern = r"""자[(字)]*는 (?<name>[\w]*)\((?<hanjaname>[\w]*)"""
    m1 = match(leadpattern, extract)
    # `match` returns `nothing` on failure; indexing that would raise an opaque
    # MethodError, so fail early with the offending text instead.
    if m1 === nothing
        throw(ArgumentError(string("extract does not start with `name(...`: ", repr(extract))))
    end
    m2 = match(japattern, extract)
    if m2 === nothing
        throw(ArgumentError(string("extract has no `자는 name(...` part: ", repr(extract))))
    end
    return (
        名=m1[:hanjaname],
        字=m2[:hanjaname],
        이름=replace(m1[:name], " " => ""),
        자=m2[:name],
    )
end
"""
    get_arr_d()

Load the definitions stored in `in.jl` (resolved against the current directory).

Every non-blank line is expected to have the form `name = (...)`, optionally
prefixed with `const `. Returns `(arr, d)`, where `arr` holds the names in file
order and `d` maps each name to the evaluated right-hand side.

Throws an `ArgumentError` for a non-blank line that lacks the ` = (` separator.
"""
function get_arr_d()
    d = Dict()
    arr = []
    # `readlines` strips both "\n" and "\r\n", so CRLF files parse the same way.
    for (lineno, rawline) in enumerate(readlines("in.jl"))
        isempty(strip(rawline)) && continue
        # Drop only a leading `const `; the value itself must stay untouched.
        line = replace(rawline, r"^const " => "")
        # `limit=2` keeps any later " = (" inside the value intact.
        parts = split(line, " = ("; limit=2)
        if length(parts) != 2
            throw(ArgumentError(string(
                "in.jl:", lineno, ": expected `name = (...)`, got ", repr(rawline),
            )))
        end
        name, body = parts
        push!(arr, name)
        # NOTE(review): this evaluates file content as Julia code; only safe while
        # in.jl is a trusted, locally generated file.
        d[name] = eval(Meta.parse(string("(", body)))
    end
    return (arr, d)
end
# Start from the entries already stored in in.jl, then add or overwrite one entry
# per match found in the HTML list.
arr, d = get_arr_d()
for m in matches
    listname, listja = m[:name], m[:ja]
    # The page segment is percent-encoded in the HTML; the summary file is looked
    # up under its unescaped form.
    unpage = URIParser.unescape(m[:page])
    # The files under unpage/ must already exist. A commented-out line in the
    # original printed this command, presumably to fetch them:
    #   wget "https://ko.wikipedia.org/api/rest_v1/page/summary/$unpage" -O "unpage/$unpage"
    summary = JSON2.read(read("unpage/$unpage", String), Any)
    nt = getnt(summary.extract)
    # When `ja` agrees with the extract but the name does not, take the name from
    # the list and keep only the last space-separated part of 名.
    if listja == nt.자 && listname != nt.이름
        nt = merge(nt, (이름=listname, 名=last(split(nt.名, " "))))
    end
    d[nt.名] = nt
    nt.名 in arr || push!(arr, nt.名)
end
# Print one `const NAME = (...)` line per entry, in the order held by `arr`.
foreach(arr) do key
    println("const ", key, " = ", d[key])
end
# Disabled alternative output: one `"NAME" => NAME, ` line per entry.
if false
    foreach(arr) do key
        println(" ", repr(key), " => ", key, ", ")
    end
end