Skip to content
WooKyoung Noh edited this page Dec 30, 2018 · 1 revision

Ref: https://ko.wikipedia.org/wiki/%EC%82%BC%EA%B5%AD%EC%A7%80_%EC%9D%B8%EB%AC%BC_%EB%AA%A9%EB%A1%9D

# Load the HTML document to scan (presumably a saved copy of the page named in the
# "Ref" link above — confirm).
s = read("3.html", String)
# Pattern for one list entry: an <a href="/wiki/..."> link closed by </a></b>, followed by
# a space, one word and </li>. Named captures:
#   page - link target after /wiki/ (word characters, '%', '_', '(' and ')')
#   name - the link text
#   ja   - the word between "</b> " and "</li>"; later compared with the parsed 자 field
# NOTE(review): `.*>` is greedy, so several links on one line may be swallowed into a
# single match — confirm against the real HTML.
re = r"""<a href=\"/wiki/(?<page>[\w%_()]*)\" .*>(?<name>[\w]*)</a></b> (?<ja>[\w]*)</li>"""
# Earlier variant of the pattern (no '%' in the page character class); kept for reference.
#re = r"""<a href="\/wiki\/(?<page>[\w_()]*)" .*>(?<name>[\w]*)</a></b> (?<ja>[\w]*)</li>"""
# Materialize all matches so the entries can be iterated later in the script.
matches = collect(eachmatch(re, s))

using URIParser
using JSON2

"""
    getnt(extract)

Parse the summary text `extract` into the named tuple `(名, 字, 이름, 자)`.

Two patterns are applied to `extract`:

- From the very start of the text: a run of word characters/spaces (the name) followed by
  an opening parenthesis and another run of word characters/spaces (the name in hanja).
- Anywhere in the text: `자`, optionally followed by characters from the set `(`, `字`, `)`,
  then `는 `, a word (the courtesy name), an opening parenthesis and another word (the
  courtesy name in hanja).

# Returns
A named tuple with the fields
- `名`: hanja text captured inside the first parenthesis,
- `字`: hanja text captured inside the parenthesis after the courtesy name,
- `이름`: the leading name with all spaces removed,
- `자`: the courtesy name.

# Throws
- `ArgumentError` if either pattern does not occur in `extract`.
"""
function getnt(extract)
    re = r"""^(?<name>[\w ]*)\((?<hanjaname>[\w ]*)"""
    ja = r"""자[(字)]*는 (?<name>[\w]*)\((?<hanjaname>[\w]*)"""

    m1 = match(re, extract)
    # `match` returns `nothing` on failure; report which part is missing instead of
    # failing later with a MethodError on `nothing[:hanjaname]`.
    m1 === nothing &&
        throw(ArgumentError("no leading name followed by '(' found in: " * repr(extract)))

    m2 = match(ja, extract)
    m2 === nothing &&
        throw(ArgumentError("no courtesy-name pattern found in: " * repr(extract)))

    return (
        名=m1[:hanjaname],
        字=m2[:hanjaname],
        이름=replace(m1[:name], " " => ""),
        자=m2[:name],
    )
end

"""
    get_arr_d()

Read `in.jl` from the current directory and parse its `const NAME = (...)` lines.

Each non-blank line is expected to look like `const NAME = (field = value, ...)`; the
leading `const ` is optional.

# Returns
A tuple `(arr, d)` where
- `arr` lists the names in file order, and
- `d` maps each name to the value obtained by evaluating the parenthesized right-hand side.

# Throws
- `ArgumentError` if a non-blank line does not contain `" = ("`.
"""
function get_arr_d()
    d = Dict()
    arr = []
    # `eachline` strips "\n" as well as "\r\n", so files with CRLF endings also work.
    for line in eachline("in.jl")
        isempty(strip(line)) && continue
        # Remove only a leading `const `; the same text inside a value must be kept.
        stmt = replace(line, r"^\s*const " => ""; count=1)
        # `limit=2`: split at the first " = (" only, so a value that itself contains
        # " = (" is not truncated.
        parts = split(stmt, " = ("; limit=2)
        length(parts) == 2 ||
            throw(ArgumentError("in.jl: expected `NAME = (...)`, got: " * repr(line)))
        name, rhs = parts
        push!(arr, name)
        # NOTE(security): this evaluates file content as Julia code. Only run it on an
        # `in.jl` you trust (e.g. one produced by this script itself).
        d[name] = eval(Meta.parse(string("(", rhs)))
    end
    return (arr, d)
end

# Start from the entries already stored in in.jl, then merge in every person matched in
# the HTML list.
(arr, d) = get_arr_d()

for entry in matches
    unpage = URIParser.unescape(entry[:page])
    # "unpage/<title>" holds the page summary JSON; such a file can be fetched with
    #   wget "https://ko.wikipedia.org/api/rest_v1/page/summary/<title>" -O "unpage/<title>"
    summary = JSON2.read(read("unpage/$unpage", String), Any)
    person = getnt(summary.extract)

    listed_name = entry[:name]
    # Courtesy name agrees with the list but the name does not: take the name from the
    # list and keep only the last space-separated word of the hanja name.
    if entry[:ja] == person.자 && listed_name != person.이름
        hanja = last(split(person.名, " "))
        person = merge(person, (이름=listed_name, 名=hanja))
    end

    # Insert or overwrite the entry; remember the key order for newly seen names.
    d[person.名] = person
    person.名 in arr || push!(arr, person.名)
end

# Emit one `const <key> = <entry>` line per name, in the order recorded in `arr`.
foreach(arr) do key
    println("const ", key, " = ", d[key])
end

# Disabled alternative output: one indented `"<key>" => <key>, ` line per name.
# Change the condition to `true` to enable it.
if false
    foreach(key -> println("    ", repr(key), " => ", key, ", "), arr)
end

Clone this wiki locally