-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape.rb
More file actions
42 lines (35 loc) · 801 Bytes
/
scrape.rb
File metadata and controls
42 lines (35 loc) · 801 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/ruby
require 'rubygems'
require 'nokogiri'
require 'open-uri'
# Prints every phone-number-like substring found in +str+, one per line.
#
# Each output line has the form "<file> p <match>". The match is printed
# exactly as the pattern captured it, including the leading "(" or
# whitespace character that the pattern requires.
#
# @param str [String] text to search
# @param file [String] label printed at the start of every output line
# @return [nil]
def phone_e(str, file)
  pattern = /[\(\s]\d+[\)*\s\-]+\d+\-\d+/
  # The pattern has no capture groups, so scan yields each full match as a String.
  str.scan(pattern) { |hit| puts "#{file} p #{hit}" }
  nil
end
# Prints every ".edu" e-mail address found in +str+, one per line.
#
# Each output line has the form "<file> e <match>". The separator between
# the local part and the domain may be "@" or the obfuscated "(at)", with
# optional whitespace on either side.
#
# @param str [String] text to search
# @param file [String] label printed at the start of every output line
# @return [nil]
def email_e(str, file)
  # The separator used to be the character class [@(at)], which matches any
  # ONE of "@", "(", "a", "t", ")". That reported plain host names such as
  # "www.math.edu" as addresses and truncated "user(at)host.edu". An
  # alternation matches the whole separator instead.
  # Groups are non-capturing so that scan yields the full match as a String.
  pattern = /[a-zA-Z]+\s*(?:@|\(at\))\s*[a-z]+(?:\.[a-z]+)*\.edu/
  str.scan(pattern) { |hit| puts "#{file} e #{hit}" }
  nil
end
# Scan every regular file in ./dev (relative to the current working
# directory): parse it as HTML and report the phone numbers and e-mail
# addresses found in the text of its <body>.
dir = File.join(Dir.pwd, "dev")
Dir.foreach(dir) do |file|
  path = File.join(dir, file)
  # Directory listings include "." and ".." (and possibly subdirectories),
  # which cannot be read as HTML documents.
  next unless File.file?(path)

  # The block form closes the file handle once the document has been parsed.
  doc = File.open(path) { |f| Nokogiri::HTML(f) }
  str = doc.xpath("//body").text
  phone_e(str, file)
  email_e(str, file)
end