-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathget_img.py
More file actions
39 lines (30 loc) · 878 Bytes
/
get_img.py
File metadata and controls
39 lines (30 loc) · 878 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python3
import re
import urllib.request
import chardet
def getHtml(url):
    """Fetch *url* and return the response body decoded to text.

    The raw bytes are passed to ``chardet.detect`` to guess the character
    encoding, then decoded with the guessed encoding.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The decoded response body as ``str``.

    Raises:
        UnicodeDecodeError: If the body cannot be decoded with the chosen
            encoding. Errors raised by ``urlopen`` propagate unchanged.
    """
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as page:
        html = page.read()

    # Guess the encoding from the bytes themselves and decode accordingly.
    encode_type = chardet.detect(html)
    # chardet reports None when it cannot guess (e.g. an empty body);
    # decode(None) would raise TypeError, so fall back to UTF-8.
    encoding = encode_type['encoding'] or 'utf-8'
    return html.decode(encoding)
def getImg(html, save_dir='D:/1'):
    """Download every ``.jpg`` image referenced in *html* into *save_dir*.

    Each ``src="....jpg"`` attribute value found in *html* is fetched with
    ``urllib.request.urlretrieve`` and stored as ``<index>.png``, where the
    index counts matches from 0 in the order they appear.

    Args:
        html: Page markup (text) to scan for image URLs.
        save_dir: Directory that receives the downloaded files; created if
            it does not exist. Defaults to the previously hard-coded
            ``D:/1`` location.

    Returns:
        None.

    Raises:
        Errors raised by ``urllib.request.urlretrieve`` (for example for an
        unreachable or malformed URL) propagate unchanged.
    """
    import os  # local import: os is not among the file's top-level imports

    # [^"] keeps each match inside a single attribute value. The earlier
    # `.*?` could run past a closing quote, so a non-jpg src followed later
    # by a jpg src was captured as one bogus URL spanning both tags.
    img = re.compile(r'src="([^"]*\.jpg)"')
    imglist = img.findall(html)

    os.makedirs(save_dir, exist_ok=True)
    for i, imgurl in enumerate(imglist):
        # NOTE: the .png suffix is kept for backward compatibility even
        # though the pattern only matches URLs ending in .jpg.
        target = os.path.join(save_dir, '%s.png' % i)
        urllib.request.urlretrieve(imgurl, target)
# Script body: download the sample Tieba thread page and echo its decoded
# HTML to standard output.
html = getHtml(
    "http://tieba.baidu.com/p/2125112048#!/l/p1"
)
print(html)