-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmm.py
executable file
·95 lines (76 loc) · 3.08 KB
/
mm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# coding=utf-8
import requests
import re#正则表达式
import os
import urllib.request
import threading
# Headers sent by getLink when requesting an album page: a desktop-browser
# User-Agent string and a fixed Referer.
# NOTE(review): the Referer points at a different host than the pages
# fetched below - confirm whether that is intentional.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36'
    ),
    'Referer': 'http://210.41.224.117/Login/xLogin/Login.asp',
}
def save(file_path, response):
    """Write the body of an HTTP response to a file in binary mode.

    Args:
        file_path: Destination path; an existing file is overwritten.
        response: Object whose ``content`` attribute holds the bytes to write.
    """
    # The ``with`` block closes the file on exit, so no explicit close()
    # call is needed.
    with open(file_path, 'wb') as f:
        f.write(response.content)
def getLink(page_link, timeout=10):
    """Fetch a page and extract image-URL parts and album page links.

    Args:
        page_link: URL of the page to download.
        timeout: Timeout handed to ``requests.get`` so that a stalled
            connection cannot block the caller forever.

    Returns:
        A tuple ``(all_pic_link, total)``:

        * ``all_pic_link`` - list of 3-tuples of strings, one per ``<img>``
          tag whose ``src`` is
          ``http://fm.shiyunjj.com/<4 digits>/<1-4 digits>/<1 digit>.jpg``;
          the tuple holds those three digit groups.
        * ``total`` - list of 2-tuples of strings, one per anchor whose
          ``href`` is ``/mm/<1-4 digits>/<2 digits>``.

        Either list is empty when the page contains no matching markup.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    page_html = requests.get(page_link, headers=header, timeout=timeout)
    html = page_html.text
    # Raw strings with escaped dots: an unescaped "." would match any
    # character, not just the literal dots of the host name and extension.
    # With no wildcard "." left in either pattern, re.S has no effect and
    # is omitted.
    all_pic_link = re.findall(
        r'<img src="http://fm\.shiyunjj\.com/([0-9]{4})/([0-9]{1,4})/([0-9])\.jpg"',
        html,
    )
    total = re.findall(r'<a href="/mm/([0-9]{1,4})/([0-9]{2})">', html)
    return all_pic_link, total
def getPicture(Url, path):
    """Start one download thread per missing image of an album.

    The page at ``Url`` is parsed with ``getLink``. The first image match
    supplies the two URL path segments shared by all images, and the second
    group of the first page-link match is used as the number of images.

    Args:
        Url: Address of the album page.
        path: Sub-directory name under ``mm/picture/`` that receives the
            images, stored as ``1.jpg``, ``2.jpg``, ...
    """
    all_pic_link, total = getLink(Url)
    print(all_pic_link)
    # Both lists are indexed below; a page without the expected markup yields
    # empty lists, so skip this album instead of raising IndexError.
    if not all_pic_link or not total:
        print('No image or page links found at %s; skipping' % Url)
        return
    part1 = all_pic_link[0][0]
    part2 = all_pic_link[0][1]
    print(part1)
    print(total[0][1])

    # The target directory does not depend on the loop variable, so create it
    # once up front rather than re-checking it for every image.
    dir_path = "mm/picture/" + path
    os.makedirs(dir_path, exist_ok=True)

    for i in range(int(total[0][1])):
        file_path = dir_path + "/" + str(i + 1) + ".jpg"
        # Images already on disk are skipped, so a rerun only fetches what is
        # still missing.
        if os.path.exists(file_path):
            continue
        pic_link = "http://fm.shiyunjj.com/" + part1 + "/" + part2 + "/" + str(i + 1) + ".jpg"
        # One thread per image; 0.1 is the timeout of the first attempt.
        myThread(i, file_path, pic_link, part2, i, 0.1).start()
def connectWeb(file_path, pic_link, part2, i, t, max_retries=8):
    """Download one image and save it, retrying with a doubled timeout.

    Each failed attempt is reported and followed by another attempt whose
    timeout is twice the previous one. Retries are bounded: the earlier
    recursive form retried forever on any error, so a permanent failure
    (for example an unwritable ``file_path``) never terminated.

    Args:
        file_path: Where the downloaded image is written.
        pic_link: URL of the image.
        part2: Path segment inserted into the ``Referer`` header.
        i: Zero-based image index; messages display ``i + 1``.
        t: Timeout passed to ``requests.get`` on the first attempt.
        max_retries: Number of extra attempts made after the first failure.

    Returns:
        None, whether the download succeeded or was abandoned.
    """
    request_headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36'
        ),
        'Referer': 'http://www.mmjpg.com/mm/' + part2 + '/1',
    }
    timeout = t
    attempts = max_retries + 1
    for _ in range(attempts):
        try:
            response = requests.get(pic_link, headers=request_headers, timeout=timeout)
            print('正在下载第%s张图片' % (i + 1))
            print(pic_link)
            save(file_path, response)
            print('第%s张图片下载成功!TimeOut=%f' % (i + 1, timeout))
            return
        except Exception as e:
            # Deliberately broad: this is the outermost handler of a worker
            # thread and the download is best-effort. The failure is reported
            # and the next attempt gets twice the time.
            print("出现异常-->" + str(e))
            timeout += timeout
    print('Giving up on image %s after %d attempts: %s' % (i + 1, attempts, pic_link))
class myThread(threading.Thread):
    """Thread that runs a single ``connectWeb`` call with stored arguments."""

    def __init__(self, threadID, file_path, pic_link, part2, i, t):
        """Remember the thread label and the arguments for ``connectWeb``."""
        super().__init__()
        # Label printed in the start/exit messages.
        self.threadID = threadID
        # The remaining values are forwarded unchanged to connectWeb in run().
        self.file_path = file_path
        self.pic_link = pic_link
        self.part2 = part2
        self.i = i
        self.t = t

    def run(self):
        """Print a start marker, call ``connectWeb``, print an exit marker."""
        print('Starting%d' % self.threadID)
        connectWeb(
            self.file_path,
            self.pic_link,
            self.part2,
            self.i,
            self.t,
        )
        print('Exiting%d' % self.threadID)
# Common prefix of the album page addresses; the album number is appended.
_Url = "http://www.mmjpg.com/mm/"

# Process albums 1 through 5. The album number is used both as the URL suffix
# and as the name of the directory the images are stored in.
for i in range(5):
    path = str(i + 1)
    Url = _Url + path
    getPicture(Url, path)