# Regular-expression (`re` module) tutorial examples: match, search, findall,
# sub and compile. Most examples are commented out; uncomment one section at
# a time to run it.
# NOTE(review): the scraped page header names this file "re.py". If that is the
# real filename, `import re` may resolve to this script instead of the
# standard-library module when the script's directory is first on sys.path --
# consider renaming the file; confirm against the repository.
# Matching with match()
# import re
# content = 'Hello 123 4567 World_This is a Regex Demo'
# result = re.match('^Hello\s\d\d\d\s\d{4}\s\w{10}', content)
# print(result)
# print(result.group())
# print(result.span())
# Matching targets (capture groups)
# import re
#
# content = 'Hello 1234567 World_This is a Regex Demo'
# result = re.match('^Hello\s(\d+)\sWorld', content)
# print(result)
# print(result.group())
# print(result.group(1))
# print(result.span())
# Generic matching (.*)
# import re
#
# content = 'Hello 123 4567 World_This is a Regex Demo'
# result = re.match('^Hello.*Demo$', content)
# print(result)
# print(result.group())
# print(result.span())
# Greedy vs. non-greedy matching
# import re
#
# content = 'Hello 1234567 World_This is a Regex Demo'
# result = re.match('^He.*(\d+).*Demo$', content)
# print(result)
# print(result.group(1))
# import re
# content = 'Hello 1234567 World_This is a Regex Demo'
# result = re.match('^He.*?(\d+).*Demo$', content)
# print(result)
# print(result.group(1))
# import re
#
# content = 'http://weibo.com/comment/kEraCN'
# result1=re.match('')
# result1 = re.match('http.*?comment/(.*?)', content)
# result2 = re.match('http.*?comment/(.*)', content)
# print('result1', result1.group(1))
# print('result2', result2.group(1))
# Modifiers (flags)
# import re
#
# content = '''Hello 1234567 World_This
# is a Regex Demo
# '''
# result = re.match('^He.*?(\d+).*?Demo$', content,re.S)
# print(result.group(1))
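# Table 3-3: Modifiers (flags)
#
# Modifier  Description
# re.I      Make matching case-insensitive
# re.L      Perform locale-aware matching
# re.M      Multi-line matching; affects ^ and $
# re.S      Make . match any character, including newline
# re.U      Interpret characters according to the Unicode character set; affects \w, \W, \b and \B
# re.X      Allow a more flexible layout so the regular expression can be written more readably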
# Matching escaped characters
# import re
#
# content = '(百度) www.baidu.com'
# result = re.match('^\(百度\)(.*?m$)', content)
# print(result.group(1))
# 3. search
# import re
#
# content = 'Extra stings Hello 1234567 World_This is a Regex Demo Extra stings'
# result = re.search('Hello.*?(\d+).*?Demo', content)
# print(result)
#
import re

# Sample markup shared by the search and findall examples below.
html = '''<div id="songs-list">
<h2 class="title"> 经典老歌 </h2>
<p class="introduction">
经典老歌列表
</p>
<ul id="list" class="list-group">
<li data-view="2"> 一路上有你 </li>
<li data-view="7">
<a href="/2.mp3" singer="任贤齐"> 沧海一声笑 </a>
</li>
<li data-view="4" class="active">
<a href="/3.mp3" singer="齐秦"> 往事随风 </a>
</li>
<li data-view="6"><a href="/4.mp3" singer="beyond"> 光辉岁月 </a></li>
<li data-view="5"><a href="/5.mp3" singer="陈慧琳"> 记事本 </a></li>
<li data-view="5">
<a href="/6.mp3" singer="邓丽君"> 但愿人长久 </a>
</li>
</ul>
</div>'''

# search example (disabled): singer and title of the <li> whose data-view is "6".
# import re
# result=re.search('<li.*?view="6".*?singer="(.*?)">(.*?)<.*?>',html,re.S)
# print(result.group(1),result.group(2))

# 4. findall
# Collect one (href, singer, title) tuple per linked song.
# re.S lets "." match newlines, which is required because several <li>
# elements in the sample span more than one line.
# Note: the first <li> has no <a>, so the lazy "<li.*?href" runs past it to
# the next link; that item's own title is therefore not reported separately.
results = re.findall(
    r'<li.*?href="(.*?)".*?singer="(.*?)">(.*?)<.*?</li>',
    html,
    re.S,
)
for song in results:
    print(song)
# 5. sub
# import re
#
# content = '54aK54yr5oiR54ix5L2g'
# content = re.sub('\d{2}ix\d{1}', '', content)
# print(content)
# # 6. compile
# import re
#
# content1 = '2016-12-15 12:00'
# content2 = '2016-12-17 12:55'
# content3 = '2016-12-22 13:21'
# pattern=re.compile('\d{2}:\d{2}')
# result1=re.sub(pattern," ",content1)
# result2 = re.sub(pattern, '', content2)
# result3 = re.sub(pattern, '', content3)
# print(result1, result2, result3)
#
#