1. 元字符
import re

# Sample text that mixes letters and digits.
a = 'dfsdfpy111thondf123123123dfd'
# \d matches a digit; \D matches any character that is not a digit.
# The pattern is a raw string so the backslash reaches the regex engine
# untouched instead of relying on Python's deprecated handling of
# unrecognised escape sequences in ordinary string literals.
ret = re.findall(r'\D', a)
print(ret)
2. 字符集
import re

# Three-letter words that differ only in their middle letter.
a = 'abc, acc, adc, aec, afc, ahc'

# 'a[cf]c'  : middle letter is c or f                   -> ['acc', 'afc']
# 'a[^cf]c' : ^ negates the set (anything but c or f)   -> ['abc', 'adc', 'aec', 'ahc']
# 'a[c-f]c' : middle letter in the range c through f    -> ['acc', 'adc', 'aec', 'afc']
ret, ret1, ret2 = (
    re.findall(pattern, a)
    for pattern in ('a[cf]c', 'a[^cf]c', 'a[c-f]c')
)
print(ret2)
3. 概括字符集
# \w matches a word character; \W matches anything that is not a word character.
# \s matches a whitespace character; \S matches anything that is not whitespace.
import re

# The subject contains a literal backslash followed by "s". It is written as
# "\\s" so the string literal holds the same two characters without using an
# invalid escape sequence.
a = 'abc\n4\\s342 fdf\ra\tahc'
# Patterns are raw strings so the regex engine receives the backslash as-is.
ret = re.findall(r'\s', a)  # ['\n', ' ', '\r', '\t']
ret1 = re.findall(r'\W', a)  # ['\n', '\\', ' ', '\r', '\t']
ret2 = re.findall(r'\w', a)  # ['a', 'b', 'c', '4', 's', '3', '4', '2', 'f', 'd', 'f', 'a', 'a', 'h', 'c']
print(ret2)
4. 数量词
import re

a = 'python php java'
# {3,6} is a quantifier: the preceding [a-z] must repeat between 3 and 6 times.
lowercase_word = re.compile('[a-z]{3,6}')
ret = lowercase_word.findall(a)  # ['python', 'php', 'java']
print(ret)
5. 贪婪模式
import re

a = 'pythonq12334 php24111444java'
# Quantifiers are greedy by default: {3,6} grabs as many characters as it
# can, i.e. up to 6.
# Appending ? to the quantifier makes it non-greedy: it stops as soon as the
# minimum (3) is satisfied.
ret = [m.group() for m in re.finditer('[a-z]{3,6}', a)]  # ['python', 'php', 'java']
ret1 = [m.group() for m in re.finditer('[a-z]{3,6}?', a)]  # ['pyt', 'hon', 'php', 'jav']
print(ret)
6. 匹配0次、1次或者无限多次
import re

# *  -> the preceding item zero or more times
# +  -> the preceding item one or more times
# ?  -> the preceding item zero times or once
# Note: in every pattern below the quantifier applies only to the final "n".
a = 'ipytho12334 python24111pythonn444java'
ret1, ret2, ret3, ret4, ret5 = [
    re.findall(pattern, a)
    for pattern in (
        'python*',        # ['pytho', 'python', 'pythonn']
        'python+',        # ['python', 'pythonn']
        'python?',        # ['pytho', 'python', 'python']
        'python{1,2}',    # ['python', 'pythonn']
        'python{1,2}?',   # ['python', 'python']
    )
]
print(ret4)
7. 边界
import re

a = '1000001'  # len=7
# Patterns containing \d are raw strings so the backslash is not treated as
# a (deprecated, invalid) Python string escape.
# Unanchored: the first run of 4-6 digits is taken; the leftover '1' is too short.
ret = re.findall(r'\d{4,6}', a)  # ['100000']
# ^ and $ anchor the pattern to the start and end, so the whole string must fit.
ret1 = re.findall(r'^\d{4,7}$', a)  # ['1000001']
# findall returns non-overlapping matches, so five zeros yield a single '000'.
ret2 = re.findall('000', a)  # ['000']
# The string starts with '1', so '000' cannot match at the start...
ret3 = re.findall('^000', a)  # []
# ...and it ends with '1', so '000' cannot match at the end either.
ret4 = re.findall('000$', a)  # []
print(ret3)
8. 匹配模式参数
# .    matches any character except the newline \n
# re.I makes matching case-insensitive
# re.S lets . match every character, the newline included
a = 'Pythonc#\nPythonPython'
ret = re.compile('C#{1}', re.I).findall(a)  # ['c#']
# Without re.S the character after 'c#' is a newline, which . refuses to match.
ret1 = re.compile('C#.{1}', re.I).findall(a)  # []
# Flags are combined with the bitwise OR operator.
ret2 = re.compile('C#.{1}', re.I | re.S).findall(a)  # ['c#\n']
print(ret)
9. re.sub正则替换
import re

a = 'Pythonc#Pythonc#Pythc#on'


def convert(value):
    """Return the matched text wrapped in exclamation marks.

    re.sub calls this once per match, passing the match object as *value*;
    the returned string replaces that match in the result.
    """
    matched = value.group()
    return "!" + matched + "!"


# The replacement argument may be a function instead of a fixed string.
ret = re.sub('c#', convert, a)
print(ret)  # Python!c#!Python!c#!Pyth!c#!on
10. match, search
import re

a = 'Ab34nkn423h2kl4999999'
# re.match only tries the pattern at the very start of the string; 'A' is
# not a digit, so the result is None.
ret = re.match(r'\d', a)  # None
# re.search scans forward through the string and returns a match object for
# the first position where the pattern matches.
ret = re.search(r'\d', a)  # <re.Match object; span=(2, 3), match='3'>
# Call ret.group() to obtain the matched text ('3') rather than the match object.
print(ret)  # <re.Match object; span=(2, 3), match='3'>
11. group分组
a = 'life is short, i use python, i love python'
# 1) groups() returns all captured groups as a tuple.
ret = re.search('life(.*)python(.*)python', a).groups()  # (' is short, i use ', ', i love ')
# 2) group(0) is the entire match; group(n) is the n-th captured group.
ret2 = re.search('life(.*)python(.*)python', a).group(1)  # ' is short, i use '
# With several groups in the pattern, findall yields one tuple per match.
ret = re.findall('life (.*) python, (.*)', a)  # [('is short, i use', 'i love python')]
print(ret2)