Regular expressions are a tool for searching for patterns in text strings.
This is a universal concept, available in virtually every programming language and text-editing program.
We're going to learn the concepts while we learn the syntax for Python.
The goal of regular expressions is to be able to search for a specific type of text inside of a string. For example, if we have a form on our webpage that asks for an email address, can we check whether the inputted string actually follows the form of an email? That form is: some letters, numbers, or special characters; then an @ sign; then some more letters, numbers, or special characters; then a "."; then a few more letters.
. - Any Character Except New Line
\d - Digit (0-9)
\D - Not a Digit (0-9)
\w - Word Character (a-z, A-Z, 0-9, _)
\W - Not a Word Character
\s - Whitespace (space, tab, newline)
\S - Not Whitespace (space, tab, newline)
\b - Word Boundary
\B - Not a Word Boundary
^ - Beginning of a String
$ - End of a String
[] - Matches Characters in brackets
[^ ] - Matches Characters NOT in brackets
| - Either Or
( ) - Group
Quantifiers:
* - 0 or More
+ - 1 or More
? - 0 or 1
{3} - Exact Number
{3,4} - Range of Numbers (Minimum, Maximum)
import re
# Sample text that every pattern in this tutorial is run against.
# The blank separator lines are significant: the match spans recorded in the
# outputs (e.g. 'abc' at 69-72, 'Hello' at 74-79) only line up when they are
# present.
# NOTE: the lowercase alphabet line contains a typo ("qurtuv" instead of
# "qrstuv"); it is kept because the recorded match outputs reflect it.
# A raw string is used so the lone backslash in the metacharacter line is not
# treated as the start of an (invalid) escape sequence.
text_to_search = r'''
abcdefghijklmnopqurtuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890
123abc

Hello HelloHello

MetaCharacters (Need to be escaped):
. ^ $ * + ? { } [ ] \ | ( )

utexas.edu

321-555-4321
123.555.1234

daniel-mitchell@utexas.edu

Mr. Johnson
Mr Smith
Ms Davis
Mrs. Robinson
Mr. T
'''
# Literal match: find the characters "abc" in exactly that order.
# finditer yields one match object (with its span) per non-overlapping hit.
pattern = re.compile(r'abc')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(1, 4), match='abc'> <re.Match object; span=(69, 72), match='abc'>
# Slice the string with the span reported for the second match (69, 72)
# to confirm that the span indexes straight into the searched text.
print(text_to_search[69:72])
abc
# Order matters in a literal pattern: "cba" never occurs in the sample text,
# so this loop prints nothing.
pattern = re.compile(r'cba')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
# An unescaped "." is a metacharacter: it matches any single character
# except a newline, so every non-newline character is reported.
pattern = re.compile(r'.')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(1, 2), match='a'> <re.Match object; span=(2, 3), match='b'> <re.Match object; span=(3, 4), match='c'> <re.Match object; span=(4, 5), match='d'> <re.Match object; span=(5, 6), match='e'> <re.Match object; span=(6, 7), match='f'> <re.Match object; span=(7, 8), match='g'> <re.Match object; span=(8, 9), match='h'> <re.Match object; span=(9, 10), match='i'> <re.Match object; span=(10, 11), match='j'> <re.Match object; span=(11, 12), match='k'> <re.Match object; span=(12, 13), match='l'> <re.Match object; span=(13, 14), match='m'> <re.Match object; span=(14, 15), match='n'> <re.Match object; span=(15, 16), match='o'> <re.Match object; span=(16, 17), match='p'> <re.Match object; span=(17, 18), match='q'> <re.Match object; span=(18, 19), match='u'> <re.Match object; span=(19, 20), match='r'> <re.Match object; span=(20, 21), match='t'> <re.Match object; span=(21, 22), match='u'> <re.Match object; span=(22, 23), match='v'> <re.Match object; span=(23, 24), match='w'> <re.Match object; span=(24, 25), match='x'> <re.Match object; span=(25, 26), match='y'> <re.Match object; span=(26, 27), match='z'> <re.Match object; span=(28, 29), match='A'> <re.Match object; span=(29, 30), match='B'> <re.Match object; span=(30, 31), match='C'> <re.Match object; span=(31, 32), match='D'> <re.Match object; span=(32, 33), match='E'> <re.Match object; span=(33, 34), match='F'> <re.Match object; span=(34, 35), match='G'> <re.Match object; span=(35, 36), match='H'> <re.Match object; span=(36, 37), match='I'> <re.Match object; span=(37, 38), match='J'> <re.Match object; span=(38, 39), match='K'> <re.Match object; span=(39, 40), match='L'> <re.Match object; span=(40, 41), match='M'> <re.Match object; span=(41, 42), match='N'> <re.Match object; span=(42, 43), match='O'> <re.Match object; span=(43, 44), match='P'> <re.Match object; span=(44, 45), match='Q'> <re.Match object; span=(45, 46), match='R'> <re.Match object; span=(46, 47), match='S'> <re.Match object; span=(47, 48), 
match='T'> <re.Match object; span=(48, 49), match='U'> <re.Match object; span=(49, 50), match='V'> <re.Match object; span=(50, 51), match='W'> <re.Match object; span=(51, 52), match='X'> <re.Match object; span=(52, 53), match='Y'> <re.Match object; span=(53, 54), match='Z'> <re.Match object; span=(55, 56), match='1'> <re.Match object; span=(56, 57), match='2'> <re.Match object; span=(57, 58), match='3'> <re.Match object; span=(58, 59), match='4'> <re.Match object; span=(59, 60), match='5'> <re.Match object; span=(60, 61), match='6'> <re.Match object; span=(61, 62), match='7'> <re.Match object; span=(62, 63), match='8'> <re.Match object; span=(63, 64), match='9'> <re.Match object; span=(64, 65), match='0'> <re.Match object; span=(66, 67), match='1'> <re.Match object; span=(67, 68), match='2'> <re.Match object; span=(68, 69), match='3'> <re.Match object; span=(69, 70), match='a'> <re.Match object; span=(70, 71), match='b'> <re.Match object; span=(71, 72), match='c'> <re.Match object; span=(74, 75), match='H'> <re.Match object; span=(75, 76), match='e'> <re.Match object; span=(76, 77), match='l'> <re.Match object; span=(77, 78), match='l'> <re.Match object; span=(78, 79), match='o'> <re.Match object; span=(79, 80), match=' '> <re.Match object; span=(80, 81), match='H'> <re.Match object; span=(81, 82), match='e'> <re.Match object; span=(82, 83), match='l'> <re.Match object; span=(83, 84), match='l'> <re.Match object; span=(84, 85), match='o'> <re.Match object; span=(85, 86), match='H'> <re.Match object; span=(86, 87), match='e'> <re.Match object; span=(87, 88), match='l'> <re.Match object; span=(88, 89), match='l'> <re.Match object; span=(89, 90), match='o'> <re.Match object; span=(92, 93), match='M'> <re.Match object; span=(93, 94), match='e'> <re.Match object; span=(94, 95), match='t'> <re.Match object; span=(95, 96), match='a'> <re.Match object; span=(96, 97), match='C'> <re.Match object; span=(97, 98), match='h'> <re.Match object; span=(98, 99), match='a'> 
<re.Match object; span=(99, 100), match='r'> <re.Match object; span=(100, 101), match='a'> <re.Match object; span=(101, 102), match='c'> <re.Match object; span=(102, 103), match='t'> <re.Match object; span=(103, 104), match='e'> <re.Match object; span=(104, 105), match='r'> <re.Match object; span=(105, 106), match='s'> <re.Match object; span=(106, 107), match=' '> <re.Match object; span=(107, 108), match='('> <re.Match object; span=(108, 109), match='N'> <re.Match object; span=(109, 110), match='e'> <re.Match object; span=(110, 111), match='e'> <re.Match object; span=(111, 112), match='d'> <re.Match object; span=(112, 113), match=' '> <re.Match object; span=(113, 114), match='t'> <re.Match object; span=(114, 115), match='o'> <re.Match object; span=(115, 116), match=' '> <re.Match object; span=(116, 117), match='b'> <re.Match object; span=(117, 118), match='e'> <re.Match object; span=(118, 119), match=' '> <re.Match object; span=(119, 120), match='e'> <re.Match object; span=(120, 121), match='s'> <re.Match object; span=(121, 122), match='c'> <re.Match object; span=(122, 123), match='a'> <re.Match object; span=(123, 124), match='p'> <re.Match object; span=(124, 125), match='e'> <re.Match object; span=(125, 126), match='d'> <re.Match object; span=(126, 127), match=')'> <re.Match object; span=(127, 128), match=':'> <re.Match object; span=(129, 130), match='.'> <re.Match object; span=(130, 131), match=' '> <re.Match object; span=(131, 132), match='^'> <re.Match object; span=(132, 133), match=' '> <re.Match object; span=(133, 134), match='$'> <re.Match object; span=(134, 135), match=' '> <re.Match object; span=(135, 136), match='*'> <re.Match object; span=(136, 137), match=' '> <re.Match object; span=(137, 138), match='+'> <re.Match object; span=(138, 139), match=' '> <re.Match object; span=(139, 140), match='?'> <re.Match object; span=(140, 141), match=' '> <re.Match object; span=(141, 142), match='{'> <re.Match object; span=(142, 143), match=' '> <re.Match object; 
span=(143, 144), match='}'> <re.Match object; span=(144, 145), match=' '> <re.Match object; span=(145, 146), match='['> <re.Match object; span=(146, 147), match=' '> <re.Match object; span=(147, 148), match=']'> <re.Match object; span=(148, 149), match=' '> <re.Match object; span=(149, 150), match='\\'> <re.Match object; span=(150, 151), match=' '> <re.Match object; span=(151, 152), match='|'> <re.Match object; span=(152, 153), match=' '> <re.Match object; span=(153, 154), match='('> <re.Match object; span=(154, 155), match=' '> <re.Match object; span=(155, 156), match=')'> <re.Match object; span=(158, 159), match='u'> <re.Match object; span=(159, 160), match='t'> <re.Match object; span=(160, 161), match='e'> <re.Match object; span=(161, 162), match='x'> <re.Match object; span=(162, 163), match='a'> <re.Match object; span=(163, 164), match='s'> <re.Match object; span=(164, 165), match='.'> <re.Match object; span=(165, 166), match='e'> <re.Match object; span=(166, 167), match='d'> <re.Match object; span=(167, 168), match='u'> <re.Match object; span=(170, 171), match='3'> <re.Match object; span=(171, 172), match='2'> <re.Match object; span=(172, 173), match='1'> <re.Match object; span=(173, 174), match='-'> <re.Match object; span=(174, 175), match='5'> <re.Match object; span=(175, 176), match='5'> <re.Match object; span=(176, 177), match='5'> <re.Match object; span=(177, 178), match='-'> <re.Match object; span=(178, 179), match='4'> <re.Match object; span=(179, 180), match='3'> <re.Match object; span=(180, 181), match='2'> <re.Match object; span=(181, 182), match='1'> <re.Match object; span=(183, 184), match='1'> <re.Match object; span=(184, 185), match='2'> <re.Match object; span=(185, 186), match='3'> <re.Match object; span=(186, 187), match='.'> <re.Match object; span=(187, 188), match='5'> <re.Match object; span=(188, 189), match='5'> <re.Match object; span=(189, 190), match='5'> <re.Match object; span=(190, 191), match='.'> <re.Match object; span=(191, 192), 
match='1'> <re.Match object; span=(192, 193), match='2'> <re.Match object; span=(193, 194), match='3'> <re.Match object; span=(194, 195), match='4'> <re.Match object; span=(197, 198), match='d'> <re.Match object; span=(198, 199), match='a'> <re.Match object; span=(199, 200), match='n'> <re.Match object; span=(200, 201), match='i'> <re.Match object; span=(201, 202), match='e'> <re.Match object; span=(202, 203), match='l'> <re.Match object; span=(203, 204), match='-'> <re.Match object; span=(204, 205), match='m'> <re.Match object; span=(205, 206), match='i'> <re.Match object; span=(206, 207), match='t'> <re.Match object; span=(207, 208), match='c'> <re.Match object; span=(208, 209), match='h'> <re.Match object; span=(209, 210), match='e'> <re.Match object; span=(210, 211), match='l'> <re.Match object; span=(211, 212), match='l'> <re.Match object; span=(212, 213), match='@'> <re.Match object; span=(213, 214), match='u'> <re.Match object; span=(214, 215), match='t'> <re.Match object; span=(215, 216), match='e'> <re.Match object; span=(216, 217), match='x'> <re.Match object; span=(217, 218), match='a'> <re.Match object; span=(218, 219), match='s'> <re.Match object; span=(219, 220), match='.'> <re.Match object; span=(220, 221), match='e'> <re.Match object; span=(221, 222), match='d'> <re.Match object; span=(222, 223), match='u'> <re.Match object; span=(225, 226), match='M'> <re.Match object; span=(226, 227), match='r'> <re.Match object; span=(227, 228), match='.'> <re.Match object; span=(228, 229), match=' '> <re.Match object; span=(229, 230), match='J'> <re.Match object; span=(230, 231), match='o'> <re.Match object; span=(231, 232), match='h'> <re.Match object; span=(232, 233), match='n'> <re.Match object; span=(233, 234), match='s'> <re.Match object; span=(234, 235), match='o'> <re.Match object; span=(235, 236), match='n'> <re.Match object; span=(237, 238), match='M'> <re.Match object; span=(238, 239), match='r'> <re.Match object; span=(239, 240), match=' '> <re.Match 
object; span=(240, 241), match='S'> <re.Match object; span=(241, 242), match='m'> <re.Match object; span=(242, 243), match='i'> <re.Match object; span=(243, 244), match='t'> <re.Match object; span=(244, 245), match='h'> <re.Match object; span=(246, 247), match='M'> <re.Match object; span=(247, 248), match='s'> <re.Match object; span=(248, 249), match=' '> <re.Match object; span=(249, 250), match='D'> <re.Match object; span=(250, 251), match='a'> <re.Match object; span=(251, 252), match='v'> <re.Match object; span=(252, 253), match='i'> <re.Match object; span=(253, 254), match='s'> <re.Match object; span=(255, 256), match='M'> <re.Match object; span=(256, 257), match='r'> <re.Match object; span=(257, 258), match='s'> <re.Match object; span=(258, 259), match='.'> <re.Match object; span=(259, 260), match=' '> <re.Match object; span=(260, 261), match='R'> <re.Match object; span=(261, 262), match='o'> <re.Match object; span=(262, 263), match='b'> <re.Match object; span=(263, 264), match='i'> <re.Match object; span=(264, 265), match='n'> <re.Match object; span=(265, 266), match='s'> <re.Match object; span=(266, 267), match='o'> <re.Match object; span=(267, 268), match='n'> <re.Match object; span=(269, 270), match='M'> <re.Match object; span=(270, 271), match='r'> <re.Match object; span=(271, 272), match='.'> <re.Match object; span=(272, 273), match=' '> <re.Match object; span=(273, 274), match='T'>
# Escaping the dot with a backslash makes it match only a literal period.
pattern = re.compile(r'\.')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(129, 130), match='.'> <re.Match object; span=(164, 165), match='.'> <re.Match object; span=(186, 187), match='.'> <re.Match object; span=(190, 191), match='.'> <re.Match object; span=(219, 220), match='.'> <re.Match object; span=(227, 228), match='.'> <re.Match object; span=(258, 259), match='.'> <re.Match object; span=(271, 272), match='.'>
# \d matches a single digit (0-9); each digit is reported as its own match.
pattern = re.compile(r'\d')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(55, 56), match='1'> <re.Match object; span=(56, 57), match='2'> <re.Match object; span=(57, 58), match='3'> <re.Match object; span=(58, 59), match='4'> <re.Match object; span=(59, 60), match='5'> <re.Match object; span=(60, 61), match='6'> <re.Match object; span=(61, 62), match='7'> <re.Match object; span=(62, 63), match='8'> <re.Match object; span=(63, 64), match='9'> <re.Match object; span=(64, 65), match='0'> <re.Match object; span=(66, 67), match='1'> <re.Match object; span=(67, 68), match='2'> <re.Match object; span=(68, 69), match='3'> <re.Match object; span=(170, 171), match='3'> <re.Match object; span=(171, 172), match='2'> <re.Match object; span=(172, 173), match='1'> <re.Match object; span=(174, 175), match='5'> <re.Match object; span=(175, 176), match='5'> <re.Match object; span=(176, 177), match='5'> <re.Match object; span=(178, 179), match='4'> <re.Match object; span=(179, 180), match='3'> <re.Match object; span=(180, 181), match='2'> <re.Match object; span=(181, 182), match='1'> <re.Match object; span=(183, 184), match='1'> <re.Match object; span=(184, 185), match='2'> <re.Match object; span=(185, 186), match='3'> <re.Match object; span=(187, 188), match='5'> <re.Match object; span=(188, 189), match='5'> <re.Match object; span=(189, 190), match='5'> <re.Match object; span=(191, 192), match='1'> <re.Match object; span=(192, 193), match='2'> <re.Match object; span=(193, 194), match='3'> <re.Match object; span=(194, 195), match='4'>
# \D is the negation of \d: it matches any single character that is not a
# digit, including newlines (unlike ".").
pattern = re.compile(r'\D')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(0, 1), match='\n'> <re.Match object; span=(1, 2), match='a'> <re.Match object; span=(2, 3), match='b'> <re.Match object; span=(3, 4), match='c'> <re.Match object; span=(4, 5), match='d'> <re.Match object; span=(5, 6), match='e'> <re.Match object; span=(6, 7), match='f'> <re.Match object; span=(7, 8), match='g'> <re.Match object; span=(8, 9), match='h'> <re.Match object; span=(9, 10), match='i'> <re.Match object; span=(10, 11), match='j'> <re.Match object; span=(11, 12), match='k'> <re.Match object; span=(12, 13), match='l'> <re.Match object; span=(13, 14), match='m'> <re.Match object; span=(14, 15), match='n'> <re.Match object; span=(15, 16), match='o'> <re.Match object; span=(16, 17), match='p'> <re.Match object; span=(17, 18), match='q'> <re.Match object; span=(18, 19), match='u'> <re.Match object; span=(19, 20), match='r'> <re.Match object; span=(20, 21), match='t'> <re.Match object; span=(21, 22), match='u'> <re.Match object; span=(22, 23), match='v'> <re.Match object; span=(23, 24), match='w'> <re.Match object; span=(24, 25), match='x'> <re.Match object; span=(25, 26), match='y'> <re.Match object; span=(26, 27), match='z'> <re.Match object; span=(27, 28), match='\n'> <re.Match object; span=(28, 29), match='A'> <re.Match object; span=(29, 30), match='B'> <re.Match object; span=(30, 31), match='C'> <re.Match object; span=(31, 32), match='D'> <re.Match object; span=(32, 33), match='E'> <re.Match object; span=(33, 34), match='F'> <re.Match object; span=(34, 35), match='G'> <re.Match object; span=(35, 36), match='H'> <re.Match object; span=(36, 37), match='I'> <re.Match object; span=(37, 38), match='J'> <re.Match object; span=(38, 39), match='K'> <re.Match object; span=(39, 40), match='L'> <re.Match object; span=(40, 41), match='M'> <re.Match object; span=(41, 42), match='N'> <re.Match object; span=(42, 43), match='O'> <re.Match object; span=(43, 44), match='P'> <re.Match object; span=(44, 45), match='Q'> <re.Match object; span=(45, 46), 
match='R'> <re.Match object; span=(46, 47), match='S'> <re.Match object; span=(47, 48), match='T'> <re.Match object; span=(48, 49), match='U'> <re.Match object; span=(49, 50), match='V'> <re.Match object; span=(50, 51), match='W'> <re.Match object; span=(51, 52), match='X'> <re.Match object; span=(52, 53), match='Y'> <re.Match object; span=(53, 54), match='Z'> <re.Match object; span=(54, 55), match='\n'> <re.Match object; span=(65, 66), match='\n'> <re.Match object; span=(69, 70), match='a'> <re.Match object; span=(70, 71), match='b'> <re.Match object; span=(71, 72), match='c'> <re.Match object; span=(72, 73), match='\n'> <re.Match object; span=(73, 74), match='\n'> <re.Match object; span=(74, 75), match='H'> <re.Match object; span=(75, 76), match='e'> <re.Match object; span=(76, 77), match='l'> <re.Match object; span=(77, 78), match='l'> <re.Match object; span=(78, 79), match='o'> <re.Match object; span=(79, 80), match=' '> <re.Match object; span=(80, 81), match='H'> <re.Match object; span=(81, 82), match='e'> <re.Match object; span=(82, 83), match='l'> <re.Match object; span=(83, 84), match='l'> <re.Match object; span=(84, 85), match='o'> <re.Match object; span=(85, 86), match='H'> <re.Match object; span=(86, 87), match='e'> <re.Match object; span=(87, 88), match='l'> <re.Match object; span=(88, 89), match='l'> <re.Match object; span=(89, 90), match='o'> <re.Match object; span=(90, 91), match='\n'> <re.Match object; span=(91, 92), match='\n'> <re.Match object; span=(92, 93), match='M'> <re.Match object; span=(93, 94), match='e'> <re.Match object; span=(94, 95), match='t'> <re.Match object; span=(95, 96), match='a'> <re.Match object; span=(96, 97), match='C'> <re.Match object; span=(97, 98), match='h'> <re.Match object; span=(98, 99), match='a'> <re.Match object; span=(99, 100), match='r'> <re.Match object; span=(100, 101), match='a'> <re.Match object; span=(101, 102), match='c'> <re.Match object; span=(102, 103), match='t'> <re.Match object; span=(103, 104), 
match='e'> <re.Match object; span=(104, 105), match='r'> <re.Match object; span=(105, 106), match='s'> <re.Match object; span=(106, 107), match=' '> <re.Match object; span=(107, 108), match='('> <re.Match object; span=(108, 109), match='N'> <re.Match object; span=(109, 110), match='e'> <re.Match object; span=(110, 111), match='e'> <re.Match object; span=(111, 112), match='d'> <re.Match object; span=(112, 113), match=' '> <re.Match object; span=(113, 114), match='t'> <re.Match object; span=(114, 115), match='o'> <re.Match object; span=(115, 116), match=' '> <re.Match object; span=(116, 117), match='b'> <re.Match object; span=(117, 118), match='e'> <re.Match object; span=(118, 119), match=' '> <re.Match object; span=(119, 120), match='e'> <re.Match object; span=(120, 121), match='s'> <re.Match object; span=(121, 122), match='c'> <re.Match object; span=(122, 123), match='a'> <re.Match object; span=(123, 124), match='p'> <re.Match object; span=(124, 125), match='e'> <re.Match object; span=(125, 126), match='d'> <re.Match object; span=(126, 127), match=')'> <re.Match object; span=(127, 128), match=':'> <re.Match object; span=(128, 129), match='\n'> <re.Match object; span=(129, 130), match='.'> <re.Match object; span=(130, 131), match=' '> <re.Match object; span=(131, 132), match='^'> <re.Match object; span=(132, 133), match=' '> <re.Match object; span=(133, 134), match='$'> <re.Match object; span=(134, 135), match=' '> <re.Match object; span=(135, 136), match='*'> <re.Match object; span=(136, 137), match=' '> <re.Match object; span=(137, 138), match='+'> <re.Match object; span=(138, 139), match=' '> <re.Match object; span=(139, 140), match='?'> <re.Match object; span=(140, 141), match=' '> <re.Match object; span=(141, 142), match='{'> <re.Match object; span=(142, 143), match=' '> <re.Match object; span=(143, 144), match='}'> <re.Match object; span=(144, 145), match=' '> <re.Match object; span=(145, 146), match='['> <re.Match object; span=(146, 147), match=' '> <re.Match 
object; span=(147, 148), match=']'> <re.Match object; span=(148, 149), match=' '> <re.Match object; span=(149, 150), match='\\'> <re.Match object; span=(150, 151), match=' '> <re.Match object; span=(151, 152), match='|'> <re.Match object; span=(152, 153), match=' '> <re.Match object; span=(153, 154), match='('> <re.Match object; span=(154, 155), match=' '> <re.Match object; span=(155, 156), match=')'> <re.Match object; span=(156, 157), match='\n'> <re.Match object; span=(157, 158), match='\n'> <re.Match object; span=(158, 159), match='u'> <re.Match object; span=(159, 160), match='t'> <re.Match object; span=(160, 161), match='e'> <re.Match object; span=(161, 162), match='x'> <re.Match object; span=(162, 163), match='a'> <re.Match object; span=(163, 164), match='s'> <re.Match object; span=(164, 165), match='.'> <re.Match object; span=(165, 166), match='e'> <re.Match object; span=(166, 167), match='d'> <re.Match object; span=(167, 168), match='u'> <re.Match object; span=(168, 169), match='\n'> <re.Match object; span=(169, 170), match='\n'> <re.Match object; span=(173, 174), match='-'> <re.Match object; span=(177, 178), match='-'> <re.Match object; span=(182, 183), match='\n'> <re.Match object; span=(186, 187), match='.'> <re.Match object; span=(190, 191), match='.'> <re.Match object; span=(195, 196), match='\n'> <re.Match object; span=(196, 197), match='\n'> <re.Match object; span=(197, 198), match='d'> <re.Match object; span=(198, 199), match='a'> <re.Match object; span=(199, 200), match='n'> <re.Match object; span=(200, 201), match='i'> <re.Match object; span=(201, 202), match='e'> <re.Match object; span=(202, 203), match='l'> <re.Match object; span=(203, 204), match='-'> <re.Match object; span=(204, 205), match='m'> <re.Match object; span=(205, 206), match='i'> <re.Match object; span=(206, 207), match='t'> <re.Match object; span=(207, 208), match='c'> <re.Match object; span=(208, 209), match='h'> <re.Match object; span=(209, 210), match='e'> <re.Match object; 
span=(210, 211), match='l'> <re.Match object; span=(211, 212), match='l'> <re.Match object; span=(212, 213), match='@'> <re.Match object; span=(213, 214), match='u'> <re.Match object; span=(214, 215), match='t'> <re.Match object; span=(215, 216), match='e'> <re.Match object; span=(216, 217), match='x'> <re.Match object; span=(217, 218), match='a'> <re.Match object; span=(218, 219), match='s'> <re.Match object; span=(219, 220), match='.'> <re.Match object; span=(220, 221), match='e'> <re.Match object; span=(221, 222), match='d'> <re.Match object; span=(222, 223), match='u'> <re.Match object; span=(223, 224), match='\n'> <re.Match object; span=(224, 225), match='\n'> <re.Match object; span=(225, 226), match='M'> <re.Match object; span=(226, 227), match='r'> <re.Match object; span=(227, 228), match='.'> <re.Match object; span=(228, 229), match=' '> <re.Match object; span=(229, 230), match='J'> <re.Match object; span=(230, 231), match='o'> <re.Match object; span=(231, 232), match='h'> <re.Match object; span=(232, 233), match='n'> <re.Match object; span=(233, 234), match='s'> <re.Match object; span=(234, 235), match='o'> <re.Match object; span=(235, 236), match='n'> <re.Match object; span=(236, 237), match='\n'> <re.Match object; span=(237, 238), match='M'> <re.Match object; span=(238, 239), match='r'> <re.Match object; span=(239, 240), match=' '> <re.Match object; span=(240, 241), match='S'> <re.Match object; span=(241, 242), match='m'> <re.Match object; span=(242, 243), match='i'> <re.Match object; span=(243, 244), match='t'> <re.Match object; span=(244, 245), match='h'> <re.Match object; span=(245, 246), match='\n'> <re.Match object; span=(246, 247), match='M'> <re.Match object; span=(247, 248), match='s'> <re.Match object; span=(248, 249), match=' '> <re.Match object; span=(249, 250), match='D'> <re.Match object; span=(250, 251), match='a'> <re.Match object; span=(251, 252), match='v'> <re.Match object; span=(252, 253), match='i'> <re.Match object; span=(253, 254), 
match='s'> <re.Match object; span=(254, 255), match='\n'> <re.Match object; span=(255, 256), match='M'> <re.Match object; span=(256, 257), match='r'> <re.Match object; span=(257, 258), match='s'> <re.Match object; span=(258, 259), match='.'> <re.Match object; span=(259, 260), match=' '> <re.Match object; span=(260, 261), match='R'> <re.Match object; span=(261, 262), match='o'> <re.Match object; span=(262, 263), match='b'> <re.Match object; span=(263, 264), match='i'> <re.Match object; span=(264, 265), match='n'> <re.Match object; span=(265, 266), match='s'> <re.Match object; span=(266, 267), match='o'> <re.Match object; span=(267, 268), match='n'> <re.Match object; span=(268, 269), match='\n'> <re.Match object; span=(269, 270), match='M'> <re.Match object; span=(270, 271), match='r'> <re.Match object; span=(271, 272), match='.'> <re.Match object; span=(272, 273), match=' '> <re.Match object; span=(273, 274), match='T'> <re.Match object; span=(274, 275), match='\n'>
# A digit immediately followed by a word character (letter, digit or "_").
# Matches do not overlap, so a run like "1234" is reported as "12" then "34".
pattern = re.compile(r'\d\w')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(55, 57), match='12'> <re.Match object; span=(57, 59), match='34'> <re.Match object; span=(59, 61), match='56'> <re.Match object; span=(61, 63), match='78'> <re.Match object; span=(63, 65), match='90'> <re.Match object; span=(66, 68), match='12'> <re.Match object; span=(68, 70), match='3a'> <re.Match object; span=(170, 172), match='32'> <re.Match object; span=(174, 176), match='55'> <re.Match object; span=(178, 180), match='43'> <re.Match object; span=(180, 182), match='21'> <re.Match object; span=(183, 185), match='12'> <re.Match object; span=(187, 189), match='55'> <re.Match object; span=(191, 193), match='12'> <re.Match object; span=(193, 195), match='34'>
# A digit immediately followed by whitespace (space, tab or newline).
# In the sample text this only happens where a digit ends a line.
pattern = re.compile(r'\d\s')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(64, 66), match='0\n'> <re.Match object; span=(181, 183), match='1\n'> <re.Match object; span=(194, 196), match='4\n'>
# Target line in the sample text: "Hello HelloHello"
# With no boundary anchors, all three occurrences of "Hello" match.
pattern = re.compile(r'Hello')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(74, 79), match='Hello'> <re.Match object; span=(80, 85), match='Hello'> <re.Match object; span=(85, 90), match='Hello'>
# A trailing \b requires a word boundary right after "Hello". The middle
# "Hello" in "Hello HelloHello" is followed directly by "H", so it is skipped.
pattern = re.compile(r'Hello\b')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(74, 79), match='Hello'> <re.Match object; span=(85, 90), match='Hello'>
# \b on both sides matches "Hello" only as a standalone word, which is just
# the first occurrence in "Hello HelloHello".
pattern = re.compile(r'\bHello\b')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(74, 79), match='Hello'>
# \B is "not a word boundary": "Hello" must start in the middle of a word and
# end at a boundary, which selects only the last "Hello" in "HelloHello".
pattern = re.compile(r'\BHello\b')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(85, 90), match='Hello'>
# A digit that sits at a word boundary, i.e. the first digit of each run of
# word characters that begins with a digit.
pattern = re.compile(r'\b\d')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(55, 56), match='1'> <re.Match object; span=(66, 67), match='1'> <re.Match object; span=(170, 171), match='3'> <re.Match object; span=(174, 175), match='5'> <re.Match object; span=(178, 179), match='4'> <re.Match object; span=(183, 184), match='1'> <re.Match object; span=(187, 188), match='5'> <re.Match object; span=(191, 192), match='1'>
# ^ anchors the match to the beginning of the string, so this only matches if
# the very first character is whitespace (here, the leading newline).
pattern = re.compile(r'^\s')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(0, 1), match='\n'>
# A character set matches exactly one character from the brackets:
# here a 1, 2 or 3, followed by any word character.
pattern = re.compile(r'[123]\w')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(55, 57), match='12'> <re.Match object; span=(57, 59), match='34'> <re.Match object; span=(66, 68), match='12'> <re.Match object; span=(68, 70), match='3a'> <re.Match object; span=(170, 172), match='32'> <re.Match object; span=(179, 181), match='32'> <re.Match object; span=(183, 185), match='12'> <re.Match object; span=(191, 193), match='12'> <re.Match object; span=(193, 195), match='34'>
# A dash inside brackets denotes a range: each [a-z] matches one lowercase
# letter, so this finds non-overlapping pairs of lowercase letters.
pattern = re.compile(r'[a-z][a-z]')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(1, 3), match='ab'> <re.Match object; span=(3, 5), match='cd'> <re.Match object; span=(5, 7), match='ef'> <re.Match object; span=(7, 9), match='gh'> <re.Match object; span=(9, 11), match='ij'> <re.Match object; span=(11, 13), match='kl'> <re.Match object; span=(13, 15), match='mn'> <re.Match object; span=(15, 17), match='op'> <re.Match object; span=(17, 19), match='qu'> <re.Match object; span=(19, 21), match='rt'> <re.Match object; span=(21, 23), match='uv'> <re.Match object; span=(23, 25), match='wx'> <re.Match object; span=(25, 27), match='yz'> <re.Match object; span=(69, 71), match='ab'> <re.Match object; span=(75, 77), match='el'> <re.Match object; span=(77, 79), match='lo'> <re.Match object; span=(81, 83), match='el'> <re.Match object; span=(83, 85), match='lo'> <re.Match object; span=(86, 88), match='el'> <re.Match object; span=(88, 90), match='lo'> <re.Match object; span=(93, 95), match='et'> <re.Match object; span=(97, 99), match='ha'> <re.Match object; span=(99, 101), match='ra'> <re.Match object; span=(101, 103), match='ct'> <re.Match object; span=(103, 105), match='er'> <re.Match object; span=(109, 111), match='ee'> <re.Match object; span=(113, 115), match='to'> <re.Match object; span=(116, 118), match='be'> <re.Match object; span=(119, 121), match='es'> <re.Match object; span=(121, 123), match='ca'> <re.Match object; span=(123, 125), match='pe'> <re.Match object; span=(158, 160), match='ut'> <re.Match object; span=(160, 162), match='ex'> <re.Match object; span=(162, 164), match='as'> <re.Match object; span=(165, 167), match='ed'> <re.Match object; span=(197, 199), match='da'> <re.Match object; span=(199, 201), match='ni'> <re.Match object; span=(201, 203), match='el'> <re.Match object; span=(204, 206), match='mi'> <re.Match object; span=(206, 208), match='tc'> <re.Match object; span=(208, 210), match='he'> <re.Match object; span=(210, 212), match='ll'> <re.Match object; span=(213, 215), match='ut'> <re.Match object; span=(215, 
217), match='ex'> <re.Match object; span=(217, 219), match='as'> <re.Match object; span=(220, 222), match='ed'> <re.Match object; span=(230, 232), match='oh'> <re.Match object; span=(232, 234), match='ns'> <re.Match object; span=(234, 236), match='on'> <re.Match object; span=(241, 243), match='mi'> <re.Match object; span=(243, 245), match='th'> <re.Match object; span=(250, 252), match='av'> <re.Match object; span=(252, 254), match='is'> <re.Match object; span=(256, 258), match='rs'> <re.Match object; span=(261, 263), match='ob'> <re.Match object; span=(263, 265), match='in'> <re.Match object; span=(265, 267), match='so'>
# Two-character match: one alphanumeric character followed by a letter or a
# hyphen. The second class uses `A-Z`, not `A-z`: `A-z` is the ASCII range
# 65-122, which would also admit [ \ ] ^ _ and the backtick.
pattern = re.compile(r'[a-zA-Z0-9][a-zA-Z-]')
matches = pattern.finditer(text_to_search)
for mat in matches:
    # Each item is an re.Match; printing it shows the span and matched text.
    print(mat)
<re.Match object; span=(1, 3), match='ab'> <re.Match object; span=(3, 5), match='cd'> <re.Match object; span=(5, 7), match='ef'> <re.Match object; span=(7, 9), match='gh'> <re.Match object; span=(9, 11), match='ij'> <re.Match object; span=(11, 13), match='kl'> <re.Match object; span=(13, 15), match='mn'> <re.Match object; span=(15, 17), match='op'> <re.Match object; span=(17, 19), match='qu'> <re.Match object; span=(19, 21), match='rt'> <re.Match object; span=(21, 23), match='uv'> <re.Match object; span=(23, 25), match='wx'> <re.Match object; span=(25, 27), match='yz'> <re.Match object; span=(28, 30), match='AB'> <re.Match object; span=(30, 32), match='CD'> <re.Match object; span=(32, 34), match='EF'> <re.Match object; span=(34, 36), match='GH'> <re.Match object; span=(36, 38), match='IJ'> <re.Match object; span=(38, 40), match='KL'> <re.Match object; span=(40, 42), match='MN'> <re.Match object; span=(42, 44), match='OP'> <re.Match object; span=(44, 46), match='QR'> <re.Match object; span=(46, 48), match='ST'> <re.Match object; span=(48, 50), match='UV'> <re.Match object; span=(50, 52), match='WX'> <re.Match object; span=(52, 54), match='YZ'> <re.Match object; span=(68, 70), match='3a'> <re.Match object; span=(70, 72), match='bc'> <re.Match object; span=(74, 76), match='He'> <re.Match object; span=(76, 78), match='ll'> <re.Match object; span=(80, 82), match='He'> <re.Match object; span=(82, 84), match='ll'> <re.Match object; span=(84, 86), match='oH'> <re.Match object; span=(86, 88), match='el'> <re.Match object; span=(88, 90), match='lo'> <re.Match object; span=(92, 94), match='Me'> <re.Match object; span=(94, 96), match='ta'> <re.Match object; span=(96, 98), match='Ch'> <re.Match object; span=(98, 100), match='ar'> <re.Match object; span=(100, 102), match='ac'> <re.Match object; span=(102, 104), match='te'> <re.Match object; span=(104, 106), match='rs'> <re.Match object; span=(108, 110), match='Ne'> <re.Match object; span=(110, 112), match='ed'> <re.Match object; 
span=(113, 115), match='to'> <re.Match object; span=(116, 118), match='be'> <re.Match object; span=(119, 121), match='es'> <re.Match object; span=(121, 123), match='ca'> <re.Match object; span=(123, 125), match='pe'> <re.Match object; span=(158, 160), match='ut'> <re.Match object; span=(160, 162), match='ex'> <re.Match object; span=(162, 164), match='as'> <re.Match object; span=(165, 167), match='ed'> <re.Match object; span=(172, 174), match='1-'> <re.Match object; span=(176, 178), match='5-'> <re.Match object; span=(197, 199), match='da'> <re.Match object; span=(199, 201), match='ni'> <re.Match object; span=(201, 203), match='el'> <re.Match object; span=(204, 206), match='mi'> <re.Match object; span=(206, 208), match='tc'> <re.Match object; span=(208, 210), match='he'> <re.Match object; span=(210, 212), match='ll'> <re.Match object; span=(213, 215), match='ut'> <re.Match object; span=(215, 217), match='ex'> <re.Match object; span=(217, 219), match='as'> <re.Match object; span=(220, 222), match='ed'> <re.Match object; span=(225, 227), match='Mr'> <re.Match object; span=(229, 231), match='Jo'> <re.Match object; span=(231, 233), match='hn'> <re.Match object; span=(233, 235), match='so'> <re.Match object; span=(237, 239), match='Mr'> <re.Match object; span=(240, 242), match='Sm'> <re.Match object; span=(242, 244), match='it'> <re.Match object; span=(246, 248), match='Ms'> <re.Match object; span=(249, 251), match='Da'> <re.Match object; span=(251, 253), match='vi'> <re.Match object; span=(255, 257), match='Mr'> <re.Match object; span=(260, 262), match='Ro'> <re.Match object; span=(262, 264), match='bi'> <re.Match object; span=(264, 266), match='ns'> <re.Match object; span=(266, 268), match='on'> <re.Match object; span=(269, 271), match='Mr'>
# Two-character match: a letter followed by any character that is NOT a letter
# (newlines included, since a negated class matches them).
# The negated class uses `A-Z`, not `A-z`: `A-z` is the ASCII range 65-122, so
# negating it would wrongly treat [ \ ] ^ _ and the backtick as letters.
pattern = re.compile(r'[a-zA-Z][^a-zA-Z]')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(26, 28), match='z\n'> <re.Match object; span=(53, 55), match='Z\n'> <re.Match object; span=(71, 73), match='c\n'> <re.Match object; span=(78, 80), match='o '> <re.Match object; span=(89, 91), match='o\n'> <re.Match object; span=(105, 107), match='s '> <re.Match object; span=(111, 113), match='d '> <re.Match object; span=(114, 116), match='o '> <re.Match object; span=(117, 119), match='e '> <re.Match object; span=(125, 127), match='d)'> <re.Match object; span=(163, 165), match='s.'> <re.Match object; span=(167, 169), match='u\n'> <re.Match object; span=(202, 204), match='l-'> <re.Match object; span=(211, 213), match='l@'> <re.Match object; span=(218, 220), match='s.'> <re.Match object; span=(222, 224), match='u\n'> <re.Match object; span=(226, 228), match='r.'> <re.Match object; span=(235, 237), match='n\n'> <re.Match object; span=(238, 240), match='r '> <re.Match object; span=(244, 246), match='h\n'> <re.Match object; span=(247, 249), match='s '> <re.Match object; span=(253, 255), match='s\n'> <re.Match object; span=(257, 259), match='s.'> <re.Match object; span=(267, 269), match='n\n'> <re.Match object; span=(270, 272), match='r.'> <re.Match object; span=(273, 275), match='T\n'>
# Alternation inside a group: "abc", "edu" or "texas", but only where the
# alternative is immediately followed by a word boundary (\b).
pattern = re.compile(r'(abc|edu|texas)\b')
matches = pattern.finditer(text_to_search)
for found in matches:
    print(found)
<re.Match object; span=(69, 72), match='abc'> <re.Match object; span=(159, 164), match='texas'> <re.Match object; span=(165, 168), match='edu'> <re.Match object; span=(214, 219), match='texas'> <re.Match object; span=(220, 223), match='edu'>
# Either a single uppercase letter or the literal "llo", followed by one
# letter of any case. The trailing class uses `A-Z`, not `A-z`: `A-z` is the
# ASCII range 65-122, which would also admit [ \ ] ^ _ and the backtick.
pattern = re.compile(r'([A-Z]|llo)[a-zA-Z]')
matches = pattern.finditer(text_to_search)
for mat in matches:
    print(mat)
<re.Match object; span=(28, 30), match='AB'> <re.Match object; span=(30, 32), match='CD'> <re.Match object; span=(32, 34), match='EF'> <re.Match object; span=(34, 36), match='GH'> <re.Match object; span=(36, 38), match='IJ'> <re.Match object; span=(38, 40), match='KL'> <re.Match object; span=(40, 42), match='MN'> <re.Match object; span=(42, 44), match='OP'> <re.Match object; span=(44, 46), match='QR'> <re.Match object; span=(46, 48), match='ST'> <re.Match object; span=(48, 50), match='UV'> <re.Match object; span=(50, 52), match='WX'> <re.Match object; span=(52, 54), match='YZ'> <re.Match object; span=(74, 76), match='He'> <re.Match object; span=(80, 82), match='He'> <re.Match object; span=(82, 86), match='lloH'> <re.Match object; span=(92, 94), match='Me'> <re.Match object; span=(96, 98), match='Ch'> <re.Match object; span=(108, 110), match='Ne'> <re.Match object; span=(225, 227), match='Mr'> <re.Match object; span=(229, 231), match='Jo'> <re.Match object; span=(237, 239), match='Mr'> <re.Match object; span=(240, 242), match='Sm'> <re.Match object; span=(246, 248), match='Ms'> <re.Match object; span=(249, 251), match='Da'> <re.Match object; span=(255, 257), match='Mr'> <re.Match object; span=(260, 262), match='Ro'> <re.Match object; span=(269, 271), match='Mr'>
# "Mr", an optional literal period, one whitespace character, then a single
# uppercase letter (only the first letter of the name is captured).
pattern = re.compile(r'Mr\.?\s[A-Z]')
matches = pattern.finditer(text_to_search)
for found in matches:
    print(found)
<re.Match object; span=(225, 230), match='Mr. J'> <re.Match object; span=(237, 241), match='Mr S'> <re.Match object; span=(269, 274), match='Mr. T'>
# Same "Mr"/"Mr." prefix as before, but the `[a-z]*` quantifier extends the
# match over zero or more lowercase letters after the capital initial.
pattern = re.compile(r'Mr\.?\s[A-Z][a-z]*')
matches = pattern.finditer(text_to_search)
for found in matches:
    print(found)
<re.Match object; span=(225, 236), match='Mr. Johnson'> <re.Match object; span=(237, 245), match='Mr Smith'> <re.Match object; span=(269, 274), match='Mr. T'>
# "M" followed by the group (s|rs) selects "Ms" or "Mrs"; then an optional
# period, one whitespace character, a capital letter and any lowercase run.
pattern = re.compile(r'M(s|rs)\.?\s[A-Z][a-z]*')
matches = pattern.finditer(text_to_search)
for found in matches:
    print(found)
<re.Match object; span=(246, 254), match='Ms Davis'> <re.Match object; span=(255, 268), match='Mrs. Robinson'>
# Phone number: 3 digits, a "." or "-" separator, 3 digits, another
# separator, then 4 digits.
pattern = re.compile(r'\d{3}[.-]\d{3}[.-]\d{4}')
matches = pattern.finditer(text_to_search)
for found in matches:
    print(found)
<re.Match object; span=(170, 182), match='321-555-4321'> <re.Match object; span=(183, 195), match='123.555.1234'>
# Website: one or more letters, digits or underscores, a literal dot, then
# exactly three lowercase letters.
pattern = re.compile(r'[a-zA-Z0-9_]+\.[a-z]{3}')
matches = pattern.finditer(text_to_search)
for found in matches:
    print(found)
<re.Match object; span=(158, 168), match='utexas.edu'> <re.Match object; span=(213, 223), match='utexas.edu'>
# Email address: a local part of letters, digits and "_ . + -", an "@", a
# domain label of letters, digits and hyphens, a literal dot, and a final run
# of letters, digits, hyphens or dots.
pattern = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+')
matches = pattern.finditer(text_to_search)
for found in matches:
    print(found)
<re.Match object; span=(197, 223), match='daniel-mitchell@utexas.edu'>
# Email pattern whose final part is limited to 2-4 characters. For each match,
# show three views of it: the (start, end) span, the matched text via
# group(0), and the same text recovered by slicing the source string.
pattern = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]{2,4}')
matches = pattern.finditer(text_to_search)
for found in matches:
    begin, end = found.span(0)
    print((begin, end))
    print(found.group(0))
    print(text_to_search[begin:end])
(197, 223) daniel-mitchell@utexas.edu daniel-mitchell@utexas.edu
# Sample URLs, one per line. The string starts and ends with a newline, so
# match offsets into it begin at 1.
urls = (
    '\n'
    'https://www.google.com\n'
    'http://yahoo.com\n'
    'https://www.whitehouse.gov\n'
    'https://craigslist.org\n'
)
# URL: "http" with an optional "s", "://", an optional "www." prefix, then
# two runs of word characters separated by a literal dot.
pattern = re.compile(r'https?://(www\.)?\w+\.\w+')
matches = pattern.finditer(urls)
for found in matches:
    print(found)
<re.Match object; span=(1, 23), match='https://www.google.com'> <re.Match object; span=(24, 40), match='http://yahoo.com'> <re.Match object; span=(41, 67), match='https://www.whitehouse.gov'> <re.Match object; span=(68, 90), match='https://craigslist.org'>
# Same URL shape, now with capture groups: group 1 is the optional "www.",
# group 2 the name before the dot, group 3 the dot plus what follows it.
pattern = re.compile(r'https?://(www\.)?(\w+)(\.\w+)')
matches = pattern.finditer(urls)
for found in matches:
    # Fetch groups 2 and 3 together and print them joined.
    site, suffix = found.group(2, 3)
    print(site + suffix)
google.com yahoo.com whitehouse.gov craigslist.org
# Print each full URL match, then rebuild groups 2 and 3 by slicing `urls`
# with the spans of those groups instead of calling group().
pattern = re.compile(r'https?://(www\.)?(\w+)(\.\w+)')
matches = pattern.finditer(urls)
for found in matches:
    print(found.group(0))
    site_start, site_end = found.span(2)
    suffix_start, suffix_end = found.span(3)
    print(urls[site_start:site_end] + urls[suffix_start:suffix_end])
https://www.google.com google.com http://yahoo.com yahoo.com https://www.whitehouse.gov whitehouse.gov https://craigslist.org craigslist.org