Untitled Document
Regular Expression
1. Simple example of Regular Expression in Python Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Simple example of a regular expression in Python.
       #
       # Regular expressions live in the `re` module, so it has to be imported.
       # A raw string (r'...') is a string Python reads exactly as written.
       # The two prints below show the difference between a regular (normal)
       # string and a raw string.
       import re

       print('\tTab')
       print(r'\tTab')

       text_to_search = """ abcdefghijklmnopqrstuvwxyz  
       ABCDEFGHIJKLMNOPQRSTUVWXYZ 
       1234567890 """

       # Compile the literal pattern, then walk over every occurrence in the text.
       pattern = re.compile(r'abc')
       matches = pattern.finditer(text_to_search)
       for found in matches:
           print(found)
       

       
2. Program - Working with Metacharacters Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Program - Working with metacharacters
       # Metacharacters have a special meaning inside a regular expression, so to
       # match one of them literally it has to be escaped with a backslash.

       # EX - 1: match the . (dot) character explicitly
       import re

       # Raw string, so the lone backslash in the sample text stays a literal
       # backslash instead of starting an invalid escape sequence.
       text_to_search = r""" \ { } * . ^ | ( ) + ?  """
       pattern = re.compile(r'\.')
       matches = pattern.finditer(text_to_search)
       for match in matches:
           print(match)
       
# EX - 2: one more example of a similar pattern - match the mangadaku.com
# name inside a URL string.  The dot is escaped so it only matches a real dot.
import re

text_to_search = "www.mangadaku.com"
pattern = re.compile(r'mangadaku\.com')
matches = pattern.finditer(text_to_search)
for match in matches:
    print(match)
3. Program - Introductions to Special characters Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Program - Introduction to special characters
       # A one-line introduction to each special character, each with an example.

       # 1. .  ---> the dot matches any character except a newline.
       # Running this example prints a match for every character of
       # text_to_search other than the line breaks.
       import re

       text_to_search = """ hhtp:www.mangadaku.com
             123.456.789
             Hellouser 
             welcome to * Python """
       pattern = re.compile(r'.')
       matches = pattern.finditer(text_to_search)
       for hit in matches:
           print(hit)
       
       # 2. \d ---> digit (0-9): matches every digit present in the text.
       import re

       text_to_search = """ hhtp:www.mangadaku.com
             123.456.789
             Hellouser 
             welcome to * Python """
       pattern = re.compile(r'\d')
       matches = pattern.finditer(text_to_search)
       for match in matches:
           print(match)
        
        # 3. \D ---> not a digit: matches every character of the text except
        # the digits 0-9.
        import re

        text_to_search = """ hhtp:www.mangadaku.com
              123.456.789
              Hellouser 
              welcome to * Python """
        pattern = re.compile(r'\D')
        matches = pattern.finditer(text_to_search)
        for hit in matches:
            print(hit)
        
        # 4. \w ---> word character: lower case a-z, upper case A-Z, digits 0-9
        # and underscore _ (a-z, A-Z, 0-9, _).
        # IMP - when you run the program below you will not see - (hyphen),
        # . (dot) or * (asterisk) among the matches.
        import re

        text_to_search = """ hhtp:www.mangadaku.com
             123.456.789      		
             Hellouser _ - -
             welcome to * Python """
        pattern = re.compile(r'\w')
        matches = pattern.finditer(text_to_search)
        for match in matches:
            print(match)
         
         # 5. \W ---> not a word character.
         # IMP - when you run the program below you will not see lower case a-z,
         # upper case A-Z, digits 0-9 or underscore _ among the matches.
         import re

         text_to_search = """ hhtp:www.mangadaku.com
         123.456.789      		
         Hellouser _ - -
         welcome to * Python """
         pattern = re.compile(r'\W')
         matches = pattern.finditer(text_to_search)
         for hit in matches:
             print(hit)
         
         # 6. \s ---> matches only whitespace: space, tab and newline characters.
         import re

         text_to_search = """ hhtp:www.mangadaku.com
         123.456.789      		
         Hellouser _ - -
         welcome to * Python """

         pattern = re.compile(r'\s')
         matches = pattern.finditer(text_to_search)
         for hit in matches:
             print(hit)
         
         # 7. \S ---> matches everything except whitespace,
         # i.e. anything that is NOT a space, tab or newline.
         import re

         text_to_search = """ hhtp:www.mangadaku.com
         123.456.789      		
         Hellouser _ - -
         welcome to * Python """

         pattern = re.compile(r'\S')
         matches = pattern.finditer(text_to_search)
         for hit in matches:
             print(hit)
         
         # EX - 8  \b - word boundary: \bHa matches "Ha" only where a word starts.
         # In the example below there are two matches of "Ha": the first one and
         # the one right after the space.  The last "Ha" is ignored because it
         # sits in the middle of a word.
         import re

         text_to_search = "Ha HaHa "

         pattern = re.compile(r'\bHa')
         matches = pattern.finditer(text_to_search)
         for hit in matches:
             print(hit)
         
         # EX - 9  \B - not a word boundary: works exactly opposite to \b, so only
         # the "Ha" in the middle of a word is matched.
         import re

         text_to_search = "Ha HaHa "

         pattern = re.compile(r'\BHa')
         matches = pattern.finditer(text_to_search)
         for hit in matches:
             print(hit)
         
         # EX - 10  ^ - matches a position at the beginning of the string.
         import re

         sentence = 'Start a Sentence and then bring it to an end'

         pattern = re.compile(r'^S')
         matches = pattern.finditer(sentence)
         for hit in matches:
             print(hit)
         
         # EX - 11  $ - matches a position at the end of the string.
         import re

         sentence = 'Start a Sentence and then bring it to an end'

         pattern = re.compile(r'end$')
         matches = pattern.finditer(sentence)
         for hit in matches:
             print(hit)
         


       
4. Some more practical examples Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Some more practical examples
       # The text below mixes letters, digits and phone numbers; we want to
       # match the phone numbers only.
       import re

       text_to_search = """ABCJSJKDLJKDFHJKH
            121121
            ascssfsffgwg
            123.456.7890
            999.666.1234
            888.777.0123
            """
       # Three digits, any one character, three digits, any one character, four digits.
       pattern = re.compile(r'\d\d\d.\d\d\d.\d\d\d\d')
       matches = pattern.finditer(text_to_search)
       for hit in matches:
           print(hit)
       
5. examples with Character Class [ ] Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Examples with a character class [ ]
       # From the text below we want the phone numbers only, and only those whose
       # separator is explicitly - or *; not all the phone numbers should be
       # displayed.
       # IMP - a character class matches exactly one occurrence of one of the
       # characters listed inside it.
       import re

       text_to_search = """ABCJSJKDLJKDFHJKH
       121121
       ascssfsffgwg
       123.456.7890
       999.666.1234
       888.777.0123
       123-777-9989
       456-789-3321
       567*124*6555
       """
       pattern = re.compile(r'\d\d\d[*-]\d\d\d[*-]\d\d\d\d')
       matches = pattern.finditer(text_to_search)
       for match in matches:
           print(match)

       # Printing a string in upper case.
       # my_str was not defined anywhere in this example, which raised NameError;
       # it is bound here to the sample text, the only string in scope.
       my_str = text_to_search
       print(my_str.upper())
       
       
6. Some More Practical examples with Character Class [ ] Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Some more practical examples with a character class [ ]
       # Here we match the phone numbers beginning with 800 and 900.
       import re

       text_to_search = """ABCJSJKDLJKDFHJKH
       121121
       ascssfsffgwg
       123.456.7890
       800.666.1234
       888.777.0123
       123-777-9989
       456-789-3321
       567*124*6555
       900-123-987
       """
       # [89] picks the leading digit, [-.] accepts either separator.
       # NOTE(review): the last group is only three digits, so 800.666.1234 is
       # reported as 800.666.123 - confirm that this is intended.
       pattern = re.compile(r'[89]00[-.]\d\d\d[-.]\d\d\d')
       matches = pattern.finditer(text_to_search)
       for hit in matches:
           print(hit)
 
       
7. Negation in Character class Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Negation in a character class
       # In the text below we do not want "bat" in the output; [^b] excludes it
       # by matching any single character other than b in front of "at".
       import re

       text_to_search = """
             cat
             mat
             pat
             rat
             bat
             121233
             11234234at
             afsdfdfat
           """
       pattern = re.compile(r'[^b]at')
       matches = pattern.finditer(text_to_search)
       for hit in matches:
           print(hit)
 
       
8. Character Class with Quantifiers Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Character Class with Quantifiers
       # List of Quantifiers :

       # * - 0 Or More
       # + - 1 Or More
       # ? - 0 Or One 
       # {3} - Exact Number
       # {3,4} - Range Of Numbers (Minimum,Maximum) - write it without a space after the comma
       
       # EX - 1: use quantifiers for the phone-number search from the
       # character-class examples.  The phone numbers have a fixed length with
       # separators, so an exact count {n} can replace the repeated \d.
       import re

       text_to_search = """ABCJSJKDLJKDFHJKH
       121121
       ascssfsffgwg
       123.456.7890
       999.666.1234
       888.777.0123
       123-777-9989
       456-789-3321
       567*124*6555
       """
       pattern = re.compile(r'\d{3}.\d{3}.\d{4}')
       matches = pattern.finditer(text_to_search)
       for hit in matches:
           print(hit)
       
       # EX - 2: a trickier example with a character class.
       # "Mr", an optional dot (\.?), one whitespace character, then a name that
       # starts with an upper-case letter followed by zero or more word characters.
       import re

       text_to_search = """ Mr. Schafer
       Mr Smith
       Mr Davis
       Mrs. Robinson 
       Mr. T """
       pattern = re.compile(r'Mr\.?\s[A-Z]\w*')
       matches = pattern.finditer(text_to_search)
       for match in matches:
           print(match)
 
       
9. Some More tricky Example with character class using Group Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # A trickier example with a character class, this time using a group.
       # The group (…|…|…) lets the title be Mr, Ms or Mrs.
       import re

       text_to_search = """ Mr. Schafer
        Mr Smith
        Mr Davis
        Mrs. Robinson 
        Mr. T """
       pattern = re.compile(r'(Mr|Ms|Mrs)\.?\s[A-Z]\w*')
       # OR, equivalently, factor out the common "M".  This second assignment
       # replaces the first, so it is the pattern actually used below.
       pattern = re.compile(r'M(r|s|rs)\.?\s[A-Z]\w*')
       matches = pattern.finditer(text_to_search)
       for match in matches:
           print(match)
 
       
10. Example to match different format of email id using regular expression Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Example: match different formats of e-mail id with one regular expression.
       import re

       emails = """
       MangeshPande@gmail.com
       mangesh.pande@mangadaku.edu
       mangesh-1987-pande@my-work.net """
       # Local part: letters, digits, dot or hyphen; domain: letters or hyphen;
       # ending: one of com, edu or net.
       pattern = re.compile(r'[a-zA-Z0-9.-]+@[a-zA-Z-]+\.(com|edu|net)')
       matches = pattern.finditer(emails)
       for hit in matches:
           print(hit)
       
       
11. Grouping example in Regular Expression Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Grouping example in regular expressions
       # Group 1 is the optional "www.", group 2 the domain name, group 3 the
       # top-level part such as ".com"; group 0 is always the whole match.
       import re

       urls = """
        https://www.google.com
        http://yahoo.com
        https://youtube.com
        https://www.mygov.in
        """
       pattern = re.compile(r'https?://(www\.)?(\w+)(\.\w+)')
       matches = pattern.finditer(urls)
       for match in matches:
           print(match.group(3))
           # Prints None for URLs that have no "www." prefix.
           print(match.group(1))
           print(match.group(2))
           print(match.group(0))

       # Creating a new string using the group indexes:
       # the sub method replaces each match with the text of groups 2 and 3.
       subbed_urls = pattern.sub(r'\2\3', urls)
       print(subbed_urls)
       
       
12. Searching pattern without considering the case Click Here
       #********* Source Code From Website - Mangadaku - visit us at -http://mangadaku.com/ *****      
       # Searching for a pattern without considering the case
       # Like the other examples, this one imports re itself so that it runs on
       # its own.
       import re

       sentence = 'Start a sentence and then bring it to an end'
       pattern = re.compile(r'start', re.IGNORECASE)
       # Or the short hand for the same flag
       pattern = re.compile(r'start', re.I)
       # search returns the first match, or None when nothing matches.
       matches = pattern.search(sentence)
       print(matches)

       
Untitled Document