Python Regex Basics

Common regular expression operations in Python using the re module, with examples and outputs.

  1. Basic matching and the re module
  2. Common regex patterns for digits, words, and email addresses
  3. Quantifiers and anchors
  4. Groups and capturing (including named groups)
  5. Substitution with regex
  6. Splitting strings using regex
  7. Regex flags for different matching behaviors
  8. Non-capturing groups

1. Importing the re Module and Basic Matching

import re

# Basic matching
# re.search scans the whole string and returns a Match object for the first
# occurrence of the pattern, or None when the pattern does not occur at all.
pattern = r"python"
text = "I love Python programming"
match = re.search(pattern, text, re.IGNORECASE)  # IGNORECASE lets "python" match "Python"
if match:
    result = f"Found: {match.group()}"
else:
    # Reported only when re.search returned None.
    result = "Not found"
print(result)

# Output: Found: Python

2. Common Regex Patterns

# Matching digits
# \d+ grabs each maximal run of consecutive decimal digits.
text = "There are 123 apples and 456 oranges"
digit_pattern = r"\d+"
matches = [hit.group() for hit in re.finditer(digit_pattern, text)]
print(f"Digits found: {matches}")
# Output: Digits found: ['123', '456']

# Matching words
# \w+ enclosed by two \b word boundaries yields whole words and skips punctuation.
word_pattern = r"\b\w+\b"
word_regex = re.compile(word_pattern)
text = "Hello, World! How are you?"
words = word_regex.findall(text)
print(f"Words found: {words}")
# Output: Words found: ['Hello', 'World', 'How', 'are', 'you']

# Matching email addresses (simple pattern)
# NOTE: this is a deliberately simplified pattern, not a full address validator.
# Inside a character class "|" is a literal pipe, not alternation, so the
# top-level-domain class is written [A-Za-z] rather than [A-Z|a-z].
email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
text = "Contact us at info@example.com or support@company.co.uk"
emails = re.findall(email_pattern, text)
print(f"Emails found: {emails}")
# Output: Emails found: ['info@example.com', 'support@company.co.uk']

3. Regex Quantifiers and Anchors

# Quantifiers
pattern = r"a{2,4}"  # Match 'a' 2 to 4 times
text = "a aa aaa aaaa aaaaa"
matches = re.findall(pattern, text)
print(f"Matches: {matches}")
# Output: Matches: ['aa', 'aaa', 'aaaa', 'aaaa']
# The lone 'a' is too short to match. The run of five still produces a match:
# the greedy quantifier consumes its first four 'a's and the leftover single
# 'a' is skipped.

# Add word boundaries to accept only whole runs of 2 to 4 'a's
bounded_pattern = r"\ba{2,4}\b"
bounded_matches = re.findall(bounded_pattern, text)
print(f"Whole-run matches: {bounded_matches}")
# Output: Whole-run matches: ['aa', 'aaa', 'aaaa']

# Anchors
# ^ pins the pattern to the beginning of the string, $ pins it to the end.
text1 = "Python is awesome"
text2 = "I love programming"
start_pattern = r"^Python"
end_pattern = r"programming$"
starts_with_python = re.match(start_pattern, text1) is not None
ends_with_programming = re.search(end_pattern, text2) is not None
print(f"Starts with Python: {starts_with_python}")
print(f"Ends with programming: {ends_with_programming}")
# Output:
# Starts with Python: True
# Ends with programming: True

4. Groups and Capturing

# Basic grouping
# Each pair of parentheses remembers the text its sub-pattern matched;
# groups are numbered from 1 in the order their opening parenthesis appears.
text = "Date: 04-07-2024"
pattern = r"(\d{2})-(\d{2})-(\d{4})"
match = re.search(pattern, text)
if match is not None:
    day, month, year = match.groups()
    print(f"Day: {day}, Month: {month}, Year: {year}")
# Output: Day: 04, Month: 07, Year: 2024

# Named groups
# (?P<name>...) lets each capture be read back by name instead of by position.
pattern = r"(?P<day>\d{2})-(?P<month>\d{2})-(?P<year>\d{4})"
match = re.search(pattern, text)
if match is not None:
    parts = match.groupdict()
    print(f"Day: {parts['day']}, Month: {parts['month']}, Year: {parts['year']}")
# Output: Day: 04, Month: 07, Year: 2024

5. Substitution

# Basic substitution
# Every non-overlapping occurrence of the pattern is replaced; subn also
# reports how many replacements were made.
text = "I love apples, but apples are expensive"
new_text, replaced_count = re.subn(r"apples", "oranges", text)
print(f"After substitution: {new_text}")
# Output: After substitution: I love oranges, but oranges are expensive

# Substitution with backreferences
# \1 and \2 in the replacement refer to the first and second captured words,
# so each "First Last" pair is rewritten as "Last, First".
swap_regex = re.compile(r"(\w+) (\w+)")
text = "John Doe, Jane Doe"
new_text = swap_regex.sub(r"\2, \1", text)
print(f"After substitution: {new_text}")
# Output: After substitution: Doe, John, Doe, Jane

6. Splitting with Regex

# Splitting with regex
# A character class acts as "any one of these delimiters": comma, semicolon or colon.
delimiter_regex = re.compile(r"[,;:]")
text = "apple,banana;orange:grape"
fruits = delimiter_regex.split(text)
print(f"Fruits: {fruits}")
# Output: Fruits: ['apple', 'banana', 'orange', 'grape']

7. Regex Flags

# Case-insensitive matching
# The IGNORECASE flag is baked into the compiled pattern, so any letter case matches.
text = "I love PYTHON, Python is great"
pattern = r"python"
insensitive_regex = re.compile(pattern, re.IGNORECASE)
matches = insensitive_regex.findall(text)
print(f"Matches: {matches}")
# Output: Matches: ['PYTHON', 'Python']

# Multiline matching
# With re.MULTILINE, ^ matches at the start of every line, not only at the
# start of the whole string.
# NOTE(review): the body of this triple-quoted sample was missing from the
# file (unterminated string); it was reconstructed so that exactly one line
# begins with "Python", which reproduces the documented output below.
text = """Start
Python is awesome
End"""
matches = re.findall(r"^Python", text, re.MULTILINE)
print(f"Matches: {matches}")
# Output: Matches: ['Python']

8. Non-Capturing Groups

# Non-capturing group
# (?:...) groups without capturing, so the optional scheme and "www." prefix
# are consumed but not numbered, and group(1) holds only the domain.
# The trailing (?:\.[a-zA-Z]{2,})+ accepts one or more dot-separated labels so
# multi-part suffixes such as ".co.uk" are kept whole.
pattern = r"(?:https?://)?(?:www\.)?([a-zA-Z0-9-]+(?:\.[a-zA-Z]{2,})+)"
# NOTE(review): the entries of this list were missing from the file; these
# sample URLs were reconstructed to reproduce the documented output below.
urls = [
    "https://www.example.com",
    "http://example.com",
    "www.another-example.co.uk",
]
for url in urls:
    match = re.search(pattern, url)
    if match:
        print(f"Domain: {match.group(1)}")
# Output:
# Domain: example.com
# Domain: example.com
# Domain: another-example.co.uk