Common regular expression operations in Python using the re
module, with examples and outputs.
- Basic matching and the re module
- Common regex patterns for digits, words, and email addresses
- Quantifiers and anchors
- Groups and capturing (including named groups)
- Substitution with regex
- Splitting strings using regex
- Regex flags for different matching behaviors
- Non-capturing groups
1. Importing the re Module and Basic Matching
import re
# Basic matching: re.search scans the whole string and returns the first
# match object, or None when the pattern does not occur anywhere.
pattern = r"python"
text = "I love Python programming"
# re.IGNORECASE lets the lowercase pattern match the capitalised "Python".
match = re.search(pattern, text, re.IGNORECASE)
if match:
    # match.group() with no argument returns the full matched text.
    print(f"Found: {match.group()}")
else:
    print("Not found")
# Output: Found: Python
2. Common Regex Patterns
# Matching digits: \d+ grabs each maximal run of one or more digits.
digit_pattern = r"\d+"
text = "There are 123 apples and 456 oranges"
matches = re.findall(digit_pattern, text)
print(f"Digits found: {matches}")
# Output: Digits found: ['123', '456']

# Matching words: \w+ delimited by word boundaries, so punctuation is dropped.
word_pattern = r"\b\w+\b"
text = "Hello, World! How are you?"
words = re.findall(word_pattern, text)
print(f"Words found: {words}")
# Output: Words found: ['Hello', 'World', 'How', 'are', 'you']

# Matching email addresses (simple pattern, not a full RFC 5322 validator).
# Inside a character class '|' is a literal pipe, not alternation, so the
# top-level-domain class is spelled [A-Za-z] to accept letters only.
email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
text = "Contact us at info@example.com or support@company.co.uk"
emails = re.findall(email_pattern, text)
print(f"Emails found: {emails}")
# Output: Emails found: ['info@example.com', 'support@company.co.uk']
3. Regex Quantifiers and Anchors
# Quantifiers
pattern = r"a{2,4}"  # Match 'a' 2 to 4 times
text = "a aa aaa aaaa aaaaa"
matches = re.findall(pattern, text)
print(f"Matches: {matches}")
# Output: Matches: ['aa', 'aaa', 'aaaa', 'aaaa']
# The lone 'a' is too short to match. The last item is the first four
# characters of 'aaaaa': the quantifier is greedy but capped at 4, and the
# single 'a' left over cannot form another match.

# Anchors: ^ pins the match to the start of the string, $ to the end.
start_pattern = r"^Python"
end_pattern = r"programming$"
text1 = "Python is awesome"
text2 = "I love programming"
# re.match only tries position 0; re.search scans, so it needs the $ anchor
# to restrict the hit to the end of the string.
print(f"Starts with Python: {bool(re.match(start_pattern, text1))}")
print(f"Ends with programming: {bool(re.search(end_pattern, text2))}")
# Output:
# Starts with Python: True
# Ends with programming: True
4. Groups and Capturing
# Basic grouping: each parenthesised part is captured and numbered from 1
# in the order its opening parenthesis appears.
pattern = r"(\d{2})-(\d{2})-(\d{4})"
text = "Date: 04-07-2024"
match = re.search(pattern, text)
if match:
    print(f"Day: {match.group(1)}, Month: {match.group(2)}, Year: {match.group(3)}")
# Output: Day: 04, Month: 07, Year: 2024

# Named groups: (?P<name>...) lets a capture be read by name instead of index.
pattern = r"(?P<day>\d{2})-(?P<month>\d{2})-(?P<year>\d{4})"
match = re.search(pattern, text)
if match:
    print(f"Day: {match.group('day')}, Month: {match.group('month')}, Year: {match.group('year')}")
# Output: Day: 04, Month: 07, Year: 2024
5. Substitution
# Plain replacement: every occurrence of the pattern is swapped for the
# replacement string.
text = "I love apples, but apples are expensive"
fruit_regex = re.compile(r"apples")
new_text = fruit_regex.sub("oranges", text)
print(f"After substitution: {new_text}")
# Output: After substitution: I love oranges, but oranges are expensive

# Backreferences: \1 and \2 in the replacement refer to the first and second
# captured groups, so each "First Last" pair is rewritten as "Last, First".
text = "John Doe, Jane Doe"
name_pair_regex = re.compile(r"(\w+) (\w+)")
new_text = name_pair_regex.sub(r"\2, \1", text)
print(f"After substitution: {new_text}")
# Output: After substitution: Doe, John, Doe, Jane
6. Splitting with Regex
# A character class splits on any one of several delimiters in a single pass.
text = "apple,banana;orange:grape"
delimiter_regex = re.compile(r"[,;:]")
fruits = delimiter_regex.split(text)
print(f"Fruits: {fruits}")
# Output: Fruits: ['apple', 'banana', 'orange', 'grape']
7. Regex Flags
# re.IGNORECASE: letters in the pattern match either case in the text.
pattern = r"python"
text = "I love PYTHON, Python is great"
matches = [hit.group() for hit in re.finditer(pattern, text, re.IGNORECASE)]
print(f"Matches: {matches}")
# Output: Matches: ['PYTHON', 'Python']

# re.MULTILINE: ^ matches at the start of every line, not only at the start
# of the whole string.
text = """Start
Python
End"""
line_start_regex = re.compile(r"^Python", re.MULTILINE)
matches = line_start_regex.findall(text)
print(f"Matches: {matches}")
# Output: Matches: ['Python']
8. Non-Capturing Groups
# Non-capturing groups: (?:...) groups part of a pattern without creating a
# numbered capture, so the optional scheme and "www." prefix are skipped and
# group(1) is the host name that follows them.
# The capture allows any number of dot-separated labels before the final
# alphabetic label, so hosts such as "sub.example.com" and multi-part
# suffixes such as "co.uk" are captured whole rather than cut off after
# the second label.
pattern = r"(?:https?://)?(?:www\.)?([a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,})"
urls = [
    "https://www.example.com",
    "http://subdomain.example.com",
    "www.another-example.co.uk",
]
for url in urls:
    match = re.search(pattern, url)
    if match:
        print(f"Domain: {match.group(1)}")
# Output:
# Domain: example.com
# Domain: subdomain.example.com
# Domain: another-example.co.uk
# NOTE: only a leading "www." is stripped; other subdomains are kept, because
# a regex alone cannot tell where the registrable domain begins.