⏳
Loading cheatsheet...
Character Classes, Quantifiers, Anchors, Groups & Capture, Lookaround, Flags, Common Patterns, Language Examples — regular expression mastery.
// ── Predefined Character Classes ──
// \d — digit [0-9]
// \D — NOT digit [^0-9]
// \w — word char [a-zA-Z0-9_]
// \W — NOT word char [^a-zA-Z0-9_]
// \s — whitespace [ \t\n\r\f\v]
// \S — NOT whitespace [^ \t\n\r\f\v]
// . — any char EXCEPT newline
const regex1 = /\d{3}-\d{4}/; // phone format: 555-1234
const regex2 = /\w+@\w+\.\w+/; // simple email
const regex3 = /\s+/; // one or more spaces
// ── Custom Character Classes ──
// [abc] — a, b, or c
// [a-z] — lowercase a through z
// [A-Z] — uppercase A through Z
// [0-9] — same as \d
// [a-zA-Z0-9] — alphanumeric only (like \w but WITHOUT the underscore)
// [^abc] — NOT a, b, or c
// [^0-9] — NOT a digit (same as \D)
const hex = /[0-9a-fA-F]+/; // hex color
const vowel = /[aeiou]/i; // case-insensitive vowels
const notDigit = /[^0-9]/; // any non-digit
const consonant = /[bcdfghjklmnpqrstvwxyz]/i;
// ── POSIX Character Classes (in character sets) ──
// [:alpha:] — letters
// [:digit:] — digits
// [:alnum:] — alphanumeric
// [:space:] — whitespace
// [:upper:] — uppercase
// [:lower:] — lowercase
// [:punct:] — punctuation
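// NOTE: JavaScript does not support POSIX classes — there the literal below is parsed as the set of characters "[ : u p e r" followed by one or more "]".
const posix = /[[:upper:]]+/; // one or more uppercase (POSIX/PCRE engines only; in JS use /[A-Z]+/ or /\p{Lu}+/u)
| Pattern | Matches | Equivalent |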
|---|---|---|
| \d | Any digit | [0-9] |
| \D | Not a digit | [^0-9] |
| \w | Word character | [a-zA-Z0-9_] |
| \W | Not word char | [^a-zA-Z0-9_] |
| \s | Whitespace | [ \t\r\n\f\v] |
| \S | Not whitespace | [^ \t\r\n\f\v] |
| . | Any char (no newline) | [^\n] |
| \n | Newline | (LF) |
| \r | Carriage return | (CR) |
| \t | Tab | (HT) |
| Pattern | Matches |
|---|---|
| \p{L} | Any letter (Unicode) |
| \p{N} | Any number |
| \p{P} | Any punctuation |
| \p{Sc} | Currency symbols |
| \p{Emoji} | Emoji characters |
| \p{Script=Latin} | Latin script chars |
| \p{Script=Han} | Chinese characters |
| \p{Script=Cyrillic} | Cyrillic script |
// ── Unicode Property Escapes (ES2018+, requires /u flag) ──
const emoji = /\p{Emoji}/u;
const currency = /\p{Sc}+\s*\d+/u; // $100, €50, ¥2000
const chinese = /\p{Script=Han}+/u;
const letter = /[\p{L}\p{M}]+/u; // any letter with marks
// Match accented characters
const accented = /\p{L}+/u; // matches café, naïve, el niño
Always use the u flag when working with Unicode. It enables \p{...} property escapes and code-point-aware matching. Note that in JavaScript \w remains effectively ASCII-only ([A-Za-z0-9_]) even with /u; use \p{L} (or [\p{L}\p{N}_]) to match accented letters.
// ── Greedy Quantifiers (match as much as possible) ──
// * — 0 or more
// + — 1 or more
// ? — 0 or 1 (optional)
// {n} — exactly n times
// {n,} — n or more
// {n,m} — between n and m
const greedy = /a+/; // matches "aaa" in "baaab"
const exact = /\d{4}/; // exactly 4 digits (year)
const range = /\d{2,4}/; // 2 to 4 digits
const minLen = /\w{8,}/; // 8+ word characters (password)
// ── Lazy Quantifiers (match as little as possible) ──
// *? — 0 or more (lazy)
// +? — 1 or more (lazy)
// ?? — 0 or 1 (lazy)
// {n,m}? — between n and m (lazy)
const lazy = /a+?/; // matches "a" in "baaab" (just one)
const html = /<div>.*?<\/div>/s; // non-greedy HTML match
// ── Real-world examples ──
const phone = /\+?1?\d{10}/; // US phone (optional +1)
const ssn = /\d{3}-\d{2}-\d{4}/; // SSN: 123-45-6789
const ipv4 = /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/;
const date = /\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])/;
const time = /([01]\d|2[0-3]):[0-5]\d(:[0-5]\d)?/; // HH:MM:SS| Pattern | Input | Match |
|---|---|---|
| a.*b | aXbYb | aXbYb (greedy) |
| a.*?b | aXbYb | aXb (lazy) |
| a.+b | aXbYb | aXbYb |
| a.+?b | aXbYb | aXb |
| \d{2,4} | 12345 | 1234 |
| \d{2,4}? | 12345 | 12 |
| Issue | Solution |
|---|---|
| Greedy over-match | Use lazy: .*? |
| Catastrophic backtracking | Use atomic groups or possessive |
| Quantifier on nothing | Avoid (a+)+ patterns |
| Empty matches | Use *? or prevent with + |
| Leading zero issues | Use (?:0|[1-9]\d*) |
// ── Possessive Quantifiers (no backtracking) ──
// *+ — 0 or more (possessive)
// ++ — 1 or more (possessive)
// ?+ — 0 or 1 (possessive)
// Note: Not supported in JavaScript! Use atomic groups instead.
// Atomic-group emulation in JS: a lookahead never gives back what it matched,
// so capturing inside it and then consuming the capture with \1 behaves like (?>a+).
// Writing (?>a+) directly is a SyntaxError in JavaScript.
const noBacktrack = /^(?=(a+))\1:b/;
// Alternative: use negated char class
const better = /^a+:b/; // this works: a+ cannot match ":b"
Nested quantifiers such as (a+)+ or (\d+)+ can cause exponential time complexity on non-matching input. Use negated character classes or atomic groups to prevent this.
// ── Anchors ──
// ^ — Start of string (or line with /m)
// $ — End of string (or line with /m)
// \b — Word boundary
// \B — NOT word boundary
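// \A — Start of string only (PCRE/Python/Java; NOT supported in JavaScript — use ^ without /m)
// \z — End of string only (PCRE/Java; NOT supported in JavaScript — use $ without /m)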
// ── String Anchors ──
const startsWithHello = /^Hello/;
const endsWithWorld = /World$/;
const exactMatch = /^Hello, World!$/;
const emptyString = /^$/;
// ── Word Boundaries ──
// \b matches between \w and \W (or start/end of string)
const wholeWord = /\bcat\b/; // matches "cat" but not "catalog"
const notWord = /\Bcat/; // matches "bobcat" but not "cat" or "catalog" (needs a word char right before "cat")
const endsWith = /ing\b/; // "running" but not "ingenious"
// ── Line Anchors (multiline mode) ──
const multiline = /^line/m; // each line starting with "line"
const endOfLine = /end$/m; // each line ending with "end"
// ── Lookaround as pseudo-anchors ──
const before = /(?<=\$)\d+/; // digits after $
const after = /\d+(?=px)/; // digits before "px"
// ── Real-world anchors ──
const validUsername = /^[a-zA-Z0-9_]{3,20}$/;
const startsEndsDigit = /^\d.*\d$/;
const noLeadingSpace = /^[^\s]/;
const jsonLine = /^\s*["\[{]/; // starts with JSON structure| Anchor | Matches At | With /m |
|---|---|---|
| ^ | Start of string | Start of each line |
| $ | End of string | End of each line |
| \b | Word boundary | Same |
| \B | Not word boundary | Same |
| \A | Start of string | Always (no /m change); not supported in JavaScript |
| \z | End of string | Always (no /m change); not supported in JavaScript |
| Regex | Matches | Does NOT Match |
|---|---|---|
| /\bcat\b/ | "cat" | "catalog", "bobcat" |
| /\bcat/ | "cat", "catalog" | "bobcat" |
| /cat\b/ | "cat", "bobcat" | "catalog" |
| /\Bcat/ | "bobcat" | "cat", "catalog" |
| /cat\B/ | "catalog" | "cat", "bobcat" |
Always use ^ and $ anchors for input validation to ensure the entire string matches. Without them, /\d{4}/ matches "1234" inside "abc1234def" — use /^\d{4}$/ to match exactly 4 digits.
// ── Capturing Groups ──
// (...) — capturing group
// (?:...) — non-capturing group
// (?<name>...)— named capture group
// ── Named Capture Groups ──
// ISO date pattern exposing three named captures: year, month and day.
const dateRegex = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;
const match = dateRegex.exec('2024-01-15');
// Log each named capture in order: "2024", "01", "15"
for (const part of ['year', 'month', 'day']) {
  console.log(match.groups[part]);
}
// Named groups with replace: $<name> in the replacement string refers to a named capture
const formatted = '2024-01-15'.replace(/(?<y>\d{4})-(?<m>\d{2})-(?<d>\d{2})/, '$<m>/$<d>/$<y>'); // "01/15/2024"
// ── Non-Capturing Groups ──
const optionalProtocol = /(?:https?:\/\/)?www\.\w+\.\w+/;
// The (?:...) doesn't create a capture group
// ── Numbered groups ──
const phone = /(\d{3})(\d{3})(\d{4})/;
const m = '5551234567'.match(phone);
console.log(m[1]); // "555"
console.log(m[2]); // "123"
console.log(m[3]); // "4567"
// ── Backreferences ──
const doubleWord = /\b(\w+)\s+\1\b/; // "the the" but not "the cat"
const htmlTag = /<([a-z]+)>.*?<\/\1>/i; // <div>...</div>
const repeated = /(\d{2})-\1-\1/; // "12-12-12"
// ── Replacement patterns ──
// $1, $2, ... — numbered groups
// $<name> — named groups
// $& — entire match
// $` — text before match
// $' — text after match
// $$ — literal $
const swap = 'Smith, John'.replace(/(\w+), (\w+)/, '$2 $1');
// "John Smith"
const obfuscate = 'user@example.com'.replace(/(\w+)@(\w+)/, '$1***@$2');
// "user***@example.com" (the unmatched ".com" is left in place)
| Syntax | Type | Captures? |
|---|---|---|
| (abc) | Capturing | Yes |
| (?:abc) | Non-capturing | No |
| (?<name>abc) | Named capture | Yes (named) |
| (abc|def) | Alternation | Yes |
| (?:abc|def) | Alt (non-capt) | No |
| (?=abc) | Positive lookahead | No |
| (?!abc) | Negative lookahead | No |
| (?<=abc) | Positive lookbehind | No |
| (?<!abc) | Negative lookbehind | No |
| Method | Returns | Behavior |
|---|---|---|
| regex.exec(str) | Match object or null | With groups, first match |
| str.match(regex) | Array or null | Without /g: first match |
| str.matchAll(regex) | Iterator | Requires /g flag |
| str.search(regex) | Index or -1 | Position of first match |
| str.replace(rx, fn) | String | Replace matches |
| str.split(regex) | Array | Split on matches |
| regex.test(str) | Boolean | Check if matches |
// ── matchAll (iterates all matches with /g) ──
const html = '<h1>Title</h1><p>Body</p><h2>Subtitle</h2>';
const tagRegex = /<(?<tag>\w+)>(?<content>[^<]+)<\/\w+>/g;
for (const match of html.matchAll(tagRegex)) {
console.log(match.groups.tag, match.groups.content);
// "h1" "Title"
// "p" "Body"
// "h2" "Subtitle"
}
// Convert to array
const allMatches = [...html.matchAll(tagRegex)].map(m => ({
tag: m.groups.tag,
content: m.groups.content,
index: m.index,
}));
// ── replace with function ──
const prices = 'Item: $5, Tax: $1, Total: $6';
const doubled = prices.replace(/\$(\d+)/g, (match, amount, offset) => {
console.log(`Found $ amount at index ${offset}`);
return `$${Number(amount) * 2}`; // "$10", "$2", "$12"
});
Use non-capturing groups (?:...) when you don't need the matched text. They are faster and produce cleaner match results. Use named groups (?<name>...) instead of numbered groups for readability.
// ── Positive Lookahead: (?=...)
// Asserts that the pattern AFTER matches (without consuming)
const price = /\d+(?=px)/; // "100" in "100px"
const fileExt = /[\w.]+(?=\.\w+$)/; // filename before extension
const followedBySpace = /\w+(?=\s)/;
// ── Negative Lookahead: (?!...)
// Asserts that the pattern AFTER does NOT match
// The extra \d alternative stops the engine from backtracking to "10" in "100px".
const notPx = /\d+(?!\d|px)/; // "100" in "100em"; no match in "100px"
// (.).*\1 finds any character that occurs again later in the string.
const noRepeat = /^(?!.*(.).*\1)\w+$/; // no repeated chars: "abc" passes, "aab" and "abca" fail
const notBad = /^(?!bad)\w+/; // doesn't start with "bad"
// ── Positive Lookbehind: (?<=...)
// Asserts that the pattern BEFORE matches (without consuming)
const afterDollar = /(?<=\$)\d+/; // "100" in "$100"
const afterAt = /(?<=@)[\w.]+/; // "example.com" in "user@example.com"
const afterProtocol = /(?<=https?:\/\/)[^/]+/; // domain from URL
// ── Negative Lookbehind: (?<!...)
// Asserts that the pattern BEFORE does NOT match
// The lookbehind sits in front of the word so it inspects the character preceding the name;
// placed after \w+ it would only ever see a word character and always succeed.
const notAfterAt = /(?<!@)\b\w+\.com/; // "example.com" in "visit example.com", no match in "user@example.com"
// Rejecting a preceding digit as well prevents matching the "00" tail of "$100".
const standalone = /(?<![$\d])\d+/; // whole number not after $
// ── Practical lookaround examples ──
// Password validation: at least one uppercase, one digit
const hasUpperAndDigit = /(?=.*[A-Z])(?=.*\d).+/;
// Match word NOT between < and >
const textNotInTag = /(?<![<])(\b\w+\b)(?![>])/g;
// Find duplicate words
const duplicate = /\b(\w+)\s+\1\b/gi;
// Prices without cents
const wholePrice = /(?<=\$)\d+(?=\s|$)/g;
// Numbers not in square brackets
const notInBrackets = /\d+(?![^\]]*\])/g;| Type | Syntax | Direction | Consumes? |
|---|---|---|---|
| Positive Lookahead | (?=...) | Forward | No |
| Negative Lookahead | (?!...) | Forward | No |
| Positive Lookbehind | (?<=...) | Backward | No |
| Negative Lookbehind | (?<!...) | Backward | No |
| Pattern | Purpose |
|---|---|
| (?=.*[A-Z]) | Must contain uppercase |
| (?=.*[A-Z])(?=.*\d) | Upper + digit required |
| (?!_)\w+ | Word not starting with _ |
| (?<=@)\w+ | Text after @ sign |
| \d+(?=px) | Number before "px" |
| (?<!\d)\d{3}(?!\d) | Exactly 3 digits |
| (?<=\{)\w+(?=\}) | Word inside {curly} |
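Lookbehind assertions require ES2018+ in JavaScript and work with or without the u flag. JavaScript lookbehinds may be variable-length, so (?<=a+) is valid there; engines such as Python's re and PCRE require fixed-length lookbehind, where you must write (?<=a{3}) instead.
// ── Regex Flags ──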
// g — Global (find ALL matches, not just first)
// i — Case-insensitive
// m — Multiline (^ and $ match per line)
// s — Dotall (. matches newline too)
// u — Unicode mode
// y — Sticky (match at lastIndex only)
// d — Indices (return match start/end indices, ES2022)
// ── Global (g) ──
const withoutG = /\d+/.exec('a1b2c3'); // ["1"]
const withG = /\d+/g.exec('a1b2c3'); // ["1"], then ["2"], then ["3"]
// matchAll is cleaner for /g:
[...'a1b2c3'.matchAll(/\d+/g)]; // [["1"], ["2"], ["3"]]
// ── Case-insensitive (i) ──
const ci = /hello/i;
ci.test('Hello'); // true
ci.test('HELLO'); // true
ci.test('hElLo'); // true
// ── Multiline (m) ──
const lines = `line1
line2
line3`;
const withoutM = /^line/.test(lines); // true (only first)
const withM = /^line/m.test(lines); // true (each line)
const endM = /3$/m.test(lines); // true
// ── Dotall (s) — . matches \n ──
const withoutS = /<div>.*<\/div>/.test('<div>\n</div>'); // false
const withS = /<div>.*<\/div>/s.test('<div>\n</div>'); // true
// ── Unicode (u) ──
// No "+" quantifier here: adjacent emoji would otherwise merge into a single match.
// Note: \p{Emoji} also covers digits, "#" and "*"; use \p{Extended_Pictographic} for pictographs only.
const emoji = /\p{Emoji}/gu;
[...'🎉🚀💻'.matchAll(emoji)].length; // 3
// ── Sticky (y) — match at exact lastIndex ──
const sticky = /\d+/y;
sticky.lastIndex = 2;
'abc123def'.match(sticky); // null (position 2 is 'c')
sticky.lastIndex = 3;
'abc123def'.match(sticky); // ["123"]
// ── Indices (d) — ES2022 ──
const indices = /\d+/d.exec('abc123def');
console.log(indices.indices[0]); // [3, 6] — start and end positions
// ── Combining flags ──
const combined = /pattern/gimsuyd;
const common = /pattern/gi; // most common combo| Flag | ES Version | Effect |
|---|---|---|
| g | ES3 | Find all matches |
| i | ES3 | Case-insensitive |
| m | ES3 | Multiline anchors |
| s | ES2018 | Dot matches newline |
| u | ES6 | Unicode mode |
| y | ES6 | Sticky matching |
| d | ES2022 | Match indices |
| Issue | Detail |
|---|---|
| regex.exec with /g | Returns next match each call, resets at end |
| str.match with /g | Returns array of strings (no groups) |
| str.match without /g | Returns full match object with groups |
| Regex object with /g | Has internal lastIndex state |
| Accidental /g | Shared regex objects carry state across calls |
Don't reuse a regex with /g across multiple calls if you rely on exec(). The lastIndex is tracked and shared. Create a new regex or use matchAll() instead.
// ── Email ──
const email = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
// Practical email (HTML5 spec approximate)
const emailHTML5 = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
// ── URL ──
const url = /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_+.~#?&/=]*)$/;
// ── Phone (US) ──
// Optional +1 country code, 3-digit area code (optionally parenthesized),
// 3-digit exchange and 4-digit line number, each separated by "-", "." or whitespace.
const phoneUS = /^\+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}$/;
// International (E.164 style: optional "+", no leading zero, at most 15 digits)
const phoneIntl = /^\+?[1-9]\d{1,14}$/;
// ── Password (strong: 8+ chars, upper, lower, digit, special) ──
const strongPassword = /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$/;
// ── IPv4 Address ──
const ipv4 = /^((25[0-5]|(2[0-4]|1?\d)?\d)\.){3}(25[0-5]|(2[0-4]|1?\d)?\d)$/;
// Hex Color: optional "#", then exactly 3 or 6 hex digits; group 1 holds the digits
const hexColor = /^#?([0-9a-fA-F]{3}|[0-9a-fA-F]{6})$/;
// Date ISO
const dateISO = /^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$/;
// Credit Card
const visa = /^4[0-9]{12}(?:[0-9]{3})?$/;
// Slug
const slug = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
// Username
const username = /^[a-zA-Z0-9_]{3,20}$/;
// 24-hour time
const time24 = /^([01]\d|2[0-3]):[0-5]\d(:[0-5]\d)?$/;
// HTML tag
const tag = /<(\w+)[^>]*>(.*?)<\/\1>/gs;| Pattern | Regex |
|---|---|
| Email | /^[\w.+-]+@[\w-]+\.[a-z]{2,}$/i |
| URL | /^https?:\/\/[^\s/$.?#].*$/i |
| Phone US | /^\+?1?\(?\d{3}\)?[- ]?\d{3}[- ]?\d{4}$/ |
| IP v4 | /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ |
| Hex Color | /^#?([0-9a-f]{3}|[0-9a-f]{6})$/i |
| Date ISO | /^\d{4}-\d{2}-\d{2}$/ |
| Slug | /^[a-z0-9]+(?:-[a-z0-9]+)*$/ |
| Strong Pwd | /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$/ |
| Task | Regex / Replace |
|---|---|
| Trim whitespace | /^\s+|\s+$/g → "" |
| Remove HTML | /<[^>]*>/g → "" |
| Escape regex | /[.*+?^${}()|[\]\\]/g |
| Camel to snake | /[A-Z]/g → (c) => "_" + c.toLowerCase() |
| Extract nums | /-?\d+\.?\d*/g |
| Remove dupes | /\b(\w+)\s+\1\b/gi → "$1" |
| Add commas | /\B(?=(\d{3})+(?!\d))/g |
| Repeat chars | /(.)\1+/g → "$1" |
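Keep email validation simple: a pattern like /^[\w.+-]+@[\w-]+\.[a-z]{2,}$/i catches most common addresses (add "." to the domain class if you need subdomains such as mail.example.com); sending a confirmation email is the only real proof that an address works.
# ── Python Regex (re module) ──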
import re
# Match
m = re.search(r'\d+', 'abc123def')
print(m.group()) # "123"
print(m.start()) # 3
print(m.end()) # 6
print(m.span()) # (3, 6)
# Find all
re.findall(r'\d+', 'a1b2c3') # ['1', '2', '3']
re.finditer(r'\d+', 'a1b2c3') # iterator of Match objects
# Named groups — Python spells them (?P<name>...); the (?<name>...) form raises re.error
m = re.search(r'(?P<first>\w+) (?P<last>\w+)', 'John Doe')
print(m.group('first')) # "John"
print(m.group('last')) # "Doe"
# Replace
re.sub(r'\d+', lambda m: str(int(m.group()) * 2), 'a1b2') # "a2b4"
re.sub(r'(\w+) (\w+)', r'\2 \1', 'hello world') # "world hello"
# Compile for reuse
pattern = re.compile(r'\b[A-Z][a-z]+\b')
pattern.findall('Hello World Foo') # ['Hello', 'World', 'Foo']
# Flags
re.VERBOSE # ignore whitespace and # comments
re.IGNORECASE # i flag
re.MULTILINE # ^ and $ per line
re.DOTALL # . matches \n// ── Java Regex ──
import java.util.regex.*;
// Compile and match
Pattern p = Pattern.compile("\\d+");
Matcher m = p.matcher("abc123def");
if (m.find()) {
System.out.println(m.group()); // "123"
}
// Named groups (Java 7+)
Pattern p2 = Pattern.compile("(?<year>\\d{4})-(?<month>\\d{2})");
Matcher m2 = p2.matcher("2024-01");
if (m2.matches()) {
m2.group("year"); // "2024"
m2.group("month"); // "01"
}
// Replace
String result = "hello world".replaceAll("(\\w+) (\\w+)", "$2 $1");
// Find all
Pattern p3 = Pattern.compile("\\b[A-Z]\\w+");
Matcher m3 = p3.matcher("Hello World");
while (m3.find()) {
System.out.println(m3.group());
}// ── Go Regex (RE2 engine — no backreferences!) ──
package main
import (
"fmt"
"regexp"
)
func main() {
re := regexp.MustCompile("\d+")
// Match
fmt.Println(re.MatchString("abc123")) // true
fmt.Println(re.FindString("abc123")) // "123"
fmt.Println(re.FindAllString("a1b2c3", -1)) // ["1","2","3"]
// Find with index
loc := re.FindStringIndex("abc123def")
fmt.Println(loc) // [3, 6]
// Named groups
re2 := regexp.MustCompile("(?P<year>\d{4})-(?P<month>\d{2})")
match := re2.FindStringSubmatch("2024-01")
fmt.Println(match[1]) // "2024"
fmt.Println(match[2]) // "01"
// Replace
result := re.ReplaceAllString("a1b2c3", "X")
fmt.Println(result) // "aXbXcX"
}\\1), lookbehinds, or other features that require backtracking. Python/PCRE/Java support the full regex feature set. JavaScript ES2018+ supports most features except possessive quantifiers and atomic groups.