🦀 Functional Rust

486: Regex-Like Matching Without Crates

Difficulty: 2 | Level: Intermediate. Rust's `str` methods cover most real-world pattern matching — no regex engine needed.

The Problem This Solves

Reaching for a regex engine is a habit from languages where string methods are thin wrappers around character arrays. Regex adds a compile step, a runtime dependency, and cognitive overhead for patterns that `str` handles directly. Many codebases import `regex` just for things like "does this string start with `http`" or "split on whitespace." Rust's `str` API is deliberately rich. Pattern-based methods such as `starts_with`, `ends_with`, `contains`, `find`, `split`, `trim_matches`, and `matches` accept not just string literals but also `char`, `&[char]`, and closures — giving you predicate-based matching with zero overhead. Understanding what `str` can do well prevents over-engineering and teaches you the building blocks for writing your own matchers when you do need custom logic.

The Intuition

A Swiss army knife versus a chainsaw. For most string operations — "does this line start with `#`?", "strip leading whitespace", "find the first digit" — `str` is the Swiss army knife: always available, no setup. Reach for `regex` only when you genuinely need capture groups, quantifiers, or alternation across complex patterns. (Note that the `regex` crate deliberately omits backreferences and look-around.)

How It Works in Rust

1. Prefix/suffix checks:
url.starts_with("https://")
filename.ends_with(".rs")
2. Substring search:
line.contains("ERROR")
line.find("->").map(|i| &line[i+2..])
3. Character class matching — use a closure or `char::is_*`:
s.chars().all(|c| c.is_ascii_alphanumeric())
s.trim_matches(|c: char| !c.is_alphabetic())
4. Split on pattern:
"a,b,,c".split(',').filter(|s| !s.is_empty())
csv_line.splitn(3, ',')  // max 3 parts
5. Multiple delimiters — pass a slice of chars:
text.split(&[',', ';', '\t'][..])
6. Rolling your own matcher — for simple wildcards, write a recursive function; for finite automata, a `match` on states.

What This Unlocks

Key Differences

| Concept | OCaml | Rust |
|---|---|---|
| Pattern type | String or `Re` | `&str`, `char`, `&[char]`, closure |
| Regex | `Str` (bundled library) / `Re` (opam package) | External `regex` crate |
| Predicate match | `String.exists` | `str::contains` with a `char` predicate (e.g. `char::is_numeric`) |
| Split | `String.split_on_char` | `str::split`, `splitn`, `split_once` |
// 486. Regex-like matching without crates
/// Returns `true` when `s` matches the glob `pattern`.
///
/// `*` matches any run of characters (including none); every other character
/// must match literally. A pattern without `*` must equal `s` exactly.
/// Any number of `*` wildcards is supported: the text before the first `*`
/// is anchored at the start of `s`, the text after the last `*` is anchored
/// at the end, and the segments in between must occur in order in the middle.
fn glob_match(pattern: &str, s: &str) -> bool {
    // No wildcard at all: plain equality.
    let (prefix, rest) = match pattern.split_once('*') {
        Some(parts) => parts,
        None => return s == pattern,
    };

    // Split off the end-anchored suffix; `middle` holds the floating segments
    // that sit between the first and the last `*` (if there is more than one).
    let (middle, suffix) = match rest.rsplit_once('*') {
        Some((middle, suffix)) => (Some(middle), suffix),
        None => (None, rest),
    };

    // Strip the prefix first and the suffix from what is left, so the two
    // anchors can never overlap (e.g. "ab*ba" must not match "aba").
    let mut remaining = match s
        .strip_prefix(prefix)
        .and_then(|tail| tail.strip_suffix(suffix))
    {
        Some(inner) => inner,
        None => return false,
    };

    // Taking the leftmost occurrence of each floating segment leaves the
    // most room for the segments that follow, so greedy search is enough.
    if let Some(middle) = middle {
        for segment in middle.split('*') {
            match remaining.find(segment) {
                Some(at) => remaining = &remaining[at + segment.len()..],
                None => return false,
            }
        }
    }
    true
}

/// Simplified SQL `LIKE` matching of `s` against `pattern`.
///
/// * `%` matches any run of characters, including the empty run.
/// * `_` matches exactly one character (any character).
/// * Every other pattern character must equal the corresponding character of `s`.
///
/// Matching is done per `char`, not per byte, so `_` consumes one full
/// Unicode scalar value. There is no escape syntax for a literal `%` or `_`.
fn like_match(s: &str, pattern: &str) -> bool {
    // Recursive backtracking matcher over char slices. Each `%` tries both
    // "match nothing" and "swallow one more char", so patterns with many `%`
    // can take a long time on long inputs.
    fn rec(s: &[char], p: &[char]) -> bool {
        match (s, p) {
            // Pattern exhausted: succeed only if the input is exhausted too.
            (_, []) => s.is_empty(),
            // Input exhausted: the rest of the pattern may only be `%`.
            ([], ['%', pr @ ..]) => rec(s, pr),
            ([], _) => false,
            // `%`: either it matches nothing, or it swallows one char and stays.
            ([_, sr @ ..], ['%', pr @ ..]) => rec(s, pr) || rec(sr, p),
            // `_`: any single character is accepted.
            ([_, sr @ ..], ['_', pr @ ..]) => rec(sr, pr),
            // Literal character: must be equal.
            ([sc, sr @ ..], [pc, pr @ ..]) => sc == pc && rec(sr, pr),
        }
    }

    let sv: Vec<char> = s.chars().collect();
    let pv: Vec<char> = pattern.chars().collect();
    rec(&sv, &pv)
}

fn main() {
    let files = ["hello.txt","world.rs","README.md","test.txt","lib.rs"];

    // Glob matching
    let txt: Vec<_> = files.iter().filter(|&&f| glob_match("*.txt", f)).collect();
    println!("*.txt: {:?}", txt);
    let rs: Vec<_>  = files.iter().filter(|&&f| glob_match("*.rs", f)).collect();
    println!("*.rs:  {:?}", rs);

    // Closure patterns (no crate needed)
    let has_digit = |s: &&str| s.chars().any(|c| c.is_ascii_digit());
    let words = ["abc","abc123","xyz","99bottles"];
    println!("with digit: {:?}", words.iter().filter(has_digit).collect::<Vec<_>>());

    // LIKE pattern
    println!("he_lo: {}", like_match("hello", "he_lo"));
    println!("h%o:   {}", like_match("hello", "h%o"));
    println!("exact: {}", like_match("hello", "hello"));

    // Extract numbers from string (no regex)
    let text = "items: 42 units, cost $3.99, qty 7";
    let nums: Vec<&str> = text.split(|c:char| !c.is_ascii_digit() && c!='.')
        .filter(|s| !s.is_empty() && s.chars().any(|c| c.is_ascii_digit()))
        .collect();
    println!("numbers: {:?}", nums);
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A `*.ext` glob matches on the suffix; a star-free pattern needs equality.
    #[test]
    fn test_glob() {
        let cases = [
            ("*.txt", "hello.txt", true),
            ("*.txt", "hello.rs", false),
            ("exact", "exact", true),
        ];
        for &(pattern, input, expected) in cases.iter() {
            assert!(glob_match(pattern, input) == expected);
        }
    }

    /// `%` spans any run, `_` spans one char, and unrelated text is rejected.
    #[test]
    fn test_like() {
        let cases = [
            ("hello", "h%o", true),
            ("hello", "he_lo", true),
            ("hello", "world", false),
        ];
        for &(input, pattern, expected) in cases.iter() {
            assert!(like_match(input, pattern) == expected);
        }
    }
}
(* 486. Pattern matching without regex – OCaml *)
(* Standard Str module provides basic regex *)
(* [starts_with s pre] is [true] iff [pre] is a prefix of [s].
   The length check comes first so [String.sub] is never called out of range. *)
let starts_with s pre =
  let plen = String.length pre in
  plen <= String.length s && String.sub s 0 plen = pre

(* [ends_with s suf] is [true] iff [suf] is a suffix of [s].
   The length check comes first so [String.sub] is never called out of range. *)
let ends_with s suf =
  let slen = String.length s in
  let suflen = String.length suf in
  if suflen > slen then false
  else String.sub s (slen - suflen) suflen = suf

(* [matches_glob pattern s] tests [s] against a glob with at most one '*'.
   - no '*': [s] must equal the pattern;
   - one '*': [s] must start with the text before it and end with the text
     after it, and be long enough that the two parts do not overlap;
   - more than one '*': not supported, always [false]. *)
let matches_glob pattern s =
  match String.split_on_char '*' pattern with
  | [exact] -> s = exact
  | [prefix; suffix] ->
    let needed = String.length prefix + String.length suffix in
    needed <= String.length s && starts_with s prefix && ends_with s suffix
  | _ -> false

(* Demo driver: prints the file names matching "*.txt" on one line, then
   reports for each sample word whether it contains an ASCII digit. *)
let () =
  let words = ["hello.txt"; "world.rs"; "README.md"; "test.txt"] in
  let txt_files = List.filter (matches_glob "*.txt") words in
  Printf.printf "*.txt: %s\n" (String.concat " " txt_files);

  let is_digit c = '0' <= c && c <= '9' in
  let has_digit s = String.exists is_digit s in
  let report s = Printf.printf "%s has_digit=%b\n" s (has_digit s) in
  List.iter report ["abc"; "abc123"; "xyz"]