๐Ÿฆ€ Functional Rust

1071: Regex Matching

Difficulty: Advanced Category: Dynamic Programming Concept: Match strings against patterns with `.` (any char) and `*` (zero or more of preceding) using DP Key Insight: The `` operator is the tricky part โ€” it couples with the preceding character. `dp[i][j]` depends on: (1) zero matches of `x` โ†’ `dp[i][j-2]`, or (2) one+ matches if `x` matches current char โ†’ `dp[i-1][j]`.
// 1071: Regex Matching โ€” '.' and '*' โ€” DP

use std::collections::HashMap;

// Approach 1: Bottom-up DP
fn is_match_dp(s: &str, p: &str) -> bool {
    let s: Vec<char> = s.chars().collect();
    let p: Vec<char> = p.chars().collect();
    let (m, n) = (s.len(), p.len());
    let mut dp = vec![vec![false; n + 1]; m + 1];
    dp[0][0] = true;

    // Pattern like a*, a*b* can match empty string
    for j in 2..=n {
        if p[j - 1] == '*' { dp[0][j] = dp[0][j - 2]; }
    }

    for i in 1..=m {
        for j in 1..=n {
            if p[j - 1] == '*' {
                dp[i][j] = dp[i][j - 2]; // zero occurrences
                if p[j - 2] == '.' || p[j - 2] == s[i - 1] {
                    dp[i][j] = dp[i][j] || dp[i - 1][j]; // one+ occurrences
                }
            } else if p[j - 1] == '.' || p[j - 1] == s[i - 1] {
                dp[i][j] = dp[i - 1][j - 1];
            }
        }
    }
    dp[m][n]
}

// Approach 2: Recursive with memoization
fn is_match_memo(s: &str, p: &str) -> bool {
    let s: Vec<char> = s.chars().collect();
    let p: Vec<char> = p.chars().collect();

    fn solve(i: usize, j: usize, s: &[char], p: &[char], cache: &mut HashMap<(usize, usize), bool>) -> bool {
        if j == p.len() { return i == s.len(); }
        if let Some(&v) = cache.get(&(i, j)) { return v; }
        let first_match = i < s.len() && (p[j] == '.' || p[j] == s[i]);
        let v = if j + 1 < p.len() && p[j + 1] == '*' {
            solve(i, j + 2, s, p, cache) || (first_match && solve(i + 1, j, s, p, cache))
        } else {
            first_match && solve(i + 1, j + 1, s, p, cache)
        };
        cache.insert((i, j), v);
        v
    }

    let mut cache = HashMap::new();
    solve(0, 0, &s, &p, &mut cache)
}

// Approach 3: NFA simulation
fn is_match_nfa(s: &str, p: &str) -> bool {
    let p: Vec<char> = p.chars().collect();
    let s: Vec<char> = s.chars().collect();
    let n = p.len();

    // States are pattern positions; epsilon transitions on '*'
    let mut states = std::collections::HashSet::new();
    states.insert(0);

    // Add epsilon transitions (skip x* pairs)
    fn add_epsilon(states: &mut std::collections::HashSet<usize>, p: &[char]) {
        let mut changed = true;
        while changed {
            changed = false;
            let current: Vec<usize> = states.iter().copied().collect();
            for &st in &current {
                if st + 1 < p.len() && p[st + 1] == '*' && !states.contains(&(st + 2)) {
                    states.insert(st + 2);
                    changed = true;
                }
            }
        }
    }

    add_epsilon(&mut states, &p);

    for &ch in &s {
        let mut next = std::collections::HashSet::new();
        for &st in &states {
            if st < n {
                if p[st] == '.' || p[st] == ch {
                    if st + 1 < n && p[st + 1] == '*' {
                        next.insert(st); // stay (one more match)
                    } else {
                        next.insert(st + 1);
                    }
                }
                // Also handle: if current is 'x' and next is '*', and we're matching via '*'
                if st + 1 < n && p[st + 1] == '*' && (p[st] == '.' || p[st] == ch) {
                    next.insert(st + 2); // consume and move past *
                    next.insert(st);     // consume and stay
                }
            }
        }
        add_epsilon(&mut next, &p);
        states = next;
    }

    states.contains(&n)
}

fn main() {
    println!("match(\"aa\", \"a*\") = {}", is_match_dp("aa", "a*"));
    println!("match(\"ab\", \".*\") = {}", is_match_dp("ab", ".*"));
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dp() {
        assert!(!is_match_dp("aa", "a"));
        assert!(is_match_dp("aa", "a*"));
        assert!(is_match_dp("ab", ".*"));
        assert!(is_match_dp("aab", "c*a*b"));
        assert!(!is_match_dp("mississippi", "mis*is*p*."));
        assert!(is_match_dp("", "a*b*"));
    }

    #[test]
    fn test_memo() {
        assert!(!is_match_memo("aa", "a"));
        assert!(is_match_memo("aa", "a*"));
        assert!(is_match_memo("ab", ".*"));
        assert!(is_match_memo("aab", "c*a*b"));
    }

    #[test]
    fn test_nfa() {
        assert!(!is_match_nfa("aa", "a"));
        assert!(is_match_nfa("aa", "a*"));
        assert!(is_match_nfa("ab", ".*"));
        assert!(is_match_nfa("aab", "c*a*b"));
    }
}
(* 1071: Regex Matching โ€” '.' and '*' โ€” DP *)

(* Approach 1: Bottom-up DP *)
let is_match_dp s p =
  let m = String.length s and n = String.length p in
  let dp = Array.init (m + 1) (fun _ -> Array.make (n + 1) false) in
  dp.(0).(0) <- true;
  (* Pattern like a*, a*b*, a*b*c* can match empty string *)
  for j = 2 to n do
    if p.[j - 1] = '*' then dp.(0).(j) <- dp.(0).(j - 2)
  done;
  for i = 1 to m do
    for j = 1 to n do
      if p.[j - 1] = '*' then begin
        (* '*' means zero occurrences of preceding element *)
        dp.(i).(j) <- dp.(i).(j - 2);
        (* or one+ occurrences if preceding matches *)
        if p.[j - 2] = '.' || p.[j - 2] = s.[i - 1] then
          dp.(i).(j) <- dp.(i).(j) || dp.(i - 1).(j)
      end else if p.[j - 1] = '.' || p.[j - 1] = s.[i - 1] then
        dp.(i).(j) <- dp.(i - 1).(j - 1)
    done
  done;
  dp.(m).(n)

(* Approach 2: Recursive with memoization *)
let is_match_memo s p =
  let m = String.length s and n = String.length p in
  let cache = Hashtbl.create 64 in
  let rec solve i j =
    if j = n then i = m
    else
      match Hashtbl.find_opt cache (i, j) with
      | Some v -> v
      | None ->
        let first_match = i < m && (p.[j] = '.' || p.[j] = s.[i]) in
        let v =
          if j + 1 < n && p.[j + 1] = '*' then
            solve i (j + 2) || (first_match && solve (i + 1) j)
          else
            first_match && solve (i + 1) (j + 1)
        in
        Hashtbl.add cache (i, j) v;
        v
  in
  solve 0 0

let () =
  assert (is_match_dp "aa" "a" = false);
  assert (is_match_dp "aa" "a*" = true);
  assert (is_match_dp "ab" ".*" = true);
  assert (is_match_dp "aab" "c*a*b" = true);
  assert (is_match_dp "mississippi" "mis*is*p*." = false);
  assert (is_match_dp "" "a*b*" = true);

  assert (is_match_memo "aa" "a" = false);
  assert (is_match_memo "aa" "a*" = true);
  assert (is_match_memo "ab" ".*" = true);
  assert (is_match_memo "aab" "c*a*b" = true);

  Printf.printf "โœ“ All tests passed\n"

๐Ÿ“Š Detailed Comparison

Regex Matching โ€” Comparison

Core Insight

Regex matching with `.` and `` requires careful handling of the `` operator, which applies to the preceding character. The DP table tracks whether `s[0..i]` matches `p[0..j]`, with `` creating two transitions: skip the `x` pair, or consume one character if it matches.

OCaml Approach

  • 2D boolean array DP
  • Memoized recursion with `Hashtbl` โ€” cleaner logic
  • `first_match` computed inline with `&&` and `||`
  • `Char` comparison with `=`

Rust Approach

  • 2D `vec![vec![false]]` DP
  • `HashMap` memoization
  • NFA simulation as third approach โ€” treats pattern as finite automaton
  • `HashSet` for NFA state tracking

Comparison Table

AspectOCamlRust
Char access`s.[i]` and `p.[j]``Vec<char>` after `.chars().collect()`
Boolean DP`Array.init` + `Array.make``vec![vec![false]]`
Star handling`dp.(i).(j-2) \\dp.(i-1).(j)`Same logic
NFA approachNot shown`HashSet<usize>` for state sets
Pattern matchingCharacter comparisonSame