๐Ÿฆ€ Functional Rust

819: Z-Algorithm

Difficulty: 3 Level: Intermediate Compute, in linear time, how far each position in a string matches the string's own prefix โ€” then use it for O(n + m) pattern matching with a single array.

The Problem This Solves

The Z-algorithm is KMP's conceptual sibling: both achieve O(n + m) pattern matching, both precompute reuse information to avoid backtracking. The Z-array is often easier to reason about because it's a direct measurement โ€” "the string starting here matches the first Z[i] characters of the whole string" โ€” rather than the more abstract failure-function of KMP. Beyond pattern matching, the Z-array itself is a building block: detect if a string is a rotation of another, find the shortest period of a string, check if a string has a prefix that is also a suffix. It's a clean primitive that appears in competitive programming problems where KMP would also work but Z is more straightforward to implement correctly under pressure. For systems programmers: the Z-algorithm processes input in a single left-to-right pass, making it friendly for streaming and cache-efficient computation.

The Intuition

Maintain a "Z-box" `[l, r)`: the rightmost interval where the string matches a prefix. For each new position `i`: if `i < r`, initialize `Z[i]` from the mirror position `Z[i-l]` (bounded by how far the box extends). Then expand by comparing characters. Update the Z-box if the new match extends further right. Each character is touched at most twice: once during expansion, once as a mirror lookup โ€” giving O(n) total. For pattern search, concatenate `pattern + '$' + text`. Any position in the text portion where `Z[i] == len(pattern)` is a match. The sentinel `'$'` prevents Z-values from crossing the boundary between pattern and text.

How It Works in Rust

fn z_array(s: &[u8]) -> Vec<usize> {
 let n = s.len();
 let mut z = vec![0usize; n];
 z[0] = n;                    // Convention: z[0] = length of whole string
 let (mut l, mut r) = (0usize, 0usize);

 for i in 1..n {
     if i < r {
         // Mirror: reuse z[i-l], but don't go past the Z-box boundary
         z[i] = z[i - l].min(r - i);
     }
     // Expand: compare s[z[i]] against s[i + z[i]]
     while i + z[i] < n && s[z[i]] == s[i + z[i]] {
         z[i] += 1;
     }
     // Update Z-box if new match extends further right
     if i + z[i] > r {
         l = i;
         r = i + z[i];
     }
 }
 z
}

fn z_search(pattern: &str, text: &str) -> Vec<usize> {
 let m = pattern.len();
 // Sentinel '$' must not appear in pattern or text
 let combined: Vec<u8> = pattern.bytes()
     .chain(std::iter::once(b'$'))
     .chain(text.bytes())
     .collect();
 let z = z_array(&combined);
 // Positions in text where z[i] == m are match starts
 z.iter()
     .enumerate()
     .skip(m + 1)               // Skip pattern + sentinel
     .filter_map(|(i, &zi)| if zi == m { Some(i - m - 1) } else { None })
     .collect()
}
The iterator chain for building the combined string is idiomatic Rust: `chain` composes without allocation until `collect()`.

What This Unlocks

Key Differences

ConceptOCamlRust
Array mutation`Array.set arr i v``z[i] = v` โ€” direct indexing
String as bytes`Bytes.of_string` or `String.to_seq``.bytes()` iterator โ€” zero-copy
Sentinel concat`pattern ^ "$" ^ text``.chain(once(b'$')).chain(text.bytes())`
Z-box update`let l = ref 0; let r = ref 0``let (mut l, mut r) = (0, 0)`
Filter positions`Array.to_seq> Seq.filter_map``.enumerate().filter_map(...)`
/// Z-Algorithm: linear-time pattern matching
///
/// Z[i] = length of the longest substring starting at s[i] that is
/// also a prefix of s. Sentinel '$' keeps Z-values bounded at the boundary.

fn z_array(s: &[u8]) -> Vec<usize> {
    let n = s.len();
    let mut z = vec![0usize; n];
    z[0] = n;
    let (mut l, mut r) = (0usize, 0usize);
    for i in 1..n {
        if i < r {
            z[i] = z[i - l].min(r - i);
        }
        while i + z[i] < n && s[z[i]] == s[i + z[i]] {
            z[i] += 1;
        }
        if i + z[i] > r {
            l = i;
            r = i + z[i];
        }
    }
    z
}

/// Returns 0-based start positions of `pattern` in `text`.
fn z_search(pattern: &str, text: &str) -> Vec<usize> {
    let m = pattern.len();
    // Sentinel must not appear in pattern or text for correctness
    let combined: Vec<u8> = pattern
        .bytes()
        .chain(std::iter::once(b'$'))
        .chain(text.bytes())
        .collect();
    let z = z_array(&combined);
    z.iter()
        .enumerate()
        .skip(m + 1)
        .filter_map(|(i, &zi)| if zi == m { Some(i - m - 1) } else { None })
        .collect()
}

fn main() {
    let text = "aabxaabxaab";
    let pattern = "aab";
    let positions = z_search(pattern, text);
    println!("Pattern '{}' in '{}': {:?}", pattern, text, positions);

    // Show the Z-array itself
    let s = b"aabxaa";
    let z = z_array(s);
    println!("Z-array of 'aabxaa': {:?}", z);
    // Expected: [6, 1, 0, 0, 2, 1]

    let positions2 = z_search("ab", "ababab");
    println!("'ab' in 'ababab': {:?}", positions2);
    // Expected: [0, 2, 4]
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_z_array_basic() {
        // z[0] always equals len; z[1] = length of match with prefix
        let z = z_array(b"aabxaa");
        assert_eq!(z[0], 6);
        assert_eq!(z[1], 1); // "a" matches prefix "a"
        assert_eq!(z[2], 0); // "b" doesn't match "a"
        assert_eq!(z[4], 2); // "aa" matches prefix "aa"
    }

    #[test]
    fn test_z_array_all_same() {
        let z = z_array(b"aaaa");
        // z[0]=4, z[1]=3, z[2]=2, z[3]=1
        assert_eq!(z, vec![4, 3, 2, 1]);
    }

    #[test]
    fn test_search_multiple() {
        assert_eq!(z_search("aab", "aabxaabxaab"), vec![0, 4, 8]);
    }

    #[test]
    fn test_search_overlapping() {
        // Pattern "aa" in "aaaa": positions 0, 1, 2
        assert_eq!(z_search("aa", "aaaa"), vec![0, 1, 2]);
    }

    #[test]
    fn test_search_not_found() {
        assert_eq!(z_search("xyz", "abcdef"), vec![]);
    }

    #[test]
    fn test_search_full_match() {
        assert_eq!(z_search("abc", "abc"), vec![0]);
    }

    #[test]
    fn test_search_at_end() {
        assert_eq!(z_search("end", "the end"), vec![4]);
    }

    #[test]
    fn test_repeated_pattern() {
        assert_eq!(z_search("ab", "ababab"), vec![0, 2, 4]);
    }
}
(* Z-Algorithm in OCaml โ€” functional style with Array *)

(* Build Z-array: Z.(i) = length of longest match with prefix of s starting at i *)
let z_array (s : string) : int array =
  let n = String.length s in
  let z = Array.make n 0 in
  let chars = Array.init n (String.get s) in
  z.(0) <- n;
  let l = ref 0 and r = ref 0 in
  for i = 1 to n - 1 do
    if i < !r then
      z.(i) <- min (z.(i - !l)) (!r - i);
    while i + z.(i) < n && chars.(z.(i)) = chars.(i + z.(i)) do
      z.(i) <- z.(i) + 1
    done;
    if i + z.(i) > !r then begin
      l := i;
      r := i + z.(i)
    end
  done;
  z

(* Pattern search: returns list of 0-based match positions in text *)
let z_search (pattern : string) (text : string) : int list =
  let m = String.length pattern in
  let combined = pattern ^ "$" ^ text in
  let z = z_array combined in
  let n = String.length combined in
  (* Positions in combined where Z = m correspond to match starts in text *)
  let results = ref [] in
  for i = m + 1 to n - 1 do
    if z.(i) = m then
      results := (i - m - 1) :: !results
  done;
  List.rev !results

let () =
  let text = "aabxaabxaab" in
  let pattern = "aab" in
  let matches = z_search pattern text in
  Printf.printf "Pattern '%s' found at positions: " pattern;
  List.iter (fun p -> Printf.printf "%d " p) matches;
  print_newline ();

  (* Demonstrate Z-array directly *)
  let s = "aabxaa" in
  let z = z_array s in
  Printf.printf "Z-array of '%s': " s;
  Array.iter (fun v -> Printf.printf "%d " v) z;
  print_newline ()