๐Ÿฆ€ Functional Rust

794. Word Break (Dictionary DP)

Difficulty: 3 Level: Advanced Determine whether a string can be segmented into dictionary words โ€” and enumerate all valid segmentations.

The Problem This Solves

Word break is the algorithmic core of text tokenisation: given a sequence of characters with no spaces, can it be decomposed into known tokens? Input method editors (Chinese/Japanese/Korean text input) use exactly this to segment character streams into words. Search engine query processing, URL slug parsing, and natural language processing pipelines all need efficient word segmentation when whitespace is absent or unreliable. The enumeration variant (all valid segmentations) is relevant for parser generators and grammar checkers that need to explore ambiguous parses โ€” the same O(nยฒ) DP structure, extended to collect all parse trees.

The Intuition

`dp[i]` is true if `s[0..i]` can be segmented from the dictionary. For each position `i`, scan all earlier positions `j`; if `dp[j]` is true and `s[j..i]` is a dictionary word, then `dp[i]` is true. Once `dp[n]` is true, you know a segmentation exists. The `HashSet` lookup makes each substring check O(1) on average, giving O(nยฒ) overall. OCaml would express this recursively with memoisation via `Hashtbl`; Rust uses a `HashSet<&str>` for the dictionary and a `Vec<bool>` for the DP table. The `prev` array variant stores all `j` values that contributed to each `dp[i]`, enabling full reconstruction.

How It Works in Rust

// O(nยฒ) time โ€” HashSet lookup is O(1) average
fn word_break(s: &str, dict: &[&str]) -> bool {
 let dict_set: HashSet<&str> = dict.iter().copied().collect();
 let n = s.len();
 let mut dp = vec![false; n + 1];
 dp[0] = true;

 for i in 1..=n {
     for j in 0..i {
         if dp[j] && dict_set.contains(&s[j..i]) {
             dp[i] = true;
             break;  // found one valid split, no need to continue
         }
     }
 }
 dp[n]
}

// Enumerate all segmentations: replace bool dp with Vec<Vec<usize>>
// prev[i] = all j values such that dp[j] && s[j..i] is in dict
fn word_break_all(s: &str, dict: &[&str]) -> Vec<String> {
 let mut prev: Vec<Vec<usize>> = vec![vec![]; n + 1];
 // ... same DP, no early break, collect all j in prev[i]

 // Recursive reconstruction from prev
 fn collect(s: &str, prev: &Vec<Vec<usize>>, i: usize) -> Vec<String> {
     if i == 0 { return vec![String::new()]; }
     let mut results = Vec::new();
     for &j in &prev[i] {
         let word = &s[j..i];
         for base in collect(s, prev, j) {
             results.push(if base.is_empty() { word.into() }
                          else { format!("{base} {word}") });
         }
     }
     results
 }
 if dp[n] { collect(s, &prev, n) } else { vec![] }
}
Note the `break` in the boolean variant โ€” once one valid split is found, there's no need to check the rest. The `all` variant omits this to collect every parse path.

What This Unlocks

Key Differences

ConceptOCamlRust
Dictionary lookup`Hashtbl.mem` or `Set.mem``HashSet<&str>::contains` โ€” zero-copy `&str` slices
DP array`Array.make (n+1) false``vec![false; n+1]`
Substring slice`String.sub s j (i-j)``&s[j..i]` โ€” O(1) slice, no allocation
All-parses backtrackRecursive with list accumulatorRecursive inner fn on `&Vec<Vec<usize>>`
Early termination`raise Exit` or boolean flag`break` in inner loop
// Word Break โ€” dictionary DP O(nยฒ)
use std::collections::HashSet;

fn word_break(s: &str, dict: &[&str]) -> bool {
    let dict_set: HashSet<&str> = dict.iter().copied().collect();
    let n = s.len();
    let mut dp = vec![false; n + 1];
    dp[0] = true;
    for i in 1..=n {
        for j in 0..i {
            if dp[j] && dict_set.contains(&s[j..i]) {
                dp[i] = true;
                break;
            }
        }
    }
    dp[n]
}

fn word_break_all(s: &str, dict: &[&str]) -> Vec<String> {
    let dict_set: HashSet<&str> = dict.iter().copied().collect();
    let n = s.len();
    let mut dp   = vec![false; n + 1];
    let mut prev: Vec<Vec<usize>> = vec![vec![]; n + 1];
    dp[0] = true;
    for i in 1..=n {
        for j in 0..i {
            if dp[j] && dict_set.contains(&s[j..i]) {
                dp[i] = true;
                prev[i].push(j);
            }
        }
    }

    fn collect(s: &str, prev: &Vec<Vec<usize>>, i: usize) -> Vec<String> {
        if i == 0 { return vec![String::new()]; }
        let mut results = Vec::new();
        for &j in &prev[i] {
            let word = &s[j..i];
            for base in collect(s, prev, j) {
                if base.is_empty() {
                    results.push(word.to_string());
                } else {
                    results.push(format!("{base} {word}"));
                }
            }
        }
        results
    }

    if dp[n] { collect(s, &prev, n) } else { vec![] }
}

fn main() {
    let dict = vec!["leet", "code", "cats", "and", "sand", "dog", "cat"];
    println!("'leetcode':    {}", word_break("leetcode", &dict));
    println!("'catsanddog': {}", word_break("catsanddog", &dict));
    println!("'catsanddogx':{}", word_break("catsanddogx", &dict));

    let sentences = word_break_all("catsanddog", &["cat", "cats", "and", "sand", "dog"]);
    println!("All parses of 'catsanddog': {:?}", sentences);
}
(* Word Break โ€” dictionary DP O(nยฒ) *)
module SSet = Set.Make(String)

let word_break s dict =
  let dict_set = List.fold_left (fun acc w -> SSet.add w acc) SSet.empty dict in
  let n  = String.length s in
  let dp = Array.make (n + 1) false in
  dp.(0) <- true;
  for i = 1 to n do
    let j = ref 0 in
    while !j < i && not dp.(i) do
      if dp.(!j) && SSet.mem (String.sub s !j (i - !j)) dict_set then
        dp.(i) <- true;
      incr j
    done
  done;
  dp.(n)

let word_break_all s dict =
  let dict_set = List.fold_left (fun acc w -> SSet.add w acc) SSet.empty dict in
  let n  = String.length s in
  (* prev.(i) = list of j such that dp[j]=true and s[j..i] in dict *)
  let dp   = Array.make (n + 1) false in
  let prev = Array.make (n + 1) [] in
  dp.(0) <- true;
  for i = 1 to n do
    for j = 0 to i - 1 do
      if dp.(j) && SSet.mem (String.sub s j (i - j)) dict_set then begin
        dp.(i) <- true;
        prev.(i) <- j :: prev.(i)
      end
    done
  done;
  (* Collect all sentences *)
  let rec collect i =
    if i = 0 then [[]]
    else
      List.concat_map (fun j ->
        let word = String.sub s j (i - j) in
        List.map (fun rest -> rest @ [word]) (collect j)
      ) prev.(i)
  in
  if dp.(n) then collect n else []

let () =
  let dict = ["leet"; "code"; "leets"; "code"; "cats"; "and"; "sand"; "dog"; "cat"] in
  Printf.printf "'leetcode'    -> %b\n" (word_break "leetcode" dict);
  Printf.printf "'catsanddog' -> %b\n" (word_break "catsanddog" dict);
  Printf.printf "'catsanddogx'-> %b\n" (word_break "catsanddogx" dict);
  let sentences = word_break_all "catsanddog" ["cat";"cats";"and";"sand";"dog"] in
  Printf.printf "All parses of 'catsanddog':\n";
  List.iter (fun ws -> Printf.printf "  %s\n" (String.concat " " ws)) sentences