๐Ÿฆ€ Functional Rust

166: Separated List

Difficulty: 3 Level: Advanced Parse `a, b, c` โ€” the comma-separated pattern used everywhere: function arguments, array literals, CSV rows, import lists.

The Problem This Solves

Separated lists are everywhere. Function calls: `foo(x, y, z)`. Array literals: `[1, 2, 3]`. CSV rows: `Alice,30,Engineer`. Import lists: `use std::{io, fs, collections}`. Every language, every data format โ€” they all have this pattern. The tricky part is the separator. You can't just parse `item (sep item)*` naively โ€” what about trailing separators like `[1, 2, 3,]`? And what about the empty list `[]`? You need to handle three cases: empty list, single item, and multiple items with separators between them. The harder problem is backtracking on the separator. If you parse `1,` and then fail to find another item, you've consumed the trailing comma. You need to save your position before trying the separator, and only advance past it if the next item also succeeds.

The Intuition

Parse the first item. Then loop: save position, try the separator, try the next item. If both succeed, keep going. If either fails, restore position to before the separator and stop.
input: "1, 2, 3"
parse first: 1, remaining: ", 2, 3"
save pos โ†’ try sep "," โ†’ ok โ†’ try item โ†’ 2 โ†’ keep
save pos โ†’ try sep "," โ†’ ok โ†’ try item โ†’ 3 โ†’ keep
save pos โ†’ try sep "," โ†’ fail โ†’ restore โ†’ stop
result: [1, 2, 3]

How It Works in Rust

fn separated_list0<'a, T, S>(
 separator: impl Fn(&'a str) -> ParseResult<S>,
 item: impl Fn(&'a str) -> ParseResult<T>,
) -> impl Fn(&'a str) -> ParseResult<Vec<T>> {
 move |input| {
     let mut results = Vec::new();
     let mut remaining = input;

     // Try first item โ€” if this fails, return empty list (list0 allows empty)
     match item(remaining) {
         Err(_) => return Ok((results, remaining)),
         Ok((val, rest)) => {
             results.push(val);
             remaining = rest;
         }
     }

     // Now try separator + item pairs
     loop {
         let before_sep = remaining;  // save position BEFORE separator

         match separator(before_sep) {
             Err(_) => break,  // no separator โ†’ done
             Ok((_, after_sep)) => {
                 match item(after_sep) {
                     Err(_) => break,  // separator but no item โ†’ trailing sep, restore
                     Ok((val, rest)) => {
                         results.push(val);
                         remaining = rest;  // advance past both sep and item
                     }
                 }
             }
         }
         // Note: if sep succeeded but item failed, remaining is still before_sep
         // because we didn't update it โ€” that's the backtracking
     }

     Ok((results, remaining))
 }
}

// list1: requires at least one item
fn separated_list1<'a, T, S>(
 separator: impl Fn(&'a str) -> ParseResult<S>,
 item: impl Fn(&'a str) -> ParseResult<T>,
) -> impl Fn(&'a str) -> ParseResult<Vec<T>> {
 move |input| {
     let (results, rest) = separated_list0(&separator, &item)(input)?;
     if results.is_empty() {
         Err("expected at least one item".to_string())
     } else {
         Ok((results, rest))
     }
 }
}

What This Unlocks

Key Differences

ConceptOCamlRust
Loop styleTail-recursive helper function`loop` + `break`
BacktrackingReturn previous `remaining` valueDon't update `remaining` if item fails
Trailing sepSeparate `separated_list_trailing` variantSame approach โ€” separate function
Empty list`list0` returns `[]`Returns `Vec::new()`
// Example 166: Separated List
// separated_list0, separated_list1: comma-separated values

type ParseResult<'a, T> = Result<(T, &'a str), String>;
type Parser<'a, T> = Box<dyn Fn(&'a str) -> ParseResult<'a, T> + 'a>;

fn satisfy<'a, F>(pred: F, desc: &str) -> Parser<'a, char>
where F: Fn(char) -> bool + 'a {
    let desc = desc.to_string();
    Box::new(move |input: &'a str| match input.chars().next() {
        Some(c) if pred(c) => Ok((c, &input[c.len_utf8()..])),
        _ => Err(format!("Expected {}", desc)),
    })
}

fn many1<'a, T: 'a>(p: Parser<'a, T>) -> Parser<'a, Vec<T>> {
    Box::new(move |input: &'a str| {
        let (first, mut rem) = p(input)?;
        let mut v = vec![first];
        while let Ok((val, r)) = p(rem) { v.push(val); rem = r; }
        Ok((v, rem))
    })
}

// ============================================================
// Approach 1: separated_list0 โ€” zero or more items
// ============================================================

fn separated_list0<'a, T: 'a, S: 'a>(
    sep: Parser<'a, S>,
    item: Parser<'a, T>,
) -> Parser<'a, Vec<T>> {
    Box::new(move |input: &'a str| {
        let (first, mut remaining) = match item(input) {
            Err(_) => return Ok((vec![], input)),
            Ok(r) => r,
        };
        let mut results = vec![first];
        loop {
            let after_sep = match sep(remaining) {
                Err(_) => break,
                Ok((_, r)) => r,
            };
            match item(after_sep) {
                Ok((val, rest)) => { results.push(val); remaining = rest; }
                Err(_) => break, // backtrack: don't consume trailing sep
            }
        }
        Ok((results, remaining))
    })
}

// ============================================================
// Approach 2: separated_list1 โ€” one or more
// ============================================================

fn separated_list1<'a, T: 'a, S: 'a>(
    sep: Parser<'a, S>,
    item: Parser<'a, T>,
) -> Parser<'a, Vec<T>> {
    Box::new(move |input: &'a str| {
        let (results, rest) = separated_list0_inner(&sep, &item, input)?;
        if results.is_empty() {
            Err("Expected at least one item".to_string())
        } else {
            Ok((results, rest))
        }
    })
}

fn separated_list0_inner<'a, T, S>(
    sep: &(dyn Fn(&'a str) -> ParseResult<'a, S>),
    item: &(dyn Fn(&'a str) -> ParseResult<'a, T>),
    input: &'a str,
) -> ParseResult<'a, Vec<T>> {
    let (first, mut remaining) = match item(input) {
        Err(_) => return Ok((vec![], input)),
        Ok(r) => r,
    };
    let mut results = vec![first];
    loop {
        let after_sep = match sep(remaining) {
            Err(_) => break,
            Ok((_, r)) => r,
        };
        match item(after_sep) {
            Ok((val, rest)) => { results.push(val); remaining = rest; }
            Err(_) => break,
        }
    }
    Ok((results, remaining))
}

// ============================================================
// Approach 3: With trailing separator allowed
// ============================================================

fn separated_list_trailing<'a, T: 'a, S: 'a>(
    sep: Parser<'a, S>,
    item: Parser<'a, T>,
) -> Parser<'a, Vec<T>> {
    Box::new(move |input: &'a str| {
        let (first, mut remaining) = match item(input) {
            Err(_) => return Ok((vec![], input)),
            Ok(r) => r,
        };
        let mut results = vec![first];
        loop {
            let after_sep = match sep(remaining) {
                Err(_) => break,
                Ok((_, r)) => r,
            };
            match item(after_sep) {
                Ok((val, rest)) => { results.push(val); remaining = rest; }
                Err(_) => { remaining = after_sep; break; } // consume trailing sep
            }
        }
        Ok((results, remaining))
    })
}

/// Comma with optional whitespace
fn comma<'a>() -> Parser<'a, char> {
    Box::new(|input: &'a str| {
        let trimmed = input.trim_start();
        match trimmed.chars().next() {
            Some(',') => Ok((',', trimmed[1..].trim_start())),
            _ => Err("Expected ','".to_string()),
        }
    })
}

/// Digit string
fn digit_str<'a>() -> Parser<'a, String> {
    Box::new(|input: &'a str| {
        let p = many1(satisfy(|c| c.is_ascii_digit(), "digit"));
        let (chars, rest) = p(input)?;
        Ok((chars.into_iter().collect(), rest))
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sep_list0_multiple() {
        let p = separated_list0(comma(), digit_str());
        let (v, r) = p("1, 2, 3").unwrap();
        assert_eq!(v, vec!["1", "2", "3"]);
        assert_eq!(r, "");
    }

    #[test]
    fn test_sep_list0_empty() {
        let p = separated_list0(comma(), digit_str());
        let (v, _) = p("").unwrap();
        assert!(v.is_empty());
    }

    #[test]
    fn test_sep_list0_single() {
        let p = separated_list0(comma(), digit_str());
        let (v, _) = p("42").unwrap();
        assert_eq!(v, vec!["42"]);
    }

    #[test]
    fn test_sep_list1_success() {
        let p = separated_list1(comma(), digit_str());
        let (v, _) = p("1, 2").unwrap();
        assert_eq!(v, vec!["1", "2"]);
    }

    #[test]
    fn test_sep_list1_empty_fails() {
        let p = separated_list1(comma(), digit_str());
        assert!(p("").is_err());
    }

    #[test]
    fn test_trailing_sep() {
        let p = separated_list_trailing(comma(), digit_str());
        let (v, rest) = p("1, 2, ").unwrap();
        assert_eq!(v, vec!["1", "2"]);
        assert_eq!(rest, "");
    }

    #[test]
    fn test_no_trailing() {
        let p = separated_list0(comma(), digit_str());
        // Should not consume trailing comma
        let (v, rest) = p("1, 2, abc").unwrap();
        assert_eq!(v, vec!["1", "2"]);
        assert_eq!(rest, ", abc"); // comma before abc not consumed (backtrack)
    }
}
(* Example 166: Separated List *)
(* separated_list0, separated_list1: comma-separated values *)

type 'a parse_result = ('a * string, string) result
type 'a parser = string -> 'a parse_result

let satisfy pred desc : char parser = fun input ->
  if String.length input > 0 && pred input.[0] then
    Ok (input.[0], String.sub input 1 (String.length input - 1))
  else Error (Printf.sprintf "Expected %s" desc)

let many1 p : 'a list parser = fun input ->
  let rec go acc r = match p r with Ok (v, r') -> go (v::acc) r' | Error _ -> Ok (List.rev acc, r)
  in match p input with Error e -> Error e
  | Ok (v, r) -> go [v] r

let map f p : 'b parser = fun input ->
  match p input with Ok (v, r) -> Ok (f v, r) | Error e -> Error e

let ws0 : unit parser = fun input ->
  let rec skip i = if i < String.length input &&
    (input.[i] = ' ' || input.[i] = '\t' || input.[i] = '\n') then skip (i+1) else i in
  let i = skip 0 in Ok ((), String.sub input i (String.length input - i))

(* Approach 1: separated_list0 โ€” zero or more items separated by sep *)
let separated_list0 (sep : 'b parser) (item : 'a parser) : 'a list parser = fun input ->
  match item input with
  | Error _ -> Ok ([], input)
  | Ok (first, rest) ->
    let rec go acc remaining =
      match sep remaining with
      | Error _ -> Ok (List.rev acc, remaining)
      | Ok (_, after_sep) ->
        match item after_sep with
        | Error _ -> Ok (List.rev acc, remaining)  (* trailing sep: backtrack *)
        | Ok (v, rest') -> go (v :: acc) rest'
    in
    go [first] rest

(* Approach 2: separated_list1 โ€” one or more *)
let separated_list1 (sep : 'b parser) (item : 'a parser) : 'a list parser = fun input ->
  match separated_list0 sep item input with
  | Ok ([], _) -> Error "Expected at least one item"
  | result -> result

(* Approach 3: separated_list with trailing separator allowed *)
let separated_list_trailing (sep : 'b parser) (item : 'a parser) : 'a list parser = fun input ->
  match item input with
  | Error _ -> Ok ([], input)
  | Ok (first, rest) ->
    let rec go acc remaining =
      match sep remaining with
      | Error _ -> Ok (List.rev acc, remaining)
      | Ok (_, after_sep) ->
        match item after_sep with
        | Error _ -> Ok (List.rev acc, after_sep)  (* consume trailing sep *)
        | Ok (v, rest') -> go (v :: acc) rest'
    in
    go [first] rest

let digit_str = map (fun ds -> String.init (List.length ds) (List.nth ds))
  (many1 (satisfy (fun c -> c >= '0' && c <= '9') "digit"))

let comma : char parser = fun input ->
  match ws0 input with
  | Ok ((), r) ->
    if String.length r > 0 && r.[0] = ',' then
      match ws0 (String.sub r 1 (String.length r - 1)) with
      | Ok ((), r') -> Ok (',', r')
      | Error e -> Error e
    else Error "Expected ','"
  | Error e -> Error e

(* Tests *)
let () =
  assert (separated_list0 comma digit_str "1, 2, 3" = Ok (["1"; "2"; "3"], ""));
  assert (separated_list0 comma digit_str "" = Ok ([], ""));
  assert (separated_list0 comma digit_str "42" = Ok (["42"], ""));

  assert (separated_list1 comma digit_str "1, 2" = Ok (["1"; "2"], ""));
  assert (Result.is_error (separated_list1 comma digit_str ""));

  assert (separated_list_trailing comma digit_str "1, 2, " = Ok (["1"; "2"], ""));

  print_endline "โœ“ All tests passed"

๐Ÿ“Š Detailed Comparison

Comparison: Example 166 โ€” Separated List

separated_list0

OCaml:

๐Ÿช Show OCaml equivalent
let separated_list0 (sep : 'b parser) (item : 'a parser) : 'a list parser = fun input ->
match item input with
| Error _ -> Ok ([], input)
| Ok (first, rest) ->
 let rec go acc remaining =
   match sep remaining with
   | Error _ -> Ok (List.rev acc, remaining)
   | Ok (_, after_sep) ->
     match item after_sep with
     | Error _ -> Ok (List.rev acc, remaining)
     | Ok (v, rest') -> go (v :: acc) rest'
 in go [first] rest

Rust:

fn separated_list0<'a, T: 'a, S: 'a>(
 sep: Parser<'a, S>, item: Parser<'a, T>,
) -> Parser<'a, Vec<T>> {
 Box::new(move |input: &'a str| {
     let (first, mut remaining) = match item(input) {
         Err(_) => return Ok((vec![], input)),
         Ok(r) => r,
     };
     let mut results = vec![first];
     loop {
         let after_sep = match sep(remaining) {
             Err(_) => break,
             Ok((_, r)) => r,
         };
         match item(after_sep) {
             Ok((val, rest)) => { results.push(val); remaining = rest; }
             Err(_) => break,
         }
     }
     Ok((results, remaining))
 })
}