๐Ÿฆ€ Functional Rust

167: Recursive Parser

Difficulty: 3 Level: Advanced Handle nesting โ€” `(1 + (2 * 3))` โ€” where a parser must call itself to parse its own contents.

The Problem This Solves

Some grammars are recursive by nature: a list can contain lists, an expression can contain parenthesized expressions, an S-expression can contain S-expressions. The grammar rule for a list is literally `list = "[" (item | list)* "]"`. The parser for `list` needs to call `list` itself. In OCaml, `let rec` handles this naturally โ€” you write `let rec parse_expr input = ... parse_expr ...` and it works. In Rust, closures can't reference themselves directly. A closure is a value stored in a variable; you can't capture a variable that holds the closure you're defining. This example shows three approaches to work around Rust's restriction: (1) regular named functions, which can call themselves; (2) `Rc<RefCell<Option<...>>>` for deferred initialization; (3) a `fix`-point combinator that encapsulates the pattern.

The Intuition

The simplest fix: use named functions instead of closures. Named functions in Rust can call themselves recursively โ€” the restriction only applies to closures. If your recursive parser doesn't need to capture variables from the environment, this is all you need.
// This works โ€” named function can call itself
fn parse_sexp(input: &str) -> ParseResult<Sexp> {
 if input.starts_with('(') {
     // ... parse list items by calling parse_sexp recursively
     let (item, rest) = parse_sexp(item_input)?;
 }
 // ...
}

How It Works in Rust

// Approach 1: Named recursive functions (simplest)
fn parse_list(input: &str) -> ParseResult<Vec<i64>> {
 let input = input.trim_start();
 let input = input.strip_prefix('[')
     .ok_or("expected '['")?;
 let mut items = Vec::new();
 let mut remaining = input.trim_start();
 while !remaining.starts_with(']') {
     if remaining.is_empty() {
         return Err("unexpected end of input".to_string());
     }
     // Recursive call to parse_value โ€” which may call parse_list again
     let (val, rest) = parse_value(remaining)?;
     items.push(val);
     remaining = rest.trim_start();
     remaining = remaining.strip_prefix(',').unwrap_or(remaining).trim_start();
 }
 Ok((items, &remaining[1..]))
}

// Approach 2: Rc for recursive closures
use std::rc::Rc;
use std::cell::RefCell;

fn make_recursive_parser() -> impl Fn(&str) -> ParseResult<i64> {
 // Forward declaration โ€” initialized after we build the closure
 let parser_ref: Rc<RefCell<Option<Box<dyn Fn(&str) -> ParseResult<i64>>>>> =
     Rc::new(RefCell::new(None));

 let parser_ref_clone = Rc::clone(&parser_ref);
 let parser = move |input: &str| {
     // Borrow the inner parser through the Rc
     let inner = parser_ref_clone.borrow();
     let p = inner.as_ref().unwrap();
     p(input)  // call the real implementation
 };

 // Now set the actual implementation
 *parser_ref.borrow_mut() = Some(Box::new(/* real parser */));
 parser
}

What This Unlocks

Key Differences

ConceptOCamlRust
Recursive functions`let rec parse input = ... parse ...`Named functions can self-call; closures cannot
Forward declaration`let p = ref (fun _ -> failwith "unset")``Rc<RefCell<Option<Box<dyn Fn(...)>>>>`
Fix-point combinator`let rec fix f x = f (fix f) x``rc_fix` wraps with `Rc` to break the self-reference
Mutual recursion`let rec a ... and b ...`Two named functions calling each other โ€” works fine
// Example 167: Recursive Parser
// Recursive parsers using function pointers and Rc for recursive grammars

use std::rc::Rc;

type ParseResult<'a, T> = Result<(T, &'a str), String>;

// Recursive data type
#[derive(Debug, Clone, PartialEq)]
enum Sexp {
    Atom(String),
    List(Vec<Sexp>),
}

// ============================================================
// Approach 1: Direct recursive functions (simplest)
// ============================================================

fn parse_sexp(input: &str) -> ParseResult<Sexp> {
    // Try atom first
    if let Some(c) = input.chars().next() {
        if c.is_ascii_lowercase() {
            let end = input.chars().take_while(|c| c.is_ascii_alphanumeric()).count();
            let byte_end: usize = input.chars().take(end).map(|c| c.len_utf8()).sum();
            return Ok((Sexp::Atom(input[..byte_end].to_string()), &input[byte_end..]));
        }
    }
    // Try list
    parse_sexp_list(input)
}

fn parse_sexp_list(input: &str) -> ParseResult<Sexp> {
    if !input.starts_with('(') {
        return Err("Expected '('".to_string());
    }
    let mut remaining = &input[1..];
    let mut items = Vec::new();
    loop {
        remaining = remaining.trim_start();
        if remaining.starts_with(')') {
            return Ok((Sexp::List(items), &remaining[1..]));
        }
        let (item, rest) = parse_sexp(remaining)?;
        items.push(item);
        remaining = rest;
    }
}

// ============================================================
// Approach 2: Rc-based recursive parser type
// ============================================================

type RcParser<'a, T> = Rc<dyn Fn(&'a str) -> ParseResult<'a, T> + 'a>;

fn rc_fix<'a, T: 'a>(
    f: impl Fn(RcParser<'a, T>) -> RcParser<'a, T> + 'a,
) -> RcParser<'a, T> {
    use std::cell::RefCell;
    let parser: Rc<RefCell<Option<RcParser<'a, T>>>> = Rc::new(RefCell::new(None));
    let parser_clone = parser.clone();
    let lazy: RcParser<'a, T> = Rc::new(move |input: &'a str| {
        let p = parser_clone.borrow();
        (p.as_ref().unwrap())(input)
    });
    let actual = f(lazy);
    *parser.borrow_mut() = Some(actual.clone());
    actual
}

// ============================================================
// Approach 3: Nested parentheses counter using fix
// ============================================================

fn nested_parens(input: &str) -> ParseResult<usize> {
    if input.starts_with('(') {
        let (depth, rest) = nested_parens(&input[1..])?;
        if rest.starts_with(')') {
            Ok((depth + 1, &rest[1..]))
        } else {
            Err("Expected ')'".to_string())
        }
    } else {
        Ok((0, input)) // base case
    }
}

fn main() {
    println!("=== parse_sexp ===");
    println!("{:?}", parse_sexp("hello"));         // Ok((Atom("hello"), ""))
    println!("{:?}", parse_sexp("(a b c)"));       // Ok((List([Atom("a"), ...]), ""))
    println!("{:?}", parse_sexp("(a (b c))"));     // nested

    println!("\n=== Rc-based recursive parser ===");
    let sexp_parser: RcParser<Sexp> = rc_fix(|self_parser: RcParser<Sexp>| {
        Rc::new(move |input: &str| {
            if let Some(c) = input.chars().next() {
                if c.is_ascii_lowercase() {
                    let end: usize = input.chars()
                        .take_while(|c| c.is_ascii_alphanumeric())
                        .map(|c| c.len_utf8()).sum();
                    return Ok((Sexp::Atom(input[..end].to_string()), &input[end..]));
                }
            }
            if !input.starts_with('(') { return Err("Expected atom or list".to_string()); }
            let mut rem = &input[1..];
            let mut items = Vec::new();
            loop {
                rem = rem.trim_start();
                if rem.starts_with(')') { return Ok((Sexp::List(items), &rem[1..])); }
                let (item, rest) = self_parser(rem)?;
                items.push(item);
                rem = rest;
            }
        })
    });
    println!("{:?}", sexp_parser("(a (b c))"));

    println!("\n=== nested_parens ===");
    println!("{:?}", nested_parens("((()))"));  // Ok((3, ""))
    println!("{:?}", nested_parens("()"));      // Ok((1, ""))
    println!("{:?}", nested_parens(""));        // Ok((0, ""))

    println!("\nโœ“ All examples completed");
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_atom() {
        assert_eq!(parse_sexp("hello"), Ok((Sexp::Atom("hello".into()), "")));
    }

    #[test]
    fn test_flat_list() {
        let (sexp, rest) = parse_sexp("(a b c)").unwrap();
        assert_eq!(rest, "");
        assert_eq!(sexp, Sexp::List(vec![
            Sexp::Atom("a".into()),
            Sexp::Atom("b".into()),
            Sexp::Atom("c".into()),
        ]));
    }

    #[test]
    fn test_nested_list() {
        let (sexp, _) = parse_sexp("(a (b c))").unwrap();
        assert_eq!(sexp, Sexp::List(vec![
            Sexp::Atom("a".into()),
            Sexp::List(vec![Sexp::Atom("b".into()), Sexp::Atom("c".into())]),
        ]));
    }

    #[test]
    fn test_nested_parens_3() {
        assert_eq!(nested_parens("((()))"), Ok((3, "")));
    }

    #[test]
    fn test_nested_parens_1() {
        assert_eq!(nested_parens("()"), Ok((1, "")));
    }

    #[test]
    fn test_nested_parens_0() {
        assert_eq!(nested_parens(""), Ok((0, "")));
    }

    #[test]
    fn test_empty_list() {
        assert_eq!(parse_sexp("()"), Ok((Sexp::List(vec![]), "")));
    }

    #[test]
    fn test_rc_parser() {
        let p: RcParser<Sexp> = rc_fix(|self_p: RcParser<Sexp>| {
            Rc::new(move |input: &str| {
                if let Some(c) = input.chars().next() {
                    if c.is_ascii_lowercase() {
                        let end: usize = input.chars()
                            .take_while(|c| c.is_ascii_alphanumeric())
                            .map(|c| c.len_utf8()).sum();
                        return Ok((Sexp::Atom(input[..end].to_string()), &input[end..]));
                    }
                }
                if !input.starts_with('(') { return Err("Expected".into()); }
                let mut rem = &input[1..];
                let mut items = Vec::new();
                loop {
                    rem = rem.trim_start();
                    if rem.starts_with(')') { return Ok((Sexp::List(items), &rem[1..])); }
                    let (item, rest) = self_p(rem)?;
                    items.push(item);
                    rem = rest;
                }
            })
        });
        assert_eq!(p("(a b)"), Ok((Sexp::List(vec![
            Sexp::Atom("a".into()), Sexp::Atom("b".into())
        ]), "")));
    }
}
(* Example 167: Recursive Parser *)
(* Recursive parsers for recursive grammars *)

type 'a parse_result = ('a * string, string) result
type 'a parser = string -> 'a parse_result

let satisfy pred desc : char parser = fun input ->
  if String.length input > 0 && pred input.[0] then
    Ok (input.[0], String.sub input 1 (String.length input - 1))
  else Error (Printf.sprintf "Expected %s" desc)

let tag expected : string parser = fun input ->
  let len = String.length expected in
  if String.length input >= len && String.sub input 0 len = expected then
    Ok (expected, String.sub input len (String.length input - len))
  else Error (Printf.sprintf "Expected \"%s\"" expected)

let many0 p : 'a list parser = fun input ->
  let rec go acc r = match p r with Ok (v, r') -> go (v::acc) r' | Error _ -> Ok (List.rev acc, r)
  in go [] input

let ws0 : unit parser = fun input ->
  let rec skip i = if i < String.length input &&
    (input.[i] = ' ' || input.[i] = '\t' || input.[i] = '\n') then skip (i+1) else i in
  let i = skip 0 in Ok ((), String.sub input i (String.length input - i))

(* Recursive data type: nested lists *)
type sexp = Atom of string | List of sexp list

(* Approach 1: Direct mutual recursion (OCaml makes this easy) *)
let rec parse_sexp input =
  match satisfy (fun c -> c >= 'a' && c <= 'z') "letter" input with
  | Ok (c, rest) ->
    let rec go acc r =
      match satisfy (fun c -> c >= 'a' && c <= 'z' || c >= '0' && c <= '9') "alnum" r with
      | Ok (c, r') -> go (acc ^ String.make 1 c) r'
      | Error _ -> Ok (Atom acc, r) in
    go (String.make 1 c) rest
  | Error _ -> parse_sexp_list input

and parse_sexp_list input =
  match tag "(" input with
  | Error e -> Error e
  | Ok (_, rest) ->
    let rec go acc remaining =
      match ws0 remaining with
      | Ok ((), r) ->
        (match tag ")" r with
         | Ok (_, r') -> Ok (List (List.rev acc), r')
         | Error _ ->
           match parse_sexp r with
           | Ok (v, r') -> go (v :: acc) r'
           | Error e -> Error e)
      | Error e -> Error e
    in
    go [] rest

(* Approach 2: Using a ref cell for forward declaration *)
let make_recursive () =
  let p = ref (fun _ -> Error "uninitialized" : sexp parse_result) in
  let parser : sexp parser = fun input -> !p input in
  let set_parser (actual : sexp parser) = p := actual in
  (parser, set_parser)

(* Approach 3: Fix-point combinator *)
let fix (f : 'a parser -> 'a parser) : 'a parser =
  let rec p input = (f p) input in
  p

let nested_parens : int parser =
  fix (fun self -> fun input ->
    match tag "(" input with
    | Ok (_, rest) ->
      (match self rest with
       | Ok (depth, r2) ->
         (match tag ")" r2 with
          | Ok (_, r3) -> Ok (depth + 1, r3)
          | Error e -> Error e)
       | Error e -> Error e)
    | Error _ -> Ok (0, input)  (* base case *)
  )

(* Tests *)
let () =
  (match parse_sexp "hello" with
   | Ok (Atom "hello", "") -> ()
   | _ -> failwith "Test 1");
  (match parse_sexp "(a b c)" with
   | Ok (List [Atom "a"; Atom "b"; Atom "c"], "") -> ()
   | _ -> failwith "Test 2");
  (match parse_sexp "(a (b c))" with
   | Ok (List [Atom "a"; List [Atom "b"; Atom "c"]], "") -> ()
   | _ -> failwith "Test 3");

  assert (nested_parens "((()))" = Ok (3, ""));
  assert (nested_parens "()" = Ok (1, ""));
  assert (nested_parens "" = Ok (0, ""));

  print_endline "โœ“ All tests passed"

๐Ÿ“Š Detailed Comparison

Comparison: Example 167 โ€” Recursive Parser

Direct recursion

OCaml:

๐Ÿช Show OCaml equivalent
let rec parse_sexp input =
match satisfy (fun c -> c >= 'a' && c <= 'z') "letter" input with
| Ok (c, rest) -> (* parse atom continuation *)
| Error _ -> parse_sexp_list input

and parse_sexp_list input =
match tag "(" input with
| Ok (_, rest) -> (* parse list items using parse_sexp *)

Rust:

fn parse_sexp(input: &str) -> ParseResult<Sexp> {
 if let Some(c) = input.chars().next() {
     if c.is_ascii_lowercase() { /* parse atom */ }
 }
 parse_sexp_list(input)
}

fn parse_sexp_list(input: &str) -> ParseResult<Sexp> {
 if !input.starts_with('(') { return Err(...); }
 // parse items using parse_sexp
}

Fix-point combinator

OCaml:

๐Ÿช Show OCaml equivalent
let fix (f : 'a parser -> 'a parser) : 'a parser =
let rec p input = (f p) input in p

Rust:

fn rc_fix<'a, T: 'a>(
 f: impl Fn(RcParser<'a, T>) -> RcParser<'a, T> + 'a,
) -> RcParser<'a, T> {
 let parser: Rc<RefCell<Option<RcParser<'a, T>>>> = Rc::new(RefCell::new(None));
 let parser_clone = parser.clone();
 let lazy: RcParser<'a, T> = Rc::new(move |input| {
     (parser_clone.borrow().as_ref().unwrap())(input)
 });
 let actual = f(lazy);
 *parser.borrow_mut() = Some(actual.clone());
 actual
}