๐Ÿฆ€ Functional Rust

173: Lisp / S-expression Parser

Difficulty: 3 Level: Advanced Parse the simplest recursive syntax ever invented โ€” and implement `'x` sugar and round-trip printing.

The Problem This Solves

S-expressions are the syntax of Lisp: `(+ 1 (* 2 3))`, `(define x 42)`, `(if (> x 0) "positive" "non-positive")`. They're also used in configuration languages (Emacs Lisp), build tools (Clojure/Leiningen), and as a serialization format. What makes S-expressions interesting to parse is that they're recursive by design. A list contains values, and values can be lists. The grammar is minimal โ€” only a handful of token types โ€” but the nesting can be arbitrarily deep. You need a recursive parser (as shown in example 167) plus atom classification (is `42` a number or a symbol?). This example also shows syntactic sugar: Lisp's `'x` notation is shorthand for `(quote x)`. The parser expands it during parsing โ€” a technique used in every Lisp, and in many other languages for similar shortcuts. Finally, a `Display` impl lets you print parsed values back to valid Lisp syntax.

The Intuition

Parse one character at a time to decide what you're looking at: `(` โ†’ start a list, `"` โ†’ start a string, `'` โ†’ quote sugar, digits/sign โ†’ number, anything else โ†’ atom. For the list case, call `parse_sexp` recursively until you see `)`.
input: "(+ 1 (* 2 3))"
see '(' โ†’ start list
atom: "+"
number: 1
see '(' โ†’ start list (recursive!)
 atom: "*"
 number: 2
 number: 3
see ')' โ†’ end inner list: (* 2 3)
see ')' โ†’ end outer list: (+ 1 (* 2 3))

How It Works in Rust

#[derive(Debug, Clone)]
enum Sexp {
 Atom(String),
 Number(f64),
 Str(String),
 Bool(bool),
 Nil,
 List(Vec<Sexp>),
}

fn parse_sexp(input: &str) -> ParseResult<Sexp> {
 let input = input.trim_start();
 match input.chars().next() {
     Some('(') => parse_list(&input[1..]),
     Some('"') => parse_string(&input[1..]),
     Some('\'') => {
         // Quote sugar: 'x โ†’ (quote x)
         let (inner, rest) = parse_sexp(&input[1..])?;
         Ok((Sexp::List(vec![Sexp::Atom("quote".into()), inner]), rest))
     }
     Some('#') => {
         // Booleans: #t and #f
         if input.starts_with("#t") { Ok((Sexp::Bool(true),  &input[2..])) }
         else if input.starts_with("#f") { Ok((Sexp::Bool(false), &input[2..])) }
         else { Err("unknown # literal".to_string()) }
     }
     Some(_) => {
         // Atom or number โ€” find the boundary
         let end = input.find(|c: char| c.is_whitespace() || "()\"'".contains(c))
             .unwrap_or(input.len());
         let token = &input[..end];
         let rest = &input[end..];
         if token == "nil" {
             Ok((Sexp::Nil, rest))
         } else if let Ok(n) = token.parse::<f64>() {
             Ok((Sexp::Number(n), rest))
         } else {
             Ok((Sexp::Atom(token.to_string()), rest))
         }
     }
     None => Err("unexpected end of input".to_string()),
 }
}

fn parse_list(input: &str) -> ParseResult<Sexp> {
 let mut items = Vec::new();
 let mut remaining = input;
 loop {
     remaining = remaining.trim_start();
     if remaining.starts_with(')') {
         return Ok((Sexp::List(items), &remaining[1..]));
     }
     let (item, rest) = parse_sexp(remaining)?;
     items.push(item);
     remaining = rest;
 }
}

// Display: print back to valid Lisp syntax
impl std::fmt::Display for Sexp {
 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
     match self {
         Sexp::Atom(s)   => write!(f, "{}", s),
         Sexp::Number(n) => write!(f, "{}", n),
         Sexp::Str(s)    => write!(f, "\"{}\"", s.replace('"', "\\\"")),
         Sexp::Bool(b)   => write!(f, "{}", if *b { "#t" } else { "#f" }),
         Sexp::Nil       => write!(f, "nil"),
         Sexp::List(xs)  => {
             write!(f, "(")?;
             for (i, x) in xs.iter().enumerate() {
                 if i > 0 { write!(f, " ")?; }
                 write!(f, "{}", x)?;
             }
             write!(f, ")")
         }
     }
 }
}

What This Unlocks

Key Differences

ConceptOCamlRust
Variant type`type sexp = Atom of string \Number of float \List of sexp list``enum Sexp { Atom(String), Number(f64), List(Vec<Sexp>) }`
Round-trip display`let rec sexp_to_string = function ...``impl Display for Sexp`
Atom classification`try float_of_string s with _ โ†’ Atom s``s.parse::<f64>().map_or(Atom, Number)`
Quote sugarPattern on `'\''` char`input.starts_with('\'')`
// Example 173: Lisp / S-expression Parser
// S-expressions: atoms, numbers, strings, lists, quote

type ParseResult<'a, T> = Result<(T, &'a str), String>;

#[derive(Debug, Clone, PartialEq)]
enum Sexp {
    Atom(String),
    Number(f64),
    Str(String),
    Bool(bool),
    Nil,
    List(Vec<Sexp>),
}

impl std::fmt::Display for Sexp {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Sexp::Atom(s) => write!(f, "{}", s),
            Sexp::Number(n) => write!(f, "{}", n),
            Sexp::Str(s) => write!(f, "\"{}\"", s),
            Sexp::Bool(true) => write!(f, "#t"),
            Sexp::Bool(false) => write!(f, "#f"),
            Sexp::Nil => write!(f, "nil"),
            Sexp::List(items) => {
                write!(f, "(")?;
                for (i, item) in items.iter().enumerate() {
                    if i > 0 { write!(f, " ")?; }
                    write!(f, "{}", item)?;
                }
                write!(f, ")")
            }
        }
    }
}

fn is_atom_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || "_-+*/?!.#".contains(c)
}

// ============================================================
// Approach 1: Parse atom / number / boolean / nil
// ============================================================

fn parse_atom(input: &str) -> ParseResult<Sexp> {
    let s = input.trim_start();
    let end = s.find(|c: char| !is_atom_char(c)).unwrap_or(s.len());
    if end == 0 {
        return Err("Expected atom".to_string());
    }
    let token = &s[..end];
    let rest = &s[end..];
    let sexp = match token {
        "nil" => Sexp::Nil,
        "#t" | "true" => Sexp::Bool(true),
        "#f" | "false" => Sexp::Bool(false),
        _ => match token.parse::<f64>() {
            Ok(n) => Sexp::Number(n),
            Err(_) => Sexp::Atom(token.to_string()),
        },
    };
    Ok((sexp, rest))
}

// ============================================================
// Approach 2: Parse string with escape sequences
// ============================================================

fn parse_string(input: &str) -> ParseResult<Sexp> {
    let s = input.trim_start();
    if !s.starts_with('"') {
        return Err("Expected string".to_string());
    }
    let mut result = String::new();
    let mut chars = s[1..].chars();
    let mut consumed = 1;
    loop {
        match chars.next() {
            None => return Err("Unterminated string".to_string()),
            Some('"') => {
                consumed += 1;
                return Ok((Sexp::Str(result), &s[consumed..]));
            }
            Some('\\') => {
                consumed += 1;
                match chars.next() {
                    Some('n') => { result.push('\n'); consumed += 1; }
                    Some('t') => { result.push('\t'); consumed += 1; }
                    Some('"') => { result.push('"'); consumed += 1; }
                    Some('\\') => { result.push('\\'); consumed += 1; }
                    Some(c) => { result.push('\\'); result.push(c); consumed += c.len_utf8(); }
                    None => return Err("Unexpected EOF in escape".to_string()),
                }
            }
            Some(c) => {
                result.push(c);
                consumed += c.len_utf8();
            }
        }
    }
}

// ============================================================
// Approach 3: Full S-expression parser
// ============================================================

fn parse_sexp(input: &str) -> ParseResult<Sexp> {
    let s = input.trim_start();
    if s.is_empty() {
        return Err("Unexpected EOF".to_string());
    }
    match s.chars().next().unwrap() {
        '(' => parse_list(s),
        '\'' => {
            let (val, rest) = parse_sexp(&s[1..])?;
            Ok((Sexp::List(vec![Sexp::Atom("quote".into()), val]), rest))
        }
        '"' => parse_string(s),
        _ => parse_atom(s),
    }
}

fn parse_list(input: &str) -> ParseResult<Sexp> {
    let mut remaining = &input[1..]; // skip '('
    let mut items = Vec::new();
    loop {
        remaining = remaining.trim_start();
        if remaining.is_empty() {
            return Err("Unterminated list".to_string());
        }
        if remaining.starts_with(')') {
            return Ok((Sexp::List(items), &remaining[1..]));
        }
        let (item, rest) = parse_sexp(remaining)?;
        items.push(item);
        remaining = rest;
    }
}

fn main() {
    let examples = vec![
        "hello", "42", "\"hi\"", "nil", "#t",
        "(+ 1 2)", "(define (square x) (* x x))", "'hello",
    ];
    for ex in examples {
        match parse_sexp(ex) {
            Ok((sexp, _)) => println!("{:?} โ†’ {}", ex, sexp),
            Err(e) => println!("{:?} โ†’ Error: {}", ex, e),
        }
    }
    println!("\nโœ“ All examples completed");
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_atom() {
        assert_eq!(parse_sexp("hello"), Ok((Sexp::Atom("hello".into()), "")));
    }

    #[test]
    fn test_number() {
        assert_eq!(parse_sexp("42"), Ok((Sexp::Number(42.0), "")));
    }

    #[test]
    fn test_float() {
        assert_eq!(parse_sexp("3.14"), Ok((Sexp::Number(3.14), "")));
    }

    #[test]
    fn test_string() {
        assert_eq!(parse_sexp("\"hi\""), Ok((Sexp::Str("hi".into()), "")));
    }

    #[test]
    fn test_string_escape() {
        let (sexp, _) = parse_sexp("\"hello\\nworld\"").unwrap();
        assert_eq!(sexp, Sexp::Str("hello\nworld".into()));
    }

    #[test]
    fn test_nil() {
        assert_eq!(parse_sexp("nil"), Ok((Sexp::Nil, "")));
    }

    #[test]
    fn test_bool() {
        assert_eq!(parse_sexp("#t"), Ok((Sexp::Bool(true), "")));
        assert_eq!(parse_sexp("#f"), Ok((Sexp::Bool(false), "")));
    }

    #[test]
    fn test_list() {
        let (sexp, _) = parse_sexp("(+ 1 2)").unwrap();
        assert_eq!(sexp, Sexp::List(vec![
            Sexp::Atom("+".into()), Sexp::Number(1.0), Sexp::Number(2.0),
        ]));
    }

    #[test]
    fn test_nested() {
        let (sexp, _) = parse_sexp("(define (f x) (* x x))").unwrap();
        match sexp {
            Sexp::List(items) => {
                assert_eq!(items.len(), 3);
                assert_eq!(items[0], Sexp::Atom("define".into()));
            }
            _ => panic!(),
        }
    }

    #[test]
    fn test_quote() {
        let (sexp, _) = parse_sexp("'hello").unwrap();
        assert_eq!(sexp, Sexp::List(vec![Sexp::Atom("quote".into()), Sexp::Atom("hello".into())]));
    }

    #[test]
    fn test_empty_list() {
        assert_eq!(parse_sexp("()"), Ok((Sexp::List(vec![]), "")));
    }

    #[test]
    fn test_display() {
        let (sexp, _) = parse_sexp("(+ 1 2)").unwrap();
        assert_eq!(format!("{}", sexp), "(+ 1 2)");
    }
}
(* Example 173: Lisp / S-expression Parser *)
(* S-expressions: atoms, lists, nested structures *)

type 'a parse_result = ('a * string, string) result
type 'a parser = string -> 'a parse_result

type sexp =
  | Atom of string
  | Number of float
  | Str of string
  | List of sexp list
  | Bool of bool
  | Nil

let ws0 input =
  let rec skip i = if i < String.length input &&
    (input.[i] = ' ' || input.[i] = '\t' || input.[i] = '\n' || input.[i] = '\r') then skip (i+1) else i in
  let i = skip 0 in String.sub input i (String.length input - i)

(* Approach 1: Parse atom (symbol) *)
let is_atom_char c =
  (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
  (c >= '0' && c <= '9') || c = '_' || c = '-' || c = '+' ||
  c = '*' || c = '/' || c = '?' || c = '!' || c = '.'

let parse_atom input =
  let s = ws0 input in
  let len = String.length s in
  let pos = ref 0 in
  while !pos < len && is_atom_char s.[!pos] do incr pos done;
  if !pos = 0 then Error "Expected atom"
  else
    let token = String.sub s 0 !pos in
    let rest = String.sub s !pos (len - !pos) in
    (* Classify *)
    if token = "nil" then Ok (Nil, rest)
    else if token = "#t" || token = "true" then Ok (Bool true, rest)
    else if token = "#f" || token = "false" then Ok (Bool false, rest)
    else
      try Ok (Number (float_of_string token), rest)
      with _ -> Ok (Atom token, rest)

(* Approach 2: Parse string literal *)
let parse_string input =
  let s = ws0 input in
  if String.length s = 0 || s.[0] <> '"' then Error "Expected string"
  else
    let buf = Buffer.create 32 in
    let rec go i =
      if i >= String.length s then Error "Unterminated string"
      else if s.[i] = '"' then
        Ok (Str (Buffer.contents buf), String.sub s (i+1) (String.length s - i - 1))
      else if s.[i] = '\\' && i + 1 < String.length s then begin
        (match s.[i+1] with
         | 'n' -> Buffer.add_char buf '\n'
         | 't' -> Buffer.add_char buf '\t'
         | '"' -> Buffer.add_char buf '"'
         | '\\' -> Buffer.add_char buf '\\'
         | c -> Buffer.add_char buf '\\'; Buffer.add_char buf c);
        go (i + 2)
      end else begin
        Buffer.add_char buf s.[i]; go (i + 1)
      end
    in go 1

(* Approach 3: Full S-expression parser *)
let rec parse_sexp input =
  let s = ws0 input in
  if String.length s = 0 then Error "Unexpected EOF"
  else if s.[0] = '(' then parse_list s
  else if s.[0] = '\'' then  (* quote *)
    match parse_sexp (String.sub s 1 (String.length s - 1)) with
    | Ok (v, rest) -> Ok (List [Atom "quote"; v], rest)
    | Error e -> Error e
  else if s.[0] = '"' then parse_string s
  else parse_atom s

and parse_list input =
  let rest = String.sub input 1 (String.length input - 1) in
  let rec go acc remaining =
    let remaining = ws0 remaining in
    if String.length remaining = 0 then Error "Unterminated list"
    else if remaining.[0] = ')' then
      Ok (List (List.rev acc), String.sub remaining 1 (String.length remaining - 1))
    else
      match parse_sexp remaining with
      | Ok (v, rest) -> go (v :: acc) rest
      | Error e -> Error e
  in go [] rest

let rec sexp_to_string = function
  | Atom s -> s
  | Number n -> string_of_float n
  | Str s -> Printf.sprintf "\"%s\"" s
  | Bool true -> "#t"
  | Bool false -> "#f"
  | Nil -> "nil"
  | List items -> "(" ^ String.concat " " (List.map sexp_to_string items) ^ ")"

(* Tests *)
let () =
  assert (parse_sexp "hello" = Ok (Atom "hello", ""));
  assert (parse_sexp "42" = Ok (Number 42., ""));
  assert (parse_sexp "\"hi\"" = Ok (Str "hi", ""));
  assert (parse_sexp "nil" = Ok (Nil, ""));
  assert (parse_sexp "#t" = Ok (Bool true, ""));

  (match parse_sexp "(+ 1 2)" with
   | Ok (List [Atom "+"; Number 1.; Number 2.], "") -> ()
   | _ -> failwith "List test");

  (match parse_sexp "(define (square x) (* x x))" with
   | Ok (List [Atom "define"; List [Atom "square"; Atom "x"];
               List [Atom "*"; Atom "x"; Atom "x"]], "") -> ()
   | _ -> failwith "Nested test");

  (match parse_sexp "'hello" with
   | Ok (List [Atom "quote"; Atom "hello"], "") -> ()
   | _ -> failwith "Quote test");

  print_endline "โœ“ All tests passed"

๐Ÿ“Š Detailed Comparison

Comparison: Example 173 โ€” Lisp Parser

Data type

OCaml:

๐Ÿช Show OCaml equivalent
type sexp =
| Atom of string
| Number of float
| Str of string
| List of sexp list
| Bool of bool
| Nil

Rust:

#[derive(Debug, Clone, PartialEq)]
enum Sexp {
 Atom(String),
 Number(f64),
 Str(String),
 Bool(bool),
 Nil,
 List(Vec<Sexp>),
}

Main dispatch

OCaml:

๐Ÿช Show OCaml equivalent
let rec parse_sexp input =
let s = ws0 input in
if s.[0] = '(' then parse_list s
else if s.[0] = '\'' then
 match parse_sexp (String.sub s 1 ...) with
 | Ok (v, rest) -> Ok (List [Atom "quote"; v], rest)
else if s.[0] = '"' then parse_string s
else parse_atom s

Rust:

fn parse_sexp(input: &str) -> ParseResult<Sexp> {
 let s = input.trim_start();
 match s.chars().next().unwrap() {
     '(' => parse_list(s),
     '\'' => {
         let (val, rest) = parse_sexp(&s[1..])?;
         Ok((Sexp::List(vec![Sexp::Atom("quote".into()), val]), rest))
     }
     '"' => parse_string(s),
     _ => parse_atom(s),
 }
}