๐Ÿฆ€ Functional Rust

168: Expression Parser

Difficulty: 3 Level: Advanced Parse `1 + 2 * 3` correctly โ€” the Pratt parser, used in every real language implementation.

The Problem This Solves

Expression parsing looks simple until operator precedence enters the picture. `1 + 2 3` must parse as `1 + (2 3)` = 7, not `(1 + 2) * 3` = 9. `a ^ b ^ c` must parse as `a ^ (b ^ c)` (right-associative). Unary minus in `-x + y` must bind tightly. Naive recursive descent needs one grammar rule per precedence level: `expr โ†’ additive`, `additive โ†’ multiplicative +/- multiplicative`, `multiplicative โ†’ unary */รท unary`, and so on. That's a lot of boilerplate โ€” and adding a new operator means rewriting the grammar. The Pratt parser solves this elegantly with binding powers. Every operator gets a left binding power and a right binding power. The parser loops, consuming operators whose left binding power is strong enough to "pull in" what's already been parsed. It's compact, extensible, and handles any associativity naturally.

The Intuition

Give each operator a "stickiness" number. Higher stickiness = binds more tightly. `` has stickiness 30, `+` has stickiness 20. When parsing `1 + 2 3`: parse `1`, see `+` (stickiness 20), parse the right side โ€” but the right side sees `2 3` and `` is stickier than `+`, so it grabs both `2` and `3` first, giving `1 + (2 * 3)`. Right-associativity (`^`) is a trick: give the right binding power one less than the left. That makes the parser "prefer" to recurse right.

How It Works in Rust

#[derive(Debug)]
enum Expr {
 Num(f64),
 BinOp { op: char, left: Box<Expr>, right: Box<Expr> },
 Unary { op: char, operand: Box<Expr> },
}

// Returns (left_bp, right_bp) for infix operators
fn infix_binding_power(op: char) -> Option<(u8, u8)> {
 match op {
     '+' | '-' => Some((20, 21)),   // left-assoc: left < right
     '*' | '/' => Some((30, 31)),   // left-assoc, higher precedence
     '^'       => Some((40, 39)),   // right-assoc: right < left (!)
     _         => None,
 }
}

fn pratt_expr(input: &str, min_bp: u8) -> ParseResult<Expr> {
 let input = input.trim_start();

 // Parse prefix: unary minus or atom
 let (mut lhs, mut remaining) = if input.starts_with('-') {
     let (operand, rest) = pratt_expr(&input[1..], 50)?; // tight unary bind
     (Expr::Unary { op: '-', operand: Box::new(operand) }, rest)
 } else if input.starts_with('(') {
     let (inner, rest) = pratt_expr(&input[1..], 0)?;
     let rest = rest.trim_start().strip_prefix(')').ok_or("expected ')'")?;
     (inner, rest)
 } else {
     // parse number
     let (n, rest) = parse_number(input)?;
     (Expr::Num(n), rest)
 };

 // Infix loop: keep consuming operators that bind tightly enough
 loop {
     let rest = remaining.trim_start();
     let op = match rest.chars().next() {
         Some(c) => c,
         None => break,
     };
     let (left_bp, right_bp) = match infix_binding_power(op) {
         Some(bp) => bp,
         None => break,
     };
     if left_bp < min_bp { break; }  // operator doesn't bind tightly enough

     let (rhs, rest) = pratt_expr(&rest[op.len_utf8()..], right_bp)?;
     lhs = Expr::BinOp { op, left: Box::new(lhs), right: Box::new(rhs) };
     remaining = rest;
 }

 Ok((lhs, remaining))
}

What This Unlocks

Key Differences

ConceptOCamlRust
AST type`type expr = Num of float \BinOp of char expr expr``enum Expr { Num(f64), BinOp { op: char, ... } }`
Heap allocationAutomatic (GC)`Box::new(...)` required for recursive variants
Mutual recursion`let rec pratt_expr ... and pratt_loop ...`Two regular functions โ€” no `rec` needed
Binding powers`(int * int)` tuples`(u8, u8)` tuples โ€” same idea
// Example 168: Expression Parser
// Pratt parsing for expressions with precedence

type ParseResult<'a, T> = Result<(T, &'a str), String>;

#[derive(Debug, Clone, PartialEq)]
enum Expr {
    Num(f64),
    BinOp(String, Box<Expr>, Box<Expr>),
    UnaryMinus(Box<Expr>),
}

// ============================================================
// Approach 1: Pratt parser with binding power
// ============================================================

fn infix_binding_power(op: &str) -> Option<(u8, u8)> {
    match op {
        "+" | "-" => Some((5, 6)),   // left-associative
        "*" | "/" => Some((7, 8)),   // left-associative
        "^" => Some((10, 9)),        // right-associative
        _ => None,
    }
}

fn prefix_binding_power(op: &str) -> Option<u8> {
    match op {
        "-" => Some(9),
        _ => None,
    }
}

fn parse_number(input: &str) -> ParseResult<Expr> {
    let s = input.trim_start();
    let bytes = s.as_bytes();
    let mut pos = 0;
    if pos < bytes.len() && bytes[pos] == b'-' { pos += 1; }
    let start = pos;
    while pos < bytes.len() && bytes[pos].is_ascii_digit() { pos += 1; }
    if pos < bytes.len() && bytes[pos] == b'.' {
        pos += 1;
        while pos < bytes.len() && bytes[pos].is_ascii_digit() { pos += 1; }
    }
    if pos == start || (pos == 1 && bytes[0] == b'-') {
        return Err("Expected number".to_string());
    }
    let num: f64 = s[..pos].parse().map_err(|e: std::num::ParseFloatError| e.to_string())?;
    Ok((Expr::Num(num), &s[pos..]))
}

fn parse_op(input: &str) -> ParseResult<&str> {
    let s = input.trim_start();
    match s.chars().next() {
        Some(c @ ('+' | '-' | '*' | '/' | '^')) => {
            Ok((&s[..c.len_utf8()], &s[c.len_utf8()..]))
        }
        _ => Err("Expected operator".to_string()),
    }
}

fn pratt_expr(input: &str, min_bp: u8) -> ParseResult<Expr> {
    let s = input.trim_start();

    // Prefix: parentheses, unary minus, or number
    let (mut lhs, mut rest) = if s.starts_with('(') {
        let (expr, r) = pratt_expr(&s[1..], 0)?;
        let r = r.trim_start();
        if r.starts_with(')') {
            (expr, &r[1..])
        } else {
            return Err("Expected ')'".to_string());
        }
    } else if s.starts_with('-') {
        if let Some(rbp) = prefix_binding_power("-") {
            let (rhs, r) = pratt_expr(&s[1..], rbp)?;
            (Expr::UnaryMinus(Box::new(rhs)), r)
        } else {
            parse_number(s)?
        }
    } else {
        parse_number(s)?
    };

    // Infix loop
    loop {
        let op = match parse_op(rest) {
            Ok((op, _)) => op.to_string(),
            Err(_) => break,
        };
        let (lbp, rbp) = match infix_binding_power(&op) {
            Some(bp) => bp,
            None => break,
        };
        if lbp < min_bp { break; }
        let (_, after_op) = parse_op(rest)?;
        let (rhs, r) = pratt_expr(after_op, rbp)?;
        lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
        rest = r;
    }

    Ok((lhs, rest))
}

// ============================================================
// Approach 2: Evaluate directly during parsing
// ============================================================

fn eval_expr(input: &str) -> ParseResult<f64> {
    fn eval_pratt(input: &str, min_bp: u8) -> ParseResult<f64> {
        let s = input.trim_start();
        let (mut lhs, mut rest) = if s.starts_with('(') {
            let (val, r) = eval_pratt(&s[1..], 0)?;
            let r = r.trim_start();
            if r.starts_with(')') { (val, &r[1..]) }
            else { return Err("Expected ')'".to_string()); }
        } else if s.starts_with('-') && !s[1..].trim_start().starts_with(|c: char| c == '+' || c == '-' || c == '*' || c == '/') {
            let (val, r) = eval_pratt(&s[1..], 9)?;
            (-val, r)
        } else {
            // parse number
            let bytes = s.as_bytes();
            let mut pos = 0;
            while pos < bytes.len() && (bytes[pos].is_ascii_digit() || bytes[pos] == b'.') { pos += 1; }
            if pos == 0 { return Err("Expected number".to_string()); }
            let n: f64 = s[..pos].parse().map_err(|e: std::num::ParseFloatError| e.to_string())?;
            (n, &s[pos..])
        };

        loop {
            let trimmed = rest.trim_start();
            let op = match trimmed.chars().next() {
                Some(c @ ('+' | '-' | '*' | '/' | '^')) => c,
                _ => break,
            };
            let op_str = &trimmed[..1];
            let (lbp, rbp) = match infix_binding_power(op_str) {
                Some(bp) => bp,
                None => break,
            };
            if lbp < min_bp { break; }
            let (rhs, r) = eval_pratt(&trimmed[1..], rbp)?;
            lhs = match op {
                '+' => lhs + rhs,
                '-' => lhs - rhs,
                '*' => lhs * rhs,
                '/' => lhs / rhs,
                '^' => lhs.powf(rhs),
                _ => unreachable!(),
            };
            rest = r;
        }
        Ok((lhs, rest))
    }
    eval_pratt(input, 0)
}

fn main() {
    println!("=== Pratt expression parser ===");
    println!("{:?}", pratt_expr("1 + 2", 0));
    println!("{:?}", pratt_expr("1 + 2 * 3", 0));       // 1 + (2*3)
    println!("{:?}", pratt_expr("(1 + 2) * 3", 0));     // (1+2) * 3
    println!("{:?}", pratt_expr("2 ^ 3 ^ 2", 0));       // 2^(3^2) right-assoc

    println!("\n=== Direct evaluation ===");
    println!("{:?}", eval_expr("1 + 2 * 3"));    // Ok((7.0, ""))
    println!("{:?}", eval_expr("(1 + 2) * 3"));  // Ok((9.0, ""))
    println!("{:?}", eval_expr("2 ^ 3 ^ 2"));    // Ok((512.0, ""))

    println!("\nโœ“ All examples completed");
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_add() {
        let (expr, _) = pratt_expr("1 + 2", 0).unwrap();
        assert_eq!(expr, Expr::BinOp("+".into(), Box::new(Expr::Num(1.0)), Box::new(Expr::Num(2.0))));
    }

    #[test]
    fn test_precedence() {
        // 1 + 2 * 3 = 1 + (2*3)
        let (expr, _) = pratt_expr("1 + 2 * 3", 0).unwrap();
        match expr {
            Expr::BinOp(ref op, _, ref rhs) => {
                assert_eq!(op, "+");
                match rhs.as_ref() {
                    Expr::BinOp(ref op2, _, _) => assert_eq!(op2, "*"),
                    _ => panic!("Expected BinOp"),
                }
            }
            _ => panic!("Expected BinOp"),
        }
    }

    #[test]
    fn test_parens() {
        let (expr, _) = pratt_expr("(1 + 2) * 3", 0).unwrap();
        match expr {
            Expr::BinOp(ref op, ref lhs, _) => {
                assert_eq!(op, "*");
                match lhs.as_ref() {
                    Expr::BinOp(ref op2, _, _) => assert_eq!(op2, "+"),
                    _ => panic!("Expected BinOp"),
                }
            }
            _ => panic!("Expected BinOp"),
        }
    }

    #[test]
    fn test_right_assoc() {
        // 2 ^ 3 ^ 2 = 2 ^ (3 ^ 2) = 512
        let (val, _) = eval_expr("2 ^ 3 ^ 2").unwrap();
        assert!((val - 512.0).abs() < 1e-10);
    }

    #[test]
    fn test_eval_precedence() {
        let (val, _) = eval_expr("1 + 2 * 3").unwrap();
        assert!((val - 7.0).abs() < 1e-10);
    }

    #[test]
    fn test_eval_parens() {
        let (val, _) = eval_expr("(1 + 2) * 3").unwrap();
        assert!((val - 9.0).abs() < 1e-10);
    }

    #[test]
    fn test_unary_minus() {
        let (val, _) = eval_expr("-5").unwrap();
        assert!((val - (-5.0)).abs() < 1e-10);
    }
}
(* Example 168: Expression Parser *)
(* Expression parser with precedence (Pratt parsing technique) *)

type 'a parse_result = ('a * string, string) result
type 'a parser = string -> 'a parse_result

type expr =
  | Num of float
  | BinOp of string * expr * expr
  | UnaryMinus of expr

let ws0 input =
  let rec skip i = if i < String.length input &&
    (input.[i] = ' ' || input.[i] = '\t') then skip (i+1) else i in
  let i = skip 0 in String.sub input i (String.length input - i)

(* Parse a number *)
let parse_number input =
  let s = ws0 input in
  let len = String.length s in
  let pos = ref 0 in
  if !pos < len && s.[!pos] = '-' then incr pos;
  while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done;
  if !pos < len && s.[!pos] = '.' then begin
    incr pos;
    while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done end;
  if !pos = 0 || (!pos = 1 && s.[0] = '-') then Error "Expected number"
  else Ok (Num (float_of_string (String.sub s 0 !pos)),
           String.sub s !pos (len - !pos))

(* Approach 1: Pratt parser *)
let prefix_binding_power = function
  | "-" -> Some 9
  | _ -> None

let infix_binding_power = function
  | "+" | "-" -> Some (5, 6)
  | "*" | "/" -> Some (7, 8)
  | "^" -> Some (10, 9)  (* right-associative *)
  | _ -> None

let parse_op input =
  let s = ws0 input in
  if String.length s > 0 then
    let c = String.make 1 s.[0] in
    if c = "+" || c = "-" || c = "*" || c = "/" || c = "^" then
      Ok (c, String.sub s 1 (String.length s - 1))
    else Error "Expected operator"
  else Error "Expected operator"

let rec pratt_expr min_bp input =
  let s = ws0 input in
  (* Prefix: unary minus or atom *)
  let lhs_result =
    if String.length s > 0 && s.[0] = '(' then
      let inner = String.sub s 1 (String.length s - 1) in
      match pratt_expr 0 inner with
      | Ok (e, rest) ->
        let r = ws0 rest in
        if String.length r > 0 && r.[0] = ')' then
          Ok (e, String.sub r 1 (String.length r - 1))
        else Error "Expected ')'"
      | Error e -> Error e
    else if String.length s > 0 && s.[0] = '-' then
      match prefix_binding_power "-" with
      | Some rbp ->
        let rest = String.sub s 1 (String.length s - 1) in
        (match pratt_expr rbp rest with
         | Ok (rhs, rem) -> Ok (UnaryMinus rhs, rem)
         | Error e -> Error e)
      | None -> parse_number s
    else parse_number s
  in
  match lhs_result with
  | Error e -> Error e
  | Ok (lhs, rest) -> pratt_loop min_bp lhs rest

and pratt_loop min_bp lhs input =
  match parse_op input with
  | Error _ -> Ok (lhs, input)
  | Ok (op, _) ->
    match infix_binding_power op with
    | None -> Ok (lhs, input)
    | Some (lbp, rbp) ->
      if lbp < min_bp then Ok (lhs, input)
      else
        match parse_op input with
        | Ok (_, after_op) ->
          (match pratt_expr rbp after_op with
           | Ok (rhs, rem) -> pratt_loop min_bp (BinOp (op, lhs, rhs)) rem
           | Error e -> Error e)
        | Error e -> Error e

let parse_expr = pratt_expr 0

(* Tests *)
let () =
  (match parse_expr "1 + 2" with
   | Ok (BinOp ("+", Num 1., Num 2.), "") -> ()
   | _ -> failwith "Test 1");

  (match parse_expr "1 + 2 * 3" with
   | Ok (BinOp ("+", Num 1., BinOp ("*", Num 2., Num 3.)), "") -> ()
   | _ -> failwith "Test 2: precedence");

  (match parse_expr "(1 + 2) * 3" with
   | Ok (BinOp ("*", BinOp ("+", Num 1., Num 2.), Num 3.), "") -> ()
   | _ -> failwith "Test 3: parens");

  print_endline "โœ“ All tests passed"

๐Ÿ“Š Detailed Comparison

Comparison: Example 168 โ€” Expression Parser

Binding power tables

OCaml:

๐Ÿช Show OCaml equivalent
let infix_binding_power = function
| "+" | "-" -> Some (5, 6)
| "*" | "/" -> Some (7, 8)
| "^" -> Some (10, 9)  (* right-associative *)
| _ -> None

Rust:

fn infix_binding_power(op: &str) -> Option<(u8, u8)> {
 match op {
     "+" | "-" => Some((5, 6)),
     "*" | "/" => Some((7, 8)),
     "^" => Some((10, 9)),  // right-associative
     _ => None,
 }
}

Pratt loop

OCaml:

๐Ÿช Show OCaml equivalent
and pratt_loop min_bp lhs input =
match parse_op input with
| Error _ -> Ok (lhs, input)
| Ok (op, _) ->
 match infix_binding_power op with
 | Some (lbp, rbp) when lbp >= min_bp ->
   match parse_op input with
   | Ok (_, after_op) ->
     match pratt_expr rbp after_op with
     | Ok (rhs, rem) -> pratt_loop min_bp (BinOp (op, lhs, rhs)) rem

Rust:

loop {
 let op = match parse_op(rest) {
     Ok((op, _)) => op.to_string(),
     Err(_) => break,
 };
 let (lbp, rbp) = match infix_binding_power(&op) {
     Some(bp) => bp,
     None => break,
 };
 if lbp < min_bp { break; }
 let (_, after_op) = parse_op(rest)?;
 let (rhs, r) = pratt_expr(after_op, rbp)?;
 lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
 rest = r;
}