🦀 Functional Rust

171: CSV Parser

Difficulty: 3 · Level: Advanced. A complete CSV parser — quoted fields, escaped quotes, embedded newlines — built from the combinators you've learned.

The Problem This Solves

CSV looks trivial until you try to parse it correctly. `Alice,30,Engineer` is easy. But what about `"Smith, Jr.",42,"Software Engineer"` — commas inside quoted fields? Or `"line one\nline two"` — newlines inside quoted fields? Or `"He said ""hello"""` — escaped quotes encoded as `""`? The RFC 4180 spec for CSV handles all of these with one quoting rule: fields wrapped in double quotes can contain anything; two consecutive double quotes inside a quoted field mean one literal double quote. Building this correctly from scratch requires careful state management and proves that parser combinators scale to real-world formats. This is also the payoff example: you wrote `tag`, `take_while`, `separated_list`, and `many0` in earlier examples. Now you assemble them into a complete, working parser for a format used in millions of data pipelines.

The Intuition

A CSV file is `rows`, where each row is `fields separated by commas`, where each field is either `unquoted` (no special chars) or `quoted` (wrapped in `"`, with `""` escaping). Parse bottom-up: field → row → file.
"Alice","Smith, Jr.",42
^quoted  ^quoted, has comma  ^unquoted

How It Works in Rust

/// Unquoted field: the longest prefix of `input` that contains no comma, `\n`,
/// or `\r`.
///
/// Returns that prefix as an owned `String` together with the rest of the
/// input, which starts at the delimiter (or is empty). Never fails; an empty
/// prefix yields an empty field.
fn parse_unquoted_field(input: &str) -> ParseResult<String> {
    let is_delimiter = |c: char| matches!(c, ',' | '\n' | '\r');
    let cut = match input.find(is_delimiter) {
        Some(index) => index,
        None => input.len(),
    };
    let (value, rest) = input.split_at(cut);
    Ok((value.to_string(), rest))
}

// Quoted field: "..." with "" meaning a literal quote inside
fn parse_quoted_field(input: &str) -> ParseResult<String> {
 let input = input.strip_prefix('"')
     .ok_or("expected '\"'")?;
 let mut result = String::new();
 let mut chars = input.char_indices();

 loop {
     match chars.next() {
         None => return Err("unterminated quoted field".to_string()),
         Some((_, '"')) => {
             // Peek: is the next char also '"'? That's an escaped quote.
             match chars.clone().next() {
                 Some((_, '"')) => {
                     chars.next(); // consume the second quote
                     result.push('"');
                 }
                 _ => {
                     // Closing quote โ€” find current position in input
                     let pos = /* offset after closing quote */;
                     return Ok((result, &input[pos..]));
                 }
             }
         }
         Some((_, ch)) => result.push(ch),
     }
 }
}

/// One field: a leading `"` selects the quoted parser, anything else
/// (including empty input) goes to the unquoted parser.
fn parse_field(input: &str) -> ParseResult<String> {
    match input.chars().next() {
        Some('"') => parse_quoted_field(input),
        _ => parse_unquoted_field(input),
    }
}

// One row: comma-separated fields.
// Returns the row's fields plus whatever input the combinator leaves unparsed.
fn parse_row(input: &str) -> ParseResult<Vec<String>> {
 // Separator parser: strips a single leading ',' and yields `()` with the
 // remaining input, or the error string "expected ','" when there is none.
 let comma = |s: &str| s.strip_prefix(',')
     .map(|r| ((), r))
     .ok_or("expected ','".to_string());
 // NOTE(review): `separated_list1` is not defined in this listing (the text says
 // it comes from an earlier example); presumably it parses one or more
 // `parse_field` results separated by `comma` — confirm against its definition.
 separated_list1(comma, parse_field)(input)
}

/// Full CSV: rows separated by `\r\n` or `\n`.
///
/// Parses rows until the input is exhausted and returns them together with the
/// (empty) remainder. A trailing line ending after the last row is accepted.
///
/// # Errors
///
/// Propagates any error from `parse_row`, and returns
/// `"expected line ending"` when a row is followed by something other than a
/// line ending or end of input.
fn parse_csv(input: &str) -> ParseResult<Vec<Vec<String>>> {
    let mut rows = Vec::new();
    let mut remaining = input;
    while !remaining.is_empty() {
        let (row, rest) = parse_row(remaining)?;
        rows.push(row);
        // Every row must end at a line ending or at end of input. Accepting
        // anything else would let a row parser that made no progress (e.g. on a
        // stray '\r') spin this loop forever.
        remaining = match rest.strip_prefix("\r\n").or_else(|| rest.strip_prefix('\n')) {
            Some(next) => next,
            None if rest.is_empty() => rest,
            None => return Err("expected line ending".to_string()),
        };
    }
    Ok((rows, remaining))
}

What This Unlocks

Key Differences

| Concept | OCaml | Rust |
|---|---|---|
| String building | `Buffer.t` + `Buffer.add_char` | `String::new()` + `String::push` |
| Line endings | Manual `\r\n` check | `starts_with("\r\n")` then `starts_with('\n')` |
| Char iteration | Recursive with index | `char_indices()` iterator |
| Field trimming | `String.trim` | `str::trim()` |
// Example 171: CSV Parser
// Complete CSV parser using combinators (handles quotes, escaping)

/// Result type shared by every parser in this file: on success, the parsed
/// value paired with the not-yet-consumed remainder of the input; on failure,
/// an error message.
type ParseResult<'a, T> = Result<(T, &'a str), String>;

// ============================================================
// Approach 1: Unquoted field
// ============================================================

/// Parses an unquoted field from the front of `input`.
///
/// The field runs up to (not including) the first `,`, `\n`, or `\r`, or to the
/// end of the input when none is present. Surrounding whitespace is trimmed
/// from the returned value; the remainder starts at the delimiter. Never fails.
fn unquoted_field(input: &str) -> ParseResult<String> {
    let is_terminator = |c: char| matches!(c, ',' | '\n' | '\r');
    let cut = match input.find(is_terminator) {
        Some(index) => index,
        None => input.len(),
    };
    let (raw, rest) = input.split_at(cut);
    Ok((raw.trim().to_owned(), rest))
}

// ============================================================
// Approach 2: Quoted field with escaped quotes ("")
// ============================================================

/// Parses a double-quoted CSV field from the front of `input`.
///
/// Inside the quotes a doubled quote (`""`) stands for one literal `"`; every
/// other character, including commas and newlines, is copied as-is.
///
/// Returns the unescaped contents together with the input that follows the
/// closing quote.
///
/// # Errors
///
/// * `"Expected opening quote"` when `input` does not start with `"`.
/// * `"Unterminated quoted field"` when the input ends before a closing quote.
fn quoted_field(input: &str) -> ParseResult<String> {
    let body = match input.strip_prefix('"') {
        Some(body) => body,
        None => return Err("Expected opening quote".to_string()),
    };
    let mut text = String::new();
    let mut iter = body.char_indices().peekable();
    while let Some((idx, ch)) = iter.next() {
        if ch != '"' {
            text.push(ch);
            continue;
        }
        let doubled = iter.peek().map(|&(_, next)| next) == Some('"');
        if !doubled {
            // Closing quote: `"` is one byte in UTF-8, so the remainder starts
            // right after its index.
            return Ok((text, &body[idx + 1..]));
        }
        // Escaped quote: swallow the second `"` and keep a single one.
        iter.next();
        text.push('"');
    }
    Err("Unterminated quoted field".to_string())
}

// ============================================================
// Approach 3: Full CSV parser
// ============================================================

/// Parses one CSV field, choosing the parser from the first character: a
/// leading `"` means a quoted field, anything else (including empty input) is
/// handled as an unquoted field.
fn field(input: &str) -> ParseResult<String> {
    match input.chars().next() {
        Some('"') => quoted_field(input),
        _ => unquoted_field(input),
    }
}

/// Parses one CSV row: a field followed by any number of `,field` pairs.
///
/// Returns the fields in order and the input left after the last field (the
/// row's line ending is not consumed). Any field error is propagated.
fn row(input: &str) -> ParseResult<Vec<String>> {
    let (head, mut cursor) = field(input)?;
    let mut cells = Vec::new();
    cells.push(head);
    while let Some(after_comma) = cursor.strip_prefix(',') {
        let (cell, tail) = field(after_comma)?;
        cells.push(cell);
        cursor = tail;
    }
    Ok((cells, cursor))
}

/// Consumes a row terminator: `\r\n`, `\n`, or end of input.
///
/// Returns the input after the terminator (empty at end of input).
///
/// # Errors
///
/// `"Expected line ending"` when `input` starts with anything else, including
/// a lone `\r`.
fn line_ending(input: &str) -> ParseResult<()> {
    let after_newline = input
        .strip_prefix("\r\n")
        .or_else(|| input.strip_prefix('\n'));
    match after_newline {
        Some(rest) => Ok(((), rest)),
        None if input.is_empty() => Ok(((), "")),
        None => Err("Expected line ending".to_string()),
    }
}

/// Parses a whole CSV document into rows of fields.
///
/// Repeatedly parses a row followed by a line ending (or end of input) until
/// nothing is left. On success the remainder is always the empty string.
/// Errors from `row` and `line_ending` are propagated unchanged.
fn csv(input: &str) -> ParseResult<Vec<Vec<String>>> {
    let mut table = Vec::new();
    let mut cursor = input;
    loop {
        if cursor.is_empty() {
            return Ok((table, ""));
        }
        let (record, after_row) = row(cursor)?;
        table.push(record);
        let ((), next) = line_ending(after_row)?;
        cursor = next;
    }
}

/// Demo driver: prints the `Debug` form of a few `field`, `row`, and `csv`
/// results so the parser's output can be inspected by eye.
fn main() {
    println!("=== field ===");
    println!("{:?}", field("hello,world"));
    println!("{:?}", field("\"hello,world\""));
    println!("{:?}", field("\"say \"\"hi\"\"\""));

    println!("\n=== row ===");
    println!("{:?}", row("a,b,c"));
    println!("{:?}", row("\"x,y\",z"));

    println!("\n=== csv ===");
    println!("{:?}", csv("a,b\n1,2\n3,4"));

    // The check mark was stored as mis-decoded UTF-8 bytes; it is U+2713.
    println!("\n✓ All examples completed");
}

#[cfg(test)]
// Unit tests for the field, row, and document parsers defined above.
mod tests {
    use super::*;

    // An unquoted field stops at the first comma, which stays in the remainder.
    #[test]
    fn test_unquoted_field() {
        assert_eq!(unquoted_field("hello,world"), Ok(("hello".into(), ",world")));
    }

    // A comma inside quotes belongs to the field rather than separating fields.
    #[test]
    fn test_quoted_field() {
        assert_eq!(quoted_field("\"hello,world\""), Ok(("hello,world".into(), "")));
    }

    // A doubled quote inside a quoted field decodes to one literal quote.
    #[test]
    fn test_escaped_quotes() {
        assert_eq!(quoted_field("\"say \"\"hi\"\"\""), Ok(("say \"hi\"".into(), "")));
    }

    // A newline inside quotes is kept as field content.
    #[test]
    fn test_quoted_with_newline() {
        assert_eq!(quoted_field("\"line1\nline2\""), Ok(("line1\nline2".into(), "")));
    }

    // A plain row splits into one field per comma-separated value.
    #[test]
    fn test_row() {
        let (r, _) = row("a,b,c").unwrap();
        assert_eq!(r, vec!["a", "b", "c"]);
    }

    // Quoted and unquoted fields can be mixed within one row.
    #[test]
    fn test_row_quoted() {
        let (r, _) = row("\"x,y\",z").unwrap();
        assert_eq!(r, vec!["x,y", "z"]);
    }

    // A document with `\n` separators and no trailing newline yields three rows.
    #[test]
    fn test_csv() {
        let (rows, _) = csv("a,b\n1,2\n3,4").unwrap();
        assert_eq!(rows, vec![
            vec!["a", "b"],
            vec!["1", "2"],
            vec!["3", "4"],
        ]);
    }

    // `\r\n` is accepted as a row separator.
    #[test]
    fn test_csv_crlf() {
        let (rows, _) = csv("a,b\r\n1,2").unwrap();
        assert_eq!(rows, vec![vec!["a", "b"], vec!["1", "2"]]);
    }

    // Leading and trailing commas produce empty fields.
    #[test]
    fn test_empty_field() {
        let (r, _) = row(",a,").unwrap();
        assert_eq!(r, vec!["", "a", ""]);
    }

    // A quoted field with no closing quote is an error.
    #[test]
    fn test_unterminated_quote() {
        assert!(quoted_field("\"hello").is_err());
    }
}
(* Example 171: CSV Parser *)
(* Complete CSV parser using combinators (handles quotes, escaping) *)

(* Result of running a parser: [Ok (value, remaining_input)] on success, or
   [Error message] on failure. *)
type 'a parse_result = ('a * string, string) result
(* A parser is a function from the input string to a parse result. *)
type 'a parser = string -> 'a parse_result

(* [satisfy pred desc] consumes the first character of the input when [pred]
   accepts it, returning that character and the rest of the input. On empty
   input, or when [pred] rejects the character, it fails with
   "Expected <desc>". *)
let satisfy pred desc : char parser = fun input ->
  let n = String.length input in
  if n = 0 || not (pred input.[0]) then
    Error (Printf.sprintf "Expected %s" desc)
  else
    Ok (input.[0], String.sub input 1 (n - 1))

(* [many0 p] applies [p] repeatedly until it fails and returns the collected
   values in input order together with the input left at the first failure.
   It never fails itself: zero matches yields the empty list. *)
let many0 p : 'a list parser = fun input ->
  let rec collect acc rest =
    match p rest with
    | Error _ -> Ok (List.rev acc, rest)
    | Ok (item, rest') -> collect (item :: acc) rest'
  in
  collect [] input

(* [tag expected] succeeds when the input starts with the literal [expected],
   returning it and the rest of the input; otherwise it fails with
   Expected "<expected>". *)
let tag expected : string parser = fun input ->
  let n = String.length expected and total = String.length input in
  (* The length test comes first so [String.sub] is never asked for more
     characters than the input holds. *)
  if total < n || String.sub input 0 n <> expected then
    Error (Printf.sprintf "Expected \"%s\"" expected)
  else
    Ok (expected, String.sub input n (total - n))

(* Concatenates a list of characters into a string, in list order.
   A single pass through a [Buffer] keeps this linear; indexing the list with
   [List.nth] for every position would make it quadratic. *)
let chars_to_string chars =
  let buf = Buffer.create (List.length chars) in
  List.iter (Buffer.add_char buf) chars;
  Buffer.contents buf

(* Approach 1: Unquoted field — characters until comma or newline.
   The collected text is trimmed with [String.trim]; the remainder starts at
   the delimiter (',', '\n' or '\r') or is empty. *)
let unquoted_field : string parser = fun input ->
  let is_field_char c = not (c = ',' || c = '\n' || c = '\r') in
  match many0 (satisfy is_field_char "field char") input with
  | Error msg -> Error msg
  | Ok (chars, rest) -> Ok (String.trim (chars_to_string chars), rest)

(* Approach 2: Quoted field — handles escaped quotes "".
   Expects the input to start with a double quote. Inside the quotes a doubled
   quote stands for one literal quote; every other character is copied as-is.
   Returns the unescaped contents and the input after the closing quote.
   Fails with "Expected opening quote" when the input does not start with a
   quote, and with "Unterminated quoted field" when no closing quote is found.

   The scan walks the input by index and copies the remainder once at the end,
   instead of taking a fresh [String.sub] of the rest for every character. *)
let quoted_field : string parser = fun input ->
  let len = String.length input in
  if len = 0 || input.[0] <> '"' then Error "Expected opening quote"
  else begin
    let buf = Buffer.create 32 in
    let rec scan i =
      if i >= len then Error "Unterminated quoted field"
      else if input.[i] <> '"' then begin
        Buffer.add_char buf input.[i];
        scan (i + 1)
      end else if i + 1 < len && input.[i + 1] = '"' then begin
        (* Escaped quote: keep one, skip both. *)
        Buffer.add_char buf '"';
        scan (i + 2)
      end else
        (* Closing quote: the remainder starts right after it. *)
        Ok (Buffer.contents buf, String.sub input (i + 1) (len - i - 1))
    in
    (* Index 0 is the opening quote. *)
    scan 1
  end

(* Approach 3: Full CSV parser *)
(* One field: a leading double quote selects [quoted_field]; anything else,
   including empty input, is parsed by [unquoted_field]. *)
let field : string parser = fun input ->
  match input with
  | "" -> unquoted_field input
  | _ when input.[0] = '"' -> quoted_field input
  | _ -> unquoted_field input

(* One row: a field followed by any number of ",field" pairs. Returns the
   fields in order and the input left after the last field; the row's line
   ending is not consumed. A failing field aborts the row with its error. *)
let row : string list parser = fun input ->
  let rec more collected rest =
    match tag "," rest with
    | Error _ -> Ok (List.rev collected, rest)
    | Ok (_, tail) ->
      (match field tail with
       | Error msg -> Error msg
       | Ok (value, rest') -> more (value :: collected) rest')
  in
  match field input with
  | Error msg -> Error msg
  | Ok (first, rest) -> more [first] rest

(* Consumes a row terminator: "\r\n", "\n", or end of input. Anything else,
   including a lone '\r', fails with "Expected line ending". *)
let line_ending : unit parser = fun input ->
  let n = String.length input in
  let drop k = Ok ((), String.sub input k (n - k)) in
  if n = 0 then Ok ((), "")
  else if n >= 2 && input.[0] = '\r' && input.[1] = '\n' then drop 2
  else if input.[0] = '\n' then drop 1
  else Error "Expected line ending"

(* Whole document: repeatedly parses a row followed by a line ending (or end
   of input) until nothing is left. On success the remainder is always "".
   Errors from [row] and [line_ending] are passed through unchanged. *)
let csv : string list list parser = fun input ->
  let rec loop rows rest =
    match rest with
    | "" -> Ok (List.rev rows, "")
    | _ ->
      (match row rest with
       | Error msg -> Error msg
       | Ok (r, after_row) ->
         (match line_ending after_row with
          | Error msg -> Error msg
          | Ok ((), next) -> loop (r :: rows) next))
  in
  loop [] input

(* Tests: run at module initialisation; any failed [assert] or the [failwith]
   below aborts the program before the success message is printed. *)
let () =
  (* Single fields: unquoted, quoted with a comma, quoted with escaped quotes. *)
  assert (field "hello,world" = Ok ("hello", ",world"));
  assert (field "\"hello,world\"" = Ok ("hello,world", ""));
  assert (field "\"say \"\"hi\"\"\"" = Ok ("say \"hi\"", ""));

  (* Rows: plain, and with a quoted field containing a comma. *)
  assert (row "a,b,c" = Ok (["a"; "b"; "c"], ""));
  assert (row "\"x,y\",z" = Ok (["x,y"; "z"], ""));

  (* Whole document with "\n" separators and no trailing newline. *)
  (match csv "a,b\n1,2\n3,4" with
   | Ok ([["a";"b"];["1";"2"];["3";"4"]], "") -> ()
   | _ -> failwith "CSV test");

  (* The check mark was stored as mis-decoded UTF-8 bytes; it is U+2713. *)
  print_endline "✓ All tests passed"

📊 Detailed Comparison

Comparison: Example 171 โ€” CSV Parser

Quoted field

OCaml:

๐Ÿช Show OCaml equivalent
let quoted_field : string parser = fun input ->
match satisfy (fun c -> c = '"') "quote" input with
| Ok (_, rest) ->
 let buf = Buffer.create 32 in
 let rec go remaining =
   if remaining.[0] = '"' then
     let after = String.sub remaining 1 ... in
     if after.[0] = '"' then (Buffer.add_char buf '"'; go ...)
     else Ok (Buffer.contents buf, after)
   else (Buffer.add_char buf remaining.[0]; go ...)
 in go rest

Rust:

fn quoted_field(input: &str) -> ParseResult<String> {
 if !input.starts_with('"') { return Err(...); }
 let mut result = String::new();
 let mut chars = input[1..].chars();
 loop {
     match chars.next() {
         Some('"') => match chars.next() {
             Some('"') => result.push('"'),  // escaped
             _ => return Ok((result, ...)),   // end
         },
         Some(c) => result.push(c),
         None => return Err("Unterminated".into()),
     }
 }
}

Full CSV

OCaml:

๐Ÿช Show OCaml equivalent
let csv : string list list parser = fun input ->
let rec go acc remaining =
 match row remaining with
 | Ok (r, rest) ->
   match line_ending rest with
   | Ok ((), rest') -> go (r :: acc) rest'
in go [] input

Rust:

// Rust counterpart of the OCaml `csv` above: the recursion becomes a loop that
// parses a row, then a line ending, until the input is empty.
fn csv(input: &str) -> ParseResult<Vec<Vec<String>>> {
 let mut rows = Vec::new();
 let mut remaining = input;
 while !remaining.is_empty() {
     // `?` returns early with the error from `row` / `line_ending`.
     let (r, rest) = row(remaining)?;
     rows.push(r);
     let ((), rest) = line_ending(rest)?;
     remaining = rest;
 }
 // The loop only exits once everything has been consumed.
 Ok((rows, ""))
}