🦀 Functional Rust

958: CSV Parser

Difficulty: Intermediate
Category: Parsing / State Machines
Concept: Explicit state machine for parsing CSV fields with quoted strings and escaped (doubled) quotes
Key Insight: Both languages use the same 3-state machine (Normal/InQuote/AfterQuote); OCaml uses a mutable `ref` for state, Rust uses a mutable local variable with an enum — the logic is identical, only the mutation style differs
// 958: CSV Parser
// OCaml uses mutable Buffer + state ref; Rust uses an enum state machine with chars iterator

// Approach 1: Simple split (no quote handling)
/// Splits `line` at every comma and returns the pieces as slices borrowed
/// from the input.
///
/// Quote characters get no special treatment here, so a comma inside a
/// quoted field still ends the piece. An empty input yields one empty piece.
pub fn split_simple(line: &str) -> Vec<&str> {
    line.split(',').collect::<Vec<&str>>()
}

// Approach 2: Full CSV state machine with quote handling
/// Position of the parser relative to a double-quoted section of the line.
#[derive(Debug, PartialEq)]
enum State {
    /// Outside any quoted section.
    Normal,
    /// After an opening quote, before the next quote character.
    InQuote,
    /// Directly after a quote seen while in `InQuote`; the next character
    /// decides whether that quote was an escape or the end of the section.
    AfterQuote,
}

/// Splits a single CSV line into owned fields, honouring double quotes.
///
/// Rules applied character by character:
/// - a `"` outside quotes opens a quoted section and is not stored;
/// - inside quotes every character except `"` is stored, commas included;
/// - `""` inside quotes stores one literal `"`;
/// - a `,` outside quotes (or right after a closing quote) ends the field;
/// - any other character right after a closing quote is discarded and
///   parsing continues outside quotes.
///
/// The text accumulated when the input ends is always pushed, so the result
/// holds at least one field (an empty line gives one empty field).
pub fn parse_csv_line(line: &str) -> Vec<String> {
    let mut fields: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut state = State::Normal;

    for c in line.chars() {
        match (&state, c) {
            // Quote handling: open, tentatively close, or un-escape.
            (State::Normal, '"') => state = State::InQuote,
            (State::InQuote, '"') => state = State::AfterQuote,
            (State::AfterQuote, '"') => {
                // The previous quote was the first half of a `""` escape.
                current.push('"');
                state = State::InQuote;
            }
            // Field separator: only reachable when not inside quotes.
            (State::Normal, ',') | (State::AfterQuote, ',') => {
                fields.push(current.clone());
                current.clear();
                state = State::Normal;
            }
            // Ordinary content, both outside and inside quotes.
            (State::Normal, c) | (State::InQuote, c) => current.push(c),
            // Stray character after a closing quote: dropped.
            (State::AfterQuote, _) => state = State::Normal,
        }
    }

    // The final field has no trailing comma to terminate it.
    fields.push(current);
    fields
}

// Approach 3: Parse multiple rows
/// Parses multi-line CSV text into one `Vec<String>` of fields per row.
///
/// The text is cut into lines first, so a quoted field cannot span a line
/// break. Lines that are empty are skipped; every other line is handed to
/// `parse_csv_line`.
pub fn parse_csv(text: &str) -> Vec<Vec<String>> {
    let non_empty_lines = text.lines().filter(|line| !line.is_empty());
    non_empty_lines.map(parse_csv_line).collect()
}

/// Demo driver: prints the result of each parsing approach for a handful of
/// sample inputs.
fn main() {
    println!("Simple split: {:?}", split_simple("a,b,c"));
    println!("Quoted: {:?}", parse_csv_line("\"hello\",\"world\",plain"));
    println!(
        "Comma inside quote: {:?}",
        parse_csv_line("\"one, two\",three")
    );
    println!(
        "Escaped quotes: {:?}",
        parse_csv_line("\"say \"\"hi\"\"\",end")
    );

    let csv = "name,age,city\n\"Alice, Smith\",30,Amsterdam\nBob,25,\"New, York\"";
    println!("\nMulti-row CSV:");
    let rows = parse_csv(csv);
    for row in &rows {
        println!("  {:?}", row);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the owned field list a parser call is expected to return.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| String::from(*s)).collect()
    }

    /// Plain comma splitting, with and without a separator present.
    #[test]
    fn test_simple_split() {
        assert_eq!(split_simple("a,b,c"), ["a", "b", "c"]);
        assert_eq!(split_simple("one"), ["one"]);
    }

    /// Surrounding quotes are stripped from quoted fields.
    #[test]
    fn test_quoted_fields() {
        let got = parse_csv_line("\"hello\",\"world\",plain");
        assert_eq!(got, owned(&["hello", "world", "plain"]));
    }

    /// A comma inside quotes belongs to the field.
    #[test]
    fn test_comma_inside_quotes() {
        let got = parse_csv_line("\"one, two\",three");
        assert_eq!(got, owned(&["one, two", "three"]));
    }

    /// A doubled quote inside a quoted field becomes one literal quote.
    #[test]
    fn test_escaped_quotes() {
        let got = parse_csv_line("\"say \"\"hi\"\"\",end");
        assert_eq!(got, owned(&["say \"hi\"", "end"]));
    }

    /// Consecutive commas produce empty fields.
    #[test]
    fn test_empty_fields() {
        assert_eq!(parse_csv_line(",,"), owned(&["", "", ""]));
        assert_eq!(parse_csv_line("a,,c"), owned(&["a", "", "c"]));
    }

    /// Quoted and unquoted fields can be mixed on one line.
    #[test]
    fn test_mixed() {
        let got = parse_csv_line("name,\"Alice, Bob\",42");
        assert_eq!(got, owned(&["name", "Alice, Bob", "42"]));
    }

    /// Each non-empty line becomes its own row.
    #[test]
    fn test_multi_row() {
        let rows = parse_csv("a,b,c\n1,2,3\n\"x,y\",z,w");
        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0], owned(&["a", "b", "c"]));
        assert_eq!(rows[2], owned(&["x,y", "z", "w"]));
    }
}
(* 958: CSV Parser *)
(* Handle quoted fields, commas inside quotes, escaped quotes *)

(* Approach 1: Simple split (no quote handling) *)

(* Split [line] at every comma. Quotes are not interpreted, so a comma
   inside a quoted field still separates two pieces. *)
let split_simple (line : string) : string list =
  line |> String.split_on_char ','

(* Approach 2: Proper CSV field parser using state machine *)

(* Parser position relative to a double-quoted section:
   - Normal:     outside quotes
   - InQuote:    after an opening quote, before the next quote character
   - AfterQuote: directly after a quote seen while in InQuote *)
type state = Normal | InQuote | AfterQuote

(* Split one CSV line into fields, honouring double quotes.
   A doubled quote inside a quoted section yields one literal quote; a comma
   inside quotes is kept as field content; any character other than a quote
   or comma that directly follows a closing quote is discarded. The result
   always contains at least one field. *)
let parse_csv_line line =
  let n = String.length line in
  let fields = ref [] in
  let current = Buffer.create 16 in
  let state = ref Normal in
  (* Move the accumulated text onto the (reversed) field list. *)
  let flush () =
    fields := Buffer.contents current :: !fields;
    Buffer.clear current
  in
  for i = 0 to n - 1 do
    let c = line.[i] in
    match !state, c with
    (* Quote handling: open, tentatively close, or un-escape. *)
    | Normal, '"' -> state := InQuote
    | InQuote, '"' -> state := AfterQuote
    | AfterQuote, '"' ->
      Buffer.add_char current '"';
      state := InQuote
    (* Field separator: only reachable when not inside quotes. *)
    | (Normal | AfterQuote), ',' ->
      flush ();
      state := Normal
    (* Ordinary content, both outside and inside quotes. *)
    | (Normal | InQuote), c -> Buffer.add_char current c
    (* Stray character after a closing quote: dropped. *)
    | AfterQuote, _ -> state := Normal
  done;
  (* The last field has no trailing comma to terminate it. *)
  flush ();
  List.rev !fields

(* Approach 3: Parse multiple rows *)

(* Parse multi-line CSV text into a list of rows. The text is split on
   newline characters first; empty lines are skipped and every other line
   is handed to [parse_csv_line]. *)
let parse_csv text =
  text
  |> String.split_on_char '\n'
  |> List.filter_map (fun line ->
       match line with
       | "" -> None
       | _ -> Some (parse_csv_line line))

(* Self-check driver: exercises each parser with a sample input, asserts the
   expected fields, and prints a confirmation once every assertion holds. *)
let () =
  (* Simple split *)
  let row = split_simple "a,b,c" in
  assert (row = ["a"; "b"; "c"]);

  (* Quoted fields *)
  let row2 = parse_csv_line "\"hello\",\"world\",plain" in
  assert (row2 = ["hello"; "world"; "plain"]);

  (* Comma inside quotes *)
  let row3 = parse_csv_line "\"one, two\",three" in
  assert (row3 = ["one, two"; "three"]);

  (* Escaped quotes inside quoted field *)
  let row4 = parse_csv_line "\"say \"\"hi\"\"\",end" in
  assert (row4 = ["say \"hi\""; "end"]);

  (* Empty fields *)
  let row5 = parse_csv_line ",," in
  assert (row5 = [""; ""; ""]);

  (* Mixed *)
  let row6 = parse_csv_line "name,\"Alice, Bob\",42" in
  assert (row6 = ["name"; "Alice, Bob"; "42"]);

  (* Multi-row *)
  let csv = "a,b,c\n1,2,3\n\"x,y\",z,w" in
  let rows = parse_csv csv in
  assert (List.length rows = 3);
  assert (List.nth rows 0 = ["a"; "b"; "c"]);
  assert (List.nth rows 2 = ["x,y"; "z"; "w"]);

  (* The check mark is the UTF-8 character U+2713. *)
  Printf.printf "✓ All tests passed\n"

📊 Detailed Comparison

CSV Parser — Comparison

Core Insight

CSV parsing requires a state machine to handle quoted fields. The algorithm is identical in both languages. OCaml expresses mutable state via `ref` cells; Rust uses `let mut` variables. OCaml accumulates the current field in a `Buffer`, Rust in a `String`. Rust's `enum State` and OCaml's variant `type state` are direct counterparts: each is the idiomatic way to declare the three parser states in its language.

OCaml Approach

  • `type state = Normal | InQuote | AfterQuote` — custom variant type
  • `state := InQuote` — mutable reference cell update
  • `Buffer.create`, `Buffer.add_char`, `Buffer.contents` — mutable character accumulation
  • `for i = 0 to n - 1 do ... done` — imperative iteration over string indices
  • `String.split_on_char '\n'` for line splitting
  • `List.filter_map` to skip empty lines

Rust Approach

  • `enum State { Normal, InQuote, AfterQuote }` — same concept, idiomatic Rust
  • `state = State::InQuote` — direct assignment of enum variant
  • `String::new()`, `push(c)`, `clear()` — mutable String accumulation
  • `for c in line.chars()` — iterator over chars (Unicode-safe)
  • `match (&state, c)` — tuple pattern matching on (state, char) pair
  • `text.lines().filter(...).map(...).collect()` — functional pipeline for rows

Comparison Table

| Aspect | OCaml | Rust |
|---|---|---|
| State type | `type state = ...` | `enum State { ... }` |
| State mutation | `state := InQuote` | `state = State::InQuote` |
| Char accumulation | `Buffer.add_char current c` | `current.push(c)` |
| String from buffer | `Buffer.contents current` | `current.clone()` |
| Loop style | `for i = 0 to n-1` | `for c in line.chars()` |
| Pattern on pair | `match !state, c with` | `match (&state, c)` |
| Line iteration | `String.split_on_char '\n'` | `text.lines()` |
| Skip empty | `List.filter_map` | `.filter(\|line\| !line.is_empty())` |