๐Ÿฆ€ Functional Rust

970: Rope String

Difficulty: Advanced Category: Data Structures / Trees Concept: Binary tree of string leaves for O(log n) concatenation and splitting of large strings Key Insight: OCaml's recursive types are implicit (`type rope = Leaf of string | Node of rope rope int`); Rust requires `Box<Rope>` to break the recursion cycle because recursive enum variants must have known size โ€” `Box` adds a pointer layer
// 970: Rope String
// Tree-based string for O(log n) concat/split, O(log n) indexing
// OCaml: recursive type; Rust: enum with Box for recursion

#[derive(Debug, Clone)]
pub enum Rope {
    Leaf(String),
    Node(Box<Rope>, Box<Rope>, usize), // left, right, total_len
}

impl Rope {
    pub fn from_str(s: &str) -> Self {
        Rope::Leaf(s.to_string())
    }

    pub fn len(&self) -> usize {
        match self {
            Rope::Leaf(s) => s.len(),
            Rope::Node(_, _, n) => *n,
        }
    }

    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    pub fn concat(left: Rope, right: Rope) -> Rope {
        let total = left.len() + right.len();
        Rope::Node(Box::new(left), Box::new(right), total)
    }

    /// Collect rope into a String (O(n))
    pub fn to_string_val(&self) -> String {
        match self {
            Rope::Leaf(s) => s.clone(),
            Rope::Node(l, r, _) => {
                let mut out = l.to_string_val();
                out.push_str(&r.to_string_val());
                out
            }
        }
    }

    /// Get character at index i (O(log n))
    pub fn index(&self, i: usize) -> Option<char> {
        match self {
            Rope::Leaf(s) => s.chars().nth(i),
            Rope::Node(l, r, _) => {
                let ln = l.len();
                if i < ln {
                    l.index(i)
                } else {
                    r.index(i - ln)
                }
            }
        }
    }

    /// Split at position i: returns (left[0..i], right[i..])
    pub fn split(self, i: usize) -> (Rope, Rope) {
        match self {
            Rope::Leaf(s) => {
                let i = i.min(s.len());
                let left = Rope::Leaf(s[..i].to_string());
                let right = Rope::Leaf(s[i..].to_string());
                (left, right)
            }
            Rope::Node(l, r, _) => {
                let ln = l.len();
                if i <= ln {
                    let (ll, lr) = l.split(i);
                    (ll, Rope::concat(lr, *r))
                } else {
                    let (rl, rr) = r.split(i - ln);
                    (Rope::concat(*l, rl), rr)
                }
            }
        }
    }

    /// Extract substring [start, start+len)
    pub fn sub(&self, start: usize, len: usize) -> String {
        let (_, right) = self.clone().split(start);
        let (mid, _) = right.split(len);
        mid.to_string_val()
    }
}

fn main() {
    let r1 = Rope::from_str("Hello");
    let r2 = Rope::from_str(", ");
    let r3 = Rope::from_str("World");
    let r4 = Rope::from_str("!");
    let rope = Rope::concat(Rope::concat(r1, r2), Rope::concat(r3, r4));

    println!("length: {}", rope.len());
    println!("to_string: {}", rope.to_string_val());
    println!("index(7): {:?}", rope.index(7));
    println!("sub(7,5): {}", rope.sub(7, 5));

    let (left, right) = rope.clone().split(7);
    println!("split(7) left:  {}", left.to_string_val());
    println!("split(7) right: {}", right.to_string_val());
}

#[cfg(test)]
mod tests {
    use super::*;

    fn make_rope() -> Rope {
        let r1 = Rope::from_str("Hello");
        let r2 = Rope::from_str(", ");
        let r3 = Rope::from_str("World");
        let r4 = Rope::from_str("!");
        Rope::concat(Rope::concat(r1, r2), Rope::concat(r3, r4))
    }

    #[test]
    fn test_length_and_to_string() {
        let rope = make_rope();
        assert_eq!(rope.len(), 13);
        assert_eq!(rope.to_string_val(), "Hello, World!");
    }

    #[test]
    fn test_index() {
        let rope = make_rope();
        assert_eq!(rope.index(0), Some('H'));
        assert_eq!(rope.index(7), Some('W'));
        assert_eq!(rope.index(12), Some('!'));
        assert_eq!(rope.index(13), None);
    }

    #[test]
    fn test_sub() {
        let rope = make_rope();
        assert_eq!(rope.sub(7, 5), "World");
        assert_eq!(rope.sub(0, 5), "Hello");
        assert_eq!(rope.sub(5, 2), ", ");
    }

    #[test]
    fn test_split() {
        let rope = make_rope();
        let (left, right) = rope.split(7);
        assert_eq!(left.to_string_val(), "Hello, ");
        assert_eq!(right.to_string_val(), "World!");
    }

    #[test]
    fn test_empty_rope() {
        let r = Rope::from_str("");
        assert!(r.is_empty());
        assert_eq!(r.to_string_val(), "");
    }
}
(* 970: Rope String *)
(* Tree-based string structure for O(log n) concatenation *)
(* Naive concat is O(1), but split/index are O(log n) *)

type rope =
  | Leaf of string
  | Node of rope * rope * int  (* left, right, total_length *)

let length = function
  | Leaf s -> String.length s
  | Node (_, _, n) -> n

let concat r1 r2 =
  Node (r1, r2, length r1 + length r2)

let of_string s = Leaf s

(* Convert rope back to string *)
let rec to_string = function
  | Leaf s -> s
  | Node (l, r, _) -> to_string l ^ to_string r

(* Index: get character at position i *)
let rec index rope i =
  match rope with
  | Leaf s ->
    if i >= 0 && i < String.length s then Some s.[i]
    else None
  | Node (l, r, _) ->
    let ln = length l in
    if i < ln then index l i
    else index r (i - ln)

(* Split rope into (left, right) at position i *)
let rec split rope i =
  match rope with
  | Leaf s ->
    let n = String.length s in
    let i = max 0 (min i n) in
    (Leaf (String.sub s 0 i), Leaf (String.sub s i (n - i)))
  | Node (l, r, _) ->
    let ln = length l in
    if i <= ln then
      let (ll, lr) = split l i in
      (ll, concat lr r)
    else
      let (rl, rr) = split r (i - ln) in
      (concat l rl, rr)

(* Substring extraction *)
let sub rope start len =
  let (_, right) = split rope start in
  let (mid, _) = split right len in
  to_string mid

let () =
  let r1 = of_string "Hello" in
  let r2 = of_string ", " in
  let r3 = of_string "World" in
  let r4 = of_string "!" in

  let rope = concat (concat r1 r2) (concat r3 r4) in
  assert (length rope = 13);
  assert (to_string rope = "Hello, World!");

  assert (index rope 0 = Some 'H');
  assert (index rope 7 = Some 'W');
  assert (index rope 12 = Some '!');
  assert (index rope 13 = None);

  assert (sub rope 7 5 = "World");
  assert (sub rope 0 5 = "Hello");

  let (left, right) = split rope 7 in
  assert (to_string left = "Hello, ");
  assert (to_string right = "World!");

  Printf.printf "โœ“ All tests passed\n"

๐Ÿ“Š Detailed Comparison

Rope String โ€” Comparison

Core Insight

A rope represents a string as a binary tree of smaller strings (leaves). Concatenation creates a new Node in O(1). Indexing and splitting traverse the tree in O(log n). Both languages express this as a recursive algebraic type. The critical Rust difference: `Box<Rope>` is required because recursive enum variants must have statically known size, and a direct self-reference would be infinite.

OCaml Approach

  • `type rope = Leaf of string | Node of rope rope int` โ€” recursive type, implicit
  • No heap boxing needed โ€” OCaml's GC handles pointer-sized references automatically
  • `String.sub s 0 i` for splitting leaf strings
  • `s.[i]` for character access (byte index)
  • `to_string l ^ to_string r` โ€” string concatenation (O(n) but readable)

Rust Approach

  • `enum Rope { Leaf(String), Node(Box<Rope>, Box<Rope>, usize) }` โ€” explicit Box
  • `Box::new(...)` allocates on the heap, providing the indirection needed for recursion
  • `s[..i].to_string()` for splitting (byte-indexed slice)
  • `s.chars().nth(i)` for Unicode-safe character access
  • `#[derive(Clone)]` needed because `to_string_val` requires cloning for sub
  • `l.to_string_val()` + `push_str` instead of `^` operator

Comparison Table

AspectOCamlRust
Recursive type`type rope = ... Node of rope rope int``enum Rope { Node(Box<Rope>, Box<Rope>, usize) }`
Heap boxingImplicit (GC)Explicit `Box<T>`
String split`String.sub s 0 i``s[..i].to_string()`
Char access`s.[i]` (byte)`s.chars().nth(i)` (Unicode)
Concat strings`^` operator`String::push_str`
CloneNot needed (GC shares)`#[derive(Clone)]` explicit
Length field`int``usize`