729 Advanced

729-avoid-allocations — Avoid Allocations

Functional Programming

Tutorial

The Problem

Heap allocation is not free. Every Box::new, Vec::push, or String::from call invokes the global allocator, which acquires a lock, searches a free list, and may trigger a system call. In latency-sensitive code — game loops, network packet handlers, audio DSP — even a single allocation per frame causes jitter. The solution is to pre-allocate once and reuse, use iterator chains that produce zero intermediate collections, and prefer stack-local buffers for short-lived data.

🎯 Learning Outcomes

• Reuse a String or Vec buffer by calling clear() instead of dropping and reallocating

• Write integer-to-string conversion using a stack buffer ([u8; 20]) with no heap

• Build zero-allocation iterator pipelines with filter, map, and sum

• Use Vec::with_capacity to avoid repeated reallocations when the size is known

• Understand the trade-off between pre-allocation cost and per-call allocation cost

Code Example

#![allow(clippy::all)]
/// 729: Avoid Allocations — Hot path techniques in std-only Rust

// ── Technique 1: Stack-allocated scratch buffer ──────────────────────────────

/// Write a formatted record into a pre-allocated `String` buffer.
/// Caller owns the buffer and can reuse it across many calls.
fn format_record_into(buf: &mut String, name: &str, score: u32) {
    buf.clear(); // reset without freeing heap memory
    buf.push_str(name);
    buf.push(':');
    // itoa-style: write integer without allocating a temporary String
    let mut tmp = [0u8; 20];
    let digits = u32_to_str(score, &mut tmp);
    buf.push_str(digits);
}

/// Format u32 into a stack buffer; return the filled slice.
fn u32_to_str(mut n: u32, buf: &mut [u8; 20]) -> &str {
    if n == 0 {
        buf[19] = b'0';
        return std::str::from_utf8(&buf[19..]).unwrap();
    }
    let mut pos = 20usize;
    while n > 0 {
        pos -= 1;
        buf[pos] = b'0' + (n % 10) as u8;
        n /= 10;
    }
    std::str::from_utf8(&buf[pos..]).unwrap()
}

// ── Technique 2: Iterator chains — zero intermediate allocations ──────────────

fn sum_squares(n: u64) -> u64 {
    (0..n).map(|i| i * i).sum() // no Vec created; purely lazy
}

fn hot_filter_sum(data: &[i32]) -> i32 {
    data.iter().filter(|&&x| x > 0).map(|&x| x * 2).sum() // single pass, zero allocs
}

// ── Technique 3: Reuse a Vec by clearing, not dropping ───────────────────────

struct Pipeline {
    scratch: Vec<i32>,
}

impl Pipeline {
    fn new() -> Self {
        Pipeline {
            scratch: Vec::with_capacity(1024),
        }
    }

    fn process(&mut self, input: &[i32]) -> &[i32] {
        self.scratch.clear(); // keeps allocated capacity
        for &x in input {
            if x.rem_euclid(2) == 0 {
                self.scratch.push(x * 3);
            }
        }
        &self.scratch
    }
}

// ── Technique 4: Fixed-size stack array as scratch ────────────────────────────

fn count_words_no_alloc(s: &str) -> usize {
    // Count words without allocating — just scan bytes
    let bytes = s.as_bytes();
    let mut in_word = false;
    let mut count = 0usize;
    for &b in bytes {
        match (in_word, b == b' ' || b == b'\t' || b == b'\n') {
            (false, false) => {
                in_word = true;
                count += 1;
            }
            (true, true) => {
                in_word = false;
            }
            _ => {}
        }
    }
    count
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_format_record_reuse() {
        let mut buf = String::with_capacity(32);
        format_record_into(&mut buf, "Alice", 99);
        assert_eq!(buf, "Alice:99");
        format_record_into(&mut buf, "Bob", 0);
        assert_eq!(buf, "Bob:0");
    }

    #[test]
    fn test_sum_squares() {
        assert_eq!(sum_squares(0), 0);
        assert_eq!(sum_squares(4), 0 + 1 + 4 + 9); // 14
    }

    #[test]
    fn test_hot_filter_sum() {
        assert_eq!(hot_filter_sum(&[1, -2, 3, -4]), (1 + 3) * 2);
        assert_eq!(hot_filter_sum(&[]), 0);
    }

    #[test]
    fn test_pipeline_reuse() {
        let mut p = Pipeline::new();
        let r1 = p.process(&[2, 3, 4]);
        assert_eq!(r1, &[6, 12]); // evens * 3
        let r2 = p.process(&[10]);
        assert_eq!(r2, &[30]);
    }

    #[test]
    fn test_word_count() {
        assert_eq!(count_words_no_alloc(""), 0);
        assert_eq!(count_words_no_alloc("hello"), 1);
        assert_eq!(count_words_no_alloc("hello world"), 2);
        assert_eq!(count_words_no_alloc("  a  b  c  "), 3);
    }
}

(* 729: Avoid Allocations — OCaml perspective
   OCaml's GC handles heap allocation, but we can still minimize allocation
   pressure by reusing buffers and using sequences (lazy lists). *)

(* Reusable buffer pattern — avoids allocating a new string each call *)
let buf = Buffer.create 64

let format_record_into_buf name score =
  Buffer.clear buf;
  Buffer.add_string buf name;
  Buffer.add_char buf ':';
  Buffer.add_string buf (string_of_int score);
  Buffer.contents buf   (* still allocates the result string, but buf is reused *)

(* Sequence (lazy) — no intermediate list allocation *)
let sum_squares_seq n =
  Seq.init n (fun i -> i * i)
  |> Seq.fold_left (+) 0

(* Array (stack-ish in OCaml sense — one alloc, reused) *)
let process_with_fixed_buf data =
  let scratch = Array.make 256 0 in
  List.iteri (fun i x ->
    if i < 256 then scratch.(i) <- x * 2
  ) data;
  Array.sub scratch 0 (min (List.length data) 256)

(* String splitting without extra allocation — find positions *)
let count_commas s =
  String.fold_left (fun acc c -> if c = ',' then acc + 1 else acc) 0 s

let () =
  Printf.printf "Record: %s\n" (format_record_into_buf "Alice" 42);
  Printf.printf "Sum of squares 0..9: %d\n" (sum_squares_seq 10);
  Printf.printf "Commas in 'a,b,c,d': %d\n" (count_commas "a,b,c,d");
  let result = process_with_fixed_buf [1;2;3;4;5] in
  Printf.printf "Processed: [%s]\n"
    (Array.to_list result |> List.map string_of_int |> String.concat ";")

Key Differences

Cost model: In Rust, every allocation is explicit and potentially blocking; in OCaml, minor-heap allocation is nearly free (bump pointer, ~2 ns), making avoidance less urgent.

Buffer reuse: Rust's buf.clear() is idiomatic for string reuse; OCaml's Buffer.clear serves the same purpose but the GC would collect the old buffer anyway.

Stack strings: Rust can use [u8; N] + str::from_utf8 for zero-heap text; OCaml has no direct stack allocation (all values go through the GC).

Iterator laziness: Both languages have lazy iterators (Seq in OCaml, Iterator in Rust); Rust's borrow checker enforces that no intermediate Vec is created accidentally.

OCaml Approach

OCaml uses a generational GC so short-lived allocations are cheap (minor heap bump pointer). Avoiding allocations is less critical in OCaml than in Rust. However, for C-interop and FFI-heavy code, OCaml uses Bytes.t buffers passed by reference, and Buffer.t (similar to Rust's reusable String) for building strings incrementally without intermediate copies.

Full Source

#![allow(clippy::all)]
/// 729: Avoid Allocations — Hot path techniques in std-only Rust

// ── Technique 1: Stack-allocated scratch buffer ──────────────────────────────

/// Write a formatted record into a pre-allocated `String` buffer.
/// Caller owns the buffer and can reuse it across many calls.
fn format_record_into(buf: &mut String, name: &str, score: u32) {
    buf.clear(); // reset without freeing heap memory
    buf.push_str(name);
    buf.push(':');
    // itoa-style: write integer without allocating a temporary String
    let mut tmp = [0u8; 20];
    let digits = u32_to_str(score, &mut tmp);
    buf.push_str(digits);
}

/// Format u32 into a stack buffer; return the filled slice.
fn u32_to_str(mut n: u32, buf: &mut [u8; 20]) -> &str {
    if n == 0 {
        buf[19] = b'0';
        return std::str::from_utf8(&buf[19..]).unwrap();
    }
    let mut pos = 20usize;
    while n > 0 {
        pos -= 1;
        buf[pos] = b'0' + (n % 10) as u8;
        n /= 10;
    }
    std::str::from_utf8(&buf[pos..]).unwrap()
}

// ── Technique 2: Iterator chains — zero intermediate allocations ──────────────

fn sum_squares(n: u64) -> u64 {
    (0..n).map(|i| i * i).sum() // no Vec created; purely lazy
}

fn hot_filter_sum(data: &[i32]) -> i32 {
    data.iter().filter(|&&x| x > 0).map(|&x| x * 2).sum() // single pass, zero allocs
}

// ── Technique 3: Reuse a Vec by clearing, not dropping ───────────────────────

struct Pipeline {
    scratch: Vec<i32>,
}

impl Pipeline {
    fn new() -> Self {
        Pipeline {
            scratch: Vec::with_capacity(1024),
        }
    }

    fn process(&mut self, input: &[i32]) -> &[i32] {
        self.scratch.clear(); // keeps allocated capacity
        for &x in input {
            if x.rem_euclid(2) == 0 {
                self.scratch.push(x * 3);
            }
        }
        &self.scratch
    }
}

// ── Technique 4: Fixed-size stack array as scratch ────────────────────────────

fn count_words_no_alloc(s: &str) -> usize {
    // Count words without allocating — just scan bytes
    let bytes = s.as_bytes();
    let mut in_word = false;
    let mut count = 0usize;
    for &b in bytes {
        match (in_word, b == b' ' || b == b'\t' || b == b'\n') {
            (false, false) => {
                in_word = true;
                count += 1;
            }
            (true, true) => {
                in_word = false;
            }
            _ => {}
        }
    }
    count
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_format_record_reuse() {
        let mut buf = String::with_capacity(32);
        format_record_into(&mut buf, "Alice", 99);
        assert_eq!(buf, "Alice:99");
        format_record_into(&mut buf, "Bob", 0);
        assert_eq!(buf, "Bob:0");
    }

    #[test]
    fn test_sum_squares() {
        assert_eq!(sum_squares(0), 0);
        assert_eq!(sum_squares(4), 0 + 1 + 4 + 9); // 14
    }

    #[test]
    fn test_hot_filter_sum() {
        assert_eq!(hot_filter_sum(&[1, -2, 3, -4]), (1 + 3) * 2);
        assert_eq!(hot_filter_sum(&[]), 0);
    }

    #[test]
    fn test_pipeline_reuse() {
        let mut p = Pipeline::new();
        let r1 = p.process(&[2, 3, 4]);
        assert_eq!(r1, &[6, 12]); // evens * 3
        let r2 = p.process(&[10]);
        assert_eq!(r2, &[30]);
    }

    #[test]
    fn test_word_count() {
        assert_eq!(count_words_no_alloc(""), 0);
        assert_eq!(count_words_no_alloc("hello"), 1);
        assert_eq!(count_words_no_alloc("hello world"), 2);
        assert_eq!(count_words_no_alloc("  a  b  c  "), 3);
    }
}

(* 729: Avoid Allocations — OCaml perspective
   OCaml's GC handles heap allocation, but we can still minimize allocation
   pressure by reusing buffers and using sequences (lazy lists). *)

(* Reusable buffer pattern — avoids allocating a new string each call *)
let buf = Buffer.create 64

let format_record_into_buf name score =
  Buffer.clear buf;
  Buffer.add_string buf name;
  Buffer.add_char buf ':';
  Buffer.add_string buf (string_of_int score);
  Buffer.contents buf   (* still allocates the result string, but buf is reused *)

(* Sequence (lazy) — no intermediate list allocation *)
let sum_squares_seq n =
  Seq.init n (fun i -> i * i)
  |> Seq.fold_left (+) 0

(* Array (stack-ish in OCaml sense — one alloc, reused) *)
let process_with_fixed_buf data =
  let scratch = Array.make 256 0 in
  List.iteri (fun i x ->
    if i < 256 then scratch.(i) <- x * 2
  ) data;
  Array.sub scratch 0 (min (List.length data) 256)

(* String splitting without extra allocation — find positions *)
let count_commas s =
  String.fold_left (fun acc c -> if c = ',' then acc + 1 else acc) 0 s

let () =
  Printf.printf "Record: %s\n" (format_record_into_buf "Alice" 42);
  Printf.printf "Sum of squares 0..9: %d\n" (sum_squares_seq 10);
  Printf.printf "Commas in 'a,b,c,d': %d\n" (count_commas "a,b,c,d");
  let result = process_with_fixed_buf [1;2;3;4;5] in
  Printf.printf "Processed: [%s]\n"
    (Array.to_list result |> List.map string_of_int |> String.concat ";")

✓ Tests Rust test suite

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_format_record_reuse() {
        let mut buf = String::with_capacity(32);
        format_record_into(&mut buf, "Alice", 99);
        assert_eq!(buf, "Alice:99");
        format_record_into(&mut buf, "Bob", 0);
        assert_eq!(buf, "Bob:0");
    }

    #[test]
    fn test_sum_squares() {
        assert_eq!(sum_squares(0), 0);
        assert_eq!(sum_squares(4), 0 + 1 + 4 + 9); // 14
    }

    #[test]
    fn test_hot_filter_sum() {
        assert_eq!(hot_filter_sum(&[1, -2, 3, -4]), (1 + 3) * 2);
        assert_eq!(hot_filter_sum(&[]), 0);
    }

    #[test]
    fn test_pipeline_reuse() {
        let mut p = Pipeline::new();
        let r1 = p.process(&[2, 3, 4]);
        assert_eq!(r1, &[6, 12]); // evens * 3
        let r2 = p.process(&[10]);
        assert_eq!(r2, &[30]);
    }

    #[test]
    fn test_word_count() {
        assert_eq!(count_words_no_alloc(""), 0);
        assert_eq!(count_words_no_alloc("hello"), 1);
        assert_eq!(count_words_no_alloc("hello world"), 2);
        assert_eq!(count_words_no_alloc("  a  b  c  "), 3);
    }
}

Exercises

Modify format_record_into to write a CSV line (name, score, timestamp) into the buffer without allocating any intermediate strings.

Implement a hot_path_process function that filters, transforms, and sums a &[f64] slice using only iterator combinators — verify with cargo bench that it allocates zero bytes.

Build a ScratchPool that pre-allocates 16 String buffers and hands them out round-robin, measuring throughput vs. a naïve String::new() approach.

Open Source Repos

functional-rust

View the source for this example on GitHub — OCaml and Rust side by side in the repo.

Rust