๐Ÿฆ€ Functional Rust

1045: Small Vector Optimization

Difficulty: Advanced Category: Data Structures Concept: Stack-allocate small collections, spill to heap when they grow beyond N Key Insight: The small-vec optimization avoids heap allocation for common small cases. Rust's const generics (`SmallVec<T, const N: usize>`) make the stack capacity a compile-time parameter. OCaml's GC makes this unnecessary โ€” minor heap allocation is already near-free.
// 1045: Small Vector Optimization Concept
// Stack up to N elements, heap beyond โ€” like SmallVec (concept, std only)

/// SmallVec: stores up to N elements on the stack, spills to heap
enum SmallVec<T, const N: usize> {
    Inline {
        data: [Option<T>; N],  // Using Option since we can't use MaybeUninit safely
        len: usize,
    },
    Heap(Vec<T>),
}

impl<T: Clone + Default, const N: usize> SmallVec<T, N> {
    fn new() -> Self {
        SmallVec::Inline {
            data: std::array::from_fn(|_| None),
            len: 0,
        }
    }

    fn push(&mut self, value: T) {
        match self {
            SmallVec::Inline { data, len } if *len < N => {
                data[*len] = Some(value);
                *len += 1;
            }
            SmallVec::Inline { data, len } => {
                // Spill to heap
                let mut vec = Vec::with_capacity(*len + 1);
                for item in data.iter_mut().take(*len) {
                    if let Some(val) = item.take() {
                        vec.push(val);
                    }
                }
                vec.push(value);
                *self = SmallVec::Heap(vec);
            }
            SmallVec::Heap(vec) => {
                vec.push(value);
            }
        }
    }

    fn len(&self) -> usize {
        match self {
            SmallVec::Inline { len, .. } => *len,
            SmallVec::Heap(vec) => vec.len(),
        }
    }

    fn is_inline(&self) -> bool {
        matches!(self, SmallVec::Inline { .. })
    }

    fn get(&self, index: usize) -> Option<&T> {
        match self {
            SmallVec::Inline { data, len } => {
                if index < *len {
                    data[index].as_ref()
                } else {
                    None
                }
            }
            SmallVec::Heap(vec) => vec.get(index),
        }
    }

    fn to_vec(&self) -> Vec<T> {
        match self {
            SmallVec::Inline { data, len } => {
                data.iter()
                    .take(*len)
                    .filter_map(|x| x.as_ref().cloned())
                    .collect()
            }
            SmallVec::Heap(vec) => vec.clone(),
        }
    }
}

fn basic_small_vec() {
    let mut sv: SmallVec<i32, 4> = SmallVec::new();
    sv.push(1);
    sv.push(2);
    sv.push(3);

    assert_eq!(sv.len(), 3);
    assert!(sv.is_inline()); // Still on stack
    assert_eq!(sv.to_vec(), vec![1, 2, 3]);

    sv.push(4); // At capacity, still inline
    assert!(sv.is_inline());

    sv.push(5); // Spills to heap
    assert!(!sv.is_inline());
    assert_eq!(sv.to_vec(), vec![1, 2, 3, 4, 5]);
}

fn indexed_access() {
    let mut sv: SmallVec<&str, 3> = SmallVec::new();
    sv.push("hello");
    sv.push("world");

    assert_eq!(sv.get(0), Some(&"hello"));
    assert_eq!(sv.get(1), Some(&"world"));
    assert_eq!(sv.get(2), None);
}

fn spill_behavior() {
    let mut sv: SmallVec<i32, 2> = SmallVec::new();
    sv.push(10);
    sv.push(20);
    assert!(sv.is_inline());

    sv.push(30); // Spills
    assert!(!sv.is_inline());
    assert_eq!(sv.len(), 3);
    assert_eq!(sv.get(2), Some(&30));
}

fn main() {
    basic_small_vec();
    indexed_access();
    spill_behavior();
    println!("โœ“ All tests passed");
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic() { basic_small_vec(); }

    #[test]
    fn test_indexed() { indexed_access(); }

    #[test]
    fn test_spill() { spill_behavior(); }

    #[test]
    fn test_empty() {
        let sv: SmallVec<i32, 4> = SmallVec::new();
        assert_eq!(sv.len(), 0);
        assert!(sv.is_inline());
        assert_eq!(sv.get(0), None);
    }

    #[test]
    fn test_large_n() {
        let mut sv: SmallVec<i32, 16> = SmallVec::new();
        for i in 0..16 {
            sv.push(i);
        }
        assert!(sv.is_inline());
        sv.push(16);
        assert!(!sv.is_inline());
    }
}
(* 1045: Small Vector Optimization Concept *)
(* Stack-allocate up to N elements, heap beyond *)
(* OCaml doesn't have this optimization โ€” GC handles allocation *)

(* Approach 1: Simulated small-vec with variant type *)
type 'a small_vec =
  | Inline of 'a array * int   (* array of capacity N, length *)
  | Heap of 'a list             (* spills to list *)

let sv_capacity = 4

let sv_empty () = Inline (Array.make sv_capacity (Obj.magic 0), 0)

let sv_push sv x =
  match sv with
  | Inline (arr, len) when len < sv_capacity ->
    arr.(len) <- x;
    Inline (arr, len + 1)
  | Inline (arr, len) ->
    (* Spill to heap *)
    let lst = ref [x] in
    for i = len - 1 downto 0 do
      lst := arr.(i) :: !lst
    done;
    Heap !lst
  | Heap lst -> Heap (lst @ [x])

let sv_to_list = function
  | Inline (arr, len) ->
    let rec go i acc =
      if i < 0 then acc
      else go (i - 1) (arr.(i) :: acc)
    in
    go (len - 1) []
  | Heap lst -> lst

let sv_length = function
  | Inline (_, len) -> len
  | Heap lst -> List.length lst

let small_vec_test () =
  let sv = sv_empty () in
  let sv = sv_push sv 1 in
  let sv = sv_push sv 2 in
  let sv = sv_push sv 3 in
  assert (sv_length sv = 3);
  assert (sv_to_list sv = [1; 2; 3]);
  (* Still inline *)
  (match sv with Inline _ -> () | Heap _ -> assert false);
  let sv = sv_push sv 4 in
  (* At capacity, still inline *)
  (match sv with Inline _ -> () | Heap _ -> assert false);
  let sv = sv_push sv 5 in
  (* Spilled to heap *)
  (match sv with Inline _ -> assert false | Heap _ -> ());
  assert (sv_to_list sv = [1; 2; 3; 4; 5])

(* Approach 2: Why OCaml doesn't need this *)
(* OCaml's GC makes small allocations very cheap:
   - Minor heap allocation is just a pointer bump
   - Short-lived small objects are collected in microseconds
   - No stack vs heap distinction for the programmer
   The small-vec optimization is a Rust/C++ concern where
   heap allocation has higher fixed cost. *)

(* Approach 3: Fixed-size array as "small vec" *)
let fixed_array_demo () =
  let arr = Array.make 4 0 in
  let len = ref 0 in
  let push x =
    if !len < 4 then begin
      arr.(!len) <- x;
      incr len
    end
  in
  push 10; push 20; push 30;
  assert (!len = 3);
  assert (arr.(0) = 10);
  assert (arr.(2) = 30)

let () =
  small_vec_test ();
  fixed_array_demo ();
  Printf.printf "โœ“ All tests passed\n"

๐Ÿ“Š Detailed Comparison

Small Vector Optimization โ€” Comparison

Core Insight

The small-vec optimization stores elements inline (on the stack) up to a fixed capacity N, then spills to the heap. This matters in Rust/C++ where heap allocation has measurable cost. In OCaml, the GC's minor heap makes small allocations nearly free, so this optimization is unnecessary.

OCaml Approach

  • Simulated with variant: `Inline of array * int | Heap of list`
  • Not idiomatic โ€” OCaml's GC handles this transparently
  • Minor heap allocation is a pointer bump (~1ns)
  • Short-lived objects collected in microseconds
  • Programmers don't think about stack vs heap

Rust Approach

  • Enum with const generic: `SmallVec<T, const N: usize>`
  • `Inline { data: [Option<T>; N], len }` for stack storage
  • `Heap(Vec<T>)` after spill
  • Real-world: use `smallvec` or `tinyvec` crate
  • Measurable win for hot paths with small, temporary collections
  • Const generics make capacity a compile-time parameter

Comparison Table

FeatureOCamlRust
Heap alloc cost~1ns (GC bump)~20-50ns (system allocator)
Optimization neededNoYes, for hot paths
Stack storageN/A (GC managed)`[Option<T>; N]` or MaybeUninit
Spill pointN/AConfigurable via const generic
Real-world implNot used`smallvec`, `tinyvec` crates
Key benefitNone (GC is fast)Avoid allocator + better locality