๐Ÿฆ€ Functional Rust

1033: Top-K Elements with BinaryHeap

Difficulty: Intermediate Category: Data Structures Concept: Priority queues and the top-K selection pattern using min-heaps Key Insight: Wrap elements in `Reverse<T>` to turn Rust's max-heap `BinaryHeap` into a min-heap โ€” maintain a min-heap of size K and evict the smallest to keep only the K largest.
// 1033: Top-K Elements with BinaryHeap
// BinaryHeap is a max-heap; use Reverse<T> for min-heap behavior

use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// Top-K using a min-heap of size K
/// Keep only the K largest elements by evicting the smallest
fn top_k(k: usize, data: &[i32]) -> Vec<i32> {
    let mut heap: BinaryHeap<Reverse<i32>> = BinaryHeap::with_capacity(k + 1);

    for &val in data {
        heap.push(Reverse(val));
        if heap.len() > k {
            heap.pop(); // Remove smallest (which is the max in Reverse heap)
        }
    }

    let mut result: Vec<i32> = heap.into_iter().map(|Reverse(x)| x).collect();
    result.sort_unstable_by(|a, b| b.cmp(a)); // Descending
    result
}

fn test_top_k() {
    let data = vec![3, 1, 4, 1, 5, 9, 2, 6, 5, 3];
    let result = top_k(3, &data);
    assert_eq!(result, vec![9, 6, 5]);

    let result = top_k(1, &data);
    assert_eq!(result, vec![9]);
}

/// BinaryHeap as a max-heap: natural ordering
fn max_heap_demo() {
    let mut heap = BinaryHeap::new();
    heap.push(3);
    heap.push(1);
    heap.push(4);
    heap.push(1);
    heap.push(5);

    // pop() returns largest first
    assert_eq!(heap.pop(), Some(5));
    assert_eq!(heap.pop(), Some(4));
    assert_eq!(heap.pop(), Some(3));

    // peek without removing
    assert_eq!(heap.peek(), Some(&1));
}

/// Top-K with custom key function
fn top_k_by<T, K, F>(k: usize, data: &[T], key_fn: F) -> Vec<&T>
where
    K: Ord,
    F: Fn(&T) -> K,
{
    let mut heap: BinaryHeap<Reverse<(K, usize)>> = BinaryHeap::new();

    for (i, item) in data.iter().enumerate() {
        heap.push(Reverse((key_fn(item), i)));
        if heap.len() > k {
            heap.pop();
        }
    }

    let mut indices: Vec<usize> = heap.into_iter().map(|Reverse((_, i))| i).collect();
    indices.sort_by(|&a, &b| key_fn(&data[b]).cmp(&key_fn(&data[a])).then(b.cmp(&a)));
    indices.iter().map(|&i| &data[i]).collect()
}

fn test_top_k_by() {
    let words = vec!["hi", "hello", "hey", "howdy", "h"];
    let longest3 = top_k_by(3, &words, |w| w.len());
    assert_eq!(longest3.len(), 3);
    assert_eq!(*longest3[0], "howdy");
    assert_eq!(*longest3[1], "hello");
}


#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_top_k_basic() { test_top_k(); }

    #[test]
    fn test_max_heap() { max_heap_demo(); }

    #[test]
    fn test_custom_key() { test_top_k_by(); }

    #[test]
    fn test_from_vec() {
        let v = vec![5, 3, 8, 1, 9];
        let heap: BinaryHeap<_> = v.into_iter().collect();
        let sorted: Vec<_> = heap.into_sorted_vec();
        assert_eq!(sorted, vec![1, 3, 5, 8, 9]); // ascending
    }

    #[test]
    fn test_min_heap() {
        let mut min_heap: BinaryHeap<Reverse<i32>> = BinaryHeap::new();
        min_heap.push(Reverse(5));
        min_heap.push(Reverse(1));
        min_heap.push(Reverse(3));
        assert_eq!(min_heap.pop(), Some(Reverse(1))); // smallest first
    }
}
(* 1033: Top-K Elements with Binary Heap *)
(* OCaml has no built-in heap โ€” we use sorted insertion or a manual heap *)

(* Approach 1: Sort and take top-k *)
let top_k_sort k lst =
  lst |> List.sort (fun a b -> compare b a) |> List.filteri (fun i _ -> i < k)

let sort_approach () =
  let data = [3; 1; 4; 1; 5; 9; 2; 6; 5; 3] in
  let top3 = top_k_sort 3 data in
  assert (top3 = [9; 6; 5])

(* Approach 2: Maintain a min-heap of size k using a sorted list *)
(* Keep only k largest elements *)
let top_k_linear k lst =
  let insert_bounded heap x =
    if List.length heap < k then
      List.sort compare (x :: heap)
    else
      let min_val = List.hd heap in
      if x > min_val then
        List.sort compare (x :: List.tl heap)
      else
        heap
  in
  List.fold_left (fun heap x -> insert_bounded heap x) [] lst

let bounded_heap_approach () =
  let data = [3; 1; 4; 1; 5; 9; 2; 6; 5; 3] in
  let top3 = top_k_linear 3 data |> List.rev in
  assert (top3 = [9; 6; 5])

(* Approach 3: Top-k with key function *)
let top_k_by k key_fn lst =
  lst
  |> List.map (fun x -> (key_fn x, x))
  |> List.sort (fun (a, _) (b, _) -> compare b a)
  |> List.filteri (fun i _ -> i < k)
  |> List.map snd

let top_k_by_test () =
  let words = ["hi"; "hello"; "hey"; "howdy"; "h"] in
  let longest3 = top_k_by 3 String.length words in
  assert (List.length longest3 = 3);
  assert (List.hd longest3 = "howdy")

let () =
  sort_approach ();
  bounded_heap_approach ();
  top_k_by_test ();
  Printf.printf "โœ“ All tests passed\n"

๐Ÿ“Š Detailed Comparison

Top-K Elements with BinaryHeap โ€” Comparison

Core Insight

The top-K problem is a classic priority queue application. Maintain a min-heap of size K; for each element, push it and evict the smallest if heap exceeds K. Result: only the K largest remain. Rust has `BinaryHeap` in std; OCaml has no built-in heap.

OCaml Approach

  • No stdlib heap โ€” must use sorted list or manual implementation
  • Sort-and-take: O(n log n) โ€” simple but not optimal
  • Bounded sorted list: maintain sorted list of size K, insert with binary-ish placement
  • Functorized priority queue libraries exist but aren't in stdlib

Rust Approach

  • `BinaryHeap<T>` โ€” max-heap by default
  • `BinaryHeap<Reverse<T>>` โ€” min-heap via newtype wrapper
  • `push` + `pop` for bounded heap pattern
  • `into_sorted_vec()` for sorted extraction
  • `peek()` for O(1) max/min access

Comparison Table

FeatureOCamlRust (`BinaryHeap`)
Stdlib heapNoYes
Default orderN/AMax-heap
Min-heapN/A`Reverse<T>` wrapper
Top-K complexityO(n log n) sortO(n log k) heap
Heap sortManual`into_sorted_vec()`
PeekN/A`peek()` O(1)
Custom orderingN/AImplement `Ord`