// 718. Soundness, undefined behaviour, and safety invariants
//
// Demonstrates:
// 1. Writing safe abstractions over unsafe internals
// 2. Safety contracts (// SAFETY: comments)
// 3. Common UB sources and how to avoid them
// 4. Invariant maintenance across the safe/unsafe boundary
// โโ Pattern 1: Safe abstraction with a maintained invariant โโโโโโโโโโโโโโโโโโ
/// A slice whose elements are guaranteed to be sorted in ascending order.
///
/// The invariant is maintained by the type: only `SortedVec::insert` can
/// add elements, and it keeps the order. There is no public way to violate
/// the invariant from safe code.
pub struct SortedVec<T: Ord>(Vec<T>);
impl<T: Ord> SortedVec<T> {
pub fn new() -> Self { Self(Vec::new()) }
/// Insert `val` while keeping the sorted order (binary-search insertion).
pub fn insert(&mut self, val: T) {
let pos = self.0.partition_point(|x| x <= &val);
self.0.insert(pos, val);
}
/// Get element by index without a bounds check.
///
/// # Safety
/// `idx` must be `< self.len()`. Violating this is immediate UB.
pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
// SAFETY: Caller guarantees idx < self.len(), so this index is
// within the allocated Vec buffer and produces a valid reference.
unsafe { self.0.get_unchecked(idx) }
}
pub fn len(&self) -> usize { self.0.len() }
pub fn as_slice(&self) -> &[T] { &self.0 }
}
// โโ Pattern 2: NonNull slice โ a sound raw-pointer abstraction โโโโโโโโโโโโโโโ
use std::ptr::NonNull;
/// An owned, heap-allocated slice that is always non-null and properly
/// initialised. Safe to construct; `unsafe` only for raw pointer operations.
pub struct OwnedSlice<T> {
ptr: NonNull<T>,
len: usize,
cap: usize,
}
impl<T: Copy + Default> OwnedSlice<T> {
/// Allocate a slice of `len` zero-initialised elements.
pub fn new(len: usize) -> Self {
let mut v: Vec<T> = vec![T::default(); len];
let cap = v.capacity();
let ptr = v.as_mut_ptr();
std::mem::forget(v); // We take ownership from the Vec.
// SAFETY: `ptr` came from a live `Vec` and is therefore non-null.
let ptr = unsafe { NonNull::new_unchecked(ptr) };
Self { ptr, len, cap }
}
/// Write a value at `idx`.
///
/// # Panics
/// If `idx >= self.len`.
pub fn set(&mut self, idx: usize, val: T) {
assert!(idx < self.len, "index out of bounds");
// SAFETY: `idx < self.len <= self.cap` and `ptr` is valid for writes.
// We have exclusive access (`&mut self`), so no aliasing occurs.
unsafe { self.ptr.as_ptr().add(idx).write(val); }
}
/// Read the value at `idx`.
///
/// # Panics
/// If `idx >= self.len`.
pub fn get(&self, idx: usize) -> T {
assert!(idx < self.len, "index out of bounds");
// SAFETY: `idx < self.len <= self.cap`, ptr is valid for reads,
// and `T: Copy` so we can safely copy out the value.
unsafe { self.ptr.as_ptr().add(idx).read() }
}
pub fn len(&self) -> usize { self.len }
}
impl<T> Drop for OwnedSlice<T> {
fn drop(&mut self) {
// SAFETY: `ptr`, `len`, `cap` were obtained from a `Vec<T>` in `new()`.
// We call `drop_in_place` for each element, then free the allocation.
unsafe {
// Drop each element individually.
for i in 0..self.len {
std::ptr::drop_in_place(self.ptr.as_ptr().add(i));
}
// Reconstruct Vec to free the allocation.
let _ = Vec::from_raw_parts(self.ptr.as_ptr(), 0, self.cap);
}
}
}
// โโ Pattern 3: Common UB pitfalls (and how we prevent them) โโโโโโโโโโโโโโโโโโ
/// Demonstrates a dangling-pointer UB pattern โ INTENTIONALLY left as a
/// commented-out illustration. Running this code would be UB.
#[allow(dead_code)]
fn _ub_would_be_here() {
// --- UB EXAMPLE 1: Dangling pointer ---
// let dangling: *const i32 = {
// let x = 42i32;
// &x as *const i32 // `x` dropped here; pointer is dangling
// };
// let _ = unsafe { *dangling }; // UB: read through dangling pointer
// --- UB EXAMPLE 2: Aliased mutable references ---
// let mut x = 5i32;
// let r1 = &mut x as *mut i32;
// let r2 = &mut x as *mut i32;
// unsafe {
// *r1 = 10; // OK
// *r2 = 20; // UB: r1 and r2 alias โ violates exclusivity rule
// }
// --- UB EXAMPLE 3: Out-of-bounds read ---
// let arr = [1i32, 2, 3];
// let _ = unsafe { *arr.as_ptr().add(5) }; // UB: out of bounds
}
/// Safe alternative to the above patterns using standard library primitives.
fn safe_alternatives() {
// Safe shared access with indices โ no raw pointers needed.
let v = vec![10i32, 20, 30];
let sum: i32 = v.iter().sum();
println!("Safe sum: {}", sum);
// Swap two elements via indices (borrow checker prevents aliased &mut).
let mut v = vec![1, 2, 3, 4, 5];
v.swap(1, 3);
println!("After swap(1,3): {:?}", v);
}
// โโ Pattern 4: Invariant proofs via type-state โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
/// Type-state pattern: a buffer that must be locked before reading/writing.
pub struct Buffer<State> {
data: Vec<u8>,
_state: std::marker::PhantomData<State>,
}
pub struct Unlocked;
pub struct Locked;
impl Buffer<Unlocked> {
pub fn new(size: usize) -> Self {
Self { data: vec![0u8; size], _state: std::marker::PhantomData }
}
/// Lock the buffer โ transitions to the `Locked` state.
pub fn lock(self) -> Buffer<Locked> {
Buffer { data: self.data, _state: std::marker::PhantomData }
}
}
impl Buffer<Locked> {
pub fn read(&self) -> &[u8] { &self.data }
/// Write to the buffer. Only possible in the `Locked` state.
pub fn write(&mut self, offset: usize, bytes: &[u8]) {
let end = (offset + bytes.len()).min(self.data.len());
self.data[offset..end].copy_from_slice(&bytes[..end - offset]);
}
pub fn unlock(self) -> Buffer<Unlocked> {
Buffer { data: self.data, _state: std::marker::PhantomData }
}
}
// โโ main โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
fn main() {
// SortedVec
let mut sv = SortedVec::new();
for &n in &[5, 2, 8, 1, 9, 3] { sv.insert(n); }
println!("Sorted: {:?}", sv.as_slice());
// SAFETY: index 2 < sv.len() (which is 6)
let third = unsafe { sv.get_unchecked(2) };
println!("Third element (unchecked): {}", third);
// OwnedSlice
let mut slice = OwnedSlice::<i32>::new(5);
slice.set(0, 100);
slice.set(4, 400);
println!("OwnedSlice[0]={} [4]={}", slice.get(0), slice.get(4));
// Safe alternatives
safe_alternatives();
// Type-state buffer
let buf = Buffer::<Unlocked>::new(16);
let mut buf = buf.lock();
buf.write(0, b"Hello Rust");
println!("Buffer contents: {:?}", &buf.read()[..10]);
let _buf = buf.unlock();
}
// โโ Tests โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sorted_vec_maintains_order() {
let mut sv = SortedVec::new();
for n in [5, 1, 3, 2, 4] { sv.insert(n); }
assert_eq!(sv.as_slice(), &[1, 2, 3, 4, 5]);
}
#[test]
fn sorted_vec_unchecked_valid_index() {
let mut sv = SortedVec::new();
sv.insert(42);
// SAFETY: index 0 < sv.len() (which is 1)
let val = unsafe { sv.get_unchecked(0) };
assert_eq!(*val, 42);
}
#[test]
fn owned_slice_reads_writes() {
let mut s = OwnedSlice::<u8>::new(4);
s.set(0, 10);
s.set(3, 30);
assert_eq!(s.get(0), 10);
assert_eq!(s.get(1), 0); // default
assert_eq!(s.get(3), 30);
}
#[test]
#[should_panic(expected = "index out of bounds")]
fn owned_slice_oob_panics() {
let s = OwnedSlice::<u8>::new(4);
s.get(10); // should panic
}
#[test]
fn type_state_buffer_roundtrip() {
let buf = Buffer::<Unlocked>::new(8);
let mut buf = buf.lock();
buf.write(0, b"test");
assert_eq!(&buf.read()[..4], b"test");
let _unlocked = buf.unlock();
}
}
(* OCaml: Soundness through module abstraction and type encapsulation.
OCaml has no "unsafe" keyword โ soundness is enforced by the module system
and the GC. We show the analogous patterns. *)
(* --- Pattern 1: Invariant through private type --- *)
(* A "sorted list" that maintains the sorted invariant via its interface. *)
module SortedList : sig
type t
val empty : t
val insert : int -> t -> t
val to_list : t -> int list
(* There is no public constructor that bypasses the invariant *)
end = struct
type t = int list (* Invariant: always sorted ascending *)
let empty = []
let rec insert x = function
| [] -> [x]
| h :: tl when x <= h -> x :: h :: tl
| h :: tl -> h :: insert x tl
let to_list xs = xs
end
let () =
let s = SortedList.empty
|> SortedList.insert 5
|> SortedList.insert 2
|> SortedList.insert 8
|> SortedList.insert 1
in
Printf.printf "Sorted: %s\n"
(String.concat ", " (List.map string_of_int (SortedList.to_list s)))
(* --- Pattern 2: Use-after-free is impossible in OCaml --- *)
(* In OCaml, the GC keeps objects alive as long as any reference exists.
There is no analogue to Rust's dangling pointer. *)
let dangling_demo () =
let r = ref (Array.make 10 0) in
let alias = !r in (* Both point to the same heap object *)
r := Array.make 10 1; (* `r` now points to a new array *)
(* `alias` still safely references the old array โ GC won't collect it *)
Printf.printf "alias[0] = %d (safe, GC keeps it alive)\n" alias.(0)
let () = dangling_demo ()
(* --- Pattern 3: Aliasing in OCaml --- *)
(* OCaml's mutable references can alias โ unlike Rust's &mut T โ but the GC
ensures this doesn't lead to UB. Rust's borrow checker prevents aliasing
&mut references entirely in safe code. *)
let aliasing_demo () =
let a = ref 42 in
let b = a in (* `a` and `b` alias *)
b := 100; (* Mutate through `b` *)
Printf.printf "a = %d (aliased mutation is safe in OCaml, forbidden in Rust)\n" !a
let () = aliasing_demo ()
(* --- Pattern 4: "Safety contract" via documentation --- *)
(* OCaml has no unsafe blocks, but complex C FFI bindings carry the same
obligation โ you must document what the caller must guarantee. *)
(** [unsafe_index arr i] returns [arr.(i)] without bounds checking.
@raise Undefined_behavior if [i < 0] or [i >= Array.length arr].
Caller must ensure [0 <= i < Array.length arr]. *)
external unsafe_get : 'a array -> int -> 'a = "%array_unsafe_get"
let () =
let arr = [|10; 20; 30|] in
(* We verify the precondition before calling *)
let i = 1 in
if i >= 0 && i < Array.length arr then
Printf.printf "unsafe_get[%d] = %d\n" i (unsafe_get arr i)