// 1050: String Interning — Dedup Strings to IDs
// Map strings to unique integer IDs for O(1) comparison
use std::collections::HashMap;
/// Interned string handle — cheap to copy and compare
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct Symbol(usize);
/// String interner: maps strings ↔ unique IDs
struct Interner {
to_id: HashMap<String, Symbol>,
to_str: Vec<String>,
}
impl Interner {
fn new() -> Self {
Interner {
to_id: HashMap::new(),
to_str: Vec::new(),
}
}
/// Intern a string: returns existing ID or assigns a new one
fn intern(&mut self, s: &str) -> Symbol {
if let Some(&id) = self.to_id.get(s) {
return id;
}
let id = Symbol(self.to_str.len());
self.to_str.push(s.to_string());
self.to_id.insert(s.to_string(), id);
id
}
/// Resolve a symbol back to its string
fn resolve(&self, sym: Symbol) -> Option<&str> {
self.to_str.get(sym.0).map(|s| s.as_str())
}
/// Number of interned strings
fn len(&self) -> usize {
self.to_str.len()
}
}
fn basic_interning() {
let mut interner = Interner::new();
let id1 = interner.intern("hello");
let id2 = interner.intern("world");
let id3 = interner.intern("hello"); // Same as id1
assert_eq!(id1, id3); // Same string → same ID
assert_ne!(id1, id2); // Different strings → different IDs
assert_eq!(interner.resolve(id1), Some("hello"));
assert_eq!(interner.resolve(id2), Some("world"));
assert_eq!(interner.len(), 2); // Only 2 unique strings
}
fn fast_comparison() {
let mut interner = Interner::new();
let words = vec!["the", "cat", "sat", "on", "the", "mat", "the", "cat"];
let ids: Vec<Symbol> = words.iter().map(|w| interner.intern(w)).collect();
// Compare IDs instead of strings: integer comparison is O(1)
let the_id = ids[0];
let count = ids.iter().filter(|&&id| id == the_id).count();
assert_eq!(count, 3); // "the" appears 3 times
// Frequency counting with symbols is faster than with strings
let mut freq: HashMap<Symbol, usize> = HashMap::new();
for &id in &ids {
*freq.entry(id).or_insert(0) += 1;
}
assert_eq!(freq[&the_id], 3);
}
fn symbol_table() {
let mut interner = Interner::new();
let vars = vec!["x", "y", "x", "z", "y", "x"];
let interned: Vec<Symbol> = vars.iter().map(|v| interner.intern(v)).collect();
// Only 3 unique symbols
assert_eq!(interner.len(), 3);
// Dedup using a set of symbols
let mut unique: Vec<Symbol> = interned.clone();
unique.sort_by_key(|s| s.0);
unique.dedup();
assert_eq!(unique.len(), 3);
// Resolve back to strings
let names: Vec<&str> = unique
.iter()
.filter_map(|&sym| interner.resolve(sym))
.collect();
assert_eq!(names.len(), 3);
}
/// Symbols work great as HashMap keys (faster than String keys)
fn symbol_as_key() {
let mut interner = Interner::new();
let mut values: HashMap<Symbol, i32> = HashMap::new();
let x = interner.intern("x");
let y = interner.intern("y");
values.insert(x, 42);
values.insert(y, 99);
assert_eq!(values[&x], 42);
assert_eq!(values[&y], 99);
// Symbol is Copy — no cloning needed
let key = x;
assert_eq!(values[&key], 42);
}
fn main() {
basic_interning();
fast_comparison();
symbol_table();
symbol_as_key();
println!("✓ All tests passed");
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic() { basic_interning(); }
#[test]
fn test_fast_compare() { fast_comparison(); }
#[test]
fn test_symbols() { symbol_table(); }
#[test]
fn test_symbol_key() { symbol_as_key(); }
#[test]
fn test_empty_string() {
let mut interner = Interner::new();
let empty = interner.intern("");
assert_eq!(interner.resolve(empty), Some(""));
}
#[test]
fn test_symbol_is_copy() {
let mut interner = Interner::new();
let sym = interner.intern("test");
let copy = sym; // Copy, not move
assert_eq!(sym, copy); // Both still valid
}
}
(* 1050: String Interning — Dedup Strings to IDs *)
(* Map strings to unique integer IDs for efficient comparison/storage *)
(* Approach 1: Basic string interner using Hashtbl *)
module StringInterner = struct
type t = {
to_id: (string, int) Hashtbl.t;
to_str: (int, string) Hashtbl.t;
mutable next_id: int;
}
let create () = {
to_id = Hashtbl.create 64;
to_str = Hashtbl.create 64;
next_id = 0;
}
let intern interner s =
match Hashtbl.find_opt interner.to_id s with
| Some id -> id
| None ->
let id = interner.next_id in
interner.next_id <- id + 1;
Hashtbl.add interner.to_id s id;
Hashtbl.add interner.to_str id s;
id
let resolve interner id =
Hashtbl.find_opt interner.to_str id
let len interner = interner.next_id
end
let basic_interning () =
let interner = StringInterner.create () in
let id1 = StringInterner.intern interner "hello" in
let id2 = StringInterner.intern interner "world" in
let id3 = StringInterner.intern interner "hello" in (* same as id1 *)
assert (id1 = id3); (* Same string → same ID *)
assert (id1 <> id2); (* Different strings → different IDs *)
assert (StringInterner.resolve interner id1 = Some "hello");
assert (StringInterner.resolve interner id2 = Some "world");
assert (StringInterner.len interner = 2)
(* Approach 2: Interned comparison is O(1) *)
let fast_comparison () =
let interner = StringInterner.create () in
let words = ["the"; "cat"; "sat"; "on"; "the"; "mat"; "the"; "cat"] in
let ids = List.map (StringInterner.intern interner) words in
(* Compare IDs instead of strings: integer comparison is O(1) *)
let the_id = List.hd ids in
let count = List.length (List.filter (fun id -> id = the_id) ids) in
assert (count = 3) (* "the" appears 3 times *)
(* Approach 3: Interned symbol table *)
let symbol_table () =
let interner = StringInterner.create () in
let vars = ["x"; "y"; "x"; "z"; "y"; "x"] in
let interned = List.map (StringInterner.intern interner) vars in
(* Only 3 unique symbols *)
assert (StringInterner.len interner = 3);
(* Dedup by comparing IDs *)
let unique = List.sort_uniq compare interned in
assert (List.length unique = 3);
(* Resolve back *)
let names = List.filter_map (StringInterner.resolve interner) unique in
assert (List.length names = 3)
let () =
basic_interning ();
fast_comparison ();
symbol_table ();
Printf.printf "✓ All tests passed\n"