// 963: Bloom Filter
// Probabilistic membership test: no false negatives, possible false positives
// Uses 3 independent hash functions + bit array (u64 words)
// Approach 1: Three hash functions (djb2, sdbm, fnv-like)
fn hash1(s: &str) -> usize {
s.bytes().fold(5381usize, |h, b| h.wrapping_mul(31).wrapping_add(b as usize))
}
fn hash2(s: &str) -> usize {
s.bytes().fold(0usize, |h, b| {
(b as usize)
.wrapping_add(h.wrapping_shl(6))
.wrapping_add(h.wrapping_shl(16))
.wrapping_sub(h)
})
}
fn hash3(s: &str) -> usize {
s.bytes().fold(0usize, |h, b| h.wrapping_mul(33) ^ (b as usize))
}
// Approach 2: Bloom filter with u64 bit array
pub struct BloomFilter {
bits: Vec<u64>,
num_bits: usize,
}
impl BloomFilter {
pub fn new(num_bits: usize) -> Self {
let words = (num_bits + 63) / 64;
BloomFilter {
bits: vec![0u64; words],
num_bits,
}
}
fn set_bit(&mut self, i: usize) {
let idx = i % self.num_bits;
let word = idx / 64;
let bit = idx % 64;
self.bits[word] |= 1u64 << bit;
}
fn get_bit(&self, i: usize) -> bool {
let idx = i % self.num_bits;
let word = idx / 64;
let bit = idx % 64;
(self.bits[word] >> bit) & 1 == 1
}
pub fn add(&mut self, s: &str) {
self.set_bit(hash1(s));
self.set_bit(hash2(s));
self.set_bit(hash3(s));
}
pub fn might_contain(&self, s: &str) -> bool {
self.get_bit(hash1(s)) && self.get_bit(hash2(s)) && self.get_bit(hash3(s))
}
pub fn count_set_bits(&self) -> u32 {
self.bits.iter().map(|w| w.count_ones()).sum()
}
/// Estimated false positive rate given n items inserted
pub fn false_positive_rate(&self, n: usize) -> f64 {
let k = 3.0_f64; // number of hash functions
let m = self.num_bits as f64;
(1.0 - (-k * n as f64 / m).exp()).powf(k)
}
}
fn main() {
let mut bf = BloomFilter::new(1024);
bf.add("apple");
bf.add("banana");
bf.add("cherry");
println!("apple: {}", bf.might_contain("apple"));
println!("banana: {}", bf.might_contain("banana"));
println!("cherry: {}", bf.might_contain("cherry"));
println!("dragon: {} (might be false positive)", bf.might_contain("dragon"));
println!("bits set: {}", bf.count_set_bits());
println!("false positive rate (3 items): {:.4}", bf.false_positive_rate(3));
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_inserted_items_found() {
let mut bf = BloomFilter::new(1024);
let words = ["apple", "banana", "cherry", "dog", "elephant"];
for w in &words {
bf.add(w);
}
for w in &words {
assert!(bf.might_contain(w), "must contain '{}'", w);
}
}
#[test]
fn test_false_positive_rate_reasonable() {
let mut bf = BloomFilter::new(10_000);
for i in 0..100 {
bf.add(&format!("item_{}", i));
}
// Check 1000 non-inserted items, count false positives
let fp: usize = (0..1000)
.filter(|i| bf.might_contain(&format!("not_inserted_{}", i)))
.count();
// With 10000 bits, 3 hashes, 100 items โ FP rate ~ 0.001
// Allow up to 5% false positives in test
assert!(fp < 50, "too many false positives: {}/1000", fp);
}
#[test]
fn test_bit_operations() {
let mut bf = BloomFilter::new(128);
bf.set_bit(0);
bf.set_bit(63);
bf.set_bit(64);
bf.set_bit(127);
assert!(bf.get_bit(0));
assert!(bf.get_bit(63));
assert!(bf.get_bit(64));
assert!(bf.get_bit(127));
assert!(!bf.get_bit(1));
assert!(!bf.get_bit(100));
}
#[test]
fn test_count_set_bits() {
let mut bf = BloomFilter::new(64);
assert_eq!(bf.count_set_bits(), 0);
bf.add("test");
assert!(bf.count_set_bits() > 0);
assert!(bf.count_set_bits() <= 3);
}
}
(* 963: Bloom Filter *)
(* Probabilistic set membership: may have false positives, no false negatives *)
(* Simple hash functions using djb2 and sdbm *)
let hash1 s =
String.fold_left (fun h c -> h * 31 + Char.code c) 5381 s
let hash2 s =
String.fold_left (fun h c -> (Char.code c) + (h lsl 6) + (h lsl 16) - h) 0 s
let hash3 s =
String.fold_left (fun h c -> h * 33 lxor Char.code c) 0 s
(* Approach 1: Bloom filter with fixed bit array *)
type bloom_filter = {
bits: bool array;
size: int;
}
let create size = { bits = Array.make size false; size }
let add bf s =
let h1 = abs (hash1 s) mod bf.size in
let h2 = abs (hash2 s) mod bf.size in
let h3 = abs (hash3 s) mod bf.size in
bf.bits.(h1) <- true;
bf.bits.(h2) <- true;
bf.bits.(h3) <- true
let might_contain bf s =
let h1 = abs (hash1 s) mod bf.size in
let h2 = abs (hash2 s) mod bf.size in
let h3 = abs (hash3 s) mod bf.size in
bf.bits.(h1) && bf.bits.(h2) && bf.bits.(h3)
(* Approach 2: Compact bit representation using int array *)
let create_compact num_bits =
let words = (num_bits + 62) / 63 in
Array.make words 0
let set_bit bits i =
let word = i / 63 in
let bit = i mod 63 in
bits.(word) <- bits.(word) lor (1 lsl bit)
let get_bit bits i =
let word = i / 63 in
let bit = i mod 63 in
(bits.(word) lsr bit) land 1 = 1
let () =
let bf = create 1024 in
add bf "apple";
add bf "banana";
add bf "cherry";
(* All added items must be found *)
assert (might_contain bf "apple");
assert (might_contain bf "banana");
assert (might_contain bf "cherry");
(* These might or might not be found (false positives possible) *)
(* but very unlikely with 1024 bits and 3 hashes for these strings *)
let likely_absent = not (might_contain bf "dragon") in
let _ = likely_absent in (* suppressing: could be false positive *)
(* Compact bit array test *)
let bits = create_compact 128 in
set_bit bits 0;
set_bit bits 63;
set_bit bits 127;
assert (get_bit bits 0);
assert (get_bit bits 63);
assert (get_bit bits 127);
assert (not (get_bit bits 1));
assert (not (get_bit bits 64));
Printf.printf "โ All tests passed\n"