// 764. Binary Serialization: Length-Prefixed Records
// Length-prefixed binary format (length + value, no type tag), std-only
// ── Encoder ────────────────────────────────────────────────────────────────────
/// Append-only builder for the big-endian, length-prefixed byte format.
///
/// Wraps a growable byte buffer: every `write_*` method appends the encoded
/// value to the end, and [`BinaryWriter::finish`] hands the buffer back.
#[derive(Debug, Default)]
pub struct BinaryWriter(Vec<u8>);

impl BinaryWriter {
    /// Creates a writer with an empty buffer.
    pub fn new() -> Self {
        Self(Vec::new())
    }

    /// Appends a single byte.
    pub fn write_u8(&mut self, v: u8) {
        self.0.push(v);
    }

    /// Appends `v` as 4 bytes, most significant byte first.
    pub fn write_u32_be(&mut self, v: u32) {
        self.0.extend_from_slice(&v.to_be_bytes());
    }

    /// Appends `v` as 8 bytes, most significant byte first.
    pub fn write_u64_be(&mut self, v: u64) {
        self.0.extend_from_slice(&v.to_be_bytes());
    }

    /// Appends `v` as one byte: `1` for `true`, `0` for `false`.
    pub fn write_bool(&mut self, v: bool) {
        self.write_u8(u8::from(v));
    }

    /// Appends a length-prefixed string: a big-endian `u32` byte length
    /// followed by the UTF-8 bytes of `s`.
    ///
    /// # Panics
    ///
    /// Panics if `s` is longer than `u32::MAX` bytes. A plain `as u32` cast
    /// would silently truncate the prefix and produce a stream that cannot be
    /// decoded, so the oversized input is rejected loudly instead.
    pub fn write_string(&mut self, s: &str) {
        let len = u32::try_from(s.len())
            .expect("string longer than u32::MAX bytes cannot be length-prefixed");
        self.write_u32_be(len);
        self.0.extend_from_slice(s.as_bytes());
    }

    /// Consumes the writer and returns everything written so far.
    pub fn finish(self) -> Vec<u8> {
        self.0
    }
}
// ── Decoder ────────────────────────────────────────────────────────────────────
/// Reasons decoding a byte stream can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeError {
    /// The input ended before the requested number of bytes could be read.
    UnexpectedEof,
    /// The payload of a length-prefixed string was not valid UTF-8.
    InvalidUtf8,
}

impl std::fmt::Display for DecodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let msg = match self {
            Self::UnexpectedEof => "unexpected end of input",
            Self::InvalidUtf8 => "string payload is not valid UTF-8",
        };
        f.write_str(msg)
    }
}

// Lets callers propagate decode failures with `?` into `Box<dyn Error>`.
impl std::error::Error for DecodeError {}
pub struct BinaryReader<'a> {
data: &'a [u8],
pos: usize,
}
impl<'a> BinaryReader<'a> {
pub fn new(data: &'a [u8]) -> Self { Self { data, pos: 0 } }
fn consume(&mut self, n: usize) -> Result<&'a [u8], DecodeError> {
if self.pos + n > self.data.len() { return Err(DecodeError::UnexpectedEof); }
let slice = &self.data[self.pos..self.pos + n];
self.pos += n;
Ok(slice)
}
pub fn read_u8(&mut self) -> Result<u8, DecodeError> {
Ok(self.consume(1)?[0])
}
pub fn read_u32_be(&mut self) -> Result<u32, DecodeError> {
let b = self.consume(4)?;
Ok(u32::from_be_bytes(b.try_into().unwrap()))
}
pub fn read_u64_be(&mut self) -> Result<u64, DecodeError> {
let b = self.consume(8)?;
Ok(u64::from_be_bytes(b.try_into().unwrap()))
}
pub fn read_bool(&mut self) -> Result<bool, DecodeError> {
Ok(self.read_u8()? != 0)
}
pub fn read_string(&mut self) -> Result<&'a str, DecodeError> {
let len = self.read_u32_be()? as usize;
let bytes = self.consume(len)?;
std::str::from_utf8(bytes).map_err(|_| DecodeError::InvalidUtf8)
}
}
// ── Domain type ────────────────────────────────────────────────────────────────
/// Example domain record that is serialized with `BinaryWriter` and parsed
/// back with `BinaryReader`.
#[derive(Debug, PartialEq)]
pub struct Person {
    /// Name, written as a length-prefixed string.
    pub name: String,
    /// Age, written as a big-endian `u32`.
    pub age: u32,
    /// Activity flag, written as a single byte.
    pub active: bool,
}

impl Person {
    /// Serializes the record as `name`, then `age`, then `active`.
    pub fn encode(&self) -> Vec<u8> {
        let Self { name, age, active } = self;
        let mut writer = BinaryWriter::new();
        writer.write_string(name);
        writer.write_u32_be(*age);
        writer.write_bool(*active);
        writer.finish()
    }

    /// Parses a record from the start of `data`, reading the fields in the
    /// order `encode` writes them. Bytes after the `active` flag are not
    /// inspected.
    ///
    /// # Errors
    ///
    /// Propagates whatever `DecodeError` the underlying reads report.
    pub fn decode(data: &[u8]) -> Result<Self, DecodeError> {
        let mut reader = BinaryReader::new(data);
        // Field initialisers run in the order written, which is the wire order.
        Ok(Self {
            name: String::from(reader.read_string()?),
            age: reader.read_u32_be()?,
            active: reader.read_bool()?,
        })
    }
}
/// Renders `data` as two-digit uppercase hex bytes separated by single spaces
/// (an empty slice gives an empty string).
fn hex_dump(data: &[u8]) -> String {
    let mut out = String::with_capacity(data.len() * 3);
    for (index, byte) in data.iter().enumerate() {
        // Separator goes between bytes only, never at either end.
        if index > 0 {
            out.push(' ');
        }
        out.push_str(&format!("{byte:02X}"));
    }
    out
}
/// Demo: round-trips one `Person`, then frames two records into a single
/// buffer (each preceded by its `u32` big-endian byte length) and decodes
/// them back one by one.
fn main() {
    // Single record round trip.
    let alice = Person { name: String::from("Alice"), age: 30, active: true };
    let encoded = alice.encode();
    println!("Encoded ({} bytes): {}", encoded.len(), hex_dump(&encoded));
    let decoded = Person::decode(&encoded).expect("decode failed");
    println!("Decoded: {decoded:?}");

    // Several records packed back to back, each behind a 4-byte length header.
    let people = [
        Person { name: String::from("Bob"), age: 25, active: false },
        Person { name: String::from("Carol"), age: 35, active: true },
    ];
    let mut buf: Vec<u8> = Vec::new();
    for person in people.iter() {
        let payload = person.encode();
        let header = (payload.len() as u32).to_be_bytes();
        buf.extend_from_slice(&header);
        buf.extend_from_slice(&payload);
    }
    println!("\nMulti-record buffer ({} bytes): {}", buf.len(), hex_dump(&buf));

    // Walk the buffer frame by frame until no complete header is left.
    let mut cursor = 0;
    loop {
        let body_start = cursor + 4;
        if body_start > buf.len() {
            break;
        }
        let mut header = [0u8; 4];
        header.copy_from_slice(&buf[cursor..body_start]);
        let body_end = body_start + u32::from_be_bytes(header) as usize;
        let p = Person::decode(&buf[body_start..body_end]).unwrap();
        println!(" Record: {p:?}");
        cursor = body_end;
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding then decoding yields the original record.
    #[test]
    fn round_trip() {
        let p = Person { name: "Dave".into(), age: 40, active: true };
        assert_eq!(p, Person::decode(&p.encode()).unwrap());
    }

    /// A name made of multi-byte UTF-8 characters survives the round trip.
    #[test]
    fn utf8_name() {
        let p = Person { name: "ΓmΓΌr".into(), age: 33, active: false };
        assert_eq!(p, Person::decode(&p.encode()).unwrap());
    }

    /// Empty input fails at the very first read.
    #[test]
    fn eof_error() {
        assert!(matches!(Person::decode(&[]), Err(DecodeError::UnexpectedEof)));
    }

    /// Every strict prefix of a valid record is missing at least one byte.
    #[test]
    fn truncated_record_error() {
        let enc = Person { name: "Fay".into(), age: 7, active: true }.encode();
        for cut in 0..enc.len() {
            assert!(
                matches!(Person::decode(&enc[..cut]), Err(DecodeError::UnexpectedEof)),
                "prefix of {cut} bytes should be reported as truncated"
            );
        }
    }

    /// A name payload that is not UTF-8 is rejected rather than accepted.
    #[test]
    fn invalid_utf8_error() {
        // Length prefix 1, then 0xFF (never valid in UTF-8), then age and flag.
        let bytes = [0, 0, 0, 1, 0xFF, 0, 0, 0, 0, 0];
        assert!(matches!(Person::decode(&bytes), Err(DecodeError::InvalidUtf8)));
    }

    /// The name is written as a 4-byte big-endian length followed by its bytes.
    #[test]
    fn length_prefix_correct() {
        let p = Person { name: "Ed".into(), age: 1, active: false };
        let enc = p.encode();
        // first 4 bytes = 2 (length of "Ed")
        assert_eq!(&enc[..4], &[0, 0, 0, 2]);
        // next 2 bytes = "Ed"
        assert_eq!(&enc[4..6], b"Ed");
    }
}
(* Binary format: length-prefixed records in OCaml *)
(* ── Encoder ────────────────────────────────────────────────────────────────── *)
(* Encode the low 32 bits of [n] as 4 bytes, most significant byte first. *)
let encode_u32 n =
  (* Byte [i] carries the 8 bits starting at bit [8 * (3 - i)]. *)
  Bytes.init 4 (fun i -> Char.chr ((n lsr (8 * (3 - i))) land 0xFF))
(* Encode [s] as its byte length (via [encode_u32]) followed by its bytes. *)
let encode_string s =
  let payload = Bytes.of_string s in
  let header = encode_u32 (Bytes.length payload) in
  Bytes.concat Bytes.empty [header; payload]
(* Record: length-prefixed string name + u32 age + bool active (1 byte); no tag byte is written *)
(* Person record; [encode] writes the fields in this order: name, age, active. *)
type person = { name: string; age: int; active: bool }
(* Serialize [p] as: length-prefixed name, u32 age, then one byte that is
   1 when [p.active] holds and 0 otherwise. *)
let encode p =
  let out = Buffer.create 64 in
  let flag = if p.active then 1 else 0 in
  List.iter (Buffer.add_bytes out) [encode_string p.name; encode_u32 p.age];
  Buffer.add_uint8 out flag;
  Buffer.to_bytes out
(* ── Decoder ────────────────────────────────────────────────────────────────── *)
(* Read 4 bytes starting at [pos], most significant first, and return the
   value together with the position just past them. *)
let decode_u32 bytes pos =
  let value =
    List.fold_left
      (fun acc offset -> (acc lsl 8) lor Bytes.get_uint8 bytes (pos + offset))
      0
      [0; 1; 2; 3]
  in
  (value, pos + 4)
(* Read a u32 length at [pos], then that many bytes as a string; return the
   string together with the position just past it. *)
let decode_string bytes pos =
  let len, start = decode_u32 bytes pos in
  let text = Bytes.sub_string bytes start len in
  (text, start + len)
(* Parse a person from the start of [bytes]: name, then age, then the active
   byte, which counts as true only when it equals 1. *)
let decode bytes =
  let name, after_name = decode_string bytes 0 in
  let age, after_age = decode_u32 bytes after_name in
  let active = (Bytes.get_uint8 bytes after_age = 1) in
  { name; age; active }
(* Render [bytes] as two-digit uppercase hex values separated by spaces. *)
let hex_dump bytes =
  let render i = Printf.sprintf "%02X" (Bytes.get_uint8 bytes i) in
  String.concat " " (List.init (Bytes.length bytes) render)
(* Demo: encode one person, print the bytes, decode them and print the fields. *)
let () =
  let alice = { name = "Alice"; age = 30; active = true } in
  let wire = encode alice in
  Printf.printf "Encoded (%d bytes): %s\n" (Bytes.length wire) (hex_dump wire);
  let { name; age; active } = decode wire in
  Printf.printf "Decoded: name=%s age=%d active=%b\n" name age active