🦀 Functional Rust

332: Retry Async

Difficulty: 3 | Level: Advanced. Retry failed operations with exponential backoff — the foundation of resilient async services.

The Problem This Solves

In distributed systems, transient failures are the norm: a database is briefly overloaded, a DNS lookup times out, a downstream API returns 503. Treating every failure as permanent and propagating an error immediately makes services fragile. What you need is a principled retry loop: try again, wait a bit longer each time, and give up only after a reasonable number of attempts. Naive retry is easy to write but easy to get wrong: forgetting to distinguish transient from permanent errors (retrying a 404 is pointless), using a fixed delay (can cause thundering-herd storms), or allowing infinite retries (can cascade into runaway loops). This example formalizes all three concerns: `Transient` vs `Permanent` error variants, configurable exponential backoff with a multiplier, and a hard limit on attempts. In async Rust with tokio, you'd replace `thread::sleep` with `tokio::time::sleep(delay).await`. The retry logic itself is identical.

The Intuition

Like a JavaScript `fetchWithRetry`:
// Fetches `url`, making at most `attempts` calls to `fetch` in total.
// After the i-th failed call (0-based) it awaits `sleep(delay * 2**i)` before
// trying again, so the wait doubles each time. When every attempt fails, the
// returned promise rejects with the last error instead of silently resolving
// to `undefined`.
async function fetchWithRetry(url, attempts = 3, delay = 100) {
  let lastError;
  for (let i = 0; i < attempts; i++) {
    try {
      return await fetch(url);
    } catch (e) {
      lastError = e;
      // No point in waiting after the final attempt.
      if (i < attempts - 1) await sleep(delay * 2 ** i);
    }
  }
  // Reached only when no attempt succeeded (or none was made at all).
  throw lastError ?? new Error(`fetchWithRetry: no attempts made (attempts = ${attempts})`);
}
Rust's version is more explicit about why it's failing (`Transient` = worth retrying, `Permanent` = stop immediately). This distinction prevents retrying authentication errors, validation failures, or "not found" responses.

How It Works in Rust

/// Failure classification that tells the retry loop whether another attempt is worthwhile.
#[derive(Debug, Clone)]
enum RetryError<E> {
    /// Worth retrying — network blip, timeout, overload.
    Transient(E),
    /// Don't retry — bad input, auth failure, 404.
    Permanent(E),
}

/// Tuning knobs for `retry`.
struct RetryConfig {
    /// Upper bound on how many times the operation is invoked.
    max_attempts: usize,
    /// Pause taken after the first transient failure.
    base_delay: Duration,
    /// Factor the pause is scaled by after each sleep.
    multiplier: f64,
}

/// Calls `f` until it succeeds, fails permanently, or `cfg.max_attempts` calls have been made.
///
/// After each transient failure that still has attempts left, the thread sleeps for the
/// current delay and the delay is scaled by `cfg.multiplier`. `f` always runs at least
/// once, so `max_attempts == 0` behaves like `1` instead of panicking. `E` does not need
/// to be `Clone`; the error is moved out, never copied.
///
/// # Panics
/// `Duration::mul_f64` panics if the scaled delay is negative, not finite, or overflows.
fn retry<T, E>(
    cfg: &RetryConfig,
    mut f: impl FnMut() -> Result<T, RetryError<E>>,
) -> Result<T, E> {
    let mut delay = cfg.base_delay;
    // Every attempt except the last one: a transient failure here is followed by a wait.
    for _attempt in 1..cfg.max_attempts {
        match f() {
            Ok(v) => return Ok(v),
            Err(RetryError::Permanent(e)) => return Err(e), // bail immediately
            Err(RetryError::Transient(_)) => {
                thread::sleep(delay);
                delay = delay.mul_f64(cfg.multiplier); // exponential backoff
            }
        }
    }
    // Final attempt: whatever it yields is the answer, so no sleep afterwards.
    match f() {
        Ok(v) => Ok(v),
        Err(RetryError::Permanent(e)) | Err(RetryError::Transient(e)) => Err(e),
    }
}
`FnMut` (not `Fn`) lets the closure carry mutable state — like a counter tracking which attempt it's on. `Duration::mul_f64` doubles (or n×) the wait on each retry.

What This Unlocks

Key Differences

| Concept | OCaml | Rust |
|---|---|---|
| Error variants | `retry_err = Transient \| Permanent` (custom type) | `RetryError<E>` (generic enum) |
| Retry loop | Recursive `loop i d` with `Thread.delay d` | Iterative `for` with `thread::sleep` |
| Delay scaling | `d *. 2.0` (float multiply) | `delay.mul_f64(multiplier)` on `Duration` |
| Closure type | `unit -> ('a, retry_err) result` | `FnMut() -> Result<T, RetryError<E>>` |
use std::thread;
use std::time::Duration;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

/// Error wrapper that tells `retry` how to react to a failed call.
#[derive(Debug, Clone)]
enum RetryError<E> {
    /// The call may succeed if repeated; `retry` tries again while attempts remain.
    Transient(E),
    /// Repeating the call is pointless; `retry` returns the error immediately.
    Permanent(E),
}

/// Settings that control how `retry` paces and bounds its attempts.
struct RetryConfig {
    /// Upper bound on how many times the operation is invoked.
    max_attempts: usize,
    /// Sleep taken after the first transient failure.
    base_delay: Duration,
    /// Factor the sleep is scaled by after every wait.
    multiplier: f64,
}

impl Default for RetryConfig {
    fn default() -> Self { Self { max_attempts: 3, base_delay: Duration::from_millis(5), multiplier: 2.0 } }
}

/// Runs `f` until it succeeds, reports a permanent error, or has been called
/// `cfg.max_attempts` times.
///
/// * `Ok(v)` is returned as soon as `f` produces it.
/// * `RetryError::Permanent(e)` stops the loop immediately with `Err(e)`.
/// * `RetryError::Transient(_)` with attempts left prints a notice, sleeps for the
///   current delay, then scales the delay by `cfg.multiplier`.
/// * A transient error on the final attempt is returned as `Err(e)` without sleeping.
///
/// `f` is always invoked at least once, so `max_attempts == 0` is treated like `1`
/// rather than panicking. `E` does not need to be `Clone`; the error is moved out,
/// never copied, which also allows error types such as `std::io::Error`.
///
/// # Panics
/// `Duration::mul_f64` panics if the scaled delay is negative, not finite, or overflows.
fn retry<T, E>(
    cfg: &RetryConfig,
    mut f: impl FnMut() -> Result<T, RetryError<E>>,
) -> Result<T, E> {
    let mut delay = cfg.base_delay;
    // Every attempt except the last: a transient failure here is followed by a wait.
    for attempt in 1..cfg.max_attempts {
        match f() {
            Ok(v) => return Ok(v),
            Err(RetryError::Permanent(e)) => return Err(e),
            Err(RetryError::Transient(_)) => {
                println!("Attempt {attempt} failed, retrying in {}ms", delay.as_millis());
                thread::sleep(delay);
                delay = delay.mul_f64(cfg.multiplier);
            }
        }
    }
    // Final attempt: its outcome is returned as-is, with no trailing sleep.
    match f() {
        Ok(v) => Ok(v),
        Err(RetryError::Permanent(e)) | Err(RetryError::Transient(e)) => Err(e),
    }
}

/// Demo driver: one operation that recovers after two transient failures, and one
/// that fails permanently on its first call.
fn main() {
    // Scenario 1: the first two calls fail transiently, the third succeeds.
    let calls = Arc::new(AtomicUsize::new(0));
    let calls_in_op = Arc::clone(&calls);
    let flaky = move || {
        let seen = calls_in_op.fetch_add(1, Ordering::SeqCst);
        if seen >= 2 {
            Ok(42)
        } else {
            Err(RetryError::Transient(format!("not ready ({})", seen + 1)))
        }
    };
    let outcome = retry(&RetryConfig::default(), flaky);
    println!("Success after {} attempts: {:?}", calls.load(Ordering::SeqCst), outcome);

    // Scenario 2: a permanent error is returned even though five attempts are allowed.
    let five_tries = RetryConfig {
        max_attempts: 5,
        ..Default::default()
    };
    let result: Result<i32, String> =
        retry(&five_tries, || Err(RetryError::Permanent("fatal".into())));
    println!("Permanent: {result:?}");
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Two transient failures followed by a success: the operation runs three times.
    #[test]
    fn succeeds_after_retries() {
        let calls = Arc::new(AtomicUsize::new(0));
        let calls_in_op = Arc::clone(&calls);
        let cfg = RetryConfig {
            base_delay: Duration::from_millis(1),
            ..Default::default()
        };
        let outcome: Result<i32, String> = retry(&cfg, move || {
            match calls_in_op.fetch_add(1, Ordering::SeqCst) {
                0 | 1 => Err(RetryError::Transient("nope".into())),
                _ => Ok(99),
            }
        });
        assert_eq!(outcome.unwrap(), 99);
        assert_eq!(calls.load(Ordering::SeqCst), 3);
    }

    /// A permanent error ends the loop after a single call.
    #[test]
    fn permanent_no_retry() {
        let calls = Arc::new(AtomicUsize::new(0));
        let calls_in_op = Arc::clone(&calls);
        let always_fatal = move || {
            calls_in_op.fetch_add(1, Ordering::SeqCst);
            Err(RetryError::Permanent("fatal".into()))
        };
        let _: Result<i32, String> = retry(&RetryConfig::default(), always_fatal);
        assert_eq!(calls.load(Ordering::SeqCst), 1);
    }
}
(* OCaml: retry with exponential backoff *)

(* Failure classes reported by a retried operation: [Transient] failures are
   tried again, [Permanent] failures end the retry loop at once. *)
type retry_err =
  | Transient of string
  | Permanent of string

(* [retry ?max ?delay f] calls [f ()] up to [max] times (default 3).
   - [Ok v] is returned as soon as [f] produces it.
   - [Error (Permanent m)] stops immediately with [Error ("permanent: " ^ m)].
   - [Error (Transient m)] is logged; if attempts remain, the thread waits [d]
     seconds (starting at [delay], doubling each time) and tries again.
   Once [max] attempts have failed the result is [Error "max attempts exceeded"],
   returned right away with no trailing [Thread.delay] after the last attempt. *)
let retry ?(max=3) ?(delay=0.01) f =
  let rec loop i d =
    if i > max then Error "max attempts exceeded"
    else match f () with
    | Ok v -> Ok v
    | Error (Permanent m) -> Error ("permanent: "^m)
    | Error (Transient m) ->
      Printf.printf "Attempt %d failed: %s\n" i m;
      (* Sleeping only makes sense when another attempt will follow. *)
      if i >= max then Error "max attempts exceeded"
      else begin
        Thread.delay d;
        loop (i+1) (d*.2.0)
      end
  in loop 1 delay

(* Number of times [flaky] has been called so far. *)
let n = ref 0

(* Fails transiently on its first two calls, then succeeds with 42. *)
let flaky () =
  incr n;
  if !n >= 3 then Ok 42
  else Error (Transient "not ready")

(* Entry point: retry [flaky] with the default settings and report the outcome. *)
let () =
  let report = function
    | Ok v -> Printf.printf "Success after %d attempts: %d\n" !n v
    | Error e -> Printf.printf "Failed: %s\n" e
  in
  report (retry flaky)