main.go
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
package main

import (
	"context"
	"errors"
	"fmt"
	"log"
	"math/rand"
	"time"
)

// ErrPermanent marks a failure that must not be retried. retry returns
// immediately when fn yields an error for which errors.Is(err, ErrPermanent)
// reports true, so callers can wrap it with %w to add context.
var ErrPermanent = errors.New("permanent error")

// retry calls fn until it succeeds, returns a permanent error, the attempt
// budget is spent, or ctx is done.
//
// Between attempts it waits for an exponentially growing delay: the nominal
// delay before attempt i+1 is base*2^(i-1), capped at max. The nominal delay
// is then jittered uniformly into roughly [5/6, 7/6] of its value, so an
// individual sleep may exceed max by up to one sixth.
//
// It returns:
//   - nil as soon as fn returns nil;
//   - an error if attempts < 1 (fn is never called);
//   - ctx.Err() if ctx is done before an attempt or while waiting;
//   - fn's error unchanged if it matches ErrPermanent;
//   - the error from the final attempt once all attempts have failed.
func retry(ctx context.Context, attempts int, base, max time.Duration, fn func() error) error {
	if attempts < 1 {
		return fmt.Errorf("attempts must be >= 1")
	}
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	var last error

	for i := 1; i <= attempts; i++ {
		// Do not start another attempt once the caller has given up.
		if err := ctx.Err(); err != nil {
			return err
		}

		err := fn()
		if err == nil {
			return nil
		}
		if errors.Is(err, ErrPermanent) {
			return err
		}
		last = err

		// No point sleeping after the final attempt.
		if i == attempts {
			break
		}

		d := backoffDelay(i, base, max)
		// Int63n panics on n <= 0; d is guaranteed non-negative here, so
		// int64(d/3)+1 is always at least 1.
		low := d - d/6
		sleep := low + time.Duration(rng.Int63n(int64(d/3)+1))
		if sleep < low {
			// Adding jitter wrapped around for an extreme max; fall back to
			// the un-jittered lower bound instead of a negative sleep.
			sleep = low
		}

		t := time.NewTimer(sleep)
		select {
		case <-ctx.Done():
			t.Stop()
			return ctx.Err()
		case <-t.C:
		}
	}

	return last
}

// backoffDelay returns min(base*2^(attempt-1), max) without overflowing, and
// 0 when base or max is not positive.
func backoffDelay(attempt int, base, max time.Duration) time.Duration {
	if base <= 0 || max <= 0 {
		return 0
	}
	d := base
	for n := 1; n < attempt && d < max; n++ {
		// If d is already past half of max, doubling would exceed max (and
		// could overflow int64), so saturate instead of multiplying.
		if d > max/2 {
			return max
		}
		d *= 2
	}
	if d > max {
		d = max
	}
	return d
}

// main demonstrates retry with an operation that fails on its first two
// calls and succeeds on the third, under an overall five-second deadline.
func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	const (
		maxAttempts = 5
		baseDelay   = 200 * time.Millisecond
		maxDelay    = 2 * time.Second
	)

	// tries counts how many times the simulated operation has been invoked.
	var tries int
	flaky := func() error {
		tries++
		if tries >= 3 {
			return nil
		}
		return fmt.Errorf("transient failure (try %d)", tries)
	}

	if err := retry(ctx, maxAttempts, baseDelay, maxDelay, flaky); err != nil {
		log.Fatalf("retry failed: %v", err)
	}
	log.Printf("succeeded after %d tries", tries)
}

How It Works

Reusable retry helper that applies exponential backoff with jitter, supports context cancellation, and lets callers mark an error as non-retryable by returning (or wrapping) ErrPermanent.

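Accepts a function to execute and calls it up to a fixed number of attempts, stopping on success or when the context is cancelled or its deadline passes. Between attempts it sleeps for the base duration multiplied by successive powers of two (capped at a maximum) with randomized jitter, and it stops immediately when the returned error matches ErrPermanent.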

Key Concepts

  1. Exponential backoff reduces pressure on flaky dependencies.
  2. Jitter randomizes delays to avoid synchronized retries.
  3. A retry predicate gives callers control over which errors to retry; here it is a sentinel check, and errors matching ErrPermanent are never retried.

When to Use This Pattern

  • Wrapping outbound network calls or database operations.
  • Transient file or lock acquisition failures.
  • General-purpose utility shared across services.

Best Practices

  • Cap the maximum backoff to keep latency predictable.
  • Set an overall timeout in the context.
  • Log attempts and include attempt count in metrics.
Go Version: 1.18+
Difficulty: intermediate
Production Ready: Yes
Lines of Code: 74