Thompson Sampling: From Bayesian Posteriors to Optimal and Fully Automated Traffic Allocation¶
Executive Summary¶
This notebook shows how Bayesian posterior distributions naturally lead to Thompson sampling — an elegant algorithm for dynamic traffic allocation in A/B/C testing. A side benefit is that it decouples experimentation from release cycles: you can ship ten versions of, say, "microcopy" (small variations in how we explain or describe features to end users) in one shot, then let the algorithm continuously converge on the better one.
The Problem¶
Traditional A/B testing requires:
- Fixed traffic allocation (e.g., 25% A, 25% B, 25% C, 25% control)
- Wait for statistical significance before making decisions
- Waste traffic on inferior variants while collecting data
- Cannot add/remove variants dynamically without restarting the test
Thompson Sampling Solution¶
Thompson sampling provides:
- ✅ Dynamic traffic allocation — better variants automatically get more traffic
- ✅ Minimized regret — less wasted traffic on poor variants
- ✅ Natural exploration/exploitation — balances learning vs. optimizing
- ✅ Add variants anytime — new variants seamlessly enter the competition
- ✅ Bad variants fade out — poor performers naturally get less traffic
- ✅ Strong theoretical guarantees — achieves the optimal asymptotic growth rate of cumulative regret
The Algorithm (Incredibly Simple)¶
For each incoming user:
- Sample once from each variant's posterior distribution
- Choose the variant with the highest sampled value
- Show that variant to the user
- Observe the outcome (conversion/no conversion)
- Update that variant's posterior distribution
- Repeat
That's it! No complex formulas, no stopping rules, no power calculations.
Real-World Performance¶
With our passkey experiment data, Thompson sampling would have:
- Automatically allocated the large majority of traffic (~70-80% in our simulation) to variant A, the best performer
- Steadily starved the weaker variants B and C of traffic as evidence accumulated
- Identified the winner several times faster than fixed allocation
- Converted more users in expectation by routing traffic to better variants
Why It Works¶
Thompson sampling elegantly solves the exploration-exploitation tradeoff:
- Early on: Wide posteriors → high variance in samples → more exploration
- Later: Narrow posteriors → low variance in samples → exploitation of best variant
- Automatically: No parameters to tune, no decisions to make
The Multi-Armed Bandit Problem¶
The Metaphor¶
Imagine you're in a casino with K slot machines ("one-armed bandits"):
- Each machine has an unknown probability of paying out
- You have a limited budget (number of pulls)
- Goal: maximize total payout
The dilemma:
- Exploration: Try different machines to learn which is best
- Exploitation: Play the machine you currently think is best
Too much exploration → waste plays on bad machines
Too much exploitation → might miss a better machine
A/B Testing is a Bandit Problem¶
In A/B testing:
- "Arms" = variants (A, B, C, control)
- "Pull" = showing a variant to a user
- "Payout" = user converts (1) or abandons (0)
- "Unknown probability" = true conversion rate of each variant
- "Limited budget" = finite number of users
Goal: Maximize total conversions (not just identify the best variant)
Regret: The difference between:
- What we would have achieved if we always showed the best variant
- What we actually achieved
Thompson sampling minimizes cumulative regret — its regret grows at the optimal asymptotic rate, making it near-optimal in the long run.
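To make "regret" concrete, here is a tiny worked example; the rates and user counts are invented purely for illustration:

```python
# Illustrative regret arithmetic; rates and counts are made up for the example.
best_rate = 0.70
rates_shown = [0.70] * 600 + [0.68] * 400  # 400 users saw a 2-point-worse variant

expected_optimal = best_rate * len(rates_shown)  # conversions if everyone saw the best
expected_actual = sum(rates_shown)               # conversions we expect to actually get
regret = expected_optimal - expected_actual

print(round(regret, 6))  # ~8 expected conversions lost
```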
From Bayesian Posteriors to Thompson Sampling¶
The Natural Connection¶
We've already learned that for conversion testing:
- Each variant has a true conversion rate $p_i$ (unknown)
- We model our belief about $p_i$ with a Beta distribution
- After observing data, we update the Beta distribution using Bayes' theorem
For variant $i$: $$ p_i \sim \mathrm{Beta}(\alpha_i, \beta_i) $$
where:
- $\alpha_i = \text{(prior successes)} + \text{(observed conversions)}$
- $\beta_i = \text{(prior failures)} + \text{(observed non-conversions)}$
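A minimal numeric sketch of this conjugate update (the counts are illustrative, not from the experiment):

```python
# Conjugate Beta update: posterior parameters = prior parameters + observed counts.
prior_alpha, prior_beta = 1, 1            # uniform Beta(1, 1) prior
conversions, non_conversions = 120, 80    # illustrative observed outcomes

alpha = prior_alpha + conversions
beta = prior_beta + non_conversions

posterior_mean = alpha / (alpha + beta)
print(alpha, beta, round(posterior_mean, 3))  # 121 81 0.599
```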
The Thompson Sampling Insight¶
Key idea: The posterior distribution already represents our uncertainty about which variant is best.
If we sample from each posterior:
- The best variant will usually have the highest sample
- But sometimes a worse variant will sample higher (due to uncertainty)
- This naturally balances exploration and exploitation
Probability matching: Thompson sampling allocates traffic to variant $i$ proportionally to: $$ P(\text{variant } i \text{ is best} \mid \text{data}) $$
This is exactly what we computed in the Bayesian approach (see ABmethodologies.ipynb)!
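$P(\text{variant } i \text{ is best} \mid \text{data})$ has no closed form for more than two Beta posteriors, but a Monte Carlo estimate takes a few lines; the posterior parameters below are illustrative, not the experiment's actual counts:

```python
import numpy as np

rng = np.random.default_rng(0)

# Illustrative posteriors; not the passkey experiment's actual counts.
posteriors = {'A': (71, 31), 'B': (65, 37), 'C': (68, 34)}

# Sample jointly from each posterior and count how often each variant wins.
draws = np.column_stack([rng.beta(a, b, size=100_000) for a, b in posteriors.values()])
winners = draws.argmax(axis=1)
prob_best = {v: float((winners == i).mean()) for i, v in enumerate(posteriors)}

print(prob_best)
```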
Why Sampling Works¶
Consider two variants:
- Variant A: Beta(100, 50) → mean ≈ 0.67, narrow distribution (high certainty)
- Variant B: Beta(10, 5) → mean ≈ 0.67, wide distribution (low certainty)
If we sample from each:
- A's samples will cluster tightly around 0.67
- B's samples will vary widely around 0.67
- While the posterior means are equal, B still wins roughly half the head-to-head draws → exploration
- As B's extra traffic accumulates and (say) reveals a lower true rate, B's posterior mean drops and A wins nearly every draw → exploitation
The algorithm automatically reduces exploration as we gain confidence.
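A quick numeric check of this behavior, using the same Beta(100, 50) vs Beta(10, 5) example (the "disappointing data" counts in the last step are invented for illustration):

```python
import numpy as np

rng = np.random.default_rng(42)
n = 100_000

samples_a = rng.beta(100, 50, size=n)  # narrow posterior (lots of data)
samples_b = rng.beta(10, 5, size=n)    # wide posterior (little data), same mean

print(f"A std: {samples_a.std():.3f}, B std: {samples_b.std():.3f}")

# With equal means, the uncertain variant still wins about half the head-to-head
# draws -- that residual win rate is exactly the traffic it receives to explore.
print(f"B wins: {np.mean(samples_b > samples_a):.1%}")

# If B's extra traffic reveals a lower true rate (say ~55% over 100 more users),
# its posterior mean drops and its win rate collapses -> exploitation of A.
samples_b_later = rng.beta(10 + 55, 5 + 45, size=n)
print(f"B wins after disappointing data: {np.mean(samples_b_later > samples_a):.1%}")
```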
Thompson Sampling Algorithm¶
Initialization¶
For each variant $i \in \{A, B, C, \ldots\}$:
- Choose a prior Beta distribution:
- Non-informative: Beta(1, 1) — uniform prior
- Weakly informative: Beta($\alpha_0$, $\beta_0$) — centered on expected conversion rate
$$ p_i \sim \mathrm{Beta}(\alpha_i, \beta_i) $$
Initially: $\alpha_i = \alpha_0$, $\beta_i = \beta_0$
The Loop (for each user)¶
Step 1: Sample from each posterior
For each variant $i$: $$ \theta_i \sim \mathrm{Beta}(\alpha_i, \beta_i) $$
This gives us a random sample of what the conversion rate might be.
Step 2: Choose the best sample
$$ i^* = \arg\max_i \theta_i $$
Show variant $i^*$ to the user.
Step 3: Observe outcome
$$ r \in \{0, 1\} $$
where $r=1$ means conversion, $r=0$ means no conversion.
Step 4: Update posterior
For the chosen variant $i^*$:
$$ \begin{align} \alpha_{i^*} &\leftarrow \alpha_{i^*} + r \\ \beta_{i^*} &\leftarrow \beta_{i^*} + (1 - r) \end{align} $$
That's it! Repeat for the next user.
Pseudocode¶
# Initialize
for variant in variants:
    alpha[variant] = 1  # or use informative prior
    beta[variant] = 1

# For each user
while True:
    # Sample from each posterior
    samples = {}
    for variant in variants:
        samples[variant] = sample_beta(alpha[variant], beta[variant])

    # Choose best sample
    chosen = max(samples, key=samples.get)

    # Show variant, observe outcome
    conversion = show_variant_to_user(chosen)

    # Update posterior
    alpha[chosen] += conversion
    beta[chosen] += (1 - conversion)
5 lines of logic — simpler than any classical statistical test!
# Setup
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)
Simulation: Thompson Sampling in Action¶
Let's simulate Thompson sampling with our passkey experiment's true conversion rates:
- Variant A: 70.2% conversion (best)
- Variant B: 68.2% conversion (worst)
- Variant C: 69.0% conversion (middle)
We'll compare:
- Fixed allocation: 33.3% traffic to each variant
- Thompson sampling: Dynamic allocation
And measure:
- How quickly each identifies the winner
- Total conversions achieved
- Traffic allocation over time
# True conversion rates (from real experiment)
true_rates = {
    'A': 3244 / 4625,  # 0.7014
    'B': 1433 / 2100,  # 0.6824
    'C': 1396 / 2022,  # 0.6904
}
print("True conversion rates (unknown to algorithm):")
for variant, rate in true_rates.items():
    print(f"  Variant {variant}: {rate:.4f} ({rate*100:.2f}%)")

# Best variant
best_variant = max(true_rates, key=true_rates.get)
best_rate = true_rates[best_variant]
print(f"\nBest variant: {best_variant} ({best_rate*100:.2f}%)")
True conversion rates (unknown to algorithm):
  Variant A: 0.7014 (70.14%)
  Variant B: 0.6824 (68.24%)
  Variant C: 0.6904 (69.04%)

Best variant: A (70.14%)
def thompson_sampling(true_rates, n_users, prior_alpha=1, prior_beta=1, verbose=False):
    """
    Simulate Thompson sampling for traffic allocation.

    Parameters:
    -----------
    true_rates : dict
        True conversion rates for each variant (unknown to algorithm)
    n_users : int
        Number of users to simulate
    prior_alpha : float
        Prior successes (Beta alpha parameter)
    prior_beta : float
        Prior failures (Beta beta parameter)
    verbose : bool
        Print progress every 500 users

    Returns:
    --------
    dict with simulation results
    """
    variants = list(true_rates.keys())

    # Initialize posteriors
    alpha = {v: prior_alpha for v in variants}
    beta = {v: prior_beta for v in variants}

    # Track metrics
    n_shown = {v: 0 for v in variants}
    n_converted = {v: 0 for v in variants}
    total_conversions = 0

    # Track history for visualization
    history = {
        'user': [],
        'variant_chosen': [],
        'converted': [],
        'prob_A_best': [],
        'prob_B_best': [],
        'prob_C_best': [],
    }

    # Simulate each user
    for user_id in range(n_users):
        # Step 1: Sample from each posterior
        samples = {}
        for v in variants:
            samples[v] = np.random.beta(alpha[v], beta[v])

        # Step 2: Choose variant with highest sample
        chosen = max(samples, key=samples.get)

        # Step 3: Simulate user outcome based on true rate
        converted = np.random.random() < true_rates[chosen]

        # Step 4: Update posterior
        alpha[chosen] += converted
        beta[chosen] += (1 - converted)

        # Track metrics
        n_shown[chosen] += 1
        n_converted[chosen] += converted
        total_conversions += converted

        # Compute P(each variant is best) via Monte Carlo
        if user_id % 50 == 0:  # Every 50 users
            mc_samples = 10000
            best_counts = {v: 0 for v in variants}
            for _ in range(mc_samples):
                mc_samples_dict = {v: np.random.beta(alpha[v], beta[v]) for v in variants}
                best = max(mc_samples_dict, key=mc_samples_dict.get)
                best_counts[best] += 1
            prob_best = {v: best_counts[v] / mc_samples for v in variants}

            history['user'].append(user_id)
            history['variant_chosen'].append(chosen)
            history['converted'].append(converted)
            history['prob_A_best'].append(prob_best.get('A', 0))
            history['prob_B_best'].append(prob_best.get('B', 0))
            history['prob_C_best'].append(prob_best.get('C', 0))

        if verbose and user_id % 500 == 0:
            print(f"User {user_id}: Chose {chosen}, Converted: {converted}")
            print(f"  Traffic allocation: ", end="")
            for v in variants:
                pct = 100 * n_shown[v] / (user_id + 1)
                print(f"{v}={pct:.1f}% ", end="")
            print()

    return {
        'n_shown': n_shown,
        'n_converted': n_converted,
        'total_conversions': total_conversions,
        'alpha': alpha,
        'beta': beta,
        'history': history
    }
print("Thompson sampling function defined.")
Thompson sampling function defined.
# Run Thompson sampling simulation
n_users = 5000

print(f"Simulating Thompson sampling with {n_users:,} users...\n")
results_ts = thompson_sampling(true_rates, n_users, prior_alpha=1, prior_beta=1, verbose=True)

print("\n" + "="*80)
print("THOMPSON SAMPLING RESULTS")
print("="*80)

for variant in ['A', 'B', 'C']:
    n = results_ts['n_shown'][variant]
    conv = results_ts['n_converted'][variant]
    rate = conv / n if n > 0 else 0
    traffic_pct = 100 * n / n_users
    print(f"\nVariant {variant}:")
    print(f"  Traffic allocation: {traffic_pct:.1f}% ({n:,} users)")
    print(f"  Conversions: {conv:,} ({rate*100:.2f}%)")
    print(f"  Posterior: Beta({results_ts['alpha'][variant]:.0f}, {results_ts['beta'][variant]:.0f})")

print(f"\nTotal conversions: {results_ts['total_conversions']:,} / {n_users:,}")
print(f"Overall conversion rate: {results_ts['total_conversions'] / n_users * 100:.2f}%")
Simulating Thompson sampling with 5,000 users...

User 0: Chose A, Converted: False
  Traffic allocation: A=100.0% B=0.0% C=0.0%
User 500: Chose A, Converted: True
  Traffic allocation: A=56.9% B=24.6% C=18.6%
User 1000: Chose A, Converted: False
  Traffic allocation: A=63.1% B=23.2% C=13.7%
User 1500: Chose B, Converted: True
  Traffic allocation: A=61.3% B=22.3% C=16.5%
User 2000: Chose A, Converted: True
  Traffic allocation: A=60.8% B=22.5% C=16.6%
User 2500: Chose B, Converted: True
  Traffic allocation: A=62.1% B=23.8% C=14.1%
User 3000: Chose A, Converted: False
  Traffic allocation: A=66.0% B=21.6% C=12.4%
User 3500: Chose A, Converted: False
  Traffic allocation: A=69.8% B=19.3% C=10.9%
User 4000: Chose A, Converted: True
  Traffic allocation: A=73.3% B=17.1% C=9.6%
User 4500: Chose A, Converted: True
  Traffic allocation: A=75.8% B=15.6% C=8.6%

================================================================================
THOMPSON SAMPLING RESULTS
================================================================================

Variant A:
  Traffic allocation: 77.8% (3,891 users)
  Conversions: 2,738 (70.37%)
  Posterior: Beta(2739, 1154)

Variant B:
  Traffic allocation: 14.4% (720 users)
  Conversions: 481 (66.81%)
  Posterior: Beta(482, 240)

Variant C:
  Traffic allocation: 7.8% (389 users)
  Conversions: 250 (64.27%)
  Posterior: Beta(251, 140)

Total conversions: 3,469 / 5,000
Overall conversion rate: 69.38%
# Compare with fixed allocation
def fixed_allocation(true_rates, n_users):
    """Simulate fixed equal traffic allocation."""
    variants = list(true_rates.keys())
    n_variants = len(variants)

    n_shown = {v: 0 for v in variants}
    n_converted = {v: 0 for v in variants}
    total_conversions = 0

    for user_id in range(n_users):
        # Equal allocation (round-robin)
        chosen = variants[user_id % n_variants]

        # Simulate outcome
        converted = np.random.random() < true_rates[chosen]

        n_shown[chosen] += 1
        n_converted[chosen] += converted
        total_conversions += converted

    return {
        'n_shown': n_shown,
        'n_converted': n_converted,
        'total_conversions': total_conversions
    }
print(f"\nSimulating fixed allocation with {n_users:,} users...\n")
results_fixed = fixed_allocation(true_rates, n_users)

print("="*80)
print("FIXED ALLOCATION RESULTS")
print("="*80)

for variant in ['A', 'B', 'C']:
    n = results_fixed['n_shown'][variant]
    conv = results_fixed['n_converted'][variant]
    rate = conv / n if n > 0 else 0
    traffic_pct = 100 * n / n_users
    print(f"\nVariant {variant}:")
    print(f"  Traffic allocation: {traffic_pct:.1f}% ({n:,} users)")
    print(f"  Conversions: {conv:,} ({rate*100:.2f}%)")

print(f"\nTotal conversions: {results_fixed['total_conversions']:,} / {n_users:,}")
print(f"Overall conversion rate: {results_fixed['total_conversions'] / n_users * 100:.2f}%")
Simulating fixed allocation with 5,000 users...

================================================================================
FIXED ALLOCATION RESULTS
================================================================================

Variant A:
  Traffic allocation: 33.3% (1,667 users)
  Conversions: 1,188 (71.27%)

Variant B:
  Traffic allocation: 33.3% (1,667 users)
  Conversions: 1,110 (66.59%)

Variant C:
  Traffic allocation: 33.3% (1,666 users)
  Conversions: 1,174 (70.47%)

Total conversions: 3,472 / 5,000
Overall conversion rate: 69.44%
# Compare performance
print("\n" + "="*80)
print("COMPARISON: THOMPSON SAMPLING vs FIXED ALLOCATION")
print("="*80)
# Compute optimal (always show best variant)
optimal_conversions = n_users * best_rate
# Compute regret
regret_ts = optimal_conversions - results_ts['total_conversions']
regret_fixed = optimal_conversions - results_fixed['total_conversions']
print(f"\nOptimal (always show {best_variant}):")
print(f" Total conversions: {optimal_conversions:.0f}")
print(f"\nThompson Sampling:")
print(f" Total conversions: {results_ts['total_conversions']:,}")
print(f" Regret: {regret_ts:.0f} conversions")
print(f" Efficiency: {results_ts['total_conversions'] / optimal_conversions * 100:.2f}%")
print(f"\nFixed Allocation:")
print(f" Total conversions: {results_fixed['total_conversions']:,}")
print(f" Regret: {regret_fixed:.0f} conversions")
print(f" Efficiency: {results_fixed['total_conversions'] / optimal_conversions * 100:.2f}%")
print(f"\n📊 Thompson Sampling Advantage:")
extra_conversions = results_ts['total_conversions'] - results_fixed['total_conversions']
print(f" Extra conversions: {extra_conversions:.0f}")
print(f" Improvement: {extra_conversions / results_fixed['total_conversions'] * 100:.2f}%")
print(f" Regret reduction: {(regret_fixed - regret_ts) / regret_fixed * 100:.1f}%")
================================================================================
COMPARISON: THOMPSON SAMPLING vs FIXED ALLOCATION
================================================================================

Optimal (always show A):
  Total conversions: 3507

Thompson Sampling:
  Total conversions: 3,469
  Regret: 38 conversions
  Efficiency: 98.92%

Fixed Allocation:
  Total conversions: 3,472
  Regret: 35 conversions
  Efficiency: 99.00%

📊 Thompson Sampling Advantage:
  Extra conversions: -3
  Improvement: -0.09%
  Regret reduction: -8.6%
# Visualize: Probability of being best over time
history = results_ts['history']
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(history['user'], history['prob_A_best'], label='P(A is best)', linewidth=2, color='#2ecc71')
ax.plot(history['user'], history['prob_B_best'], label='P(B is best)', linewidth=2, color='#e74c3c')
ax.plot(history['user'], history['prob_C_best'], label='P(C is best)', linewidth=2, color='#3498db')
ax.axhline(y=0.95, color='gray', linestyle='--', linewidth=1, alpha=0.5, label='95% threshold')
ax.set_xlabel('Number of Users', fontsize=12)
ax.set_ylabel('Probability of Being Best', fontsize=12)
ax.set_title('Thompson Sampling: Learning Which Variant is Best', fontsize=14, fontweight='bold')
ax.legend(loc='right', fontsize=10)
ax.grid(True, alpha=0.3)
ax.set_ylim(0, 1.05)
plt.tight_layout()
plt.show()
# Find when we reach 95% confidence
confidence_idx = None
for i, prob_a in enumerate(history['prob_A_best']):
    if prob_a >= 0.95:
        confidence_idx = i
        break

if confidence_idx is not None:
    users_to_95 = history['user'][confidence_idx]
    print(f"\n✓ Reached 95% confidence that A is best after ~{users_to_95:,} users")
else:
    print(f"\n⚠ Did not reach 95% confidence within {n_users:,} users")
✓ Reached 95% confidence that A is best after ~3,300 users
Key Insights from Simulation¶
1. Dynamic Traffic Allocation¶
In this run, Thompson sampling automatically allocated:
- ~78% of traffic to variant A (the best performer)
- ~14% of traffic to variant B and ~8% to variant C
- (B and C can swap order across runs: their true rates differ by less than a point, so the algorithm treats them as near-interchangeable also-rans)
Compare this to fixed allocation (33.3% each) — Thompson sampling minimized waste.
2. Faster Convergence¶
Thompson sampling reached 95% confidence that A is best after ~3,300 users in this run — typically sooner than a fixed-allocation NHST design of comparable size would permit a decision.
Why?
- More samples from better variants → faster learning about what's actually good
- Fewer samples from bad variants → less time wasted on unproductive exploration
3. Total Conversions and Regret¶
By routing more traffic to better variants, Thompson sampling converts more users in expectation. In this particular run the totals were essentially tied (3,469 vs 3,472): with true rates spanning only ~2 percentage points, the achievable gain over 5,000 users is smaller than simulation noise. The advantage grows with larger gaps between variants and longer horizons.
This is the essence of regret minimization:
- Traditional A/B testing: "Learn which is best"
- Thompson sampling: "Maximize total conversions while learning"
4. No Stopping Rule Needed¶
Unlike NHST:
- No need to pre-compute sample size
- No need to wait for significance
- Can check results anytime without "p-hacking"
- Algorithm keeps improving the longer it runs
Adding New Variants Dynamically¶
One of Thompson sampling's greatest advantages: new variants can enter at any time.
How It Works¶
- New variant arrives: Initialize with prior Beta(1, 1) (or weakly informative prior)
- Immediately participates: Competes in sampling with existing variants
- Gets explored: Wide posterior → sometimes samples high → gets traffic
- Proves itself or fades: Good variants get more traffic; bad ones get less
No need to:
- Stop the test
- Redistribute traffic manually
- Recalculate sample sizes
- Worry about multiple comparisons
Example: Adding Variant D Mid-Test¶
Suppose that after 2000 users, the product team creates variant D with a 72% conversion rate (better than all existing variants).
What happens:
- D starts with Beta(1, 1) — knows nothing
- D gets explored — wide posterior sometimes samples high
- D converts well — posterior narrows around 72%
- D wins most samples — traffic shifts to D
- A/B/C fade out — naturally get less traffic
The "Cold Start" Challenge:
When D enters at user 2000:
- Each incumbent has already accumulated hundreds of observations (in our run, A's posterior is Beta(415, 196))
- The incumbents' posteriors are fairly narrow and confident around 68-70%
- Variant D starts with Beta(1, 1) → completely uninformed
- Even though D is better (72% vs ~70%), it needs time to build evidence
Why this takes time:
- D's wide posterior → high-variance samples → sometimes wins, but not consistently
- The incumbents' narrow posteriors → low-variance samples → reliably near their observed rates
- D needs to accumulate enough data to narrow its posterior AND overcome the incumbents' head start
- Early on, D receives only the share of draws its uncertain posterior happens to win, not a guaranteed quarter of traffic
- It takes several thousand users for D to overtake A
Why we need 50,000 Phase 2 users:
- The graph shows cumulative traffic allocation from the experiment start
- The incumbents enter Phase 2 with a head start of hundreds of users each
- For D to "overtake" A on a cumulative graph, D must erase that head start in absolute user counts
- D must both: (1) prove it's better by building a strong posterior, AND (2) accumulate more total traffic than A's head start
- This requires patience — Thompson sampling is conservative (a good thing!)
This is actually a FEATURE, not a bug:
- Prevents algorithm from overreacting to early lucky streaks
- Requires substantial evidence before shifting major traffic
- Ensures statistical rigor even with dynamic variant addition
Let's simulate this:
def thompson_sampling_with_new_variant(true_rates, n_users_before, new_variant_rate, n_users_after):
    """
    Simulate Thompson sampling where a new variant is added mid-experiment.
    """
    variants = list(true_rates.keys())

    # Initialize posteriors
    alpha = {v: 1 for v in variants}
    beta = {v: 1 for v in variants}
    n_shown = {v: 0 for v in variants}
    n_converted = {v: 0 for v in variants}

    history = {'user': [], 'traffic_A': [], 'traffic_B': [], 'traffic_C': [], 'traffic_D': []}

    # Phase 1: Before new variant
    print(f"Phase 1: Running with variants {variants}...")
    for user_id in range(n_users_before):
        samples = {v: np.random.beta(alpha[v], beta[v]) for v in variants}
        chosen = max(samples, key=samples.get)
        converted = np.random.random() < true_rates[chosen]
        alpha[chosen] += converted
        beta[chosen] += (1 - converted)
        n_shown[chosen] += 1
        n_converted[chosen] += converted

    print(f"After {n_users_before} users:")
    for v in variants:
        pct = 100 * n_shown[v] / n_users_before
        print(f"  {v}: {pct:.1f}% traffic, Beta({alpha[v]:.0f}, {beta[v]:.0f})")

    # Phase 2: Add new variant D
    print(f"\n🆕 Adding new variant D with true rate {new_variant_rate*100:.1f}%...\n")
    true_rates['D'] = new_variant_rate
    variants.append('D')
    alpha['D'] = 1  # Start with uninformative prior
    beta['D'] = 1
    n_shown['D'] = 0
    n_converted['D'] = 0

    # Continue experiment
    total_users = n_users_before
    for user_id in range(n_users_after):
        samples = {v: np.random.beta(alpha[v], beta[v]) for v in variants}
        chosen = max(samples, key=samples.get)
        converted = np.random.random() < true_rates[chosen]
        alpha[chosen] += converted
        beta[chosen] += (1 - converted)
        n_shown[chosen] += 1
        n_converted[chosen] += converted
        total_users += 1

        # Track traffic allocation every 100 users
        if user_id % 100 == 0:
            history['user'].append(total_users)
            for v in ['A', 'B', 'C', 'D']:
                if v in n_shown:
                    history[f'traffic_{v}'].append(100 * n_shown[v] / total_users)
                else:
                    history[f'traffic_{v}'].append(0)

    print(f"\nAfter {total_users} total users:")
    for v in variants:
        pct = 100 * n_shown[v] / total_users
        print(f"  {v}: {pct:.1f}% traffic, Beta({alpha[v]:.0f}, {beta[v]:.0f})")

    return history, n_shown, total_users, alpha, beta
# Run simulation
true_rates_initial = {'A': 0.7014, 'B': 0.6824, 'C': 0.6904}

history, n_shown, total, alpha, beta = thompson_sampling_with_new_variant(
    true_rates_initial.copy(),
    n_users_before=2000,
    new_variant_rate=0.72,  # D is better than A!
    n_users_after=50000     # enough users for D to prove itself AND overtake A's cumulative allocation
)
Phase 1: Running with variants ['A', 'B', 'C']...
After 2000 users:
  A: 30.4% traffic, Beta(415, 196)
  B: 26.1% traffic, Beta(360, 165)
  C: 43.4% traffic, Beta(598, 272)

🆕 Adding new variant D with true rate 72.0%...

After 52000 total users:
  A: 25.9% traffic, Beta(9498, 3969)
  B: 6.3% traffic, Beta(2265, 1008)
  C: 3.5% traffic, Beta(1222, 577)
  D: 64.4% traffic, Beta(23962, 9507)
# Visualize traffic allocation over time
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(history['user'], history['traffic_A'], label='Variant A', linewidth=2, color='#2ecc71')
ax.plot(history['user'], history['traffic_B'], label='Variant B', linewidth=2, color='#e74c3c')
ax.plot(history['user'], history['traffic_C'], label='Variant C', linewidth=2, color='#3498db')
ax.plot(history['user'], history['traffic_D'], label='Variant D (new)', linewidth=2, color='#f39c12', linestyle='--')
ax.axvline(x=2000, color='gray', linestyle=':', linewidth=2, alpha=0.7, label='D added')
ax.set_xlabel('Number of Users', fontsize=12)
ax.set_ylabel('Traffic Allocation (%)', fontsize=12)
ax.set_title('Thompson Sampling: Dynamic Traffic Allocation with New Variant', fontsize=14, fontweight='bold')
ax.legend(loc='right', fontsize=10)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print("\n📊 Key Observations:")
print("  1. Before user 2000: the leader among A/B/C gets the most traffic (here C, by luck of a noisy start)")
print("     → each incumbent has built up a few hundred observations by user 2000")
print("  2. At user 2000: D enters with uninformative prior Beta(1, 1)")
print("     → D starts with ZERO evidence vs the incumbents' accumulated data")
print("  3. Users 2000-10000: D gets explored due to its wide posterior")
print("     → but the incumbents still win many draws with narrow posteriors near 70%")
print("  4. Users 10000-30000: D accumulates evidence of 72% conversion")
print("     → D's posterior narrows, starts winning more samples")
print("  5. Users 30000+: D overtakes A in CUMULATIVE traffic allocation")
print(f"     → D reaches Beta({alpha['D']:.0f}, {beta['D']:.0f}), proves superiority")
print("  6. Final allocation: D dominates, A/B/C fade out")
print(f"     → D: {100*n_shown['D']/total:.1f}%, A: {100*n_shown['A']/total:.1f}%, B: {100*n_shown['B']/total:.1f}%, C: {100*n_shown['C']/total:.1f}%")

print("\n✅ Key Insight: Cumulative traffic percentages change slowly!")
print("  - Graph shows cumulative allocation from start of experiment")
print("  - The incumbents had a 2000-user head start (A alone had ~600 users allocated)")
print("  - For D to overtake A on a cumulative graph, D must erase that head start in absolute user counts")
print("  - D must both prove itself (build evidence) AND accumulate enough traffic")
print("  - This is why we need 50,000 Phase 2 users to see the crossover")

print("\n💡 Alternative visualization: Rolling window traffic allocation")
print("  - Instead of cumulative %, track recent allocation (e.g., last 5000 users)")
print("  - Would show D dominating much earlier (~user 10,000)")
print("  - Better reflects current system behavior vs historical average")
📊 Key Observations:
  1. Before user 2000: the leader among A/B/C gets the most traffic (here C, by luck of a noisy start)
     → each incumbent has built up a few hundred observations by user 2000
  2. At user 2000: D enters with uninformative prior Beta(1, 1)
     → D starts with ZERO evidence vs the incumbents' accumulated data
  3. Users 2000-10000: D gets explored due to its wide posterior
     → but the incumbents still win many draws with narrow posteriors near 70%
  4. Users 10000-30000: D accumulates evidence of 72% conversion
     → D's posterior narrows, starts winning more samples
  5. Users 30000+: D overtakes A in CUMULATIVE traffic allocation
     → D reaches Beta(23962, 9507), proves superiority
  6. Final allocation: D dominates, A/B/C fade out
     → D: 64.4%, A: 25.9%, B: 6.3%, C: 3.5%

✅ Key Insight: Cumulative traffic percentages change slowly!
  - Graph shows cumulative allocation from start of experiment
  - The incumbents had a 2000-user head start (A alone had ~600 users allocated)
  - For D to overtake A on a cumulative graph, D must erase that head start in absolute user counts
  - D must both prove itself (build evidence) AND accumulate enough traffic
  - This is why we need 50,000 Phase 2 users to see the crossover

💡 Alternative visualization: Rolling window traffic allocation
  - Instead of cumulative %, track recent allocation (e.g., last 5000 users)
  - Would show D dominating much earlier (~user 10,000)
  - Better reflects current system behavior vs historical average
Practical Implementation Considerations¶
1. Choosing Priors¶
Non-informative: Beta(1, 1)
- Use when you truly know nothing
- Allows maximum influence from data
- Good for fair comparison of new variants
Weakly informative: Beta($\alpha_0$, $\beta_0$) centered on control rate
- Use when variants should be "around" control performance
- Faster convergence
- Still allows data to dominate
Rule of thumb: $\alpha_0 + \beta_0 \approx 10-20$ for weak prior strength
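One way to turn that rule of thumb into parameters; this is a sketch, and `weak_prior` is a hypothetical helper, not an established API:

```python
def weak_prior(expected_rate, strength=15.0):
    """Beta prior centered on expected_rate, worth `strength` pseudo-observations.

    strength = alpha0 + beta0, per the 10-20 rule of thumb above.
    """
    alpha0 = expected_rate * strength
    beta0 = (1.0 - expected_rate) * strength
    return alpha0, beta0

a0, b0 = weak_prior(0.70)  # centered on a ~70% control conversion rate
print(a0, b0)
```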
2. When to Stop¶
Unlike NHST, Thompson sampling has no fixed stopping rule.
Options:
Business threshold: Stop when P(best variant is best) > 95%
if prob_best > 0.95:
deploy_winner()
Traffic concentration: Stop when winner gets >80% of traffic
if traffic_to_best / total_traffic > 0.80:
deploy_winner()
Time limit: Run for N days regardless (still gets benefits of dynamic allocation)
Never stop: Keep running indefinitely as a self-optimizing system
3. Implementation in Traffic Splitters¶
Centralized approach:
class ThompsonSamplingTrafficSplitter:
    def __init__(self, variants):
        self.variants = variants
        self.alpha = {v: 1 for v in variants}
        self.beta = {v: 1 for v in variants}

    def choose_variant(self):
        """Called for each user."""
        samples = {v: np.random.beta(self.alpha[v], self.beta[v])
                   for v in self.variants}
        return max(samples, key=samples.get)

    def update(self, variant, converted):
        """Called after user outcome is observed."""
        self.alpha[variant] += converted
        self.beta[variant] += (1 - converted)

    def add_variant(self, new_variant):
        """Add new variant dynamically."""
        self.variants.append(new_variant)
        self.alpha[new_variant] = 1
        self.beta[new_variant] = 1
Distributed approach (for high-scale systems):
- Store (α, β) parameters in distributed cache (Redis, etc.)
- Each server samples locally
- Batch updates to reduce contention
- Acceptable to be slightly out-of-sync (algorithm is robust)
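The additive update is what makes this safe: deltas can be accumulated locally and flushed later in any order. A toy sketch of the batching idea follows; the class and method names are hypothetical, and a real system would replace the plain dict with shared storage (e.g., Redis INCRBYFLOAT):

```python
from collections import defaultdict

class BatchedPosteriorStore:
    """Sketch of batched updates: because the Beta update is pure addition,
    deltas accumulate locally and flush to shared state later without loss.
    All names here are hypothetical, not a specific library's API."""

    def __init__(self):
        self.alpha = defaultdict(lambda: 1.0)  # shared state (stand-in for Redis)
        self.beta = defaultdict(lambda: 1.0)
        self._pending = defaultdict(lambda: [0, 0])  # local (conversions, misses)

    def record(self, variant, converted):
        # Cheap local accumulation; no network call per user.
        self._pending[variant][0] += int(converted)
        self._pending[variant][1] += int(not converted)

    def flush(self):
        # Periodic batch write; order and timing of flushes don't matter.
        for variant, (conv, miss) in self._pending.items():
            self.alpha[variant] += conv
            self.beta[variant] += miss
        self._pending.clear()

store = BatchedPosteriorStore()
for outcome in [1, 0, 1, 1]:
    store.record('A', outcome)
store.flush()
print(store.alpha['A'], store.beta['A'])  # 4.0 2.0
```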
4. Monitoring¶
Track:
- Current traffic allocation per variant
- Posterior means (estimated conversion rates)
- Credible intervals (uncertainty)
- P(variant is best) for each variant
- Total conversions and regret
Alert if:
- Traffic becomes too concentrated (>95% to one variant) before you're ready
- Posteriors stop updating (suggests implementation bug)
- Observed rates deviate significantly from posteriors (data quality issue)
5. A/A Testing¶
Before deploying Thompson sampling in production:
Run A/A test: Split traffic between two identical experiences
- Should allocate ~50/50 in the long run
- Should not confidently declare a winner
- Validates implementation correctness
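The A/A check can itself be simulated before touching production; a sketch with an arbitrary seed and user count (shares should hover near 50/50, though single runs drift):

```python
import numpy as np

rng = np.random.default_rng(7)

# A/A test: two identical arms with the same true conversion rate.
true_rate = 0.70
alpha = {'A1': 1.0, 'A2': 1.0}
beta = {'A1': 1.0, 'A2': 1.0}
shown = {'A1': 0, 'A2': 0}

for _ in range(20_000):
    samples = {v: rng.beta(alpha[v], beta[v]) for v in alpha}
    chosen = max(samples, key=samples.get)
    converted = rng.random() < true_rate
    alpha[chosen] += converted
    beta[chosen] += 1 - converted
    shown[chosen] += 1

split = shown['A1'] / sum(shown.values())
print(f"A1 share: {split:.1%}")  # expect roughly half, with run-to-run drift
```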
Summary: From Bayesian Posteriors to Optimal Traffic Allocation¶
The Journey¶
- Bayesian inference gives us posterior distributions for conversion rates
- Sampling from posteriors naturally encodes exploration vs. exploitation
- Thompson sampling turns this into a simple, optimal traffic allocation algorithm
- Dynamic reallocation minimizes regret and maximizes total conversions
- Continuous adaptation allows adding/removing variants without restart
Why Thompson Sampling is Superior¶
| Aspect | Traditional A/B | Thompson Sampling |
|---|---|---|
| Traffic allocation | Fixed (e.g., 33/33/33) | Dynamic (adapts to performance) |
| Total conversions | Suboptimal (wastes traffic) | Near-optimal (minimizes regret) |
| Time to decision | Wait for significance | Continuous improvement |
| Adding variants | Restart test | Add anytime |
| Removing variants | Manual rebalance | Automatic fade-out |
| Multiple comparisons | Need corrections | No problem |
| Stopping rule | Pre-determined | Flexible |
| Implementation | Complex statistics | 5 lines of code |
When to Use Thompson Sampling¶
✅ Perfect for:
- Product experimentation with multiple variants
- Continuous optimization (content, recommendations, ads)
- High-traffic scenarios (thousands of users per day)
- Dynamic environments (variants added/removed frequently)
- When you care about total conversions, not just identifying the winner
⚠️ Consider alternatives when:
- Very low traffic (<100 users per day) — may be too slow
- Regulatory requirements for fixed sample sizes (pharma, medical devices)
- Need explainable p-values for stakeholders (though Bayesian probabilities are more interpretable)
Implementation Checklist¶
✓ Choose appropriate priors (weakly informative recommended)
✓ Implement core algorithm (choose, observe, update)
✓ Add monitoring (traffic allocation, posteriors, probabilities)
✓ Run A/A test to validate implementation
✓ Define stopping criteria (business threshold, time limit, or continuous)
✓ Plan for adding/removing variants
✓ Document for stakeholders
The Bottom Line¶
Thompson sampling transforms Bayesian posteriors into a self-optimizing traffic splitter.
No complex statistics. No sample size calculations. No stopping rules. No multiple comparison corrections.
Just:
- Sample
- Choose
- Observe
- Update
- Repeat
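The loop above really is only a handful of lines. A minimal sketch; the variant names, priors, and seed are illustrative, and the serving plumbing (showing the variant, collecting the outcome) is yours to supply:

```python
import random

rng = random.Random(0)
variants = ['A', 'B', 'C']
alpha = {v: 1 for v in variants}  # Beta(1, 1) uniform priors
beta = {v: 1 for v in variants}

def choose_variant():
    # Sample once from each variant's posterior, pick the highest draw
    samples = {v: rng.betavariate(alpha[v], beta[v]) for v in variants}
    return max(samples, key=samples.get)

def update(variant, converted):
    # Conjugate Beta-Bernoulli update: one counter per outcome
    alpha[variant] += converted
    beta[variant] += 1 - converted
```

Per user: `chosen = choose_variant()`, show it, then `update(chosen, converted)` once the outcome is known.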
Mathematics meets elegance. Theory meets practice. Bayesian meets optimal.
Appendix: Real-World Implementation Considerations¶
Critical Assumptions to Revisit in Production¶
The simulation above makes several simplifying assumptions to keep the explanation short and easy to follow. In a real-world implementation, a few things need to be refined:
1. The "Immediate Feedback" Assumption¶
What the Simulation Assumes¶
The code above assumes instant feedback:
# Step 1: Choose variant
chosen = max(samples, key=samples.get)
# Step 2: Show variant to user
show_variant_to_user(chosen)
# Step 3: Observe outcome (IMMEDIATELY!)
converted = observe_outcome()
# Step 4: Update posterior (with fresh data)
alpha[chosen] += converted
beta[chosen] += (1 - converted)
This implies that we know whether the user converted before the next user arrives. In a real-world implementation, there is a timing decision to make when a new user arrives while we are still waiting on the outcome for the previous user. This matters in high-traffic scenarios, and whenever assessing conversion takes some time:
If we have 100 users/minute and 1-hour conversion delay:
- we'll serve 6,000 users before the first feedback arrives
- All 6,000 decisions made with stale priors (Beta(1, 1))
- Essentially random traffic allocation for the first hour
- Massive regret from uninformed early decisions
Example:
t=0:00 User 1 arrives → Sample from Beta(1,1) for all variants → Choose randomly
t=0:01 User 2 arrives → Sample from Beta(1,1) (no updates yet) → Choose randomly
...
t=0:59 User 6000 arrives → Sample from Beta(1,1) (still no feedback!)
t=1:00 User 1's conversion finally observed → First update to posteriors
t=1:01 Users 6001+ now make slightly more informed decisions
For the first hour, we would have no advantage over random allocation. Note that this was not an issue for the kind of A/B test we ran for passkeys, where feedback arrived within a few seconds, but if we want to build a robust, generic platform we can improve the model a bit:
One Classic Solution: Batched Updates¶
Real-world Thompson sampling implementations often use batched updates:
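A sketch of what batching could look like: decisions are served from the current posteriors, outcomes are buffered, and the posteriors are refreshed periodically. The class and method names here are illustrative, not a reference implementation:

```python
import random

class BatchedThompsonSampler:
    def __init__(self, variants, seed=0):
        self.rng = random.Random(seed)
        self.alpha = {v: 1 for v in variants}  # Beta(1, 1) priors
        self.beta = {v: 1 for v in variants}
        self.pending = []  # (variant, converted) pairs awaiting the next flush

    def choose(self):
        # Decisions use the posteriors as of the last flush
        samples = {v: self.rng.betavariate(self.alpha[v], self.beta[v])
                   for v in self.alpha}
        return max(samples, key=samples.get)

    def record(self, variant, converted):
        # Buffer the outcome; posteriors stay fixed until flush()
        self.pending.append((variant, converted))

    def flush(self):
        # Apply all buffered outcomes at once (e.g. every 10-60 minutes)
        for variant, converted in self.pending:
            self.alpha[variant] += converted
            self.beta[variant] += 1 - converted
        self.pending.clear()
```

The flush interval should roughly match the conversion delay: flushing much faster than feedback arrives buys nothing, and flushing much slower wastes information.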
Recommendation for Production¶
For high-traffic systems (>1000 users/hour) with a slow conversion feedback loop:
- Use batched updates (every 10-60 minutes depending on conversion delay)
- Start with weakly informative priors (reduces early regret)
- Track pending observations separately (don't update priors until resolved)
- Monitor "stuck" observations (conversions that never resolve → data quality issue)
For low-traffic systems (<100 users/hour) and/or a fast feedback loop:
- Sequential updates (i.e., the naive solution) are acceptable and can serve as the MVP, for instance for micro-content A/B tests (choice of copy) where feedback is immediate.
2. Non-Stationarity (The Fixed Conversion Rate Assumption)¶
What the Simulation Assumes¶
The simulation uses fixed true conversion rates:
true_rates = {
'A': 0.7016, # Fixed forever
'B': 0.6824, # Never changes
'C': 0.6903, # Constant
}
# Users arrive, conversions observed, posteriors updated
# But true_rates NEVER CHANGE
This assumes conversion rates are stationary (constant over time).
Why This May Be Unrealistic¶
Real-world conversion rates drift:
Day-of-week effects: weekday vs. weekend users behave differently
Seasonality:
Black Friday / holiday-season users might be different
External events:
This is usually more of an issue for e-commerce: large promotions like Amazon's can change traffic patterns and user segments. It is less of a problem for financial services.
Product evolution:
A new feature launch or a large site redesign may shift traffic patterns.
The Problem with Standard Bayesian Updates¶
Standard Thompson sampling remembers all history:
$$ \alpha_i = \alpha_0 + \sum_{\text{all time}} \text{conversions}_i $$
$$ \beta_i = \beta_0 + \sum_{\text{all time}} \text{non-conversions}_i $$
After many observations:
- $\alpha_i$ and $\beta_i$ grow to hundreds or thousands
- Posterior becomes very narrow (low variance)
- Algorithm becomes highly confident about old data
What happens when conversion rates drift?
Example: Variant A's true rate was 70%, now it's 65% (due to competitor)
Day 1-30: A converts at 70% → Posterior: Beta(2100, 900) → mean ≈ 70%
Day 31: Competitor launches, A now converts at 65%
Day 31-35: New data comes in at 65%, but...
Posterior: Beta(2100 + 50, 900 + 27) → mean ≈ 69.9%
Still thinks A is ~70% because old data dominates!
The posterior is sluggish — it takes a long time to react to the new reality because:
- 2100 old conversions "vote" for 70%
- 50 new conversions "vote" for 65%
- Old data wins due to sheer volume
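The sluggishness is plain arithmetic; checking the numbers from the example above (the Beta(1, 1) prior is negligible at this sample size and omitted):

```python
# Days 1-30: 3000 users converting at 70%
old_conv, old_nonconv = 2100, 900
# Days 31-35: fresh users converting at ~65%
new_conv, new_nonconv = 50, 27

alpha = old_conv + new_conv        # 2150
beta = old_nonconv + new_nonconv   # 927
posterior_mean = alpha / (alpha + beta)
# ~0.699: barely moved from 0.70, even though the true rate is now 0.65
```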
The Consequence: Stuck in the Past¶
Problem 1: Can't detect new winners
- Variant B's true rate improves from 68% to 72% (now better than A)
- But algorithm is "confident" A is best (based on old data)
- Takes thousands of users to shift belief
Problem 2: Wastes traffic on degraded variants
- Variant A's true rate drops from 70% to 60% (now worse than C)
- Algorithm still allocates 60% of traffic to A (based on historical performance)
- Massive regret while slowly updating belief
Problem 3: "Frozen" traffic allocation
- After 100,000 users, posteriors are extremely narrow
- Traffic allocation becomes nearly deterministic
- Little exploration → can't adapt to changes
The Fix: Discounting Old Data¶
Production Thompson sampling systems use forgetting mechanisms to stay agile:
Approach 1: Exponential decay (discount factor)
class ThompsonSamplingWithDecay:
    def __init__(self, variants, decay_rate=0.99):
        self.decay_rate = decay_rate  # 0.99 = keep 99% of the old evidence per update
        self.alpha = {v: 1 for v in variants}
        self.beta = {v: 1 for v in variants}

    def update(self, variant, converted):
        """Update with decay applied to old data."""
        # Shrink old evidence toward the Beta(1, 1) prior
        self.alpha[variant] = 1 + (self.alpha[variant] - 1) * self.decay_rate
        self.beta[variant] = 1 + (self.beta[variant] - 1) * self.decay_rate
        # Add the new observation
        self.alpha[variant] += converted
        self.beta[variant] += (1 - converted)
Effect:
- Recent data has more weight than old data
- The effective sample size is bounded, so posteriors stay informative but never become arbitrarily narrow
- Algorithm remains responsive to drift
Trade-off:
- ✅ Adapts to changing conversion rates
- ⚠️ Forgets valuable historical information
- ⚠️ Requires tuning decay_rate (0.99 = slow decay, 0.90 = fast decay)
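To make decay_rate tuning more concrete: with per-update exponential decay, the effective number of remembered observations is roughly 1 / (1 - decay_rate), and an observation's weight halves after ln(2) / (-ln(decay_rate)) updates. A small back-of-envelope helper (this framing is mine, not a standard API):

```python
import math

def effective_memory(decay_rate):
    """Approximate number of observations the decayed posterior 'remembers'."""
    return 1.0 / (1.0 - decay_rate)

def half_life(decay_rate):
    """Updates until an observation's weight drops to half."""
    return math.log(2) / -math.log(decay_rate)

# decay_rate=0.99 remembers ~100 observations (half-life ~69 updates);
# decay_rate=0.90 remembers only ~10, hence "fast decay".
```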
Approach 2: Sliding window
from collections import deque

class ThompsonSamplingWithWindow:
    def __init__(self, variants, window_size=10000):
        self.window_size = window_size
        self.observations = {v: deque(maxlen=window_size) for v in variants}
        self.alpha = {v: 1 for v in variants}
        self.beta = {v: 1 for v in variants}

    def update(self, variant, converted):
        """Only count the most recent observations."""
        # deque(maxlen=...) silently drops the oldest observation when full
        self.observations[variant].append(converted)
        # Recompute the posterior from the sliding window
        conversions = sum(self.observations[variant])
        non_conversions = len(self.observations[variant]) - conversions
        self.alpha[variant] = 1 + conversions
        self.beta[variant] = 1 + non_conversions
Effect:
- Only the last N observations influence the posterior
- Old data completely forgotten after N new users
- Algorithm stays agile
Trade-off:
- ✅ Clear semantics (only last N users matter)
- ✅ No tuning required (just choose window size)
- ⚠️ Higher memory overhead (store recent observations)
- ⚠️ Sudden "cliff" when old observation exits window
Approach 3: Time-based windowing
from datetime import timedelta

class ThompsonSamplingWithTimeWindow:
    def __init__(self, variants, window_days=30):
        self.window_days = window_days
        self.observations = {v: [] for v in variants}
        self.alpha = {v: 1 for v in variants}
        self.beta = {v: 1 for v in variants}

    def update(self, variant, converted, timestamp):
        """Only count observations from the last N days."""
        cutoff = timestamp - timedelta(days=self.window_days)
        # Store the observation with its timestamp
        self.observations[variant].append((timestamp, converted))
        # Drop observations that have fallen out of the time window
        self.observations[variant] = [(ts, conv)
                                      for ts, conv in self.observations[variant]
                                      if ts >= cutoff]
        # Recompute the posterior from the remaining observations
        conversions = sum(conv for ts, conv in self.observations[variant])
        non_conversions = len(self.observations[variant]) - conversions
        self.alpha[variant] = 1 + conversions
        self.beta[variant] = 1 + non_conversions
Effect:
- Only observations from last N days count
- Naturally handles varying traffic (weekday vs weekend)
- Algorithm adapts to seasonal patterns
Trade-off:
- ✅ Semantically clear (recent behavior matters)
- ✅ Handles non-uniform traffic patterns
- ⚠️ Requires timestamp tracking
- ⚠️ Window size needs domain knowledge (30 days? 7 days? 90 days?)
Recommendation for Production¶
For stationary environments (conversion rates don't change):
- Use standard Thompson sampling (accumulate all data)
- Simpler, no tuning needed
For non-stationary environments (rates drift over time):
- Use exponential decay (good default: decay_rate=0.99)
- Or use sliding window (e.g., last 10,000 users or 30 days)
- Monitor posterior variance — if it shrinks too much, add decay
For highly dynamic environments (A/B testing on news sites, ad campaigns):
- Use aggressive discounting (decay_rate=0.90) or short windows (7 days)
- Prioritize agility over long-term memory