File size: 5,294 Bytes
f16da60 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | """
Hyperparameter Sweep for Bidding Algorithms
Sweeps over:
- Step sizes ε (DualOGD, TwoSidedDual)
- Budget fractions k (TwoSidedDual; run_sweep currently uses only the second entry of k_values)
- Value per click
- Budget levels
- Market price configurations
Each configuration runs all algorithms for comparison.
"""
import sys
import os
import json
import itertools
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def run_sweep(
    X_test, y_test, ctr_model,
    T=5000,
    sweep_config=None,
    output_path='/app/results/sweep_results.json'
):
    """Run every bidding algorithm over a grid of auction configurations.

    One simulation is run per combination of budget, value-per-click, market
    price configuration and step size epsilon. For each combination all
    algorithms are compared on the same simulator, one record per algorithm
    is appended to the result list, and the full list is rewritten to
    ``output_path`` so partial results survive an interrupted sweep.

    Args:
        X_test: Feature rows; passed to ``ctr_model.predict_proba`` and the
            first ``T`` rows are handed to the simulator.
        y_test: Click labels aligned with ``X_test``; the first ``T`` are used.
        ctr_model: Fitted classifier exposing ``predict_proba``; column 1 of
            its output is used as the predicted click-through rate.
        T: Number of leading rows to simulate; also given to the bidders as
            their horizon.
        sweep_config: Optional dict with keys ``budgets``, ``vpc_values``,
            ``dual_epsilons``, ``k_values`` and ``price_configs`` (each price
            config is a dict with at least a ``name`` key). A built-in grid
            is used when ``None``.
        output_path: JSON file that receives the accumulated records.

    Returns:
        List of dicts, one per (configuration, algorithm) pair.
    """
    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ValueShadingBidder

    if sweep_config is None:
        sweep_config = {
            'budgets': [2000, 5000, 10000],
            'vpc_values': [30, 50, 100],
            'dual_epsilons': [0.003, 0.01, 0.03, 0.1],
            'k_values': [0.6, 0.8, 0.95],
            'price_configs': [
                {'base_mean': 15, 'ctr_correlation': 5, 'noise_std': 0.4, 'name': 'low_competition'},
                {'base_mean': 20, 'ctr_correlation': 10, 'noise_std': 0.6, 'name': 'medium_competition'},
                {'base_mean': 30, 'ctr_correlation': 20, 'noise_std': 0.8, 'name': 'high_competition'},
            ]
        }

    pctr_test = ctr_model.predict_proba(X_test)[:, 1]
    # Loop-invariant: the Linear baseline is always built from the mean
    # predicted CTR over the whole test set.
    mean_pctr = float(pctr_test.mean())
    # k is not swept: the second entry of k_values is used for every run.
    k_fixed = sweep_config['k_values'][1]

    all_sweep_results = []
    # product() varies the last iterable fastest, matching the nesting order
    # budget -> vpc -> price config -> epsilon.
    grid = itertools.product(
        sweep_config['budgets'],
        sweep_config['vpc_values'],
        sweep_config['price_configs'],
        sweep_config['dual_epsilons'],
    )
    for budget, vpc, price_cfg, eps in grid:
        config_id = f"B{budget}_V{vpc}_P{price_cfg['name']}_EPS{eps}"
        print(f"\n{config_id}")

        sim = FirstPriceAuctionSimulator(
            features=X_test[:T],
            pctr_true=pctr_test[:T],
            click_labels=y_test[:T],
            value_per_click=vpc,
            market_price_config=price_cfg,
            seed=42
        )
        algorithms = {
            'DualOGD': DualOGDBidder(budget, T, vpc, epsilon=eps),
            'TwoSidedDual': TwoSidedDualBidder(budget, T, vpc, k=k_fixed, epsilon_cap=eps),
            'Linear': LinearBidder(20.0, mean_pctr),
            'ValueShading': ValueShadingBidder(budget, T, vpc),
        }
        # Force the swept budget onto every bidder that tracks one.
        for algo in algorithms.values():
            if hasattr(algo, 'B'):
                algo.B = budget
                algo.remaining_budget = budget

        results = sim.run_comparison(algorithms)
        for algo_name, r in results.items():
            all_sweep_results.append({
                'config_id': config_id,
                'budget': budget,
                'vpc': vpc,
                'epsilon': eps,
                'price_config': price_cfg['name'],
                'algorithm': algo_name,
                'clicks': r['total_clicks'],
                'spent': r['total_spent'],
                'budget_used': r.get('budget_used_frac', 0),
                'cpc': r.get('cpc', 0),
                'win_rate': r.get('win_rate', 0),
            })

        # Save incrementally
        _save_sweep_results(all_sweep_results, output_path)

    return all_sweep_results


def _save_sweep_results(results, output_path):
    """Write the accumulated sweep records to ``output_path`` as JSON."""
    out_dir = os.path.dirname(output_path)
    # dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when one is named.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
def analyze_sweep(sweep_results):
    """Print a summary of sweep results and return them as a DataFrame.

    For every algorithm the record with the most clicks is printed, followed
    by the DualOGD averages (clicks, CPC, budget used) per epsilon value.

    Args:
        sweep_results: Sequence of record dicts with at least the keys
            ``algorithm``, ``clicks``, ``cpc``, ``budget``, ``vpc``,
            ``epsilon``, ``price_config`` and ``budget_used``.

    Returns:
        ``pandas.DataFrame`` built from ``sweep_results``; empty when no
        records were supplied.
    """
    df = pd.DataFrame(sweep_results)

    print("\n" + "=" * 70)
    print("SWEEP ANALYSIS")
    print("=" * 70)

    # An empty record list yields a frame without columns; looking up
    # 'algorithm' on it would raise KeyError, so stop early.
    if df.empty:
        print("\nNo sweep results to analyze.")
        return df

    # Best by algorithm (sort=False keeps first-appearance order).
    for algo, algo_df in df.groupby('algorithm', sort=False):
        best = algo_df.loc[algo_df['clicks'].idxmax()]
        print(f"\n{algo} best: clicks={best['clicks']}, CPC={best['cpc']:.2f}, "
              f"budget={best['budget']}, vpc={best['vpc']}, eps={best['epsilon']}, "
              f"price={best['price_config']}")

    # Effect of epsilon on DualOGD (groupby sorts epsilon ascending).
    print("\n--- Effect of ε on DualOGD ---")
    dual_df = df[df['algorithm'] == 'DualOGD']
    for eps, eps_df in dual_df.groupby('epsilon'):
        print(f"ε={eps:.4f}: avg clicks={eps_df['clicks'].mean():.0f}, "
              f"avg CPC={eps_df['cpc'].mean():.2f}, "
              f"budget used={eps_df['budget_used'].mean():.1%}")

    return df
|