# hamverbot's picture
# Upload src/benchmark/sweep.py
# f16da60 verified
"""
Hyperparameter Sweep for Bidding Algorithms
Sweeps over:
- Step sizes ε (DualOGD, TwoSidedDual)
- Budget fractions k (TwoSidedDual; currently fixed to a single entry of `k_values`, not swept)
- Value per click
- Budget levels
- Market price configurations
Each configuration runs all algorithms for comparison.
"""
import sys
import os
import json
import itertools
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def run_sweep(
    X_test, y_test, ctr_model,
    T=5000,
    sweep_config=None,
    output_path='/app/results/sweep_results.json'
):
    """Run a hyperparameter sweep and compare the bidding algorithms.

    For every combination of budget, value per click, market price
    configuration and step size ε, a fresh ``FirstPriceAuctionSimulator`` is
    built over the first ``T`` test rows and the DualOGD, TwoSidedDual,
    Linear and ValueShading bidders are run through ``sim.run_comparison``.

    Args:
        X_test: Test features; passed to ``ctr_model.predict_proba`` and
            sliced with ``[:T]``.
        y_test: Click labels, sliced with ``[:T]`` alongside ``X_test``.
        ctr_model: Fitted classifier exposing ``predict_proba``; column 1 of
            its output is used as the predicted CTR.
        T: Number of leading test rows fed to each simulation.
        sweep_config: Optional dict with the keys ``'budgets'``,
            ``'vpc_values'``, ``'dual_epsilons'``, ``'k_values'`` and
            ``'price_configs'``. Any key that is missing falls back to the
            built-in default for that key.
        output_path: JSON file that is rewritten with all results collected
            so far after every configuration.

    Returns:
        A list of dicts, one per (configuration, algorithm) pair.
    """
    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ValueShadingBidder

    default_config = {
        'budgets': [2000, 5000, 10000],
        'vpc_values': [30, 50, 100],
        'dual_epsilons': [0.003, 0.01, 0.03, 0.1],
        'k_values': [0.6, 0.8, 0.95],
        'price_configs': [
            {'base_mean': 15, 'ctr_correlation': 5, 'noise_std': 0.4, 'name': 'low_competition'},
            {'base_mean': 20, 'ctr_correlation': 10, 'noise_std': 0.6, 'name': 'medium_competition'},
            {'base_mean': 30, 'ctr_correlation': 20, 'noise_std': 0.8, 'name': 'high_competition'},
        ],
    }
    # Merge instead of replace so a partial custom config does not KeyError.
    sweep_config = {**default_config, **(sweep_config or {})}

    # k is not swept: TwoSidedDual uses the second k value. The index is
    # clamped so a single-element list works instead of raising IndexError.
    k_values = sweep_config['k_values']
    two_sided_k = k_values[min(1, len(k_values) - 1)]

    pctr_test = ctr_model.predict_proba(X_test)[:, 1]
    mean_pctr = float(pctr_test.mean())  # loop-invariant input to LinearBidder

    # os.path.dirname() returns '' for a bare filename, and os.makedirs('')
    # raises FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    all_sweep_results = []
    # product() iterates in the same order as the equivalent nested loops
    # (budget outermost, epsilon innermost).
    grid = itertools.product(
        sweep_config['budgets'],
        sweep_config['vpc_values'],
        sweep_config['price_configs'],
        sweep_config['dual_epsilons'],
    )
    for budget, vpc, price_cfg, eps in grid:
        config_id = f"B{budget}_V{vpc}_P{price_cfg['name']}_EPS{eps}"
        print(f"\n{config_id}")

        sim = FirstPriceAuctionSimulator(
            features=X_test[:T],
            pctr_true=pctr_test[:T],
            click_labels=y_test[:T],
            value_per_click=vpc,
            market_price_config=price_cfg,
            seed=42
        )
        algorithms = {
            'DualOGD': DualOGDBidder(budget, T, vpc, epsilon=eps),
            'TwoSidedDual': TwoSidedDualBidder(budget, T, vpc, k=two_sided_k, epsilon_cap=eps),
            'Linear': LinearBidder(20.0, mean_pctr),
            'ValueShading': ValueShadingBidder(budget, T, vpc),
        }
        # Give every budget-aware bidder the budget of this configuration.
        for algo in algorithms.values():
            if hasattr(algo, 'B'):
                algo.B = budget
                algo.remaining_budget = budget

        results = sim.run_comparison(algorithms)
        all_sweep_results.extend(
            {
                'config_id': config_id,
                'budget': budget,
                'vpc': vpc,
                'epsilon': eps,
                'price_config': price_cfg['name'],
                'algorithm': algo_name,
                'clicks': r['total_clicks'],
                'spent': r['total_spent'],
                'budget_used': r.get('budget_used_frac', 0),
                'cpc': r.get('cpc', 0),
                'win_rate': r.get('win_rate', 0),
            }
            for algo_name, r in results.items()
        )

        # Save incrementally: rewrite the file after each configuration so
        # partial results are on disk if the sweep is interrupted.
        with open(output_path, 'w') as f:
            json.dump(all_sweep_results, f, indent=2)

    return all_sweep_results
def analyze_sweep(sweep_results):
    """Print a summary of sweep results and return them as a DataFrame.

    Prints, for each algorithm, the row with the most clicks, followed by
    the average clicks, CPC and budget usage of DualOGD for each ε.

    Args:
        sweep_results: List of record dicts with at least the keys
            ``'algorithm'``, ``'clicks'``, ``'cpc'``, ``'budget'``, ``'vpc'``,
            ``'epsilon'``, ``'price_config'`` and ``'budget_used'``.

    Returns:
        A ``pandas.DataFrame`` built from ``sweep_results``; it is empty when
        no results were given.
    """
    df = pd.DataFrame(sweep_results)
    print("\n" + "=" * 70)
    print("SWEEP ANALYSIS")
    print("=" * 70)

    # An empty result list yields a frame without columns; indexing
    # df['algorithm'] would raise KeyError, so stop here.
    if df.empty:
        print("\nNo sweep results to analyze.")
        return df

    # Best by algorithm (row with the highest click count).
    for algo in df['algorithm'].unique():
        algo_df = df[df['algorithm'] == algo]
        best = algo_df.loc[algo_df['clicks'].idxmax()]
        print(f"\n{algo} best: clicks={best['clicks']}, CPC={best['cpc']:.2f}, "
              f"budget={best['budget']}, vpc={best['vpc']}, eps={best['epsilon']}, "
              f"price={best['price_config']}")

    # Effect of epsilon on DualOGD, averaged over all other settings.
    print("\n--- Effect of ε on DualOGD ---")
    dual_df = df[df['algorithm'] == 'DualOGD']
    for eps in sorted(dual_df['epsilon'].unique()):
        eps_df = dual_df[dual_df['epsilon'] == eps]
        print(f"ε={eps:.4f}: avg clicks={eps_df['clicks'].mean():.0f}, "
              f"avg CPC={eps_df['cpc'].mean():.2f}, "
              f"budget used={eps_df['budget_used'].mean():.1%}")

    return df