"""
Hyperparameter Sweep for Bidding Algorithms

Sweeps over:
- Step sizes ε (DualOGD, TwoSidedDual)
- Value per click
- Budget levels
- Market price configurations

The budget fraction k (TwoSidedDual) is read from the sweep configuration
but is held at a single fixed value rather than swept (see ``run_sweep``).

Each configuration runs all algorithms for comparison.
"""

import sys
import os
import json
import itertools
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Make the project root importable so the ``src.*`` imports in run_sweep resolve.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


def _to_builtin(value):
    """Return a NumPy scalar as the equivalent Python scalar; pass anything else through."""
    return value.item() if isinstance(value, np.generic) else value


def _save_results(results, output_path):
    """Write ``results`` to ``output_path`` as indented JSON.

    The parent directory is created when the path has one. A bare file name
    has an empty dirname, and ``os.makedirs('')`` would raise, so that case
    is skipped.
    """
    parent = os.path.dirname(output_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)


def run_sweep(
    X_test, y_test, ctr_model,
    T=5000,
    sweep_config=None,
    output_path='/app/results/sweep_results.json'
):
    """Run hyperparameter sweep across all algorithms.

    For every combination of budget, value per click, market price
    configuration and step size ε, a fresh simulator (seed 42) is built and
    the DualOGD, TwoSidedDual, Linear and ValueShading bidders are compared
    on it.

    Args:
        X_test: Feature rows; passed to ``ctr_model.predict_proba`` and
            sliced to the first ``T`` rows for the simulator.
        y_test: Click labels, sliced to the first ``T`` entries.
        ctr_model: Model exposing ``predict_proba``; column 1 of its output
            is used as the predicted CTR.
        T: Maximum number of auctions per run. Clamped to the number of
            available rows so the bidders' horizon matches the data handed
            to the simulator.
        sweep_config: Optional dict with any of the keys ``budgets``,
            ``vpc_values``, ``dual_epsilons``, ``k_values`` and
            ``price_configs``. Missing keys fall back to the defaults below.
        output_path: JSON file that receives the accumulated results.

    Returns:
        A list with one dict per (configuration, algorithm) pair.
    """
    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ThresholdBidder, ValueShadingBidder, RLBBidder

    default_config = {
        'budgets': [2000, 5000, 10000],
        'vpc_values': [30, 50, 100],
        'dual_epsilons': [0.003, 0.01, 0.03, 0.1],
        'k_values': [0.6, 0.8, 0.95],
        'price_configs': [
            {'base_mean': 15, 'ctr_correlation': 5, 'noise_std': 0.4, 'name': 'low_competition'},
            {'base_mean': 20, 'ctr_correlation': 10, 'noise_std': 0.6, 'name': 'medium_competition'},
            {'base_mean': 30, 'ctr_correlation': 20, 'noise_std': 0.8, 'name': 'high_competition'},
        ]
    }
    # A partial config only overrides the keys it provides.
    cfg = {**default_config, **(sweep_config or {})}

    pctr_test = ctr_model.predict_proba(X_test)[:, 1]

    # Never promise the bidders more rounds than there are rows to simulate.
    T = min(T, len(pctr_test))

    # k is not swept: the second configured value is used, or the only one
    # when the list has a single entry.
    k_values = cfg['k_values']
    fixed_k = k_values[min(1, len(k_values) - 1)]

    all_sweep_results = []

    # product() varies the right-most factor fastest, which is the same
    # order as nesting the four loops budget > vpc > price config > eps.
    grid = itertools.product(
        cfg['budgets'], cfg['vpc_values'], cfg['price_configs'], cfg['dual_epsilons']
    )
    for budget, vpc, price_cfg, eps in grid:
        config_id = f"B{budget}_V{vpc}_P{price_cfg['name']}_EPS{eps}"
        print(f"\n{config_id}")

        sim = FirstPriceAuctionSimulator(
            features=X_test[:T],
            pctr_true=pctr_test[:T],
            click_labels=y_test[:T],
            value_per_click=vpc,
            market_price_config=price_cfg,
            seed=42
        )

        algorithms = {
            'DualOGD': DualOGDBidder(budget, T, vpc, epsilon=eps),
            'TwoSidedDual': TwoSidedDualBidder(budget, T, vpc, k=fixed_k, epsilon_cap=eps),
            'Linear': LinearBidder(20.0, float(pctr_test.mean())),
            'ValueShading': ValueShadingBidder(budget, T, vpc),
        }

        # Bidders that expose a ``B`` attribute get their budget state set
        # explicitly to this configuration's budget.
        for algo in algorithms.values():
            if hasattr(algo, 'B'):
                algo.B = budget
                algo.remaining_budget = budget

        results = sim.run_comparison(algorithms)

        for algo_name, r in results.items():
            record = {
                'config_id': config_id,
                'budget': budget,
                'vpc': vpc,
                'epsilon': eps,
                'price_config': price_cfg['name'],
                'algorithm': algo_name,
                'clicks': r['total_clicks'],
                'spent': r['total_spent'],
                'budget_used': r.get('budget_used_frac', 0),
                'cpc': r.get('cpc', 0),
                'win_rate': r.get('win_rate', 0),
            }
            # json.dump rejects NumPy scalars; convert them in case the
            # simulator or the sweep config supplies any.
            all_sweep_results.append(
                {key: _to_builtin(val) for key, val in record.items()}
            )

        # Save incrementally so an interrupted sweep keeps finished configs.
        _save_results(all_sweep_results, output_path)

    # Final write: the file exists and is complete even for an empty sweep.
    _save_results(all_sweep_results, output_path)

    return all_sweep_results


def analyze_sweep(sweep_results):
    """Analyze sweep results to find best configurations.

    Prints, per algorithm, the record with the most clicks, followed by the
    average clicks, CPC and budget usage of DualOGD for each ε.

    Args:
        sweep_results: List of result dicts as returned by ``run_sweep``.

    Returns:
        The results as a ``pandas.DataFrame`` (empty when there are no
        results).
    """
    df = pd.DataFrame(sweep_results)

    print("\n" + "=" * 70)
    print("SWEEP ANALYSIS")
    print("=" * 70)

    # An empty frame has no 'algorithm' column; indexing it would raise.
    if df.empty:
        print("No sweep results to analyze.")
        return df

    # Best by algorithm
    for algo in df['algorithm'].unique():
        algo_df = df[df['algorithm'] == algo]
        best = algo_df.loc[algo_df['clicks'].idxmax()]
        print(f"\n{algo} best: clicks={best['clicks']}, CPC={best['cpc']:.2f}, "
              f"budget={best['budget']}, vpc={best['vpc']}, eps={best['epsilon']}, "
              f"price={best['price_config']}")

    # Effect of epsilon on DualOGD
    print("\n--- Effect of ε on DualOGD ---")
    dual_df = df[df['algorithm'] == 'DualOGD']
    for eps in sorted(dual_df['epsilon'].unique()):
        eps_df = dual_df[dual_df['epsilon'] == eps]
        print(f"ε={eps:.4f}: avg clicks={eps_df['clicks'].mean():.0f}, "
              f"avg CPC={eps_df['cpc'].mean():.2f}, "
              f"budget used={eps_df['budget_used'].mean():.1%}")

    return df