| """ |
| Hyperparameter Sweep for Bidding Algorithms |
| |
| Sweeps over: |
| - Step sizes ε (DualOGD, TwoSidedDual) |
| - Budget fractions k (TwoSidedDual; a single value from `k_values` is used per run, k itself is not swept yet) |
| - Value per click |
| - Budget levels |
| - Market price configurations |
| |
| Each configuration runs all algorithms for comparison. |
| """ |
| import sys |
| import os |
| import json |
| import itertools |
| import numpy as np |
| import pandas as pd |
| from datasets import load_dataset |
| from sklearn.linear_model import LogisticRegression |
| from sklearn.model_selection import train_test_split |
| from sklearn.preprocessing import LabelEncoder, StandardScaler |
|
|
# Prepend the directory two levels above this file to the import path.
# NOTE(review): presumably this is the project root, added so that the
# `src.*` packages imported inside run_sweep() resolve — confirm layout.
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
|
|
def run_sweep(
    X_test, y_test, ctr_model,
    T=5000,
    sweep_config=None,
    output_path='/app/results/sweep_results.json'
):
    """Run a hyperparameter sweep across the bidding algorithms.

    For every combination of budget, value-per-click, market-price
    configuration and step size ε, a new ``FirstPriceAuctionSimulator`` is
    built over the first ``T`` test rows and DualOGD, TwoSidedDual, Linear
    and ValueShading bidders are compared on it. One result row per
    (configuration, algorithm) pair is collected and the full list is
    written to ``output_path`` as JSON.

    Args:
        X_test: Test features. Passed whole to ``ctr_model.predict_proba``;
            the first ``T`` rows are handed to the simulator.
        y_test: Click labels; the first ``T`` entries are handed to the
            simulator.
        ctr_model: Object exposing ``predict_proba``; column 1 of its output
            is used as the predicted CTR.
        T: Number of leading rows used per simulation, also passed to the
            bidders as their horizon.
        sweep_config: Optional dict with the keys ``budgets``,
            ``vpc_values``, ``dual_epsilons``, ``k_values`` and
            ``price_configs`` (each price config needs a ``name`` entry).
            A built-in grid is used when omitted.
        output_path: Destination of the JSON dump. A bare filename (no
            directory component) is written to the current directory.

    Returns:
        list[dict]: One row per configuration and algorithm with the keys
        ``config_id``, ``budget``, ``vpc``, ``epsilon``, ``price_config``,
        ``algorithm``, ``clicks``, ``spent``, ``budget_used``, ``cpc`` and
        ``win_rate``.

    Raises:
        ValueError: If ``sweep_config['k_values']`` is empty.
    """
    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ValueShadingBidder

    if sweep_config is None:
        sweep_config = {
            'budgets': [2000, 5000, 10000],
            'vpc_values': [30, 50, 100],
            'dual_epsilons': [0.003, 0.01, 0.03, 0.1],
            'k_values': [0.6, 0.8, 0.95],
            'price_configs': [
                {'base_mean': 15, 'ctr_correlation': 5, 'noise_std': 0.4, 'name': 'low_competition'},
                {'base_mean': 20, 'ctr_correlation': 10, 'noise_std': 0.6, 'name': 'medium_competition'},
                {'base_mean': 30, 'ctr_correlation': 20, 'noise_std': 0.8, 'name': 'high_competition'},
            ]
        }

    # k is not swept: TwoSidedDual always runs with one fixed budget
    # fraction. Keep the historical choice (second entry) when it exists and
    # fall back to the only entry instead of raising IndexError.
    k_values = sweep_config['k_values']
    if not k_values:
        raise ValueError("sweep_config['k_values'] must contain at least one value")
    k_fixed = k_values[1] if len(k_values) > 1 else k_values[0]

    pctr_test = ctr_model.predict_proba(X_test)[:, 1]
    # Loop-invariant: the Linear baseline is parameterised with the mean
    # predicted CTR over the whole test set.
    mean_pctr = float(pctr_test.mean())

    all_sweep_results = []

    # product() iterates in the same order as the equivalent nested loops
    # (budget outermost, epsilon innermost).
    grid = itertools.product(
        sweep_config['budgets'],
        sweep_config['vpc_values'],
        sweep_config['price_configs'],
        sweep_config['dual_epsilons'],
    )
    for budget, vpc, price_cfg, eps in grid:
        config_id = f"B{budget}_V{vpc}_P{price_cfg['name']}_EPS{eps}"
        print(f"\n{config_id}")

        # Rebuilt per configuration with a fixed seed so every run starts
        # from an identically seeded simulator.
        sim = FirstPriceAuctionSimulator(
            features=X_test[:T],
            pctr_true=pctr_test[:T],
            click_labels=y_test[:T],
            value_per_click=vpc,
            market_price_config=price_cfg,
            seed=42
        )

        algorithms = {
            'DualOGD': DualOGDBidder(budget, T, vpc, epsilon=eps),
            'TwoSidedDual': TwoSidedDualBidder(budget, T, vpc, k=k_fixed, epsilon_cap=eps),
            'Linear': LinearBidder(20.0, mean_pctr),
            'ValueShading': ValueShadingBidder(budget, T, vpc),
        }

        # Explicitly (re)set the budget state on every bidder that tracks one.
        for algo in algorithms.values():
            if hasattr(algo, 'B'):
                algo.B = budget
                algo.remaining_budget = budget

        results = sim.run_comparison(algorithms)

        for algo_name, r in results.items():
            all_sweep_results.append({
                'config_id': config_id,
                'budget': budget,
                'vpc': vpc,
                'epsilon': eps,
                'price_config': price_cfg['name'],
                'algorithm': algo_name,
                'clicks': r['total_clicks'],
                'spent': r['total_spent'],
                'budget_used': r.get('budget_used_frac', 0),
                'cpc': r.get('cpc', 0),
                'win_rate': r.get('win_rate', 0),
            })

    _write_sweep_results(all_sweep_results, output_path)

    return all_sweep_results


def _json_default(obj):
    """Convert NumPy scalars/arrays to plain Python values for ``json.dump``."""
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def _write_sweep_results(rows, output_path):
    """Write the sweep rows to ``output_path`` as indented JSON."""
    # dirname() is '' for a bare filename and os.makedirs('') raises, so the
    # directory is only created when there is one.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        # Simulator metrics may be NumPy scalars (e.g. int64 counts), which
        # the json module rejects without a converter.
        json.dump(rows, f, indent=2, default=_json_default)
|
|
|
|
def analyze_sweep(sweep_results):
    """Summarise sweep results on stdout and return them as a DataFrame.

    Prints, for each algorithm, the row with the highest click count, and
    then the average clicks, CPC and budget usage of DualOGD per step
    size ε.

    Args:
        sweep_results: List of result dicts as produced by ``run_sweep``;
            the columns ``algorithm``, ``clicks``, ``cpc``, ``budget``,
            ``vpc``, ``epsilon``, ``price_config`` and ``budget_used`` are
            read.

    Returns:
        pandas.DataFrame: ``sweep_results`` as a frame (empty when no
        results were supplied).
    """
    df = pd.DataFrame(sweep_results)

    print("\n" + "=" * 70)
    print("SWEEP ANALYSIS")
    print("=" * 70)

    # An empty sweep yields a frame without columns; bail out instead of
    # failing with KeyError on df['algorithm'].
    if df.empty:
        print("\nNo sweep results to analyze.")
        return df

    # sort=False keeps the algorithms in first-appearance order.
    for algo, algo_df in df.groupby('algorithm', sort=False):
        best = algo_df.loc[algo_df['clicks'].idxmax()]
        print(f"\n{algo} best: clicks={best['clicks']}, CPC={best['cpc']:.2f}, "
              f"budget={best['budget']}, vpc={best['vpc']}, eps={best['epsilon']}, "
              f"price={best['price_config']}")

    print("\n--- Effect of ε on DualOGD ---")
    dual_df = df[df['algorithm'] == 'DualOGD']
    # groupby sorts its keys by default, i.e. ascending ε.
    for eps, eps_df in dual_df.groupby('epsilon'):
        print(f"ε={eps:.4f}: avg clicks={eps_df['clicks'].mean():.0f}, "
              f"avg CPC={eps_df['cpc'].mean():.2f}, "
              f"budget used={eps_df['budget_used'].mean():.1%}")

    return df
|
|