hamverbot committed on
Commit
f16da60
·
verified ·
1 Parent(s): 212eb07

Upload src/benchmark/sweep.py

Browse files
Files changed (1) hide show
  1. src/benchmark/sweep.py +134 -0
src/benchmark/sweep.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hyperparameter Sweep for Bidding Algorithms
3
+
4
+ Sweeps over:
5
+ - Step sizes ε (DualOGD, TwoSidedDual)
6
+ - Budget fractions k (TwoSidedDual; currently fixed to the second entry of k_values, not swept)
7
+ - Value per click
8
+ - Budget levels
9
+ - Market price configurations
10
+
11
+ Each configuration runs all algorithms for comparison.
12
+ """
13
+ import sys
14
+ import os
15
+ import json
16
+ import itertools
17
+ import numpy as np
18
+ import pandas as pd
19
+ from datasets import load_dataset
20
+ from sklearn.linear_model import LogisticRegression
21
+ from sklearn.model_selection import train_test_split
22
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
23
+
24
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
25
+
26
+
27
def run_sweep(
    X_test, y_test, ctr_model,
    T=5000,
    sweep_config=None,
    output_path='/app/results/sweep_results.json'
):
    """Run a hyperparameter sweep across the bidding algorithms.

    For every combination of budget, value-per-click, market price
    configuration and step size, a fresh simulator and a fresh set of
    bidders (DualOGD, TwoSidedDual, Linear, ValueShading) are built and
    compared via ``sim.run_comparison``. The accumulated results are written
    to ``output_path`` as JSON after every configuration.

    Args:
        X_test: Test features; only the first ``T`` rows are handed to the
            simulator. The full set is scored by ``ctr_model``.
        y_test: Click labels aligned with ``X_test``; first ``T`` are used.
        ctr_model: Object exposing ``predict_proba(X)``; column 1 of its
            output is used as the predicted CTR.
        T: Number of leading samples to simulate; also passed to the bidders.
        sweep_config: Optional dict with any of the keys ``budgets``,
            ``vpc_values``, ``dual_epsilons``, ``k_values`` and
            ``price_configs``. Missing keys fall back to the built-in
            defaults. Each entry of ``price_configs`` must have a ``name``.
        output_path: Destination of the JSON results file. A bare file name
            (no directory component) is written to the working directory.

    Returns:
        A list of dicts, one per (configuration, algorithm) pair.

    Raises:
        IndexError: If ``k_values`` has fewer than two entries, because the
            TwoSidedDual bidder always uses ``k_values[1]``.
    """
    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ValueShadingBidder

    default_config = {
        'budgets': [2000, 5000, 10000],
        'vpc_values': [30, 50, 100],
        'dual_epsilons': [0.003, 0.01, 0.03, 0.1],
        'k_values': [0.6, 0.8, 0.95],
        'price_configs': [
            {'base_mean': 15, 'ctr_correlation': 5, 'noise_std': 0.4, 'name': 'low_competition'},
            {'base_mean': 20, 'ctr_correlation': 10, 'noise_std': 0.6, 'name': 'medium_competition'},
            {'base_mean': 30, 'ctr_correlation': 20, 'noise_std': 0.8, 'name': 'high_competition'},
        ],
    }
    # User-supplied keys override the defaults; omitted keys keep them.
    config = {**default_config, **(sweep_config or {})}

    pctr_test = ctr_model.predict_proba(X_test)[:, 1]

    # Loop-invariant inputs, computed once.
    features_T = X_test[:T]
    pctr_T = pctr_test[:T]
    labels_T = y_test[:T]
    mean_pctr = float(pctr_test.mean())

    # os.path.dirname() returns '' for a bare file name, and os.makedirs('')
    # raises FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    all_sweep_results = []

    # Same iteration order as nested loops: budget -> vpc -> price -> epsilon.
    sweep_grid = itertools.product(
        config['budgets'],
        config['vpc_values'],
        config['price_configs'],
        config['dual_epsilons'],
    )

    for budget, vpc, price_cfg, eps in sweep_grid:
        config_id = f"B{budget}_V{vpc}_P{price_cfg['name']}_EPS{eps}"
        print(f"\n{config_id}")

        sim = FirstPriceAuctionSimulator(
            features=features_T,
            pctr_true=pctr_T,
            click_labels=labels_T,
            value_per_click=vpc,
            market_price_config=price_cfg,
            seed=42
        )

        algorithms = {
            'DualOGD': DualOGDBidder(budget, T, vpc, epsilon=eps),
            # NOTE: k is not swept; the second entry of k_values is always used.
            'TwoSidedDual': TwoSidedDualBidder(
                budget, T, vpc, k=config['k_values'][1], epsilon_cap=eps
            ),
            'Linear': LinearBidder(20.0, mean_pctr),
            'ValueShading': ValueShadingBidder(budget, T, vpc),
        }

        # Re-assert the budget on every bidder that exposes a `B` attribute.
        for algo in algorithms.values():
            if hasattr(algo, 'B'):
                algo.B = budget
                algo.remaining_budget = budget

        results = sim.run_comparison(algorithms)

        all_sweep_results.extend(
            {
                'config_id': config_id,
                'budget': budget,
                'vpc': vpc,
                'epsilon': eps,
                'price_config': price_cfg['name'],
                'algorithm': algo_name,
                'clicks': r['total_clicks'],
                'spent': r['total_spent'],
                'budget_used': r.get('budget_used_frac', 0),
                'cpc': r.get('cpc', 0),
                'win_rate': r.get('win_rate', 0),
            }
            for algo_name, r in results.items()
        )

        # Save incrementally so a crash mid-sweep keeps the finished runs.
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(all_sweep_results, f, indent=2)

    return all_sweep_results
107
+
108
+
109
def analyze_sweep(sweep_results):
    """Summarise sweep results and return them as a DataFrame.

    Prints, for every algorithm, the record with the most clicks, followed
    by the per-epsilon averages of clicks, CPC and budget usage for the
    ``DualOGD`` algorithm.

    Args:
        sweep_results: List of per-run records (dicts). Each record must
            provide the keys ``algorithm``, ``clicks``, ``cpc``, ``budget``,
            ``vpc``, ``epsilon``, ``price_config`` and ``budget_used``.

    Returns:
        pandas.DataFrame with one row per record. When ``sweep_results`` is
        empty, an empty DataFrame is returned and no per-algorithm analysis
        is printed.
    """
    df = pd.DataFrame(sweep_results)

    print("\n" + "=" * 70)
    print("SWEEP ANALYSIS")
    print("=" * 70)

    # An empty record list yields a DataFrame without columns, so any column
    # lookup below would raise KeyError; bail out early instead.
    if df.empty:
        print("\nNo sweep results to analyze.")
        return df

    # Best by algorithm (highest click count; first row wins on ties)
    for algo in df['algorithm'].unique():
        algo_df = df[df['algorithm'] == algo]
        best = algo_df.loc[algo_df['clicks'].idxmax()]
        print(f"\n{algo} best: clicks={best['clicks']}, CPC={best['cpc']:.2f}, "
              f"budget={best['budget']}, vpc={best['vpc']}, eps={best['epsilon']}, "
              f"price={best['price_config']}")

    # Effect of epsilon on DualOGD
    print("\n--- Effect of ε on DualOGD ---")
    dual_df = df[df['algorithm'] == 'DualOGD']
    for eps in sorted(dual_df['epsilon'].unique()):
        eps_df = dual_df[dual_df['epsilon'] == eps]
        print(f"ε={eps:.4f}: avg clicks={eps_df['clicks'].mean():.0f}, "
              f"avg CPC={eps_df['cpc'].mean():.2f}, "
              f"budget used={eps_df['budget_used'].mean():.1%}")

    return df