hamverbot committed
Commit 212eb07 (verified) · 1 parent: 2e0d61d

Upload src/benchmark/run_comparison.py

Files changed (1):
  1. src/benchmark/run_comparison.py +284 -0
src/benchmark/run_comparison.py ADDED
"""
RTB Bidding Algorithm Comparison Framework
==========================================

Runs all bidding algorithms on first-price auction simulations
and produces comprehensive comparison results.

Algorithms:
- DualOGD: Lagrangian dual + online gradient descent (Wang et al. 2023)
- TwoSidedDual: budget cap + spend floor (at least k% of budget spent)
- ValueShading: value shading for first-price auctions
- RLB: MDP-based reinforcement learning (Cai et al. 2017)
- Linear: proportional bidding baseline
- Threshold: fixed bid whenever pCTR clears a threshold (baseline)
"""
import sys
import os
import json
import time
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Add the repo root (two levels above this file's directory) to sys.path
# so the `src.*` imports used below resolve
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))


def load_and_prepare_data(max_rows=100000):
    """Load Criteo_x4 and prepare features + labels."""
    print("=" * 70)
    print("LOADING CRITEO DATA")
    print("=" * 70)

    ds = load_dataset("reczoo/Criteo_x4", split="train", streaming=True)
    rows = []
    for i, row in enumerate(ds):
        if i >= max_rows:
            break
        rows.append(row)

    df = pd.DataFrame(rows)
    print(f"Loaded {len(df)} rows, CTR: {df['Label'].mean():.4f}")

    # Feature columns: 13 dense (I1-I13), 26 categorical (C1-C26)
    dense_cols = [f'I{i}' for i in range(1, 14)]
    sparse_cols = [f'C{i}' for i in range(1, 27)]

    # Handle missing values
    for col in dense_cols:
        df[col] = df[col].fillna(df[col].median())
    for col in sparse_cols:
        df[col] = df[col].fillna("MISSING")

    # Encode sparse features as integer IDs
    for col in sparse_cols:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col].astype(str))

    # Normalize dense features
    scaler = StandardScaler()
    dense_data = scaler.fit_transform(df[dense_cols].values)
    for i, col in enumerate(dense_cols):
        df[col] = dense_data[:, i]

    # Normalize sparse features
    sparse_data = df[sparse_cols].values.astype(np.float32)
    sparse_data = (sparse_data - sparse_data.mean(axis=0)) / (sparse_data.std(axis=0) + 1e-8)
    for i, col in enumerate(sparse_cols):
        df[col] = sparse_data[:, i]
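    # Standardizing LabelEncoder IDs treats arbitrary category codes as ordinal
    # magnitudes: a deliberate shortcut that keeps the feature matrix flat for
    # the logistic-regression baseline (one-hot or hashed encodings would be
    # more faithful, but heavier).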

    feature_cols = dense_cols + sparse_cols
    X = df[feature_cols].values.astype(np.float32)
    y = df['Label'].values.astype(np.float32)

    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )
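    # The 30% test split does double duty downstream: it supplies both the
    # pCTR scores and the first T auction opportunities in run_benchmark().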

    return X_train, X_test, y_train, y_test, df, feature_cols


def train_ctr_model(X_train, y_train):
    """Train a CTR prediction model (logistic regression baseline)."""
    print("\n" + "=" * 70)
    print("TRAINING CTR MODEL")
    print("=" * 70)

    model = LogisticRegression(max_iter=500, C=0.1, random_state=42)
    model.fit(X_train, y_train)

    train_auc = roc_auc_score_safe(y_train, model.predict_proba(X_train)[:, 1])
    print(f"Train AUC: {train_auc:.4f}")

    return model


def roc_auc_score_safe(y_true, y_pred):
    """Safe AUC computation: roc_auc_score raises ValueError when y_true
    contains a single class, so fall back to chance level (0.5)."""
    from sklearn.metrics import roc_auc_score
    if len(np.unique(y_true)) < 2:
        return 0.5
    return roc_auc_score(y_true, y_pred)


def run_benchmark(
    X_test, y_test, ctr_model,
    budget=5000.0,
    T=10000,
    value_per_click=50.0,
    k=0.8,  # minimum spend fraction (used by TwoSidedDual)
    n_runs=3,
    seed=42
):
    """Run all bidding algorithms and compare."""
    print("\n" + "=" * 70)
    print("RUNNING BIDDING BENCHMARK")
    print("=" * 70)
    print(f"Budget: {budget}, T: {T}, Value/Click: {value_per_click}")
    print(f"Minimum spend: {k*100:.0f}%, Runs: {n_runs}")

    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ThresholdBidder, ValueShadingBidder, RLBBidder

    # Get CTR predictions for the auction stream
    pctr_test = ctr_model.predict_proba(X_test)[:, 1]
    print(f"pCTR range: [{pctr_test.min():.4f}, {pctr_test.max():.4f}]")
    print(f"pCTR mean: {pctr_test.mean():.4f}")

    all_results = {}

    for run in range(n_runs):
        run_seed = seed + run
        print(f"\n--- Run {run + 1}/{n_runs} (seed={run_seed}) ---")

        # Create a fresh simulator for each run
        sim = FirstPriceAuctionSimulator(
            features=X_test[:T],
            pctr_true=pctr_test[:T],
            click_labels=y_test[:T],
            value_per_click=value_per_click,
            market_price_config={
                'base_mean': 20.0,
                'ctr_correlation': 10.0,
                'noise_std': 0.6,
            },
            seed=run_seed
        )
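        # Market-price knobs above: competing bids presumably rise with pCTR
        # via `ctr_correlation` on top of `base_mean`, with `noise_std` noise;
        # the exact price distribution is defined inside FirstPriceAuctionSimulator.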

        # Define algorithms
        algorithms = {
            'DualOGD': DualOGDBidder(budget, T, value_per_click),
            'TwoSidedDual': TwoSidedDualBidder(budget, T, value_per_click, k=k),
            'ValueShading': ValueShadingBidder(budget, T, value_per_click),
            'RLB': RLBBidder(budget, T, value_per_click),
            'Linear': LinearBidder(20.0, float(pctr_test.mean())),
            'Threshold': ThresholdBidder(0.3, 30.0),
        }
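        # Sketch of the dual pacing rule behind the first two bidders (the
        # exact update lives in src/algorithms/dual_ogd.py):
        #   bid_t    = value_t / (1 + lambda_t)
        #   lambda_t <- max(0, lambda_t + eta * (spend_t - budget / T))
        # i.e. the multiplier grows when spend outruns the per-round budget;
        # TwoSidedDual presumably adds a second multiplier pushing spend up
        # toward the k * budget floor.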

        # Set budgets (for bidders that track them)
        for algo in algorithms.values():
            if hasattr(algo, 'B'):
                algo.B = budget
                algo.remaining_budget = budget

        # Run
        run_results = sim.run_comparison(algorithms)

        for name, results in run_results.items():
            if name not in all_results:
                all_results[name] = []
            all_results[name].append(results)

    return all_results, pctr_test


def aggregate_results(all_results):
    """Aggregate results across runs (mean ± std over seeds)."""
    print("\n" + "=" * 70)
    print("AGGREGATED RESULTS")
    print("=" * 70)

    aggregated = {}

    for name, runs in all_results.items():
        clicks = [r['total_clicks'] for r in runs]
        cpc = [r.get('cpc', 0) for r in runs]
        budget_used = [r.get('budget_used_frac', 0) for r in runs]
        win_rate = [r.get('win_rate', 0) for r in runs]

        aggregated[name] = {
            'clicks_mean': np.mean(clicks),
            'clicks_std': np.std(clicks),
            'cpc_mean': np.mean(cpc),
            'cpc_std': np.std(cpc),
            'budget_used_mean': np.mean(budget_used),
            'budget_used_std': np.std(budget_used),
            'win_rate_mean': np.mean(win_rate),
            'win_rate_std': np.std(win_rate),
        }

    # Print table
    print(f"\n{'Algorithm':<18} {'Clicks':>10} {'CPC':>10} {'Budget%':>10} {'WinRate':>10}")
    print("-" * 58)

    # Sort by clicks
    sorted_algos = sorted(aggregated.items(), key=lambda x: x[1]['clicks_mean'], reverse=True)

    for name, stats in sorted_algos:
        print(f"{name:<18} {stats['clicks_mean']:>8.0f}±{stats['clicks_std']:.0f} "
              f"{stats['cpc_mean']:>8.2f} {stats['budget_used_mean']:>8.1%} "
              f"{stats['win_rate_mean']:>8.1%}")

    return aggregated


def main():
    import argparse
    parser = argparse.ArgumentParser(description='RTB Bidding Benchmark')
    parser.add_argument('--max_rows', type=int, default=100000)
    parser.add_argument('--budget', type=float, default=5000.0)
    parser.add_argument('--T', type=int, default=10000)
    parser.add_argument('--vpc', type=float, default=50.0)
    parser.add_argument('--k', type=float, default=0.8)
    parser.add_argument('--n_runs', type=int, default=3)
    parser.add_argument('--output', type=str, default='/app/results/benchmark_results.json')
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()
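    # Example invocation (from the repo root; flags as defined above):
    #   python src/benchmark/run_comparison.py --max_rows 50000 --T 5000 \
    #       --budget 2500 --n_runs 5 --output results/benchmark_results.json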

    # Load data
    X_train, X_test, y_train, y_test, df, feature_cols = load_and_prepare_data(
        max_rows=args.max_rows
    )

    # Train CTR model
    ctr_model = train_ctr_model(X_train, y_train)

    # Run benchmark
    all_results, pctr_test = run_benchmark(
        X_test, y_test, ctr_model,
        budget=args.budget,
        T=args.T,
        value_per_click=args.vpc,
        k=args.k,
        n_runs=args.n_runs,
        seed=args.seed
    )

    # Aggregate
    aggregated = aggregate_results(all_results)

    # Save
    os.makedirs(os.path.dirname(args.output), exist_ok=True)
    output = {
        'config': {
            'max_rows': args.max_rows,
            'budget': args.budget,
            'T': args.T,
            'value_per_click': args.vpc,
            'k': args.k,
            'n_runs': args.n_runs,
            'seed': args.seed,
        },
        'aggregated': {k: {kk: float(vv) if isinstance(vv, (np.floating, np.integer)) else vv
                           for kk, vv in v.items()}
                       for k, v in aggregated.items()},
        'raw_runs': {k: [{kk: float(vv) if isinstance(vv, (np.floating, np.integer)) else vv
                          for kk, vv in r.items()}
                         for r in runs]
                     for k, runs in all_results.items()},
    }
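    # The float() casts above guard against numpy scalar types (np.float32,
    # np.int64, ...) that the stdlib json encoder refuses to serialize.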

    with open(args.output, 'w') as f:
        json.dump(output, f, indent=2)

    print(f"\nResults saved to {args.output}")


if __name__ == '__main__':
    main()