hamverbot
/

bidding_algorithms_benchmark

ml-intern

Model card Files Files and versions

xet

Community

hamverbot committed 3 days ago

Commit

f16da60

verified ·

1 Parent(s): 212eb07

Upload src/benchmark/sweep.py

Browse files

Files changed (1) hide show

src/benchmark/sweep.py +134 -0

src/benchmark/sweep.py ADDED Viewed

	@@ -0,0 +1,134 @@

+"""
+Hyperparameter Sweep for Bidding Algorithms
+Sweeps over:
+  - Step sizes ε (DualOGD, TwoSidedDual)
+  - Budget fractions k (TwoSidedDual; only the second entry of `k_values` is currently used, k is not swept)
+  - Value per click
+  - Budget levels
+  - Market price configurations
+Each configuration runs all algorithms for comparison.
+"""
+import sys
+import os
+import json
+import itertools
+import numpy as np
+import pandas as pd
+from datasets import load_dataset
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+def run_sweep(
+    X_test, y_test, ctr_model,
+    T=5000,
+    sweep_config=None,
+    output_path='/app/results/sweep_results.json'
+):
+    """Run hyperparameter sweep across all algorithms."""
+    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
+    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
+    from src.algorithms.baselines import LinearBidder, ThresholdBidder, ValueShadingBidder, RLBBidder
+    if sweep_config is None:
+        sweep_config = {
+            'budgets': [2000, 5000, 10000],
+            'vpc_values': [30, 50, 100],
+            'dual_epsilons': [0.003, 0.01, 0.03, 0.1],
+            'k_values': [0.6, 0.8, 0.95],
+            'price_configs': [
+                {'base_mean': 15, 'ctr_correlation': 5, 'noise_std': 0.4, 'name': 'low_competition'},
+                {'base_mean': 20, 'ctr_correlation': 10, 'noise_std': 0.6, 'name': 'medium_competition'},
+                {'base_mean': 30, 'ctr_correlation': 20, 'noise_std': 0.8, 'name': 'high_competition'},
+            ]
+        }
+    pctr_test = ctr_model.predict_proba(X_test)[:, 1]
+    all_sweep_results = []
+    for budget in sweep_config['budgets']:
+        for vpc in sweep_config['vpc_values']:
+            for price_cfg in sweep_config['price_configs']:
+                for eps in sweep_config['dual_epsilons']:
+                    config_id = f"B{budget}_V{vpc}_P{price_cfg['name']}_EPS{eps}"
+                    print(f"\n{config_id}")
+                    sim = FirstPriceAuctionSimulator(
+                        features=X_test[:T],
+                        pctr_true=pctr_test[:T],
+                        click_labels=y_test[:T],
+                        value_per_click=vpc,
+                        market_price_config=price_cfg,
+                        seed=42
+                    )
+                    algorithms = {
+                        'DualOGD': DualOGDBidder(budget, T, vpc, epsilon=eps),
+                        'TwoSidedDual': TwoSidedDualBidder(budget, T, vpc, k=sweep_config['k_values'][1], epsilon_cap=eps),
+                        'Linear': LinearBidder(20.0, float(pctr_test.mean())),
+                        'ValueShading': ValueShadingBidder(budget, T, vpc),
+                    }
+                    for algo in algorithms.values():
+                        if hasattr(algo, 'B'):
+                            algo.B = budget
+                            algo.remaining_budget = budget
+                    results = sim.run_comparison(algorithms)
+                    for algo_name, r in results.items():
+                        all_sweep_results.append({
+                            'config_id': config_id,
+                            'budget': budget,
+                            'vpc': vpc,
+                            'epsilon': eps,
+                            'price_config': price_cfg['name'],
+                            'algorithm': algo_name,
+                            'clicks': r['total_clicks'],
+                            'spent': r['total_spent'],
+                            'budget_used': r.get('budget_used_frac', 0),
+                            'cpc': r.get('cpc', 0),
+                            'win_rate': r.get('win_rate', 0),
+                        })
+        # Save incrementally
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        with open(output_path, 'w') as f:
+            json.dump(all_sweep_results, f, indent=2)
+    return all_sweep_results
+def analyze_sweep(sweep_results):
+    """Analyze sweep results to find best configurations."""
+    df = pd.DataFrame(sweep_results)
+    print("\n" + "=" * 70)
+    print("SWEEP ANALYSIS")
+    print("=" * 70)
+    # Best by algorithm
+    for algo in df['algorithm'].unique():
+        algo_df = df[df['algorithm'] == algo]
+        best = algo_df.loc[algo_df['clicks'].idxmax()]
+        print(f"\n{algo} best: clicks={best['clicks']}, CPC={best['cpc']:.2f}, "
+              f"budget={best['budget']}, vpc={best['vpc']}, eps={best['epsilon']}, "
+              f"price={best['price_config']}")
+    # Effect of epsilon on DualOGD
+    print("\n--- Effect of ε on DualOGD ---")
+    dual_df = df[df['algorithm'] == 'DualOGD']
+    for eps in sorted(dual_df['epsilon'].unique()):
+        eps_df = dual_df[dual_df['epsilon'] == eps]
+        print(f"ε={eps:.4f}: avg clicks={eps_df['clicks'].mean():.0f}, "
+              f"avg CPC={eps_df['cpc'].mean():.2f}, "
+              f"budget used={eps_df['budget_used'].mean():.1%}")
+    return df