File size: 5,294 Bytes
f16da60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
Hyperparameter Sweep for Bidding Algorithms

Sweeps over:
  - Step sizes ε (DualOGD, TwoSidedDual)
  - Budget fractions k (TwoSidedDual) — configured via `k_values`, but only a
    single entry is currently used; k is not yet swept
  - Value per click
  - Budget levels
  - Market price configurations

Each configuration runs all algorithms for comparison.
"""
import sys
import os
import json
import itertools
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


def _json_default(obj):
    """Convert NumPy scalars/arrays to plain Python values for ``json.dump``.

    ``json`` only calls this hook for objects it cannot serialize itself, so
    ordinary ints/floats/strings are unaffected.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def _write_sweep_results(results, output_path):
    """Write the accumulated result rows to ``output_path`` as indented JSON."""
    out_dir = os.path.dirname(output_path)
    # A bare filename has an empty dirname, and os.makedirs('') raises.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, 'w') as f:
        json.dump(results, f, indent=2, default=_json_default)


def run_sweep(
    X_test, y_test, ctr_model,
    T=5000,
    sweep_config=None,
    output_path='/app/results/sweep_results.json'
):
    """Run hyperparameter sweep across all algorithms.

    For every combination of budget, value-per-click, market price
    configuration and step size ε, a fresh ``FirstPriceAuctionSimulator`` is
    built on the first ``T`` rows and the DualOGD, TwoSidedDual, Linear and
    ValueShading bidders are compared on it.

    Args:
        X_test: Feature rows; passed to ``ctr_model.predict_proba`` and
            sliced with ``[:T]``.
        y_test: Click labels; sliced with ``[:T]``.
        ctr_model: Object exposing ``predict_proba(X)`` whose column 1 is used
            as the predicted click-through rate.
        T: Number of leading rows / auction rounds used per run.
        sweep_config: Optional dict with any of the keys ``budgets``,
            ``vpc_values``, ``dual_epsilons``, ``k_values`` and
            ``price_configs``. Missing keys fall back to the built-in
            defaults. The caller's dict is not modified.
        output_path: JSON file that is rewritten with all results collected
            so far after each budget level completes.

    Returns:
        A list with one dict per (configuration, algorithm) pair holding the
        configuration values and the metrics reported by the simulator.
    """
    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ValueShadingBidder

    cfg = {
        'budgets': [2000, 5000, 10000],
        'vpc_values': [30, 50, 100],
        'dual_epsilons': [0.003, 0.01, 0.03, 0.1],
        'k_values': [0.6, 0.8, 0.95],
        'price_configs': [
            {'base_mean': 15, 'ctr_correlation': 5, 'noise_std': 0.4, 'name': 'low_competition'},
            {'base_mean': 20, 'ctr_correlation': 10, 'noise_std': 0.6, 'name': 'medium_competition'},
            {'base_mean': 30, 'ctr_correlation': 20, 'noise_std': 0.8, 'name': 'high_competition'},
        ],
    }
    if sweep_config:
        # Overlay the caller's values so partial configs are accepted.
        cfg.update(sweep_config)

    pctr_test = ctr_model.predict_proba(X_test)[:, 1]

    # Loop-invariant inputs, computed once instead of per configuration.
    features = X_test[:T]
    pctr = pctr_test[:T]
    click_labels = y_test[:T]
    mean_pctr = float(pctr_test.mean())

    # NOTE: k is not swept; only one entry of ``k_values`` is used (the second
    # one when available, otherwise the first).
    k_values = cfg['k_values']
    k = k_values[1] if len(k_values) > 1 else k_values[0]

    all_sweep_results = []

    for budget in cfg['budgets']:
        # product() iterates in the same order as the equivalent nested loops:
        # vpc outermost, then price config, then epsilon.
        for vpc, price_cfg, eps in itertools.product(
            cfg['vpc_values'], cfg['price_configs'], cfg['dual_epsilons']
        ):
            config_id = f"B{budget}_V{vpc}_P{price_cfg['name']}_EPS{eps}"
            print(f"\n{config_id}")

            sim = FirstPriceAuctionSimulator(
                features=features,
                pctr_true=pctr,
                click_labels=click_labels,
                value_per_click=vpc,
                market_price_config=price_cfg,
                seed=42
            )

            algorithms = {
                'DualOGD': DualOGDBidder(budget, T, vpc, epsilon=eps),
                'TwoSidedDual': TwoSidedDualBidder(budget, T, vpc, k=k, epsilon_cap=eps),
                'Linear': LinearBidder(20.0, mean_pctr),
                'ValueShading': ValueShadingBidder(budget, T, vpc),
            }

            # NOTE(review): bidders exposing ``B`` get their budget state
            # overwritten here; presumably this covers bidders (e.g. Linear)
            # that are not constructed with the budget - confirm.
            for algo in algorithms.values():
                if hasattr(algo, 'B'):
                    algo.B = budget
                    algo.remaining_budget = budget

            results = sim.run_comparison(algorithms)

            for algo_name, r in results.items():
                all_sweep_results.append({
                    'config_id': config_id,
                    'budget': budget,
                    'vpc': vpc,
                    'epsilon': eps,
                    'price_config': price_cfg['name'],
                    'algorithm': algo_name,
                    'clicks': r['total_clicks'],
                    'spent': r['total_spent'],
                    'budget_used': r.get('budget_used_frac', 0),
                    'cpc': r.get('cpc', 0),
                    'win_rate': r.get('win_rate', 0),
                })

        # Save incrementally: rewrite the file after each budget level.
        _write_sweep_results(all_sweep_results, output_path)

    return all_sweep_results


def analyze_sweep(sweep_results):
    """Analyze sweep results to find best configurations.

    Prints, for each algorithm, the row with the most clicks, followed by the
    average clicks, CPC and budget usage of DualOGD for each ε value.

    Args:
        sweep_results: List of result dicts with at least the keys
            ``algorithm``, ``clicks``, ``cpc``, ``budget``, ``vpc``,
            ``epsilon``, ``price_config`` and ``budget_used``.

    Returns:
        A ``pandas.DataFrame`` built from ``sweep_results`` (empty when no
        results were supplied).
    """
    df = pd.DataFrame(sweep_results)

    print("\n" + "=" * 70)
    print("SWEEP ANALYSIS")
    print("=" * 70)

    # An empty result list yields a frame without columns; indexing it by
    # 'algorithm' below would raise KeyError, so stop here.
    if df.empty:
        print("\nNo sweep results to analyze.")
        return df

    # Best by algorithm
    for algo in df['algorithm'].unique():
        algo_df = df[df['algorithm'] == algo]
        best = algo_df.loc[algo_df['clicks'].idxmax()]
        print(f"\n{algo} best: clicks={best['clicks']}, CPC={best['cpc']:.2f}, "
              f"budget={best['budget']}, vpc={best['vpc']}, eps={best['epsilon']}, "
              f"price={best['price_config']}")

    # Effect of epsilon on DualOGD
    print("\n--- Effect of ε on DualOGD ---")
    dual_df = df[df['algorithm'] == 'DualOGD']
    for eps in sorted(dual_df['epsilon'].unique()):
        eps_df = dual_df[dual_df['epsilon'] == eps]
        print(f"ε={eps:.4f}: avg clicks={eps_df['clicks'].mean():.0f}, "
              f"avg CPC={eps_df['cpc'].mean():.2f}, "
              f"budget used={eps_df['budget_used'].mean():.1%}")

    return df