hamverbot committed on
Commit
03e57c7
·
verified ·
1 Parent(s): 431ef2b

Upload src/algorithms/dual_ogd.py

Browse files
Files changed (1) hide show
  1. src/algorithms/dual_ogd.py +276 -0
src/algorithms/dual_ogd.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DualOGD Bidding Algorithm
3
+ Based on: Wang et al. "Learning to Bid in Repeated First-Price Auctions with Budgets" (2023)
4
+ arXiv: 2304.13477, Algorithm 1
5
+
6
+ The canonical Lagrangian dual multiplier approach with online gradient descent.
7
+
8
+ Core update:
9
+ λ_{t+1} = Proj_{λ≥0}(λ_t − ε · (ρ − c̃_t(b_t)))
10
+
11
+ Bid rule:
12
+ b_t = argmax_b (r̃_t(v_t, b) − λ_t · c̃_t(b))
13
+
14
+ Where:
15
+ v_t = value of winning = pCTR × value_per_click
16
+ r̃_t(v,b) = (v-b) · G̃_t(b) — empirical expected reward
17
+ c̃_t(b) = b · G̃_t(b) — empirical expected cost
18
+ G̃_t(b) = empirical win probability from historical competing bids
19
+ ρ = B/T = target spend per auction
20
+
21
+ The dual multiplier λ acts as a pace multiplier:
22
+ - If you overspend → λ increases → future bids are penalized more → spend decreases
23
+ - If you underspend → λ decreases → spend is penalized less, so bids rise → spend increases
24
+ """
25
+ import numpy as np
26
+
27
+
28
class DualOGDBidder:
    """
    Dual OGD bidder for first-price auctions with budget constraint.

    Full information feedback: observes all maximum competing bids d_t.

    The bidder keeps one dual multiplier ``lambd`` (λ ≥ 0). Each auction it
    grid-searches the bid maximising ``(v - b)·G(b) - λ·b·G(b)``, where
    ``G(b)`` is the fraction of recorded competing bids that are ``<= b``.
    After each auction ``update`` moves λ by projected gradient descent:
    ``λ ← max(0, λ - ε·(ρ - cost))``.
    """

    # Bids at or below this level are not placed; it is also the lower end
    # of the candidate grid used by the grid search.
    _MIN_BID = 0.1
    # Bids are capped at this multiple of the impression value v.
    _MAX_BID_VALUE_MULTIPLE = 2.0
    # Fraction of v that is bid while no competing bid has been recorded yet.
    _EXPLORATION_VALUE_FRACTION = 0.5

    def __init__(
        self,
        budget,
        T,
        value_per_click,
        epsilon=None,
        empirical_cdf=None,
        name="DualOGD",
    ):
        """
        Args:
            budget: Total budget B
            T: Time horizon (number of auctions); must be positive
            value_per_click: Value of each click in currency units
            epsilon: Step size for dual update. Default: 1/sqrt(T)
            empirical_cdf: Accepted for interface compatibility; this class
                does not use it and estimates win probability from its own
                list of recorded competing bids instead.
            name: Algorithm name for logging

        Raises:
            ValueError: If ``T`` is not positive (ρ = B/T and the default
                step size 1/sqrt(T) would be undefined).
        """
        if T <= 0:
            raise ValueError(f"T must be a positive number of auctions, got {T!r}")

        self.B = budget
        self.T = T
        self.rho = budget / T  # Target spend per auction
        self.vpc = value_per_click
        self.name = name

        # Dual multiplier λ
        self.lambd = 0.0

        # Step size
        self.epsilon = epsilon if epsilon is not None else 1.0 / np.sqrt(T)

        # Spend tracking
        self.total_spent = 0.0
        self.remaining_budget = budget
        self.t = 0
        self.total_wins = 0
        self.total_clicks = 0  # Initialised here; not updated by this class

        # History for empirical estimation
        self.competing_bids = []  # All observed d_t values

    def bid(self, pctr, features=None):
        """
        Compute bid for current auction.

        Increments the auction counter ``t`` on every call, including calls
        that return 0.0.

        Args:
            pctr: Predicted click probability pCTR ∈ [0,1]
            features: Optional feature vector (unused in non-contextual version)

        Returns:
            bid_price: Bid in [0, remaining_budget]; 0.0 when the budget is
                exhausted or the largest allowed bid is not above the
                minimum bid.
        """
        self.t += 1

        # Check if budget exhausted
        if self.remaining_budget <= 0:
            return 0.0

        v = pctr * self.vpc  # Value of winning this impression

        # Largest allowed bid: at most twice the value, and never more than
        # the remaining budget.
        max_bid = min(v * self._MAX_BID_VALUE_MULTIPLE, self.remaining_budget)

        if max_bid <= self._MIN_BID:
            return 0.0

        # Find b_t = argmax_b (r̃_t(v,b) - λ · c̃_t(b))
        return self._find_optimal_bid(v, max_bid)

    def _find_optimal_bid(self, v, max_bid, n_candidates=50):
        """
        Grid search for the bid maximising reward minus λ-weighted cost.

        Args:
            v: Value of winning the impression
            max_bid: Upper bound on the bid
            n_candidates: Number of evenly spaced candidate bids to score

        Returns:
            The best-scoring candidate as a float. On ties the lowest
            candidate wins. With no recorded competing bids, half of ``v``
            is returned, clamped to ``max_bid``.
        """
        if not self.competing_bids:
            # No history: bid half of value as exploration, but never above
            # the cap (which already accounts for the remaining budget).
            return float(min(v * self._EXPLORATION_VALUE_FRACTION, max_bid))

        candidates = np.linspace(self._MIN_BID, max_bid, n_candidates)
        best_score = -float("inf")
        best_bid = candidates[0]

        for b in candidates:
            win_prob = self._empirical_win_prob(b)
            reward = (v - b) * win_prob
            cost = b * win_prob
            score = reward - self.lambd * cost

            # Strict comparison keeps the first (lowest) bid among ties.
            if score > best_score:
                best_score = score
                best_bid = b

        return float(best_bid)

    def _empirical_win_prob(self, b):
        """G̃_t(b) = fraction of historical competing bids ≤ b (0.5 if none)."""
        if not self.competing_bids:
            return 0.5
        history = np.asarray(self.competing_bids, dtype=float)
        return float(np.mean(history <= b))

    def _empirical_expected_cost(self, b):
        """c̃_t(b) = b · G̃_t(b)."""
        return b * self._empirical_win_prob(b)

    def update(self, won, cost, pctr, d_t=None):
        """
        Update state after observing auction outcome.

        Args:
            won: bool, whether bid won
            cost: actual cost incurred (bid price in first-price)
            pctr: pCTR used (for logging); not read by this method
            d_t: maximum competing bid (observed under full feedback);
                recorded for win-probability estimation when not None
        """
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

        # Record competing bid for empirical estimation
        if d_t is not None:
            self.competing_bids.append(d_t)

        # Dual multiplier update: λ_{t+1} = max(0, λ_t - ε·(ρ - c̃_t(b_t)))
        # Use actual cost as feedback: gradient = ρ - cost (cost is 0 on a loss)
        cost_feedback = cost if won else 0.0
        gradient = self.rho - cost_feedback
        self.lambd = max(0.0, self.lambd - self.epsilon * gradient)

    def get_stats(self):
        """Get current algorithm statistics as a plain dict."""
        return {
            'name': self.name,
            'lambda': float(self.lambd),
            'spent': float(self.total_spent),
            'remaining': float(self.remaining_budget),
            'budget_used': float(self.total_spent / self.B) if self.B > 0 else 0,
            'wins': self.total_wins,
            't': self.t,
            'epsilon': float(self.epsilon),
            'rho': float(self.rho),
        }
175
+
176
+
177
class TwoSidedDualBidder(DualOGDBidder):
    """
    Two-sided dual multiplier bidder: budget cap + spend floor.

    Adds a second dual variable ν to enforce minimum spend (k%):
        μ: cap penalty — restrains when ahead on spend
        ν: floor incentive — encourages when behind on spend

    Updates (both projected onto the non-negative reals via max(0, ·)):
        μ_{t+1} = Proj(μ_t - η₁·(ρ - c̃_t(b_t)))      # cap
        ν_{t+1} = Proj(ν_t - η₂·(c̃_t(b_t) - kρ))     # floor

    Bid rule:
        b_t = argmax_b (r̃_t(v,b) - (μ_t - ν_t)·c̃_t(b))

    When μ > ν: cap dominates → bid conservatively
    When ν > μ: floor dominates → bid aggressively
    """

    def __init__(
        self,
        budget,
        T,
        value_per_click,
        k=0.8,  # Minimum spend fraction
        epsilon_cap=None,
        epsilon_floor=None,
        empirical_cdf=None,
        name="TwoSidedDual",
    ):
        """
        Args:
            budget: Total budget B
            T: Time horizon (number of auctions)
            value_per_click: Value of each click in currency units
            k: Minimum spend fraction; the per-auction floor target is k·ρ
            epsilon_cap: Step size η₁ for the cap multiplier μ; forwarded to
                the base class as its ``epsilon`` (base default applies
                when None)
            epsilon_floor: Step size η₂ for the floor multiplier ν.
                Default: 1/sqrt(T)
            empirical_cdf: Forwarded to the base class unchanged
            name: Algorithm name for logging
        """
        super().__init__(budget, T, value_per_click, epsilon_cap, empirical_cdf, name)
        self.k = k  # Minimum spend fraction
        self.k_rho = k * self.rho  # Target minimum spend per auction

        # Floor dual multiplier ν
        self.nu = 0.0

        # Floor step size
        self.epsilon_floor = epsilon_floor if epsilon_floor is not None else 1.0 / np.sqrt(T)

        # Cap multiplier μ starts from the base multiplier's initial value.
        # It is a separate attribute; update() copies it back into lambd.
        self.mu = self.lambd
        self.epsilon_cap = self.epsilon

    def _find_optimal_bid(self, v, max_bid, n_candidates=50):
        """
        Bid with combined cap+floor penalty: (μ - ν) multiplier.

        Args:
            v: Value of winning the impression
            max_bid: Upper bound on the bid
            n_candidates: Number of evenly spaced candidate bids to score

        Returns:
            The best-scoring candidate as a float. On ties the lowest
            candidate wins. With no recorded competing bids, half of ``v``
            is returned, clamped to ``max_bid``.
        """
        if not self.competing_bids:
            # No history yet: explore at half the value, but never above the
            # cap the caller computed.
            return float(min(v * 0.5, max_bid))

        # Grid starts at 0.1, the same lower bound the base grid search uses.
        candidates = np.linspace(0.1, max_bid, n_candidates)
        best_score = -float("inf")
        best_bid = candidates[0]

        # Net penalty on expected cost; negative when the floor dominates.
        effective_multiplier = self.mu - self.nu

        for b in candidates:
            win_prob = self._empirical_win_prob(b)
            reward = (v - b) * win_prob
            cost = b * win_prob
            score = reward - effective_multiplier * cost

            # Strict comparison keeps the first (lowest) bid among ties.
            if score > best_score:
                best_score = score
                best_bid = b

        return float(best_bid)

    def update(self, won, cost, pctr, d_t=None):
        """
        Update both dual variables after observing an auction outcome.

        Args:
            won: bool, whether bid won
            cost: actual cost incurred; counted only when ``won`` is true
            pctr: pCTR used; not read by this method
            d_t: maximum competing bid; recorded when not None
        """
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

        if d_t is not None:
            self.competing_bids.append(d_t)

        cost_feedback = cost if won else 0.0

        # Cap update: μ_{t+1} = max(0, μ_t - η₁·(ρ - cost))
        cap_gradient = self.rho - cost_feedback
        self.mu = max(0.0, self.mu - self.epsilon_cap * cap_gradient)

        # Floor update: ν_{t+1} = max(0, ν_t - η₂·(cost - kρ))
        floor_gradient = cost_feedback - self.k_rho
        self.nu = max(0.0, self.nu - self.epsilon_floor * floor_gradient)

        # Keep lambd in sync for stats
        self.lambd = self.mu

    def get_stats(self):
        """Return the base statistics extended with μ, ν, μ-ν, k and k·ρ."""
        stats = super().get_stats()
        stats.update({
            'mu': float(self.mu),
            'nu': float(self.nu),
            'effective_multiplier': float(self.mu - self.nu),
            'k': float(self.k),
            'k_rho': float(self.k_rho),
        })
        return stats