hamverbot committed on
Commit
bff9b48
·
verified ·
1 Parent(s): 03e57c7

Upload src/algorithms/baselines.py

Browse files
Files changed (1) hide show
  1. src/algorithms/baselines.py +264 -0
src/algorithms/baselines.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bidding Algorithm Baselines for First-Price Auctions
3
+
4
+ Includes:
5
+ 1. LinearBidder — proportional bidding based on pCTR
6
+ 2. ThresholdBidder — fixed bid if pCTR above threshold
7
+ 3. ValueShadingBidder — value shading for first-price (bid = v/(1+λ))
8
+ 4. RLBBidder — simplified MDP-based RL bidding (Cai et al. 2017)
9
+ """
10
+ import numpy as np
11
+ from collections import deque
12
+
13
+
14
class LinearBidder:
    """Linear bidding baseline: bid scales with pCTR relative to the average.

    bid = base_bid * pctr / avg_pctr, capped by the remaining budget.
    """

    def __init__(self, base_bid, avg_pctr, name="Linear"):
        self.base_bid = base_bid
        self.avg_pctr = avg_pctr
        self.name = name
        self.total_spent = 0.0
        self.remaining_budget = float('inf')
        self.total_wins = 0
        self.t = 0

    def bid(self, pctr, features=None):
        """Return a bid proportional to pctr; 0.0 once the budget is gone."""
        self.t += 1
        if self.remaining_budget <= 0:
            return 0.0
        # Guard against a degenerate (zero/near-zero) average pCTR.
        denom = self.avg_pctr if self.avg_pctr > 1e-6 else 1e-6
        proposed = self.base_bid * pctr / denom
        return proposed if proposed < self.remaining_budget else self.remaining_budget

    def update(self, won, cost, pctr, d_t=None):
        """Book the cost of a won auction; losing updates are no-ops."""
        if not won:
            return
        self.total_wins += 1
        self.total_spent += cost
        self.remaining_budget -= cost

    def set_budget(self, budget):
        """Replace the (default infinite) remaining budget."""
        self.remaining_budget = budget

    def get_stats(self):
        """Summarize spend, wins, and auctions seen so far."""
        stats = dict(
            name=self.name,
            spent=float(self.total_spent),
            remaining=float(self.remaining_budget),
            wins=self.total_wins,
        )
        stats['t'] = self.t
        return stats
50
+
51
+
52
class ThresholdBidder:
    """Flat-bid baseline: submit a fixed bid whenever pCTR clears a threshold."""

    def __init__(self, threshold, bid_value, name="Threshold"):
        self.threshold = threshold
        self.bid_value = bid_value
        self.name = name
        self.total_spent = 0.0
        self.remaining_budget = float('inf')
        self.total_wins = 0
        self.t = 0

    def bid(self, pctr, features=None):
        """Return bid_value when pctr exceeds the threshold and budget allows."""
        self.t += 1
        # Skip entirely when we can no longer afford a full bid.
        affordable = self.remaining_budget >= self.bid_value
        if affordable and pctr > self.threshold:
            return self.bid_value
        return 0.0

    def update(self, won, cost, pctr, d_t=None):
        """Book the cost of a won auction; losing updates are no-ops."""
        if not won:
            return
        self.total_wins += 1
        self.total_spent += cost
        self.remaining_budget -= cost

    def set_budget(self, budget):
        """Replace the (default infinite) remaining budget."""
        self.remaining_budget = budget

    def get_stats(self):
        """Summarize spend, wins, and auctions seen so far."""
        stats = dict(
            name=self.name,
            spent=float(self.total_spent),
            remaining=float(self.remaining_budget),
            wins=self.total_wins,
        )
        stats['t'] = self.t
        return stats
87
+
88
+
89
class ValueShadingBidder:
    """
    Value shading for first-price auctions.

    bid = v / (1 + lambda) where lambda is adapted online from spend feedback.

    Unlike second-price auctions, where bidding your true value is optimal,
    first-price auctions require shading the bid below value.

    Parameters
    ----------
    budget : float
        Total budget B for the episode.
    T : int
        Expected number of auctions; sets the target spend rate rho = B / T
        and the lambda step size 1 / sqrt(T).
    value_per_click : float
        Dollar value assigned to one click; impression value is pctr * vpc.
    name : str
        Label reported in get_stats().
    """

    # Bound on the competing-bid history. A plain list grew without limit and
    # made np.mean O(n) per bid; a bounded deque keeps memory constant and
    # turns the average into a moving window over recent competition.
    HISTORY_MAXLEN = 10_000

    def __init__(self, budget, T, value_per_click, name="ValueShading"):
        self.B = budget
        self.T = T
        self.rho = budget / T          # target spend per auction
        self.vpc = value_per_click
        self.name = name

        # Shading factor lambda, adapted by dual-style gradient steps.
        self.lambd = 0.0
        self.epsilon = 1.0 / np.sqrt(T)  # step size ~ 1/sqrt(T)

        self.total_spent = 0.0
        self.remaining_budget = budget
        self.total_wins = 0
        self.t = 0
        # Bounded history of observed competing bids (oldest entries are
        # evicted automatically once HISTORY_MAXLEN is reached).
        self.competing_bids = deque(maxlen=self.HISTORY_MAXLEN)

    def bid(self, pctr, features=None):
        """Return a shaded bid for an impression with the given pCTR."""
        self.t += 1
        v = pctr * self.vpc

        if self.remaining_budget <= 0:
            return 0.0

        # Shade: bid below value based on observed competition.
        if len(self.competing_bids) > 0:
            avg_competing = np.mean(self.competing_bids)
            shade_factor = 1.0 / (1.0 + self.lambd + 0.1)
            bid = v * shade_factor
            # Clamp to competing bid range. NOTE(review): when
            # avg_competing * 0.5 > v * 0.9, np.clip returns the upper
            # bound (v * 0.9), so the bid never exceeds 90% of value.
            bid = np.clip(bid, avg_competing * 0.5, v * 0.9)
        else:
            bid = v * 0.5  # initial exploration before any feedback

        return min(bid, self.remaining_budget)

    def update(self, won, cost, pctr, d_t=None):
        """Record the auction outcome and take one dual step on lambda.

        d_t, when provided, is the observed competing (winning) bid and is
        appended to the bounded history used for shading.
        """
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

        if d_t is not None:
            self.competing_bids.append(d_t)

        # Dual update: overspending relative to rho raises lambda (more
        # shading); underspending lowers it, floored at 0.
        cost_feedback = cost if won else 0.0
        self.lambd = max(0.0, self.lambd - self.epsilon * (self.rho - cost_feedback))

    def set_budget(self, budget):
        """Reset the remaining budget (consistent with the other bidders)."""
        self.remaining_budget = budget

    def get_stats(self):
        """Summarize lambda, spend, wins, and auctions seen so far."""
        return {
            'name': self.name,
            'lambda': float(self.lambd),
            'spent': float(self.total_spent),
            'remaining': float(self.remaining_budget),
            'wins': self.total_wins,
            't': self.t,
        }
155
+
156
+
157
class RLBBidder:
    """
    Simplified RLB (Reinforcement Learning for Bidding).
    Based on: Cai et al. "Real-Time Bidding by Reinforcement Learning" (WSDM 2017)
    arXiv: 1701.02490

    Uses a simplified MDP with a discretized state space:
        State  = (budget_bucket, pCTR_bucket)
        Action = bid multiplier applied to the impression value pctr * vpc

    Maintains a Q-table updated via temporal-difference (Q-learning) steps.
    """

    def __init__(
        self,
        budget,
        T,
        value_per_click,
        n_budget_buckets=10,
        n_pctr_buckets=5,
        n_bid_multipliers=10,
        learning_rate=0.1,
        discount=0.95,
        exploration_rate=0.1,
        name="RLB"
    ):
        self.B = budget
        self.T = T
        self.vpc = value_per_click
        self.name = name

        self.n_budget = n_budget_buckets
        self.n_pctr = n_pctr_buckets
        self.n_actions = n_bid_multipliers

        # Bid multipliers: 0.1x to 2.0x of value
        self.bid_multipliers = np.linspace(0.1, 2.0, n_bid_multipliers)

        # Q-table: (budget_bucket, pctr_bucket, action)
        self.Q = np.zeros((n_budget_buckets, n_pctr_buckets, n_bid_multipliers))

        self.lr = learning_rate
        self.gamma = discount
        self.epsilon_greedy = exploration_rate

        self.total_spent = 0.0
        self.remaining_budget = budget
        self.total_wins = 0
        self.t = 0

        # Last (state, action) pair, consumed by the TD update in update().
        self.last_state = None
        self.last_action = None

    def _get_state(self, pctr):
        """Discretize state: (budget_ratio_bucket, pctr_bucket).

        Both buckets are clamped into valid index range so that an
        overspent budget or an out-of-range pctr cannot produce a
        negative (wrap-around) Q-table index.
        """
        # Fix: the old `max(self.B, 1)` denominator broke fractional
        # budgets (B < 1 gave a ratio < 1 even with a full budget).
        denom = self.B if self.B > 0 else 1.0
        budget_ratio = self.remaining_budget / denom
        budget_bucket = min(max(int(budget_ratio * self.n_budget), 0), self.n_budget - 1)
        pctr_bucket = min(max(int(pctr * self.n_pctr), 0), self.n_pctr - 1)
        return (budget_bucket, pctr_bucket)

    def bid(self, pctr, features=None):
        """Pick a bid multiplier via epsilon-greedy Q-lookup and return the bid."""
        self.t += 1

        if self.remaining_budget <= 0:
            return 0.0

        state = self._get_state(pctr)
        v = pctr * self.vpc

        # epsilon-greedy action selection
        if np.random.random() < self.epsilon_greedy:
            action = np.random.randint(self.n_actions)
        else:
            action = np.argmax(self.Q[state[0], state[1], :])

        # Remember the transition for the TD update on the next update() call.
        self.last_state = state
        self.last_action = action

        bid = min(v * self.bid_multipliers[action], self.remaining_budget)
        return bid

    def update(self, won, cost, pctr, d_t=None):
        """Book the outcome and apply one Q-learning TD update."""
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

        # TD update (skipped until the first bid() has recorded a state).
        if self.last_state is not None:
            # Reward is the expected click value when the impression is won.
            reward = (pctr * self.vpc) if won else 0.0
            new_state = self._get_state(pctr)

            # Q-learning update
            old_q = self.Q[self.last_state[0], self.last_state[1], self.last_action]
            max_future_q = np.max(self.Q[new_state[0], new_state[1], :])
            new_q = old_q + self.lr * (reward + self.gamma * max_future_q - old_q)
            self.Q[self.last_state[0], self.last_state[1], self.last_action] = new_q

    def set_budget(self, budget):
        """Reset the remaining budget (consistent with the other bidders)."""
        self.remaining_budget = budget

    def get_stats(self):
        """Summarize spend, wins, auctions seen, and mean Q-value."""
        return {
            'name': self.name,
            'spent': float(self.total_spent),
            'remaining': float(self.remaining_budget),
            'wins': self.total_wins,
            't': self.t,
            'q_table_mean': float(np.mean(self.Q)),
        }