File size: 20,684 Bytes
d5feac3 dca61f8 d5feac3 dca61f8 d5feac3 dca61f8 d5feac3 dca61f8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | #!/usr/bin/env python3
"""
RTB Bidding Algorithms Benchmark β First-Price Auctions
=========================================================
Self-contained script for HF Jobs execution.
Includes all 6 algorithms with detailed docstrings explaining each algorithm:
- DualOGD β Lagrangian dual + online gradient descent (Wang et al. 2023)
- TwoSidedDual β Budget cap + spend floor (k% minimum)
- ValueShading β Adaptive value shading for first-price auctions
- RLB β MDP-based reinforcement learning (Cai et al. 2017)
- Linear β Proportional bidding baseline
- Threshold β Fixed-bid-if-pCTR-above-threshold baseline
See README.md for full algorithm descriptions and benchmark results.
See RESEARCH_RESOURCES.md for comprehensive literature survey.
Repo: https://huggingface.co/hamverbot/bidding_algorithms_benchmark
Primary Paper: Wang et al. 2023, arXiv:2304.13477
Usage:
python benchmark_job.py --max_rows 200000 --budget 10000 --T 10000 --n_runs 5
Data Note:
Criteo_x4 (reczoo/Criteo_x4) is 5.6GB across 37 Parquet files.
Streaming 200K rows takes ~7 minutes due to the sequential scan over many files.
For faster iterations, use --max_rows 20000 to load in ~45 seconds.
"""
import os, json, time, argparse
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import roc_auc_score
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# BIDDING ALGORITHMS
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# Each algorithm has a detailed docstring explaining its mechanism.
# All are designed for FIRST-PRICE auctions (winner pays their bid).
# See README.md for the full theoretical treatment.
class DualOGDBidder:
"""
## DualOGD β Lagrangian Dual + Online Gradient Descent
**Paper**: Wang et al. (2023), arXiv:2304.13477
"Learning to Bid in Repeated First-Price Auctions with Budgets"
The canonical approach: cast budget-constrained bidding as Lagrangian
optimization. A dual multiplier Ξ» tracks over/under-spending relative to
target rate Ο = B/T.
**Bid rule**: b_t = argmax_b [(v-b)Β·GΜ(b) β λ·bΒ·GΜ(b)]
Maximizes (expected reward β Ξ» Γ expected cost)
**Update**: Ξ» β max(0, Ξ» β Ρ·(Ο β actual_cost))
- Overspent β Ξ» grows β future bids penalized more
- Underspent β Ξ» shrinks β future bids cheaper
**Regret bound**: Γ(βT) β provably near-optimal.
**Limitation**: Without a floor constraint, Ξ» becomes conservative early
and the algorithm leaves 20-25% of budget unspent. See TwoSidedDual.
"""
def __init__(self, budget, T, vpc, epsilon=None, name="DualOGD"):
self.B=budget; self.T=T; self.rho=budget/T; self.vpc=vpc; self.name=name
self.lambd=0.0; self.epsilon=epsilon or 1.0/np.sqrt(T)
self.total_spent=0.0; self.remaining_budget=budget
self.t=0; self.total_wins=0; self.competing_bids=[]
def bid(self, pctr, features=None):
self.t+=1
if self.remaining_budget<=0: return 0.0
v=pctr*self.vpc; mb=min(v*2.0,self.remaining_budget)
if mb<=0.1: return 0.0
return self._opt(v,mb)
def _opt(self,v,mb,n=50):
if not self.competing_bids: return v*0.5
bb,bs=0.0,-9e9
for b in np.linspace(0.1,mb,n):
wp=np.mean([1.0 if b>=d else 0.0 for d in self.competing_bids])
s=(v-b)*wp-self.lambd*b*wp
if s>bs: bs=s; bb=b
return float(bb)
def update(self,won,cost,pctr,d_t=None):
if won: self.total_spent+=cost; self.remaining_budget-=cost; self.total_wins+=1
if d_t: self.competing_bids.append(d_t)
cf=cost if won else 0.0
self.lambd=max(0.0, self.lambd-self.epsilon*(self.rho-cf))
def get_stats(self):
return {'name':self.name,'lambda':float(self.lambd),'spent':float(self.total_spent),
'remaining':float(self.remaining_budget),'budget_used':float(self.total_spent/self.B) if self.B>0 else 0,
'wins':self.total_wins,'t':self.t,'epsilon':float(self.epsilon),'rho':float(self.rho)}
class TwoSidedDualBidder(DualOGDBidder):
"""
## TwoSidedDual β Budget Cap + Spend Floor
Extension of DualOGD. Two dual variables for two constraints:
**ΞΌ (cap)**: ΞΌ β max(0, ΞΌ β Ξ·βΒ·(Ο β cost))
Penalizes overspending β restrains when ahead on spend.
**Ξ½ (floor)**: Ξ½ β max(0, Ξ½ β Ξ·βΒ·(cost β kΒ·Ο))
Penalizes UNDERspending β encourages when behind on floor.
**Effective multiplier**: (ΞΌ β Ξ½). When Ξ½ > ΞΌ, penalty becomes negative
β bidding is encouraged to hit the spend floor.
**Bid rule**: b_t = argmax_b [(vβb)Β·GΜ(b) β (ΞΌβΞ½)Β·bΒ·GΜ(b)]
This algorithm is the best-performing in our benchmark because it balances
the budget cap with a contractual minimum spend requirement (common in
brand advertising campaigns).
"""
def __init__(self,budget,T,vpc,k=0.8,ec=None,ef=None,name="TwoSidedDual"):
super().__init__(budget,T,vpc,ec,name)
self.k=k; self.k_rho=k*self.rho; self.nu=0.0
self.ef=ef or 1.0/np.sqrt(T); self.mu=self.lambd; self.ec=self.epsilon
def _opt(self,v,mb,n=50):
if not self.competing_bids: return v*0.5
eff=self.mu-self.nu; bb,bs=0.0,-9e9
for b in np.linspace(0.1,mb,n):
wp=np.mean([1.0 if b>=d else 0.0 for d in self.competing_bids])
s=(v-b)*wp-eff*b*wp
if s>bs: bs=s; bb=b
return float(bb)
def update(self,won,cost,pctr,d_t=None):
if won: self.total_spent+=cost; self.remaining_budget-=cost; self.total_wins+=1
if d_t: self.competing_bids.append(d_t)
cf=cost if won else 0.0
self.mu=max(0.0,self.mu-self.ec*(self.rho-cf))
self.nu=max(0.0,self.nu-self.ef*(cf-self.k_rho))
self.lambd=self.mu
def get_stats(self):
s=super().get_stats()
s.update({'mu':float(self.mu),'nu':float(self.nu),'k':float(self.k)})
return s
class ValueShadingBidder:
"""
## ValueShading β Adaptive Bid Shading for First-Price
In first-price auctions, bidding your true value guarantees zero surplus
(winner's curse). ValueShading scales bids: bid = v / (1 + Ξ»).
Ξ» adapts online: wins β Ξ» decreases (bid higher next time),
losses β Ξ» increases (bid more aggressively).
Faster than DualOGD (closed-form, no grid search) but less precise
about budget pacing. Spends full budget but with 16% higher CPC.
"""
def __init__(self,budget,T,vpc,name="ValueShading"):
self.B=budget; self.T=T; self.rho=budget/T; self.vpc=vpc; self.name=name
self.lambd=0.0; self.epsilon=1.0/np.sqrt(T)
self.total_spent=0.0; self.remaining_budget=budget
self.t=0; self.total_wins=0; self.competing_bids=[]
def bid(self,pctr,features=None):
self.t+=1; v=pctr*self.vpc
if self.remaining_budget<=0: return 0.0
if self.competing_bids:
bid=np.clip(v/(1.0+self.lambd+0.1),np.mean(self.competing_bids)*0.5,v*0.9)
else: bid=v*0.5
return min(bid,self.remaining_budget)
def update(self,won,cost,pctr,d_t=None):
if won: self.total_spent+=cost; self.remaining_budget-=cost; self.total_wins+=1
if d_t: self.competing_bids.append(d_t)
self.lambd=max(0.0,self.lambd-self.epsilon*(self.rho-(cost if won else 0.0)))
def get_stats(self):
return {'name':self.name,'lambda':float(self.lambd),'spent':float(self.total_spent),
'remaining':float(self.remaining_budget),'wins':self.total_wins,'t':self.t}
class RLBBidder:
"""
## RLB β Reinforcement Learning for Bidding
**Paper**: Cai et al. (2017), WSDM 2017, arXiv:1701.02490
"Real-Time Bidding by Reinforcement Learning in Display Advertising"
MDP formulation:
- State: (remaining_budget_ratio, pCTR_bucket)
- Action: bid_multiplier β {0.1Γ, 0.3Γ, ..., 2.0Γ} of value
- Reward: pCTR Γ value_per_click if won, else 0
Tabular Q-learning with Ξ΅-greedy exploration. No regret guarantees.
Needs many more auctions to converge than the optimization-based methods.
"""
def __init__(self,budget,T,vpc,name="RLB"):
self.B=budget; self.T=T; self.vpc=vpc; self.name=name
nb,np_,na=10,5,10
self.Q=np.zeros((nb,np_,na)); self.bm=np.linspace(0.1,2.0,na)
self.lr=0.1; self.gamma=0.95; self.eps=0.1
self.total_spent=0.0; self.remaining_budget=budget
self.t=0; self.total_wins=0; self.ls=None; self.la=None; self.nb=nb; self.np=np_
def _s(self,pctr):
br=self.remaining_budget/max(self.B,1)
return (min(int(br*self.nb),self.nb-1), min(int(pctr*self.np),self.np-1))
def bid(self,pctr,features=None):
self.t+=1
if self.remaining_budget<=0: return 0.0
s=self._s(pctr); v=pctr*self.vpc
a=np.random.randint(len(self.bm)) if np.random.random()<self.eps else np.argmax(self.Q[s[0],s[1],:])
self.ls=s; self.la=a
return min(v*self.bm[a],self.remaining_budget)
def update(self,won,cost,pctr,d_t=None):
if won: self.total_spent+=cost; self.remaining_budget-=cost; self.total_wins+=1
if self.ls is not None:
r=(pctr*self.vpc) if won else 0.0
ns=self._s(pctr)
old=self.Q[self.ls[0],self.ls[1],self.la]
self.Q[self.ls[0],self.ls[1],self.la]=old+self.lr*(r+self.gamma*np.max(self.Q[ns[0],ns[1],:])-old)
def get_stats(self):
return {'name':self.name,'spent':float(self.total_spent),'remaining':float(self.remaining_budget),
'wins':self.total_wins,'t':self.t}
class LinearBidder:
"""## Linear β Proportional Bidding Baseline. bid = base_bid Γ (pCTR/avg_pCTR). No adaptation. Serves as lower bound."""
def __init__(self,base_bid,avg_pctr,name="Linear"):
self.base_bid=base_bid; self.avg_pctr=avg_pctr; self.name=name
self.total_spent=0.0; self.remaining_budget=1e9; self.total_wins=0; self.t=0
def bid(self,pctr,features=None):
self.t+=1
if self.remaining_budget<=0: return 0.0
return min(self.base_bid*(pctr/max(self.avg_pctr,1e-6)),self.remaining_budget)
def update(self,won,cost,pctr,d_t=None):
if won: self.total_spent+=cost; self.remaining_budget-=cost; self.total_wins+=1
def set_budget(self,b): self.remaining_budget=b
def get_stats(self):
return {'name':self.name,'spent':float(self.total_spent),'remaining':float(self.remaining_budget),
'wins':self.total_wins,'t':self.t}
class ThresholdBidder:
"""## Threshold β Binary Bidding Baseline. Fixed bid if pCTR > threshold, else skip. Common rule of thumb."""
def __init__(self,threshold,bid_value,name="Threshold"):
self.threshold=threshold; self.bid_value=bid_value; self.name=name
self.total_spent=0.0; self.remaining_budget=1e9; self.total_wins=0; self.t=0
def bid(self,pctr,features=None):
self.t+=1
if self.remaining_budget<self.bid_value: return 0.0
return self.bid_value if pctr>self.threshold else 0.0
def update(self,won,cost,pctr,d_t=None):
if won: self.total_spent+=cost; self.remaining_budget-=cost; self.total_wins+=1
def set_budget(self,b): self.remaining_budget=b
def get_stats(self):
return {'name':self.name,'spent':float(self.total_spent),'remaining':float(self.remaining_budget),
'wins':self.total_wins,'t':self.t}
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# FIRST-PRICE AUCTION SIMULATOR
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
class FPSim:
"""
First-Price Auction Simulator.
Simulates repeated first-price auctions. The maximum competing bid d_t
follows a log-normal distribution whose mean correlates with impression
quality (pCTR) β realistic: popular impressions attract more competition.
Full information feedback: after each auction, d_t is revealed regardless
of whether we won. This enables empirical CDF approaches.
"""
def __init__(self,X,pctr,y,vpc=50,cfg=None,seed=42):
self.X=X; self.pctr=pctr; self.y=y; self.vpc=vpc; self.N=len(X)
self.rng=np.random.RandomState(seed)
cfg=cfg or {}
bm=cfg.get('base_mean',20); cc=cfg.get('ctr_correlation',10); ns=cfg.get('noise_std',0.6)
mp=np.clip(bm+cc*self.pctr,1,200)
self.mp=self.rng.lognormal(np.log(mp),ns,self.N)
if self.X.shape[1]>1: self.mp*=np.exp((0.02*self.X[:,0]+0.01*self.X[:,1])*0.1)
self.mp=np.clip(self.mp,0.5,500)
self.pos=0; self.order=self.rng.permutation(self.N)
def reset(self): self.pos=0; self.order=self.rng.permutation(self.N)
def run(self,algo):
self.reset()
if hasattr(algo,'set_budget'): algo.set_budget(algo.B if hasattr(algo,'B') else 5000)
tc=0; bl,wl,cl=[],[],[]
while self.pos<self.N:
idx=self.order[self.pos]; self.pos+=1
bid=algo.bid(self.pctr[idx],self.X[idx])
rem=algo.remaining_budget if hasattr(algo,'remaining_budget') else 1e9
bid=np.clip(bid,0,rem)
mp=self.mp[idx]; won=bid>=mp; cost=bid if won else 0.0
if won: tc+=int(self.y[idx])
algo.update(won,cost,self.pctr[idx],mp)
bl.append(bid); wl.append(int(won)); cl.append(cost)
s=algo.get_stats()
s.update({'total_clicks':tc,'total_impressions':len(bl),'total_wins':sum(wl),
'total_spent':sum(cl),'ctr':tc/max(sum(wl),1),
'budget_used_frac':sum(cl)/algo.B if hasattr(algo,'B') else 0,
'cpc':sum(cl)/max(tc,1),'avg_bid':float(np.mean(bl)),
'win_rate':sum(wl)/max(len(wl),1),
'avg_market_price':float(np.mean(self.mp))})
return s
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# DATA LOADING
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
def load_data(max_rows=200000,seed=42):
"""
Load and preprocess Criteo_x4.
NOTE: Criteo_x4 is 5.6GB across 37 Parquet files. Streaming 200K rows
takes ~7 minutes due to sequential Parquet scanning. For quick iteration:
--max_rows 20000 (loads in ~45 seconds)
"""
print(f"\nLoading Criteo_x4 ({max_rows} rows)...")
ds=load_dataset("reczoo/Criteo_x4",split="train",streaming=True)
rows=[row for i,row in enumerate(ds) if i<max_rows]
df=pd.DataFrame(rows)
print(f" Loaded {len(df)} rows | CTR: {df['Label'].mean():.4f}")
dc=[f'I{i}' for i in range(1,14)]; sc=[f'C{i}' for i in range(1,27)]
for c in dc: df[c]=df[c].fillna(df[c].median())
for c in sc: df[c]=LabelEncoder().fit_transform(df[c].fillna("MISSING").astype(str))
dd=StandardScaler().fit_transform(df[dc].values)
for i,c in enumerate(dc): df[c]=dd[:,i]
sd=df[sc].values.astype(np.float32)
sd=(sd-sd.mean(0))/(sd.std(0)+1e-8)
for i,c in enumerate(sc): df[c]=sd[:,i]
X=df[dc+sc].values.astype(np.float32); y=df['Label'].values.astype(np.float32)
Xt,Xe,yt,ye=train_test_split(X,y,test_size=0.3,random_state=seed)
return Xt,Xe,yt,ye
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# MAIN
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
def main():
p=argparse.ArgumentParser(description='RTB Bidding Benchmark β First-Price Auctions')
p.add_argument('--max_rows',type=int,default=200000)
p.add_argument('--budget',type=float,default=10000)
p.add_argument('--T',type=int,default=10000)
p.add_argument('--vpc',type=float,default=50)
p.add_argument('--k',type=float,default=0.8)
p.add_argument('--n_runs',type=int,default=5)
p.add_argument('--output',type=str,default='results/benchmark_results.json')
p.add_argument('--seed',type=int,default=42)
args=p.parse_args()
os.makedirs('results',exist_ok=True)
t0=time.time()
print("="*60+"\nRTB BIDDING BENCHMARK β FIRST-PRICE AUCTIONS\n"+"="*60)
print(f"Data: {args.max_rows} rows | Budget: {args.budget} | Auctions: {args.T} | VPC: {args.vpc} | Runs: {args.n_runs}")
print(f"Min spend: {args.k*100:.0f}%")
Xt,Xe,yt,ye=load_data(args.max_rows,args.seed)
print(f"Data load: {time.time()-t0:.0f}s")
ctr=LogisticRegression(max_iter=500,C=0.1,random_state=42)
ctr.fit(Xt,yt)
eauc=roc_auc_score(ye,ctr.predict_proba(Xe)[:,1])
print(f"CTR AUC: test={eauc:.4f}")
pctr=ctr.predict_proba(Xe)[:,1]
print(f"pCTR: mean={pctr.mean():.4f} range=[{pctr.min():.2f},{pctr.max():.2f}]")
all_results={}
for run in range(args.n_runs):
rs=args.seed+run
print(f"\n--- Run {run+1}/{args.n_runs} (seed={rs}) ---")
sim=FPSim(Xe[:args.T],pctr[:args.T],ye[:args.T],vpc=args.vpc,
cfg={'base_mean':20,'ctr_correlation':10,'noise_std':0.6},seed=rs)
algos={
'DualOGD':DualOGDBidder(args.budget,args.T,args.vpc),
'TwoSidedDual':TwoSidedDualBidder(args.budget,args.T,args.vpc,k=args.k),
'ValueShading':ValueShadingBidder(args.budget,args.T,args.vpc),
'RLB':RLBBidder(args.budget,args.T,args.vpc),
'Linear':LinearBidder(20.0,float(pctr.mean())),
'Threshold':ThresholdBidder(0.3,30.0),
}
for a in algos.values():
if hasattr(a,'B'): a.B=args.budget; a.remaining_budget=args.budget
for name,algo in algos.items():
r=sim.run(algo)
if name not in all_results: all_results[name]=[]
all_results[name].append(r)
print(f" {name:<16} clicks={r['total_clicks']:>4} spent={r['total_spent']:>8.1f} "
f"budget={r['budget_used_frac']:.1%} CPC={r['cpc']:.2f}")
print(f"\n{'='*60}\nFINAL RESULTS ({args.n_runs} runs)\n{'='*60}")
print(f"{'Algorithm':<18} {'Clicks':>10} {'CPC':>9} {'Budget%':>9} {'WinRate':>8}")
print("-"*58)
aggregated={}
for name,runs in all_results.items():
clicks=[r['total_clicks'] for r in runs]
cpc=[r['cpc'] for r in runs]
bu=[r['budget_used_frac'] for r in runs]
wr=[r['win_rate'] for r in runs]
aggregated[name]={
'clicks_mean':float(np.mean(clicks)),'clicks_std':float(np.std(clicks)),
'cpc_mean':float(np.mean(cpc)),'cpc_std':float(np.std(cpc)),
'budget_used_mean':float(np.mean(bu)),'budget_used_std':float(np.std(bu)),
'win_rate_mean':float(np.mean(wr)),'win_rate_std':float(np.std(wr))}
ranked=sorted(aggregated.items(),key=lambda x:x[1]['clicks_mean'],reverse=True)
for i,(name,s) in enumerate(ranked):
medal=['π₯','π₯','π₯'][i] if i<3 else ' '
print(f"{medal} {name:<16} {s['clicks_mean']:>7.0f}Β±{s['clicks_std']:.0f} "
f"{s['cpc_mean']:>7.2f} {s['budget_used_mean']:>7.1%} {s['win_rate_mean']:>6.1%}")
output={'config':{'max_rows':args.max_rows,'budget':args.budget,'T':args.T,
'vpc':args.vpc,'k':args.k,'n_runs':args.n_runs,'seed':args.seed,
'ctr_test_auc':float(eauc)},'aggregated':aggregated}
with open(args.output,'w') as f: json.dump(output,f,indent=2)
print(f"\nSaved to {args.output}")
print(f"Total time: {time.time()-t0:.0f}s β Done!")
if __name__=='__main__': main()
|