#!/usr/bin/env python3
"""
RTB Bidding Algorithms Benchmark — First-Price Auctions
=========================================================

Self-contained script for HF Jobs execution. Includes all 6 algorithms with
detailed docstrings explaining each algorithm:

- DualOGD      — Lagrangian dual + online gradient descent (Wang et al. 2023)
- TwoSidedDual — Budget cap + spend floor (k% minimum)
- ValueShading — Adaptive value shading for first-price auctions
- RLB          — MDP-based reinforcement learning (Cai et al. 2017)
- Linear       — Proportional bidding baseline
- Threshold    — Fixed-bid-if-pCTR-above-threshold baseline

See README.md for full algorithm descriptions and benchmark results.
See RESEARCH_RESOURCES.md for comprehensive literature survey.

Repo: https://huggingface.co/hamverbot/bidding_algorithms_benchmark
Primary Paper: Wang et al. 2023, arXiv:2304.13477

Usage:
    python benchmark_job.py --max_rows 200000 --budget 10000 --T 10000 --n_runs 5

Data Note:
    Criteo_x4 (reczoo/Criteo_x4) is 5.6GB across 37 Parquet files. Streaming
    200K rows takes ~7 minutes due to the sequential scan over many files.
    For faster iterations, use --max_rows 20000 to load in ~45 seconds.

NOTE(review): this file was recovered from a whitespace-mangled copy in which
several spans were corrupted (parts of RLB, the Linear baseline, the auction
loop, and the data-loading / CLI bodies). Those spans were reconstructed from
the surviving fragments and docstrings — verify against the repo original.
"""
import argparse
import bisect
import json
import os
import time

import numpy as np
import pandas as pd

# Heavy third-party imports (`datasets`, `sklearn`) are deliberately deferred
# to load_data() so the bidder classes can be imported without the ML stack.


# ═══════════════════════════════════════════════════════════════
# BIDDING ALGORITHMS
# ═══════════════════════════════════════════════════════════════
# Each algorithm has a detailed docstring explaining its mechanism.
# All are designed for FIRST-PRICE auctions (winner pays their bid).
# See README.md for the full theoretical treatment.


class DualOGDBidder:
    """
    ## DualOGD — Lagrangian Dual + Online Gradient Descent

    **Paper**: Wang et al. (2023), arXiv:2304.13477
    "Learning to Bid in Repeated First-Price Auctions with Budgets"

    The canonical approach: cast budget-constrained bidding as Lagrangian
    optimization. A dual multiplier λ tracks over/under-spending relative to
    the target rate ρ = B/T.

    **Bid rule**:  b_t = argmax_b [(v−b)·G̃(b) − λ·b·G̃(b)]
      Maximizes (expected reward − λ × expected cost), where G̃ is the
      empirical CDF of observed competing bids (full-information feedback).

    **Update**:    λ ← max(0, λ − ε·(ρ − actual_cost))
      - Overspent  → λ grows   → future bids penalized more
      - Underspent → λ shrinks → future bids cheaper

    **Regret bound**: Õ(√T) — provably near-optimal.

    **Limitation**: Without a floor constraint, λ becomes conservative early
    and the algorithm leaves 20-25% of budget unspent. See TwoSidedDual.
    """

    def __init__(self, budget, T, vpc, epsilon=None, name="DualOGD"):
        """
        Args:
            budget:  total budget B for the campaign.
            T:       horizon (number of auctions).
            vpc:     value per click; impression value v = pCTR * vpc.
            epsilon: OGD step size for the dual; defaults to 1/√T.
            name:    label used in reports.
        """
        self.B = budget
        self.T = T
        self.rho = budget / T            # target per-round spend rate
        self.vpc = vpc
        self.name = name
        self.lambd = 0.0                 # dual multiplier (projected to λ ≥ 0)
        self.epsilon = epsilon or 1.0 / np.sqrt(T)
        self.total_spent = 0.0
        self.remaining_budget = budget
        self.t = 0
        self.total_wins = 0
        # Revealed max competing bids, kept SORTED (bisect.insort in update)
        # so the empirical CDF in _opt is an O(log n) searchsorted count.
        self.competing_bids = []

    def bid(self, pctr, features=None):
        """Return the bid for one auction (0.0 = skip)."""
        self.t += 1
        if self.remaining_budget <= 0:
            return 0.0
        v = pctr * self.vpc
        mb = min(v * 2.0, self.remaining_budget)  # cap bids at 2× value
        if mb <= 0.1:
            return 0.0
        return self._opt(v, mb)

    def _opt(self, v, mb, n=50):
        """Grid-search argmax_b [(v−b) − λ·b] · G̃(b) over [0.1, mb].

        G̃(b) = fraction of observed competing bids ≤ b (win probability).
        Computed as searchsorted count over the sorted history — identical
        values to averaging indicator variables, but O(n·log h) not O(n·h).
        """
        if not self.competing_bids:
            return v * 0.5               # no market info yet: shade to half value
        grid = np.linspace(0.1, mb, n)
        h = len(self.competing_bids)
        win_prob = np.searchsorted(self.competing_bids, grid, side='right') / h
        scores = (v - grid) * win_prob - self.lambd * grid * win_prob
        # np.argmax keeps the first maximum, matching strict ">" tie-breaking.
        return float(grid[int(np.argmax(scores))])

    def update(self, won, cost, pctr, d_t=None):
        """Record the auction outcome and take one OGD step on the dual.

        Args:
            won:  whether we won the auction.
            cost: amount paid (our bid, first-price) if won, else ignored.
            pctr: predicted CTR of the impression (unused here).
            d_t:  revealed max competing bid (full-information feedback).
        """
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1
        # `is not None` (not truthiness): a legitimate d_t of 0.0 must count.
        if d_t is not None:
            bisect.insort(self.competing_bids, d_t)
        spent_now = cost if won else 0.0
        # Gradient step λ ← λ − ε·(ρ − cost), projected onto λ ≥ 0.
        self.lambd = max(0.0, self.lambd - self.epsilon * (self.rho - spent_now))

    def get_stats(self):
        """Return a JSON-serializable snapshot of the bidder state."""
        return {'name': self.name, 'lambda': float(self.lambd),
                'spent': float(self.total_spent),
                'remaining': float(self.remaining_budget),
                'budget_used': float(self.total_spent / self.B) if self.B > 0 else 0,
                'wins': self.total_wins, 't': self.t,
                'epsilon': float(self.epsilon), 'rho': float(self.rho)}


class TwoSidedDualBidder(DualOGDBidder):
    """
    ## TwoSidedDual — Budget Cap + Spend Floor

    Extension of DualOGD. Two dual variables for two constraints:

    **μ (cap)**:   μ ← max(0, μ − η₁·(ρ − cost))
      Penalizes overspending → restrains when ahead on spend.

    **ν (floor)**: ν ← max(0, ν − η₂·(cost − k·ρ))
      Penalizes UNDERspending → encourages when behind on floor.

    **Effective multiplier**: (μ − ν). When ν > μ, the penalty becomes
    negative → bidding is encouraged to hit the spend floor.

    **Bid rule**: b_t = argmax_b [(v−b)·G̃(b) − (μ−ν)·b·G̃(b)]

    This algorithm is the best-performing in our benchmark because it
    balances the budget cap with a contractual minimum spend requirement
    (common in brand advertising campaigns).
    """

    def __init__(self, budget, T, vpc, k=0.8, ec=None, ef=None,
                 name="TwoSidedDual"):
        """
        Args:
            k:  spend-floor fraction — the campaign should spend ≥ k·ρ per round.
            ec: step size for the cap dual μ (defaults to 1/√T via base class).
            ef: step size for the floor dual ν (defaults to 1/√T).
        """
        super().__init__(budget, T, vpc, ec, name)
        self.k = k
        self.k_rho = k * self.rho        # per-round spend floor
        self.nu = 0.0                    # floor dual
        self.ef = ef or 1.0 / np.sqrt(T)
        self.mu = self.lambd             # cap dual (mirrors base λ)
        self.ec = self.epsilon

    def _opt(self, v, mb, n=50):
        """Same grid search as DualOGD but with effective multiplier (μ−ν)."""
        if not self.competing_bids:
            return v * 0.5
        eff = self.mu - self.nu
        grid = np.linspace(0.1, mb, n)
        h = len(self.competing_bids)
        win_prob = np.searchsorted(self.competing_bids, grid, side='right') / h
        scores = (v - grid) * win_prob - eff * grid * win_prob
        return float(grid[int(np.argmax(scores))])

    def update(self, won, cost, pctr, d_t=None):
        """Outcome bookkeeping plus OGD steps on both duals."""
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1
        if d_t is not None:
            bisect.insort(self.competing_bids, d_t)
        cf = cost if won else 0.0
        self.mu = max(0.0, self.mu - self.ec * (self.rho - cf))
        self.nu = max(0.0, self.nu - self.ef * (cf - self.k_rho))
        self.lambd = self.mu             # keep base-class stats consistent

    def get_stats(self):
        s = super().get_stats()
        s.update({'mu': float(self.mu), 'nu': float(self.nu), 'k': float(self.k)})
        return s


class ValueShadingBidder:
    """
    ## ValueShading — Adaptive Bid Shading for First-Price

    In first-price auctions, bidding your true value guarantees zero surplus
    (winner's curse). ValueShading scales bids: bid = v / (1 + λ).

    λ adapts online: wins → λ decreases (bid higher next time),
    losses → λ increases (bid more aggressively).

    Faster than DualOGD (closed-form, no grid search) but less precise about
    budget pacing. Spends full budget but with 16% higher CPC.
    """

    def __init__(self, budget, T, vpc, name="ValueShading"):
        self.B = budget
        self.T = T
        self.rho = budget / T
        self.vpc = vpc
        self.name = name
        self.lambd = 0.0
        self.epsilon = 1.0 / np.sqrt(T)
        self.total_spent = 0.0
        self.remaining_budget = budget
        self.t = 0
        self.total_wins = 0
        self.competing_bids = []
        self._cb_sum = 0.0               # running sum → O(1) mean per bid

    def bid(self, pctr, features=None):
        """Closed-form shaded bid, clipped to a market-aware window."""
        self.t += 1
        v = pctr * self.vpc
        if self.remaining_budget <= 0:
            return 0.0
        if self.competing_bids:
            mkt_mean = self._cb_sum / len(self.competing_bids)
            # Shade by (1+λ+0.1); keep within [half market mean, 90% of value].
            b = np.clip(v / (1.0 + self.lambd + 0.1), mkt_mean * 0.5, v * 0.9)
        else:
            b = v * 0.5
        return float(min(b, self.remaining_budget))

    def update(self, won, cost, pctr, d_t=None):
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1
        if d_t is not None:
            self.competing_bids.append(d_t)
            self._cb_sum += d_t
        self.lambd = max(0.0, self.lambd - self.epsilon *
                         (self.rho - (cost if won else 0.0)))

    def get_stats(self):
        return {'name': self.name, 'lambda': float(self.lambd),
                'spent': float(self.total_spent),
                'remaining': float(self.remaining_budget),
                'wins': self.total_wins, 't': self.t}


class RLBBidder:
    """
    ## RLB — Reinforcement Learning for Bidding

    **Paper**: Cai et al. (2017), WSDM 2017, arXiv:1701.02490
    "Real-Time Bidding by Reinforcement Learning in Display Advertising"

    MDP formulation:
    - State:  (remaining_budget_ratio, pCTR_bucket)
    - Action: bid_multiplier ∈ {0.1×, 0.3×, ..., 2.0×} of value
    - Reward: pCTR × value_per_click if won, else 0

    Tabular Q-learning with ε-greedy exploration. No regret guarantees.
    Needs many more auctions to converge than the optimization-based methods.

    NOTE(review): bid()/update() bodies were reconstructed from a corrupted
    source — confirm against the repo original.
    """

    def __init__(self, budget, T, vpc, name="RLB"):
        self.B = budget
        self.T = T
        self.vpc = vpc
        self.name = name
        nb, np_, na = 10, 5, 10          # budget buckets, pCTR buckets, actions
        self.Q = np.zeros((nb, np_, na))
        self.bm = np.linspace(0.1, 2.0, na)  # bid multipliers of value
        self.lr = 0.1                    # Q-learning rate
        self.gamma = 0.95                # discount factor
        self.eps = 0.1                   # ε-greedy exploration rate
        self.total_spent = 0.0
        self.remaining_budget = budget
        self.t = 0
        self.total_wins = 0
        self.ls = None                   # last state (for the TD update)
        self.la = None                   # last action
        self.nb = nb
        self.np = np_

    def _s(self, pctr):
        """Discretize (budget ratio, pCTR) into table indices."""
        br = self.remaining_budget / max(self.B, 1)
        return (min(int(br * self.nb), self.nb - 1),
                min(int(pctr * self.np), self.np - 1))

    def bid(self, pctr, features=None):
        """ε-greedy action → bid = multiplier × value, capped by budget."""
        self.t += 1
        if self.remaining_budget <= 0:
            self.ls = None               # nothing to credit in update()
            return 0.0
        s = self._s(pctr)
        v = pctr * self.vpc
        if np.random.random() < self.eps:
            a = np.random.randint(len(self.bm))      # explore
        else:
            a = int(np.argmax(self.Q[s]))            # exploit
        self.ls, self.la = s, a
        return float(min(v * self.bm[a], self.remaining_budget))

    def update(self, won, cost, pctr, d_t=None):
        """Bookkeeping plus one tabular TD(0) backup for the last action."""
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1
        if self.ls is None:
            return
        reward = pctr * self.vpc if won else 0.0
        s2 = self._s(pctr)               # next state after budget change
        q = self.Q[self.ls][self.la]
        self.Q[self.ls][self.la] = q + self.lr * (
            reward + self.gamma * float(np.max(self.Q[s2])) - q)
        self.ls = self.la = None

    def get_stats(self):
        return {'name': self.name, 'spent': float(self.total_spent),
                'remaining': float(self.remaining_budget),
                'wins': self.total_wins, 't': self.t,
                'q_max': float(self.Q.max())}


class LinearBidder:
    """
    ## Linear — Proportional Bidding Baseline

    bid = pCTR × value_per_click (truthful linear bidding), capped by the
    remaining budget. The classic LIN baseline: no shading, no pacing.

    NOTE(review): this class was lost in the corrupted source and was
    reconstructed from the module header — confirm against the repo original.
    """

    def __init__(self, budget, T, vpc, name="Linear"):
        self.B = budget
        self.T = T
        self.vpc = vpc
        self.name = name
        self.total_spent = 0.0
        self.remaining_budget = budget
        self.t = 0
        self.total_wins = 0

    def bid(self, pctr, features=None):
        self.t += 1
        if self.remaining_budget <= 0:
            return 0.0
        return float(min(pctr * self.vpc, self.remaining_budget))

    def update(self, won, cost, pctr, d_t=None):
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

    def get_stats(self):
        return {'name': self.name, 'spent': float(self.total_spent),
                'remaining': float(self.remaining_budget),
                'wins': self.total_wins, 't': self.t}


class ThresholdBidder:
    """Bid a fixed amount if pCTR > threshold, else skip. Common rule of thumb."""

    def __init__(self, threshold, bid_value, name="Threshold"):
        self.threshold = threshold
        self.bid_value = bid_value
        self.name = name
        self.total_spent = 0.0
        self.remaining_budget = 1e9      # effectively unbounded until set_budget()
        self.total_wins = 0
        self.t = 0

    def bid(self, pctr, features=None):
        self.t += 1
        if self.remaining_budget <= 0:
            return 0.0
        return self.bid_value if pctr > self.threshold else 0.0

    def update(self, won, cost, pctr, d_t=None):
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

    def set_budget(self, b):
        """Install a real budget (the simulator calls this before a run)."""
        self.remaining_budget = b

    def get_stats(self):
        return {'name': self.name, 'spent': float(self.total_spent),
                'remaining': float(self.remaining_budget),
                'wins': self.total_wins, 't': self.t}


# ═══════════════════════════════════════════════════════════════
# FIRST-PRICE AUCTION SIMULATOR
# ═══════════════════════════════════════════════════════════════


class FPSim:
    """
    First-Price Auction Simulator.

    Simulates repeated first-price auctions. The maximum competing bid d_t
    follows a log-normal distribution whose mean correlates with impression
    quality (pCTR) — realistic: popular impressions attract more competition.

    Full information feedback: after each auction, d_t is revealed regardless
    of whether we won. This enables empirical CDF approaches.
    """

    def __init__(self, X, pctr, y, vpc=50, cfg=None, seed=42):
        """
        Args:
            X:    (N, d) feature matrix (mildly perturbs market prices).
            pctr: (N,) predicted CTRs.
            y:    (N,) binary click labels.
            vpc:  value per click.
            cfg:  optional dict with 'base_mean', 'ctr_correlation', 'noise_std'.
            seed: RNG seed for market prices and auction order.
        """
        self.X = X
        self.pctr = pctr
        self.y = y
        self.vpc = vpc
        self.N = len(X)
        self.rng = np.random.RandomState(seed)
        cfg = cfg or {}
        bm = cfg.get('base_mean', 20)
        cc = cfg.get('ctr_correlation', 10)
        ns = cfg.get('noise_std', 0.6)
        # Log-normal market price whose median rises with pCTR.
        mp = np.clip(bm + cc * self.pctr, 1, 200)
        self.mp = self.rng.lognormal(np.log(mp), ns, self.N)
        if self.X.shape[1] > 1:
            # Small feature-dependent perturbation of the market.
            self.mp *= np.exp((0.02 * self.X[:, 0] + 0.01 * self.X[:, 1]) * 0.1)
        self.mp = np.clip(self.mp, 0.5, 500)
        self.pos = 0
        self.order = self.rng.permutation(self.N)

    def reset(self):
        """Restart the auction stream with a fresh random order."""
        self.pos = 0
        self.order = self.rng.permutation(self.N)

    def run(self, algo):
        """Run one full pass of auctions for `algo` and return its stats dict.

        NOTE(review): the loop header was corrupted in the source; the
        reconstruction below follows the surviving loop body verbatim.
        """
        self.reset()
        if hasattr(algo, 'set_budget'):
            algo.set_budget(algo.B if hasattr(algo, 'B') else 5000)
        tc = 0                           # total clicks won
        bl, wl, cl = [], [], []          # bids, win flags, costs
        while self.pos < self.N:
            idx = self.order[self.pos]
            self.pos += 1
            pctr = self.pctr[idx]
            bid = algo.bid(pctr, self.X[idx])
            mp = self.mp[idx]
            won = bid >= mp              # first-price: win iff we match the market
            cost = bid if won else 0.0   # winner pays their own bid
            if won:
                tc += int(self.y[idx])
            algo.update(won, cost, pctr, mp)   # full info: mp always revealed
            bl.append(bid)
            wl.append(int(won))
            cl.append(cost)
        s = algo.get_stats()
        s.update({'total_clicks': tc, 'total_impressions': len(bl),
                  'total_wins': sum(wl),
                  'total_spent': sum(cl), 'ctr': tc / max(sum(wl), 1),
                  'budget_used_frac': sum(cl) / algo.B if hasattr(algo, 'B') else 0,
                  'cpc': sum(cl) / max(tc, 1), 'avg_bid': float(np.mean(bl)),
                  'win_rate': sum(wl) / max(len(wl), 1),
                  'avg_market_price': float(np.mean(self.mp))})
        return s


# ═══════════════════════════════════════════════════════════════
# DATA LOADING
# ═══════════════════════════════════════════════════════════════


def load_data(max_rows=200000, seed=42):
    """
    Load and preprocess Criteo_x4; train a logistic-regression CTR model.

    Returns (X_test, pctr_test, y_test, test_auc).

    NOTE: Criteo_x4 is 5.6GB across 37 Parquet files. Streaming 200K rows
    takes ~7 minutes due to sequential Parquet scanning.
    For quick iteration: --max_rows 20000 (loads in ~45 seconds)

    NOTE(review): most of this body was lost in the corrupted source and was
    reconstructed from the surviving fragments (streaming load, label/I/C
    column handling) — confirm against the repo original.
    """
    # Lazy imports: keep the bidder classes importable without the ML stack.
    from datasets import load_dataset
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder, StandardScaler
    from sklearn.metrics import roc_auc_score

    print(f"\nLoading Criteo_x4 ({max_rows} rows)...")
    ds = load_dataset("reczoo/Criteo_x4", split="train", streaming=True)
    rows = []
    for i, row in enumerate(ds):
        if i >= max_rows:
            break
        rows.append(row)
    df = pd.DataFrame(rows)

    y = df['label'].astype(int).values
    # Criteo schema: I1..I13 numeric, C1..C26 categorical.
    num_cols = [c for c in df.columns if c.startswith('I')]
    cat_cols = [c for c in df.columns if c.startswith('C')]
    X_num = df[num_cols].apply(pd.to_numeric, errors='coerce').fillna(0.0).values
    if cat_cols:
        X_cat = np.column_stack(
            [LabelEncoder().fit_transform(df[c].astype(str)) for c in cat_cols])
    else:
        X_cat = np.empty((len(df), 0))
    X = StandardScaler().fit_transform(
        np.hstack([X_num, X_cat]).astype(float))

    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=0.5, random_state=seed, stratify=y)
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_tr, y_tr)
    pctr = clf.predict_proba(X_te)[:, 1]
    auc = float(roc_auc_score(y_te, pctr))
    print(f"CTR model test AUC: {auc:.4f}")
    return X_te, pctr, y_te, auc


# ═══════════════════════════════════════════════════════════════
# BENCHMARK DRIVER
# ═══════════════════════════════════════════════════════════════


def main():
    """Run the benchmark: load data, simulate all bidders, aggregate, save.

    NOTE(review): the argparse/loop section was reconstructed from a
    corrupted source; the reporting tail below is verbatim from the original.
    """
    p = argparse.ArgumentParser(
        description="RTB bidding algorithms benchmark (first-price auctions)")
    p.add_argument('--max_rows', type=int, default=200000)
    p.add_argument('--budget', type=float, default=10000)
    p.add_argument('--T', type=int, default=10000)
    p.add_argument('--vpc', type=float, default=50)
    p.add_argument('--k', type=float, default=0.8)
    p.add_argument('--n_runs', type=int, default=5)
    p.add_argument('--seed', type=int, default=42)
    p.add_argument('--output', type=str, default='results.json')
    args = p.parse_args()

    t0 = time.time()
    X, pctr, y, eauc = load_data(args.max_rows, args.seed)

    def make_algos():
        """Fresh bidder instances for one benchmark run."""
        return [
            DualOGDBidder(args.budget, args.T, args.vpc),
            TwoSidedDualBidder(args.budget, args.T, args.vpc, k=args.k),
            ValueShadingBidder(args.budget, args.T, args.vpc),
            RLBBidder(args.budget, args.T, args.vpc),
            LinearBidder(args.budget, args.T, args.vpc),
            ThresholdBidder(threshold=float(np.median(pctr)),
                            bid_value=args.budget / args.T * 2),
        ]

    all_results = {}
    for run in range(args.n_runs):
        sim = FPSim(X, pctr, y, vpc=args.vpc, seed=args.seed + run)
        for algo in make_algos():
            if hasattr(algo, 'set_budget'):
                algo.set_budget(args.budget)
            r = sim.run(algo)
            all_results.setdefault(algo.name, []).append(r)
            print(f"  run {run} {r['name']:<14} clicks={r['total_clicks']:>4} "
                  f"spent={r['total_spent']:>8.1f} "
                  f"budget={r['budget_used_frac']:.1%} CPC={r['cpc']:.2f}")

    print(f"\n{'='*60}\nFINAL RESULTS ({args.n_runs} runs)\n{'='*60}")
    print(f"{'Algorithm':<18} {'Clicks':>10} {'CPC':>9} {'Budget%':>9} {'WinRate':>8}")
    print("-" * 58)
    aggregated = {}
    for name, runs in all_results.items():
        clicks = [r['total_clicks'] for r in runs]
        cpc = [r['cpc'] for r in runs]
        bu = [r['budget_used_frac'] for r in runs]
        wr = [r['win_rate'] for r in runs]
        aggregated[name] = {
            'clicks_mean': float(np.mean(clicks)), 'clicks_std': float(np.std(clicks)),
            'cpc_mean': float(np.mean(cpc)), 'cpc_std': float(np.std(cpc)),
            'budget_used_mean': float(np.mean(bu)), 'budget_used_std': float(np.std(bu)),
            'win_rate_mean': float(np.mean(wr)), 'win_rate_std': float(np.std(wr))}
    ranked = sorted(aggregated.items(), key=lambda x: x[1]['clicks_mean'],
                    reverse=True)
    for i, (name, s) in enumerate(ranked):
        medal = ['🥇', '🥈', '🥉'][i] if i < 3 else '  '
        print(f"{medal} {name:<16} {s['clicks_mean']:>7.0f}±{s['clicks_std']:.0f} "
              f"{s['cpc_mean']:>7.2f} {s['budget_used_mean']:>7.1%} "
              f"{s['win_rate_mean']:>6.1%}")

    output = {'config': {'max_rows': args.max_rows, 'budget': args.budget,
                         'T': args.T, 'vpc': args.vpc, 'k': args.k,
                         'n_runs': args.n_runs, 'seed': args.seed,
                         'ctr_test_auc': float(eauc)},
              'aggregated': aggregated}
    with open(args.output, 'w') as f:
        json.dump(output, f, indent=2)
    print(f"\nSaved to {args.output}")
    print(f"Total time: {time.time()-t0:.0f}s — Done!")


if __name__ == '__main__':
    main()