rtferraz commited on
Commit
857ec9a
·
verified ·
1 Parent(s): 2b3e3af

Add 03_ecommerce_finetune.ipynb — next-purchase prediction with JointFusion, LightGBM baseline comparison

Browse files
Files changed (1) hide show
  1. notebooks/03_ecommerce_finetune.ipynb +468 -0
notebooks/03_ecommerce_finetune.ipynb ADDED
@@ -0,0 +1,468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# 03 — E-Commerce Fine-Tuning: Next-Purchase Prediction\n",
8
+ "\n",
9
+ "**Goal:** Fine-tune the pre-trained DomainTransformer for predicting whether a user will make a purchase, and compare against a LightGBM baseline on hand-crafted features.\n",
10
+ "\n",
11
+ "**Task:** Binary classification — given a user's event sequence, predict if they will purchase (1) or not (0).\n",
12
+ "\n",
13
+ "**Pre-trained model:** [rtferraz/ecommerce-domain-24m](https://huggingface.co/rtferraz/ecommerce-domain-24m)\n",
14
+ "\n",
15
+ "**Architecture:** JointFusionModel (pre-trained Transformer + DCNv2 with PLR tabular embeddings)"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "markdown",
20
+ "metadata": {},
21
+ "source": [
22
+ "## Setup"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": null,
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "# !pip install datasets transformers torch accelerate tokenizers numpy pandas matplotlib scikit-learn wandb huggingface_hub lightgbm"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "import logging, pickle, os, sys, gc\n",
41
+ "from datetime import datetime\n",
42
+ "from collections import Counter\n",
43
+ "\n",
44
+ "import numpy as np\n",
45
+ "import pandas as pd\n",
46
+ "import matplotlib.pyplot as plt\n",
47
+ "import torch\n",
48
+ "from sklearn.model_selection import train_test_split\n",
49
+ "from sklearn.metrics import roc_auc_score, classification_report\n",
50
+ "\n",
51
+ "if os.path.exists('../src'): sys.path.insert(0, '../src')\n",
52
+ "elif os.path.exists('src'): sys.path.insert(0, 'src')\n",
53
+ "\n",
54
+ "from domain_tokenizer import (\n",
55
+ " DomainTokenizerBuilder, DomainTransformerConfig,\n",
56
+ " DomainTransformerForCausalLM, JointFusionModel,\n",
57
+ " DomainFinetuneDataset, prepare_finetune_dataset, finetune_domain_model,\n",
58
+ ")\n",
59
+ "from domain_tokenizer.schema import DomainSchema, FieldSpec, FieldType\n",
60
+ "\n",
61
+ "logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')\n",
62
+ "print(f'torch: {torch.__version__}, CUDA: {torch.cuda.is_available()}')\n",
63
+ "if torch.cuda.is_available():\n",
64
+ " print(f'GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB')"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": null,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "from huggingface_hub import login\n",
74
+ "login()\n",
75
+ "\n",
76
+ "import wandb\n",
77
+ "wandb.login()\n",
78
+ "os.environ['WANDB_PROJECT'] = 'domainTokenizer'"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "markdown",
83
+ "metadata": {},
84
+ "source": [
85
+ "## Step 1 — Load Pre-trained Artifacts\n",
86
+ "\n",
87
+ "Load the artifacts saved by `02_ecommerce_pretrain.ipynb`."
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": null,
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "# Load user sequences from pre-training notebook\n",
97
+ "with open('./ecommerce_artifacts.pkl', 'rb') as f:\n",
98
+ " artifacts = pickle.load(f)\n",
99
+ "\n",
100
+ "user_sequences = artifacts['user_sequences']\n",
101
+ "user_ids = artifacts['user_ids']\n",
102
+ "print(f'Loaded {len(user_sequences):,} users')\n",
103
+ "\n",
104
+ "# Load tokenizer\n",
105
+ "from transformers import PreTrainedTokenizerFast\n",
106
+ "hf_tokenizer = PreTrainedTokenizerFast.from_pretrained('./ecommerce_tokenizer')\n",
107
+ "print(f'Tokenizer vocab: {hf_tokenizer.vocab_size}')\n",
108
+ "\n",
109
+ "# Rebuild the schema and builder (needed for tokenize_event)\n",
110
+ "ECOMMERCE_REES46_SCHEMA = DomainSchema(\n",
111
+ " name='ecommerce_rees46',\n",
112
+ " fields=[\n",
113
+ " FieldSpec(name='event_type', field_type=FieldType.CATEGORICAL_FIXED, prefix='EVT',\n",
114
+ " categories=['view', 'cart', 'remove_from_cart', 'purchase']),\n",
115
+ " FieldSpec(name='price', field_type=FieldType.NUMERICAL_CONTINUOUS, prefix='PRICE', n_bins=21),\n",
116
+ " FieldSpec(name='category', field_type=FieldType.TEXT, prefix='CAT'),\n",
117
+ " FieldSpec(name='timestamp', field_type=FieldType.TEMPORAL, calendar_fields=['dow', 'hour']),\n",
118
+ " ],\n",
119
+ ")\n",
120
+ "builder = DomainTokenizerBuilder(ECOMMERCE_REES46_SCHEMA)\n",
121
+ "all_events_flat = [e for seq in user_sequences for e in seq]\n",
122
+ "builder.fit(all_events_flat)\n",
123
+ "del all_events_flat; gc.collect()\n",
124
+ "print('Builder fitted')"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": null,
130
+ "metadata": {},
131
+ "outputs": [],
132
+ "source": [
133
+ "# Load pre-trained model\n",
134
+ "config = DomainTransformerConfig.from_preset('24m', vocab_size=hf_tokenizer.vocab_size)\n",
135
+ "model = DomainTransformerForCausalLM(config)\n",
136
+ "model.load_state_dict(torch.load('./ecommerce_pretrain_checkpoints/final/model.safetensors',\n",
137
+ " map_location='cpu', weights_only=True), strict=False)\n",
138
+ "print(f'Pre-trained model loaded: {sum(p.numel() for p in model.parameters()):,} params')"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "markdown",
143
+ "metadata": {},
144
+ "source": [
145
+ "## Step 2 — Create Labels and Tabular Features\n",
146
+ "\n",
147
+ "**Label:** Binary — did the user make at least one purchase? (1=yes, 0=no)\n",
148
+ "\n",
149
+ "**Tabular features:** Hand-crafted from user sequences (for the DCNv2 branch and LightGBM baseline)."
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": null,
155
+ "metadata": {},
156
+ "outputs": [],
157
+ "source": [
158
+ "def compute_user_features(events):\n",
159
+ " \"\"\"Extract tabular features from a user's event sequence.\"\"\"\n",
160
+ " n_events = len(events)\n",
161
+ " n_views = sum(1 for e in events if e['event_type'] == 'view')\n",
162
+ " n_carts = sum(1 for e in events if e['event_type'] == 'cart')\n",
163
+ " n_purchases = sum(1 for e in events if e['event_type'] == 'purchase')\n",
164
+ " n_removes = sum(1 for e in events if e['event_type'] == 'remove_from_cart')\n",
165
+ " \n",
166
+ " prices = [e['price'] for e in events if e['price'] > 0]\n",
167
+ " avg_price = np.mean(prices) if prices else 0\n",
168
+ " max_price = max(prices) if prices else 0\n",
169
+ " std_price = np.std(prices) if len(prices) > 1 else 0\n",
170
+ " \n",
171
+ " categories = set(e['category'] for e in events)\n",
172
+ " n_unique_categories = len(categories)\n",
173
+ " \n",
174
+ " # Temporal features\n",
175
+ " hours = [e['timestamp'].hour for e in events]\n",
176
+ " avg_hour = np.mean(hours)\n",
177
+ " \n",
178
+ " # Conversion funnel ratios\n",
179
+ " cart_rate = n_carts / max(n_views, 1)\n",
180
+ " purchase_rate = n_purchases / max(n_events, 1)\n",
181
+ " remove_rate = n_removes / max(n_carts, 1) if n_carts > 0 else 0\n",
182
+ " \n",
183
+ " return [\n",
184
+ " n_events, n_views, n_carts, n_purchases, n_removes,\n",
185
+ " avg_price, max_price, std_price,\n",
186
+ " n_unique_categories,\n",
187
+ " avg_hour,\n",
188
+ " cart_rate, purchase_rate, remove_rate,\n",
189
+ " ]\n",
190
+ "\n",
191
+ "FEATURE_NAMES = [\n",
192
+ " 'n_events', 'n_views', 'n_carts', 'n_purchases', 'n_removes',\n",
193
+ " 'avg_price', 'max_price', 'std_price',\n",
194
+ " 'n_unique_categories',\n",
195
+ " 'avg_hour',\n",
196
+ " 'cart_rate', 'purchase_rate', 'remove_rate',\n",
197
+ "]\n",
198
+ "\n",
199
+ "print(f'Computing features for {len(user_sequences):,} users...')\n",
200
+ "tabular_features = np.array([compute_user_features(seq) for seq in user_sequences], dtype=np.float32)\n",
201
+ "labels = np.array([1.0 if any(e['event_type'] == 'purchase' for e in seq) else 0.0 for seq in user_sequences])\n",
202
+ "\n",
203
+ "print(f'Features shape: {tabular_features.shape}')\n",
204
+ "print(f'Labels: {labels.sum():.0f} purchasers / {len(labels)} total ({labels.mean()*100:.1f}%)')\n",
205
+ "print(f'Feature names: {FEATURE_NAMES}')"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": null,
211
+ "metadata": {},
212
+ "outputs": [],
213
+ "source": [
214
+ "# Train/test split (80/20, stratified by label)\n",
215
+ "train_idx, test_idx = train_test_split(\n",
216
+ " range(len(user_sequences)), test_size=0.2, random_state=42, stratify=labels\n",
217
+ ")\n",
218
+ "\n",
219
+ "train_seqs = [user_sequences[i] for i in train_idx]\n",
220
+ "test_seqs = [user_sequences[i] for i in test_idx]\n",
221
+ "train_features = tabular_features[train_idx]\n",
222
+ "test_features = tabular_features[test_idx]\n",
223
+ "train_labels = labels[train_idx]\n",
224
+ "test_labels = labels[test_idx]\n",
225
+ "\n",
226
+ "print(f'Train: {len(train_seqs):,} users ({train_labels.mean()*100:.1f}% positive)')\n",
227
+ "print(f'Test: {len(test_seqs):,} users ({test_labels.mean()*100:.1f}% positive)')"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "markdown",
232
+ "metadata": {},
233
+ "source": [
234
+ "## Step 3 — LightGBM Baseline\n",
235
+ "\n",
236
+ "Standard ML baseline: LightGBM on hand-crafted tabular features. This is what we need to beat."
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": null,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "import lightgbm as lgb\n",
246
+ "\n",
247
+ "lgb_model = lgb.LGBMClassifier(n_estimators=200, learning_rate=0.05, max_depth=6, random_state=42, verbose=-1)\n",
248
+ "lgb_model.fit(train_features, train_labels)\n",
249
+ "\n",
250
+ "lgb_train_probs = lgb_model.predict_proba(train_features)[:, 1]\n",
251
+ "lgb_test_probs = lgb_model.predict_proba(test_features)[:, 1]\n",
252
+ "\n",
253
+ "lgb_train_auc = roc_auc_score(train_labels, lgb_train_probs)\n",
254
+ "lgb_test_auc = roc_auc_score(test_labels, lgb_test_probs)\n",
255
+ "\n",
256
+ "print(f'LightGBM Baseline:')\n",
257
+ "print(f' Train AUC: {lgb_train_auc:.4f}')\n",
258
+ "print(f' Test AUC: {lgb_test_auc:.4f}')\n",
259
+ "\n",
260
+ "# Feature importance\n",
261
+ "importance = pd.Series(lgb_model.feature_importances_, index=FEATURE_NAMES).sort_values(ascending=False)\n",
262
+ "print(f'\\nTop features:')\n",
263
+ "for feat, imp in importance.head(5).items():\n",
264
+ " print(f' {feat}: {imp}')"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "markdown",
269
+ "metadata": {},
270
+ "source": [
271
+ "## Step 4 — JointFusionModel Fine-Tuning\n",
272
+ "\n",
273
+ "The JointFusionModel combines:\n",
274
+ "- **Transaction branch:** Pre-trained DomainTransformer → user embedding\n",
275
+ "- **Tabular branch:** DCNv2 with PLR embeddings on hand-crafted features\n",
276
+ "- **Joint head:** MLP on concatenated embeddings → binary prediction"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "code",
281
+ "execution_count": null,
282
+ "metadata": {},
283
+ "outputs": [],
284
+ "source": [
285
+ "# Create fine-tuning datasets\n",
286
+ "MAX_LENGTH = 256 # tokens per user sequence\n",
287
+ "\n",
288
+ "train_dataset = DomainFinetuneDataset(\n",
289
+ " train_seqs, train_features, train_labels,\n",
290
+ " builder, hf_tokenizer, max_length=MAX_LENGTH,\n",
291
+ ")\n",
292
+ "test_dataset = DomainFinetuneDataset(\n",
293
+ " test_seqs, test_features, test_labels,\n",
294
+ " builder, hf_tokenizer, max_length=MAX_LENGTH,\n",
295
+ ")\n",
296
+ "\n",
297
+ "print(f'Train dataset: {len(train_dataset)} samples')\n",
298
+ "print(f'Test dataset: {len(test_dataset)} samples')\n",
299
+ "print(f'Sample: {set(train_dataset[0].keys())}')"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": null,
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "# Create JointFusionModel\n",
309
+ "fusion_model = JointFusionModel(\n",
310
+ " transformer_model=model,\n",
311
+ " n_tabular_features=len(FEATURE_NAMES),\n",
312
+ " n_classes=1, # binary\n",
313
+ " plr_frequencies=32,\n",
314
+ " plr_embedding_dim=32,\n",
315
+ " dcn_cross_layers=3,\n",
316
+ " dcn_deep_layers=2,\n",
317
+ " dcn_deep_dim=128,\n",
318
+ " head_hidden_dim=128,\n",
319
+ " dropout=0.1,\n",
320
+ ")\n",
321
+ "\n",
322
+ "n_params = sum(p.numel() for p in fusion_model.parameters())\n",
323
+ "print(f'JointFusion model: {n_params:,} params (transformer + DCNv2 + head)')"
324
+ ]
325
+ },
326
+ {
327
+ "cell_type": "code",
328
+ "execution_count": null,
329
+ "metadata": {},
330
+ "outputs": [],
331
+ "source": [
332
+ "%%time\n",
333
+ "USE_GPU = torch.cuda.is_available()\n",
334
+ "GPU_NAME = torch.cuda.get_device_name(0) if USE_GPU else ''\n",
335
+ "USE_BF16 = USE_GPU and 'T4' not in GPU_NAME\n",
336
+ "USE_FP16 = USE_GPU and not USE_BF16\n",
337
+ "\n",
338
+ "trainer = finetune_domain_model(\n",
339
+ " model=fusion_model,\n",
340
+ " train_dataset=train_dataset,\n",
341
+ " eval_dataset=test_dataset,\n",
342
+ " output_dir='./ecommerce_finetune_checkpoints',\n",
343
+ " num_epochs=5 if USE_GPU else 2,\n",
344
+ " per_device_batch_size=32 if USE_GPU else 8,\n",
345
+ " gradient_accumulation_steps=1,\n",
346
+ " learning_rate=1e-4,\n",
347
+ " warmup_steps=50,\n",
348
+ " logging_steps=20,\n",
349
+ " eval_steps=100 if USE_GPU else 50,\n",
350
+ " save_strategy='no',\n",
351
+ " bf16=USE_BF16,\n",
352
+ " fp16=USE_FP16,\n",
353
+ " report_to='wandb',\n",
354
+ " run_name='ecommerce-finetune-joint-5ep',\n",
355
+ " seed=42,\n",
356
+ ")"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "markdown",
361
+ "metadata": {},
362
+ "source": [
363
+ "## Step 5 — Evaluate and Compare"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": null,
369
+ "metadata": {},
370
+ "outputs": [],
371
+ "source": [
372
+ "# Get predictions from JointFusion model\n",
373
+ "fusion_model.eval()\n",
374
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
375
+ "fusion_model = fusion_model.to(device)\n",
376
+ "\n",
377
+ "all_probs, all_labels = [], []\n",
378
+ "loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)\n",
379
+ "\n",
380
+ "with torch.no_grad():\n",
381
+ " for batch in loader:\n",
382
+ " batch = {k: v.to(device) for k, v in batch.items()}\n",
383
+ " labels_batch = batch.pop('labels')\n",
384
+ " out = fusion_model(**batch)\n",
385
+ " probs = torch.sigmoid(out['logits'].squeeze(-1))\n",
386
+ " all_probs.extend(probs.cpu().numpy())\n",
387
+ " all_labels.extend(labels_batch.cpu().numpy())\n",
388
+ "\n",
389
+ "all_probs = np.array(all_probs)\n",
390
+ "all_labels = np.array(all_labels)\n",
391
+ "\n",
392
+ "fusion_test_auc = roc_auc_score(all_labels, all_probs)\n",
393
+ "print(f'JointFusion Test AUC: {fusion_test_auc:.4f}')"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": null,
399
+ "metadata": {},
400
+ "outputs": [],
401
+ "source": [
402
+ "# Comparison table\n",
403
+ "print('=' * 50)\n",
404
+ "print('MODEL COMPARISON — Purchase Prediction (AUC)')\n",
405
+ "print('=' * 50)\n",
406
+ "print(f' LightGBM (tabular only): {lgb_test_auc:.4f}')\n",
407
+ "print(f' JointFusion (Transformer+DCNv2): {fusion_test_auc:.4f}')\n",
408
+ "print(f' Difference: {fusion_test_auc - lgb_test_auc:+.4f}')\n",
409
+ "print('=' * 50)\n",
410
+ "\n",
411
+ "if fusion_test_auc > lgb_test_auc:\n",
412
+ " print(f'\\n✅ JointFusion beats LightGBM by {(fusion_test_auc - lgb_test_auc)*100:.2f} percentage points')\n",
413
+ "else:\n",
414
+ " print(f'\\n⚠️ LightGBM still leads by {(lgb_test_auc - fusion_test_auc)*100:.2f} percentage points')\n",
415
+ " print(f' (Expected with only 3-epoch pre-training. More epochs would improve the transformer embeddings.)')"
416
+ ]
417
+ },
418
+ {
419
+ "cell_type": "code",
420
+ "execution_count": null,
421
+ "metadata": {},
422
+ "outputs": [],
423
+ "source": [
424
+ "# Loss curve\n",
425
+ "losses = [h['loss'] for h in trainer.state.log_history if 'loss' in h]\n",
426
+ "eval_losses = [h['eval_loss'] for h in trainer.state.log_history if 'eval_loss' in h]\n",
427
+ "\n",
428
+ "fig, ax = plt.subplots(figsize=(10, 5))\n",
429
+ "ax.plot(losses, label='Train Loss', alpha=0.7)\n",
430
+ "if eval_losses:\n",
431
+ " eval_steps = np.linspace(0, len(losses), len(eval_losses))\n",
432
+ " ax.plot(eval_steps, eval_losses, 'ro-', label='Eval Loss', markersize=4)\n",
433
+ "ax.set_xlabel('Step'); ax.set_ylabel('Loss'); ax.set_title('Fine-Tuning Loss')\n",
434
+ "ax.legend(); ax.grid(True, alpha=0.3); plt.tight_layout(); plt.show()"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "execution_count": null,
440
+ "metadata": {},
441
+ "outputs": [],
442
+ "source": [
443
+ "wandb.finish()\n",
444
+ "print('Done!')"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "markdown",
449
+ "metadata": {},
450
+ "source": [
451
+ "## Summary\n",
452
+ "\n",
453
+ "| Model | Test AUC | Notes |\n",
454
+ "|-------|----------|-------|\n",
455
+ "| LightGBM (tabular) | *see above* | 13 hand-crafted features |\n",
456
+ "| JointFusion (Transformer+DCNv2) | *see above* | Pre-trained domain tokens + same 13 features |\n",
457
+ "\n",
458
+ "The pre-trained DomainTransformer captures sequential behavioral patterns (view→cart→purchase funnels, category stickiness, temporal habits) that hand-crafted features cannot fully represent."
459
+ ]
460
+ }
461
+ ],
462
+ "metadata": {
463
+ "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
464
+ "language_info": { "name": "python", "version": "3.12.0" }
465
+ },
466
+ "nbformat": 4,
467
+ "nbformat_minor": 4
468
+ }