| |
| from dataclasses import dataclass, field |
| from typing import Callable, Dict, List, Optional |
| import numpy as np |
|
|
|
|
@dataclass
class BugTemplate:
    """Specification of one injectable training-bug scenario.

    Bundles descriptive metadata with two callables: one that fabricates
    supporting artifacts (loss curves, GPU profiles, logs, reports) and one
    that rewrites repository files to plant the bug.
    """

    bug_type: str                 # machine-readable bug identifier, e.g. "missing_zero_grad"
    category: str                 # broad grouping such as "optimization" (see BUG_CATEGORIES)
    difficulty: str               # "easy" / "medium" / "hard" per the templates below
    primary_bug_file: str         # repo path that the mutator rewrites to contain the bug
    related_files: List[str]      # additional files relevant to diagnosing the bug
    red_herring_file: Optional[str]  # distractor file not actually involved, or None
    fix_strategy: str             # human-readable description of the correct fix
    line_range: List[int]         # [start, end] lines of the bug within primary_bug_file
    description: str              # short human-readable label for the scenario
    artifact_generator: Callable  # (artifact_type: str, rng) -> artifact payload
    repo_mutator: Callable        # (repo_files: dict, rng) -> mutated repo_files
    metadata: Dict = field(default_factory=dict)  # free-form extras; fresh dict per instance
|
|
|
|
# Maps each known bug type to its broad category. This registry is a superset
# of the scenarios instantiated in BUG_TEMPLATES below (e.g. "shape_mismatch"
# and "distributed_sync_error" have no template yet).
BUG_CATEGORIES = {
    "shape_mismatch": "model",
    "missing_zero_grad": "optimization",
    "wrong_loss_function": "optimization",
    "learning_rate_too_high": "optimization",
    "gradient_explosion": "optimization",
    "memory_leak": "resource",
    "data_leakage": "data",
    "incorrect_normalization": "data",
    "distributed_sync_error": "distributed",
    "amp_overflow": "numerics",
}
|
|
| |
def dummy_artifact_generator(artifact_type: str, rng):
    """Produce a generic synthetic artifact of the requested type.

    Returns a list of per-step dicts for "loss_curve" (smooth exponential
    decay with damped oscillation) and "gpu_profile" (linear growth), a fixed
    string for "training_log" and "diagnostic_report", and an empty list for
    anything else. ``rng`` is accepted for interface parity with the other
    generators but is not used here.
    """
    if artifact_type == "training_log":
        return "Epoch 1, Step 0: loss 2.45\nEpoch 1, Step 1: loss 2.43\n"
    if artifact_type == "diagnostic_report":
        return "No critical diagnostics found. Review optimizer and data pipeline."

    steps = np.arange(100)
    if artifact_type == "loss_curve":
        decay = 2.3 * np.exp(-0.01 * steps) + 0.15
        wobble = 0.22 * np.sin(0.25 * steps) * np.exp(-0.002 * steps)
        losses = decay + wobble
        return [
            {"step": idx, "train_loss": float(value)}
            for idx, value in enumerate(losses)
        ]
    if artifact_type == "gpu_profile":
        usage_mb = 2048 + 2.4 * steps
        return [
            {"step": idx, "allocated_mb": float(value)}
            for idx, value in enumerate(usage_mb)
        ]
    return []
|
|
|
|
def artifact_generator_wrong_loss(artifact_type: str, rng):
    """Synthetic artifacts for the wrong-loss-function scenario.

    The loss curve plateaus near 1.8 and the training log reports near-chance
    accuracy — the signature of an inappropriate training objective. Unknown
    artifact types yield an empty list; ``rng`` is unused.
    """
    if artifact_type == "training_log":
        return "Epoch 1: loss 1.82, acc 0.11\nEpoch 2: loss 1.80, acc 0.12\n"
    if artifact_type == "diagnostic_report":
        return "Loss plateaus early while accuracy stays near chance. Check loss function."

    steps = np.arange(100)
    if artifact_type == "loss_curve":
        plateau = 1.8 + 0.05 * np.sin(0.15 * steps)
        return [
            {"step": idx, "train_loss": float(value)}
            for idx, value in enumerate(plateau)
        ]
    if artifact_type == "gpu_profile":
        usage_mb = 1900 + 1.8 * steps
        return [
            {"step": idx, "allocated_mb": float(value)}
            for idx, value in enumerate(usage_mb)
        ]
    return []
|
|
|
|
def artifact_generator_lr_high(artifact_type: str, rng):
    """Synthetic artifacts for the learning-rate-too-high scenario.

    The loss curve grows super-linearly (slow divergence) and the log records
    explicit spike/divergence messages. Unknown artifact types yield an empty
    list; ``rng`` is unused.
    """
    if artifact_type == "training_log":
        return "Step 10: loss 3.20 (spike)\nStep 20: loss 5.10 (diverged)\n"
    if artifact_type == "diagnostic_report":
        return "Loss spikes suggest unstable updates. Consider lowering learning rate."

    steps = np.arange(100)
    if artifact_type == "loss_curve":
        # Exponent 1.1 gives a gently accelerating, divergence-like trend.
        diverging = 0.9 + 0.02 * (steps ** 1.1)
        return [
            {"step": idx, "train_loss": float(value)}
            for idx, value in enumerate(diverging)
        ]
    if artifact_type == "gpu_profile":
        usage_mb = 2100 + 2.0 * steps
        return [
            {"step": idx, "allocated_mb": float(value)}
            for idx, value in enumerate(usage_mb)
        ]
    return []
|
|
|
|
def artifact_generator_amp_overflow(artifact_type: str, rng):
    """Synthetic artifacts for the AMP-overflow scenario.

    The loss decays normally, then jumps by a constant 0.6 from step 30
    onward — mimicking the regression caused by overflowing/skipped
    mixed-precision updates. Unknown artifact types yield an empty list;
    ``rng`` is unused.
    """
    if artifact_type == "training_log":
        return "AMP: overflow detected, skipping step\nAMP: scale reduced to 32768\n"
    if artifact_type == "diagnostic_report":
        return "AMP overflow warnings observed. Ensure GradScaler is used correctly."

    steps = np.arange(100)
    if artifact_type == "loss_curve":
        decay = 2.1 * np.exp(-0.008 * steps) + 0.2
        # +0.6 offset from step 30 onward marks the post-overflow regression.
        losses = np.where(steps < 30, decay, decay + 0.6)
        return [
            {"step": idx, "train_loss": float(value)}
            for idx, value in enumerate(losses)
        ]
    if artifact_type == "gpu_profile":
        usage_mb = 2300 + 3.2 * steps
        return [
            {"step": idx, "allocated_mb": float(value)}
            for idx, value in enumerate(usage_mb)
        ]
    return []
|
|
def mutate_missing_zero_grad(repo_files, rng):
    """Plant the missing-zero_grad bug by overwriting ``train.py``.

    Mutates ``repo_files`` in place and returns the same dict. ``rng`` is
    accepted for interface parity with the other mutators but is unused.
    """
    buggy_train = """import torch
from model.architecture import Net

model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(10):
    for x, y in dataloader:
        # optimizer.zero_grad() # BUG: commented out
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
"""
    repo_files["train.py"] = buggy_train
    return repo_files
|
|
def mutate_data_leakage(repo_files, rng):
    """Plant the data-leakage bug by overwriting ``data/dataset.py``.

    The generated dataset ignores the requested split and keeps the full
    data, so validation samples leak into training. Mutates ``repo_files``
    in place and returns it; ``rng`` is unused.
    """
    leaky_dataset = """from torch.utils.data import Dataset

class ImageDataset(Dataset):
    def __init__(self, data, split="train"):
        # BUG: We use the entire data instead of just the split
        self.data = data
        self.split = split
"""
    repo_files["data/dataset.py"] = leaky_dataset
    return repo_files
|
|
def mutate_memory_leak(repo_files, rng):
    """Plant the memory-leak bug by overwriting ``data/dataset.py``.

    The generated dataset appends every loaded item to an ever-growing
    cache, so memory accumulates over the run. Mutates ``repo_files`` in
    place and returns it; ``rng`` is unused.
    """
    leaking_dataset = """from torch.utils.data import Dataset

class ImageDataset(Dataset):
    def __init__(self):
        # BUG: Storing huge tensors in a class-level variable leading to memory accumulation
        self.cache = []

    def load(self, x):
        self.cache.append(x)
        return x
"""
    repo_files["data/dataset.py"] = leaking_dataset
    return repo_files
|
|
|
|
def mutate_wrong_loss_function(repo_files, rng):
    """Plant the wrong-loss-function bug by overwriting ``train.py``.

    The generated training script uses MSELoss for what is otherwise a
    classification setup. Mutates ``repo_files`` in place and returns it;
    ``rng`` is unused.
    """
    buggy_train = """import torch
from model.architecture import Net

model = Net()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss() # BUG: wrong loss for classification

for epoch in range(10):
    for x, y in dataloader:
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
"""
    repo_files["train.py"] = buggy_train
    return repo_files
|
|
|
|
def mutate_learning_rate_too_high(repo_files, rng):
    """Plant the too-high learning rate by overwriting the training config.

    Writes ``config/training_config.yaml`` with lr=1.0. Mutates
    ``repo_files`` in place and returns it; ``rng`` is unused.
    """
    hot_config = """lr: 1.0
batch_size: 32
"""
    repo_files["config/training_config.yaml"] = hot_config
    return repo_files
|
|
|
|
def mutate_amp_overflow(repo_files, rng):
    """Plant the AMP-overflow bug by overwriting ``train.py``.

    The generated script runs the forward pass under autocast but never
    uses a GradScaler, so fp16 gradients can overflow. Mutates
    ``repo_files`` in place and returns it; ``rng`` is unused.
    """
    buggy_train = """import torch
from model.architecture import Net

model = Net().cuda()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

for epoch in range(10):
    for x, y in dataloader:
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(x.cuda())
            loss = torch.nn.functional.cross_entropy(output, y.cuda())
        # BUG: missing GradScaler handling can cause overflows
        loss.backward()
        optimizer.step()
"""
    repo_files["train.py"] = buggy_train
    return repo_files
|
|
# Registry of the concrete bug scenarios this module can inject. Each entry
# pairs an artifact generator (synthetic loss curves / logs / reports) with a
# repo mutator that rewrites the file named in primary_bug_file.
BUG_TEMPLATES = [
    BugTemplate(
        bug_type="missing_zero_grad",
        category="optimization",
        difficulty="easy",
        primary_bug_file="train.py",
        related_files=[],
        red_herring_file="model/architecture.py",
        fix_strategy="Call optimizer.zero_grad() before loss.backward()",
        line_range=[9, 14],
        description="Missing zero grad",
        artifact_generator=dummy_artifact_generator,
        repo_mutator=mutate_missing_zero_grad,
    ),
    BugTemplate(
        bug_type="wrong_loss_function",
        category="optimization",
        difficulty="easy",
        primary_bug_file="train.py",
        related_files=["config/training_config.yaml"],
        red_herring_file="data/dataset.py",
        fix_strategy="Use CrossEntropyLoss for classification logits",
        line_range=[6, 12],
        description="Wrong loss function",
        artifact_generator=artifact_generator_wrong_loss,
        repo_mutator=mutate_wrong_loss_function,
    ),
    BugTemplate(
        bug_type="data_leakage",
        category="data",
        difficulty="medium",
        primary_bug_file="data/dataset.py",
        related_files=["data/preprocessing.py"],
        red_herring_file="train.py",
        fix_strategy="Ensure validation split is strictly separate from training",
        line_range=[4, 6],
        description="Data leakage",
        artifact_generator=dummy_artifact_generator,
        repo_mutator=mutate_data_leakage,
    ),
    BugTemplate(
        bug_type="learning_rate_too_high",
        category="optimization",
        difficulty="medium",
        primary_bug_file="config/training_config.yaml",
        related_files=["train.py"],
        red_herring_file="model/attention.py",
        fix_strategy="Reduce learning rate or use a scheduler",
        line_range=[1, 1],
        description="Learning rate too high",
        artifact_generator=artifact_generator_lr_high,
        repo_mutator=mutate_learning_rate_too_high,
    ),
    BugTemplate(
        bug_type="memory_leak",
        category="resource",
        difficulty="hard",
        primary_bug_file="data/dataset.py",
        related_files=["train.py"],
        red_herring_file="model/attention.py",
        fix_strategy="Avoid holding reference to tensors in class cache",
        line_range=[5, 9],
        description="Memory leak",
        artifact_generator=dummy_artifact_generator,
        repo_mutator=mutate_memory_leak,
    ),
    BugTemplate(
        bug_type="amp_overflow",
        category="numerics",
        difficulty="hard",
        primary_bug_file="train.py",
        related_files=["config/training_config.yaml"],
        red_herring_file="model/architecture.py",
        fix_strategy="Use GradScaler and scale updates for AMP",
        line_range=[7, 13],
        description="AMP overflow",
        artifact_generator=artifact_generator_amp_overflow,
        repo_mutator=mutate_amp_overflow,
    ),
]
|
|