TASK_DESCRIPTION = """
This is a multi-label classification problem where each sample can have multiple active classes.
However, the model is currently using `CrossEntropyLoss`, which is meant for single-label (mutually exclusive) classes.
Because of this, the loss trains but the predictions are essentially garbage (treating it as a single-label problem).

Fix the loss function so it correctly handles multi-label classification.
The grader will check:
1. Loss convergence (loss < 0.5)
2. Model predictions are actually multi-hot (avg > 1 label/sample)
3. F1 Score > 0.6
"""

BUGGY_CODE = """
import torch
import torch.nn as nn
from sklearn.metrics import f1_score

torch.manual_seed(42)

# Generate synthetic multi-label data (100 samples, 20 features, 5 classes)
X = torch.randn(100, 20)
# Each sample has a 30% chance of having each class active
y = (torch.rand(100, 5) > 0.7).float() 

model = nn.Sequential(
    nn.Linear(20, 64),
    nn.ReLU(),
    nn.Linear(64, 5)
)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# BUG: CrossEntropyLoss is for single-label classification
criterion = nn.CrossEntropyLoss()

losses = []
for step in range(100):
    optimizer.zero_grad()
    logits = model(X)
    
    # CrossEntropyLoss expects class indices, not one-hot/multi-hot vectors for the target
    loss = criterion(logits, y)
    
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

# Evaluation
with torch.no_grad():
    logits = model(X)
    # Using sigmoid and 0.5 threshold for multi-label prediction
    preds = (torch.sigmoid(logits) > 0.5).float()
    
    avg_labels = preds.sum(dim=1).mean().item()
    f1 = f1_score(y.numpy(), preds.numpy(), average='micro')

print('##METRICS_START##')
print('FINAL_LOSS:' + str(losses[-1]))
print('AVG_LABELS:' + str(avg_labels))
print('F1_SCORE:' + str(f1))
print('##METRICS_END##')
"""