"""Inference setup: load a fine-tuned encoder + MLP classifier head for
Bangla hate-speech detection from a local checkpoint directory."""

import json

import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

MODEL_DIR = "./outputs/final_baseline_best"

# Fine-tuned transformer backbone saved during training.
encoder = AutoModel.from_pretrained(MODEL_DIR)

# Classifier-head hyper-parameters persisted at training time; fall back to
# the training defaults (binary head on a 768-dim encoder) if keys are absent.
with open(f"{MODEL_DIR}/classifier_config.json", "r", encoding="utf-8") as f:
    c_config = json.load(f)

num_labels = c_config.get("num_labels", 1)
hidden_size = c_config.get("hidden_size", 768)

# Rebuild the head with the exact architecture used in training so the
# state dict keys line up.
classifier = nn.Sequential(
    nn.Linear(hidden_size, 256),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(256, num_labels),
)
# map_location="cpu" so a GPU-saved checkpoint still loads on a CPU-only host.
classifier.load_state_dict(
    torch.load(f"{MODEL_DIR}/classifier.pt", map_location="cpu")
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# BUG FIX: switch both modules to eval mode. Without this, nn.Dropout(0.1)
# stays active during inference and predictions are non-deterministic.
encoder.eval()
classifier.eval()
|
|
| |
def predict(text):
    """Return the sigmoid probability that *text* is hate speech.

    The text is tokenized (truncated to 128 tokens), passed through the
    encoder, and the [CLS] embedding is scored by the classifier head.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        hidden = encoder(**encoded).last_hidden_state
        # First position holds the [CLS] token embedding.
        score = classifier(hidden[:, 0, :])
        prob = torch.sigmoid(score)
    return prob.item()
|
|
| |
# Smoke-test the pipeline on a sample Bangla sentence and report the verdict.
sample_text = "আপনার বাংলা টেক্সট এখানে"
hate_prob = predict(sample_text)
label = "Hate Speech" if hate_prob > 0.5 else "Non-Hate Speech"
print(f"Hate Speech Probability: {hate_prob:.4f}")
print(f"Prediction: {label}")
|
|