| import pandas as pd |
| import torch |
| import numpy as np |
|
|
| from transformers import AutoModelForSequenceClassification |
| from transformers import AutoTokenizer |
|
|
# Score each query with a fine-tuned DeBERTa sequence classifier and
# attach the resulting probability as a new DataFrame column.
model = AutoModelForSequenceClassification.from_pretrained("deberta-classification-chatrag/checkpoint-6342")
tokenizer = AutoTokenizer.from_pretrained("deberta-classification-chatrag/checkpoint-6342")

# Inference only: make eval mode explicit (disables dropout etc.).
model.eval()

queries = [
    "Comment puis-je renouveler un passeport ?",
    "Combien font deux et deux ?",
    "Écris un début de lettre de recommandation pour la Dinum",
]
result = pd.DataFrame(queries, columns=['query'])

complete_probabilities = []

# No gradients needed for inference — avoids building autograd graphs.
with torch.no_grad():
    for text in result["query"].tolist():
        # Tokenize one query at a time and move tensors to the model's device.
        encoding = tokenizer(text, return_tensors="pt")
        encoding = {k: v.to(model.device) for k, v in encoding.items()}

        logits = model(**encoding).logits

        # Sigmoid over the raw logits, back on CPU for scalar extraction.
        # NOTE(review): .item() assumes the head emits exactly one logit per
        # input (num_labels == 1) — confirm against the checkpoint config.
        probs = torch.sigmoid(logits.squeeze().cpu())
        complete_probabilities.append(probs.item())

result["prob"] = complete_probabilities

print(result)
|
|