| import gradio as gr |
| import torch |
| import numpy as np |
| from transformers import AutoModelForMultipleChoice, AutoTokenizer |
|
|
# Hugging Face Hub checkpoint shared by the tokenizer and the model below.
model_id = "microsoft/deberta-v2-xlarge"

# Both objects are created once, at import time, and reused by every call to
# predict(); the two loads are independent of each other.
# NOTE(review): this looks like a base checkpoint rather than one fine-tuned
# for multiple choice -- confirm that it ships trained classification-head
# weights, otherwise the rankings produced below are not meaningful.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMultipleChoice.from_pretrained(model_id)
|
|
| |
def preprocess(text):
    """Parse tab-separated multiple-choice questions into sample dicts.

    Every line is expected to hold a prompt followed by five answer options,
    separated by tab characters. Lines with fewer than six fields are skipped;
    fields after the sixth are ignored.

    Args:
        text: Raw multi-line input, one question per line. ``None`` or an
            empty string yields an empty list.

    Returns:
        A list of dicts with the keys ``"prompt"``, ``"A"``, ``"B"``, ``"C"``,
        ``"D"`` and ``"E"``, in input order.
    """
    if not text:
        return []

    keys = ("prompt", "A", "B", "C", "D", "E")
    samples = []
    for line in text.strip().split("\n"):
        # Text pasted with Windows line endings arrives as "...\r\n"; without
        # this the stray "\r" would become part of option E.
        parts = line.rstrip("\r").split("\t")
        if len(parts) < len(keys):
            continue
        # zip() stops at the sixth field, so surplus columns are dropped.
        samples.append(dict(zip(keys, parts)))
    return samples
|
|
| |
def predict(data):
    """Rank the five answer options of each multiple-choice sample.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        data: Iterable of dicts with the keys ``"prompt"``, ``"A"``, ``"B"``,
            ``"C"``, ``"D"`` and ``"E"``.

    Returns:
        One entry per sample, in input order. Each entry is a single-element
        list holding the three highest-scoring option letters joined into one
        string, best first (for example ``["DBA"]``).
    """
    option_letters = np.array(list("ABCDE"))
    results = []
    for sample in data:
        # One (prompt, option) sequence pair per answer choice.
        prompts = [sample["prompt"]] * len(option_letters)
        options = [sample[letter] for letter in "ABCDE"]
        encoded = tokenizer(
            prompts,
            options,
            truncation=True,
            padding=True,
            return_tensors="pt",
        )
        # Multiple-choice heads expect (batch, num_choices, seq_len). The
        # tokenizer output is (num_choices, seq_len), so add a batch axis of
        # size 1; without it the model would read seq_len as the number of
        # choices and the logits could not be reshaped to one row per sample.
        input_ids = encoded["input_ids"].unsqueeze(0)
        attention_mask = encoded["attention_mask"].unsqueeze(0)
        with torch.no_grad():
            logits = model(input_ids, attention_mask=attention_mask).logits
        # Negate so that argsort (ascending) lists the best option first.
        ranked = torch.argsort(-logits, dim=1)
        answers = option_letters[ranked.tolist()]
        results.append(["".join(row) for row in answers[:, :3]])
    return results
|
|
| |
def _predict_from_text(text):
    """Adapt ``predict`` to the text-in / label-out shape of the web UI.

    The textbox delivers one raw string, while ``predict`` works on parsed
    sample dicts and returns nested lists, so this parses the text first and
    then flattens the rankings into a single string.

    Args:
        text: Raw textbox content; may be empty while the user is typing.

    Returns:
        The top-3 ranking of every parsed question, one per line (for example
        ``"DBA\\nACE"``), or an empty string when no complete question was
        entered.
    """
    samples = preprocess(text) if text else []
    if not samples:
        return ""
    return "\n".join("".join(ranking) for ranking in predict(samples))


# NOTE(review): gr.inputs / gr.outputs are kept as written; they appear to be
# the legacy component namespaces -- confirm against the pinned Gradio version.
iface = gr.Interface(
    fn=_predict_from_text,
    inputs=gr.inputs.Textbox(placeholder="Paste multiple-choice questions (prompt and options separated by tabs, one question per line) ..."),
    outputs=gr.outputs.Label(num_top_classes=3),
    live=True,
    title="LLM Science Exam Demo",
    description="Enter multiple-choice questions (prompt and options) below and get predictions.",
)

iface.launch()

# `wandb` was referenced here without ever being imported, which raised a
# NameError. Import it lazily and skip the integration when it is missing so
# the demo itself does not depend on it.
try:
    import wandb
except ImportError:
    wandb = None

if wandb is not None:
    # NOTE(review): the W&B integration presumably needs a public share link
    # (launch(share=True)) -- confirm before relying on it.
    iface.integrate(wandb=wandb)