| import pke |
| from sense2vec import Sense2Vec |
| import time |
| import gradio as gr |
| from transformers import AutoTokenizer |
| import os |
| from pathlib import Path |
| from FastT5 import get_onnx_runtime_sessions, OnnxT5 |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
# sense2vec vectors loaded from disk; used later to find distractor phrases.
s2v = Sense2Vec().from_disk("s2v_old")


# Directory holding the fine-tuned T5 (SQuAD v1) model artifacts.
trained_model_path = './t5_squad_v1/'


# Stem of the model directory (e.g. "t5_squad_v1") — the ONNX files below
# are named "<stem>-{encoder,decoder,init-decoder}_quantized.onnx".
pretrained_model_name = Path(trained_model_path).stem


encoder_path = os.path.join(
    trained_model_path, f"{pretrained_model_name}-encoder_quantized.onnx")
decoder_path = os.path.join(
    trained_model_path, f"{pretrained_model_name}-decoder_quantized.onnx")
init_decoder_path = os.path.join(
    trained_model_path, f"{pretrained_model_name}-init-decoder_quantized.onnx")


# NOTE(review): get_onnx_runtime_sessions expects the three paths in this
# order (encoder, decoder, init-decoder) — confirm against the fastT5 API.
model_paths = encoder_path, decoder_path, init_decoder_path
model_sessions = get_onnx_runtime_sessions(model_paths)
model = OnnxT5(trained_model_path, model_sessions)


# Tokenizer config is read from the same model directory.
tokenizer = AutoTokenizer.from_pretrained(trained_model_path)
|
|
|
|
def get_question(sentence, answer, mdl, tknizer):
    """Generate a question whose answer is *answer*, given *sentence* as context.

    Parameters
    ----------
    sentence : str
        Context passage the question is grounded in.
    answer : str
        Answer span the generated question should target.
    mdl
        Seq2seq model exposing ``generate`` (the OnnxT5 instance here).
    tknizer
        Tokenizer exposing ``encode_plus`` / ``decode``.

    Returns
    -------
    str
        Generated question with the leading ``"question:"`` tag stripped.
    """
    text = f"context: {sentence} answer: {answer}"
    print(text)  # debug trace of the exact prompt fed to the model
    max_len = 256
    # FIX: `pad_to_max_length=False` is deprecated (and removed in recent
    # transformers releases); with no padding requested, `truncation=True`
    # alone is the equivalent, warning-free call.
    encoding = tknizer.encode_plus(
        text, max_length=max_len, truncation=True, return_tensors="pt")
    input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
    outs = mdl.generate(input_ids=input_ids,
                        attention_mask=attention_mask,
                        early_stopping=True,
                        num_beams=5,
                        num_return_sequences=1,
                        no_repeat_ngram_size=2,
                        max_length=300)

    dec = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]

    # The model emits "question: <text>"; drop the tag and trim whitespace.
    question = dec[0].replace("question:", "")
    return question.strip()
|
|
|
|
def generate_question(context, answer):
    """Run get_question with the module-level model/tokenizer, logging latency."""
    t0 = time.time()  # wall-clock start
    question = get_question(context, answer, model, tokenizer)
    elapsed = time.time() - t0
    print(f"Latency: {elapsed} seconds")
    return question
|
|
|
|
def generate_mcq(context):
    """Build MCQ items (question, keyword, two distractors) from *context*.

    Keyphrases are extracted with pke's TopicRank, distractors come from
    sense2vec nearest neighbours, and questions are produced by the T5 model
    via ``generate_question``.

    Parameters
    ----------
    context : str
        Passage to generate questions from.

    Returns
    -------
    list[dict]
        Dicts with keys ``Question``, ``Keyword``, ``Distractor1``,
        ``Distractor2``. Keyphrases with no sense2vec entry, or with fewer
        than two neighbours, are skipped.
    """
    extractor = pke.unsupervised.TopicRank()
    extractor.load_document(input=context, language='en')
    extractor.candidate_selection(pos={"NOUN", "PROPN", "ADJ"})
    extractor.candidate_weighting()
    keyphrases = extractor.get_n_best(n=10)

    results = []
    for original_keyword, _ in keyphrases:
        # sense2vec keys are lower-cased with underscores, e.g. "new_york".
        sense = s2v.get_best_sense(original_keyword.lower().replace(" ", "_"))
        if sense is None:
            continue  # no vector for this phrase — nothing to build on

        most_similar = s2v.most_similar(sense, n=2)
        distractors = [word.split("|")[0].lower().replace("_", " ")
                       for word, _ in most_similar]
        # FIX: most_similar can return fewer than the requested n neighbours;
        # the original indexed distractors[1] unconditionally and would raise
        # IndexError. Skip such keyphrases instead.
        if len(distractors) < 2:
            continue

        question = generate_question(context, original_keyword)
        results.append({
            "Question": question,
            "Keyword": original_keyword,
            "Distractor1": distractors[0],
            "Distractor2": distractors[1],
        })

    return results
|
|
|
|
# Gradio UI: one context textbox in, a JSON list of MCQ dicts out.
iface = gr.Interface(
    fn=generate_mcq,
    inputs=gr.Textbox(label="Context", type='text'),
    # FIX: the original passed `value=list` — the built-in `list` *class* —
    # as the component's default value. gr.JSON's `value` expects an actual
    # default payload (or should be omitted); give the component a label.
    outputs=gr.JSON(label="Generated MCQs"),
    title="Questgen AI",
    description="Enter a context to generate MCQs for keywords."
)


iface.launch()
|
|