import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
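

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------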
# Base checkpoint name passed to ``from_pretrained`` for tokenizer and model.
MODEL_NAME = "gpt2"
# File holding the fine-tuned state dict that is loaded over the base model.
WEIGHTS_PATH = "gpt2_spoc.pt"
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
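

# ---------------------------------------------------------------------------
# Tokenizer and model
# ---------------------------------------------------------------------------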
# Load the base tokenizer and register the three prompt-marker tokens used to
# delimit the pseudo-code section, the code section and the end of the output.
tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_NAME)
tokenizer.add_special_tokens({
    "additional_special_tokens": ["<|pc|>", "<|code|>", "<|end|>"]
})

# Build the base model, grow its embedding matrix to the enlarged vocabulary,
# then overwrite the weights with the fine-tuned state dict.
# NOTE(review): presumably the checkpoint was saved with the same three extra
# tokens, otherwise the embedding shapes will not match -- confirm.
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)
model.resize_token_embeddings(len(tokenizer))
# SECURITY: torch.load unpickles the file; only load checkpoints from a
# trusted source (consider ``weights_only=True`` where the installed torch
# version supports it).
model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=DEVICE))
model.to(DEVICE)
# Switch to inference mode (disables dropout).
model.eval()
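

# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------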
def generate_code(pseudo, max_new_tokens=200):
    """Generate code for a pseudo-code description with the fine-tuned model.

    The pseudo-code is wrapped in the ``<|pc|>`` / ``<|code|>`` prompt markers,
    a continuation is sampled from the model, and the text produced before the
    first ``<|end|>`` marker is returned.

    Args:
        pseudo: Pseudo-code text entered by the user.
        max_new_tokens: Upper bound on the number of tokens sampled after the
            prompt.

    Returns:
        The generated code with surrounding whitespace stripped, or a warning
        message when ``pseudo`` is missing or blank.
    """
    if not pseudo or not pseudo.strip():
        return "⚠️ Please enter some pseudo-code."

    prompt = f"<|pc|>\n{pseudo.strip()}\n<|code|>\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    prompt_length = inputs["input_ids"].shape[1]
    end_token_id = tokenizer.convert_tokens_to_ids("<|end|>")

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        # Stop as soon as the model emits the end marker instead of sampling
        # until the token budget is exhausted.
        eos_token_id=[tokenizer.eos_token_id, end_token_id],
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the continuation, and keep special tokens: the markers are
    # registered as special tokens, so skipping them would erase the very
    # delimiters needed to cut the output (and would leave the echoed prompt
    # in the result).
    completion = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=False
    )
    for stop_marker in ("<|end|>", tokenizer.eos_token):
        completion = completion.split(stop_marker)[0]
    return completion.strip()
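

# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------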
# Web UI: one multi-line text box for the pseudo-code, one code viewer for the
# model output, plus a few clickable example prompts.
demo = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(lines=10, label="🧠 Enter Pseudo-Code"),
    outputs=gr.Code(label="💻 Generated Code", language="python"),
    title="Pseudo-Code → Code Generator (GPT-2 SPOC)",
    description="Fine-tuned GPT-2 model that converts pseudo-code into working Python code.",
    examples=[
        ["Read integer n\nRead n integers into a list\nPrint the sum of the list"],
        ["Input two numbers a and b\nIf a > b, print a else print b"],
        ["Read a string s\nReverse it and print"]
    ],
    theme="gradio/soft",
)
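

# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------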
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()