"""Command-line utility: translate text to English with a fine-tuned NLLB model."""
import argparse

import torch
from transformers import MBartForConditionalGeneration, NllbTokenizer

# Run on the GPU when one is available, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading models on {DEVICE.upper()}...")

# One fine-tuned model/tokenizer pair per supported source language,
# both loaded from the same local checkpoint directory.
_CHECKPOINT = "models/nllb-finetuned-nepali-en"
models = {
    "nepali": MBartForConditionalGeneration.from_pretrained(_CHECKPOINT).to(DEVICE),
}
tokenizers = {
    "nepali": NllbTokenizer.from_pretrained(_CHECKPOINT),
}
print("All models loaded successfully!")
|
|
def translate_text(text_to_translate: str, source_language: str) -> str:
    """Translate a single string of text to English using the fine-tuned models.

    Args:
        text_to_translate: Source-language text to translate.
        source_language: Key into the module-level ``models`` / ``tokenizers``
            registries (currently only ``"nepali"``).

    Returns:
        The English translation as a plain string.

    Raises:
        KeyError: If ``source_language`` has no loaded model/tokenizer
            or no registered language code.
    """
    # FLORES-200 codes understood by NLLB tokenizers. The previous value,
    # "nep_Npan", is not a valid NLLB code — Nepali is "npi_Deva".
    lang_codes = {"nepali": "npi_Deva"}

    model = models[source_language]
    tokenizer = tokenizers[source_language]

    # Tell the tokenizer which language the input text is written in.
    tokenizer.src_lang = lang_codes[source_language]

    inputs = tokenizer(text_to_translate, return_tensors="pt").to(DEVICE)

    # Pure inference: no_grad avoids building the autograd graph.
    with torch.no_grad():
        generated_tokens = model.generate(
            **inputs,
            # Force the decoder to begin generating in English.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids("eng_Latn"),
            max_length=128,
        )

    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
|
| |
| if __name__ == "__main__": |
| parser = argparse.ArgumentParser(description="Translate text using a fine-tuned model.") |
| parser.add_argument("--text", type=str, required=True, help="Text to translate.") |
| parser.add_argument("--lang", type=str, required=True, choices=["nepali"], help="Source language: 'nepali'.") |
| args = parser.parse_args() |
|
|
| translated_sentence = translate_text(args.text, args.lang) |
| |
| print(f"\nOriginal ({args.lang}): {args.text}") |
| print(f"Translated (en): {translated_sentence}") |
|
|