"""Translate Nepali and Sinhala sentences into English with NLLB-200.

Loads the facebook/nllb-200-distilled-600M checkpoint, then translates each
source sentence (keyed by its FLORES-200 language code) into English
(``eng_Latn``) and prints the result.
"""

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch


model_name = "facebook/nllb-200-distilled-600M"

print(f"Loading model: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
print("Model loaded successfully!")

# FLORES-200 language code -> source sentence.
# NOTE: Nepali's code is "npi_Deva" (Devanagari script) in the NLLB-200
# inventory; the previous "nep_Npan" is not a valid code ("Npan" is not an
# ISO 15924 script code) and would not tag the source language correctly.
sentences_to_translate = {
    "npi_Deva": "नेपालको राजधानी काठमाडौं हो।",
    "sin_Sinh": "ශ්රී ලංකාවේ අගනුවර කොළඹ වේ.",
}

print("\n--- Starting Translation ---")

for lang_code, text in sentences_to_translate.items():
    # Tell the tokenizer which language the source text is written in so it
    # prepends the correct source-language token.
    tokenizer.src_lang = lang_code

    inputs = tokenizer(text, return_tensors="pt")

    # Force English as the first generated token to select the target
    # language. convert_tokens_to_ids is stable across transformers
    # versions; tokenizer.lang_code_to_id was removed in recent releases.
    # no_grad: pure inference, skip gradient bookkeeping.
    with torch.no_grad():
        translated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.convert_tokens_to_ids("eng_Latn"),
            max_length=50,
        )

    translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

    print(f"\nOriginal ({lang_code}): {text}")
    print(f"Translation (eng_Latn): {translation}")

print("\n--- Translation Complete ---")