| import gc |
| import gradio as gr |
| import spaces |
| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
# Module-level cache for the currently loaded checkpoint. All three names start
# out empty and are populated together by load_model().
current_model_id = tokenizer = model = None
|
|
# Target languages offered in the UI dropdown, in display order.
LANGUAGES = [
    "Arabic",
    "Bengali",
    "Burmese",
    "Cantonese",
    "Chinese",
    "Czech",
    "Dutch",
    "English",
    "Filipino",
    "French",
    "German",
    "Gujarati",
    "Hebrew",
    "Hindi",
    "Indonesian",
    "Italian",
    "Japanese",
    "Kazakh",
    "Khmer",
    "Korean",
    "Malay",
    "Marathi",
    "Mongolian",
    "Persian",
    "Polish",
    "Portuguese",
    "Russian",
    "Spanish",
    "Tamil",
    "Telugu",
    "Thai",
    "Tibetan",
    "Traditional Chinese",
    "Turkish",
    "Ukrainian",
    "Urdu",
    "Uyghur",
    "Vietnamese",
]
|
|
# Checkpoint ids selectable in the UI; every entry shares the same repo prefix.
MODELS = [
    f"tencent/Hy-MT2-{variant}"
    for variant in ("30B-A3B", "7B", "1.8B")
]
|
|
def load_model(model_id):
    """Make ``model_id`` the active checkpoint, replacing any loaded one.

    Updates the module-level ``current_model_id``, ``tokenizer`` and ``model``.
    Returns immediately when ``model_id`` is already the active checkpoint.

    Args:
        model_id: Identifier handed to ``from_pretrained`` for both the
            tokenizer and the model.

    Raises:
        Any exception raised by ``from_pretrained``. In that case the three
        globals are left as ``None`` so a later call starts from a clean state.
    """
    global current_model_id, tokenizer, model

    if current_model_id == model_id:
        return

    print(f"Switching model from {current_model_id} to {model_id}...")

    had_previous_model = model is not None

    # Rebind to None instead of `del`: deleting would unbind the global names,
    # so a failed load below would leave later calls hitting NameError, and a
    # stale current_model_id would make the early return above skip reloading.
    current_model_id = None
    model = None
    tokenizer = None

    if had_previous_model:
        # Drop the old weights before allocating the new ones.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    print(f"Loading tokenizer for {model_id}...")
    new_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    print(f"Loading model {model_id}...")
    new_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    new_model.eval()

    # Publish only after both loads succeeded so the globals stay consistent.
    tokenizer = new_tokenizer
    model = new_model
    current_model_id = model_id
    print("Model loaded successfully.")
|
|
@spaces.GPU
def translate(source_text, target_lang, selected_model):
    """Translate ``source_text`` into ``target_lang`` using ``selected_model``.

    Args:
        source_text: Text to translate; ``None``, empty or whitespace-only
            input yields an empty string without loading a model.
        target_lang: Language name interpolated into the prompt.
        selected_model: Checkpoint id passed to ``load_model``; ids containing
            ``"30B"`` use a different set of generation parameters.

    Returns:
        The decoded generated text, ``""`` for blank input, or an error
        message string if loading or generation raises.
    """
    if not source_text or not source_text.strip():
        return ""

    try:
        load_model(selected_model)

        prompt = (
            f"Translate the following text into {target_lang}. "
            "Note that you should only output the translated result "
            "without any additional explanation:\n\n"
            f"{source_text}"
        )
        messages = [{"role": "user", "content": prompt}]

        # return_dict=True is requested explicitly: without it some
        # Transformers releases return a bare tensor, which can neither be
        # unpacked with ** nor indexed by "input_ids" below.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # NOTE(review): temperature/top_p/top_k only matter when sampling is
        # enabled; this presumably relies on the checkpoint's own generation
        # config to turn sampling on -- confirm.
        if "30B" in selected_model:
            gen_kwargs = {
                "temperature": 0.7,
                "top_p": 1.0,
                "repetition_penalty": 1.0,
            }
        else:
            gen_kwargs = {
                "temperature": 0.7,
                "top_p": 0.6,
                "top_k": 20,
                "repetition_penalty": 1.05,
            }

        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=4096,
                **gen_kwargs,
            )

        # Skip the prompt tokens so only the newly generated text is decoded.
        return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"Error during generation: {e}\n\n(Note: Zero GPU environments may timeout or run out of memory when loading large models dynamically.)"
|
|
# Gradio front end: inputs on the left, translated output on the right.
with gr.Blocks(title="Hy-MT2 Translator") as demo:
    gr.Markdown("# Hy-MT2 Translator")
    gr.Markdown("https://huggingface.co/collections/tencent/hy-mt2")

    with gr.Row():
        # Input side: source text, options, and the trigger button.
        with gr.Column():
            source_text = gr.Textbox(
                label="Source Text",
                lines=8,
                placeholder="Enter text to translate...",
            )

            with gr.Row():
                target_lang = gr.Dropdown(
                    choices=LANGUAGES,
                    value="English",
                    label="Target Language",
                )
                model_selector = gr.Dropdown(
                    choices=MODELS,
                    value="tencent/Hy-MT2-1.8B",
                    label="Model",
                )

            translate_btn = gr.Button("Translate", variant="primary")

        # Output side: read-only box that receives translate()'s return value.
        with gr.Column():
            output_text = gr.Textbox(
                label="Translated Text",
                lines=12,
                interactive=False,
            )

    translate_btn.click(
        fn=translate,
        inputs=[source_text, target_lang, model_selector],
        outputs=output_text,
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|