anitalp committed on
Commit
727e755
·
verified ·
1 Parent(s): ad479ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -21
app.py CHANGED
@@ -1,34 +1,35 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
 
4
# English-language toxicity classifier.
toxicity_pipe = pipeline("text-classification", model="SkolkovoInstitute/roberta_toxicity_classifier")

# Spanish -> English translation pipeline, pinned to a specific OPUS-MT model.
translator_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en")
8
 
9
def spanish_toxicity_check(text):
    """Translate Spanish text to English, then score its toxicity.

    Returns a ``{label: score}`` dict, the shape a ``gr.Label`` expects.
    """
    # The translation pipeline yields [{'translation_text': ...}].
    english_text = translator_pipe(text)[0]['translation_text']

    # The classifier yields [{'label': ..., 'score': ...}]; flatten it
    # into a plain mapping for the UI.
    label_scores = {}
    for prediction in toxicity_pipe(english_text):
        label_scores[prediction['label']] = prediction['score']
    return label_scores
 
24
 
25
# Assemble the Gradio UI: Spanish lyrics in, toxicity scores out.
lyrics_input = gr.Textbox(label="Escribe la letra de la canción (Spanish)", lines=3)
toxicity_output = gr.Label(label="Análisis de Toxicidad")

demo = gr.Interface(
    fn=spanish_toxicity_check,
    inputs=lyrics_input,
    outputs=toxicity_output,
    title="Spanish Songs Toxicity Classification",
    description="Detecta toxicidad traduciendo de Español a Inglés automáticamente.",
)
33
 
34
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
 
4
# Load the ES->EN translation model and tokenizer explicitly, then hand both
# to the pipeline rather than letting it resolve them from a task name.
model_name = "Helsinki-NLP/opus-mt-es-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
translator_pipe = pipeline("translation", model=model, tokenizer=tokenizer)

# English-language toxicity classifier, resolved by model id.
toxicity_pipe = pipeline("text-classification", model="SkolkovoInstitute/roberta_toxicity_classifier")
14
 
15
def spanish_toxicity_check(text):
    """Translate Spanish text to English, then score its toxicity.

    Returns a ``{label: score}`` mapping for display in a ``gr.Label``.
    """
    # max_length=512 so long lyrics are not cut off during generation.
    translated = translator_pipe(text, max_length=512)
    english_text = translated[0]['translation_text']

    # Flatten [{'label': ..., 'score': ...}] into {label: score} for the UI.
    label_scores = {}
    for prediction in toxicity_pipe(english_text):
        label_scores[prediction['label']] = prediction['score']
    return label_scores
25
 
26
# Assemble the Gradio UI: Spanish lyrics in, toxicity scores out.
lyrics_box = gr.Textbox(label="Lyrics en Español", placeholder="Escribe aquí...")
toxicity_label = gr.Label(label="Nivel de Toxicidad")

demo = gr.Interface(
    fn=spanish_toxicity_check,
    inputs=lyrics_box,
    outputs=toxicity_label,
    title="Análisis de Toxicidad de Canciones",
    description="Traducción automática (Helsinki-NLP) + Clasificación (RoBERTa)",
)
34
 
35
  if __name__ == "__main__":