# anitalp's picture
# Update app.py
# 727e755 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# 1. Translation checkpoint: the tokenizer and the seq2seq model are loaded
#    explicitly from the same identifier instead of letting pipeline() resolve
#    them on its own.
model_name = "Helsinki-NLP/opus-mt-es-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# 2. Translation pipeline wired to the objects loaded above.
translator_pipe = pipeline(
    task="translation",
    model=model,
    tokenizer=tokenizer,
)

# 3. Toxicity classifier: here the generic task name plus a model identifier
#    is enough, so pipeline() loads the model and tokenizer itself.
toxicity_pipe = pipeline(
    task="text-classification",
    model="SkolkovoInstitute/roberta_toxicity_classifier",
)
def spanish_toxicity_check(text):
    """Translate the submitted text and score the translation for toxicity.

    Args:
        text: Raw text from the input box (the UI asks for Spanish lyrics).

    Returns:
        A dict mapping each label returned by the toxicity classifier to its
        score, which is the shape ``gr.Label`` consumes. An empty dict is
        returned when ``text`` is empty or only whitespace.
    """
    # Nothing to analyse: skip both models instead of scoring whatever the
    # translator would generate for an empty prompt.
    if not text or not text.strip():
        return {}

    # Step 1: Translate.
    # max_length only bounds the *generated* translation; it does not protect
    # against over-long input. truncation=True asks the tokenizer to clip the
    # input to the model's limit so long lyrics do not crash inference.
    # NOTE: text beyond that limit is dropped, not translated.
    translation = translator_pipe(
        text,
        max_length=512,
        truncation=True,
    )[0]["translation_text"]

    # Step 2: Classify. Truncate here as well: the translated text is
    # re-tokenized by a different tokenizer and may exceed the classifier's
    # own input limit.
    results = toxicity_pipe(translation, truncation=True)

    # Step 3: Format output for gr.Label ({label: confidence}).
    return {item["label"]: item["score"] for item in results}
# 4. Interface: a single text box feeds spanish_toxicity_check and its
#    {label: score} result is rendered by a Label component.
_lyrics_input = gr.Textbox(
    label="Lyrics en Español",
    placeholder="Escribe aquí...",
)
_toxicity_output = gr.Label(label="Nivel de Toxicidad")

demo = gr.Interface(
    fn=spanish_toxicity_check,
    inputs=_lyrics_input,
    outputs=_toxicity_output,
    title="Análisis de Toxicidad de Canciones",
    description="Traducción automática (Helsinki-NLP) + Clasificación (RoBERTa)",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()