Spaces:

anitalp
/

NLP_Models_sequence

Runtime error

Update app.py

727e755 verified about 2 months ago

1.39 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

	# 1. Spanish -> English translation: the tokenizer and the seq2seq model
	#    are instantiated explicitly from the same checkpoint name.
	model_name = "Helsinki-NLP/opus-mt-es-en"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

	# 2. Wrap the preloaded model/tokenizer pair in a translation pipeline.
	translator_pipe = pipeline(
	    task="translation",
	    model=model,
	    tokenizer=tokenizer,
	)

	# 3. Toxicity classifier; here pipeline() resolves the model from its id.
	toxicity_pipe = pipeline(
	    task="text-classification",
	    model="SkolkovoInstitute/roberta_toxicity_classifier",
	)

	def spanish_toxicity_check(text):
	    """Translate Spanish text to English and score the translation's toxicity.

	    Args:
	        text: Spanish text coming from the UI textbox. May be empty,
	            whitespace-only or None.

	    Returns:
	        A dict mapping each classifier label to its score, which is the
	        shape gr.Label accepts. An empty dict is returned when there is
	        no text to analyse.
	    """
	    # Nothing to analyse: skip both model calls instead of feeding them
	    # an empty prompt.
	    if not text or not text.strip():
	        return {}

	    # Step 1: Translate.
	    # max_length bounds the generated English text; truncation=True trims
	    # over-long input so it is not rejected for exceeding the model's
	    # input limit.
	    translated = translator_pipe(text, max_length=512, truncation=True)
	    translation = translated[0]['translation_text']

	    # Step 2: Classify.
	    # top_k=None asks for the score of every label (not just the best
	    # one) so the label widget can show the full distribution;
	    # truncation=True guards against a translation that is too long for
	    # the classifier.
	    results = toxicity_pipe(translation, top_k=None, truncation=True)

	    # Some transformers versions wrap the per-label list in an outer
	    # list for single-string input; unwrap it when present.
	    if results and isinstance(results[0], list):
	        results = results[0]

	    # Step 3: Format output for gr.Label
	    return {item['label']: item['score'] for item in results}

	# 4. Interface: one text input for the Spanish lyrics and one label
	#    output fed by spanish_toxicity_check.
	lyrics_box = gr.Textbox(label="Lyrics en Español", placeholder="Escribe aquí...")
	toxicity_label = gr.Label(label="Nivel de Toxicidad")

	demo = gr.Interface(
	    spanish_toxicity_check,
	    lyrics_box,
	    toxicity_label,
	    title="Análisis de Toxicidad de Canciones",
	    description="Traducción automática (Helsinki-NLP) + Clasificación (RoBERTa)",
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()