anitalp committed on
Commit
727e755
·
verified ·
1 Parent(s): ad479ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -21
app.py CHANGED
@@ -1,34 +1,35 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
 
4
# English-language toxicity classifier.
toxicity_pipe = pipeline("text-classification", model="SkolkovoInstitute/roberta_toxicity_classifier")

# Spanish -> English translation pipeline, pinned to a specific OPUS-MT model.
translator_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en")
8
 
9
def spanish_toxicity_check(text):
    """Translate Spanish text to English, then score its toxicity.

    Returns a ``{label: score}`` dict, the shape a ``gr.Label`` expects.
    """
    # The translation pipeline yields [{'translation_text': ...}].
    english_text = translator_pipe(text)[0]['translation_text']

    # The classifier yields [{'label': ..., 'score': ...}]; flatten it
    # into a plain mapping for the UI.
    label_scores = {}
    for prediction in toxicity_pipe(english_text):
        label_scores[prediction['label']] = prediction['score']
    return label_scores
 
24
 
25
# Assemble the Gradio UI: Spanish lyrics in, toxicity scores out.
lyrics_input = gr.Textbox(label="Escribe la letra de la canción (Spanish)", lines=3)
toxicity_output = gr.Label(label="Análisis de Toxicidad")

demo = gr.Interface(
    fn=spanish_toxicity_check,
    inputs=lyrics_input,
    outputs=toxicity_output,
    title="Spanish Songs Toxicity Classification",
    description="Detecta toxicidad traduciendo de Español a Inglés automáticamente.",
)
33
 
34
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
 
4
# Load the ES->EN translation model and tokenizer explicitly, then hand both
# to the pipeline rather than letting it resolve them from a task name.
model_name = "Helsinki-NLP/opus-mt-es-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
translator_pipe = pipeline("translation", model=model, tokenizer=tokenizer)

# English-language toxicity classifier, resolved by model id.
toxicity_pipe = pipeline("text-classification", model="SkolkovoInstitute/roberta_toxicity_classifier")
14
 
15
def spanish_toxicity_check(text):
    """Translate Spanish text to English, then score its toxicity.

    Returns a ``{label: score}`` mapping for display in a ``gr.Label``.
    """
    # max_length=512 so long lyrics are not cut off during generation.
    translated = translator_pipe(text, max_length=512)
    english_text = translated[0]['translation_text']

    # Flatten [{'label': ..., 'score': ...}] into {label: score} for the UI.
    label_scores = {}
    for prediction in toxicity_pipe(english_text):
        label_scores[prediction['label']] = prediction['score']
    return label_scores
25
 
26
# Assemble the Gradio UI: Spanish lyrics in, toxicity scores out.
lyrics_box = gr.Textbox(label="Lyrics en Español", placeholder="Escribe aquí...")
toxicity_label = gr.Label(label="Nivel de Toxicidad")

demo = gr.Interface(
    fn=spanish_toxicity_check,
    inputs=lyrics_box,
    outputs=toxicity_label,
    title="Análisis de Toxicidad de Canciones",
    description="Traducción automática (Helsinki-NLP) + Clasificación (RoBERTa)",
)
34
 
35
  if __name__ == "__main__":