|
|
from __future__ import annotations

import json
import time
from dataclasses import dataclass
from typing import Any, Dict, List, TypedDict

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
|
|
|
|
|
@dataclass
class NarratorInput:
    """Inputs consumed by the narration pipeline."""

    # Raw SRT text with the film's dialogue subtitles.
    dialogues_srt: str
    # Per-frame scene descriptions; exact dict schema depends on the
    # upstream frame extractor — TODO confirm keys against the caller.
    frame_descriptions: List[Dict[str, Any]]
    # Path to the UNE-153010 guidelines text file.
    une_guidelines_path: str
    # Maximum narrator -> critic correction rounds before giving up.
    max_cycles: int = 3
|
|
|
|
|
@dataclass
class NarratorOutput:
    """Result produced by ``NarrationSystem.run``."""

    # Free-form narration integrating dialogue and scene descriptions.
    narrative_text: str
    # Subtitles (dialogues + UNE audio descriptions) in SRT form.
    srt_text: str
    # Last critic observations, or None when the critic raised none.
    critic_feedback: str | None = None
    # True when the critic judged the output UNE-153010 conformant.
    approved: bool = False
|
|
|
|
|
class NarrationSystem:
    """
    LangGraph-based multi-agent system:
    - NarratorNode: generates narration + SRT according to UNE-153010
    - CriticNode: evaluates conformity with UNE and coherence
    - IdentityManagerNode: adjusts character identification if needed
    - BackgroundDescriptorNode: fixes background/scene coherence

    The graph runs Narrator -> Critic, then loops
    Critic -> Identity -> Background -> Critic until the critic approves
    or the ``max_cycles`` review budget is exhausted.
    """

    def __init__(self, model_url: str, une_guidelines_path: str):
        """
        Args:
            model_url: Base URL of the OpenAI-compatible endpoint serving the model.
            une_guidelines_path: Path to a UTF-8 text file with the UNE-153010 rules.

        Raises:
            OSError: If the guidelines file cannot be read.
        """
        self.model_url = model_url
        self.une_guidelines_path = une_guidelines_path

        # One handle per role: the critic runs cooler (more deterministic)
        # than the creative narrator; the two fixers sit in between.
        self.narrator_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.6)
        self.critic_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.3)
        self.identity_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.4)
        self.background_llm = ChatOpenAI(base_url=model_url, model="gpt-4o-mini", temperature=0.4)

        with open(une_guidelines_path, "r", encoding="utf-8") as f:
            self.une_rules = f.read()

        self.graph = self.build_graph()

    def narrator_node(self, state):
        """Generate the initial narration + UNE SRT from dialogues and frames."""
        dialogues = state["dialogues_srt"]
        frames = state["frame_descriptions"]

        prompt = ChatPromptTemplate.from_template("""
Eres un narrador de audiodescripciones según la norma UNE-153010.
Combina coherentemente los diálogos del siguiente SRT con las descripciones de escena dadas.

Sigue estas pautas:
- Genera una narración libre que integre ambos tipos de información.
- Evita redundancias o descripciones triviales.
- Limita la duración de las audiodescripciones para que quepan entre los diálogos.
- Devuelve **dos bloques**:
1️⃣ `NARRATION_TEXT`: narración libre completa en texto continuo.
2️⃣ `UNE_SRT`: subtítulos con los diálogos y las audiodescripciones UNE.

## DIÁLOGOS SRT
{dialogues}

## DESCRIPCIONES DE FRAMES
{frames}
""")

        response = self.narrator_llm.invoke(prompt.format(dialogues=dialogues, frames=json.dumps(frames, ensure_ascii=False)))
        return {"narration": response.content, "critic_feedback": None, "approved": False}

    def critic_node(self, state):
        """Evaluate the current narration against UNE-153010 and record a verdict.

        Also increments the ``cycles`` counter so the routing function can
        bound the number of correction rounds.
        """
        narration = state["narration"]
        prompt = ChatPromptTemplate.from_template("""
Actúa como un revisor experto en audiodescripción conforme a la norma UNE-153010.
Evalúa el siguiente texto y SRT generados, detectando:
- Incoherencias en asignación de personajes.
- Errores en la identificación de escenarios.
- Desviaciones respecto a la norma UNE-153010.
- Incoherencias narrativas generales.

Devuelve:
- "APPROVED" si el resultado es conforme.
- En caso contrario, una lista JSON con observaciones clasificadas en:
- "characters"
- "scenes"
- "norma"
- "coherence"

## NORMA UNE-153010
{une_rules}

## TEXTO Y SRT A EVALUAR
{narration}
""")

        response = self.critic_llm.invoke(prompt.format(une_rules=self.une_rules, narration=narration))
        text = response.content.strip()
        cycles = state.get("cycles", 0) + 1

        # Prefix check (after unquoting) rather than substring: feedback such
        # as "NOT APPROVED" or JSON mentioning the word must count as a reject.
        verdict = text.strip('"\'` ').upper()
        if verdict.startswith("APPROVED"):
            return {"critic_feedback": None, "approved": True, "cycles": cycles}
        return {"critic_feedback": text, "approved": False, "cycles": cycles}

    def identity_node(self, state):
        """Repair character/dialogue attribution issues flagged by the critic."""
        fb = state.get("critic_feedback", "")
        narration = state["narration"]
        prompt = ChatPromptTemplate.from_template("""
El siguiente feedback señala incoherencias en personajes o diálogos.
Corrige únicamente esos aspectos manteniendo el resto igual.

## FEEDBACK
{fb}

## TEXTO ORIGINAL
{narration}
""")
        response = self.identity_llm.invoke(prompt.format(fb=fb, narration=narration))
        return {"narration": response.content}

    def background_node(self, state):
        """Repair scene/background coherence issues flagged by the critic."""
        fb = state.get("critic_feedback", "")
        narration = state["narration"]
        prompt = ChatPromptTemplate.from_template("""
El siguiente feedback señala incoherencias en escenarios o contexto visual.
Ajusta las descripciones de fondo manteniendo el estilo y duración UNE.

## FEEDBACK
{fb}

## TEXTO ORIGINAL
{narration}
""")
        response = self.background_llm.invoke(prompt.format(fb=fb, narration=narration))
        return {"narration": response.content}

    @staticmethod
    def _split_output(text: str) -> tuple[str, str]:
        """Split the narrator's combined answer into (narrative_text, srt_text).

        The narrator prompt requests two labelled blocks, ``NARRATION_TEXT``
        and ``UNE_SRT``. If the markers are absent, both halves fall back to
        the full text so no content is ever lost.
        """
        if "UNE_SRT" in text:
            head, _, tail = text.partition("UNE_SRT")
            narration = head.replace("NARRATION_TEXT", "", 1).strip(" :*`\n")
            srt = tail.strip(" :*`\n")
            return narration, srt
        return text, text

    def build_graph(self):
        """Compile the Narrator -> Critic -> fixers loop into a runnable graph."""

        # Explicit state schema: StateGraph() without one raises at
        # construction time in current LangGraph releases.
        class _State(TypedDict, total=False):
            dialogues_srt: str
            frame_descriptions: List[Dict[str, Any]]
            narration: str
            critic_feedback: str | None
            approved: bool
            cycles: int
            max_cycles: int

        def route_after_critic(state) -> str:
            # Stop on approval OR when the review budget is spent; without
            # the budget check an unapproved narration would loop until
            # LangGraph's recursion limit aborts the run.
            if state.get("approved"):
                return "done"
            if state.get("cycles", 0) >= state.get("max_cycles", 3):
                return "done"
            return "retry"

        g = StateGraph(_State)
        g.add_node("NarratorNode", self.narrator_node)
        g.add_node("CriticNode", self.critic_node)
        g.add_node("IdentityManagerNode", self.identity_node)
        g.add_node("BackgroundDescriptorNode", self.background_node)

        g.set_entry_point("NarratorNode")
        g.add_edge("NarratorNode", "CriticNode")
        g.add_conditional_edges(
            "CriticNode",
            route_after_critic,
            {
                "done": END,
                "retry": "IdentityManagerNode",
            },
        )
        g.add_edge("IdentityManagerNode", "BackgroundDescriptorNode")
        g.add_edge("BackgroundDescriptorNode", "CriticNode")

        return g.compile()

    def run(self, dialogues_srt: str, frame_descriptions: List[Dict[str, Any]], max_cycles: int = 3) -> NarratorOutput:
        """Run the full pipeline and return the parsed result.

        Args:
            dialogues_srt: Raw SRT text with the dialogue subtitles.
            frame_descriptions: Per-frame scene description dicts.
            max_cycles: Maximum critic review rounds before returning the
                best-effort (possibly unapproved) result.
        """
        state = {
            "dialogues_srt": dialogues_srt,
            "frame_descriptions": frame_descriptions,
            "cycles": 0,
            "max_cycles": max_cycles,
        }
        result = self.graph.invoke(state)
        narration = result.get("narration", "")
        # The narrator emits two labelled blocks; split them instead of
        # returning the same raw text for both fields.
        narrative_text, srt_text = self._split_output(narration)
        return NarratorOutput(
            narrative_text=narrative_text,
            srt_text=srt_text,
            critic_feedback=result.get("critic_feedback"),
            approved=result.get("approved", False),
        )
|
|
|