Spaces:

sajith-0701
/

SentinelAI

Sleeping

App Files Files Community

SentinelAI / app /pipeline /workflow.py

sajith-0701

initial deployment for HF Spaces

71c1ad2 12 days ago

raw

history blame contribute delete

11 kB

	# app/pipeline/workflow.py
	# LangGraph state machine — orchestrates the full moderation pipeline

	from __future__ import annotations
	from typing import Any, TypedDict, Literal
	from dataclasses import asdict

	from langgraph.graph import StateGraph, END

	from app.pipeline.preprocessor import (
	Preprocessor,
	ProcessedText,
	ProcessedImage,
	ProcessedVideo,
	)
	from app.pipeline.fast_filter import FastFilter, FilterResult
	from app.pipeline.risk_scorer import RiskScorer, RiskScore
	from app.pipeline.deep_analyzer import DeepAnalyzer, DeepAnalysisResult
	from app.pipeline.decision_engine import DecisionEngine, Decision
	from app.services.mongo_service import mongo_service
	from app.services.redis_service import redis_service
	from app.observability.logging import get_logger

	logger = get_logger(__name__)


	# ──────────────────────────────────────────────
	# Pipeline State Schema
	# ──────────────────────────────────────────────

	class PipelineState(TypedDict, total=False):
	"""State that flows through the LangGraph pipeline."""
	# Input
	input_type: str # "text", "image", "video"
	raw_content: Any # str for text, bytes for image/video
	user_id: str \| None

	# Preprocessed
	processed_text: ProcessedText \| None
	processed_image: ProcessedImage \| None
	processed_video: ProcessedVideo \| None

	# Pipeline stages
	filter_result: FilterResult \| None
	filter_results: list[FilterResult] # For video (multiple frames)
	risk_score: RiskScore \| None
	deep_result: DeepAnalysisResult \| None
	decision: Decision \| None

	# Context
	user_history: dict \| None

	# Metadata
	error: str \| None


	# ──────────────────────────────────────────────
	# Pipeline Node Functions
	# ──────────────────────────────────────────────

	preprocessor = Preprocessor()
	fast_filter = FastFilter()
	risk_scorer = RiskScorer()
	deep_analyzer = DeepAnalyzer()
	decision_engine = DecisionEngine()


	async def preprocess_node(state: PipelineState) -> dict:
	"""Node 1: Preprocess the raw input."""
	input_type = state["input_type"]
	raw = state["raw_content"]

	try:
	if input_type == "text":
	processed = preprocessor.process_text(raw)
	return {"processed_text": processed}

	elif input_type == "image":
	processed = preprocessor.process_image(raw)
	return {"processed_image": processed}

	elif input_type == "video":
	processed = preprocessor.process_video(raw)
	return {"processed_video": processed}

	else:
	return {"error": f"Unknown input type: {input_type}"}

	except Exception as e:
	logger.error("preprocess_failed", error=str(e))
	return {"error": f"Preprocessing failed: {str(e)}"}


	async def fetch_user_history_node(state: PipelineState) -> dict:
	"""Node 1b: Fetch user moderation history (parallel with preprocess)."""
	user_id = state.get("user_id")
	if not user_id:
	return {"user_history": None}

	# Try Redis cache first
	cached = await redis_service.get_user_history(user_id)
	if cached:
	return {"user_history": cached}

	# Fall back to MongoDB
	history = await mongo_service.get_user_history(user_id)
	if history:
	await redis_service.cache_user_history(user_id, history)
	return {"user_history": history}


	async def fast_filter_node(state: PipelineState) -> dict:
	"""Node 2: Run fast AI filter."""
	input_type = state["input_type"]

	try:
	if input_type == "text" and state.get("processed_text"):
	result = fast_filter.filter_text(state["processed_text"])
	return {"filter_result": result}

	elif input_type == "image" and state.get("processed_image"):
	result = fast_filter.filter_image(state["processed_image"])
	return {"filter_result": result}

	elif input_type == "video" and state.get("processed_video"):
	# Analyze each frame, take the worst result
	video = state["processed_video"]
	frame_results = []
	for frame in video.frames:
	result = fast_filter.filter_image(frame)
	frame_results.append(result)

	# Use the highest-risk frame as the representative result
	if frame_results:
	worst = max(frame_results, key=lambda r: r.max_score)
	return {
	"filter_result": worst,
	"filter_results": frame_results,
	}
	else:
	return {
	"filter_result": FilterResult(
	input_type="video",
	is_flagged=False,
	max_score=0.0,
	)
	}

	return {"error": "No processed content available for filtering"}

	except Exception as e:
	logger.error("fast_filter_failed", error=str(e))
	return {"error": f"Fast filter failed: {str(e)}"}


	async def risk_score_node(state: PipelineState) -> dict:
	"""Node 3: Compute composite risk score."""
	filter_result = state.get("filter_result")
	if not filter_result:
	return {"error": "No filter result to score"}

	try:
	user_history = state.get("user_history")
	score = risk_scorer.score(filter_result, user_history)
	return {"risk_score": score}
	except Exception as e:
	logger.error("risk_score_failed", error=str(e))
	return {"error": f"Risk scoring failed: {str(e)}"}


	def route_by_risk(state: PipelineState) -> str:
	"""
	Conditional router: decides whether to do deep analysis or skip to decision.

	- LOW / MEDIUM → skip directly to decision
	- HIGH → go to deep analysis
	"""
	risk = state.get("risk_score")
	if risk and risk.level == "HIGH":
	return "deep_analysis"
	return "decide"


	async def deep_analysis_node(state: PipelineState) -> dict:
	"""Node 4 (conditional): Deep analysis with CLIP + Gemini."""
	input_type = state["input_type"]
	filter_result = state.get("filter_result")

	try:
	if input_type == "text" and state.get("processed_text"):
	result = await deep_analyzer.analyze_text(
	state["processed_text"].cleaned,
	filter_result,
	)
	return {"deep_result": result}

	elif input_type in ("image", "video") and state.get("processed_image"):
	result = await deep_analyzer.analyze_image(
	state["processed_image"].image,
	filter_result,
	)
	return {"deep_result": result}

	elif input_type == "video" and state.get("processed_video"):
	# Use the worst frame for deep analysis
	video = state["processed_video"]
	if video.frames:
	# Find the worst frame based on filter_results
	worst_frame = video.frames[0]
	filter_results = state.get("filter_results", [])
	if filter_results:
	worst_idx = max(
	range(len(filter_results)),
	key=lambda i: filter_results[i].max_score,
	)
	if worst_idx < len(video.frames):
	worst_frame = video.frames[worst_idx]

	result = await deep_analyzer.analyze_image(
	worst_frame.image,
	filter_result,
	)
	return {"deep_result": result}

	return {"deep_result": None}

	except Exception as e:
	logger.error("deep_analysis_failed", error=str(e))
	return {"deep_result": None}


	async def decision_node(state: PipelineState) -> dict:
	"""Node 5: Final decision."""
	risk = state.get("risk_score")
	if not risk:
	# Emergency fallback
	return {
	"decision": Decision(
	action="WARNING",
	reason="Pipeline error: no risk score available",
	severity="medium",
	)
	}

	try:
	deep_result = state.get("deep_result")
	user_history = state.get("user_history")
	decision = decision_engine.decide(risk, deep_result, user_history)
	return {"decision": decision}
	except Exception as e:
	logger.error("decision_failed", error=str(e))
	return {
	"decision": Decision(
	action="WARNING",
	reason=f"Decision engine error: {str(e)}",
	severity="medium",
	)
	}


	# ──────────────────────────────────────────────
	# Build the LangGraph Workflow
	# ──────────────────────────────────────────────

	def build_moderation_workflow():
	"""
	Construct and compile the LangGraph moderation pipeline.

	Flow:
	preprocess → fast_filter → risk_score
	├─ LOW/MEDIUM → decide
	└─ HIGH → deep_analysis → decide

	Returns:
	Compiled LangGraph workflow.
	"""
	graph = StateGraph(PipelineState)

	# Add nodes
	graph.add_node("preprocess", preprocess_node)
	graph.add_node("fetch_history", fetch_user_history_node)
	graph.add_node("fast_filter", fast_filter_node)
	graph.add_node("risk_score", risk_score_node)
	graph.add_node("deep_analysis", deep_analysis_node)
	graph.add_node("decide", decision_node)

	# Define edges
	graph.set_entry_point("preprocess")

	# After preprocess, run fast filter
	graph.add_edge("preprocess", "fast_filter")

	# After fast filter, compute risk score
	graph.add_edge("fast_filter", "risk_score")

	# Conditional routing based on risk level
	graph.add_conditional_edges(
	"risk_score",
	route_by_risk,
	{
	"deep_analysis": "deep_analysis",
	"decide": "decide",
	},
	)

	# Deep analysis flows to decision
	graph.add_edge("deep_analysis", "decide")

	# Decision is the terminal node
	graph.add_edge("decide", END)

	# Compile
	workflow = graph.compile()
	logger.info("moderation_workflow_compiled")
	return workflow


	# Global compiled workflow (initialized at startup)
	moderation_workflow = None


	def get_workflow():
	"""Get or create the compiled moderation workflow."""
	global moderation_workflow
	if moderation_workflow is None:
	moderation_workflow = build_moderation_workflow()
	return moderation_workflow