import asyncio

import json

import os

import re

import statistics

import time

import uuid

from dataclasses import dataclass

from typing import Any, Optional

import httpx

from dotenv import load_dotenv

from fastapi import FastAPI, HTTPException

from fastapi.middleware.cors import CORSMiddleware

from fastapi.responses import StreamingResponse

from pydantic import BaseModel, Field


load_dotenv()


app = FastAPI(title="CortexFlow Backend", version="1.0.0")

app.add_middleware(

    CORSMiddleware,

    allow_origins=["*"],

    allow_methods=["GET", "POST"],

    allow_headers=["*"],

)


GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()

GROQ_API_BASE = os.getenv("GROQ_API_BASE", "https://api.groq.com/openai/v1").rstrip("/")

GROQ_TIMEOUT_SECONDS = float(os.getenv("GROQ_TIMEOUT_SECONDS", "40"))

MODEL_DISCOVERY_TTL_SECONDS = int(os.getenv("MODEL_DISCOVERY_TTL_SECONDS", "900"))


PREFERRED_REASONING_MODELS = [

    m.strip()

    for m in os.getenv(

        "GROQ_REASONING_CANDIDATES",

        "openai/gpt-oss-120b,llama-3.3-70b-versatile,openai/gpt-oss-20b,llama-3.1-8b-instant",

    ).split(",")

    if m.strip()

]

PREFERRED_SAFETY_MODELS = [

    m.strip()

    for m in os.getenv(

        "GROQ_SAFETY_CANDIDATES",

        "openai/gpt-oss-safeguard-20b,openai/gpt-oss-20b,llama-3.1-8b-instant",

    ).split(",")

    if m.strip()

]


OVERRIDE_REASONING_MODEL = os.getenv("GROQ_REASONING_MODEL", "").strip()

OVERRIDE_SAFETY_MODEL = os.getenv("GROQ_SAFETY_MODEL", "").strip()


MIN_WORDS_REQUIRED = int(os.getenv("MIN_WORDS_REQUIRED", "3"))


STEP_NAMES = [

    "STT preprocessor",

    "Lexical agent",

    "Semantic agent",

    "Prosody agent",

    "Syntax agent",

    "Biomarker mapper",

    "Report composer",

]

DOMAIN_REGION = {

    "lexical": "Broca's area",

    "semantic": "Wernicke's area",

    "prosody": "SMA",

    "syntax": "DLPFC",

    "affective": "Amygdala",

}


STOPWORDS = {

    "the", "a", "an", "and", "or", "but", "if", "then", "than", "of", "to", "in", "on", "at", "for",

    "with", "without", "by", "from", "as", "is", "am", "are", "was", "were", "be", "been", "being",

    "it", "its", "this", "that", "these", "those", "i", "you", "he", "she", "we", "they", "them",

    "my", "your", "our", "their", "me", "him", "her", "us", "do", "does", "did", "have", "has", "had",

    "not", "no", "yes", "so", "because", "about", "into", "out", "up", "down", "can", "could", "would",

    "should", "will", "just", "very", "really", "also",

}


FILLERS = {

    "um", "uh", "erm", "hmm", "like", "you", "know", "actually", "basically", "literally", "sort", "kind", "maybe",

}


POSITIVE_WORDS = {

    "good", "better", "great", "calm", "confident", "clear", "focused", "stable", "happy", "optimistic", "safe", "steady",

}

NEGATIVE_WORDS = {

    "bad", "worse", "anxious", "scared", "panic", "panicked", "confused", "sad", "depressed", "angry", "overwhelmed", "stressed",

}

AROUSAL_WORDS = {

    "urgent", "immediately", "intense", "extreme", "critical", "afraid", "panic", "terrified", "racing", "shaking", "worried",

}

HEDGE_WORDS = {

    "maybe", "perhaps", "possibly", "probably", "sort", "kind", "might", "could", "guess", "unsure", "not sure",

}

SUBORDINATORS = {

    "because", "although", "though", "while", "unless", "until", "since", "whereas", "however", "therefore", "moreover", "which", "that",

}


class AnalyzeRequest(BaseModel):

    input_value: Optional[str] = None

    transcript: Optional[str] = None

    pause_map: Optional[list[float]] = None

    audio_duration: Optional[float] = None

    session_id: Optional[str] = None


@dataclass

class DomainScore:

    overall: float

    details: dict[str, float]


@dataclass

class AnalysisState:

    scores: dict[str, DomainScore]

    overall_load: float

    confidence: float

    quality_notes: list[str]

    metrics: dict[str, Any]


_MODEL_CACHE: dict[str, Any] = {"updated": 0.0, "models": []}

_MODEL_CACHE_LOCK = asyncio.Lock()


def clamp01(v: float) -> float:

    return max(0.0, min(1.0, v))


def mean(values: list[float], default: float = 0.0) -> float:

    return float(statistics.mean(values)) if values else default


def tokenize_words(text: str) -> list[str]:

    return re.findall(r"[A-Za-z']+", text.lower())


def split_sentences(text: str) -> list[str]:

    parts = [p.strip() for p in re.split(r"(?<=[.!?])\s+", text) if p.strip()]

    return parts if parts else ([text.strip()] if text.strip() else [])


def content_words(tokens: list[str]) -> list[str]:

    return [t for t in tokens if len(t) > 2 and t not in STOPWORDS]


def jaccard(a: set[str], b: set[str]) -> float:

    if not a or not b:

        return 0.0

    inter = len(a.intersection(b))

    union = len(a.union(b))

    return inter / union if union else 0.0


def scale_linear(value: float, low: float, high: float) -> float:

    if high <= low:

        return 0.0

    return clamp01((value - low) / (high - low))


def scale_inverse(value: float, good: float, poor: float) -> float:

    if poor >= good:

        return 0.0

    return clamp01((good - value) / (good - poor))


def safe_step_event(name: str, status: str, detail: Optional[str] = None) -> bytes:

    payload: dict[str, Any] = {"type": "step", "step": {"name": name, "status": status}}

    if detail:

        payload["step"]["detail"] = detail

    return (json.dumps(payload) + "\n").encode()


def ensure_nonempty_text(req: AnalyzeRequest) -> str:

    text = (req.input_value or req.transcript or "").strip()

    words = tokenize_words(text)

    if not text:

        raise HTTPException(status_code=400, detail="No input text provided")

    if len(words) < MIN_WORDS_REQUIRED:

        raise HTTPException(

            status_code=422,

            detail=f"Need at least {MIN_WORDS_REQUIRED} words for reliable analysis. Received {len(words)} words.",

        )

    return text


def lexical_domain(tokens: list[str], content: list[str]) -> tuple[DomainScore, dict[str, float]]:

    total = max(len(tokens), 1)

    unique = len(set(tokens))

    filler_hits = sum(1 for t in tokens if t in FILLERS)


    ttr = unique / total

    density = len(content) / total

    filler_rate = (filler_hits / total) * 100.0


    s_ttr = clamp01(abs(ttr - 0.52) / 0.30)

    s_density = clamp01(abs(density - 0.58) / 0.25)

    s_filler = scale_linear(filler_rate, 2.0, 14.0)


    overall = clamp01((0.4 * s_ttr) + (0.35 * s_density) + (0.25 * s_filler))


    details = {

        "ttr": round(s_ttr, 4),

        "density": round(s_density, 4),

        "filler_rate": round(s_filler, 4),

    }

    raw = {

        "ttr": round(ttr, 4),

        "lexical_density": round(density, 4),

        "filler_rate_per_100w": round(filler_rate, 2),

    }

    return DomainScore(round(overall, 4), details), raw


def semantic_domain(sentences: list[str]) -> tuple[DomainScore, dict[str, float]]:

    if len(sentences) < 2:

        coherence = 0.16

        idea_density = 0.45

        tangentiality = 0.55

    else:

        sentence_content = [set(content_words(tokenize_words(s))) for s in sentences]

        pairwise = [jaccard(sentence_content[i], sentence_content[i + 1]) for i in range(len(sentence_content) - 1)]

        coherence = mean(pairwise, default=0.12)

        avg_content_len = mean([len(x) for x in sentence_content], default=0.0)

        idea_density = clamp01(avg_content_len / 14.0)

        tangentiality = clamp01(1.0 - coherence)

    s_coherence = scale_inverse(coherence, good=0.22, poor=0.05)

    s_idea_density = scale_inverse(idea_density, good=0.65, poor=0.25)

    s_tangentiality = scale_linear(tangentiality, low=0.35, high=0.85)


    overall = clamp01((0.45 * s_coherence) + (0.30 * s_idea_density) + (0.25 * s_tangentiality))


    details = {

        "coherence": round(s_coherence, 4),

        "idea_density": round(s_idea_density, 4),

        "tangentiality": round(s_tangentiality, 4),

    }

    raw = {

        "coherence_index": round(coherence, 4),

        "idea_density_index": round(idea_density, 4),

        "tangentiality_index": round(tangentiality, 4),

    }

    return DomainScore(round(overall, 4), details), raw


def prosody_domain(

    tokens: list[str], text: str, pause_map: Optional[list[float]], audio_duration: Optional[float]

) -> tuple[DomainScore, dict[str, float], bool]:

    word_count = max(len(tokens), 1)

    pauses = [float(p) for p in (pause_map or []) if p >= 0]

    has_audio_prosody = bool(pauses)


    if audio_duration and audio_duration > 5.0:

        duration_seconds = audio_duration

    else:

        estimated_speech_seconds = word_count / 2.5

        duration_seconds = estimated_speech_seconds + sum(pauses)


    duration_minutes = max(duration_seconds / 60.0, 0.1)

    speech_rate = word_count / duration_minutes


    if pauses:

        pause_freq = len(pauses) / duration_minutes

        hesitation_ratio = sum(1 for p in pauses if p >= 0.8) / len(pauses)

    else:

        punctuation_pauses = len(re.findall(r"[,;:\-]", text))

        pause_freq = (punctuation_pauses / max(word_count, 1)) * 100

        hesitation_ratio = sum(1 for t in tokens if t in FILLERS) / max(word_count, 1)


    s_rate = clamp01(abs(speech_rate - 140.0) / 95.0)

    s_pause = scale_linear(pause_freq, low=8.0, high=30.0)

    s_hes = scale_linear(hesitation_ratio, low=0.08, high=0.35)


    overall = clamp01((0.4 * s_rate) + (0.35 * s_pause) + (0.25 * s_hes))


    details = {

        "speech_rate": round(s_rate, 4),

        "pause_freq": round(s_pause, 4),

        "hesitation": round(s_hes, 4),

    }

    raw = {

        "speech_rate_wpm": round(speech_rate, 1),

        "pause_frequency_per_min": round(pause_freq, 2),

        "hesitation_ratio": round(hesitation_ratio, 4),

        "duration_seconds": round(duration_seconds, 2),

    }

    return DomainScore(round(overall, 4), details), raw, has_audio_prosody


def syntax_domain(tokens: list[str], sentences: list[str], text: str) -> tuple[DomainScore, dict[str, float]]:

    sentence_count = max(len(sentences), 1)

    mlu = len(tokens) / sentence_count


    per_sentence_depth = []

    for s in sentences:

        stoks = tokenize_words(s)

        sub_count = sum(1 for t in stoks if t in SUBORDINATORS)

        comma_count = s.count(",")

        per_sentence_depth.append(sub_count + (comma_count * 0.5))

    clause_depth = mean(per_sentence_depth, default=0.0)


    passive_matches = re.findall(r"\b(?:is|are|was|were|be|been|being)\s+\w+(?:ed|en)\b", text.lower())

    passive_ratio = len(passive_matches) / max(sentence_count, 1)


    s_mlu = clamp01(abs(mlu - 17.0) / 12.0)

    s_depth = scale_linear(clause_depth, low=2.0, high=6.5)

    s_passive = scale_linear(passive_ratio, low=0.15, high=1.2)


    overall = clamp01((0.45 * s_mlu) + (0.35 * s_depth) + (0.20 * s_passive))


    details = {

        "mlu": round(s_mlu, 4),

        "clause_depth": round(s_depth, 4),

        "passive_ratio": round(s_passive, 4),

    }

    raw = {

        "mean_length_utterance": round(mlu, 2),

        "clause_depth_index": round(clause_depth, 2),

        "passive_ratio": round(passive_ratio, 3),

    }

    return DomainScore(round(overall, 4), details), raw


def affective_domain(tokens: list[str]) -> tuple[DomainScore, dict[str, float]]:

    total = max(len(tokens), 1)

    pos = sum(1 for t in tokens if t in POSITIVE_WORDS)

    neg = sum(1 for t in tokens if t in NEGATIVE_WORDS)

    arousal = sum(1 for t in tokens if t in AROUSAL_WORDS)

    hedge = sum(1 for t in tokens if t in HEDGE_WORDS)


    valence = (pos - neg) / (pos + neg + 1)

    valence_01 = (valence + 1.0) / 2.0

    arousal_rate = (arousal / total) * 100.0

    certainty = 1.0 - clamp01(hedge / max(total * 0.15, 1.0))


    s_valence = scale_inverse(valence_01, good=0.62, poor=0.20)

    s_arousal = scale_linear(arousal_rate, low=3.0, high=14.0)

    s_certainty = scale_inverse(certainty, good=0.72, poor=0.32)

    overall = clamp01((0.4 * s_valence) + (0.35 * s_arousal) + (0.25 * s_certainty))


    details = {

        "valence": round(s_valence, 4),

        "arousal": round(s_arousal, 4),

        "certainty": round(s_certainty, 4),

    }

    raw = {

        "valence_score": round(valence_01, 4),

        "arousal_rate_per_100w": round(arousal_rate, 2),

        "certainty_index": round(certainty, 4),

    }

    return DomainScore(round(overall, 4), details), raw


def compute_confidence(

    word_count: int, sentence_count: int, has_audio_prosody: bool, repeat_ratio: float

) -> tuple[float, list[str]]:

    notes: list[str] = []

    c_words = clamp01(word_count / 180.0)

    c_sents = clamp01(sentence_count / 8.0)

    c_repeat = clamp01(1.0 - (repeat_ratio * 1.4))

    c_audio = 1.0 if has_audio_prosody else 0.55


    confidence = clamp01((0.45 * c_words) + (0.2 * c_sents) + (0.2 * c_repeat) + (0.15 * c_audio))


    if word_count < 60:

        notes.append("Low sample length. Interpret results cautiously.")

    if not has_audio_prosody:

        notes.append("Prosody is inferred from text patterns because pause-map audio features were not provided.")

    if repeat_ratio > 0.45:

        notes.append("High repetition detected, which can reduce semantic reliability.")


    return round(confidence, 4), notes


def compute_analysis_state(

    text: str,

    pause_map: Optional[list[float]],

    audio_duration: Optional[float],

) -> AnalysisState:

    tokens = tokenize_words(text)

    sentences = split_sentences(text)

    cwords = content_words(tokens)


    repeat_ratio = 1.0 - (len(set(tokens)) / max(len(tokens), 1))


    lexical, lexical_raw = lexical_domain(tokens, cwords)

    semantic, semantic_raw = semantic_domain(sentences)

    prosody, prosody_raw, has_audio = prosody_domain(tokens, text, pause_map, audio_duration)

    syntax, syntax_raw = syntax_domain(tokens, sentences, text)

    affective, affective_raw = affective_domain(tokens)

    confidence, quality_notes = compute_confidence(

        word_count=len(tokens),

        sentence_count=len(sentences),

        has_audio_prosody=has_audio,

        repeat_ratio=repeat_ratio,

    )


    scores = {

        "lexical": lexical,

        "semantic": semantic,

        "prosody": prosody,

        "syntax": syntax,

        "affective": affective,

    }


    weighted = (

        (0.22 * lexical.overall)

        + (0.23 * semantic.overall)

        + (0.18 * prosody.overall)

        + (0.22 * syntax.overall)

        + (0.15 * affective.overall)

    )


    confidence_factor = 0.75 + (0.25 * confidence)

    overall_load = clamp01(weighted * confidence_factor)


    metrics = {

        "word_count": len(tokens),

        "sentence_count": len(sentences),

        "repeat_ratio": round(repeat_ratio, 4),

        "lexical": lexical_raw,

        "semantic": semantic_raw,

        "prosody": prosody_raw,

        "syntax": syntax_raw,

        "affective": affective_raw,

    }


    return AnalysisState(

        scores=scores,

        overall_load=round(overall_load, 4),

        confidence=confidence,

        quality_notes=quality_notes,

        metrics=metrics,

    )


def severity_from_score(value: float) -> str:

    if value >= 0.72:

        return "high"

    if value >= 0.42:

        return "moderate"

    return "low"


def level_from_overall(overall_load: float, confidence: float) -> str:

    if overall_load >= 0.68:

        base = "high"

    elif overall_load >= 0.44:

        base = "moderate"

    else:

        base = "low"


    if confidence < 0.45 and base == "high":

        return "moderate"

    return base


def summary_fallback(state: AnalysisState, risk_level: str) -> str:

    top_domain = max(state.scores.items(), key=lambda kv: kv[1].overall)[0]

    top_value = state.scores[top_domain].overall

    confidence_pct = round(state.confidence * 100)

    return (

        f"This analysis found a {risk_level} overall cognitive load signal based on linguistic and timing features. "

        f"The strongest deviation appeared in {top_domain} markers (score {top_value:.2f}). "

        f"Confidence is {confidence_pct}% and this output is screening support only, not a diagnosis."

    )


def make_highlights(state: AnalysisState) -> list[dict[str, Any]]:

    sorted_domains = sorted(state.scores.items(), key=lambda kv: kv[1].overall, reverse=True)

    highlights: list[dict[str, Any]] = []

    for domain, score in sorted_domains[:3]:

        if score.overall >= 0.66:

            finding = "Elevated deviation from expected baseline in this domain."

        elif score.overall >= 0.42:

            finding = "Mild-to-moderate deviation with mixed stability."

        else:

            finding = "Signals remain within expected variation for this domain."


        highlights.append(

            {

                "region": DOMAIN_REGION[domain],

                "activation": round(score.overall, 4),

                "finding": finding,

                "clinical_context": "Screening signal only. Interpret alongside clinical judgement and repeated assessments.",

            }

        )

    return highlights


def make_indicators(state: AnalysisState) -> list[dict[str, Any]]:

    indicators: list[dict[str, Any]] = []

    for domain, dscore in state.scores.items():

        for k, v in dscore.details.items():

            if v < 0.42:

                continue

            indicators.append(

                {

                    "indicator": f"{domain.title()} · {k.replace('_', ' ').title()}",

                    "severity": severity_from_score(v),

                    "explanation": f"Computed score {v:.2f} from measured input features; higher means greater deviation from baseline patterns.",

                }

            )

    indicators.sort(key=lambda x: {"high": 2, "moderate": 1, "low": 0}[x["severity"]], reverse=True)

    return indicators[:6]


def recommendation_for_level(level: str, confidence: float) -> str:

    if level == "high":

        return (

            "Repeat this assessment with a longer sample, then discuss the combined results with a qualified clinician. "

            "Do not treat this result as a diagnosis."

        )

    if level == "moderate":

        return (

            "Collect 1-2 additional samples across different times of day to confirm trend stability before drawing conclusions."

        )

    if confidence < 0.5:

        return "Provide a longer speech sample for stronger reliability before interpreting the result."

    return "Current signals are relatively stable. Continue periodic monitoring rather than one-off interpretation."


async def fetch_available_models() -> list[str]:

    if not GROQ_API_KEY:

        return []


    async with _MODEL_CACHE_LOCK:

        now = time.time()

        if now - float(_MODEL_CACHE["updated"]) < MODEL_DISCOVERY_TTL_SECONDS:

            return list(_MODEL_CACHE["models"])


        headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}

        try:

            async with httpx.AsyncClient(timeout=GROQ_TIMEOUT_SECONDS) as client:

                res = await client.get(f"{GROQ_API_BASE}/models", headers=headers)

                res.raise_for_status()

                data = res.json().get("data", [])

                models = sorted({item.get("id", "") for item in data if item.get("id")})

                _MODEL_CACHE["updated"] = now

                _MODEL_CACHE["models"] = models

                return models

        except Exception:

            return list(_MODEL_CACHE["models"])


def pick_model(available: list[str], override: str, candidates: list[str]) -> Optional[str]:

    if override and override in available:

        return override


    for m in candidates:

        if m in available:

            return m

    for m in available:

        lowered = m.lower()

        if "instruct" in lowered or "versatile" in lowered or "gpt-oss" in lowered:

            return m


    return available[0] if available else None


async def groq_chat(model: str, system: str, user: str, temperature: float = 0.2) -> Optional[str]:

    if not GROQ_API_KEY or not model:

        return None


    headers = {

        "Authorization": f"Bearer {GROQ_API_KEY}",

        "Content-Type": "application/json",

    }

    payload = {

        "model": model,

        "temperature": temperature,

        "messages": [

            {"role": "system", "content": system},

            {"role": "user", "content": user},

        ],

    }


    try:

        async with httpx.AsyncClient(timeout=GROQ_TIMEOUT_SECONDS) as client:

            res = await client.post(f"{GROQ_API_BASE}/chat/completions", headers=headers, json=payload)

            res.raise_for_status()

            data = res.json()

            return data["choices"][0]["message"]["content"].strip()

    except Exception:

        return None


async def compose_safe_summary(state: AnalysisState, risk_level: str) -> tuple[str, dict[str, Optional[str]]]:

    available = await fetch_available_models()

    reasoning_model = pick_model(available, OVERRIDE_REASONING_MODEL, PREFERRED_REASONING_MODELS)

    safety_model = pick_model(available, OVERRIDE_SAFETY_MODEL, PREFERRED_SAFETY_MODELS)


    model_meta = {

        "reasoning_model": reasoning_model,

        "safety_model": safety_model,

    }


    baseline_summary = summary_fallback(state, risk_level)

    if not reasoning_model:

        return baseline_summary, model_meta


    features_for_prompt = {

        "risk_level": risk_level,

        "overall_cognitive_load": state.overall_load,

        "confidence": state.confidence,

        "scores": {k: v.overall for k, v in state.scores.items()},

        "quality_notes": state.quality_notes,

        "metrics": state.metrics,

    }

    system = (

        "You summarize computational language-screening outputs. "

        "Never diagnose disease, never use alarming wording, and always state uncertainty when confidence is limited. "

        "Output exactly 2-3 sentences in plain text."

    )

    user = "Write a careful summary for this analysis:\n" + json.dumps(features_for_prompt)


    summary = await groq_chat(reasoning_model, system, user, temperature=0.15)

    if not summary:

        return baseline_summary, model_meta


    if safety_model:

        safety_system = (

            "You are a safety editor for health-adjacent UX. "

            "Rewrite text to avoid panic, avoid diagnosis claims, and keep uncertainty explicit. "

            "Keep 2-3 sentences."

        )

        safety_user = (

            "Rewrite this summary to be non-alarmist and clinically careful while keeping factual content:\n"

            + summary

            + "\n\nConfidence: "

            + str(state.confidence)

        )

        safe = await groq_chat(safety_model, safety_system, safety_user, temperature=0.1)

        if safe:

            summary = safe


    return summary, model_meta


@app.get("/health")

async def health() -> dict[str, Any]:

    available = await fetch_available_models()

    return {

        "ok": True,

        "service": "cortexflow-backend",

        "groq_configured": bool(GROQ_API_KEY),

        "model_count": len(available),

    }


@app.get("/models/recommended")

async def models_recommended() -> dict[str, Any]:

    available = await fetch_available_models()

    return {

        "available_models": available,

        "recommended": {

            "reasoning": pick_model(available, OVERRIDE_REASONING_MODEL, PREFERRED_REASONING_MODELS),

            "safety": pick_model(available, OVERRIDE_SAFETY_MODEL, PREFERRED_SAFETY_MODELS),

            "transcription": "whisper-large-v3-turbo",

        },

        "notes": {

            "production_primary": "openai/gpt-oss-120b",

            "production_fallback": "llama-3.3-70b-versatile",

            "fast_fallback": "openai/gpt-oss-20b",

        },

    }


@app.post("/analyze")

async def analyze(req: AnalyzeRequest):

    text = ensure_nonempty_text(req)

    session_id = req.session_id or str(uuid.uuid4())


    async def generate():

        for idx, step_name in enumerate(STEP_NAMES):

            yield safe_step_event(step_name, "running" if idx == 0 else "pending")


        try:

            state = compute_analysis_state(text, req.pause_map, req.audio_duration)

            yield safe_step_event("STT preprocessor", "done", "Input normalized and validated")

            yield safe_step_event("Lexical agent", "running")


            await asyncio.sleep(0)

            yield safe_step_event("Lexical agent", "done")

            yield safe_step_event("Semantic agent", "running")


            await asyncio.sleep(0)

            yield safe_step_event("Semantic agent", "done")

            yield safe_step_event("Prosody agent", "running")


            await asyncio.sleep(0)

            yield safe_step_event("Prosody agent", "done")

            yield safe_step_event("Syntax agent", "running")


            await asyncio.sleep(0)

            yield safe_step_event("Syntax agent", "done")

            yield safe_step_event("Biomarker mapper", "running")


            scores_payload = {

                domain: {**score.details, "overall": score.overall}

                for domain, score in state.scores.items()

            }


            yield safe_step_event("Biomarker mapper", "done")

            yield safe_step_event("Report composer", "running")


            risk_level = level_from_overall(state.overall_load, state.confidence)

            summary, model_meta = await compose_safe_summary(state, risk_level)


            report = {

                "summary": summary,

                "risk_level": risk_level,

                "overall_cognitive_load": state.overall_load,

                "highlights": make_highlights(state),

                "risk_indicators": make_indicators(state),

                "recommendation": recommendation_for_level(risk_level, state.confidence),

                "disclaimer": (

                    "This tool is a non-diagnostic screening aid. It can be wrong and must not be used as a standalone "

                    "medical decision system. If you are concerned, consult a qualified clinician."

                ),

                "quality": {

                    "confidence": state.confidence,

                    "notes": state.quality_notes,

                },

                "model_info": model_meta,

            }

            yield safe_step_event("Report composer", "done")


            payload = {

                "type": "end",

                "message": summary,

                "scores": scores_payload,

                "report": report,

                "session_id": session_id,

            }

            yield (json.dumps(payload) + "\n").encode()


        except HTTPException as exc:

            yield (json.dumps({"type": "error", "message": exc.detail}) + "\n").encode()

        except Exception as exc:

            yield (json.dumps({"type": "error", "message": f"Analysis failed: {str(exc)}"}) + "\n").encode()


    return StreamingResponse(

        generate(),

        media_type="text/plain",

        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},

    )