"""
Fluency Benchmark App — Streamlit Interface

Upload an audio file → full pipeline → fluency report.

Run: streamlit run app.py
"""
import sys
import tempfile
from pathlib import Path

import streamlit as st
import numpy as np

# Add app root to path so `pipeline.*` / `models.*` imports resolve when the
# app is launched from another working directory.
sys.path.insert(0, str(Path(__file__).parent))

st.set_page_config(page_title="Fluency Benchmark", layout="wide")
st.title("Fluency Benchmark Pipeline")
st.markdown("Upload an English speech audio file to measure temporal fluency — flow, continuity, and pausing behavior.")

# ── Sidebar ──
st.sidebar.header("About")
st.sidebar.markdown("""
**What this measures:**
- Speech continuity (how much time is speech vs silence)
- Pause behavior (frequency, duration, placement)
- Articulation smoothness (legato vs staccato)
- Word-level precision (confidence, filled pauses)

**What this does NOT measure:**
- Grammar correctness
- Vocabulary richness
- Pronunciation accuracy
""")

# ── Input ──
# Two input modes; both paths end with the raw audio persisted to a temp file
# on disk, because the pipeline stages (VAD, transcription) consume a path.
input_method = st.radio("Choose input method", ["Upload File", "Record from Mic"], horizontal=True)

audio_path = None
if input_method == "Upload File":
    uploaded_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "m4a", "ogg", "flac"])
    if uploaded_file is not None:
        # Keep the original extension so downstream decoders pick the right codec.
        suffix = Path(uploaded_file.name).suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            audio_path = tmp.name
        st.audio(uploaded_file, format=f"audio/{suffix.strip('.')}")
else:
    mic_audio = st.audio_input("Record audio")
    if mic_audio is not None:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(mic_audio.read())
            audio_path = tmp.name
        st.audio(mic_audio, format="audio/wav")

if audio_path is not None:
    if st.button("Analyze Fluency", type="primary"):
        # The temp file is only needed by steps 1-2 (which read the path);
        # remove it once the pipeline finishes — or fails — so repeated runs
        # don't accumulate orphaned files (delete=False would otherwise leak).
        try:
            # ── Step 1: VAD ──
            with st.spinner("Step 1/6: Running Voice Activity Detection..."):
                from pipeline.vad import run_vad
                vad_features = run_vad(audio_path)

            # ── Step 2: Transcription ──
            with st.spinner("Step 2/6: Transcribing and aligning words..."):
                from pipeline.transcribe import transcribe_and_align
                tx = transcribe_and_align(audio_path)
                words = tx["words"]
                transcript = tx["transcript"]

            # Update MLU with actual word count.
            # NOTE(review): assumes vad_features["speech_segments"] is a segment
            # *count* (int), not a list — max(list, 1) would raise. Verify in
            # pipeline.vad.
            word_count = len(words)
            segments = vad_features["speech_segments"]
            vad_features["mlu"] = round(word_count / max(segments, 1), 2)

            # ── Step 3: Placement ──
            with st.spinner("Step 3/6: Classifying pause placement..."):
                from pipeline.placement import classify_pauses
                placement = classify_pauses(words, vad_features)

            # ── Step 4: FA Features ──
            with st.spinner("Step 4/6: Computing word-level features..."):
                from pipeline.fa_features import compute_fa_features
                fa = compute_fa_features(words, vad_features["total_duration_sec"])

            # ── Step 5: Syntactic Features ──
            with st.spinner("Step 5/6: Analyzing syntactic pause context..."):
                from pipeline.syntactic_features import compute_syntactic_features
                syn = compute_syntactic_features(words, transcript)

            # Combine all features into one flat dict for the models.
            all_features = {**vad_features, **placement, **fa, **syn, "word_count": word_count}

            # ── Step 6: Model Inference + Composite ──
            with st.spinner("Step 6/6: Running models and scoring..."):
                from models.inference import predict
                from pipeline.composite import compute_composite
                predictions = predict(all_features)
                composite = compute_composite(all_features, predictions)
        finally:
            # Best-effort cleanup of the temp audio file (fix: previously leaked).
            Path(audio_path).unlink(missing_ok=True)

        # ════════════════════════════════════════════════════════════
        # RESULTS DISPLAY
        # ════════════════════════════════════════════════════════════
        st.divider()

        # ── Headline Score ──
        band = composite["fluency_band"]
        pct = composite["composite_percentile"]

        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Fluency Percentile", f"{pct:.1f}%")
        with col2:
            st.metric("Fluency Band", band)
        with col3:
            ci = f"[{composite['composite_ci_low']:.2f}, {composite['composite_ci_high']:.2f}]"
            st.metric("95% Confidence Interval", ci)

        st.divider()

        # ── Transcript ──
        st.subheader("Transcript")
        # Fix: st.text_area requires a non-empty label; hide it instead of
        # passing "" (which Streamlit warns about / deprecates).
        st.text_area("Transcript", transcript, height=100, disabled=True,
                     label_visibility="collapsed")

        # ── Two columns: Temporal + Pauses ──
        col_left, col_right = st.columns(2)

        with col_left:
            st.subheader("Temporal Profile")
            st.metric("Speech Ratio", f"{vad_features['speech_ratio']:.1%}")
            st.metric("Mean Length of Utterance", f"{vad_features['mlu']:.1f} words")
            st.metric("Word Count", str(word_count))
            st.metric("Duration", f"{vad_features['total_duration_sec']:.1f}s")

        with col_right:
            st.subheader("Pause Behavior")
            freq_label = predictions.get("pause_freq_ordinal_label", "?")
            dur_label = predictions.get("pause_dur_ordinal_label", "?")
            place_label = predictions.get("pause_place_ordinal_label", "?")
            st.metric("Pause Frequency", f"{freq_label} ({vad_features['pause_frequency_per_sec']:.2f}/s)")
            st.metric("Pause Duration", f"{dur_label} (avg {vad_features['mean_pause_duration_sec']:.2f}s)")
            st.metric("Pause Placement", place_label)
            st.metric("Long Pause Ratio", f"{vad_features['long_pause_ratio']:.0%}")

        st.divider()

        # ── Diagnosis ──
        col_a, col_b = st.columns(2)

        with col_a:
            st.subheader("Hesitation Diagnosis")
            cl = predictions.get("cognitive_load_ordinal_label", "?")
            uc = predictions.get("utterance_constraints_ordinal_label", "?")
            st.metric("Cognitive Load", cl)
            st.metric("Utterance Constraints", uc)

            # Dominance: predicted mix of pause types.
            st.markdown("**Pause Type Breakdown**")
            pu = predictions.get("prop_unplanned_pred", 0)
            pp = predictions.get("prop_planned_pred", 0)
            pn = predictions.get("prop_neutral_pred", 0)
            st.progress(pu, text=f"Unplanned (hesitation): {pu:.0%}")
            st.progress(pp, text=f"Planned (breathing): {pp:.0%}")
            st.progress(pn, text=f"Neutral (filler): {pn:.0%}")

        with col_b:
            st.subheader("Articulation & Word Quality")
            art_label = predictions.get("articulation_ordinal_label", "?")
            st.metric("Articulation", art_label)
            st.metric("Word Confidence", f"{fa['fa_mean_word_confidence']:.1%}")
            st.metric("Filled Pauses", str(fa["fa_filled_pause_count"]))
            st.metric("Speech Rate CV", f"{fa['fa_speech_rate_cv']:.3f}")

        st.divider()

        # ── 6 Dimensions ──
        st.subheader("6 Fluency Dimensions")
        dim_labels = {
            "dim_continuity": ("Continuity", "How much and how long the speaker talks without pausing"),
            "dim_pause_quality": ("Pause Quality", "Average pause duration and long pause proportion"),
            "dim_placement": ("Placement", "Where pauses fall relative to sentence structure"),
            "dim_articulation": ("Articulation", "Smoothness of delivery (legato vs staccato)"),
            "dim_dominance": ("Dominance", "Balance of unplanned vs planned pauses"),
            "dim_word_precision": ("Word Precision", "Word-level confidence and speech rate consistency"),
        }
        cols = st.columns(3)
        for i, (dim_key, (label, desc)) in enumerate(dim_labels.items()):
            with cols[i % 3]:
                # Fix: dropped dead `delta_color` computation — st.metric only
                # applies delta_color when a `delta` argument is supplied.
                st.metric(label, f"{composite[dim_key]:.2f}", help=desc)

        st.divider()

        # ── Placement Details ──
        st.subheader("Pause Placement Details")
        pcol1, pcol2 = st.columns(2)
        with pcol1:
            st.metric("Boundary-aligned pauses", f"{placement['boundary_pause_ratio']:.0%}")
            st.metric("Mid-clause pauses", f"{placement['mid_clause_pause_ratio']:.0%}")
        with pcol2:
            st.metric("Content-word preceding", f"{syn['syn_content_word_preceding_ratio']:.0%}")
            st.metric("Function-word preceding", f"{syn['syn_function_word_preceding_ratio']:.0%}")

        # ── Raw Data Expander ──
        with st.expander("All Raw Features"):
            import pandas as pd
            all_data = {**all_features, **predictions, **composite}
            feature_df = pd.DataFrame([all_data]).T
            feature_df.columns = ["Value"]
            # Stringify so mixed types render uniformly in the table.
            feature_df["Value"] = feature_df["Value"].astype(str)
            st.dataframe(feature_df, use_container_width=True)
else:
    st.info("Upload a .wav, .mp3, or .m4a audio file, or record from your microphone to begin analysis.")