import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from datetime import datetime

from utils import generate_features, pick_top15

# In-memory history (max 12 runs)
prediction_history = []


def train_and_predict(file_obj):
    # --- Load dataset ---
    df = pd.read_csv(file_obj.name, header=0)
    df = df.iloc[:, :8]  # draw_no, draw_date, n1..n6
    df.columns = ["draw_no", "draw_date", "n1", "n2", "n3", "n4", "n5", "n6"]

    # Convert numbers to int
    for col in ["n1", "n2", "n3", "n4", "n5", "n6"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    debug_log = []
    debug_log.append(f"āœ… Loaded {len(df)} draws")
    debug_log.append(f"First draw: {df['draw_date'].iloc[0]}, Last draw: {df['draw_date'].iloc[-1]}")

    # --- Features ---
    features, labels = generate_features(df)
    debug_log.append(f"Features: {len(features)} rows | Label distribution: {np.bincount(labels)}")

    top15, top6 = None, None
    auc = None
    used_fallback = False

    # --- ML model ---
    if len(np.unique(labels)) >= 2:
        try:
            X_train, X_test, y_train, y_test = train_test_split(
                features, labels, test_size=0.2, random_state=42, stratify=labels
            )
            model = GradientBoostingClassifier(n_estimators=200, max_depth=3, random_state=42)
            model.fit(X_train, y_train)

            if len(np.unique(y_test)) > 1:
                auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
            else:
                auc = 0.5

            # Score all numbers 1–50
            all_numbers = pd.DataFrame({"number": range(1, 51)})
            all_features, _ = generate_features(df, candidate_numbers=all_numbers["number"].tolist())
            probs = model.predict_proba(all_features)[:, 1]
            all_features["prob"] = probs

            scored = all_features.groupby("number")["prob"].mean().reset_index()
            scored.columns = ["number", "score"]

            # Predictions
            ranked = scored.sort_values("score", ascending=False)
            top15 = ranked.head(15)["number"].tolist()
            top6 = ranked.head(6)["number"].tolist()

            debug_log.append(f"šŸŽÆ ML Top 15: {top15}")
            debug_log.append(f"šŸŽÆ ML Top 6: {top6}")
            debug_log.append(f"Model AUC: {auc:.3f}")
        except Exception as e:
            debug_log.append(f"āš ļø ML failed: {str(e)}")
            used_fallback = True
    else:
        debug_log.append("āš ļø Only one class found — fallback mode")
        used_fallback = True

    # --- Fallback ---
    if used_fallback or top15 is None or top6 is None:
        nums = df[["n1", "n2", "n3", "n4", "n5", "n6"]].values.flatten()
        freq = pd.Series(nums).value_counts().reset_index()
        freq.columns = ["number", "count"]
        top15 = sorted(freq.head(15)["number"].tolist())
        top6 = sorted(freq.head(6)["number"].tolist())
        debug_log.append(f"šŸ‘‰ Fallback Top 15: {top15}")
        debug_log.append(f"šŸ‘‰ Fallback Top 6: {top6}")

    # --- Evaluation: compare Top 6 with last draw ---
    last_draw = set(df.iloc[-1][["n1", "n2", "n3", "n4", "n5", "n6"]].tolist())
    hits = len(last_draw.intersection(top6))
    debug_log.append(f"āœ… Evaluation: Top 6 matched {hits}/6 with last draw {sorted(last_draw)}")

    # --- Record history ---
    today = datetime.now().strftime("%Y-%m-%d")
    prediction_history.append({"date": today, "top15": top15, "top6": top6, "hits": hits})
    if len(prediction_history) > 12:
        prediction_history.pop(0)

    # Format history
    history_lines = []
    for row in prediction_history:
        history_lines.append(
            f"{row['date']} | Top 15: {row['top15']} | Top 6: {row['top6']} | Hits: {row['hits']}/6"
        )

    log_output = (
        "\n".join(debug_log)
        + "\n\nšŸ“Š Prediction History (last 12 runs):\n"
        + "\n".join(history_lines)
    )
    return log_output


demo = gr.Interface(
    fn=train_and_predict,
    inputs=gr.File(file_types=[".txt", ".csv"], label="Upload Toto650.txt"),
    outputs=gr.Textbox(label="Training, Predictions & History", lines=25),
    title="Sure Win - Star Toto 6/50 Predictor",
    description=(
        "Upload Toto650.txt after each draw. Predicts both Top 15 (System 15) "
        "and Top 6 (single ticket). Auto-evaluates Top 6 against latest draw. "
        "Keeps history of last 12 runs."
    ),
)

if __name__ == "__main__":
    demo.launch()
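
# ---------------------------------------------------------------------------
# Hypothetical reference sketch (an assumption, not the author's code):
# `utils.generate_features` is imported above but its source is not part of
# this file. From the call sites, the contract appears to be:
#     features, labels = generate_features(df)
#     all_feats, _     = generate_features(df, candidate_numbers=[1, ..., 50])
# where `features` is a DataFrame containing a "number" column (so the app
# can groupby-average predicted probabilities per number) and `labels` marks
# whether that number appeared in the following draw. The minimal sketch
# below satisfies that contract with a trailing-window frequency feature;
# rename it to generate_features and save it in utils.py for local testing.
def _example_generate_features(df, candidate_numbers=None, window=20):
    """Hypothetical stand-in: one row per (window end, candidate number)."""
    draw_cols = ["n1", "n2", "n3", "n4", "n5", "n6"]
    numbers = candidate_numbers if candidate_numbers is not None else list(range(1, 51))
    rows, labels = [], []
    # Slide a trailing window over the draw history; each candidate number is
    # labelled by whether it came up in the draw immediately after the window.
    for i in range(window, len(df) - 1):
        past = df.iloc[i - window:i][draw_cols].values.flatten()
        counts = pd.Series(past).value_counts()
        next_draw = set(df.iloc[i + 1][draw_cols].tolist())
        for n in numbers:
            rows.append({"number": n, "freq": int(counts.get(n, 0))})
            labels.append(1 if n in next_draw else 0)
    return pd.DataFrame(rows), np.array(labels)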