import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from datetime import datetime

from utils import generate_features, pick_top15

# In-memory history (max 12 runs)
prediction_history = []


def train_and_predict(file_obj):
    # --- Load dataset ---
    df = pd.read_csv(file_obj.name, header=0)
    df = df.iloc[:, :8]  # draw_no, draw_date, n1..n6
    df.columns = ["draw_no", "draw_date", "n1", "n2", "n3", "n4", "n5", "n6"]

    # Convert numbers to int
    for col in ["n1", "n2", "n3", "n4", "n5", "n6"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    debug_log = []
    debug_log.append(f"āœ… Loaded {len(df)} draws")
    debug_log.append(f"First draw: {df['draw_date'].iloc[0]}, Last draw: {df['draw_date'].iloc[-1]}")

    # --- Features ---
    features, labels = generate_features(df)
    debug_log.append(f"Features: {len(features)} rows | Label distribution: {np.bincount(labels)}")

    top15, top6 = None, None
    auc = None
    used_fallback = False

    # --- ML model ---
    if len(np.unique(labels)) >= 2:
        try:
            X_train, X_test, y_train, y_test = train_test_split(
                features, labels, test_size=0.2, random_state=42, stratify=labels
            )
            model = GradientBoostingClassifier(n_estimators=200, max_depth=3, random_state=42)
            model.fit(X_train, y_train)

            if len(np.unique(y_test)) > 1:
                auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
            else:
                auc = 0.5

            # Score all numbers 1–50
            all_numbers = pd.DataFrame({"number": range(1, 51)})
            all_features, _ = generate_features(df, candidate_numbers=all_numbers["number"].tolist())
            probs = model.predict_proba(all_features)[:, 1]
            all_features["prob"] = probs

            scored = all_features.groupby("number")["prob"].mean().reset_index()
            scored.columns = ["number", "score"]

            # Predictions
            ranked = scored.sort_values("score", ascending=False)
            top15 = ranked.head(15)["number"].tolist()
            top6 = ranked.head(6)["number"].tolist()

            debug_log.append(f"šŸŽÆ ML Top 15: {top15}")
            debug_log.append(f"šŸŽÆ ML Top 6: {top6}")
            debug_log.append(f"Model AUC: {auc:.3f}")
        except Exception as e:
            debug_log.append(f"āš ļø ML failed: {str(e)}")
            used_fallback = True
    else:
        debug_log.append("āš ļø Only one class found — fallback mode")
        used_fallback = True

    # --- Fallback ---
    if used_fallback or top15 is None or top6 is None:
        nums = df[["n1", "n2", "n3", "n4", "n5", "n6"]].values.flatten()
        freq = pd.Series(nums).value_counts().reset_index()
        freq.columns = ["number", "count"]
        top15 = sorted(freq.head(15)["number"].tolist())
        top6 = sorted(freq.head(6)["number"].tolist())
        debug_log.append(f"šŸ‘‰ Fallback Top 15: {top15}")
        debug_log.append(f"šŸ‘‰ Fallback Top 6: {top6}")

    # --- Evaluation: compare Top 6 with last draw ---
    last_draw = set(df.iloc[-1][["n1", "n2", "n3", "n4", "n5", "n6"]].tolist())
    hits = len(last_draw.intersection(top6))
    debug_log.append(f"āœ… Evaluation: Top 6 matched {hits}/6 with last draw {sorted(last_draw)}")

    # --- Record history ---
    today = datetime.now().strftime("%Y-%m-%d")
    prediction_history.append({"date": today, "top15": top15, "top6": top6, "hits": hits})
    if len(prediction_history) > 12:
        prediction_history.pop(0)

    # Format history
    history_lines = []
    for row in prediction_history:
        history_lines.append(
            f"{row['date']} | Top 15: {row['top15']} | Top 6: {row['top6']} | Hits: {row['hits']}/6"
        )

    log_output = (
        "\n".join(debug_log)
        + "\n\nšŸ“Š Prediction History (last 12 runs):\n"
        + "\n".join(history_lines)
    )
    return log_output


demo = gr.Interface(
    fn=train_and_predict,
    inputs=gr.File(file_types=[".txt", ".csv"], label="Upload Toto650.txt"),
    outputs=gr.Textbox(label="Training, Predictions & History", lines=25),
    title="Sure Win - Star Toto 6/50 Predictor",
    description=(
        "Upload Toto650.txt after each draw. Predicts both Top 15 (System 15) "
        "and Top 6 (single ticket). Auto-evaluates Top 6 against latest draw. "
        "Keeps history of last 12 runs."
    ),
)

if __name__ == "__main__":
    demo.launch()
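
# ---------------------------------------------------------------------------
# Hypothetical reference sketch (an assumption, not the author's code):
# `utils.generate_features` is imported above but its source is not part of
# this file. From the call sites, the contract appears to be:
#     features, labels = generate_features(df)
#     all_feats, _     = generate_features(df, candidate_numbers=[1, ..., 50])
# where `features` is a DataFrame containing a "number" column (so the app
# can groupby-average predicted probabilities per number) and `labels` marks
# whether that number appeared in the following draw. The minimal sketch
# below satisfies that contract with a trailing-window frequency feature;
# rename it to generate_features and save it in utils.py for local testing.
def _example_generate_features(df, candidate_numbers=None, window=20):
    """Hypothetical stand-in: one row per (window end, candidate number)."""
    draw_cols = ["n1", "n2", "n3", "n4", "n5", "n6"]
    numbers = candidate_numbers if candidate_numbers is not None else list(range(1, 51))
    rows, labels = [], []
    # Slide a trailing window over the draw history; each candidate number is
    # labelled by whether it came up in the draw immediately after the window.
    for i in range(window, len(df) - 1):
        past = df.iloc[i - window:i][draw_cols].values.flatten()
        counts = pd.Series(past).value_counts()
        next_draw = set(df.iloc[i + 1][draw_cols].tolist())
        for n in numbers:
            rows.append({"number": n, "freq": int(counts.get(n, 0))})
            labels.append(1 if n in next_draw else 0)
    return pd.DataFrame(rows), np.array(labels)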