# sure-win / app.py — Hugging Face Space by Benny-Tang (commit 99a337f, "Update app.py")
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from datetime import datetime
from utils import generate_features, pick_top15
# In-memory history of recent prediction runs; each entry is a dict with
# keys "date", "top15", "top6", "hits". Capped at 12 entries (oldest evicted).
prediction_history: list[dict] = []
def _load_draws(file_obj):
    """Load the uploaded draw-history CSV into a normalized DataFrame.

    Keeps only the first 8 columns (draw_no, draw_date, n1..n6) and coerces
    the six drawn numbers to numeric (unparseable cells become NaN).
    """
    df = pd.read_csv(file_obj.name, header=0)
    df = df.iloc[:, :8]  # draw_no, draw_date, n1..n6
    df.columns = ["draw_no", "draw_date", "n1", "n2", "n3", "n4", "n5", "n6"]
    for col in ["n1", "n2", "n3", "n4", "n5", "n6"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df


def _ml_predict(df, features, labels, debug_log):
    """Train a gradient-boosting model and rank candidate numbers 1-50.

    Returns (top15, top6, auc) on success, or None if training/scoring
    raises (the error is logged to debug_log and the caller falls back).
    """
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.2, random_state=42, stratify=labels
        )
        model = GradientBoostingClassifier(n_estimators=200, max_depth=3, random_state=42)
        model.fit(X_train, y_train)
        # ROC AUC is undefined when the held-out split has a single class;
        # report the chance level (0.5) in that case.
        if len(np.unique(y_test)) > 1:
            auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
        else:
            auc = 0.5
        # Score all numbers 1–50 (features returned per candidate; assumes
        # generate_features yields a frame with a "number" column — as the
        # groupby below requires).
        all_features, _ = generate_features(df, candidate_numbers=list(range(1, 51)))
        all_features["prob"] = model.predict_proba(all_features)[:, 1]
        scored = all_features.groupby("number")["prob"].mean()
        # Sort once: the Top 6 is simply the head of the Top 15 ranking.
        ranked = scored.sort_values(ascending=False).index.tolist()
        top15, top6 = ranked[:15], ranked[:6]
        debug_log.append(f"🎯 ML Top 15: {top15}")
        debug_log.append(f"🎯 ML Top 6: {top6}")
        debug_log.append(f"Model AUC: {auc:.3f}")
        return top15, top6, auc
    except Exception as e:
        # Best-effort: any failure here is reported and triggers the
        # frequency fallback rather than crashing the UI.
        debug_log.append(f"⚠️ ML failed: {str(e)}")
        return None


def _frequency_fallback(df, debug_log):
    """Rank numbers by historical draw frequency (most-drawn first).

    Returns (top15, top6), each sorted ascending for display.
    """
    nums = df[["n1", "n2", "n3", "n4", "n5", "n6"]].values.flatten()
    freq = pd.Series(nums).value_counts()  # value_counts sorts descending
    top15 = sorted(freq.head(15).index.tolist())
    top6 = sorted(freq.head(6).index.tolist())
    debug_log.append(f"👉 Fallback Top 15: {top15}")
    debug_log.append(f"👉 Fallback Top 6: {top6}")
    return top15, top6


def train_and_predict(file_obj):
    """Train on an uploaded Toto 6/50 history file and predict numbers.

    Produces a Top 15 (System 15) and Top 6 (single ticket) prediction via a
    GradientBoostingClassifier, falling back to a frequency ranking when the
    model cannot be trained. Evaluates the Top 6 against the latest draw,
    records the run in the module-level prediction_history (last 12 runs),
    and returns the full multi-line log string shown in the UI.
    """
    debug_log = []

    df = _load_draws(file_obj)
    debug_log.append(f"✅ Loaded {len(df)} draws")
    debug_log.append(f"First draw: {df['draw_date'].iloc[0]}, Last draw: {df['draw_date'].iloc[-1]}")

    features, labels = generate_features(df)
    debug_log.append(f"Features: {len(features)} rows | Label distribution: {np.bincount(labels)}")

    top15, top6, auc = None, None, None
    # The classifier needs at least two classes to be trainable at all.
    if len(np.unique(labels)) >= 2:
        result = _ml_predict(df, features, labels, debug_log)
        if result is not None:
            top15, top6, auc = result
    else:
        debug_log.append("⚠️ Only one class found — fallback mode")

    # Fallback: simple frequency ranking when ML was skipped or failed.
    if top15 is None or top6 is None:
        top15, top6 = _frequency_fallback(df, debug_log)

    # Evaluation: compare Top 6 with the most recent draw in the file.
    last_draw = set(df.iloc[-1][["n1", "n2", "n3", "n4", "n5", "n6"]].tolist())
    hits = len(last_draw.intersection(top6))
    debug_log.append(f"✅ Evaluation: Top 6 matched {hits}/6 with last draw {sorted(last_draw)}")

    # Record this run; keep only the 12 most recent entries.
    today = datetime.now().strftime("%Y-%m-%d")
    prediction_history.append({"date": today, "top15": top15, "top6": top6, "hits": hits})
    if len(prediction_history) > 12:
        prediction_history.pop(0)

    history_lines = [
        f"{row['date']} | Top 15: {row['top15']} | Top 6: {row['top6']} | Hits: {row['hits']}/6"
        for row in prediction_history
    ]
    return (
        "\n".join(debug_log)
        + "\n\n📊 Prediction History (last 12 runs):\n"
        + "\n".join(history_lines)
    )
# Gradio UI: single file upload in, one text log out (train_and_predict).
demo = gr.Interface(
fn=train_and_predict,
inputs=gr.File(file_types=[".txt", ".csv"], label="Upload Toto650.txt"),
outputs=gr.Textbox(label="Training, Predictions & History", lines=25),
title="Sure Win - Star Toto 6/50 Predictor",
description="Upload Toto650.txt after each draw. Predicts both Top 15 (System 15) and Top 6 (single ticket). Auto-evaluates Top 6 against latest draw. Keeps history of last 12 runs."
)
# Launch the Space server only when run as a script (restores the indent
# lost in the flattened paste — as written, L97 was a syntax error).
if __name__ == "__main__":
    demo.launch()