File size: 4,592 Bytes
efee2c3
 
 
 
 
 
cb5e5ad
efee2c3
cb5e5ad
 
51f33c7
cb5e5ad
efee2c3
 
 
6f8b57c
cb5e5ad
6f8b57c
cb5e5ad
 
6f8b57c
cb5e5ad
 
efee2c3
790c045
6f8b57c
cb5e5ad
efee2c3
51f33c7
790c045
6f8b57c
790c045
51f33c7
790c045
 
 
51f33c7
790c045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f8b57c
790c045
6f8b57c
 
 
 
db6b1ff
51f33c7
 
 
cb5e5ad
51f33c7
790c045
 
 
cb5e5ad
790c045
efee2c3
6f8b57c
790c045
 
51f33c7
 
790c045
 
 
 
51f33c7
cb5e5ad
51f33c7
cb5e5ad
db6b1ff
 
 
 
 
51f33c7
6f8b57c
db6b1ff
cb5e5ad
 
efee2c3
51f33c7
6f8b57c
 
db6b1ff
 
 
efee2c3
6f8b57c
cb5e5ad
6f8b57c
efee2c3
 
 
 
 
51f33c7
99a337f
db6b1ff
efee2c3
 
 
 
 
98fa86f
a0c73f0
efe4f7e
790c045
cb5e5ad
6f8b57c
51f33c7
db6b1ff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from datetime import datetime

from utils import generate_features, pick_top15

# In-memory history of prediction runs, shared across requests for the
# lifetime of the process. Bounded to the 12 most recent runs — the cap is
# enforced inside train_and_predict(), not here. Not persisted to disk.
prediction_history = []


def train_and_predict(file_obj):
    """Train a model on the uploaded draw history and predict numbers.

    Parameters
    ----------
    file_obj : file-like or str
        Uploaded CSV of past draws. Accepts either a gradio file object
        exposing a ``.name`` path attribute or a plain filesystem path
        (newer gradio versions pass the path string directly).

    Returns
    -------
    str
        Human-readable log: load/training diagnostics, Top-15 and Top-6
        predictions, evaluation of Top 6 against the most recent draw,
        and the history of the last 12 runs.
    """
    debug_log = []
    num_cols = ["n1", "n2", "n3", "n4", "n5", "n6"]

    # --- Load dataset ---
    # gradio may hand us a path string instead of a file object; support both.
    path = getattr(file_obj, "name", file_obj)
    df = pd.read_csv(path, header=0)
    df = df.iloc[:, :8]  # draw_no, draw_date, n1..n6
    df.columns = ["draw_no", "draw_date"] + num_cols

    # Convert numbers to int-like values; coerce garbage to NaN, then drop
    # those rows so NaNs never reach the model or the evaluation below.
    for col in num_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df = df.dropna(subset=num_cols).reset_index(drop=True)

    debug_log.append(f"✅ Loaded {len(df)} draws")
    debug_log.append(f"First draw: {df['draw_date'].iloc[0]}, Last draw: {df['draw_date'].iloc[-1]}")

    # --- Features ---
    features, labels = generate_features(df)
    debug_log.append(f"Features: {len(features)} rows | Label distribution: {np.bincount(labels)}")

    top15, top6 = None, None
    used_fallback = False

    # --- ML model: a classifier needs both classes present to train ---
    if len(np.unique(labels)) >= 2:
        try:
            top15, top6 = _ml_predict(df, features, labels, debug_log)
        except Exception as e:
            debug_log.append(f"⚠️ ML failed: {str(e)}")
            used_fallback = True
    else:
        debug_log.append("⚠️ Only one class found — fallback mode")
        used_fallback = True

    # --- Fallback: plain historical frequency ranking ---
    if used_fallback or top15 is None or top6 is None:
        top15, top6 = _fallback_predict(df, debug_log)

    # --- Evaluation: compare Top 6 with last draw ---
    last_draw = set(df.iloc[-1][num_cols].tolist())
    hits = len(last_draw.intersection(top6))
    debug_log.append(f"✅ Evaluation: Top 6 matched {hits}/6 with last draw {sorted(last_draw)}")

    # --- Record history (bounded to the last 12 runs) ---
    today = datetime.now().strftime("%Y-%m-%d")
    prediction_history.append({"date": today, "top15": top15, "top6": top6, "hits": hits})
    if len(prediction_history) > 12:
        prediction_history.pop(0)

    history_lines = [
        f"{row['date']} | Top 15: {row['top15']} | Top 6: {row['top6']} | Hits: {row['hits']}/6"
        for row in prediction_history
    ]

    log_output = "\n".join(debug_log) + "\n\n📊 Prediction History (last 12 runs):\n" + "\n".join(history_lines)

    return log_output


def _ml_predict(df, features, labels, debug_log):
    """Fit a GradientBoostingClassifier and rank all 50 numbers by mean probability.

    Appends diagnostics (Top 15, Top 6, AUC) to ``debug_log`` and returns
    ``(top15, top6)`` as lists of numbers ordered by descending score.
    Raises whatever the underlying sklearn calls raise; the caller handles
    failures by switching to the frequency fallback.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42, stratify=labels
    )

    model = GradientBoostingClassifier(n_estimators=200, max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    # ROC AUC is undefined when the test split contains a single class;
    # report the no-skill value instead of raising.
    if len(np.unique(y_test)) > 1:
        auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    else:
        auc = 0.5

    # Score all numbers 1–50 through the same feature pipeline.
    all_features, _ = generate_features(df, candidate_numbers=list(range(1, 51)))
    probs = model.predict_proba(all_features)[:, 1]

    all_features["prob"] = probs
    scored = all_features.groupby("number")["prob"].mean().reset_index()
    scored.columns = ["number", "score"]

    # Sort once, then slice both prediction sets from the same ranking.
    ranked = scored.sort_values("score", ascending=False)
    top15 = ranked.head(15)["number"].tolist()
    top6 = ranked.head(6)["number"].tolist()

    debug_log.append(f"🎯 ML Top 15: {top15}")
    debug_log.append(f"🎯 ML Top 6: {top6}")
    debug_log.append(f"Model AUC: {auc:.3f}")

    return top15, top6


def _fallback_predict(df, debug_log):
    """Frequency-based fallback: pick the historically most-drawn numbers.

    Appends the picks to ``debug_log`` and returns ``(top15, top6)`` sorted
    ascending (matching the original fallback output format).
    """
    nums = df[["n1", "n2", "n3", "n4", "n5", "n6"]].values.flatten()
    freq = pd.Series(nums).value_counts().reset_index()
    freq.columns = ["number", "count"]
    top15 = sorted(freq.head(15)["number"].tolist())
    top6 = sorted(freq.head(6)["number"].tolist())
    debug_log.append(f"👉 Fallback Top 15: {top15}")
    debug_log.append(f"👉 Fallback Top 6: {top6}")
    return top15, top6


# Gradio UI wiring: a single-function interface that accepts the draw-history
# file upload and renders the combined log/prediction text output.
demo = gr.Interface(
    fn=train_and_predict,
    inputs=gr.File(file_types=[".txt", ".csv"], label="Upload Toto650.txt"),
    outputs=gr.Textbox(label="Training, Predictions & History", lines=25),
    title="Sure Win - Star Toto 6/50 Predictor",
    description="Upload Toto650.txt after each draw. Predicts both Top 15 (System 15) and Top 6 (single ticket). Auto-evaluates Top 6 against latest draw. Keeps history of last 12 runs."
)

# Launch only when run as a script (not when imported, e.g. by a hosting shim).
if __name__ == "__main__":
    demo.launch()