Benny-Tang committed on
Commit
790c045
·
verified ·
1 Parent(s): efe4f7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -37
app.py CHANGED
@@ -11,61 +11,81 @@ from utils import generate_features, pick_top15, generate_system15_csv
11
  def train_and_predict(file_obj):
12
  # Load dataset
13
  df = pd.read_csv(file_obj.name, header=None)
14
- # Columns: [draw_id?, draw_date, n1..n6, bonus, ...]
15
- df = df.iloc[:, :8]
16
  df.columns = ["draw_date", "n1", "n2", "n3", "n4", "n5", "n6", "bonus"]
17
 
18
- # Generate features
19
- features, labels = generate_features(df)
 
20
 
21
- # Make sure we have at least 2 classes
22
- if len(np.unique(labels)) < 2:
23
- return " Not enough class variety in labels to train the model.", None
24
-
25
- # Train/test split with fallback
26
- try:
27
- X_train, X_test, y_train, y_test = train_test_split(
28
- features, labels, test_size=0.2, random_state=42, stratify=labels
29
- )
30
- except ValueError:
31
- X_train, y_train = features, labels
32
- X_test, y_test = features, labels
33
-
34
- # Train model
35
- model = GradientBoostingClassifier(n_estimators=200, max_depth=3, random_state=42)
36
- model.fit(X_train, y_train)
37
-
38
- # Evaluate if possible
39
- if len(np.unique(y_test)) > 1:
40
- auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  else:
42
- auc = 0.5
43
-
44
- # Score all numbers 1–50
45
- all_numbers = pd.DataFrame({"number": range(1, 51)})
46
- all_features, _ = generate_features(df, candidate_numbers=all_numbers["number"].tolist())
47
- scores = model.predict_proba(all_features)[:, 1]
48
- all_numbers["score"] = scores
49
-
50
- # Pick top 15
51
- top15 = pick_top15(all_numbers)
 
 
52
 
53
  # Generate System 15 CSV (5005 combos)
54
  csv_path = "system15.csv"
55
  generate_system15_csv(top15, csv_path)
 
56
 
57
- return f"Model AUC: {auc:.3f}\nTop 15 Numbers: {top15}", csv_path
58
 
59
 
60
  demo = gr.Interface(
61
  fn=train_and_predict,
62
  inputs=gr.File(file_types=[".txt", ".csv"], label="Upload Toto650.txt"),
63
  outputs=[
64
- gr.Textbox(label="Prediction Summary"),
65
  gr.File(label="Download System15 CSV")
66
  ],
67
  title="Sure Win Club - Star Toto 6/50 Predictor",
68
- description="Upload the latest Toto650.txt dataset every Monday. The system will train a fresh model and output Top 15 numbers + System15 (5005 tickets)."
69
  )
70
 
71
  if __name__ == "__main__":
@@ -74,3 +94,4 @@ if __name__ == "__main__":
74
 
75
 
76
 
 
 
11
  def train_and_predict(file_obj):
12
  # Load dataset
13
  df = pd.read_csv(file_obj.name, header=None)
14
+ df = df.iloc[:, :8] # Keep only date + 6 main numbers + bonus
 
15
  df.columns = ["draw_date", "n1", "n2", "n3", "n4", "n5", "n6", "bonus"]
16
 
17
+ debug_log = []
18
+ debug_log.append(f"✅ Loaded dataset with {len(df)} draws")
19
+ debug_log.append(f"First draw date: {df['draw_date'].iloc[0]}, Last draw date: {df['draw_date'].iloc[-1]}")
20
 
21
+ # Generate features and labels
22
+ features, labels = generate_features(df)
23
+ debug_log.append(f"Generated {len(features)} feature rows, Labels distribution: {np.bincount(labels)}")
24
+
25
+ top15 = None
26
+ auc = None
27
+ used_fallback = False
28
+
29
+ # --- Try ML model ---
30
+ if len(np.unique(labels)) >= 2:
31
+ try:
32
+ X_train, X_test, y_train, y_test = train_test_split(
33
+ features, labels, test_size=0.2, random_state=42, stratify=labels
34
+ )
35
+ debug_log.append(f"Train size: {len(X_train)}, Test size: {len(X_test)}")
36
+
37
+ model = GradientBoostingClassifier(n_estimators=200, max_depth=3, random_state=42)
38
+ model.fit(X_train, y_train)
39
+
40
+ if len(np.unique(y_test)) > 1:
41
+ auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
42
+ else:
43
+ auc = 0.5
44
+
45
+ # Score all numbers 1–50
46
+ all_numbers = pd.DataFrame({"number": range(1, 51)})
47
+ all_features, _ = generate_features(df, candidate_numbers=all_numbers["number"].tolist())
48
+ scores = model.predict_proba(all_features)[:, 1]
49
+ all_numbers["score"] = scores
50
+
51
+ top15 = pick_top15(all_numbers)
52
+ debug_log.append(f"🎯 ML Top 15 Numbers: {top15}")
53
+ debug_log.append(f"Model AUC: {auc:.3f}")
54
+
55
+ except Exception as e:
56
+ debug_log.append(f"⚠️ ML training failed: {str(e)}")
57
+ used_fallback = True
58
  else:
59
+ debug_log.append("⚠️ Only one class found in labels — skipping ML")
60
+ used_fallback = True
61
+
62
+ # --- Fallback: Frequency-based ---
63
+ if used_fallback or top15 is None:
64
+ debug_log.append("👉 Using fallback: frequency-based Top 15")
65
+ # Count occurrences of each number in all draws
66
+ nums = df[["n1", "n2", "n3", "n4", "n5", "n6"]].values.flatten()
67
+ freq = pd.Series(nums).value_counts().reset_index()
68
+ freq.columns = ["number", "count"]
69
+ top15 = sorted(freq.head(15)["number"].tolist())
70
+ debug_log.append(f"🎯 Frequency Top 15 Numbers: {top15}")
71
 
72
  # Generate System 15 CSV (5005 combos)
73
  csv_path = "system15.csv"
74
  generate_system15_csv(top15, csv_path)
75
+ debug_log.append("📂 system15.csv generated with 5005 combinations")
76
 
77
+ return "\n".join(debug_log), csv_path
78
 
79
 
80
  demo = gr.Interface(
81
  fn=train_and_predict,
82
  inputs=gr.File(file_types=[".txt", ".csv"], label="Upload Toto650.txt"),
83
  outputs=[
84
+ gr.Textbox(label="Training & Prediction Log", lines=20),
85
  gr.File(label="Download System15 CSV")
86
  ],
87
  title="Sure Win Club - Star Toto 6/50 Predictor",
88
+ description="Upload the latest Toto650.txt dataset every Monday. The system will train a fresh model (or fallback to frequency) and output Top 15 numbers + System15 (5005 tickets)."
89
  )
90
 
91
  if __name__ == "__main__":
 
94
 
95
 
96
 
97
+