Benny-Tang committed on
Commit
efee2c3
·
verified ·
1 Parent(s): 89628e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -30
app.py CHANGED
@@ -1,32 +1,71 @@
1
import requests
from bs4 import BeautifulSoup

# Read the list of result-page URLs, one per line.
with open('draw_urls.txt', 'r') as file:
    urls = file.readlines()

# Iterate through each URL and print the draw results it contains.
for url in urls:
    url = url.strip()  # Remove any leading/trailing whitespace
    if not url:
        # Skip blank lines in the URL file instead of requesting "".
        continue

    # Fail fast on network errors / non-2xx responses instead of
    # silently parsing an error page; timeout prevents a hung request.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # bs4 deprecated the `text=` keyword in favour of `string=`.
    date_label = soup.find('span', string='Draw Date :')
    number_label = soup.find('span', string='Draw No. :')
    if date_label is None or number_label is None:
        # Page layout changed or this is not a results page — skip it
        # rather than crashing on .find_next of None.
        print(f"Skipping {url}: expected labels not found")
        continue
    draw_date = date_label.find_next('span').text.strip()
    draw_number = number_label.find_next('span').text.strip()

    prizes = soup.find_all('span', class_='prize')
    if len(prizes) < 3:
        print(f"Skipping {url}: prize list incomplete")
        continue
    first_prize = prizes[0].text.strip()
    second_prize = prizes[1].text.strip()
    third_prize = prizes[2].text.strip()
    # Special prizes occupy slots 3..12 (up to ten entries) when present.
    special_prizes = [prize.text.strip() for prize in prizes[3:13]]

    # Print the extracted data (or save it to a file)
    print(f"Draw Date: {draw_date}")
    print(f"Draw Number: {draw_number}")
    print(f"First Prize: {first_prize}")
    print(f"Second Prize: {second_prize}")
    print(f"Third Prize: {third_prize}")
    print(f"Special Prizes: {special_prizes}")
    print("-" * 40)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from itertools import combinations
5
+ from sklearn.ensemble import GradientBoostingClassifier
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.metrics import roc_auc_score
8
+ import joblib
9
+ import os
10
+
11
+ from utils import generate_features, pick_top15, generate_system15_csv
12
+
13
+
14
def train_and_predict(file_obj):
    """Train a fresh model on the uploaded draw history and emit predictions.

    Returns a human-readable summary string (model AUC + top-15 numbers)
    and the path of the generated System 15 CSV so Gradio can offer it
    for download.
    """
    # Load the raw dataset; only the first eight columns are kept.
    # NOTE(review): assumes file_obj exposes a .name filepath (gradio File
    # object) — confirm against the installed gradio version, which may
    # pass a plain path string instead.
    raw = pd.read_csv(file_obj.name, header=None)
    history = raw.iloc[:, :8]
    history.columns = ["draw_date", "n1", "n2", "n3", "n4", "n5", "n6", "bonus"]

    # Feature engineering lives in utils; labels are per-row outcomes.
    features, labels = generate_features(history)

    # Hold out 20% for evaluation, stratified on the label.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Simple gradient-boosted classifier; fixed seed for reproducibility.
    model = GradientBoostingClassifier(n_estimators=200, max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    # AUC is undefined on a single-class test split, so fall back to chance.
    auc = 0.5
    if len(np.unique(y_test)) > 1:
        auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])

    # Score every candidate number 1–50 with the freshly trained model.
    candidates = pd.DataFrame({"number": range(1, 51)})
    candidate_features, _ = generate_features(
        history, candidate_numbers=candidates["number"].tolist()
    )
    candidates["score"] = model.predict_proba(candidate_features)[:, 1]

    # Keep the 15 highest-scoring numbers (selection logic in utils).
    top15 = pick_top15(candidates)

    # System 15 = all C(15, 6) = 5005 six-number tickets from the top 15.
    csv_path = "system15.csv"
    generate_system15_csv(top15, csv_path)

    return f"Model AUC: {auc:.3f}\nTop 15 Numbers: {top15}", csv_path
54
+
55
+
56
# Gradio front end: upload the draw history, get back a text summary
# plus a downloadable System 15 CSV.
_summary_output = gr.Textbox(label="Prediction Summary")
_csv_output = gr.File(label="Download System15 CSV")

demo = gr.Interface(
    fn=train_and_predict,
    inputs=gr.File(file_types=[".txt", ".csv"], label="Upload Toto650.txt"),
    outputs=[_summary_output, _csv_output],
    title="Sure Win Club - Star Toto 6/50 Predictor",
    description=(
        "Upload the latest Toto650.txt dataset every Monday. The system will "
        "train a fresh model and output Top 15 numbers + System15 (5005 tickets)."
    ),
)

# Launch only when run as a script so importing this module has no side effects.
if __name__ == "__main__":
    demo.launch()
69
+
70
 
71