Benny-Tang committed on
Commit
262feea
·
verified ·
1 Parent(s): efee2c3

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +56 -0
utils.py CHANGED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from itertools import combinations
4
+
5
+
6
def generate_features(df, candidate_numbers=None):
    """
    Generate simple frequency/recency features for each candidate number.

    One feature row and one label are produced per (draw, number) pair, in
    draw-major order: all candidate numbers for the first row of ``df``, then
    all for the second row, and so on.

    Args:
        df: DataFrame with columns ``n1`` .. ``n6`` holding the numbers of
            each draw. Rows are treated as history in the order they appear
            (presumably oldest first -- confirm against the caller); the
            index labels are ignored.
        candidate_numbers: Iterable of numbers to build features for.
            Defaults to 1..50 when ``None``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a DataFrame with columns
        ``number``, ``freq50``, ``freq200`` and ``recency``, and ``y`` is a
        NumPy array with 1 where the number is in that draw and 0 otherwise.

        * ``freq50`` / ``freq200``: how many of the previous 50 / 200 draws
          contained the number (the current draw is never counted).
        * ``recency``: how many draws back the number last appeared
          (1 = the immediately preceding draw), or 999 if it has not
          appeared in any earlier draw.
    """
    # Copy into a list so a one-shot iterable is not exhausted after the
    # first draw.
    numbers = list(range(1, 51)) if candidate_numbers is None else list(candidate_numbers)
    draw_columns = ["n1", "n2", "n3", "n4", "n5", "n6"]
    draw_sets = [set(draw) for draw in df[draw_columns].values.tolist()]

    features = []
    labels = []

    # Running state describing only the draws *before* the current position.
    last_position = {}  # number -> position of its most recent earlier draw
    freq_50 = {}        # number -> count within the previous 50 draws
    freq_200 = {}       # number -> count within the previous 200 draws

    # Iterate by position, not by index label: df.iterrows() yields labels,
    # which are not valid offsets into the history when the index is not
    # 0..n-1.
    for pos, current in enumerate(draw_sets):
        for n in numbers:
            # Label: 1 if number in this draw, else 0
            labels.append(1 if n in current else 0)

            recency = pos - last_position[n] if n in last_position else 999
            features.append([n, freq_50.get(n, 0), freq_200.get(n, 0), recency])

        # Fold the current draw into the history for the next position.
        for value in current:
            last_position[value] = pos
            freq_50[value] = freq_50.get(value, 0) + 1
            freq_200[value] = freq_200.get(value, 0) + 1

        # Slide the windows: the draw 50 (200) positions back falls out.
        if pos >= 50:
            for value in draw_sets[pos - 50]:
                freq_50[value] -= 1
        if pos >= 200:
            for value in draw_sets[pos - 200]:
                freq_200[value] -= 1

    X = pd.DataFrame(features, columns=["number", "freq50", "freq200", "recency"])
    y = np.array(labels)
    return X, y
39
+
40
+
41
def pick_top15(all_numbers_df):
    """
    Select the 15 highest-scoring numbers and return them in ascending order.

    Args:
        all_numbers_df: DataFrame with a ``score`` column to rank by and a
            ``number`` column holding the values to return.

    Returns:
        A sorted list of the ``number`` values from the 15 rows with the
        largest ``score`` (fewer if the frame has fewer than 15 rows).
    """
    ranked = all_numbers_df.sort_values(by="score", ascending=False)
    best = ranked["number"].iloc[:15].tolist()
    best.sort()
    return best
47
+
48
+
49
def generate_system15_csv(top15, output_path="system15.csv"):
    """
    Write every 6-number combination of ``top15`` to a CSV file.

    For 15 input numbers this yields 5005 rows. The file has a header row
    ``n1`` .. ``n6`` and no index column.

    Args:
        top15: Iterable of numbers to combine.
        output_path: Destination CSV path.

    Returns:
        ``output_path``, unchanged.
    """
    header = [f"n{slot}" for slot in range(1, 7)]
    rows = list(combinations(top15, 6))
    table = pd.DataFrame(rows, columns=header)
    table.to_csv(output_path, index=False)
    return output_path