# sure-win / utils.py
# Revision 262feea ("Update utils.py"), uploaded by Benny-Tang.
import pandas as pd
import numpy as np
from itertools import combinations
def generate_features(df, candidate_numbers=None):
    """
    Generate simple frequency/recency features for each candidate number.

    For every draw (row of ``df``, in row order) and every candidate number,
    one sample is emitted whose features are computed only from the draws
    that come *before* that row:

    * ``number``  - the candidate number itself
    * ``freq50``  - how many of the previous 50 draws contained the number
    * ``freq200`` - how many of the previous 200 draws contained the number
    * ``recency`` - how many draws ago the number last appeared (1 = the
      immediately preceding draw), or 999 if it has not appeared yet

    The label is 1 when the number is part of the current draw, else 0.

    Args:
        df: DataFrame with the columns ``n1`` .. ``n6`` holding the drawn
            numbers. Rows are treated as consecutive draws by position; the
            index labels are ignored.
        candidate_numbers: Iterable of numbers to build samples for.
            Defaults to 1..50.

    Returns:
        ``(X, y)`` where ``X`` is a DataFrame with columns
        ``["number", "freq50", "freq200", "recency"]`` and ``y`` is a NumPy
        array of 0/1 labels, both ordered draw by draw, number by number.
    """
    draw_columns = ["n1", "n2", "n3", "n4", "n5", "n6"]
    # Materialise once so a one-shot iterable is not exhausted after row 0.
    numbers = (
        list(range(1, 51)) if candidate_numbers is None else list(candidate_numbers)
    )
    # One set per draw: O(1) membership, and a repeated value counts once.
    draws = [set(draw) for draw in df[draw_columns].values.tolist()]

    # Running state describing the draws strictly before the current one.
    freq50 = dict.fromkeys(numbers, 0)
    freq200 = dict.fromkeys(numbers, 0)
    last_pos = {}

    features = []
    labels = []
    # Iterate by position: index labels from iterrows() are not valid
    # offsets into ``draws`` when the frame has a non-default index.
    for pos, draw_nums in enumerate(draws):
        for n in numbers:
            labels.append(1 if n in draw_nums else 0)
            recency = pos - last_pos[n] if n in last_pos else 999
            features.append([n, freq50[n], freq200[n], recency])

        # Fold the current draw into the history used by the next row.
        for n in draw_nums:
            if n in freq50:
                freq50[n] += 1
                freq200[n] += 1
                last_pos[n] = pos
        # Drop the draws that fall out of the 50- and 200-draw windows.
        if pos >= 50:
            for n in draws[pos - 50]:
                if n in freq50:
                    freq50[n] -= 1
        if pos >= 200:
            for n in draws[pos - 200]:
                if n in freq200:
                    freq200[n] -= 1

    X = pd.DataFrame(features, columns=["number", "freq50", "freq200", "recency"])
    y = np.array(labels)
    return X, y
def pick_top15(all_numbers_df):
    """
    Select the 15 highest-scoring numbers.

    Rows of ``all_numbers_df`` are ranked by the ``score`` column in
    descending order; the ``number`` values of the first 15 rows are
    returned as a list sorted in ascending order. If the frame has fewer
    than 15 rows, all of its numbers are returned.
    """
    ranked = all_numbers_df.sort_values("score", ascending=False)
    best = ranked["number"].head(15).tolist()
    # Present the selection in ascending numeric order, not score order.
    best.sort()
    return best
def generate_system15_csv(top15, output_path="system15.csv"):
    """
    Write every 6-number combination of ``top15`` to a CSV file.

    Each combination becomes one row with the columns ``n1`` .. ``n6``;
    the file has a header row and no index column. For 15 input numbers
    this yields 5005 rows.

    Args:
        top15: The numbers to combine.
        output_path: Destination CSV path.

    Returns:
        ``output_path``, unchanged.
    """
    column_names = [f"n{position}" for position in range(1, 7)]
    all_lines = list(combinations(top15, 6))
    table = pd.DataFrame(all_lines, columns=column_names)
    table.to_csv(output_path, index=False)
    return output_path