rogermt committed on
Commit
a381f8d
·
verified ·
1 Parent(s): cd4624b

v5 refactor: add submission.py (with W&B logging)

Browse files
Files changed (1) hide show
  1. neurogolf_solver/submission.py +134 -0
neurogolf_solver/submission.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Submission file generation and task running with W&B logging."""
3
+
4
+ import os
5
+ import csv
6
+ import io
7
+ import math
8
+ import zipfile
9
+ from collections import Counter
10
+ from .profiler import score_network
11
+ from .constants import MAX_FILESIZE, EXCLUDED_TASKS
12
+
13
+ try:
14
+ import wandb
15
+ except ImportError:
16
+ wandb = None
17
+
18
+
19
def run_tasks(task_nums, tasks, output_dir, providers, conv_budget, excluded_tasks, use_wandb):
    """Solve every requested task, score the solved ones, and report progress.

    Task numbers missing from ``tasks`` are skipped silently; task numbers
    found in ``excluded_tasks`` are skipped with a printed notice. Each
    remaining task is handed to ``solve_task`` and, when solved, its model is
    profiled with ``score_network`` to derive a cost and a score.

    Args:
        task_nums: Iterable of task numbers to attempt, in order.
        tasks: Mapping of task number to a record whose ``'data'`` entry is
            passed to the solver.
        output_dir: Forwarded unchanged to ``solve_task``.
        providers: Forwarded unchanged to ``solve_task``.
        conv_budget: Forwarded unchanged to ``solve_task``.
        excluded_tasks: Container of task numbers that must not be attempted.
        use_wandb: When truthy and ``wandb`` was importable, one ``wandb.log``
            record is emitted per attempted task.

    Returns:
        A ``(results, costs_dict, total_score)`` tuple where ``results`` maps
        each solved task number to ``(solver_name, task_time, onnx_size)``,
        ``costs_dict`` maps each solved task number to its cost, and
        ``total_score`` is the sum of the solved tasks' scores.
    """
    from .solvers.solver_registry import solve_task

    solved = {}
    cost_by_task = {}
    score_sum = 0
    # Logging is active only when requested and the optional package is present.
    log_to_wandb = use_wandb and wandb is not None

    for task_id in task_nums:
        if task_id not in tasks:
            continue
        if task_id in excluded_tasks:
            print(f"Task {task_id:3d}: EXCLUDED (officially)")
            continue

        solved_ok, solver_name, onnx_size, elapsed, model_path = solve_task(
            task_id, tasks[task_id]['data'], output_dir, providers, conv_budget, excluded_tasks
        )

        if not solved_ok:
            cost = 0
            score = 0
            logged_solver = "unsolved"
            logged_bytes = 0
            print(f"Task {task_id:3d}: UNSOLVED {elapsed:7.3f}s")
        else:
            macs, memory, params = score_network(model_path)
            # NOTE(review): a profile whose first element is None is treated as
            # cost 0, which yields the maximum score of 25 - confirm intended.
            cost = 0 if macs is None else macs + memory + params
            # Score is 25 minus the natural log of the cost, floored at 1.0.
            score = max(1.0, 25.0 - math.log(max(1, cost)))
            score_sum += score

            solved[task_id] = (solver_name, elapsed, onnx_size)
            cost_by_task[task_id] = cost
            logged_solver = solver_name
            logged_bytes = onnx_size
            print(f"Task {task_id:3d}: {solver_name:25s} {score:7.3f} {cost:>12} {elapsed:7.3f}s ({onnx_size:>8,} bytes)")

        if log_to_wandb:
            record = {
                "task_id": task_id,
                "solver": logged_solver,
                "onnx_bytes": logged_bytes,
                "task_time_sec": elapsed,
                "cost": cost,
                "score": score,
            }
            wandb.log(record)

    return solved, cost_by_task, score_sum
69
+
70
+
71
def generate_submission(output_dir, results, costs_dict, active_tasks):
    """Generate submission.zip and submission.csv beside ``output_dir``.

    Every ``*.onnx`` file directly inside ``output_dir`` is added to the
    archive under its bare file name, in sorted order. The CSV has a
    ``task_id,total_cost`` header followed by one ``taskNNN`` row per entry of
    ``costs_dict``, sorted by task number.

    Both files are written to the parent directory of ``output_dir``. A
    trailing path separator on ``output_dir`` is ignored when locating that
    parent. If ``output_dir`` has no directory component, the files go to
    ``/kaggle/working/``.

    Args:
        output_dir: Directory holding the generated ``.onnx`` models
            (``str`` or path-like).
        results: Mapping of solved task number to its result record; only its
            length is used here.
        costs_dict: Mapping of solved task number to its cost.
        active_tasks: Sized collection of the tasks that were eligible; only
            its length is used here.

    Returns:
        Dict with keys ``n_files``, ``total_size``, ``zip_path``,
        ``zip_size``, ``csv_path``, ``est_lb``, ``total_score`` and
        ``unsolved_count``.

    Raises:
        OSError: If ``output_dir`` cannot be listed or an output file cannot
            be written.
    """
    # List the directory once so the file count, the size total and the
    # archive contents always describe the same set of files.
    onnx_names = sorted(
        name for name in os.listdir(output_dir) if name.endswith('.onnx')
    )
    n_files = len(onnx_names)
    total_size = sum(
        os.path.getsize(os.path.join(output_dir, name)) for name in onnx_names
    )

    # os.path.dirname('a/b/') is 'a/b', not 'a': strip trailing separators
    # first so the outputs land in the parent rather than inside output_dir.
    dir_text = os.fspath(output_dir)
    trimmed = dir_text.rstrip(os.sep + (os.altsep or ''))
    parent_dir = os.path.dirname(trimmed or dir_text) or '/kaggle/working/'

    # Create submission.zip (assembled in memory so its exact size is known).
    zip_path = os.path.join(parent_dir, 'submission.zip')
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
        for name in onnx_names:
            zf.write(os.path.join(output_dir, name), name)
    zip_bytes = buf.getvalue()
    with open(zip_path, 'wb') as fh:
        fh.write(zip_bytes)
    zip_size = len(zip_bytes)

    # Create submission.csv
    csv_path = os.path.join(parent_dir, 'submission.csv')
    with open(csv_path, 'w', newline='', encoding='utf-8') as fh:
        writer = csv.writer(fh)
        writer.writerow(['task_id', 'total_cost'])
        for tn in sorted(costs_dict):
            writer.writerow([f'task{tn:03d}', costs_dict[tn]])

    # Estimated leaderboard score: each solved task earns
    # max(1, 25 - ln(cost)); each unsolved active task is credited 1.0.
    unsolved_count = len(active_tasks) - len(results)
    total_score = sum(
        max(1.0, 25.0 - math.log(max(1, cost))) for cost in costs_dict.values()
    )
    est_lb = total_score + unsolved_count * 1.0

    return {
        'n_files': n_files,
        'total_size': total_size,
        'zip_path': zip_path,
        'zip_size': zip_size,
        'csv_path': csv_path,
        'est_lb': est_lb,
        'total_score': total_score,
        'unsolved_count': unsolved_count,
    }
116
+
117
+
118
def print_summary(results, submission_info, elapsed):
    """Print a human-readable recap of the run.

    Reports how many active tasks were solved, how often each solver name
    appears in ``results``, the ONNX file count and uncompressed size, the
    archive size against ``MAX_FILESIZE``, the estimated leaderboard score,
    and the paths of the written submission files.

    Args:
        results: Mapping of solved task number to a record whose first element
            is the solver name.
        submission_info: Dict read for the keys ``n_files``, ``total_size``,
            ``zip_path``, ``zip_size``, ``csv_path``, ``est_lb``,
            ``total_score`` and ``unsolved_count``.
        elapsed: Total run time in seconds.
    """
    info = submission_info
    # Active tasks are the solved ones plus those reported as unsolved.
    active_count = info['unsolved_count'] + len(results)

    print(f"\n{'=' * 70}")
    print(f"Solved: {len(results)}/{active_count} active tasks in {elapsed:.0f}s")

    # Tally solver usage, most frequent first.
    usage = Counter(record[0] for record in results.values())
    for solver, count in usage.most_common():
        print(f" {solver}: {count}")

    print(f"\n{info['n_files']} ONNX files, {info['total_size'] / 1024:.1f} KB uncompressed")

    if info['zip_size'] <= MAX_FILESIZE:
        zip_ok = 'OK'
    else:
        zip_ok = 'OVER!'
    print(f"ZIP size: {info['zip_size'] / 1024:.1f} KB / {MAX_FILESIZE / 1024:.0f} KB limit {zip_ok}")

    print(f"Estimated LB score: {info['est_lb']:.1f} "
          f"(solved: {info['total_score']:.1f} + unsolved: {info['unsolved_count']}x1.0)")
    print(f"Written: {info['zip_path']} | {info['csv_path']}")