Fix optimize_submission.py: add --input_dir for Kaggle unzipped datasets (no need to re-run solver)
Browse files
- own-solver/optimize_submission.py +101 -32
own-solver/optimize_submission.py
CHANGED
|
@@ -2,16 +2,18 @@
|
|
| 2 |
"""
|
| 3 |
Unified ONNX Optimizer for NeuroGolf — All 4 Stages
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
Stage
|
| 9 |
-
Stage
|
| 10 |
-
Stage
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
"""
|
| 16 |
|
| 17 |
import json
|
|
@@ -69,7 +71,7 @@ def validate_model(model_bytes, examples, max_examples=None):
|
|
| 69 |
return True
|
| 70 |
|
| 71 |
|
| 72 |
-
# ═══ STAGE 1 ═══
|
| 73 |
|
| 74 |
def stage1_optimize(model_bytes, examples):
|
| 75 |
# Identity
|
|
@@ -138,7 +140,7 @@ def stage1_optimize(model_bytes, examples):
|
|
| 138 |
return None
|
| 139 |
|
| 140 |
|
| 141 |
-
# ═══ STAGE 2 ═══
|
| 142 |
|
| 143 |
def stage2_optimize(model_bytes, examples):
|
| 144 |
try:
|
|
@@ -160,7 +162,7 @@ def stage2_optimize(model_bytes, examples):
|
|
| 160 |
ks = W.shape[2]
|
| 161 |
pad_k = ks // 2
|
| 162 |
|
| 163 |
-
# Direct conv→output
|
| 164 |
inits = [numpy_helper.from_array(W, 'W')]
|
| 165 |
nodes = [helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[ks, ks], pads=[pad_k]*4)]
|
| 166 |
m = make_model(nodes, inits)
|
|
@@ -185,7 +187,7 @@ def stage2_optimize(model_bytes, examples):
|
|
| 185 |
return None
|
| 186 |
|
| 187 |
|
| 188 |
-
# ═══ STAGE 3 ═══
|
| 189 |
|
| 190 |
def stage3_optimize(model_bytes, examples):
|
| 191 |
try:
|
|
@@ -193,8 +195,11 @@ def stage3_optimize(model_bytes, examples):
|
|
| 193 |
except:
|
| 194 |
return None
|
| 195 |
|
| 196 |
-
f32_bytes =
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
| 198 |
if f32_bytes < 200:
|
| 199 |
return None
|
| 200 |
|
|
@@ -208,10 +213,48 @@ def stage3_optimize(model_bytes, examples):
|
|
| 208 |
if validate_model(b, examples[:15]):
|
| 209 |
if validate_model(b, examples):
|
| 210 |
return b, f"S3:fp16_weights(-{f32_bytes//2//1024}KB)", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
return None
|
| 212 |
|
| 213 |
|
| 214 |
-
# ═══ STAGE 4 ═══
|
| 215 |
|
| 216 |
def stage4_optimize(model_bytes, examples):
|
| 217 |
try:
|
|
@@ -233,6 +276,8 @@ def stage4_optimize(model_bytes, examples):
|
|
| 233 |
|
| 234 |
if model.doc_string: model.doc_string = ""; changed = True
|
| 235 |
if model.graph.doc_string: model.graph.doc_string = ""; changed = True
|
|
|
|
|
|
|
| 236 |
|
| 237 |
if not changed: return None
|
| 238 |
b = model.SerializeToString()
|
|
@@ -247,29 +292,46 @@ def stage4_optimize(model_bytes, examples):
|
|
| 247 |
|
| 248 |
def main():
|
| 249 |
import argparse
|
| 250 |
-
parser = argparse.ArgumentParser()
|
| 251 |
-
parser.add_argument('--
|
| 252 |
-
parser.add_argument('--
|
| 253 |
-
parser.add_argument('--
|
| 254 |
-
parser.add_argument('--
|
|
|
|
| 255 |
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
stages = [int(s) for s in args.stages.split(',')]
|
| 257 |
|
|
|
|
| 258 |
models = {}
|
| 259 |
-
|
| 260 |
for tid in range(1, 401):
|
| 261 |
-
|
| 262 |
-
if
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
|
| 265 |
results = {}
|
| 266 |
-
counts = {1:0, 2:0, 3:0, 4:0}
|
| 267 |
t0 = time.time()
|
| 268 |
|
| 269 |
for tid in sorted(models.keys()):
|
| 270 |
task_path = os.path.join(args.data_dir, f'task{tid:03d}.json')
|
| 271 |
if not os.path.exists(task_path): continue
|
| 272 |
-
with open(task_path) as f:
|
|
|
|
| 273 |
examples = task_data.get('train', []) + task_data.get('test', []) + task_data.get('arc-gen', [])[:30]
|
| 274 |
if not examples: continue
|
| 275 |
|
|
@@ -287,20 +349,27 @@ def main():
|
|
| 287 |
target = best[0] if best else models[tid]
|
| 288 |
r = stage4_optimize(target, examples)
|
| 289 |
if r:
|
| 290 |
-
if best: best = (r[0], best[1]+"+"+r[1], best[2])
|
| 291 |
else: best = r
|
| 292 |
counts[4] += 1
|
| 293 |
|
| 294 |
if best:
|
| 295 |
results[tid] = best[0]
|
| 296 |
score_s = f"score={best[2]:.1f}" if best[2] else ""
|
| 297 |
-
print(f" Task {tid:3d}: {best[1]:40s} ({len(models[tid]):>8,}
|
| 298 |
|
| 299 |
-
|
|
|
|
| 300 |
|
|
|
|
| 301 |
with zipfile.ZipFile(args.output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 302 |
for tid in range(1, 401):
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
print(f"Written to {args.output_zip}")
|
| 305 |
|
| 306 |
|
|
|
|
| 2 |
"""
|
| 3 |
Unified ONNX Optimizer for NeuroGolf — All 4 Stages
|
| 4 |
|
| 5 |
+
New formula: score = max(1.0, 25.0 - ln(memory + params))
|
| 6 |
+
|
| 7 |
+
Stage 1: Rebuild trivial tasks (identity, transpose, color_map, flips) → 18-25 pts
|
| 8 |
+
Stage 2: Direct Conv→output (eliminate ArgMax+OneHot) → 16-17 pts for ks=3
|
| 9 |
+
Stage 3: fp16 all weights (halve weight memory)
|
| 10 |
+
Stage 4: Node reduction (strip unused inits, metadata)
|
| 11 |
+
|
| 12 |
+
Kaggle usage (V90 models already unzipped as dataset):
|
| 13 |
+
python optimize_submission.py \
|
| 14 |
+
--input_dir /kaggle/input/datasets/rogermt/neurogolf-2026-solvers-v90 \
|
| 15 |
+
--data_dir /kaggle/input/competitions/neurogolf-2026 \
|
| 16 |
+
--output_zip /kaggle/working/submission.zip
|
| 17 |
"""
|
| 18 |
|
| 19 |
import json
|
|
|
|
| 71 |
return True
|
| 72 |
|
| 73 |
|
| 74 |
+
# ═══ STAGE 1: Trivial rebuilds ═══
|
| 75 |
|
| 76 |
def stage1_optimize(model_bytes, examples):
|
| 77 |
# Identity
|
|
|
|
| 140 |
return None
|
| 141 |
|
| 142 |
|
| 143 |
+
# ═══ STAGE 2: Direct Conv→output ═══
|
| 144 |
|
| 145 |
def stage2_optimize(model_bytes, examples):
|
| 146 |
try:
|
|
|
|
| 162 |
ks = W.shape[2]
|
| 163 |
pad_k = ks // 2
|
| 164 |
|
| 165 |
+
# Direct conv→output (zero intermediates)
|
| 166 |
inits = [numpy_helper.from_array(W, 'W')]
|
| 167 |
nodes = [helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[ks, ks], pads=[pad_k]*4)]
|
| 168 |
m = make_model(nodes, inits)
|
|
|
|
| 187 |
return None
|
| 188 |
|
| 189 |
|
| 190 |
+
# ═══ STAGE 3: fp16 weights ═══
|
| 191 |
|
| 192 |
def stage3_optimize(model_bytes, examples):
|
| 193 |
try:
|
|
|
|
| 195 |
except:
|
| 196 |
return None
|
| 197 |
|
| 198 |
+
f32_bytes = 0
|
| 199 |
+
for init in model.graph.initializer:
|
| 200 |
+
arr = numpy_helper.to_array(init)
|
| 201 |
+
if arr.dtype == np.float32 and arr.size > 10:
|
| 202 |
+
f32_bytes += arr.nbytes
|
| 203 |
if f32_bytes < 200:
|
| 204 |
return None
|
| 205 |
|
|
|
|
| 213 |
if validate_model(b, examples[:15]):
|
| 214 |
if validate_model(b, examples):
|
| 215 |
return b, f"S3:fp16_weights(-{f32_bytes//2//1024}KB)", None
|
| 216 |
+
|
| 217 |
+
# Try with Cast wrapper
|
| 218 |
+
new_model2 = onnx.load_from_string(model_bytes)
|
| 219 |
+
for i, init in enumerate(new_model2.graph.initializer):
|
| 220 |
+
arr = numpy_helper.to_array(init)
|
| 221 |
+
if arr.dtype == np.float32 and arr.size > 10:
|
| 222 |
+
new_model2.graph.initializer[i].CopyFrom(numpy_helper.from_array(arr.astype(np.float16), name=init.name))
|
| 223 |
+
|
| 224 |
+
input_name = new_model2.graph.input[0].name
|
| 225 |
+
cast_out = '_inp_fp16'
|
| 226 |
+
cast_node = helper.make_node('Cast', [input_name], [cast_out], to=TensorProto.FLOAT16)
|
| 227 |
+
for node in new_model2.graph.node:
|
| 228 |
+
new_inputs = [cast_out if x == input_name else x for x in node.input]
|
| 229 |
+
del node.input[:]
|
| 230 |
+
node.input.extend(new_inputs)
|
| 231 |
+
new_model2.graph.node.insert(0, cast_node)
|
| 232 |
+
|
| 233 |
+
for node in new_model2.graph.node:
|
| 234 |
+
if node.op_type == 'Cast':
|
| 235 |
+
for attr in node.attribute:
|
| 236 |
+
if attr.name == 'to' and attr.i == TensorProto.FLOAT:
|
| 237 |
+
attr.i = TensorProto.FLOAT16
|
| 238 |
+
|
| 239 |
+
output_name = new_model2.graph.output[0].name
|
| 240 |
+
pre_out = '_pre_out_fp16'
|
| 241 |
+
for node in new_model2.graph.node:
|
| 242 |
+
new_outputs = [pre_out if o == output_name else o for o in node.output]
|
| 243 |
+
del node.output[:]
|
| 244 |
+
node.output.extend(new_outputs)
|
| 245 |
+
if pre_out in new_outputs:
|
| 246 |
+
break
|
| 247 |
+
new_model2.graph.node.append(helper.make_node('Cast', [pre_out], [output_name], to=TensorProto.FLOAT))
|
| 248 |
+
|
| 249 |
+
b2 = new_model2.SerializeToString()
|
| 250 |
+
if validate_model(b2, examples[:15]):
|
| 251 |
+
if validate_model(b2, examples):
|
| 252 |
+
return b2, f"S3:fp16_cast(-{f32_bytes//2//1024}KB)", None
|
| 253 |
+
|
| 254 |
return None
|
| 255 |
|
| 256 |
|
| 257 |
+
# ═══ STAGE 4: Cleanup ═══
|
| 258 |
|
| 259 |
def stage4_optimize(model_bytes, examples):
|
| 260 |
try:
|
|
|
|
| 276 |
|
| 277 |
if model.doc_string: model.doc_string = ""; changed = True
|
| 278 |
if model.graph.doc_string: model.graph.doc_string = ""; changed = True
|
| 279 |
+
for node in model.graph.node:
|
| 280 |
+
if node.doc_string: node.doc_string = ""; changed = True
|
| 281 |
|
| 282 |
if not changed: return None
|
| 283 |
b = model.SerializeToString()
|
|
|
|
| 292 |
|
| 293 |
def main():
|
| 294 |
import argparse
|
| 295 |
+
parser = argparse.ArgumentParser(description='NeuroGolf Unified Optimizer')
|
| 296 |
+
parser.add_argument('--input_dir', default=None, help='Directory with taskNNN.onnx (Kaggle unzipped dataset)')
|
| 297 |
+
parser.add_argument('--input_zip', default=None, help='Input submission.zip (alternative to --input_dir)')
|
| 298 |
+
parser.add_argument('--data_dir', required=True, help='Directory with taskNNN.json (competition data)')
|
| 299 |
+
parser.add_argument('--output_zip', required=True, help='Output submission.zip')
|
| 300 |
+
parser.add_argument('--stages', default='1,2,3,4', help='Comma-separated stages to run')
|
| 301 |
args = parser.parse_args()
|
| 302 |
+
|
| 303 |
+
if not args.input_dir and not args.input_zip:
|
| 304 |
+
parser.error('Must provide --input_dir or --input_zip')
|
| 305 |
+
|
| 306 |
stages = [int(s) for s in args.stages.split(',')]
|
| 307 |
|
| 308 |
+
# Load models
|
| 309 |
models = {}
|
| 310 |
+
if args.input_dir:
|
| 311 |
for tid in range(1, 401):
|
| 312 |
+
fpath = os.path.join(args.input_dir, f'task{tid:03d}.onnx')
|
| 313 |
+
if os.path.exists(fpath):
|
| 314 |
+
with open(fpath, 'rb') as f:
|
| 315 |
+
models[tid] = f.read()
|
| 316 |
+
print(f"Loaded {len(models)} models from {args.input_dir}")
|
| 317 |
+
else:
|
| 318 |
+
with zipfile.ZipFile(args.input_zip, 'r') as zf:
|
| 319 |
+
for tid in range(1, 401):
|
| 320 |
+
fname = f'task{tid:03d}.onnx'
|
| 321 |
+
if fname in zf.namelist():
|
| 322 |
+
models[tid] = zf.read(fname)
|
| 323 |
+
print(f"Loaded {len(models)} models from {args.input_zip}")
|
| 324 |
+
print(f"Running stages: {stages}")
|
| 325 |
|
| 326 |
results = {}
|
| 327 |
+
counts = {1: 0, 2: 0, 3: 0, 4: 0}
|
| 328 |
t0 = time.time()
|
| 329 |
|
| 330 |
for tid in sorted(models.keys()):
|
| 331 |
task_path = os.path.join(args.data_dir, f'task{tid:03d}.json')
|
| 332 |
if not os.path.exists(task_path): continue
|
| 333 |
+
with open(task_path) as f:
|
| 334 |
+
task_data = json.load(f)
|
| 335 |
examples = task_data.get('train', []) + task_data.get('test', []) + task_data.get('arc-gen', [])[:30]
|
| 336 |
if not examples: continue
|
| 337 |
|
|
|
|
| 349 |
target = best[0] if best else models[tid]
|
| 350 |
r = stage4_optimize(target, examples)
|
| 351 |
if r:
|
| 352 |
+
if best: best = (r[0], best[1] + "+" + r[1], best[2])
|
| 353 |
else: best = r
|
| 354 |
counts[4] += 1
|
| 355 |
|
| 356 |
if best:
|
| 357 |
results[tid] = best[0]
|
| 358 |
score_s = f"score={best[2]:.1f}" if best[2] else ""
|
| 359 |
+
print(f" Task {tid:3d}: {best[1]:40s} ({len(models[tid]):>8,} -> {len(best[0]):>8,}) {score_s}")
|
| 360 |
|
| 361 |
+
elapsed = time.time() - t0
|
| 362 |
+
print(f"\nDone in {elapsed:.1f}s. S1:{counts[1]} S2:{counts[2]} S3:{counts[3]} S4:{counts[4]} Total:{len(results)}")
|
| 363 |
|
| 364 |
+
# Write output zip
|
| 365 |
with zipfile.ZipFile(args.output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 366 |
for tid in range(1, 401):
|
| 367 |
+
fname = f'task{tid:03d}.onnx'
|
| 368 |
+
zf.writestr(fname, results.get(tid, models[tid]))
|
| 369 |
+
|
| 370 |
+
orig_total = sum(len(v) for v in models.values())
|
| 371 |
+
new_total = sum(len(results.get(tid, models[tid])) for tid in models)
|
| 372 |
+
print(f"Size: {orig_total:,} -> {new_total:,} bytes ({100*new_total/orig_total:.1f}%)")
|
| 373 |
print(f"Written to {args.output_zip}")
|
| 374 |
|
| 375 |
|