rogermt
/

neurogolf-solver

ONNX

ml-intern

Model card | Files and versions

xet

Community

rogermt committed 6 days ago

Commit

597dca6

verified ·

1 Parent(s): 0642636

Fix optimize_submission.py: add --input_dir for Kaggle unzipped datasets (no need to re-run solver)

Browse files

Files changed (1) hide show

own-solver/optimize_submission.py +101 -32

own-solver/optimize_submission.py CHANGED Viewed

@@ -2,16 +2,18 @@
 """
 Unified ONNX Optimizer for NeuroGolf — All 4 Stages
-Processes submission zip and applies optimizations to maximize score
-under the new formula: score = max(1.0, 25.0 - ln(memory + params))
-Stage 1: Rebuild trivial tasks (identity, transpose, color_map, flips)
-Stage 2: Direct Conv→output (eliminate ArgMax+OneHot chain)
-Stage 3: fp16 all weights (halve weight memory contribution)
-Stage 4: Node reduction (remove unused inits, strip metadata)
-Usage:
-  python optimize_submission.py --input_zip submission-6043.zip --data_dir ./tasks --output_zip submission_optimized.zip
 """
 import json
@@ -69,7 +71,7 @@ def validate_model(model_bytes, examples, max_examples=None):
     return True
-# ═══ STAGE 1 ═══
 def stage1_optimize(model_bytes, examples):
     # Identity
@@ -138,7 +140,7 @@ def stage1_optimize(model_bytes, examples):
     return None
-# ═══ STAGE 2 ═══
 def stage2_optimize(model_bytes, examples):
     try:
@@ -160,7 +162,7 @@ def stage2_optimize(model_bytes, examples):
         ks = W.shape[2]
         pad_k = ks // 2
-        # Direct conv→output
         inits = [numpy_helper.from_array(W, 'W')]
         nodes = [helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[ks, ks], pads=[pad_k]*4)]
         m = make_model(nodes, inits)
@@ -185,7 +187,7 @@ def stage2_optimize(model_bytes, examples):
     return None
-# ═══ STAGE 3 ═══
 def stage3_optimize(model_bytes, examples):
     try:
@@ -193,8 +195,11 @@ def stage3_optimize(model_bytes, examples):
     except:
         return None
-    f32_bytes = sum(numpy_helper.to_array(i).nbytes for i in model.graph.initializer
-                    if numpy_helper.to_array(i).dtype == np.float32 and numpy_helper.to_array(i).size > 10)
     if f32_bytes < 200:
         return None
@@ -208,10 +213,48 @@ def stage3_optimize(model_bytes, examples):
     if validate_model(b, examples[:15]):
         if validate_model(b, examples):
             return b, f"S3:fp16_weights(-{f32_bytes//2//1024}KB)", None
     return None
-# ═══ STAGE 4 ═══
 def stage4_optimize(model_bytes, examples):
     try:
@@ -233,6 +276,8 @@ def stage4_optimize(model_bytes, examples):
     if model.doc_string: model.doc_string = ""; changed = True
     if model.graph.doc_string: model.graph.doc_string = ""; changed = True
     if not changed: return None
     b = model.SerializeToString()
@@ -247,29 +292,46 @@ def stage4_optimize(model_bytes, examples):
 def main():
     import argparse
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--input_zip', required=True)
-    parser.add_argument('--data_dir', required=True)
-    parser.add_argument('--output_zip', required=True)
-    parser.add_argument('--stages', default='1,2,3,4')
     args = parser.parse_args()
     stages = [int(s) for s in args.stages.split(',')]
     models = {}
-    with zipfile.ZipFile(args.input_zip, 'r') as zf:
         for tid in range(1, 401):
-            fname = f'task{tid:03d}.onnx'
-            if fname in zf.namelist(): models[tid] = zf.read(fname)
-    print(f"Loaded {len(models)} models. Stages: {stages}")
     results = {}
-    counts = {1:0, 2:0, 3:0, 4:0}
     t0 = time.time()
     for tid in sorted(models.keys()):
         task_path = os.path.join(args.data_dir, f'task{tid:03d}.json')
         if not os.path.exists(task_path): continue
-        with open(task_path) as f: task_data = json.load(f)
         examples = task_data.get('train', []) + task_data.get('test', []) + task_data.get('arc-gen', [])[:30]
         if not examples: continue
@@ -287,20 +349,27 @@ def main():
             target = best[0] if best else models[tid]
             r = stage4_optimize(target, examples)
             if r:
-                if best: best = (r[0], best[1]+"+"+r[1], best[2])
                 else: best = r
                 counts[4] += 1
         if best:
             results[tid] = best[0]
             score_s = f"score={best[2]:.1f}" if best[2] else ""
-            print(f"  Task {tid:3d}: {best[1]:40s} ({len(models[tid]):>8,} → {len(best[0]):>8,}) {score_s}")
-    print(f"\nDone in {time.time()-t0:.1f}s. S1:{counts[1]} S2:{counts[2]} S3:{counts[3]} S4:{counts[4]} Total:{len(results)}")
     with zipfile.ZipFile(args.output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
         for tid in range(1, 401):
-            zf.writestr(f'task{tid:03d}.onnx', results.get(tid, models[tid]))
     print(f"Written to {args.output_zip}")

 """
 Unified ONNX Optimizer for NeuroGolf — All 4 Stages
+New formula: score = max(1.0, 25.0 - ln(memory + params))
+Stage 1: Rebuild trivial tasks (identity, transpose, color_map, flips) → 18-25 pts
+Stage 2: Direct Conv→output (eliminate ArgMax+OneHot) → 16-17 pts for ks=3
+Stage 3: fp16 all weights (halve weight memory)
+Stage 4: Node reduction (strip unused inits, metadata)
+Kaggle usage (V90 models already unzipped as dataset):
+  python optimize_submission.py \
+    --input_dir /kaggle/input/datasets/rogermt/neurogolf-2026-solvers-v90 \
+    --data_dir /kaggle/input/competitions/neurogolf-2026 \
+    --output_zip /kaggle/working/submission.zip
 """
 import json
     return True
+# ═══ STAGE 1: Trivial rebuilds ═══
 def stage1_optimize(model_bytes, examples):
     # Identity
     return None
+# ═══ STAGE 2: Direct Conv→output ═══
 def stage2_optimize(model_bytes, examples):
     try:
         ks = W.shape[2]
         pad_k = ks // 2
+        # Direct conv→output (zero intermediates)
         inits = [numpy_helper.from_array(W, 'W')]
         nodes = [helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[ks, ks], pads=[pad_k]*4)]
         m = make_model(nodes, inits)
     return None
+# ═══ STAGE 3: fp16 weights ═══
 def stage3_optimize(model_bytes, examples):
     try:
     except:
         return None
+    f32_bytes = 0
+    for init in model.graph.initializer:
+        arr = numpy_helper.to_array(init)
+        if arr.dtype == np.float32 and arr.size > 10:
+            f32_bytes += arr.nbytes
     if f32_bytes < 200:
         return None
     if validate_model(b, examples[:15]):
         if validate_model(b, examples):
             return b, f"S3:fp16_weights(-{f32_bytes//2//1024}KB)", None
+    # Try with Cast wrapper
+    new_model2 = onnx.load_from_string(model_bytes)
+    for i, init in enumerate(new_model2.graph.initializer):
+        arr = numpy_helper.to_array(init)
+        if arr.dtype == np.float32 and arr.size > 10:
+            new_model2.graph.initializer[i].CopyFrom(numpy_helper.from_array(arr.astype(np.float16), name=init.name))
+    input_name = new_model2.graph.input[0].name
+    cast_out = '_inp_fp16'
+    cast_node = helper.make_node('Cast', [input_name], [cast_out], to=TensorProto.FLOAT16)
+    for node in new_model2.graph.node:
+        new_inputs = [cast_out if x == input_name else x for x in node.input]
+        del node.input[:]
+        node.input.extend(new_inputs)
+    new_model2.graph.node.insert(0, cast_node)
+    for node in new_model2.graph.node:
+        if node.op_type == 'Cast':
+            for attr in node.attribute:
+                if attr.name == 'to' and attr.i == TensorProto.FLOAT:
+                    attr.i = TensorProto.FLOAT16
+    output_name = new_model2.graph.output[0].name
+    pre_out = '_pre_out_fp16'
+    for node in new_model2.graph.node:
+        new_outputs = [pre_out if o == output_name else o for o in node.output]
+        del node.output[:]
+        node.output.extend(new_outputs)
+        if pre_out in new_outputs:
+            break
+    new_model2.graph.node.append(helper.make_node('Cast', [pre_out], [output_name], to=TensorProto.FLOAT))
+    b2 = new_model2.SerializeToString()
+    if validate_model(b2, examples[:15]):
+        if validate_model(b2, examples):
+            return b2, f"S3:fp16_cast(-{f32_bytes//2//1024}KB)", None
     return None
+# ═══ STAGE 4: Cleanup ═══
 def stage4_optimize(model_bytes, examples):
     try:
     if model.doc_string: model.doc_string = ""; changed = True
     if model.graph.doc_string: model.graph.doc_string = ""; changed = True
+    for node in model.graph.node:
+        if node.doc_string: node.doc_string = ""; changed = True
     if not changed: return None
     b = model.SerializeToString()
 def main():
     import argparse
+    parser = argparse.ArgumentParser(description='NeuroGolf Unified Optimizer')
+    parser.add_argument('--input_dir', default=None, help='Directory with taskNNN.onnx (Kaggle unzipped dataset)')
+    parser.add_argument('--input_zip', default=None, help='Input submission.zip (alternative to --input_dir)')
+    parser.add_argument('--data_dir', required=True, help='Directory with taskNNN.json (competition data)')
+    parser.add_argument('--output_zip', required=True, help='Output submission.zip')
+    parser.add_argument('--stages', default='1,2,3,4', help='Comma-separated stages to run')
     args = parser.parse_args()
+    if not args.input_dir and not args.input_zip:
+        parser.error('Must provide --input_dir or --input_zip')
     stages = [int(s) for s in args.stages.split(',')]
+    # Load models
     models = {}
+    if args.input_dir:
         for tid in range(1, 401):
+            fpath = os.path.join(args.input_dir, f'task{tid:03d}.onnx')
+            if os.path.exists(fpath):
+                with open(fpath, 'rb') as f:
+                    models[tid] = f.read()
+        print(f"Loaded {len(models)} models from {args.input_dir}")
+    else:
+        with zipfile.ZipFile(args.input_zip, 'r') as zf:
+            for tid in range(1, 401):
+                fname = f'task{tid:03d}.onnx'
+                if fname in zf.namelist():
+                    models[tid] = zf.read(fname)
+        print(f"Loaded {len(models)} models from {args.input_zip}")
+    print(f"Running stages: {stages}")
     results = {}
+    counts = {1: 0, 2: 0, 3: 0, 4: 0}
     t0 = time.time()
     for tid in sorted(models.keys()):
         task_path = os.path.join(args.data_dir, f'task{tid:03d}.json')
         if not os.path.exists(task_path): continue
+        with open(task_path) as f:
+            task_data = json.load(f)
         examples = task_data.get('train', []) + task_data.get('test', []) + task_data.get('arc-gen', [])[:30]
         if not examples: continue
             target = best[0] if best else models[tid]
             r = stage4_optimize(target, examples)
             if r:
+                if best: best = (r[0], best[1] + "+" + r[1], best[2])
                 else: best = r
                 counts[4] += 1
         if best:
             results[tid] = best[0]
             score_s = f"score={best[2]:.1f}" if best[2] else ""
+            print(f"  Task {tid:3d}: {best[1]:40s} ({len(models[tid]):>8,} -> {len(best[0]):>8,}) {score_s}")
+    elapsed = time.time() - t0
+    print(f"\nDone in {elapsed:.1f}s. S1:{counts[1]} S2:{counts[2]} S3:{counts[3]} S4:{counts[4]} Total:{len(results)}")
+    # Write output zip
     with zipfile.ZipFile(args.output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
         for tid in range(1, 401):
+            fname = f'task{tid:03d}.onnx'
+            zf.writestr(fname, results.get(tid, models[tid]))
+    orig_total = sum(len(v) for v in models.values())
+    new_total = sum(len(results.get(tid, models[tid])) for tid in models)
+    print(f"Size: {orig_total:,} -> {new_total:,} bytes ({100*new_total/orig_total:.1f}%)")
     print(f"Written to {args.output_zip}")