rogermt committed on
Commit
0642636
·
verified ·
1 Parent(s): 715a632

Add optimize_submission.py — unified 4-stage optimizer for new scoring formula

Browse files
Files changed (1) hide show
  1. own-solver/optimize_submission.py +308 -0
own-solver/optimize_submission.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified ONNX Optimizer for NeuroGolf — All 4 Stages
4
+
5
+ Processes submission zip and applies optimizations to maximize score
6
+ under the new formula: score = max(1.0, 25.0 - ln(memory + params))
7
+
8
+ Stage 1: Rebuild trivial tasks (identity, transpose, color_map, flips)
9
+ Stage 2: Direct Conv→output (eliminate ArgMax+OneHot chain)
10
+ Stage 3: fp16 all weights (halve weight memory contribution)
11
+ Stage 4: Node reduction (remove unused inits, strip metadata)
12
+
13
+ Usage:
14
+ python optimize_submission.py --input_zip submission-6043.zip --data_dir ./tasks --output_zip submission_optimized.zip
15
+ """
16
+
17
+ import json
18
+ import math
19
+ import os
20
+ import zipfile
21
+ import time
22
+
23
+ import numpy as np
24
+ import onnx
25
+ import onnxruntime as ort
26
+ from onnx import helper, TensorProto, numpy_helper
27
+
28
# Shape declared for both the "input" and the "output" tensor of every model
# built by make_model().
GRID_SHAPE = [1, 10, 30, 30]
# Element type declared for those input/output tensors.
DT = TensorProto.FLOAT
# Value passed as ir_version when make_model() builds a model.
IR = 8
31
+
32
+
33
def make_model(nodes, inits=None, opset=17):
    """Wrap *nodes* in an ONNX model with one "input" and one "output" tensor.

    Args:
        nodes: Node protos forming the graph body; they are expected to read
            the tensor named "input" and produce the tensor named "output".
        inits: Optional initializer tensors; ``None`` or an empty list means
            the graph gets no initializers.
        opset: Version of the default ("") operator set to import.

    Returns:
        The model built by ``helper.make_model`` with ``ir_version=IR``.
    """
    initializers = inits if inits else []
    graph = helper.make_graph(
        nodes,
        "g",
        [helper.make_tensor_value_info("input", DT, GRID_SHAPE)],
        [helper.make_tensor_value_info("output", DT, GRID_SHAPE)],
        initializer=initializers,
    )
    opset_ids = [helper.make_opsetid("", opset)]
    return helper.make_model(graph, ir_version=IR, opset_imports=opset_ids)
38
+
39
+
40
def encode_grid(grid):
    """One-hot encode a 2-D grid of colour indices into a (1, 10, 30, 30) tensor.

    Cell ``(r, c)`` holding value ``v`` sets ``t[0, v, r, c] = 1.0``. Values
    outside ``0..9`` are ignored, so those cells stay all-zero across the ten
    channels, as does everything outside the grid's own extent.

    Args:
        grid: Rectangular 2-D sequence (rows of cells) convertible to an
            int32 array.

    Returns:
        A float32 array of shape (1, 10, 30, 30).

    Raises:
        ValueError: If *grid* is not 2-D (for example an empty list).
        IndexError: If the grid has a valid cell at row or column 30 or above.
    """
    arr = np.array(grid, dtype=np.int32)
    # Unpacking doubles as the "must be 2-D" check.
    _height, _width = arr.shape
    t = np.zeros((1, 10, 30, 30), dtype=np.float32)
    # Coordinates of every cell whose value names a valid channel.
    rows, cols = np.nonzero((arr >= 0) & (arr < 10))
    # One vectorised assignment instead of a Python loop over each cell.
    t[0, arr[rows, cols], rows, cols] = 1.0
    return t
50
+
51
+
52
def validate_model(model_bytes, examples, max_examples=None):
    """Check that a serialized model reproduces every example exactly.

    The model output is thresholded at ``> 0.0`` and compared element-wise
    with the one-hot encoding of the expected grid.

    Args:
        model_bytes: Serialized ONNX model.
        examples: Sequence of dicts with 'input' and 'output' grids.
        max_examples: If truthy, only the first ``max_examples`` examples are
            checked; ``None`` (or 0) checks them all.

    Returns:
        True when the session loads and every checked example matches.
        False on the first mismatch or on any exception while loading,
        encoding or running.
    """
    try:
        session_options = ort.SessionOptions()
        # Level 3 is set to keep ONNX Runtime's logging quiet (assumed to
        # mean "errors only" on ORT's severity scale).
        session_options.log_severity_level = 3
        session = ort.InferenceSession(
            model_bytes,
            sess_options=session_options,
            providers=['CPUExecutionProvider'],
        )
    except Exception:
        return False

    subset = examples[:max_examples] if max_examples else examples

    def _matches(example):
        # Any failure while encoding or running counts as a mismatch.
        try:
            feed = {'input': encode_grid(example['input'])}
            produced = session.run(['output'], feed)[0]
            wanted = encode_grid(example['output'])
            binarized = (produced > 0.0).astype(np.float32)
            return bool(np.array_equal(binarized, wanted))
        except Exception:
            return False

    # all() stops at the first failing example, like an early return.
    return all(_matches(example) for example in subset)
70
+
71
+
72
+ # ═══ STAGE 1 ═══
73
+
74
def stage1_optimize(model_bytes, examples):
    """Try to replace a task's model with a trivial single-node graph.

    Candidates are tried in a fixed order and the first one accepted by
    ``validate_model`` on all *examples* is returned:

    1. Identity
    2. Transpose with perm [0, 1, 3, 2]
    3. Slice with step -1 along axis 3 ('flip_lr'), then axis 2 ('flip_ud')
    4. Gather along axis 1, when a consistent per-cell colour mapping with
       all colours in 0..9 is found in the first ten examples
    5. 1x1 Conv implementing that colour mapping

    Args:
        model_bytes: Existing model for the task. Not used here; kept so all
            stage functions share one signature.
        examples: Sequence of dicts with 'input' and 'output' grids.

    Returns:
        ``(serialized_model, label, score)`` for the first valid candidate,
        where *score* is the fixed value reported for that candidate kind,
        or ``None`` when no candidate validates.
    """

    def _build(node, initializers=None):
        # Serialize a one-node model around *node*.
        return make_model([node], initializers).SerializeToString()

    def _infer_color_map(samples):
        # Return {input_colour: output_colour} if every cell of every sample
        # agrees on one mapping; None on a shape mismatch or contradiction.
        mapping = {}
        for sample in samples:
            src_grid, dst_grid = np.array(sample['input']), np.array(sample['output'])
            if src_grid.shape != dst_grid.shape:
                return None
            for src_val, dst_val in zip(src_grid.flat, dst_grid.flat):
                src_val, dst_val = int(src_val), int(dst_val)
                if mapping.setdefault(src_val, dst_val) != dst_val:
                    return None
        return mapping

    # --- Candidates without initializers --------------------------------
    blob = _build(helper.make_node('Identity', ['input'], ['output']))
    if validate_model(blob, examples):
        return blob, "S1:identity", 25.0

    blob = _build(helper.make_node('Transpose', ['input'], ['output'], perm=[0, 1, 3, 2]))
    if validate_model(blob, examples):
        return blob, "S1:transpose", 25.0

    # --- Flips: Slice from index 29 with step -1 and INT64 min as end ----
    int64_min = np.iinfo(np.int64).min
    for label, axis in (('flip_lr', 3), ('flip_ud', 2)):
        slice_inits = [
            numpy_helper.from_array(np.array([value], dtype=np.int64), key)
            for key, value in (('st', 29), ('en', int64_min), ('ax', axis), ('sp', -1))
        ]
        slice_node = helper.make_node('Slice', ['input', 'st', 'en', 'ax', 'sp'], ['output'])
        blob = _build(slice_node, slice_inits)
        if validate_model(blob, examples):
            return blob, f"S1:{label}", 21.4

    # --- Colour remapping, inferred from at most the first ten examples --
    color_map = _infer_color_map(examples[:10])
    if not color_map:
        return None

    all_in_range = all(0 <= src < 10 and 0 <= dst < 10 for src, dst in color_map.items())
    if all_in_range:
        # Output channel `dst` is read from input channel `src`; channels
        # not mentioned by the mapping pass through unchanged.
        source_channel = list(range(10))
        for src, dst in color_map.items():
            source_channel[dst] = src
        gather_idx = numpy_helper.from_array(np.array(source_channel, dtype=np.int32), 'gi')
        gather_node = helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)
        blob = _build(gather_node, [gather_idx])
        if validate_model(blob, examples):
            return blob, "S1:color_perm", 21.1

    # 1x1 Conv fallback: weight[out, in] = 1 routes input channel `in` to
    # output channel `out`; colours absent from the mapping map to themselves.
    weights = np.zeros((10, 10, 1, 1), dtype=np.float32)
    for in_ch in range(10):
        out_ch = color_map.get(in_ch, in_ch)
        if 0 <= out_ch < 10:
            weights[out_ch, in_ch, 0, 0] = 1.0
    conv_node = helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[1, 1])
    blob = _build(conv_node, [numpy_helper.from_array(weights, 'W')])
    if validate_model(blob, examples):
        return blob, "S1:color_conv1x1", 18.8

    return None
139
+
140
+
141
+ # ═══ STAGE 2 ═══
142
+
143
def stage2_optimize(model_bytes, examples):
    """Try to collapse a model into one Conv taken from its own weights.

    For each Conv node whose weight initializer has shape (10, 10, k, k), two
    replacement graphs are built and validated against *examples*:

    * a single Conv from "input" to "output" with padding ``k // 2``;
    * the same Conv multiplied by ``ReduceSum(input, axis=1, keepdims=1)``,
      which zeroes cells where the input has no active channel.

    The first graph that validates is returned.

    Args:
        model_bytes: Serialized ONNX model to mine for Conv weights.
        examples: Sequence of dicts with 'input' and 'output' grids.

    Returns:
        ``(serialized_model, label, score)`` with
        ``score = max(1.0, 25.0 - ln(cost))``, or ``None`` when the model
        cannot be parsed or no candidate validates.
    """
    try:
        model = onnx.load_from_string(model_bytes)
    except Exception:
        # Unparseable model: nothing to optimise. A bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit.
        return None

    # Index initializers once instead of rescanning the list for each node.
    # setdefault keeps the first tensor if a name is duplicated.
    initializer_by_name = {}
    for init in model.graph.initializer:
        initializer_by_name.setdefault(init.name, init)

    for node in model.graph.node:
        if node.op_type != 'Conv' or len(node.input) < 2:
            continue
        weight_init = initializer_by_name.get(node.input[1])
        if weight_init is None:
            continue
        W = numpy_helper.to_array(weight_init)
        if W.ndim != 4 or W.shape[0] != 10 or W.shape[1] != 10:
            continue
        if W.shape[2] != W.shape[3]:
            continue
        ks = W.shape[2]
        if ks % 2 == 0:
            # Symmetric padding of ks // 2 with an even kernel gives a 31x31
            # output, which can never match the 30x30 target.
            continue
        pad_k = ks // 2

        # Candidate 1: direct Conv -> output.
        inits = [numpy_helper.from_array(W, 'W')]
        nodes = [helper.make_node('Conv', ['input', 'W'], ['output'],
                                  kernel_shape=[ks, ks], pads=[pad_k] * 4)]
        b = make_model(nodes, inits).SerializeToString()
        if validate_model(b, examples):
            # Weight bytes (float32) plus weight element count.
            cost = W.size * 4 + W.size
            return b, f"S2:direct_conv_ks{ks}", max(1.0, 25.0 - math.log(max(1, cost)))

        # Candidate 2: Conv output masked by the per-cell channel sum.
        inits2 = [
            numpy_helper.from_array(W, 'W'),
            numpy_helper.from_array(np.array([1], dtype=np.int64), 'ax'),
        ]
        nodes2 = [
            helper.make_node('ReduceSum', ['input', 'ax'], ['mask'], keepdims=1),
            helper.make_node('Conv', ['input', 'W'], ['co'],
                             kernel_shape=[ks, ks], pads=[pad_k] * 4),
            helper.make_node('Mul', ['co', 'mask'], ['output']),
        ]
        b2 = make_model(nodes2, inits2).SerializeToString()
        if validate_model(b2, examples):
            # 3600 and 36000 presumably are the float32 byte sizes of the
            # 'mask' (1x1x30x30) and 'co' (1x10x30x30) intermediates; 8 and 1
            # are the bytes and element count of the int64 'ax' tensor.
            cost = 3600 + 36000 + W.size * 4 + 8 + W.size + 1
            return b2, f"S2:direct_conv_var_ks{ks}", max(1.0, 25.0 - math.log(max(1, cost)))

    return None
186
+
187
+
188
+ # ═══ STAGE 3 ═══
189
+
190
def stage3_optimize(model_bytes, examples):
    """Try storing the model's larger float32 initializers as float16.

    Only float32 initializers with more than 10 elements are converted, and
    the stage is skipped when they total less than 200 bytes. The converted
    model is returned only if it still validates on every example.

    Args:
        model_bytes: Serialized ONNX model.
        examples: Sequence of dicts with 'input' and 'output' grids.

    Returns:
        ``(serialized_model, label, None)`` on success, else ``None``. The
        score slot is ``None`` because this stage does not estimate a score.
    """
    try:
        model = onnx.load_from_string(model_bytes)
    except Exception:
        # Unparseable model: nothing to optimise. A bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit.
        return None

    # Decode each initializer once and remember the ones worth converting.
    convertible = []
    for idx, init in enumerate(model.graph.initializer):
        arr = numpy_helper.to_array(init)
        if arr.dtype == np.float32 and arr.size > 10:
            convertible.append((idx, init.name, arr))

    f32_bytes = sum(arr.nbytes for _, _, arr in convertible)
    if f32_bytes < 200:
        return None

    # NOTE(review): only the initializers are retyped; the nodes consuming
    # them are left as-is. Whether the runtime accepts the mixed types
    # depends on each operator, so validation below is the only gate.
    for idx, name, arr in convertible:
        half = numpy_helper.from_array(arr.astype(np.float16), name=name)
        model.graph.initializer[idx].CopyFrom(half)

    b = model.SerializeToString()
    # Cheap check on the first 15 examples before paying for the full set.
    if validate_model(b, examples[:15]) and validate_model(b, examples):
        return b, f"S3:fp16_weights(-{f32_bytes//2//1024}KB)", None
    return None
212
+
213
+
214
+ # ═══ STAGE 4 ═══
215
+
216
def _collect_used_names(graph, used):
    """Add to *used* every tensor name that *graph* or its subgraphs consume."""
    for node in graph.node:
        used.update(node.input)
        # Control-flow nodes carry nested graphs as attributes; those bodies
        # may read outer-scope initializers.
        for attr in node.attribute:
            if attr.type == onnx.AttributeProto.GRAPH:
                _collect_used_names(attr.g, used)
            elif attr.type == onnx.AttributeProto.GRAPHS:
                for subgraph in attr.graphs:
                    _collect_used_names(subgraph, used)
    # A tensor exposed directly as a graph output is also in use.
    used.update(out.name for out in graph.output)


def stage4_optimize(model_bytes, examples):
    """Drop unreferenced initializers and clear model/graph doc strings.

    An initializer is kept when its name is consumed by any node (including
    nodes inside nested subgraphs) or appears as a graph output.

    Args:
        model_bytes: Serialized ONNX model.
        examples: Sequence of dicts with 'input' and 'output' grids; the
            first ten are used to re-validate the cleaned model.

    Returns:
        ``(serialized_model, label, None)`` when the cleanup validates and
        saves more than 10 bytes, else ``None``.
    """
    try:
        model = onnx.load_from_string(model_bytes)
    except Exception:
        # Unparseable model: nothing to optimise. A bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit.
        return None

    changed = False
    used = set()
    _collect_used_names(model.graph, used)

    # Delete from the back so earlier indices stay valid.
    initializers = model.graph.initializer
    for idx in reversed(range(len(initializers))):
        if initializers[idx].name not in used:
            del initializers[idx]
            changed = True

    if model.doc_string:
        model.doc_string = ""
        changed = True
    if model.graph.doc_string:
        model.graph.doc_string = ""
        changed = True

    if not changed:
        return None

    b = model.SerializeToString()
    if validate_model(b, examples[:10]):
        saved = len(model_bytes) - len(b)
        if saved > 10:
            return b, f"S4:cleanup(-{saved}B)", None
    return None
244
+
245
+
246
+ # ═══ MAIN ═══
247
+
248
def main():
    """Command-line entry point: optimise every model in a submission zip.

    Reads ``task001.onnx`` .. ``task400.onnx`` from ``--input_zip`` (missing
    tasks are skipped), loads each task's examples from
    ``<data_dir>/taskNNN.json``, and runs the stages selected by ``--stages``.
    Stages 1-3 are alternatives: the first that succeeds wins. Stage 4 then
    runs on the winner, or on the original model if none succeeded. The
    resulting models are written to ``--output_zip``, falling back to the
    original bytes for tasks that were not improved.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_zip', required=True)
    parser.add_argument('--data_dir', required=True)
    parser.add_argument('--output_zip', required=True)
    parser.add_argument('--stages', default='1,2,3,4')
    args = parser.parse_args()
    stages = [int(s) for s in args.stages.split(',')]

    models = {}
    with zipfile.ZipFile(args.input_zip, 'r') as zf:
        # Read the archive listing once rather than once per task id.
        available = set(zf.namelist())
        for tid in range(1, 401):
            fname = f'task{tid:03d}.onnx'
            if fname in available:
                models[tid] = zf.read(fname)
    print(f"Loaded {len(models)} models. Stages: {stages}")

    results = {}
    counts = {1: 0, 2: 0, 3: 0, 4: 0}
    t0 = time.time()

    for tid in sorted(models):
        task_path = os.path.join(args.data_dir, f'task{tid:03d}.json')
        if not os.path.exists(task_path):
            continue
        with open(task_path, encoding='utf-8') as f:
            task_data = json.load(f)
        # All train and test examples, plus at most 30 generated ones.
        examples = (task_data.get('train', [])
                    + task_data.get('test', [])
                    + task_data.get('arc-gen', [])[:30])
        if not examples:
            continue

        # Stages 1-3 are tried in order; a later one runs only if every
        # earlier one found nothing.
        best = None
        for stage_id, optimize in ((1, stage1_optimize),
                                   (2, stage2_optimize),
                                   (3, stage3_optimize)):
            if best is not None or stage_id not in stages:
                continue
            r = optimize(models[tid], examples)
            if r:
                best = r
                counts[stage_id] += 1

        if 4 in stages:
            target = best[0] if best else models[tid]
            r = stage4_optimize(target, examples)
            if r:
                # Keep the earlier stage's label and score, swap in the
                # cleaned-up bytes.
                if best:
                    best = (r[0], best[1] + "+" + r[1], best[2])
                else:
                    best = r
                counts[4] += 1

        if best:
            results[tid] = best[0]
            score_s = f"score={best[2]:.1f}" if best[2] else ""
            print(f"  Task {tid:3d}: {best[1]:40s} ({len(models[tid]):>8,} → {len(best[0]):>8,}) {score_s}")

    print(f"\nDone in {time.time()-t0:.1f}s. S1:{counts[1]} S2:{counts[2]} S3:{counts[3]} S4:{counts[4]} Total:{len(results)}")

    with zipfile.ZipFile(args.output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
        # Only tasks present in the input zip can be written; iterating over
        # 1..400 would raise KeyError for any task the input lacks.
        for tid in sorted(models):
            zf.writestr(f'task{tid:03d}.onnx', results.get(tid, models[tid]))
    print(f"Written to {args.output_zip}")


if __name__ == '__main__':
    main()