rogermt committed on
Commit
597dca6
·
verified ·
1 Parent(s): 0642636

Fix optimize_submission.py: add --input_dir for Kaggle unzipped datasets (no need to re-run solver)

Browse files
Files changed (1) hide show
  1. own-solver/optimize_submission.py +101 -32
own-solver/optimize_submission.py CHANGED
@@ -2,16 +2,18 @@
2
  """
3
  Unified ONNX Optimizer for NeuroGolf — All 4 Stages
4
 
5
- Processes submission zip and applies optimizations to maximize score
6
- under the new formula: score = max(1.0, 25.0 - ln(memory + params))
7
-
8
- Stage 1: Rebuild trivial tasks (identity, transpose, color_map, flips)
9
- Stage 2: Direct Conv→output (eliminate ArgMax+OneHot chain)
10
- Stage 3: fp16 all weights (halve weight memory contribution)
11
- Stage 4: Node reduction (remove unused inits, strip metadata)
12
-
13
- Usage:
14
- python optimize_submission.py --input_zip submission-6043.zip --data_dir ./tasks --output_zip submission_optimized.zip
 
 
15
  """
16
 
17
  import json
@@ -69,7 +71,7 @@ def validate_model(model_bytes, examples, max_examples=None):
69
  return True
70
 
71
 
72
- # ═══ STAGE 1 ═══
73
 
74
  def stage1_optimize(model_bytes, examples):
75
  # Identity
@@ -138,7 +140,7 @@ def stage1_optimize(model_bytes, examples):
138
  return None
139
 
140
 
141
- # ═══ STAGE 2 ═══
142
 
143
  def stage2_optimize(model_bytes, examples):
144
  try:
@@ -160,7 +162,7 @@ def stage2_optimize(model_bytes, examples):
160
  ks = W.shape[2]
161
  pad_k = ks // 2
162
 
163
- # Direct conv→output
164
  inits = [numpy_helper.from_array(W, 'W')]
165
  nodes = [helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[ks, ks], pads=[pad_k]*4)]
166
  m = make_model(nodes, inits)
@@ -185,7 +187,7 @@ def stage2_optimize(model_bytes, examples):
185
  return None
186
 
187
 
188
- # ═══ STAGE 3 ═══
189
 
190
  def stage3_optimize(model_bytes, examples):
191
  try:
@@ -193,8 +195,11 @@ def stage3_optimize(model_bytes, examples):
193
  except:
194
  return None
195
 
196
- f32_bytes = sum(numpy_helper.to_array(i).nbytes for i in model.graph.initializer
197
- if numpy_helper.to_array(i).dtype == np.float32 and numpy_helper.to_array(i).size > 10)
 
 
 
198
  if f32_bytes < 200:
199
  return None
200
 
@@ -208,10 +213,48 @@ def stage3_optimize(model_bytes, examples):
208
  if validate_model(b, examples[:15]):
209
  if validate_model(b, examples):
210
  return b, f"S3:fp16_weights(-{f32_bytes//2//1024}KB)", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  return None
212
 
213
 
214
- # ═══ STAGE 4 ═══
215
 
216
  def stage4_optimize(model_bytes, examples):
217
  try:
@@ -233,6 +276,8 @@ def stage4_optimize(model_bytes, examples):
233
 
234
  if model.doc_string: model.doc_string = ""; changed = True
235
  if model.graph.doc_string: model.graph.doc_string = ""; changed = True
 
 
236
 
237
  if not changed: return None
238
  b = model.SerializeToString()
@@ -247,29 +292,46 @@ def stage4_optimize(model_bytes, examples):
247
 
248
  def main():
249
  import argparse
250
- parser = argparse.ArgumentParser()
251
- parser.add_argument('--input_zip', required=True)
252
- parser.add_argument('--data_dir', required=True)
253
- parser.add_argument('--output_zip', required=True)
254
- parser.add_argument('--stages', default='1,2,3,4')
 
255
  args = parser.parse_args()
 
 
 
 
256
  stages = [int(s) for s in args.stages.split(',')]
257
 
 
258
  models = {}
259
- with zipfile.ZipFile(args.input_zip, 'r') as zf:
260
  for tid in range(1, 401):
261
- fname = f'task{tid:03d}.onnx'
262
- if fname in zf.namelist(): models[tid] = zf.read(fname)
263
- print(f"Loaded {len(models)} models. Stages: {stages}")
 
 
 
 
 
 
 
 
 
 
264
 
265
  results = {}
266
- counts = {1:0, 2:0, 3:0, 4:0}
267
  t0 = time.time()
268
 
269
  for tid in sorted(models.keys()):
270
  task_path = os.path.join(args.data_dir, f'task{tid:03d}.json')
271
  if not os.path.exists(task_path): continue
272
- with open(task_path) as f: task_data = json.load(f)
 
273
  examples = task_data.get('train', []) + task_data.get('test', []) + task_data.get('arc-gen', [])[:30]
274
  if not examples: continue
275
 
@@ -287,20 +349,27 @@ def main():
287
  target = best[0] if best else models[tid]
288
  r = stage4_optimize(target, examples)
289
  if r:
290
- if best: best = (r[0], best[1]+"+"+r[1], best[2])
291
  else: best = r
292
  counts[4] += 1
293
 
294
  if best:
295
  results[tid] = best[0]
296
  score_s = f"score={best[2]:.1f}" if best[2] else ""
297
- print(f" Task {tid:3d}: {best[1]:40s} ({len(models[tid]):>8,} {len(best[0]):>8,}) {score_s}")
298
 
299
- print(f"\nDone in {time.time()-t0:.1f}s. S1:{counts[1]} S2:{counts[2]} S3:{counts[3]} S4:{counts[4]} Total:{len(results)}")
 
300
 
 
301
  with zipfile.ZipFile(args.output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
302
  for tid in range(1, 401):
303
- zf.writestr(f'task{tid:03d}.onnx', results.get(tid, models[tid]))
 
 
 
 
 
304
  print(f"Written to {args.output_zip}")
305
 
306
 
 
2
  """
3
  Unified ONNX Optimizer for NeuroGolf — All 4 Stages
4
 
5
+ New formula: score = max(1.0, 25.0 - ln(memory + params))
6
+
7
+ Stage 1: Rebuild trivial tasks (identity, transpose, color_map, flips) → 18-25 pts
8
+ Stage 2: Direct Conv→output (eliminate ArgMax+OneHot) 16-17 pts for ks=3
9
+ Stage 3: fp16 all weights (halve weight memory)
10
+ Stage 4: Node reduction (strip unused inits, metadata)
11
+
12
+ Kaggle usage (V90 models already unzipped as dataset):
13
+ python optimize_submission.py \
14
+ --input_dir /kaggle/input/datasets/rogermt/neurogolf-2026-solvers-v90 \
15
+ --data_dir /kaggle/input/competitions/neurogolf-2026 \
16
+ --output_zip /kaggle/working/submission.zip
17
  """
18
 
19
  import json
 
71
  return True
72
 
73
 
74
+ # ═══ STAGE 1: Trivial rebuilds ═══
75
 
76
  def stage1_optimize(model_bytes, examples):
77
  # Identity
 
140
  return None
141
 
142
 
143
+ # ═══ STAGE 2: Direct Conv→output ═══
144
 
145
  def stage2_optimize(model_bytes, examples):
146
  try:
 
162
  ks = W.shape[2]
163
  pad_k = ks // 2
164
 
165
+ # Direct conv→output (zero intermediates)
166
  inits = [numpy_helper.from_array(W, 'W')]
167
  nodes = [helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[ks, ks], pads=[pad_k]*4)]
168
  m = make_model(nodes, inits)
 
187
  return None
188
 
189
 
190
+ # ═══ STAGE 3: fp16 weights ═══
191
 
192
  def stage3_optimize(model_bytes, examples):
193
  try:
 
195
  except:
196
  return None
197
 
198
+ f32_bytes = 0
199
+ for init in model.graph.initializer:
200
+ arr = numpy_helper.to_array(init)
201
+ if arr.dtype == np.float32 and arr.size > 10:
202
+ f32_bytes += arr.nbytes
203
  if f32_bytes < 200:
204
  return None
205
 
 
213
  if validate_model(b, examples[:15]):
214
  if validate_model(b, examples):
215
  return b, f"S3:fp16_weights(-{f32_bytes//2//1024}KB)", None
216
+
217
+ # Try with Cast wrapper
218
+ new_model2 = onnx.load_from_string(model_bytes)
219
+ for i, init in enumerate(new_model2.graph.initializer):
220
+ arr = numpy_helper.to_array(init)
221
+ if arr.dtype == np.float32 and arr.size > 10:
222
+ new_model2.graph.initializer[i].CopyFrom(numpy_helper.from_array(arr.astype(np.float16), name=init.name))
223
+
224
+ input_name = new_model2.graph.input[0].name
225
+ cast_out = '_inp_fp16'
226
+ cast_node = helper.make_node('Cast', [input_name], [cast_out], to=TensorProto.FLOAT16)
227
+ for node in new_model2.graph.node:
228
+ new_inputs = [cast_out if x == input_name else x for x in node.input]
229
+ del node.input[:]
230
+ node.input.extend(new_inputs)
231
+ new_model2.graph.node.insert(0, cast_node)
232
+
233
+ for node in new_model2.graph.node:
234
+ if node.op_type == 'Cast':
235
+ for attr in node.attribute:
236
+ if attr.name == 'to' and attr.i == TensorProto.FLOAT:
237
+ attr.i = TensorProto.FLOAT16
238
+
239
+ output_name = new_model2.graph.output[0].name
240
+ pre_out = '_pre_out_fp16'
241
+ for node in new_model2.graph.node:
242
+ new_outputs = [pre_out if o == output_name else o for o in node.output]
243
+ del node.output[:]
244
+ node.output.extend(new_outputs)
245
+ if pre_out in new_outputs:
246
+ break
247
+ new_model2.graph.node.append(helper.make_node('Cast', [pre_out], [output_name], to=TensorProto.FLOAT))
248
+
249
+ b2 = new_model2.SerializeToString()
250
+ if validate_model(b2, examples[:15]):
251
+ if validate_model(b2, examples):
252
+ return b2, f"S3:fp16_cast(-{f32_bytes//2//1024}KB)", None
253
+
254
  return None
255
 
256
 
257
+ # ═══ STAGE 4: Cleanup ═══
258
 
259
  def stage4_optimize(model_bytes, examples):
260
  try:
 
276
 
277
  if model.doc_string: model.doc_string = ""; changed = True
278
  if model.graph.doc_string: model.graph.doc_string = ""; changed = True
279
+ for node in model.graph.node:
280
+ if node.doc_string: node.doc_string = ""; changed = True
281
 
282
  if not changed: return None
283
  b = model.SerializeToString()
 
292
 
293
  def main():
294
  import argparse
295
+ parser = argparse.ArgumentParser(description='NeuroGolf Unified Optimizer')
296
+ parser.add_argument('--input_dir', default=None, help='Directory with taskNNN.onnx (Kaggle unzipped dataset)')
297
+ parser.add_argument('--input_zip', default=None, help='Input submission.zip (alternative to --input_dir)')
298
+ parser.add_argument('--data_dir', required=True, help='Directory with taskNNN.json (competition data)')
299
+ parser.add_argument('--output_zip', required=True, help='Output submission.zip')
300
+ parser.add_argument('--stages', default='1,2,3,4', help='Comma-separated stages to run')
301
  args = parser.parse_args()
302
+
303
+ if not args.input_dir and not args.input_zip:
304
+ parser.error('Must provide --input_dir or --input_zip')
305
+
306
  stages = [int(s) for s in args.stages.split(',')]
307
 
308
+ # Load models
309
  models = {}
310
+ if args.input_dir:
311
  for tid in range(1, 401):
312
+ fpath = os.path.join(args.input_dir, f'task{tid:03d}.onnx')
313
+ if os.path.exists(fpath):
314
+ with open(fpath, 'rb') as f:
315
+ models[tid] = f.read()
316
+ print(f"Loaded {len(models)} models from {args.input_dir}")
317
+ else:
318
+ with zipfile.ZipFile(args.input_zip, 'r') as zf:
319
+ for tid in range(1, 401):
320
+ fname = f'task{tid:03d}.onnx'
321
+ if fname in zf.namelist():
322
+ models[tid] = zf.read(fname)
323
+ print(f"Loaded {len(models)} models from {args.input_zip}")
324
+ print(f"Running stages: {stages}")
325
 
326
  results = {}
327
+ counts = {1: 0, 2: 0, 3: 0, 4: 0}
328
  t0 = time.time()
329
 
330
  for tid in sorted(models.keys()):
331
  task_path = os.path.join(args.data_dir, f'task{tid:03d}.json')
332
  if not os.path.exists(task_path): continue
333
+ with open(task_path) as f:
334
+ task_data = json.load(f)
335
  examples = task_data.get('train', []) + task_data.get('test', []) + task_data.get('arc-gen', [])[:30]
336
  if not examples: continue
337
 
 
349
  target = best[0] if best else models[tid]
350
  r = stage4_optimize(target, examples)
351
  if r:
352
+ if best: best = (r[0], best[1] + "+" + r[1], best[2])
353
  else: best = r
354
  counts[4] += 1
355
 
356
  if best:
357
  results[tid] = best[0]
358
  score_s = f"score={best[2]:.1f}" if best[2] else ""
359
+ print(f" Task {tid:3d}: {best[1]:40s} ({len(models[tid]):>8,} -> {len(best[0]):>8,}) {score_s}")
360
 
361
+ elapsed = time.time() - t0
362
+ print(f"\nDone in {elapsed:.1f}s. S1:{counts[1]} S2:{counts[2]} S3:{counts[3]} S4:{counts[4]} Total:{len(results)}")
363
 
364
+ # Write output zip
365
  with zipfile.ZipFile(args.output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
366
  for tid in range(1, 401):
367
+ fname = f'task{tid:03d}.onnx'
368
+ zf.writestr(fname, results.get(tid, models[tid]))
369
+
370
+ orig_total = sum(len(v) for v in models.values())
371
+ new_total = sum(len(results.get(tid, models[tid])) for tid in models)
372
+ print(f"Size: {orig_total:,} -> {new_total:,} bytes ({100*new_total/orig_total:.1f}%)")
373
  print(f"Written to {args.output_zip}")
374
 
375