Rohan03 committed on
Commit
0486f85
·
verified ·
1 Parent(s): 52cb301

launch: readiness report + test suite — tests/results/launch_readiness.json

Browse files
Files changed (1) hide show
  1. tests/results/launch_readiness.json +602 -0
tests/results/launch_readiness.json ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pass": 119,
3
+ "fail": 0,
4
+ "warn": 0,
5
+ "results": [
6
+ {
7
+ "category": "imports",
8
+ "test": "import purpose_agent",
9
+ "status": "PASS"
10
+ },
11
+ {
12
+ "category": "imports",
13
+ "test": "import types",
14
+ "status": "PASS"
15
+ },
16
+ {
17
+ "category": "imports",
18
+ "test": "import llm_backend",
19
+ "status": "PASS"
20
+ },
21
+ {
22
+ "category": "imports",
23
+ "test": "import actor",
24
+ "status": "PASS"
25
+ },
26
+ {
27
+ "category": "imports",
28
+ "test": "import purpose_function",
29
+ "status": "PASS"
30
+ },
31
+ {
32
+ "category": "imports",
33
+ "test": "import experience_replay",
34
+ "status": "PASS"
35
+ },
36
+ {
37
+ "category": "imports",
38
+ "test": "import optimizer",
39
+ "status": "PASS"
40
+ },
41
+ {
42
+ "category": "imports",
43
+ "test": "import orchestrator",
44
+ "status": "PASS"
45
+ },
46
+ {
47
+ "category": "imports",
48
+ "test": "import slm_backends",
49
+ "status": "PASS"
50
+ },
51
+ {
52
+ "category": "imports",
53
+ "test": "import streaming",
54
+ "status": "PASS"
55
+ },
56
+ {
57
+ "category": "imports",
58
+ "test": "import tools",
59
+ "status": "PASS"
60
+ },
61
+ {
62
+ "category": "imports",
63
+ "test": "import observability",
64
+ "status": "PASS"
65
+ },
66
+ {
67
+ "category": "imports",
68
+ "test": "import multi_agent",
69
+ "status": "PASS"
70
+ },
71
+ {
72
+ "category": "imports",
73
+ "test": "import hitl",
74
+ "status": "PASS"
75
+ },
76
+ {
77
+ "category": "imports",
78
+ "test": "import evaluation",
79
+ "status": "PASS"
80
+ },
81
+ {
82
+ "category": "imports",
83
+ "test": "import registry",
84
+ "status": "PASS"
85
+ },
86
+ {
87
+ "category": "imports",
88
+ "test": "import unified",
89
+ "status": "PASS"
90
+ },
91
+ {
92
+ "category": "imports",
93
+ "test": "import easy",
94
+ "status": "PASS"
95
+ },
96
+ {
97
+ "category": "imports",
98
+ "test": "import v2_types",
99
+ "status": "PASS"
100
+ },
101
+ {
102
+ "category": "imports",
103
+ "test": "import trace",
104
+ "status": "PASS"
105
+ },
106
+ {
107
+ "category": "imports",
108
+ "test": "import memory",
109
+ "status": "PASS"
110
+ },
111
+ {
112
+ "category": "imports",
113
+ "test": "import compiler",
114
+ "status": "PASS"
115
+ },
116
+ {
117
+ "category": "imports",
118
+ "test": "import immune",
119
+ "status": "PASS"
120
+ },
121
+ {
122
+ "category": "imports",
123
+ "test": "import memory_ci",
124
+ "status": "PASS"
125
+ },
126
+ {
127
+ "category": "imports",
128
+ "test": "import evalport",
129
+ "status": "PASS"
130
+ },
131
+ {
132
+ "category": "imports",
133
+ "test": "import benchmark_v2",
134
+ "status": "PASS"
135
+ },
136
+ {
137
+ "category": "imports",
138
+ "test": "import meta_rewarding",
139
+ "status": "PASS"
140
+ },
141
+ {
142
+ "category": "imports",
143
+ "test": "import self_taught",
144
+ "status": "PASS"
145
+ },
146
+ {
147
+ "category": "imports",
148
+ "test": "import prompt_optimizer",
149
+ "status": "PASS"
150
+ },
151
+ {
152
+ "category": "imports",
153
+ "test": "import llm_compiler",
154
+ "status": "PASS"
155
+ },
156
+ {
157
+ "category": "imports",
158
+ "test": "import retroformer",
159
+ "status": "PASS"
160
+ },
161
+ {
162
+ "category": "imports",
163
+ "test": "import robust_parser",
164
+ "status": "PASS"
165
+ },
166
+ {
167
+ "category": "imports",
168
+ "test": "import breakthroughs",
169
+ "status": "PASS"
170
+ },
171
+ {
172
+ "category": "instantiate",
173
+ "test": "State",
174
+ "status": "PASS"
175
+ },
176
+ {
177
+ "category": "instantiate",
178
+ "test": "Action",
179
+ "status": "PASS"
180
+ },
181
+ {
182
+ "category": "instantiate",
183
+ "test": "MockLLMBackend",
184
+ "status": "PASS"
185
+ },
186
+ {
187
+ "category": "instantiate",
188
+ "test": "ExperienceReplay",
189
+ "status": "PASS"
190
+ },
191
+ {
192
+ "category": "instantiate",
193
+ "test": "ToolRegistry",
194
+ "status": "PASS"
195
+ },
196
+ {
197
+ "category": "instantiate",
198
+ "test": "CalculatorTool",
199
+ "status": "PASS"
200
+ },
201
+ {
202
+ "category": "instantiate",
203
+ "test": "PythonExecTool",
204
+ "status": "PASS"
205
+ },
206
+ {
207
+ "category": "instantiate",
208
+ "test": "CostTracker",
209
+ "status": "PASS"
210
+ },
211
+ {
212
+ "category": "instantiate",
213
+ "test": "CallbackManager",
214
+ "status": "PASS"
215
+ },
216
+ {
217
+ "category": "instantiate",
218
+ "test": "Agent",
219
+ "status": "PASS"
220
+ },
221
+ {
222
+ "category": "instantiate",
223
+ "test": "KnowledgeStore",
224
+ "status": "PASS"
225
+ },
226
+ {
227
+ "category": "instantiate",
228
+ "test": "Graph",
229
+ "status": "PASS"
230
+ },
231
+ {
232
+ "category": "instantiate",
233
+ "test": "RunMode",
234
+ "status": "PASS"
235
+ },
236
+ {
237
+ "category": "instantiate",
238
+ "test": "Trace",
239
+ "status": "PASS"
240
+ },
241
+ {
242
+ "category": "instantiate",
243
+ "test": "MemoryStore",
244
+ "status": "PASS"
245
+ },
246
+ {
247
+ "category": "instantiate",
248
+ "test": "MemoryCard",
249
+ "status": "PASS"
250
+ },
251
+ {
252
+ "category": "instantiate",
253
+ "test": "MemoryCI",
254
+ "status": "PASS"
255
+ },
256
+ {
257
+ "category": "instantiate",
258
+ "test": "MixtureOfHeuristics",
259
+ "status": "PASS"
260
+ },
261
+ {
262
+ "category": "instantiate",
263
+ "test": "AdversarialHardener",
264
+ "status": "PASS"
265
+ },
266
+ {
267
+ "category": "core",
268
+ "test": "Full loop completes",
269
+ "status": "PASS"
270
+ },
271
+ {
272
+ "category": "core",
273
+ "test": "Trajectory has steps",
274
+ "status": "PASS"
275
+ },
276
+ {
277
+ "category": "core",
278
+ "test": "Final state exists",
279
+ "status": "PASS"
280
+ },
281
+ {
282
+ "category": "phi",
283
+ "test": "phi_before in [0,10]",
284
+ "status": "PASS"
285
+ },
286
+ {
287
+ "category": "phi",
288
+ "test": "phi_after in [0,10]",
289
+ "status": "PASS"
290
+ },
291
+ {
292
+ "category": "phi",
293
+ "test": "confidence in [0,1]",
294
+ "status": "PASS"
295
+ },
296
+ {
297
+ "category": "optimizer",
298
+ "test": "Produces heuristics",
299
+ "status": "PASS"
300
+ },
301
+ {
302
+ "category": "replay",
303
+ "test": "Store works",
304
+ "status": "PASS"
305
+ },
306
+ {
307
+ "category": "replay",
308
+ "test": "Retrieve works",
309
+ "status": "PASS"
310
+ },
311
+ {
312
+ "category": "replay",
313
+ "test": "Clear works",
314
+ "status": "PASS"
315
+ },
316
+ {
317
+ "category": "backend",
318
+ "test": "Strip <think> basic",
319
+ "status": "PASS"
320
+ },
321
+ {
322
+ "category": "backend",
323
+ "test": "Strip <think> multiline",
324
+ "status": "PASS"
325
+ },
326
+ {
327
+ "category": "backend",
328
+ "test": "Strip unclosed <think>",
329
+ "status": "PASS"
330
+ },
331
+ {
332
+ "category": "backend",
333
+ "test": "No tags passthrough",
334
+ "status": "PASS"
335
+ },
336
+ {
337
+ "category": "routing",
338
+ "test": "ollama: prefix",
339
+ "status": "PASS"
340
+ },
341
+ {
342
+ "category": "routing",
343
+ "test": "auto-detect ollama model",
344
+ "status": "PASS"
345
+ },
346
+ {
347
+ "category": "tools",
348
+ "test": "Calculator safe: 2+3*4=14",
349
+ "status": "PASS"
350
+ },
351
+ {
352
+ "category": "tools",
353
+ "test": "Calculator safe: sqrt(16)=4.0",
354
+ "status": "PASS"
355
+ },
356
+ {
357
+ "category": "tools",
358
+ "test": "Calculator blocks __import__",
359
+ "status": "PASS"
360
+ },
361
+ {
362
+ "category": "tools",
363
+ "test": "ReadFile blocks /etc/passwd",
364
+ "status": "PASS"
365
+ },
366
+ {
367
+ "category": "tools",
368
+ "test": "WriteFile blocks /tmp/evil",
369
+ "status": "PASS"
370
+ },
371
+ {
372
+ "category": "runmode",
373
+ "test": "TRAIN allows write",
374
+ "status": "PASS"
375
+ },
376
+ {
377
+ "category": "runmode",
378
+ "test": "EVAL blocks write",
379
+ "status": "PASS"
380
+ },
381
+ {
382
+ "category": "runmode",
383
+ "test": "EVAL is_eval",
384
+ "status": "PASS"
385
+ },
386
+ {
387
+ "category": "trace",
388
+ "test": "Events recorded",
389
+ "status": "PASS"
390
+ },
391
+ {
392
+ "category": "trace",
393
+ "test": "JSONL roundtrip",
394
+ "status": "PASS"
395
+ },
396
+ {
397
+ "category": "memory",
398
+ "test": "7 MemoryKinds",
399
+ "status": "PASS"
400
+ },
401
+ {
402
+ "category": "memory",
403
+ "test": "5 MemoryStatuses",
404
+ "status": "PASS"
405
+ },
406
+ {
407
+ "category": "memory",
408
+ "test": "Scoped retrieve",
409
+ "status": "PASS"
410
+ },
411
+ {
412
+ "category": "compiler",
413
+ "test": "Respects token budget",
414
+ "status": "PASS"
415
+ },
416
+ {
417
+ "category": "compiler",
418
+ "test": "Returns memory IDs",
419
+ "status": "PASS"
420
+ },
421
+ {
422
+ "category": "immune",
423
+ "test": "Safe passes",
424
+ "status": "PASS"
425
+ },
426
+ {
427
+ "category": "immune",
428
+ "test": "Injection blocked",
429
+ "status": "PASS"
430
+ },
431
+ {
432
+ "category": "immune",
433
+ "test": "Score hack blocked",
434
+ "status": "PASS"
435
+ },
436
+ {
437
+ "category": "immune",
438
+ "test": "API key blocked",
439
+ "status": "PASS"
440
+ },
441
+ {
442
+ "category": "immune",
443
+ "test": "Tool misuse blocked",
444
+ "status": "PASS"
445
+ },
446
+ {
447
+ "category": "ci",
448
+ "test": "Good \u2192 quarantined",
449
+ "status": "PASS"
450
+ },
451
+ {
452
+ "category": "ci",
453
+ "test": "Promote works",
454
+ "status": "PASS"
455
+ },
456
+ {
457
+ "category": "ci",
458
+ "test": "Injection \u2192 rejected",
459
+ "status": "PASS"
460
+ },
461
+ {
462
+ "category": "agent",
463
+ "test": "Agent.run() completes",
464
+ "status": "PASS"
465
+ },
466
+ {
467
+ "category": "graph",
468
+ "test": "Conditional routing",
469
+ "status": "PASS"
470
+ },
471
+ {
472
+ "category": "parallel",
473
+ "test": "3 tasks complete",
474
+ "status": "PASS"
475
+ },
476
+ {
477
+ "category": "conversation",
478
+ "test": "Messages produced",
479
+ "status": "PASS"
480
+ },
481
+ {
482
+ "category": "knowledge",
483
+ "test": "Chunks stored",
484
+ "status": "PASS"
485
+ },
486
+ {
487
+ "category": "knowledge",
488
+ "test": "Query returns results",
489
+ "status": "PASS"
490
+ },
491
+ {
492
+ "category": "knowledge",
493
+ "test": "as_tool() works",
494
+ "status": "PASS"
495
+ },
496
+ {
497
+ "category": "easy",
498
+ "test": "purpose() auto-detects coding team",
499
+ "status": "PASS"
500
+ },
501
+ {
502
+ "category": "easy",
503
+ "test": "purpose() auto-detects research team",
504
+ "status": "PASS"
505
+ },
506
+ {
507
+ "category": "easy",
508
+ "test": "Team.build() works",
509
+ "status": "PASS"
510
+ },
511
+ {
512
+ "category": "research",
513
+ "test": "MetaRewardingLoop importable",
514
+ "status": "PASS"
515
+ },
516
+ {
517
+ "category": "research",
518
+ "test": "SelfTaughtEvaluator importable",
519
+ "status": "PASS"
520
+ },
521
+ {
522
+ "category": "research",
523
+ "test": "PromptOptimizer importable",
524
+ "status": "PASS"
525
+ },
526
+ {
527
+ "category": "research",
528
+ "test": "LLMCompiler importable",
529
+ "status": "PASS"
530
+ },
531
+ {
532
+ "category": "research",
533
+ "test": "Retroformer importable",
534
+ "status": "PASS"
535
+ },
536
+ {
537
+ "category": "research",
538
+ "test": "PromptOptimizer.compile_prompt works",
539
+ "status": "PASS"
540
+ },
541
+ {
542
+ "category": "research",
543
+ "test": "LLMCompiler plans tasks",
544
+ "status": "PASS"
545
+ },
546
+ {
547
+ "category": "research",
548
+ "test": "LLMCompiler executes plan",
549
+ "status": "PASS"
550
+ },
551
+ {
552
+ "category": "B2-MoH",
553
+ "test": "Shared identified",
554
+ "status": "PASS"
555
+ },
556
+ {
557
+ "category": "B2-MoH",
558
+ "test": "Total K=5 selected",
559
+ "status": "PASS"
560
+ },
561
+ {
562
+ "category": "B6-adversarial",
563
+ "test": "Catch rate 95%",
564
+ "status": "PASS"
565
+ },
566
+ {
567
+ "category": "B6-adversarial",
568
+ "test": "FP rate 0%",
569
+ "status": "PASS"
570
+ },
571
+ {
572
+ "category": "parser",
573
+ "test": "TOML actor parse",
574
+ "status": "PASS"
575
+ },
576
+ {
577
+ "category": "parser",
578
+ "test": "JSON actor parse",
579
+ "status": "PASS"
580
+ },
581
+ {
582
+ "category": "parser",
583
+ "test": "TOML critic parse",
584
+ "status": "PASS"
585
+ },
586
+ {
587
+ "category": "parser",
588
+ "test": "Extract code from markdown",
589
+ "status": "PASS"
590
+ },
591
+ {
592
+ "category": "benchmark",
593
+ "test": "Improvement curve: [1.0, 10.0, 10.0]",
594
+ "status": "PASS"
595
+ },
596
+ {
597
+ "category": "benchmark",
598
+ "test": "Heuristics learned: 6",
599
+ "status": "PASS"
600
+ }
601
+ ]
602
+ }