iamshlomo commited on
Commit
b2884f8
·
verified ·
1 Parent(s): b20d909

Upload generalization/ablations_20260323_0654/ablations_results.json with huggingface_hub

Browse files
generalization/ablations_20260323_0654/ablations_results.json ADDED
@@ -0,0 +1,958 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "20260323_0654",
3
+ "max_iter": 500,
4
+ "eval_eps": 10,
5
+ "id_envs": [
6
+ "MiniHack-Room-Random-5x5-v0",
7
+ "MiniHack-Room-Random-15x15-v0",
8
+ "MiniHack-Corridor-R2-v0",
9
+ "MiniHack-MazeWalk-9x9-v0"
10
+ ],
11
+ "ood_envs": [
12
+ "MiniHack-Room-Dark-15x15-v0",
13
+ "MiniHack-Corridor-R5-v0",
14
+ "MiniHack-MazeWalk-45x19-v0"
15
+ ],
16
+ "baseline_id": 0.525,
17
+ "baseline_ood": 0.03333333333333333,
18
+ "baseline_rl_id": 0.05,
19
+ "baseline_rl_ood": 0.0,
20
+ "kl_penalty": {
21
+ "id": 0.07500000000000001,
22
+ "ood": 0.0
23
+ },
24
+ "frozen_backbone": {
25
+ "id": 0.0,
26
+ "ood": 0.03333333333333333
27
+ },
28
+ "bc_on_wins": {
29
+ "id": 0.1,
30
+ "ood": 0.0
31
+ },
32
+ "low_t_only": {
33
+ "id": 0.0,
34
+ "ood": 0.0
35
+ },
36
+ "histories": {
37
+ "kl_penalty": {
38
+ "iter": [
39
+ 10,
40
+ 20,
41
+ 30,
42
+ 40,
43
+ 50,
44
+ 60,
45
+ 70,
46
+ 80,
47
+ 90,
48
+ 100,
49
+ 110,
50
+ 120,
51
+ 130,
52
+ 140,
53
+ 150,
54
+ 160,
55
+ 170,
56
+ 180,
57
+ 190,
58
+ 200,
59
+ 210,
60
+ 220,
61
+ 230,
62
+ 240,
63
+ 250,
64
+ 260,
65
+ 270,
66
+ 280,
67
+ 290,
68
+ 300,
69
+ 310,
70
+ 320,
71
+ 330,
72
+ 340,
73
+ 350,
74
+ 360,
75
+ 370,
76
+ 380,
77
+ 390,
78
+ 400,
79
+ 410,
80
+ 420,
81
+ 430,
82
+ 440,
83
+ 450,
84
+ 460,
85
+ 470,
86
+ 480,
87
+ 490,
88
+ 500
89
+ ],
90
+ "loss": [
91
+ 0.948145616054535,
92
+ 0.35268235206604004,
93
+ 0.6904642522335053,
94
+ 0.6291592061519623,
95
+ 0.5819108426570893,
96
+ 0.4715630114078522,
97
+ 0.4235985964536667,
98
+ 0.3703161895275116,
99
+ 0.29086490720510483,
100
+ 0.38290891647338865,
101
+ 0.35143099576234815,
102
+ 0.40041575729846957,
103
+ 0.399696147441864,
104
+ 0.29417929500341417,
105
+ 0.39398047775030137,
106
+ 0.44491764307022097,
107
+ 0.2727233037352562,
108
+ 0.3691611304879189,
109
+ 0.42049756497144697,
110
+ 0.48060639649629594,
111
+ 0.3573265939950943,
112
+ 0.30457123219966886,
113
+ 0.4908187583088875,
114
+ 0.3069219410419464,
115
+ 0.3941160559654236,
116
+ 0.3491177424788475,
117
+ 0.4103958770632744,
118
+ 0.3400088131427765,
119
+ 0.3288590595126152,
120
+ 0.33722358047962187,
121
+ 0.3162809520959854,
122
+ 0.27592245638370516,
123
+ 0.33332045674324035,
124
+ 0.30411274135112765,
125
+ 0.28425846099853513,
126
+ 0.4032061755657196,
127
+ 0.3300602614879608,
128
+ 0.42814253866672514,
129
+ 0.36021788418293,
130
+ 0.3740533620119095,
131
+ 0.2958697035908699,
132
+ 0.27764963656663894,
133
+ 0.4088175743818283,
134
+ 0.49800611585378646,
135
+ 0.24727296680212021,
136
+ 0.34623909294605254,
137
+ 0.23528977185487748,
138
+ 0.3207644522190094,
139
+ 0.2556593582034111,
140
+ 0.3742773160338402
141
+ ],
142
+ "mean_return": [
143
+ 9.596999999999998,
144
+ 2.5040000000000076,
145
+ 8.33,
146
+ -0.2169999999999944,
147
+ -3.1949999999999883,
148
+ 2.723000000000017,
149
+ 0.9180000000000115,
150
+ 7.764999999999996,
151
+ 9.305000000000007,
152
+ -0.17900000000000132,
153
+ 4.165,
154
+ 7.197999999999999,
155
+ 0.16799999999999918,
156
+ 7.480000000000002,
157
+ 4.742000000000006,
158
+ 4.1260000000000066,
159
+ 3.938000000000011,
160
+ 4.418000000000003,
161
+ 1.7460000000000089,
162
+ 4.226000000000004,
163
+ 7.670000000000002,
164
+ 8.473000000000003,
165
+ -0.6809999999999949,
166
+ 4.7589999999999995,
167
+ -1.0759999999999952,
168
+ 7.914,
169
+ 12.382000000000001,
170
+ 14.102000000000004,
171
+ 4.896000000000008,
172
+ 1.3490000000000137,
173
+ 10.210000000000004,
174
+ 4.05300000000001,
175
+ 6.554000000000011,
176
+ 4.437000000000014,
177
+ 8.76000000000001,
178
+ 7.571,
179
+ 10.437000000000005,
180
+ 1.8210000000000026,
181
+ 3.130000000000016,
182
+ 6.244000000000007,
183
+ 4.823000000000002,
184
+ 8.076000000000004,
185
+ 7.525,
186
+ 2.2230000000000034,
187
+ 4.527000000000003,
188
+ 4.916000000000001,
189
+ 0.22200000000000247,
190
+ 2.2530000000000054,
191
+ 16.430000000000007,
192
+ 2.905000000000002
193
+ ],
194
+ "win_rate_episode": [
195
+ 0.5,
196
+ 0.2,
197
+ 0.4,
198
+ 0.1,
199
+ 0.1,
200
+ 0.3,
201
+ 0.2,
202
+ 0.4,
203
+ 0.5,
204
+ 0.1,
205
+ 0.3,
206
+ 0.4,
207
+ 0.1,
208
+ 0.4,
209
+ 0.4,
210
+ 0.3,
211
+ 0.3,
212
+ 0.3,
213
+ 0.2,
214
+ 0.3,
215
+ 0.4,
216
+ 0.4,
217
+ 0.1,
218
+ 0.3,
219
+ 0.1,
220
+ 0.4,
221
+ 0.6,
222
+ 0.7,
223
+ 0.3,
224
+ 0.2,
225
+ 0.5,
226
+ 0.3,
227
+ 0.4,
228
+ 0.3,
229
+ 0.5,
230
+ 0.4,
231
+ 0.5,
232
+ 0.2,
233
+ 0.3,
234
+ 0.4,
235
+ 0.3,
236
+ 0.4,
237
+ 0.4,
238
+ 0.2,
239
+ 0.3,
240
+ 0.3,
241
+ 0.1,
242
+ 0.2,
243
+ 0.7,
244
+ 0.2
245
+ ],
246
+ "id_winrate": [
247
+ 0.22499999999999998,
248
+ 0.22499999999999998,
249
+ 0.30000000000000004,
250
+ 0.2,
251
+ 0.15000000000000002
252
+ ],
253
+ "id_winrate_iter": [
254
+ 100,
255
+ 200,
256
+ 300,
257
+ 400,
258
+ 500
259
+ ],
260
+ "ood_winrate": [
261
+ 0.03333333333333333
262
+ ],
263
+ "ood_winrate_iter": [
264
+ 500
265
+ ]
266
+ },
267
+ "frozen_backbone": {
268
+ "iter": [
269
+ 10,
270
+ 20,
271
+ 30,
272
+ 40,
273
+ 50,
274
+ 60,
275
+ 70,
276
+ 80,
277
+ 90,
278
+ 100,
279
+ 110,
280
+ 120,
281
+ 130,
282
+ 140,
283
+ 150,
284
+ 160,
285
+ 170,
286
+ 180,
287
+ 190,
288
+ 200,
289
+ 210,
290
+ 220,
291
+ 230,
292
+ 240,
293
+ 250,
294
+ 260,
295
+ 270,
296
+ 280,
297
+ 290,
298
+ 300,
299
+ 310,
300
+ 320,
301
+ 330,
302
+ 340,
303
+ 350,
304
+ 360,
305
+ 370,
306
+ 380,
307
+ 390,
308
+ 400,
309
+ 410,
310
+ 420,
311
+ 430,
312
+ 440,
313
+ 450,
314
+ 460,
315
+ 470,
316
+ 480,
317
+ 490,
318
+ 500
319
+ ],
320
+ "loss": [
321
+ 0.5321863897144794,
322
+ 2.2267230302095413,
323
+ 2.683171606063843,
324
+ 2.939804530143738,
325
+ 2.8783190131187437,
326
+ 2.478312370181084,
327
+ 1.956764531135559,
328
+ 1.4500245451927185,
329
+ 1.3196770876646042,
330
+ 1.434557992219925,
331
+ 1.452769011259079,
332
+ 1.6346239805221559,
333
+ 1.5891823202371598,
334
+ 0.8025875329971314,
335
+ 0.564156585931778,
336
+ 0.7170131579041481,
337
+ 0.8092973873019218,
338
+ 0.5919842332601547,
339
+ 0.5011876434087753,
340
+ 0.4465642273426056,
341
+ 0.6770713970065116,
342
+ 0.4779348522424698,
343
+ 0.3623740553855896,
344
+ 0.34029380679130555,
345
+ 0.5371142461895942,
346
+ 0.2875633895397186,
347
+ 0.3469204634428024,
348
+ 0.39238742291927337,
349
+ 0.2880588859319687,
350
+ 0.4023260846734047,
351
+ 0.16760316044092177,
352
+ 0.3154412403702736,
353
+ 0.5128347232937813,
354
+ 0.285700261592865,
355
+ 0.17228346243500708,
356
+ 0.14143240377306937,
357
+ 0.15710901021957396,
358
+ 0.18778855353593826,
359
+ 0.10249241068959236,
360
+ 0.10931030958890915,
361
+ 0.12215590812265872,
362
+ 0.12855551205575466,
363
+ 0.11794335097074508,
364
+ 0.16240272894501687,
365
+ 0.07643880993127823,
366
+ 0.08280903548002243,
367
+ 0.09675384908914567,
368
+ 0.07363568656146527,
369
+ 0.10575838088989258,
370
+ 0.11448417082428933
371
+ ],
372
+ "mean_return": [
373
+ 12.591,
374
+ 8.502000000000006,
375
+ 14.891000000000002,
376
+ 4.440000000000004,
377
+ 4.870000000000003,
378
+ 4.467999999999999,
379
+ 1.5660000000000092,
380
+ 4.8679999999999986,
381
+ -3.3869999999999956,
382
+ -3.4440000000000004,
383
+ -3.1799999999999864,
384
+ 3.81400000000001,
385
+ 0.7800000000000159,
386
+ 5.344999999999999,
387
+ 2.1560000000000006,
388
+ -3.6459999999999964,
389
+ -0.698999999999992,
390
+ -0.5310000000000007,
391
+ -1.0419999999999914,
392
+ -3.8559999999999945,
393
+ -4.338999999999986,
394
+ -4.0579999999999945,
395
+ -3.3759999999999883,
396
+ -1.3829999999999938,
397
+ -3.364999999999995,
398
+ -3.2670000000000003,
399
+ -1.4459999999999933,
400
+ -3.8849999999999945,
401
+ -1.0559999999999963,
402
+ -2.854999999999989,
403
+ -3.765999999999996,
404
+ -2.9069999999999983,
405
+ -3.6889999999999903,
406
+ -3.0909999999999993,
407
+ -3.1159999999999948,
408
+ -3.621999999999995,
409
+ -3.3759999999999963,
410
+ -0.8090000000000002,
411
+ -0.7199999999999905,
412
+ -1.0239999999999956,
413
+ -3.787999999999994,
414
+ -0.6970000000000001,
415
+ 1.0400000000000211,
416
+ -4.162999999999987,
417
+ -1.9699999999999893,
418
+ -3.254,
419
+ -3.9689999999999928,
420
+ 1.660000000000006,
421
+ 2.361000000000003,
422
+ -3.6069999999999984
423
+ ],
424
+ "win_rate_episode": [
425
+ 0.6,
426
+ 0.4,
427
+ 0.7,
428
+ 0.3,
429
+ 0.3,
430
+ 0.3,
431
+ 0.2,
432
+ 0.3,
433
+ 0.0,
434
+ 0.0,
435
+ 0.0,
436
+ 0.3,
437
+ 0.2,
438
+ 0.3,
439
+ 0.2,
440
+ 0.0,
441
+ 0.1,
442
+ 0.1,
443
+ 0.1,
444
+ 0.0,
445
+ 0.0,
446
+ 0.0,
447
+ 0.0,
448
+ 0.1,
449
+ 0.0,
450
+ 0.0,
451
+ 0.1,
452
+ 0.0,
453
+ 0.1,
454
+ 0.0,
455
+ 0.0,
456
+ 0.0,
457
+ 0.0,
458
+ 0.0,
459
+ 0.0,
460
+ 0.0,
461
+ 0.0,
462
+ 0.1,
463
+ 0.1,
464
+ 0.1,
465
+ 0.0,
466
+ 0.1,
467
+ 0.2,
468
+ 0.0,
469
+ 0.1,
470
+ 0.0,
471
+ 0.0,
472
+ 0.2,
473
+ 0.2,
474
+ 0.0
475
+ ],
476
+ "id_winrate": [
477
+ 0.325,
478
+ 0.125,
479
+ 0.125,
480
+ 0.07500000000000001,
481
+ 0.0
482
+ ],
483
+ "id_winrate_iter": [
484
+ 100,
485
+ 200,
486
+ 300,
487
+ 400,
488
+ 500
489
+ ],
490
+ "ood_winrate": [
491
+ 0.0
492
+ ],
493
+ "ood_winrate_iter": [
494
+ 500
495
+ ]
496
+ },
497
+ "bc_on_wins": {
498
+ "iter": [
499
+ 10,
500
+ 20,
501
+ 30,
502
+ 40,
503
+ 50,
504
+ 60,
505
+ 70,
506
+ 80,
507
+ 90,
508
+ 100,
509
+ 110,
510
+ 120,
511
+ 130,
512
+ 140,
513
+ 150,
514
+ 160,
515
+ 170,
516
+ 180,
517
+ 190,
518
+ 200,
519
+ 210,
520
+ 220,
521
+ 230,
522
+ 240,
523
+ 250,
524
+ 260,
525
+ 270,
526
+ 280,
527
+ 290,
528
+ 300,
529
+ 310,
530
+ 320,
531
+ 330,
532
+ 340,
533
+ 350,
534
+ 360,
535
+ 370,
536
+ 380,
537
+ 390,
538
+ 400,
539
+ 410,
540
+ 420,
541
+ 430,
542
+ 440,
543
+ 450,
544
+ 460,
545
+ 470,
546
+ 480,
547
+ 490,
548
+ 500
549
+ ],
550
+ "loss": [
551
+ 0.9382685769349337,
552
+ 0.4896347165107727,
553
+ 0.18173747919499875,
554
+ 0.09714697301387787,
555
+ 0.06052327724173665,
556
+ 0.04286335262004286,
557
+ 0.017408199701458217,
558
+ 0.03272836646065116,
559
+ 0.03820325364358723,
560
+ 0.003487833932740614,
561
+ 0.020946517394622787,
562
+ 0.047648298740386966,
563
+ 0.10672710686922074,
564
+ 0.050060207629576324,
565
+ 0.05387465837411583,
566
+ 0.027989869005978107,
567
+ 0.05242592375725508,
568
+ 0.14335312466137112,
569
+ 0.10210369899868965,
570
+ 0.06762518733739853,
571
+ 0.06595192728564143,
572
+ 0.05460359901189804,
573
+ 0.05835297182202339,
574
+ 0.043908438994549215,
575
+ 0.08726840056478977,
576
+ 0.02849291106685996,
577
+ 0.044854346802458166,
578
+ 0.05894844010472298,
579
+ 0.08322379365563393,
580
+ 0.06468131653964519,
581
+ 0.05024728113785386,
582
+ 0.023433164157904685,
583
+ 0.05213415753096342,
584
+ 0.03999026356032118,
585
+ 0.0556512268492952,
586
+ 0.0693166732788086,
587
+ 0.031749955657869575,
588
+ 0.0284718063659966,
589
+ 0.08611965160816908,
590
+ 0.06660898234695196,
591
+ 0.0630769046023488,
592
+ 0.05804459396749735,
593
+ 0.07558480557054281,
594
+ 0.06936441725119949,
595
+ 0.06975492211058736,
596
+ 0.04207881577312946,
597
+ 0.06131993634626269,
598
+ 0.10803285427391529,
599
+ 0.09587282091379165,
600
+ 0.04778247252106667
601
+ ],
602
+ "mean_return": [
603
+ 9.611000000000004,
604
+ 3.6290000000000098,
605
+ -1.9859999999999964,
606
+ 4.231000000000002,
607
+ 1.1600000000000152,
608
+ -3.98199999999999,
609
+ -1.2729999999999912,
610
+ 1.3780000000000077,
611
+ 0.6410000000000101,
612
+ -1.1849999999999916,
613
+ 7.519999999999999,
614
+ 4.872000000000003,
615
+ 6.323000000000008,
616
+ 3.4020000000000117,
617
+ -1.864999999999983,
618
+ 0.1530000000000134,
619
+ 0.7560000000000147,
620
+ -3.1639999999999886,
621
+ 2.180000000000013,
622
+ 0.12100000000000506,
623
+ -4.646999999999973,
624
+ 1.044000000000004,
625
+ -3.869999999999986,
626
+ -1.062999999999994,
627
+ -1.7669999999999848,
628
+ 5.493999999999999,
629
+ 1.807000000000008,
630
+ 1.9370000000000023,
631
+ 9.286000000000007,
632
+ 2.401000000000007,
633
+ 1.1910000000000138,
634
+ -3.438999999999995,
635
+ -1.274999999999993,
636
+ 0.8840000000000074,
637
+ -0.5109999999999928,
638
+ -2.188999999999987,
639
+ -1.2129999999999868,
640
+ -3.989999999999994,
641
+ -3.64099999999999,
642
+ -3.979999999999992,
643
+ -1.2709999999999906,
644
+ 0.766000000000013,
645
+ 1.639000000000009,
646
+ 0.8090000000000079,
647
+ -0.6849999999999927,
648
+ 1.0290000000000112,
649
+ 1.7420000000000109,
650
+ 1.6880000000000113,
651
+ 10.195000000000004,
652
+ -0.7379999999999948
653
+ ],
654
+ "win_rate_episode": [
655
+ 0.5,
656
+ 0.3,
657
+ 0.1,
658
+ 0.3,
659
+ 0.2,
660
+ 0.0,
661
+ 0.1,
662
+ 0.2,
663
+ 0.2,
664
+ 0.1,
665
+ 0.4,
666
+ 0.3,
667
+ 0.3,
668
+ 0.3,
669
+ 0.1,
670
+ 0.2,
671
+ 0.2,
672
+ 0.0,
673
+ 0.2,
674
+ 0.1,
675
+ 0.0,
676
+ 0.2,
677
+ 0.0,
678
+ 0.1,
679
+ 0.1,
680
+ 0.3,
681
+ 0.2,
682
+ 0.2,
683
+ 0.5,
684
+ 0.2,
685
+ 0.2,
686
+ 0.0,
687
+ 0.1,
688
+ 0.2,
689
+ 0.1,
690
+ 0.1,
691
+ 0.1,
692
+ 0.0,
693
+ 0.0,
694
+ 0.0,
695
+ 0.1,
696
+ 0.2,
697
+ 0.2,
698
+ 0.2,
699
+ 0.1,
700
+ 0.2,
701
+ 0.2,
702
+ 0.2,
703
+ 0.5,
704
+ 0.1
705
+ ],
706
+ "id_winrate": [
707
+ 0.125,
708
+ 0.175,
709
+ 0.125,
710
+ 0.125,
711
+ 0.125
712
+ ],
713
+ "id_winrate_iter": [
714
+ 100,
715
+ 200,
716
+ 300,
717
+ 400,
718
+ 500
719
+ ],
720
+ "ood_winrate": [
721
+ 0.0
722
+ ],
723
+ "ood_winrate_iter": [
724
+ 500
725
+ ]
726
+ },
727
+ "low_t_only": {
728
+ "iter": [
729
+ 10,
730
+ 20,
731
+ 30,
732
+ 40,
733
+ 50,
734
+ 60,
735
+ 70,
736
+ 80,
737
+ 90,
738
+ 100,
739
+ 110,
740
+ 120,
741
+ 130,
742
+ 140,
743
+ 150,
744
+ 160,
745
+ 170,
746
+ 180,
747
+ 190,
748
+ 200,
749
+ 210,
750
+ 220,
751
+ 230,
752
+ 240,
753
+ 250,
754
+ 260,
755
+ 270,
756
+ 280,
757
+ 290,
758
+ 300,
759
+ 310,
760
+ 320,
761
+ 330,
762
+ 340,
763
+ 350,
764
+ 360,
765
+ 370,
766
+ 380,
767
+ 390,
768
+ 400,
769
+ 410,
770
+ 420,
771
+ 430,
772
+ 440,
773
+ 450,
774
+ 460,
775
+ 470,
776
+ 480,
777
+ 490,
778
+ 500
779
+ ],
780
+ "loss": [
781
+ 1.6410588204860688,
782
+ 0.8960332840681076,
783
+ 0.6847688511013985,
784
+ 0.6297759741544724,
785
+ 0.4868937596678734,
786
+ 0.2125535998493433,
787
+ 0.4552276059985161,
788
+ 0.3872785001993179,
789
+ 0.2769521526992321,
790
+ 0.31313602104783056,
791
+ 0.24361244440078736,
792
+ 0.2300670851022005,
793
+ 0.17256035320460797,
794
+ 0.23112220019102098,
795
+ 0.1808197870850563,
796
+ 0.4657373405992985,
797
+ 0.2577011398971081,
798
+ 0.23844866938889026,
799
+ 0.21282374374568463,
800
+ 0.2077945176512003,
801
+ 0.09179784022271634,
802
+ 0.1701847380027175,
803
+ 0.15767875965684652,
804
+ 0.11646081283688545,
805
+ 0.08910894729197025,
806
+ 0.10342100337147712,
807
+ 0.11086354544386268,
808
+ 0.1676120450720191,
809
+ 0.08715764544904232,
810
+ 0.16386388782411815,
811
+ 0.08603336606174708,
812
+ 0.17930752746760845,
813
+ 0.2195651026442647,
814
+ 0.2416287451982498,
815
+ 0.1315755879506469,
816
+ 0.08364240694791078,
817
+ 0.04800235852599144,
818
+ 0.09680641312152147,
819
+ 0.041467034909874204,
820
+ 0.06297689490020275,
821
+ 0.1103413556702435,
822
+ 0.07316734474152327,
823
+ 0.10544817838817835,
824
+ 0.034891866194084284,
825
+ 0.08574364762753248,
826
+ 0.08141211261972785,
827
+ 0.03275516489520669,
828
+ 0.09520450294949115,
829
+ 0.06577473622746766,
830
+ 0.047857786435633896
831
+ ],
832
+ "mean_return": [
833
+ 8.571000000000002,
834
+ 1.3540000000000036,
835
+ 0.11199999999999832,
836
+ -1.6099999999999917,
837
+ 2.122000000000006,
838
+ -0.5919999999999999,
839
+ -0.1079999999999826,
840
+ -4.9199999999999875,
841
+ 3.489000000000008,
842
+ -1.0619999999999936,
843
+ -0.9519999999999932,
844
+ 4.619999999999998,
845
+ 5.227000000000001,
846
+ -0.15799999999999575,
847
+ 1.165000000000013,
848
+ 2.1460000000000035,
849
+ 1.6470000000000122,
850
+ 6.836000000000006,
851
+ -2.309999999999995,
852
+ 1.717000000000004,
853
+ 2.596999999999999,
854
+ -0.6350000000000003,
855
+ 4.628000000000008,
856
+ 0.917000000000008,
857
+ -1.0089999999999981,
858
+ 4.536000000000007,
859
+ 5.888000000000012,
860
+ -3.3749999999999973,
861
+ -0.6699999999999893,
862
+ 1.0640000000000058,
863
+ -3.8279999999999887,
864
+ 0.5890000000000015,
865
+ -0.8989999999999906,
866
+ 0.2250000000000009,
867
+ 1.5210000000000048,
868
+ -1.4749999999999974,
869
+ -3.4469999999999983,
870
+ 2.7570000000000014,
871
+ 4.024000000000015,
872
+ 2.173999999999999,
873
+ -2.995999999999996,
874
+ -4.162999999999989,
875
+ -0.946999999999987,
876
+ -2.199999999999985,
877
+ -3.5559999999999974,
878
+ -1.7629999999999906,
879
+ 2.1019999999999985,
880
+ 0.9280000000000104,
881
+ -0.7599999999999906,
882
+ -5.13999999999998
883
+ ],
884
+ "win_rate_episode": [
885
+ 0.4,
886
+ 0.2,
887
+ 0.1,
888
+ 0.1,
889
+ 0.2,
890
+ 0.1,
891
+ 0.1,
892
+ 0.0,
893
+ 0.3,
894
+ 0.1,
895
+ 0.1,
896
+ 0.3,
897
+ 0.3,
898
+ 0.1,
899
+ 0.2,
900
+ 0.2,
901
+ 0.2,
902
+ 0.4,
903
+ 0.0,
904
+ 0.2,
905
+ 0.2,
906
+ 0.1,
907
+ 0.3,
908
+ 0.2,
909
+ 0.1,
910
+ 0.3,
911
+ 0.4,
912
+ 0.0,
913
+ 0.1,
914
+ 0.2,
915
+ 0.0,
916
+ 0.1,
917
+ 0.1,
918
+ 0.1,
919
+ 0.2,
920
+ 0.1,
921
+ 0.0,
922
+ 0.2,
923
+ 0.3,
924
+ 0.2,
925
+ 0.0,
926
+ 0.0,
927
+ 0.1,
928
+ 0.1,
929
+ 0.0,
930
+ 0.1,
931
+ 0.2,
932
+ 0.2,
933
+ 0.1,
934
+ 0.0
935
+ ],
936
+ "id_winrate": [
937
+ 0.275,
938
+ 0.025,
939
+ 0.1,
940
+ 0.0,
941
+ 0.025
942
+ ],
943
+ "id_winrate_iter": [
944
+ 100,
945
+ 200,
946
+ 300,
947
+ 400,
948
+ 500
949
+ ],
950
+ "ood_winrate": [
951
+ 0.0
952
+ ],
953
+ "ood_winrate_iter": [
954
+ 500
955
+ ]
956
+ }
957
+ }
958
+ }