File size: 3,867 Bytes
a38a118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
[harvest] split=verified loading task metadata ...
[harvest] 500 task entries in 2.1s
[harvest] harvesting 5 submissions: ['20240620_sweagent_claude3.5sonnet', '20241022_tools_claude-3-5-sonnet-updated', '20241028_agentless-1.5_gpt4o', '20241029_OpenHands-CodeAct-2.1-sonnet-20241022', '20250405_amazon-q-developer-agent-20250405-dev']
[harvest-s3] listing verified/20240620_sweagent_claude3.5sonnet/logs/ ...
[harvest-s3]   489 instance dirs
[harvest-s3]   50/489 (16.5/s, kept=50)
[harvest-s3]   100/489 (18.7/s, kept=100)
[harvest-s3]   150/489 (19.6/s, kept=149)
[harvest-s3]   200/489 (18.4/s, kept=198)
[harvest-s3]   250/489 (23.5/s, kept=248)
[harvest-s3]   300/489 (22.6/s, kept=297)
[harvest-s3]   350/489 (21.7/s, kept=347)
[harvest-s3]   400/489 (22.6/s, kept=396)
[harvest-s3]   450/489 (22.4/s, kept=446)
[harvest-s3] 20240620_sweagent_claude3.5sonnet: kept 485 (resolved=168, rate=34.6%)
[harvest-s3] listing verified/20241022_tools_claude-3-5-sonnet-updated/logs/ ...
[harvest-s3]   486 instance dirs
[harvest-s3]   50/486 (15.7/s, kept=50)
[harvest-s3]   100/486 (17.6/s, kept=100)
[harvest-s3]   150/486 (18.0/s, kept=150)
[harvest-s3]   200/486 (17.7/s, kept=200)
[harvest-s3]   250/486 (17.9/s, kept=250)
[harvest-s3]   300/486 (20.8/s, kept=299)
[harvest-s3]   350/486 (20.8/s, kept=349)
[harvest-s3]   400/486 (19.2/s, kept=397)
[harvest-s3]   450/486 (20.1/s, kept=447)
[harvest-s3] 20241022_tools_claude-3-5-sonnet-updated: kept 483 (resolved=245, rate=50.7%)
[harvest-s3] listing verified/20241028_agentless-1.5_gpt4o/logs/ ...
[harvest-s3]   496 instance dirs
[harvest-s3]   50/496 (16.2/s, kept=50)
[harvest-s3]   100/496 (17.1/s, kept=100)
[harvest-s3]   150/496 (16.7/s, kept=150)
[harvest-s3]   200/496 (18.6/s, kept=200)
[harvest-s3]   250/496 (18.9/s, kept=250)
[harvest-s3]   300/496 (18.5/s, kept=300)
[harvest-s3]   350/496 (18.1/s, kept=350)
[harvest-s3]   400/496 (21.2/s, kept=400)
[harvest-s3]   450/496 (20.9/s, kept=449)
[harvest-s3] 20241028_agentless-1.5_gpt4o: kept 495 (resolved=194, rate=39.2%)
[harvest-s3] listing verified/20241029_OpenHands-CodeAct-2.1-sonnet-20241022/logs/ ...
[harvest-s3]   494 instance dirs
[harvest-s3]   50/494 (15.0/s, kept=50)
[harvest-s3]   100/494 (15.3/s, kept=100)
[harvest-s3]   150/494 (18.0/s, kept=149)
[harvest-s3]   200/494 (18.6/s, kept=199)
[harvest-s3]   250/494 (18.5/s, kept=249)
[harvest-s3]   300/494 (18.0/s, kept=299)
[harvest-s3]   350/494 (17.6/s, kept=349)
[harvest-s3]   400/494 (19.4/s, kept=399)
[harvest-s3]   450/494 (19.8/s, kept=449)
[harvest-s3] 20241029_OpenHands-CodeAct-2.1-sonnet-20241022: kept 493 (resolved=265, rate=53.8%)
[harvest-s3] listing verified/20250405_amazon-q-developer-agent-20250405-dev/logs/ ...
[harvest-s3]   500 instance dirs
[harvest-s3]   50/500 (11.8/s, kept=50)
[harvest-s3]   100/500 (17.8/s, kept=100)
[harvest-s3]   150/500 (18.4/s, kept=150)
[harvest-s3]   200/500 (18.8/s, kept=200)
[harvest-s3]   250/500 (19.2/s, kept=250)
[harvest-s3]   300/500 (18.7/s, kept=300)
[harvest-s3]   350/500 (18.4/s, kept=350)
[harvest-s3]   400/500 (18.8/s, kept=400)
[harvest-s3]   450/500 (19.0/s, kept=450)
[harvest-s3]   500/500 (20.7/s, kept=500)
[harvest-s3] 20250405_amazon-q-developer-agent-20250405-dev: kept 500 (resolved=330, rate=66.0%)

[harvest] wrote 2456 trajs → outputs/traj_real/specs.jsonl
[harvest] resolved overall: 1202/2456 (48.9%)
  20240620_sweagent_claude3.5sonnet                             n= 485  resolved= 168  rate=34.6%
  20241022_tools_claude-3-5-sonnet-updated                      n= 483  resolved= 245  rate=50.7%
  20241028_agentless-1.5_gpt4o                                  n= 495  resolved= 194  rate=39.2%
  20241029_OpenHands-CodeAct-2.1-sonnet-20241022                n= 493  resolved= 265  rate=53.8%
  20250405_amazon-q-developer-agent-20250405-dev                n= 500  resolved= 330  rate=66.0%