File size: 6,255 Bytes
10643b7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | {
"created_at": "2026-05-03 10:09:39",
"device": "cuda",
"problem_count": 2,
"reference": "best",
"comparison": [
{
"label": "best",
"pass_at_1": 0.0,
"syntax_rate": 0.0,
"delta_vs_reference": 0.0
},
{
"label": "hf_pretrain_base",
"pass_at_1": 0.0,
"syntax_rate": 100.0,
"delta_vs_reference": 0.0
}
],
"results": [
{
"path": "/mnt/scratch/checkpoints/frankenstein_v2_best.pt",
"label": "best",
"step": null,
"best_val_loss": null,
"val_loss": null,
"load_seconds": 131.73,
"missing_keys": 0,
"unexpected_keys": 0,
"config": {
"d_model": 4096,
"n_layers": 24,
"n_experts": 4,
"seq_len": 4096
},
"total": 2,
"passed": 0,
"failed": 2,
"timeouts": 0,
"syntax_ok": 0,
"pass_at_1": 0.0,
"syntax_rate": 0.0,
"seconds": 12.92,
"categories": {
"basics": {
"total": 1,
"passed": 0
},
"algorithm": {
"total": 1,
"passed": 0
}
},
"details": [
{
"id": "fizzbuzz",
"category": "basics",
"passed": false,
"syntax_ok": false,
"timeout": false,
"gen_seconds": 8.35,
"response_preview": "Input: 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1",
"code_preview": "Input: 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1",
"stderr": " File \"<string>\", line 1\n Input: 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1\n ^^^\nSyntaxError: invalid syntax\n"
},
{
"id": "two_sum",
"category": "algorithm",
"passed": false,
"syntax_ok": false,
"timeout": false,
"gen_seconds": 4.55,
"response_preview": "- The code snippet is a string. The function should be a string of the string. The function should return the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The funct",
"code_preview": "- The code snippet is a string. The function should be a string of the string. The function should return the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The funct",
"stderr": " File \"<string>\", line 1\n - The code snippet is a string. The function should be a string of the string. The function should return the string.\n ^^^^\nSyntaxError: invalid syntax\n"
}
]
},
{
"path": "/mnt/scratch/checkpoints/sentinelbrain_pretrain_step2471_hf.pt",
"label": "hf_pretrain_base",
"step": 2471,
"best_val_loss": null,
"val_loss": 1.9925728058815002,
"load_seconds": 127.86,
"missing_keys": 0,
"unexpected_keys": 0,
"config": {
"d_model": 4096,
"n_layers": 24,
"n_experts": 4,
"seq_len": 4096
},
"total": 2,
"passed": 0,
"failed": 2,
"timeouts": 0,
"syntax_ok": 2,
"pass_at_1": 0.0,
"syntax_rate": 100.0,
"seconds": 9.23,
"categories": {
"basics": {
"total": 1,
"passed": 0
},
"algorithm": {
"total": 1,
"passed": 0
}
},
"details": [
{
"id": "fizzbuzz",
"category": "basics",
"passed": false,
"syntax_ok": true,
"timeout": false,
"gen_seconds": 4.6,
"response_preview": "def prime_advanced_even(n):\n return n == 1\n\n# Test cases\nn = 10\nresult = prime_advanced_even(n)\nprint(result) # Output: ['Buzz', '3', '5', '7', '11', '13', '17', '19', '23', '25', '27', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31",
"code_preview": "def prime_advanced_even(n):\n return n == 1\n\n# Test cases\nn = 10\nresult = prime_advanced_even(n)\nprint(result) # Output: ['Buzz', '3', '5', '7', '11', '13', '17', '19', '23', '25', '27', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31",
"stderr": "Traceback (most recent call last):\n File \"<string>\", line 10, in <module>\nNameError: name 'fizzbuzz' is not defined\n"
},
{
"id": "two_sum",
"category": "algorithm",
"passed": false,
"syntax_ok": true,
"timeout": false,
"gen_seconds": 4.61,
"response_preview": "```python\ndef add_two_numbers(nums, target):\n return [num for num in nums if num != target]\n```\n\nThe function `add_two_numbers(nums, target)` takes a list of integers `nums` and a target sum `target`, and returns two lists: one for the sum of the elements in `nums` and the target sum `target`, and the indices of the two numbers add up to `target`. The function is then called with these lists as arguments, and the result is printed.\n\nFor example, if the input list is `[1, 2, 3, 4, 5]`",
"code_preview": "def add_two_numbers(nums, target):\n return [num for num in nums if num != target]",
"stderr": "Traceback (most recent call last):\n File \"<string>\", line 5, in <module>\nNameError: name 'two_sum' is not defined\n"
}
]
}
]
} |