| { |
| "created_at": "2026-05-03 10:09:39", |
| "device": "cuda", |
| "problem_count": 2, |
| "reference": "best", |
| "comparison": [ |
| { |
| "label": "best", |
| "pass_at_1": 0.0, |
| "syntax_rate": 0.0, |
| "delta_vs_reference": 0.0 |
| }, |
| { |
| "label": "hf_pretrain_base", |
| "pass_at_1": 0.0, |
| "syntax_rate": 100.0, |
| "delta_vs_reference": 0.0 |
| } |
| ], |
| "results": [ |
| { |
| "path": "/mnt/scratch/checkpoints/frankenstein_v2_best.pt", |
| "label": "best", |
| "step": null, |
| "best_val_loss": null, |
| "val_loss": null, |
| "load_seconds": 131.73, |
| "missing_keys": 0, |
| "unexpected_keys": 0, |
| "config": { |
| "d_model": 4096, |
| "n_layers": 24, |
| "n_experts": 4, |
| "seq_len": 4096 |
| }, |
| "total": 2, |
| "passed": 0, |
| "failed": 2, |
| "timeouts": 0, |
| "syntax_ok": 0, |
| "pass_at_1": 0.0, |
| "syntax_rate": 0.0, |
| "seconds": 12.92, |
| "categories": { |
| "basics": { |
| "total": 1, |
| "passed": 0 |
| }, |
| "algorithm": { |
| "total": 1, |
| "passed": 0 |
| } |
| }, |
| "details": [ |
| { |
| "id": "fizzbuzz", |
| "category": "basics", |
| "passed": false, |
| "syntax_ok": false, |
| "timeout": false, |
| "gen_seconds": 8.35, |
| "response_preview": "Input: 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1", |
| "code_preview": "Input: 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1", |
| "stderr": " File \"<string>\", line 1\n Input: 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1\n ^^^\nSyntaxError: invalid syntax\n" |
| }, |
| { |
| "id": "two_sum", |
| "category": "algorithm", |
| "passed": false, |
| "syntax_ok": false, |
| "timeout": false, |
| "gen_seconds": 4.55, |
| "response_preview": "- The code snippet is a string. The function should be a string of the string. The function should return the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The funct", |
| "code_preview": "- The code snippet is a string. The function should be a string of the string. The function should return the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The function should handle the string as input and returns the string.\n\n- The funct", |
| "stderr": " File \"<string>\", line 1\n - The code snippet is a string. The function should be a string of the string. The function should return the string.\n ^^^^\nSyntaxError: invalid syntax\n" |
| } |
| ] |
| }, |
| { |
| "path": "/mnt/scratch/checkpoints/sentinelbrain_pretrain_step2471_hf.pt", |
| "label": "hf_pretrain_base", |
| "step": 2471, |
| "best_val_loss": null, |
| "val_loss": 1.9925728058815002, |
| "load_seconds": 127.86, |
| "missing_keys": 0, |
| "unexpected_keys": 0, |
| "config": { |
| "d_model": 4096, |
| "n_layers": 24, |
| "n_experts": 4, |
| "seq_len": 4096 |
| }, |
| "total": 2, |
| "passed": 0, |
| "failed": 2, |
| "timeouts": 0, |
| "syntax_ok": 2, |
| "pass_at_1": 0.0, |
| "syntax_rate": 100.0, |
| "seconds": 9.23, |
| "categories": { |
| "basics": { |
| "total": 1, |
| "passed": 0 |
| }, |
| "algorithm": { |
| "total": 1, |
| "passed": 0 |
| } |
| }, |
| "details": [ |
| { |
| "id": "fizzbuzz", |
| "category": "basics", |
| "passed": false, |
| "syntax_ok": true, |
| "timeout": false, |
| "gen_seconds": 4.6, |
| "response_preview": "def prime_advanced_even(n):\n return n == 1\n\n# Test cases\nn = 10\nresult = prime_advanced_even(n)\nprint(result) # Output: ['Buzz', '3', '5', '7', '11', '13', '17', '19', '23', '25', '27', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31", |
| "code_preview": "def prime_advanced_even(n):\n return n == 1\n\n# Test cases\nn = 10\nresult = prime_advanced_even(n)\nprint(result) # Output: ['Buzz', '3', '5', '7', '11', '13', '17', '19', '23', '25', '27', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31', '31", |
| "stderr": "Traceback (most recent call last):\n File \"<string>\", line 10, in <module>\nNameError: name 'fizzbuzz' is not defined\n" |
| }, |
| { |
| "id": "two_sum", |
| "category": "algorithm", |
| "passed": false, |
| "syntax_ok": true, |
| "timeout": false, |
| "gen_seconds": 4.61, |
| "response_preview": "```python\ndef add_two_numbers(nums, target):\n return [num for num in nums if num != target]\n```\n\nThe function `add_two_numbers(nums, target)` takes a list of integers `nums` and a target sum `target`, and returns two lists: one for the sum of the elements in `nums` and the target sum `target`, and the indices of the two numbers add up to `target`. The function is then called with these lists as arguments, and the result is printed.\n\nFor example, if the input list is `[1, 2, 3, 4, 5]`", |
| "code_preview": "def add_two_numbers(nums, target):\n return [num for num in nums if num != target]", |
| "stderr": "Traceback (most recent call last):\n File \"<string>\", line 5, in <module>\nNameError: name 'two_sum' is not defined\n" |
| } |
| ] |
| } |
| ] |
| } |