| { | |
| "version": "1", | |
| "examples": [ | |
| { | |
| "primitive_type": "ChangeTokenizerBehavior", | |
| "breakage_params": { | |
| "old_kwarg": "truncation", | |
| "old_value": "True", | |
| "new_kwarg": "truncate", | |
| "new_value": "True" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8691781740179649, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "ChangeTokenizerBehavior", | |
| "breakage_params": { | |
| "old_kwarg": "truncation", | |
| "old_value": "True", | |
| "new_kwarg": "truncate", | |
| "new_value": "True" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7612783886548146, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "ChangeTokenizerBehavior", | |
| "breakage_params": { | |
| "old_kwarg": "truncation", | |
| "old_value": "True", | |
| "new_kwarg": "truncate", | |
| "new_value": "True" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7469754695541743, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "ChangeTokenizerBehavior", | |
| "breakage_params": { | |
| "old_kwarg": "truncation", | |
| "old_value": "True", | |
| "new_kwarg": "truncate", | |
| "new_value": "True" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n \n def tokenize_and_align(example):\n- enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncate=True, max_length=64)\n+ enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncation=True, max_length=64)\n word_ids = enc.word_ids()\n labels = []\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8811022610483041, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "bert_ner" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "label", | |
| "new_column": "labels" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n padding=\"max_length\",\n )\n- inputs[\"labels\"] = targets[\"input_ids\"]\n+ inputs[\"label\"] = targets[\"input_ids\"]\n return inputs\n \n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.649018766337638, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "t5_summarization" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8895669291338583, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "ChangeTokenizerBehavior", | |
| "breakage_params": { | |
| "old_kwarg": "truncation", | |
| "old_value": "True", | |
| "new_kwarg": "truncate", | |
| "new_value": "True" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n \n def tokenize_and_align(example):\n- enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncate=True, max_length=64)\n+ enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncation=True, max_length=64)\n word_ids = enc.word_ids()\n labels = []\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8010139080581803, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "bert_ner" | |
| }, | |
| { | |
| "primitive_type": "ChangeArgumentSignature", | |
| "breakage_params": { | |
| "function_name": "TrainingArguments", | |
| "removed_arg": "num_train_epochs", | |
| "added_arg": "max_steps", | |
| "added_value": "1000" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -24,4 +24,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=4,\n logging_steps=5,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8672674881981486, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "gpt2_textgen" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.5887677670351681, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RemoveDeprecatedMethod", | |
| "breakage_params": { | |
| "class_name": "Trainer", | |
| "method_name": "save_model", | |
| "replacement": "save_to_hub" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -41,4 +41,4 @@\n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8791026290604065, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "roberta_sentiment" | |
| }, | |
| { | |
| "primitive_type": "RenameApiCall", | |
| "breakage_params": { | |
| "old_name": "trainer.train", | |
| "new_name": "trainer.start_training" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -40,5 +40,5 @@\n \n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7878403072444018, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8678511447007867, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n def tokenize(examples):\n return tokenizer(\n- examples[\"input_text\"],\n+ examples[\"text\"],\n padding=\"max_length\",\n truncation=True,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.6278346817583994, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "roberta_sentiment" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n def tokenize(examples):\n return tokenizer(\n- examples[\"input_text\"],\n+ examples[\"text\"],\n padding=\"max_length\",\n truncation=True,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.6966312162081871, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "ChangeArgumentSignature", | |
| "breakage_params": { | |
| "function_name": "TrainingArguments", | |
| "removed_arg": "num_train_epochs", | |
| "added_arg": "max_steps", | |
| "added_value": "1000" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -35,4 +35,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=16,\n logging_steps=5,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.666498939726126, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "distilbert_sst2" | |
| }, | |
| { | |
| "primitive_type": "RenameApiCall", | |
| "breakage_params": { | |
| "old_name": "trainer.train", | |
| "new_name": "trainer.start_training" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -63,5 +63,5 @@\n data_collator=DefaultDataCollator(),\n )\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7251096581974675, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "ModifyConfigField", | |
| "breakage_params": { | |
| "config_class": "TrainingArguments", | |
| "field_name": "per_device_train_batch_size", | |
| "new_value": "1" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -37,5 +37,5 @@\n output_dir=\"/tmp/forge_output/checkpoint\",\n num_train_epochs=1,\n- per_device_train_batch_size=1,\n+ per_device_train_batch_size=8,\n logging_steps=5,\n save_strategy=\"epoch\",\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.817717003567182, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "bert_ner" | |
| }, | |
| { | |
| "primitive_type": "ChangeTokenizerBehavior", | |
| "breakage_params": { | |
| "old_kwarg": "truncation", | |
| "old_value": "True", | |
| "new_kwarg": "truncate", | |
| "new_value": "True" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7677847401400664, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "roberta_sentiment" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "label", | |
| "new_column": "labels" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n images = [img.convert(\"RGB\") for img in batch[\"img\"]]\n inputs = processor(images=images, return_tensors=\"pt\")\n- inputs[\"labels\"] = torch.tensor(batch[\"labels\"])\n+ inputs[\"labels\"] = torch.tensor(batch[\"label\"])\n return inputs\n \n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.701744242073817, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "vit_cifar10" | |
| }, | |
| { | |
| "primitive_type": "ChangeArgumentSignature", | |
| "breakage_params": { | |
| "function_name": "TrainingArguments", | |
| "removed_arg": "num_train_epochs", | |
| "added_arg": "max_steps", | |
| "added_value": "1000" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -49,4 +49,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=4,\n logging_steps=5,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.784986144101346, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RemoveDeprecatedMethod", | |
| "breakage_params": { | |
| "class_name": "Trainer", | |
| "method_name": "save_model", | |
| "replacement": "save_to_hub" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -41,4 +41,4 @@\n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.6652959989556817, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "ChangeArgumentSignature", | |
| "breakage_params": { | |
| "function_name": "TrainingArguments", | |
| "removed_arg": "num_train_epochs", | |
| "added_arg": "max_steps", | |
| "added_value": "1000" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -31,4 +31,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=8,\n logging_steps=5,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8362977381032284, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "ChangeTokenizerBehavior", | |
| "breakage_params": { | |
| "old_kwarg": "truncation", | |
| "old_value": "True", | |
| "new_kwarg": "truncate", | |
| "new_value": "True" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8434749013439302, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.775726750559039, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "ChangeArgumentSignature", | |
| "breakage_params": { | |
| "function_name": "TrainingArguments", | |
| "removed_arg": "num_train_epochs", | |
| "added_arg": "max_steps", | |
| "added_value": "1000" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -35,4 +35,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=16,\n logging_steps=5,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.9085137085137085, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "distilbert_sst2" | |
| }, | |
| { | |
| "primitive_type": "RenameApiCall", | |
| "breakage_params": { | |
| "old_name": "trainer.train", | |
| "new_name": "trainer.start_training" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -51,5 +51,5 @@\n )\n \n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7424872199130476, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "bert_ner" | |
| }, | |
| { | |
| "primitive_type": "ChangeArgumentSignature", | |
| "breakage_params": { | |
| "function_name": "TrainingArguments", | |
| "removed_arg": "num_train_epochs", | |
| "added_arg": "max_steps", | |
| "added_value": "1000" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -35,4 +35,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=16,\n logging_steps=5,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8076153403327943, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "distilbert_sst2" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8882627677936846, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RemoveDeprecatedMethod", | |
| "breakage_params": { | |
| "class_name": "Trainer", | |
| "method_name": "save_model", | |
| "replacement": "save_to_hub" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -40,4 +40,4 @@\n \n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.5938341205749403, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "gpt2_textgen" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -15,5 +15,5 @@\n \n def tokenize(examples):\n- return tokenizer(examples[\"input_text\"], truncation=True, max_length=64)\n+ return tokenizer(examples[\"text\"], truncation=True, max_length=64)\n \n \n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.6555927441014835, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "gpt2_textgen" | |
| }, | |
| { | |
| "primitive_type": "RenameApiCall", | |
| "breakage_params": { | |
| "old_name": "trainer.train", | |
| "new_name": "trainer.start_training" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -63,5 +63,5 @@\n data_collator=DefaultDataCollator(),\n )\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.755194754910818, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RenameApiCall", | |
| "breakage_params": { | |
| "old_name": "trainer.train", | |
| "new_name": "trainer.start_training" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -49,5 +49,5 @@\n )\n \n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8654821132433073, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "distilbert_sst2" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "label", | |
| "new_column": "labels" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n images = [img.convert(\"RGB\") for img in batch[\"img\"]]\n inputs = processor(images=images, return_tensors=\"pt\")\n- inputs[\"labels\"] = torch.tensor(batch[\"labels\"])\n+ inputs[\"labels\"] = torch.tensor(batch[\"label\"])\n return inputs\n \n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8319525054273182, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "vit_cifar10" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8109320292832547, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "ModifyConfigField", | |
| "breakage_params": { | |
| "config_class": "TrainingArguments", | |
| "field_name": "per_device_train_batch_size", | |
| "new_value": "1" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -36,5 +36,5 @@\n output_dir=\"/tmp/forge_output/checkpoint\",\n num_train_epochs=1,\n- per_device_train_batch_size=1,\n+ per_device_train_batch_size=16,\n logging_steps=5,\n save_strategy=\"epoch\",\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8409642541924095, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "distilbert_sst2" | |
| }, | |
| { | |
| "primitive_type": "ChangeArgumentSignature", | |
| "breakage_params": { | |
| "function_name": "TrainingArguments", | |
| "removed_arg": "num_train_epochs", | |
| "added_arg": "max_steps", | |
| "added_value": "1000" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -31,4 +31,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=8,\n logging_steps=5,\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8891815856777494, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| }, | |
| { | |
| "primitive_type": "ModifyConfigField", | |
| "breakage_params": { | |
| "config_class": "TrainingArguments", | |
| "field_name": "per_device_train_batch_size", | |
| "new_value": "1" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -29,5 +29,5 @@\n output_dir=\"/tmp/forge_output/checkpoint\",\n num_train_epochs=1,\n- per_device_train_batch_size=1,\n+ per_device_train_batch_size=4,\n logging_steps=5,\n save_strategy=\"epoch\",\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7900720214449505, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "vit_cifar10" | |
| }, | |
| { | |
| "primitive_type": "RemoveDeprecatedMethod", | |
| "breakage_params": { | |
| "class_name": "Trainer", | |
| "method_name": "save_model", | |
| "replacement": "save_to_hub" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -38,4 +38,4 @@\n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7984906001446131, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "vit_cifar10" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "text", | |
| "new_column": "input_text" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.7808289396602227, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "tokens", | |
| "new_column": "words" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n \n def tokenize_and_align(example):\n- enc = tokenizer(example[\"words\"], is_split_into_words=True, truncation=True, max_length=64)\n+ enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncation=True, max_length=64)\n word_ids = enc.word_ids()\n labels = []\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.8699562543975037, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "bert_ner" | |
| }, | |
| { | |
| "primitive_type": "RenameApiCall", | |
| "breakage_params": { | |
| "old_name": "trainer.train", | |
| "new_name": "trainer.start_training" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -63,5 +63,5 @@\n data_collator=DefaultDataCollator(),\n )\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.911495927422025, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RemoveDeprecatedMethod", | |
| "breakage_params": { | |
| "class_name": "Trainer", | |
| "method_name": "save_model", | |
| "replacement": "save_to_hub" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -64,4 +64,4 @@\n )\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.6131321254553196, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "albert_qa" | |
| }, | |
| { | |
| "primitive_type": "RestructureDatasetSchema", | |
| "breakage_params": { | |
| "old_column": "label", | |
| "new_column": "labels" | |
| }, | |
| "error_signature": "", | |
| "repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -22,5 +22,5 @@\n \n dataset = dataset.map(tokenize, batched=True)\n-dataset = dataset.rename_column(\"labels\", \"labels\")\n+dataset = dataset.rename_column(\"label\", \"labels\")\n dataset.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n \n", | |
| "visible_reward": 1.8, | |
| "held_out": { | |
| "executed_cleanly": 1.0, | |
| "checkpoint_valid": 1.0, | |
| "loss_decreased": 0.6040748525323751, | |
| "metrics_in_range": 1.0, | |
| "no_forbidden_workarounds": 1.0, | |
| "intent_preserved": 1.0, | |
| "hidden_tests_passed": 1.0 | |
| }, | |
| "task_id": "electra_classification" | |
| } | |
| ], | |
| "size": 43, | |
| "by_primitive": { | |
| "ChangeTokenizerBehavior": 7, | |
| "RestructureDatasetSchema": 15, | |
| "ChangeArgumentSignature": 7, | |
| "RemoveDeprecatedMethod": 5, | |
| "RenameApiCall": 6, | |
| "ModifyConfigField": 3 | |
| } | |
| } |