forgeenv-source / artifacts /repair_library.json
akhiilll's picture
forgeenv source snapshot for training job
b0fbec3 verified
{
"version": "1",
"examples": [
{
"primitive_type": "ChangeTokenizerBehavior",
"breakage_params": {
"old_kwarg": "truncation",
"old_value": "True",
"new_kwarg": "truncate",
"new_value": "True"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8691781740179649,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "ChangeTokenizerBehavior",
"breakage_params": {
"old_kwarg": "truncation",
"old_value": "True",
"new_kwarg": "truncate",
"new_value": "True"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7612783886548146,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "ChangeTokenizerBehavior",
"breakage_params": {
"old_kwarg": "truncation",
"old_value": "True",
"new_kwarg": "truncate",
"new_value": "True"
},
"error_signature": "",
"repair_diff": "",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7469754695541743,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "ChangeTokenizerBehavior",
"breakage_params": {
"old_kwarg": "truncation",
"old_value": "True",
"new_kwarg": "truncate",
"new_value": "True"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n \n def tokenize_and_align(example):\n- enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncate=True, max_length=64)\n+ enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncation=True, max_length=64)\n word_ids = enc.word_ids()\n labels = []\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8811022610483041,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "bert_ner"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "label",
"new_column": "labels"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n padding=\"max_length\",\n )\n- inputs[\"labels\"] = targets[\"input_ids\"]\n+ inputs[\"label\"] = targets[\"input_ids\"]\n return inputs\n \n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.649018766337638,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "t5_summarization"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8895669291338583,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "ChangeTokenizerBehavior",
"breakage_params": {
"old_kwarg": "truncation",
"old_value": "True",
"new_kwarg": "truncate",
"new_value": "True"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n \n def tokenize_and_align(example):\n- enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncate=True, max_length=64)\n+ enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncation=True, max_length=64)\n word_ids = enc.word_ids()\n labels = []\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8010139080581803,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "bert_ner"
},
{
"primitive_type": "ChangeArgumentSignature",
"breakage_params": {
"function_name": "TrainingArguments",
"removed_arg": "num_train_epochs",
"added_arg": "max_steps",
"added_value": "1000"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -24,4 +24,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=4,\n logging_steps=5,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8672674881981486,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "gpt2_textgen"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.5887677670351681,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RemoveDeprecatedMethod",
"breakage_params": {
"class_name": "Trainer",
"method_name": "save_model",
"replacement": "save_to_hub"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -41,4 +41,4 @@\n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8791026290604065,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "roberta_sentiment"
},
{
"primitive_type": "RenameApiCall",
"breakage_params": {
"old_name": "trainer.train",
"new_name": "trainer.start_training"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -40,5 +40,5 @@\n \n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7878403072444018,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8678511447007867,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n def tokenize(examples):\n return tokenizer(\n- examples[\"input_text\"],\n+ examples[\"text\"],\n padding=\"max_length\",\n truncation=True,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.6278346817583994,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "roberta_sentiment"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n def tokenize(examples):\n return tokenizer(\n- examples[\"input_text\"],\n+ examples[\"text\"],\n padding=\"max_length\",\n truncation=True,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.6966312162081871,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "ChangeArgumentSignature",
"breakage_params": {
"function_name": "TrainingArguments",
"removed_arg": "num_train_epochs",
"added_arg": "max_steps",
"added_value": "1000"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -35,4 +35,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=16,\n logging_steps=5,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.666498939726126,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "distilbert_sst2"
},
{
"primitive_type": "RenameApiCall",
"breakage_params": {
"old_name": "trainer.train",
"new_name": "trainer.start_training"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -63,5 +63,5 @@\n data_collator=DefaultDataCollator(),\n )\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7251096581974675,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "ModifyConfigField",
"breakage_params": {
"config_class": "TrainingArguments",
"field_name": "per_device_train_batch_size",
"new_value": "1"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -37,5 +37,5 @@\n output_dir=\"/tmp/forge_output/checkpoint\",\n num_train_epochs=1,\n- per_device_train_batch_size=1,\n+ per_device_train_batch_size=8,\n logging_steps=5,\n save_strategy=\"epoch\",\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.817717003567182,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "bert_ner"
},
{
"primitive_type": "ChangeTokenizerBehavior",
"breakage_params": {
"old_kwarg": "truncation",
"old_value": "True",
"new_kwarg": "truncate",
"new_value": "True"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7677847401400664,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "roberta_sentiment"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "label",
"new_column": "labels"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n images = [img.convert(\"RGB\") for img in batch[\"img\"]]\n inputs = processor(images=images, return_tensors=\"pt\")\n- inputs[\"labels\"] = torch.tensor(batch[\"labels\"])\n+ inputs[\"labels\"] = torch.tensor(batch[\"label\"])\n return inputs\n \n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.701744242073817,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "vit_cifar10"
},
{
"primitive_type": "ChangeArgumentSignature",
"breakage_params": {
"function_name": "TrainingArguments",
"removed_arg": "num_train_epochs",
"added_arg": "max_steps",
"added_value": "1000"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -49,4 +49,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=4,\n logging_steps=5,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.784986144101346,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RemoveDeprecatedMethod",
"breakage_params": {
"class_name": "Trainer",
"method_name": "save_model",
"replacement": "save_to_hub"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -41,4 +41,4 @@\n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.6652959989556817,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "ChangeArgumentSignature",
"breakage_params": {
"function_name": "TrainingArguments",
"removed_arg": "num_train_epochs",
"added_arg": "max_steps",
"added_value": "1000"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -31,4 +31,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=8,\n logging_steps=5,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8362977381032284,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "ChangeTokenizerBehavior",
"breakage_params": {
"old_kwarg": "truncation",
"old_value": "True",
"new_kwarg": "truncate",
"new_value": "True"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n examples[\"text\"],\n padding=\"max_length\",\n- truncate=True,\n+ truncation=True,\n max_length=64,\n )\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8434749013439302,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.775726750559039,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "ChangeArgumentSignature",
"breakage_params": {
"function_name": "TrainingArguments",
"removed_arg": "num_train_epochs",
"added_arg": "max_steps",
"added_value": "1000"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -35,4 +35,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=16,\n logging_steps=5,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.9085137085137085,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "distilbert_sst2"
},
{
"primitive_type": "RenameApiCall",
"breakage_params": {
"old_name": "trainer.train",
"new_name": "trainer.start_training"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -51,5 +51,5 @@\n )\n \n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7424872199130476,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "bert_ner"
},
{
"primitive_type": "ChangeArgumentSignature",
"breakage_params": {
"function_name": "TrainingArguments",
"removed_arg": "num_train_epochs",
"added_arg": "max_steps",
"added_value": "1000"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -35,4 +35,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=16,\n logging_steps=5,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8076153403327943,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "distilbert_sst2"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8882627677936846,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RemoveDeprecatedMethod",
"breakage_params": {
"class_name": "Trainer",
"method_name": "save_model",
"replacement": "save_to_hub"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -40,4 +40,4 @@\n \n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.5938341205749403,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "gpt2_textgen"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -15,5 +15,5 @@\n \n def tokenize(examples):\n- return tokenizer(examples[\"input_text\"], truncation=True, max_length=64)\n+ return tokenizer(examples[\"text\"], truncation=True, max_length=64)\n \n \n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.6555927441014835,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "gpt2_textgen"
},
{
"primitive_type": "RenameApiCall",
"breakage_params": {
"old_name": "trainer.train",
"new_name": "trainer.start_training"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -63,5 +63,5 @@\n data_collator=DefaultDataCollator(),\n )\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.755194754910818,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RenameApiCall",
"breakage_params": {
"old_name": "trainer.train",
"new_name": "trainer.start_training"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -49,5 +49,5 @@\n )\n \n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8654821132433073,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "distilbert_sst2"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "label",
"new_column": "labels"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -16,5 +16,5 @@\n images = [img.convert(\"RGB\") for img in batch[\"img\"]]\n inputs = processor(images=images, return_tensors=\"pt\")\n- inputs[\"labels\"] = torch.tensor(batch[\"labels\"])\n+ inputs[\"labels\"] = torch.tensor(batch[\"label\"])\n return inputs\n \n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8319525054273182,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "vit_cifar10"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8109320292832547,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "ModifyConfigField",
"breakage_params": {
"config_class": "TrainingArguments",
"field_name": "per_device_train_batch_size",
"new_value": "1"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -36,5 +36,5 @@\n output_dir=\"/tmp/forge_output/checkpoint\",\n num_train_epochs=1,\n- per_device_train_batch_size=1,\n+ per_device_train_batch_size=16,\n logging_steps=5,\n save_strategy=\"epoch\",\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8409642541924095,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "distilbert_sst2"
},
{
"primitive_type": "ChangeArgumentSignature",
"breakage_params": {
"function_name": "TrainingArguments",
"removed_arg": "num_train_epochs",
"added_arg": "max_steps",
"added_value": "1000"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -31,4 +31,5 @@\n training_args = TrainingArguments(\n output_dir=\"/tmp/forge_output/checkpoint\",\n+ num_train_epochs=1,\n per_device_train_batch_size=8,\n logging_steps=5,\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8891815856777494,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
},
{
"primitive_type": "ModifyConfigField",
"breakage_params": {
"config_class": "TrainingArguments",
"field_name": "per_device_train_batch_size",
"new_value": "1"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -29,5 +29,5 @@\n output_dir=\"/tmp/forge_output/checkpoint\",\n num_train_epochs=1,\n- per_device_train_batch_size=1,\n+ per_device_train_batch_size=4,\n logging_steps=5,\n save_strategy=\"epoch\",\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7900720214449505,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "vit_cifar10"
},
{
"primitive_type": "RemoveDeprecatedMethod",
"breakage_params": {
"class_name": "Trainer",
"method_name": "save_model",
"replacement": "save_to_hub"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -38,4 +38,4 @@\n trainer = Trainer(model=model, args=training_args, train_dataset=dataset)\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7984906001446131,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "vit_cifar10"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "text",
"new_column": "input_text"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -26,5 +26,5 @@\n answer = examples[\"answers\"][i]\n start_char = answer[\"answer_start\"][0]\n- end_char = start_char + len(answer[\"input_text\"][0])\n+ end_char = start_char + len(answer[\"text\"][0])\n \n token_start = next(\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.7808289396602227,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "tokens",
"new_column": "words"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -14,5 +14,5 @@\n \n def tokenize_and_align(example):\n- enc = tokenizer(example[\"words\"], is_split_into_words=True, truncation=True, max_length=64)\n+ enc = tokenizer(example[\"tokens\"], is_split_into_words=True, truncation=True, max_length=64)\n word_ids = enc.word_ids()\n labels = []\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.8699562543975037,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "bert_ner"
},
{
"primitive_type": "RenameApiCall",
"breakage_params": {
"old_name": "trainer.train",
"new_name": "trainer.start_training"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -63,5 +63,5 @@\n data_collator=DefaultDataCollator(),\n )\n-trainer.start_training()\n+trainer.train()\n trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.911495927422025,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RemoveDeprecatedMethod",
"breakage_params": {
"class_name": "Trainer",
"method_name": "save_model",
"replacement": "save_to_hub"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -64,4 +64,4 @@\n )\n trainer.train()\n-trainer.save_model_DEPRECATED(\"/tmp/forge_output/checkpoint\")\n+trainer.save_model(\"/tmp/forge_output/checkpoint\")\n print(\"TRAINING_COMPLETE\")\n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.6131321254553196,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "albert_qa"
},
{
"primitive_type": "RestructureDatasetSchema",
"breakage_params": {
"old_column": "label",
"new_column": "labels"
},
"error_signature": "",
"repair_diff": "--- a/train.py\n+++ b/train.py\n@@ -22,5 +22,5 @@\n \n dataset = dataset.map(tokenize, batched=True)\n-dataset = dataset.rename_column(\"labels\", \"labels\")\n+dataset = dataset.rename_column(\"label\", \"labels\")\n dataset.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n \n",
"visible_reward": 1.8,
"held_out": {
"executed_cleanly": 1.0,
"checkpoint_valid": 1.0,
"loss_decreased": 0.6040748525323751,
"metrics_in_range": 1.0,
"no_forbidden_workarounds": 1.0,
"intent_preserved": 1.0,
"hidden_tests_passed": 1.0
},
"task_id": "electra_classification"
}
],
"size": 43,
"by_primitive": {
"ChangeTokenizerBehavior": 7,
"RestructureDatasetSchema": 15,
"ChangeArgumentSignature": 7,
"RemoveDeprecatedMethod": 5,
"RenameApiCall": 6,
"ModifyConfigField": 3
}
}