akhiilll's picture
forgeenv source snapshot for training job
b0fbec3 verified
[
"tests/test_ast_validator.py::test_attribute_eval_fails",
"tests/test_ast_validator.py::test_builtins_assignment_fails",
"tests/test_ast_validator.py::test_clean_script_passes",
"tests/test_ast_validator.py::test_eval_fails",
"tests/test_ast_validator.py::test_os_import_fails",
"tests/test_ast_validator.py::test_socket_import_fails",
"tests/test_ast_validator.py::test_subprocess_fails",
"tests/test_ast_validator.py::test_syntax_error_fails",
"tests/test_ast_validator.py::test_transformers_import_passes",
"tests/test_environment.py::test_action_validation_rejects_both_or_neither",
"tests/test_environment.py::test_full_episode_lifecycle",
"tests/test_environment.py::test_invalid_action_for_phase",
"tests/test_environment.py::test_reset_returns_drift_gen_observation",
"tests/test_environment.py::test_state_property_is_dict",
"tests/test_environment.py::test_step_before_reset_returns_error",
"tests/test_environment.py::test_teacher_updates_after_episode",
"tests/test_environment.py::test_unified_diff_full_script_replacement",
"tests/test_environment.py::test_unified_diff_round_trip",
"tests/test_evaluators.py::test_alignment_score_anti_correlation",
"tests/test_evaluators.py::test_alignment_score_constant_returns_zero",
"tests/test_evaluators.py::test_alignment_score_perfect_correlation",
"tests/test_evaluators.py::test_drift_gen_reward_combines_signals",
"tests/test_evaluators.py::test_held_out_success",
"tests/test_evaluators.py::test_held_out_workaround_detection",
"tests/test_evaluators.py::test_repetition_penalty_higher_for_duplicates",
"tests/test_evaluators.py::test_uncertainty_handles_empty",
"tests/test_evaluators.py::test_uncertainty_peaks_at_half",
"tests/test_evaluators.py::test_visible_reward_failure",
"tests/test_evaluators.py::test_visible_reward_success",
"tests/test_primitives.py::test_all_8_primitives_registered",
"tests/test_primitives.py::test_breakage_creates_actual_difference",
"tests/test_primitives.py::test_breakage_repair_registry_alignment",
"tests/test_primitives.py::test_change_argument_signature_removes_kwarg",
"tests/test_primitives.py::test_change_return_type_swaps_access",
"tests/test_primitives.py::test_change_tokenizer_behavior_replaces_kwarg",
"tests/test_primitives.py::test_deprecate_import",
"tests/test_primitives.py::test_modify_config_field_changes_value",
"tests/test_primitives.py::test_parse_spec_ignores_extra_kwargs",
"tests/test_primitives.py::test_parse_spec_round_trip",
"tests/test_primitives.py::test_parse_spec_unknown_raises",
"tests/test_primitives.py::test_remove_deprecated_method_marks_call",
"tests/test_primitives.py::test_rename_api_call_word_boundary",
"tests/test_primitives.py::test_restructure_dataset_string_replacement",
"tests/test_primitives.py::test_seed_corpus_has_at_least_10_scripts",
"tests/test_primitives.py::test_task_sampler_categories_are_diverse",
"tests/test_primitives.py::test_task_sampler_difficulty_filter",
"tests/test_primitives.py::test_task_sampler_get_by_id",
"tests/test_roles.py::test_baseline_drift_generator_produces_valid_spec",
"tests/test_roles.py::test_baseline_drift_generator_spec_actually_breaks_script",
"tests/test_roles.py::test_baseline_repair_agent_inverts_breakage_spec",
"tests/test_roles.py::test_baseline_repair_agent_oracle_path",
"tests/test_roles.py::test_extract_diff_strips_chain_of_thought",
"tests/test_roles.py::test_extract_diff_strips_fences",
"tests/test_roles.py::test_looks_like_diff_negative",
"tests/test_roles.py::test_looks_like_diff_positive",
"tests/test_roles.py::test_parse_drift_output_handles_fences",
"tests/test_roles.py::test_parse_drift_output_handles_prose",
"tests/test_roles.py::test_parse_drift_output_returns_none_on_garbage",
"tests/test_roles.py::test_parse_drift_to_primitive_unknown_type",
"tests/test_roles.py::test_parse_drift_to_primitive_validates",
"tests/test_roles.py::test_prompts_are_nonempty",
"tests/test_roles.py::test_render_drift_generator_prompt_includes_inputs",
"tests/test_roles.py::test_render_repair_agent_prompt_includes_error_trace",
"tests/test_simulation_mode.py::test_forbidden_import_fails",
"tests/test_simulation_mode.py::test_seed_is_deterministic",
"tests/test_simulation_mode.py::test_simulation_is_fast",
"tests/test_simulation_mode.py::test_syntax_error_fails",
"tests/test_simulation_mode.py::test_valid_script_can_succeed",
"tests/test_training.py::test_grpo_drift_dry_run_smoke",
"tests/test_training.py::test_grpo_repair_dry_run_smoke",
"tests/test_training.py::test_rollout_one_episode_baseline_no_op_repair",
"tests/test_training.py::test_rollout_one_episode_with_oracle_repair_succeeds",
"tests/test_warmstart.py::test_generate_pairs_covers_multiple_primitive_types",
"tests/test_warmstart.py::test_generate_pairs_produces_minimum_count"
]