[ "tests/test_client_ws.py::test_websocket_round_trip", "tests/test_dataset.py::test_dataset_observed_arrays_match_state_variables", "tests/test_dataset.py::test_eval_dataset_marks_held_out_rows", "tests/test_dataset.py::test_training_dataset_default_curriculum_is_demo_systems", "tests/test_dataset.py::test_training_dataset_default_curriculum_is_supported_systems", "tests/test_dataset.py::test_training_dataset_explicit_system_ids_override_default", "tests/test_dataset.py::test_training_dataset_has_expected_schema", "tests/test_dataset.py::test_training_dataset_prompts_are_chat_lists", "tests/test_dataset.py::test_training_dataset_rejects_empty_system_ids", "tests/test_dataset.py::test_training_dataset_rejects_unknown_system_ids", "tests/test_dataset.py::test_training_dataset_uses_only_train_tiers", "tests/test_environment.py::test_episode_terminates_on_convergence", "tests/test_environment.py::test_history_accumulates_across_turns", "tests/test_environment.py::test_max_turns_terminates_episode", "tests/test_environment.py::test_progress_reward_rewards_improvement", "tests/test_environment.py::test_reset_returns_well_formed_observation", "tests/test_environment.py::test_state_property_exposes_episode_id", "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = -9.81 / (y - y)]", "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = exp(exp(exp(y)))]", "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = log(0 * y)]", "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(-theta**2 - 1)]", "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(dtheta**2 + theta**2) * sin(theta)]", "tests/test_environment.py::test_step_with_ground_truth_rewards_high", "tests/test_environment.py::test_step_with_unparseable_equation_short_circuits", "tests/test_interactive_api.py::test_budget_exhaustion_returns_409_on_next_step", "tests/test_interactive_api.py::test_cors_preflight_for_dev_origin", "tests/test_interactive_api.py::test_llm_step_after_budget_exhaustion_returns_409", "tests/test_interactive_api.py::test_llm_step_drives_a_turn_using_injected_policy", "tests/test_interactive_api.py::test_llm_step_handles_unparseable_completion_as_format_zero", "tests/test_interactive_api.py::test_llm_step_runs_full_episode_with_three_canned_turns", "tests/test_interactive_api.py::test_models_endpoint_returns_empty_with_error_when_daemon_unavailable", "tests/test_interactive_api.py::test_models_endpoint_returns_injected_list", "tests/test_interactive_api.py::test_session_lifecycle_create_step_delete", "tests/test_interactive_api.py::test_session_lifecycle_create_summary_delete", "tests/test_interactive_api.py::test_step_with_ground_truth_marks_done", "tests/test_interactive_api.py::test_systems_endpoint_excludes_held_out_tier", "tests/test_interactive_api.py::test_systems_endpoint_returns_demo_curriculum_in_order", "tests/test_interactive_api.py::test_systems_endpoint_returns_supported_systems_in_order", "tests/test_interactive_api.py::test_unknown_session_id_returns_404", "tests/test_interactive_api.py::test_unknown_system_id_returns_400", "tests/test_interactive_api.py::test_unparseable_equation_returns_zero_format_not_500", "tests/test_metrics_diagnostic.py::test_diag_scores_perfect_on_identical_trajectory", "tests/test_metrics_diagnostic.py::test_diag_scores_stay_informative_when_r2_collapses", "tests/test_metrics_diagnostic.py::test_freq_hint_in_mismatch_summary_for_freq_mismatch", "tests/test_metrics_diagnostic.py::test_freq_score_falls_back_for_non_oscillatory_signal", "tests/test_metrics_diagnostic.py::test_reward_total_unchanged_by_diagnostic_fields", "tests/test_parser.py::test_alias_does_not_fire_when_velocity_state_is_named_dvar", "tests/test_parser.py::test_alias_only_replaces_word_boundary_matches", "tests/test_parser.py::test_bare_dx_alias_substitutes_for_vx", "tests/test_parser.py::test_basic_equation_round_trips", "tests/test_parser.py::test_caret_is_accepted_as_power_synonym", "tests/test_parser.py::test_decimal_literals_are_not_misread_as_attribute_access", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-computed-name call]", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(y, vy)-Collection]", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[lambda x: x-Lambda]", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[vy[0]-Array indexing]", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y == vy-Comparisons]", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y and vy-Boolean]", "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y if vy > 0 else -y-Conditional]", "tests/test_parser.py::test_dotted_attribute_access_is_rejected_with_clear_error", "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[math.sin(theta)]", "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[np.exp(-theta**2)]", "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[numpy.cos(theta) + 1]", "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[scipy.special.expit(theta)]", "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[theta.diff()]", "tests/test_parser.py::test_dx_dt_alias_substitutes_for_vx_when_velocity_state_exists", "tests/test_parser.py::test_dy_dt_alias_substitutes_for_vy", "tests/test_parser.py::test_grammar_hint_documents_velocity_convention", "tests/test_parser.py::test_keyword_arguments_in_call_are_rejected_with_specific_hint", "tests/test_parser.py::test_multiple_equations_split_on_semicolons_keep_alias_behaviour", "tests/test_parser.py::test_only_parse_error_ever_escapes_the_parser", "tests/test_parser.py::test_sympy_internal_errors_become_parse_errors_not_attribute_errors", "tests/test_parser.py::test_unknown_dx_in_system_without_paired_velocity_includes_hint", "tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy", "tests/test_parser.py::test_unknown_t_emits_autonomy_hint", "tests/test_prompt.py::test_build_prompt_returns_chat_pair", "tests/test_prompt.py::test_history_block_surfaces_dense_reward_components", "tests/test_prompt.py::test_history_block_tolerates_missing_reward_components", "tests/test_prompt.py::test_history_uses_equation_field_name_not_shorthand", "tests/test_prompt.py::test_parse_completion_accepts_capitalised_keys", "tests/test_prompt.py::test_parse_completion_accepts_eqn_synonym", "tests/test_prompt.py::test_parse_completion_accepts_other_equation_synonyms", "tests/test_prompt.py::test_parse_completion_accepts_parameters_synonym", "tests/test_prompt.py::test_parse_completion_accepts_reasoning_synonym", "tests/test_prompt.py::test_parse_completion_canonical_key_wins_over_synonym", "tests/test_prompt.py::test_parse_completion_coerces_string_params", "tests/test_prompt.py::test_parse_completion_extracts_clean_json", "tests/test_prompt.py::test_parse_completion_handles_code_fences", "tests/test_prompt.py::test_parse_completion_handles_latex_braces_inside_json_string", "tests/test_prompt.py::test_parse_completion_normalizes_latex_frac_and_caret", "tests/test_prompt.py::test_parse_completion_picks_first_object_when_text_has_multiple", "tests/test_prompt.py::test_parse_completion_with_no_json_falls_back_to_raw_text", "tests/test_prompt.py::test_parse_completion_with_no_json_yields_empty_equation", "tests/test_prompt.py::test_render_includes_history_when_present", "tests/test_prompt.py::test_render_includes_metadata_block", "tests/test_prompt.py::test_render_includes_trajectory_samples", "tests/test_prompt.py::test_render_omits_history_block_when_empty", "tests/test_prompt.py::test_system_message_locks_in_canonical_field_name", "tests/test_providers.py::test_format_provider_error_pinpoints_auth_failure", "tests/test_providers.py::test_format_provider_error_pinpoints_missing_model", "tests/test_providers.py::test_format_provider_error_pinpoints_unreachable_endpoint", "tests/test_providers.py::test_resolve_api_key_browser_supplied_wins", "tests/test_providers.py::test_resolve_api_key_falls_back_to_hf_token_for_hf_router", "tests/test_providers.py::test_resolve_api_key_falls_back_to_openai_env", "tests/test_providers.py::test_resolve_api_key_for_ollama_returns_placeholder_when_no_env", "tests/test_providers.py::test_resolve_api_key_returns_none_for_unknown_url", "tests/test_providers.py::test_resolve_api_key_uses_huggingface_api_key_if_hf_token_missing", "tests/test_providers_hf.py::test_full_episode_flows_visitor_config_to_openai_client_unchanged", "tests/test_providers_hf.py::test_full_episode_recovers_from_first_call_response_format_400", "tests/test_providers_hf.py::test_hf_router_401_surfaces_inference_providers_permission_hint", "tests/test_providers_hf.py::test_hf_router_404_surfaces_warm_provider_hint", "tests/test_providers_hf.py::test_hf_router_base_url_is_canonical", "tests/test_providers_hf.py::test_hf_router_client_is_constructed_with_visitor_token", "tests/test_providers_hf.py::test_hf_router_client_uses_hf_token_env_when_visitor_omits_key", "tests/test_providers_hf.py::test_hf_router_connection_failure_surfaces_network_hint", "tests/test_providers_hf.py::test_hf_router_retries_without_response_format_on_bad_request", "tests/test_providers_hf.py::test_hf_router_succeeds_on_first_try_when_provider_supports_json", "tests/test_providers_hf.py::test_hf_router_timeout_surfaces_network_hint", "tests/test_registry.py::test_demo_curriculum_is_non_empty", "tests/test_registry.py::test_demo_curriculum_only_references_registered_systems", "tests/test_registry.py::test_every_demo_system_instantiates_cleanly", "tests/test_registry.py::test_every_supported_system_instantiates_cleanly", "tests/test_registry.py::test_list_demo_systems_preserves_declared_order", "tests/test_registry.py::test_list_supported_systems_preserves_declared_order", "tests/test_registry.py::test_supported_systems_is_non_empty", "tests/test_registry.py::test_supported_systems_only_references_registered_systems", "tests/test_scorer.py::test_scorer_caches_by_key", "tests/test_scorer.py::test_scorer_progress_shrinks_when_previous_total_is_high", "tests/test_scorer.py::test_scorer_returns_high_match_for_ground_truth", "tests/test_scorer.py::test_scorer_returns_zero_format_on_garbage", "tests/test_sft_dataset.py::test_build_sft_dataset_completions_are_valid_json_action", "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size", "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size_matches_demo", "tests/test_sft_dataset.py::test_build_sft_dataset_default_scales_with_instances_per_system", "tests/test_sft_dataset.py::test_build_sft_dataset_default_size_matches_supported_systems", "tests/test_sft_dataset.py::test_build_sft_dataset_explicit_system_ids_override_default", "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_empty_system_ids", "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_unknown_system_ids" ]