Spaces:
Sleeping
Sleeping
| [ | |
| "tests/test_client_ws.py::test_websocket_round_trip", | |
| "tests/test_dataset.py::test_dataset_observed_arrays_match_state_variables", | |
| "tests/test_dataset.py::test_eval_dataset_marks_held_out_rows", | |
| "tests/test_dataset.py::test_training_dataset_default_curriculum_is_demo_systems", | |
| "tests/test_dataset.py::test_training_dataset_default_curriculum_is_supported_systems", | |
| "tests/test_dataset.py::test_training_dataset_explicit_system_ids_override_default", | |
| "tests/test_dataset.py::test_training_dataset_has_expected_schema", | |
| "tests/test_dataset.py::test_training_dataset_prompts_are_chat_lists", | |
| "tests/test_dataset.py::test_training_dataset_rejects_empty_system_ids", | |
| "tests/test_dataset.py::test_training_dataset_rejects_unknown_system_ids", | |
| "tests/test_dataset.py::test_training_dataset_uses_only_train_tiers", | |
| "tests/test_environment.py::test_episode_terminates_on_convergence", | |
| "tests/test_environment.py::test_history_accumulates_across_turns", | |
| "tests/test_environment.py::test_max_turns_terminates_episode", | |
| "tests/test_environment.py::test_progress_reward_rewards_improvement", | |
| "tests/test_environment.py::test_reset_returns_well_formed_observation", | |
| "tests/test_environment.py::test_state_property_exposes_episode_id", | |
| "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = -9.81 / (y - y)]", | |
| "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = exp(exp(exp(y)))]", | |
| "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = log(0 * y)]", | |
| "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(-theta**2 - 1)]", | |
| "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(dtheta**2 + theta**2) * sin(theta)]", | |
| "tests/test_environment.py::test_step_with_ground_truth_rewards_high", | |
| "tests/test_environment.py::test_step_with_unparseable_equation_short_circuits", | |
| "tests/test_interactive_api.py::test_budget_exhaustion_returns_409_on_next_step", | |
| "tests/test_interactive_api.py::test_cors_preflight_for_dev_origin", | |
| "tests/test_interactive_api.py::test_llm_step_after_budget_exhaustion_returns_409", | |
| "tests/test_interactive_api.py::test_llm_step_drives_a_turn_using_injected_policy", | |
| "tests/test_interactive_api.py::test_llm_step_handles_unparseable_completion_as_format_zero", | |
| "tests/test_interactive_api.py::test_llm_step_runs_full_episode_with_three_canned_turns", | |
| "tests/test_interactive_api.py::test_models_endpoint_returns_empty_with_error_when_daemon_unavailable", | |
| "tests/test_interactive_api.py::test_models_endpoint_returns_injected_list", | |
| "tests/test_interactive_api.py::test_session_lifecycle_create_step_delete", | |
| "tests/test_interactive_api.py::test_session_lifecycle_create_summary_delete", | |
| "tests/test_interactive_api.py::test_step_with_ground_truth_marks_done", | |
| "tests/test_interactive_api.py::test_systems_endpoint_excludes_held_out_tier", | |
| "tests/test_interactive_api.py::test_systems_endpoint_returns_demo_curriculum_in_order", | |
| "tests/test_interactive_api.py::test_systems_endpoint_returns_supported_systems_in_order", | |
| "tests/test_interactive_api.py::test_unknown_session_id_returns_404", | |
| "tests/test_interactive_api.py::test_unknown_system_id_returns_400", | |
| "tests/test_interactive_api.py::test_unparseable_equation_returns_zero_format_not_500", | |
| "tests/test_metrics_diagnostic.py::test_diag_scores_perfect_on_identical_trajectory", | |
| "tests/test_metrics_diagnostic.py::test_diag_scores_stay_informative_when_r2_collapses", | |
| "tests/test_metrics_diagnostic.py::test_freq_hint_in_mismatch_summary_for_freq_mismatch", | |
| "tests/test_metrics_diagnostic.py::test_freq_score_falls_back_for_non_oscillatory_signal", | |
| "tests/test_metrics_diagnostic.py::test_reward_total_unchanged_by_diagnostic_fields", | |
| "tests/test_parser.py::test_alias_does_not_fire_when_velocity_state_is_named_dvar", | |
| "tests/test_parser.py::test_alias_only_replaces_word_boundary_matches", | |
| "tests/test_parser.py::test_bare_dx_alias_substitutes_for_vx", | |
| "tests/test_parser.py::test_basic_equation_round_trips", | |
| "tests/test_parser.py::test_caret_is_accepted_as_power_synonym", | |
| "tests/test_parser.py::test_decimal_literals_are_not_misread_as_attribute_access", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-computed-name call]", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(y, vy)-Collection]", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[lambda x: x-Lambda]", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[vy[0]-Array indexing]", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y == vy-Comparisons]", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y and vy-Boolean]", | |
| "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y if vy > 0 else -y-Conditional]", | |
| "tests/test_parser.py::test_dotted_attribute_access_is_rejected_with_clear_error", | |
| "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[math.sin(theta)]", | |
| "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[np.exp(-theta**2)]", | |
| "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[numpy.cos(theta) + 1]", | |
| "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[scipy.special.expit(theta)]", | |
| "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[theta.diff()]", | |
| "tests/test_parser.py::test_dx_dt_alias_substitutes_for_vx_when_velocity_state_exists", | |
| "tests/test_parser.py::test_dy_dt_alias_substitutes_for_vy", | |
| "tests/test_parser.py::test_grammar_hint_documents_velocity_convention", | |
| "tests/test_parser.py::test_keyword_arguments_in_call_are_rejected_with_specific_hint", | |
| "tests/test_parser.py::test_multiple_equations_split_on_semicolons_keep_alias_behaviour", | |
| "tests/test_parser.py::test_only_parse_error_ever_escapes_the_parser", | |
| "tests/test_parser.py::test_sympy_internal_errors_become_parse_errors_not_attribute_errors", | |
| "tests/test_parser.py::test_unknown_dx_in_system_without_paired_velocity_includes_hint", | |
| "tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy", | |
| "tests/test_parser.py::test_unknown_t_emits_autonomy_hint", | |
| "tests/test_prompt.py::test_build_prompt_returns_chat_pair", | |
| "tests/test_prompt.py::test_history_block_surfaces_dense_reward_components", | |
| "tests/test_prompt.py::test_history_block_tolerates_missing_reward_components", | |
| "tests/test_prompt.py::test_history_uses_equation_field_name_not_shorthand", | |
| "tests/test_prompt.py::test_parse_completion_accepts_capitalised_keys", | |
| "tests/test_prompt.py::test_parse_completion_accepts_eqn_synonym", | |
| "tests/test_prompt.py::test_parse_completion_accepts_other_equation_synonyms", | |
| "tests/test_prompt.py::test_parse_completion_accepts_parameters_synonym", | |
| "tests/test_prompt.py::test_parse_completion_accepts_reasoning_synonym", | |
| "tests/test_prompt.py::test_parse_completion_canonical_key_wins_over_synonym", | |
| "tests/test_prompt.py::test_parse_completion_coerces_string_params", | |
| "tests/test_prompt.py::test_parse_completion_extracts_clean_json", | |
| "tests/test_prompt.py::test_parse_completion_handles_code_fences", | |
| "tests/test_prompt.py::test_parse_completion_handles_latex_braces_inside_json_string", | |
| "tests/test_prompt.py::test_parse_completion_normalizes_latex_frac_and_caret", | |
| "tests/test_prompt.py::test_parse_completion_picks_first_object_when_text_has_multiple", | |
| "tests/test_prompt.py::test_parse_completion_with_no_json_falls_back_to_raw_text", | |
| "tests/test_prompt.py::test_parse_completion_with_no_json_yields_empty_equation", | |
| "tests/test_prompt.py::test_render_includes_history_when_present", | |
| "tests/test_prompt.py::test_render_includes_metadata_block", | |
| "tests/test_prompt.py::test_render_includes_trajectory_samples", | |
| "tests/test_prompt.py::test_render_omits_history_block_when_empty", | |
| "tests/test_prompt.py::test_system_message_locks_in_canonical_field_name", | |
| "tests/test_providers.py::test_format_provider_error_pinpoints_auth_failure", | |
| "tests/test_providers.py::test_format_provider_error_pinpoints_missing_model", | |
| "tests/test_providers.py::test_format_provider_error_pinpoints_unreachable_endpoint", | |
| "tests/test_providers.py::test_resolve_api_key_browser_supplied_wins", | |
| "tests/test_providers.py::test_resolve_api_key_falls_back_to_hf_token_for_hf_router", | |
| "tests/test_providers.py::test_resolve_api_key_falls_back_to_openai_env", | |
| "tests/test_providers.py::test_resolve_api_key_for_ollama_returns_placeholder_when_no_env", | |
| "tests/test_providers.py::test_resolve_api_key_returns_none_for_unknown_url", | |
| "tests/test_providers.py::test_resolve_api_key_uses_huggingface_api_key_if_hf_token_missing", | |
| "tests/test_providers_hf.py::test_full_episode_flows_visitor_config_to_openai_client_unchanged", | |
| "tests/test_providers_hf.py::test_full_episode_recovers_from_first_call_response_format_400", | |
| "tests/test_providers_hf.py::test_hf_router_401_surfaces_inference_providers_permission_hint", | |
| "tests/test_providers_hf.py::test_hf_router_404_surfaces_warm_provider_hint", | |
| "tests/test_providers_hf.py::test_hf_router_base_url_is_canonical", | |
| "tests/test_providers_hf.py::test_hf_router_client_is_constructed_with_visitor_token", | |
| "tests/test_providers_hf.py::test_hf_router_client_uses_hf_token_env_when_visitor_omits_key", | |
| "tests/test_providers_hf.py::test_hf_router_connection_failure_surfaces_network_hint", | |
| "tests/test_providers_hf.py::test_hf_router_retries_without_response_format_on_bad_request", | |
| "tests/test_providers_hf.py::test_hf_router_succeeds_on_first_try_when_provider_supports_json", | |
| "tests/test_providers_hf.py::test_hf_router_timeout_surfaces_network_hint", | |
| "tests/test_registry.py::test_demo_curriculum_is_non_empty", | |
| "tests/test_registry.py::test_demo_curriculum_only_references_registered_systems", | |
| "tests/test_registry.py::test_every_demo_system_instantiates_cleanly", | |
| "tests/test_registry.py::test_every_supported_system_instantiates_cleanly", | |
| "tests/test_registry.py::test_list_demo_systems_preserves_declared_order", | |
| "tests/test_registry.py::test_list_supported_systems_preserves_declared_order", | |
| "tests/test_registry.py::test_supported_systems_is_non_empty", | |
| "tests/test_registry.py::test_supported_systems_only_references_registered_systems", | |
| "tests/test_scorer.py::test_scorer_caches_by_key", | |
| "tests/test_scorer.py::test_scorer_progress_shrinks_when_previous_total_is_high", | |
| "tests/test_scorer.py::test_scorer_returns_high_match_for_ground_truth", | |
| "tests/test_scorer.py::test_scorer_returns_zero_format_on_garbage", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_completions_are_valid_json_action", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size_matches_demo", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_default_scales_with_instances_per_system", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_default_size_matches_supported_systems", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_explicit_system_ids_override_default", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_empty_system_ids", | |
| "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_unknown_system_ids" | |
| ] |