OSINT / config /shared_config_metaqa.json
siddeshwar-kagatikar
fix(rewards): never crash GRPO on malformed completions
d814291
{
"environment": {
"n_users": 40,
"alias_density": 0.35,
"noise_level": 0.15,
"red_herring_rate": 0.1,
"max_steps": 18,
"seed": 7
},
"dataset": {
"mode": "metaqa",
"metaqa_root": "metaQA",
"metaqa_kb_path": "metaQA/kb.txt",
"metaqa_variant": "vanilla",
"metaqa_hops": ["1-hop", "2-hop", "3-hop"],
"metaqa_splits": ["train", "dev", "test"]
},
"swarm": {
"enabled": true,
"max_agents": 3,
"max_breadth": 2,
"max_width": 2,
"max_depth": 2,
"planner_rounds": 2,
"tools_per_agent": 1
},
"spawn_reward": {
"lambda_parallel": 0.15,
"lambda_finish": 0.2,
"anneal": 1.0,
"max_parallel_hint": 3
},
"seeding": {
"seeded_nodes": [],
"seeded_edges": [],
"seeded_questions": [],
"llm_generate_remaining_graph": false,
"llm_generate_remaining_tasks": false,
"llm_generated_edge_budget": 0,
"llm_generated_task_budget": 0,
"llm_generation_parallel": true,
"llm_generation_workers": 3,
"llm_generation_retries": 2,
"allow_template_fallback_on_llm_failure": false
},
"llm": {
"provider": "mock",
"model": "qwen3:2b",
"temperature": 0.1,
"max_tokens": 256,
"timeout_seconds": 240,
"ollama_base_url": "http://127.0.0.1:11434",
"openai_base_url": "https://api.openai.com/v1",
"openai_api_key_env": "OPENAI_API_KEY",
"openai_api_key": ""
},
"runtime": {
"default_episodes": 20,
"leaderboard_path": "artifacts/leaderboard_metaqa.json",
"dashboard_path": "artifacts/metaqa_dashboard.html",
"sweep_dashboard_dir": "artifacts/metaqa_sweep_dashboards"
}
}