anugrah55's picture
trainer v0.4: switch to Qwen2.5-3B-Instruct, dynamic task discovery, delegated probe sampling, difficulty-weighted rollouts, push to opensleuth-qwen2.5-3b-grpo-v2; sentinel cleared on FORCE_TRAIN=1.
78575eb verified
raw
history blame contribute delete
512 Bytes
"""OpenSleuth training-side helpers (env client, dataset, reward fn)."""
from .client import EnvClient
from .dataset import (
DEFAULT_N_BY_DIFFICULTY,
FUNCTIONS_FOR_TRAINING,
build_synthesis_dataset,
discover_functions,
)
from .prompt import SYSTEM_PROMPT, build_prompt, extract_code
# Public API of this package: exactly the names re-exported by the relative
# imports above, listed in the same order and grouped by source submodule.
__all__ = [
    # from .client
    "EnvClient",
    # from .dataset
    "DEFAULT_N_BY_DIFFICULTY",
    "FUNCTIONS_FOR_TRAINING",
    "build_synthesis_dataset",
    "discover_functions",
    # from .prompt
    "SYSTEM_PROMPT",
    "build_prompt",
    "extract_code",
]