"""In-app guided exercises that prefill the workbench with sandbox data.

Each exercise returns a dict consumed by the Welcome tab's "Try this" handlers,
which then push the values into the tabs' state.
"""
from __future__ import annotations

from dataclasses import dataclass

from io_utils import read_sandbox_tsv, sandbox_sentence
from paths import corpus_file, LANGUAGES
from prompts import DEFAULT_SYSTEM_PROMPT, DEFAULT_ZERO_SHOT, DEFAULT_FEW_SHOT, ICLExample
from schemas import from_preset


@dataclass
class Exercise:
    """Static configuration of one guided exercise.

    Instances are declared once in ``EXERCISES`` and turned into a prefill
    dict by ``prefill``.
    """

    # Title and description, surfaced as "exercise_title" / "exercise_summary".
    title: str
    summary: str
    # Code passed to ``corpus_file`` and looked up in ``LANGUAGES``.
    language_code: str
    # Key handed to ``schemas.from_preset`` to obtain the annotation schema.
    preset_key: str
    # Passed through unchanged to the prefill dict.
    tokenizer: str
    # Length argument given to ``sandbox_sentence`` for the target sentence
    # and for every few-shot example.
    n_tokens: int
    # When true, ``prefill`` builds few-shot (ICL) examples.
    use_few_shot: bool
    # Maximum number of few-shot examples to build.
    n_icl: int
    # Model identifiers, passed through to the prefill dict.
    models: list[str]
    # Prompt template, passed through as "user_template".
    user_template: str
    # Start argument given to ``sandbox_sentence`` for the target sentence.
    sandbox_start: int = 0


EXERCISES = [
    Exercise(
        title="Exercise 1 — Greek POS, zero-shot, single model",
        summary=(
            "Annotate an Ancient Greek sentence from the historical corpus with a single "
            "model in zero-shot mode. You will see the raw output, no MoE, no ICL. "
            "This is the smallest possible loop."
        ),
        language_code="GRC",
        preset_key="grc_tagset",
        tokenizer="as_is",
        n_tokens=10,
        use_few_shot=False,
        n_icl=0,
        models=["openai/gpt-oss-20b:free"],
        user_template=DEFAULT_ZERO_SHOT,
    ),
    Exercise(
        title="Exercise 2 — Armenian POS + lemma, few-shot (5 examples)",
        summary=(
            "Annotate Old Armenian with the bespoke compound tagset. 5 validated examples "
            "are sampled from the training corpus and inserted into the prompt's "
            "{few_shot_examples} block. Compare the few-shot result with what zero-shot "
            "would give."
        ),
        language_code="HYE",
        preset_key="hye_tagset",
        tokenizer="as_is",
        n_tokens=10,
        use_few_shot=True,
        n_icl=5,
        models=["mistralai/mistral-small-24b-instruct-2501"],
        user_template=DEFAULT_FEW_SHOT,
        sandbox_start=200,
    ),
    Exercise(
        title="Exercise 3 — Syriac MoE: vote, correct, re-inject",
        summary=(
            "Annotate Syriac with three models in parallel. The Run tab highlights "
            "disagreements. Correct the contested tokens in Review, click "
            "'Add to ICL pool', then re-run on a new sentence — the corrections appear "
            "in the rendered prompt's few-shot block, closing the bootstrap loop."
        ),
        language_code="SYC",
        preset_key="syc_tagset",
        tokenizer="as_is",
        n_tokens=12,
        use_few_shot=True,
        n_icl=3,
        models=[
            "meta-llama/llama-3.3-70b-instruct:free",
            "qwen/qwen3-next-80b-a3b-instruct:free",
            "deepseek/deepseek-v4-flash:free",
        ],
        user_template=DEFAULT_FEW_SHOT,
        sandbox_start=50,
    ),
]


def list_exercise_titles() -> list[str]:
    """Return the titles of all exercises, in ``EXERCISES`` order."""
    return [e.title for e in EXERCISES]


def _sample_icl_examples(ex: Exercise, rows, schema) -> list[ICLExample]:
    """Build up to ``ex.n_icl`` few-shot examples from ``rows``.

    Example ``k`` is requested at start ``k * (ex.n_tokens + 2)`` with length
    ``ex.n_tokens``. Building stops at the first request that yields no
    tokens, so fewer than ``ex.n_icl`` examples may be returned.
    """
    # The hash is the same for every example; compute it once.
    # NOTE(review): assumes schema.hash() is deterministic - confirm.
    schema_hash = schema.hash()
    examples: list[ICLExample] = []
    for k in range(ex.n_icl):
        # NOTE(review): these windows start at 0 and, for the exercises
        # declared above, end before ``sandbox_start``; presumably that keeps
        # them disjoint from the target sentence - confirm against
        # sandbox_sentence's semantics before adding new exercises.
        tokens, gold = sandbox_sentence(rows, k * (ex.n_tokens + 2), ex.n_tokens)
        if not tokens:
            break
        examples.append(
            ICLExample(
                language=ex.language_code,
                schema_hash=schema_hash,
                tokens=tokens,
                gold_annotation={"tokens": gold},
                source="sandbox",
            )
        )
    return examples


def prefill(idx: int) -> dict:
    """Return a dict the app uses to seed every tab for exercise `idx`.

    Args:
        idx: Position of the exercise in ``EXERCISES`` (normal list indexing
            applies, so negative values count from the end).

    Returns:
        A dict with the exercise metadata, the target sentence ("text",
        "tokens", "gold"), the few-shot settings and examples, the prompts,
        and the model list. "models" is a fresh list, so callers may mutate
        it without altering the definitions in ``EXERCISES``.

    Raises:
        IndexError: If ``idx`` is outside the range of ``EXERCISES``.
    """
    ex = EXERCISES[idx]
    rows = read_sandbox_tsv(corpus_file(ex.language_code, "train"), max_rows=2000)
    surfaces, gold = sandbox_sentence(rows, ex.sandbox_start, ex.n_tokens)
    schema = from_preset(ex.preset_key)

    icl_examples: list[ICLExample] = (
        _sample_icl_examples(ex, rows, schema) if ex.use_few_shot else []
    )

    return {
        "exercise_title": ex.title,
        "exercise_summary": ex.summary,
        "language_code": ex.language_code,
        "language_name": LANGUAGES.get(ex.language_code, ex.language_code),
        "preset_key": ex.preset_key,
        "tokenizer": ex.tokenizer,
        "text": " ".join(surfaces),
        "tokens": surfaces,
        "gold": gold,
        "use_few_shot": ex.use_few_shot,
        "n_icl": ex.n_icl,
        "icl_examples": icl_examples,
        "system_prompt": DEFAULT_SYSTEM_PROMPT,
        "user_template": ex.user_template,
        # Copy so UI-side edits never leak back into the module-level
        # exercise definitions.
        "models": list(ex.models),
    }