| """In-app guided exercises that prefill the workbench with sandbox data. |
| |
| Each exercise returns a dict consumed by the Welcome tab's "Try this" handlers, |
| which then push the values into the tabs' state. |
| """ |
| from __future__ import annotations |
|
|
| from dataclasses import dataclass |
|
|
| from io_utils import read_sandbox_tsv, sandbox_sentence |
| from paths import corpus_file, LANGUAGES |
| from prompts import DEFAULT_SYSTEM_PROMPT, DEFAULT_ZERO_SHOT, DEFAULT_FEW_SHOT, ICLExample |
| from schemas import from_preset |
|
|
|
|
@dataclass
class Exercise:
    """Static configuration for one guided exercise.

    Instances are listed in ``EXERCISES`` and read by ``prefill()``, which
    copies most fields straight into the dict it returns.
    """

    # Display text; returned as "exercise_title" / "exercise_summary".
    title: str
    summary: str

    # Passed to corpus_file() to locate the corpus and used as the
    # LANGUAGES lookup key for the display name.
    language_code: str

    # Passed to from_preset() to obtain the annotation schema.
    preset_key: str

    # Forwarded unchanged in the prefill dict.
    tokenizer: str

    # Third argument to sandbox_sentence() for the target sentence and for
    # every in-context example.
    n_tokens: int

    # When true, prefill() builds in-context examples; n_icl is the upper
    # bound on how many it tries to build.
    use_few_shot: bool
    n_icl: int

    # Model identifiers, forwarded in the prefill dict.
    models: list[str]

    # Prompt template, forwarded as "user_template".
    user_template: str

    # Second argument to sandbox_sentence() when picking the target sentence.
    sandbox_start: int = 0
|
|
|
|
# Guided exercises, in the order list_exercise_titles() reports them and
# prefill() indexes them.
EXERCISES: list[Exercise] = [
    # Zero-shot, one model, no in-context examples; sandbox_start keeps its
    # default of 0.
    Exercise(
        title="Exercise 1 — Greek POS, zero-shot, single model",
        summary=(
            "Annotate an Ancient Greek sentence from the historical corpus with a single "
            "model in zero-shot mode. You will see the raw output, no MoE, no ICL. "
            "This is the smallest possible loop."
        ),
        language_code="GRC",
        preset_key="grc_tagset",
        tokenizer="as_is",
        n_tokens=10,
        use_few_shot=False,
        n_icl=0,
        models=[
            "openai/gpt-oss-20b:free",
        ],
        user_template=DEFAULT_ZERO_SHOT,
    ),
    # Few-shot with five in-context examples and one model.
    Exercise(
        title="Exercise 2 — Armenian POS + lemma, few-shot (5 examples)",
        summary=(
            "Annotate Old Armenian with the bespoke compound tagset. 5 validated examples "
            "are sampled from the training corpus and inserted into the prompt's "
            "{few_shot_examples} block. Compare the few-shot result with what zero-shot "
            "would give."
        ),
        language_code="HYE",
        preset_key="hye_tagset",
        tokenizer="as_is",
        n_tokens=10,
        use_few_shot=True,
        n_icl=5,
        models=[
            "mistralai/mistral-small-24b-instruct-2501",
        ],
        user_template=DEFAULT_FEW_SHOT,
        sandbox_start=200,
    ),
    # Few-shot with three in-context examples and three models.
    Exercise(
        title="Exercise 3 — Syriac MoE: vote, correct, re-inject",
        summary=(
            "Annotate Syriac with three models in parallel. The Run tab highlights "
            "disagreements. Correct the contested tokens in Review, click "
            "'Add to ICL pool', then re-run on a new sentence — the corrections appear "
            "in the rendered prompt's few-shot block, closing the bootstrap loop."
        ),
        language_code="SYC",
        preset_key="syc_tagset",
        tokenizer="as_is",
        n_tokens=12,
        use_few_shot=True,
        n_icl=3,
        models=[
            "meta-llama/llama-3.3-70b-instruct:free",
            "qwen/qwen3-next-80b-a3b-instruct:free",
            "deepseek/deepseek-v4-flash:free",
        ],
        user_template=DEFAULT_FEW_SHOT,
        sandbox_start=50,
    ),
]
|
|
|
|
def list_exercise_titles() -> list[str]:
    """Return the title of every entry in ``EXERCISES``, in list order."""
    titles: list[str] = []
    for exercise in EXERCISES:
        titles.append(exercise.title)
    return titles
|
|
|
|
def prefill(idx: int) -> dict:
    """Return a dict the app uses to seed every tab for exercise `idx`.

    Reads the exercise language's "train" corpus file through
    ``read_sandbox_tsv`` (with ``max_rows=2000``), picks the target sentence
    at ``sandbox_start`` and, for few-shot exercises, builds up to ``n_icl``
    in-context examples from the same rows.

    Args:
        idx: Index into ``EXERCISES``.

    Returns:
        A dict with the keys ``exercise_title``, ``exercise_summary``,
        ``language_code``, ``language_name``, ``preset_key``, ``tokenizer``,
        ``text``, ``tokens``, ``gold``, ``use_few_shot``, ``n_icl``,
        ``icl_examples``, ``system_prompt``, ``user_template`` and ``models``.
        ``n_icl`` is the configured count; ``icl_examples`` may hold fewer
        entries if the rows run out. ``models`` is a fresh list, so callers
        may edit it without altering the entry in ``EXERCISES``.

    Raises:
        IndexError: If ``idx`` is out of range for ``EXERCISES``.
    """
    ex = EXERCISES[idx]
    rows = read_sandbox_tsv(corpus_file(ex.language_code, "train"), max_rows=2000)
    surfaces, gold = sandbox_sentence(rows, ex.sandbox_start, ex.n_tokens)

    schema = from_preset(ex.preset_key)
    icl_examples: list[ICLExample] = []
    if ex.use_few_shot:
        # Every example shares the same schema, so hash it once rather than
        # once per example.
        schema_hash = schema.hash()
        # Example windows start at row offset 0 and are spaced n_tokens + 2
        # apart.
        # NOTE(review): nothing here keeps these windows clear of the target
        # window at sandbox_start. The exercises currently defined do not
        # collide, but confirm before adding one with a small sandbox_start.
        stride = ex.n_tokens + 2
        for k in range(ex.n_icl):
            s2, g2 = sandbox_sentence(rows, k * stride, ex.n_tokens)
            if not s2:
                # No tokens came back for this window; stop early.
                break
            icl_examples.append(
                ICLExample(
                    language=ex.language_code,
                    schema_hash=schema_hash,
                    tokens=s2,
                    gold_annotation={"tokens": g2},
                    source="sandbox",
                )
            )

    return {
        "exercise_title": ex.title,
        "exercise_summary": ex.summary,
        "language_code": ex.language_code,
        "language_name": LANGUAGES.get(ex.language_code, ex.language_code),
        "preset_key": ex.preset_key,
        "tokenizer": ex.tokenizer,
        "text": " ".join(surfaces),
        "tokens": surfaces,
        "gold": gold,
        "use_few_shot": ex.use_few_shot,
        "n_icl": ex.n_icl,
        "icl_examples": icl_examples,
        "system_prompt": DEFAULT_SYSTEM_PROMPT,
        "user_template": ex.user_template,
        # Copy so that tab state editing this list in place cannot change
        # the shared module-level exercise definition.
        "models": list(ex.models),
    }
|
|