Spaces:

ENC-PSL
/

lrec2026-llm-annotator

Running

App Files Files Community

lrec2026-llm-annotator / tutorial.py

dhuser

Initial LREC LLM-as-Annotator app

a918698 6 days ago

raw

history blame

4.49 kB

	"""In-app guided exercises that prefill the workbench with sandbox data.

	Each exercise returns a dict consumed by the Welcome tab's "Try this" handlers,
	which then push the values into the tabs' state.
	"""
	from __future__ import annotations

	from dataclasses import dataclass

	from io_utils import read_sandbox_tsv, sandbox_sentence
	from paths import corpus_file, LANGUAGES
	from prompts import DEFAULT_SYSTEM_PROMPT, DEFAULT_ZERO_SHOT, DEFAULT_FEW_SHOT, ICLExample
	from schemas import from_preset


	@dataclass
	class Exercise:
	    """Static configuration for one guided exercise.

	    Instances are listed in `EXERCISES` and read by `prefill`, which turns
	    one of them into the dict used to seed the app's tabs.
	    """

	    # Text shown to the user; returned by `prefill` as "exercise_title" and
	    # "exercise_summary".
	    title: str
	    summary: str

	    # Selects the corpus file (`corpus_file`) and the display name looked up
	    # in `LANGUAGES`.
	    language_code: str
	    # Key handed to `from_preset` to obtain the annotation schema.
	    preset_key: str
	    # Passed through to the app unchanged.
	    tokenizer: str
	    # Length argument given to `sandbox_sentence` for the target sentence and
	    # for every few-shot example.
	    n_tokens: int

	    # When true, `prefill` builds in-context examples; `n_icl` is the maximum
	    # number it attempts to build.
	    use_few_shot: bool
	    n_icl: int

	    # Model identifiers and prompt template, passed through to the app.
	    models: list[str]
	    user_template: str

	    # Start argument given to `sandbox_sentence` for the target sentence.
	    sandbox_start: int = 0


	# Guided exercises in the order they are offered; `prefill(idx)` indexes
	# into this list and `list_exercise_titles()` reports the titles.
	EXERCISES: list[Exercise] = [
	    # 1) One model, zero-shot, no in-context examples.
	    Exercise(
	        title="Exercise 1 — Greek POS, zero-shot, single model",
	        summary=(
	            "Annotate an Ancient Greek sentence from the historical corpus with a single "
	            "model in zero-shot mode. You will see the raw output, no MoE, no ICL. "
	            "This is the smallest possible loop."
	        ),
	        language_code="GRC",
	        preset_key="grc_tagset",
	        tokenizer="as_is",
	        n_tokens=10,
	        use_few_shot=False,
	        n_icl=0,
	        models=[
	            "openai/gpt-oss-20b:free",
	        ],
	        user_template=DEFAULT_ZERO_SHOT,
	    ),
	    # 2) One model, few-shot with five in-context examples.
	    Exercise(
	        title="Exercise 2 — Armenian POS + lemma, few-shot (5 examples)",
	        summary=(
	            "Annotate Old Armenian with the bespoke compound tagset. 5 validated examples "
	            "are sampled from the training corpus and inserted into the prompt's "
	            "{few_shot_examples} block. Compare the few-shot result with what zero-shot "
	            "would give."
	        ),
	        language_code="HYE",
	        preset_key="hye_tagset",
	        tokenizer="as_is",
	        n_tokens=10,
	        use_few_shot=True,
	        n_icl=5,
	        models=[
	            "mistralai/mistral-small-24b-instruct-2501",
	        ],
	        user_template=DEFAULT_FEW_SHOT,
	        sandbox_start=200,
	    ),
	    # 3) Three models, few-shot with three in-context examples.
	    Exercise(
	        title="Exercise 3 — Syriac MoE: vote, correct, re-inject",
	        summary=(
	            "Annotate Syriac with three models in parallel. The Run tab highlights "
	            "disagreements. Correct the contested tokens in Review, click "
	            "'Add to ICL pool', then re-run on a new sentence — the corrections appear "
	            "in the rendered prompt's few-shot block, closing the bootstrap loop."
	        ),
	        language_code="SYC",
	        preset_key="syc_tagset",
	        tokenizer="as_is",
	        n_tokens=12,
	        use_few_shot=True,
	        n_icl=3,
	        models=[
	            "meta-llama/llama-3.3-70b-instruct:free",
	            "qwen/qwen3-next-80b-a3b-instruct:free",
	            "deepseek/deepseek-v4-flash:free",
	        ],
	        user_template=DEFAULT_FEW_SHOT,
	        sandbox_start=50,
	    ),
	]


	def list_exercise_titles() -> list[str]:
	    """Return the title of every entry in `EXERCISES`, in list order."""
	    titles: list[str] = []
	    for exercise in EXERCISES:
	        titles.append(exercise.title)
	    return titles


	def prefill(idx: int) -> dict:
	    """Return a dict the app uses to seed every tab for exercise `idx`.

	    Reads up to 2000 rows from the exercise language's "train" corpus file,
	    extracts the target sentence with
	    ``sandbox_sentence(rows, ex.sandbox_start, ex.n_tokens)`` and, when the
	    exercise has ``use_few_shot`` set, builds up to ``ex.n_icl`` `ICLExample`
	    objects from the same rows.

	    Args:
	        idx: Position of the exercise in `EXERCISES`.

	    Returns:
	        A dict with the keys "exercise_title", "exercise_summary",
	        "language_code", "language_name", "preset_key", "tokenizer", "text",
	        "tokens", "gold", "use_few_shot", "n_icl", "icl_examples",
	        "system_prompt", "user_template" and "models". "n_icl" is the
	        configured count; "icl_examples" may hold fewer entries if an example
	        slice comes back empty. "models" is a fresh list, so callers may
	        mutate it without altering `EXERCISES`.

	    Raises:
	        IndexError: If `idx` is outside the range accepted by the
	            ``EXERCISES[idx]`` lookup.
	    """
	    ex = EXERCISES[idx]
	    rows = read_sandbox_tsv(corpus_file(ex.language_code, "train"), max_rows=2000)
	    surfaces, gold = sandbox_sentence(rows, ex.sandbox_start, ex.n_tokens)

	    schema = from_preset(ex.preset_key)
	    icl_examples: list[ICLExample] = []
	    if ex.use_few_shot:
	        # The schema is not modified below, so hash it once rather than per
	        # example (assumes schema.hash() is deterministic).
	        schema_hash = schema.hash()
	        # Consecutive example windows start n_tokens + 2 apart, beginning at 0.
	        stride = ex.n_tokens + 2
	        # NOTE(review): example windows are never compared with the target
	        # window at ex.sandbox_start. If `sandbox_sentence` treats its second
	        # argument as a row offset (TODO confirm), an exercise with a small
	        # sandbox_start could get its own target sentence, with gold labels,
	        # as a few-shot example. The current EXERCISES entries avoid this.
	        for k in range(ex.n_icl):
	            example_tokens, example_gold = sandbox_sentence(
	                rows, k * stride, ex.n_tokens
	            )
	            if not example_tokens:
	                # An empty slice ends example collection early.
	                break
	            icl_examples.append(
	                ICLExample(
	                    language=ex.language_code,
	                    schema_hash=schema_hash,
	                    tokens=example_tokens,
	                    gold_annotation={"tokens": example_gold},
	                    source="sandbox",
	                )
	            )

	    return {
	        "exercise_title": ex.title,
	        "exercise_summary": ex.summary,
	        "language_code": ex.language_code,
	        "language_name": LANGUAGES.get(ex.language_code, ex.language_code),
	        "preset_key": ex.preset_key,
	        "tokenizer": ex.tokenizer,
	        "text": " ".join(surfaces),
	        "tokens": surfaces,
	        "gold": gold,
	        "use_few_shot": ex.use_few_shot,
	        "n_icl": ex.n_icl,
	        "icl_examples": icl_examples,
	        "system_prompt": DEFAULT_SYSTEM_PROMPT,
	        "user_template": ex.user_template,
	        # Copy: ex.models belongs to the module-level EXERCISES entry and
	        # must not be shared with (and mutated through) per-session state.
	        "models": list(ex.models),
	    }