""" Bloom's Level: Apply Apply a rule or formula from buried text to a new scenario. """ import logging import os import random import re import time from typing import List, Dict, Any from tqdm import tqdm from src.generator import generate_text from src.metrics import compute_accuracy from src.utils import ensure_dir, save_jsonl, save_json logger = logging.getLogger(__name__) FILLERS = [ "The museum houses artifacts from the ancient world.", "Coral reefs support diverse marine ecosystems.", "Railway gauges vary between countries.", ] RULES = [ ("If temperature is above 30C, turn on cooling.", "32", "cooling on"), ("If stock price drops below $50, sell immediately.", "48", "sell"), ("If pH is below 7, add base solution.", "6.2", "add base"), ("If battery is below 20%, charge now.", "15", "charge now"), ] def run_apply( model_name: str, num_sentences: int, num_examples: int, out_dir: str, depths: List[float] = None, ) -> Dict[str, Any]: ensure_dir(out_dir) if depths is None: depths = [0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0] results = {} start = time.time() for depth in depths: logger.info(f"[APPLY] Depth {depth:.1%}") preds = [] for _ in tqdm(range(num_examples), desc=f"Apply {depth:.1%}", leave=False): sents = [random.choice(FILLERS) for _ in range(num_sentences)] rule, test_value, expected = random.choice(RULES) idx = int(depth * len(sents)) sents.insert(idx, f"Rule: {rule}") doc = " ".join(sents) prompt = ( f"Read the rules below and apply them.\n\n" f"{doc}\n\n" f"Scenario: The current reading is {test_value}. What should you do? " f"Answer with only the action." ) ans = generate_text( [{"role": "user", "content": prompt}], model_name=model_name, max_new_tokens=15, ) correct = 1.0 if expected.lower() in ans.lower() else 0.0 preds.append({ "model_answer": ans, "correct": correct, "expected": expected, "depth": depth, }) save_jsonl(os.path.join(out_dir, f"apply_depth_{depth}.jsonl"), preds) acc = compute_accuracy(preds) results[depth] = {"accuracy": acc, "predictions": preds} logger.info(f"[APPLY] Depth {depth:.1%}: acc={acc:.3f}") summary = { "experiment": "apply", "cognitive_level": "apply", "num_sentences": num_sentences, "num_examples": num_examples, "depths": {str(d): results[d]["accuracy"] for d in depths}, "time_minutes": (time.time() - start) / 60, } save_json(os.path.join(out_dir, "apply_summary.json"), summary) logger.info(f"[APPLY] Time={(time.time()-start)/60:.1f} min") return summary