# abhshkp's picture
# Upload folder using huggingface_hub
# 9daa0e5 verified
"""
Bloom's Level: Apply
Apply a rule or formula from buried text to a new scenario.
"""
import logging
import os
import random
import re
import time
from typing import List, Dict, Any
from tqdm import tqdm
from src.generator import generate_text
from src.metrics import compute_accuracy
from src.utils import ensure_dir, save_jsonl, save_json
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Distractor sentences; run_apply samples from these (with replacement) to
# build the document that surrounds the inserted rule.
FILLERS = [
    "The museum houses artifacts from the ancient world.",
    "Coral reefs support diverse marine ecosystems.",
    "Railway gauges vary between countries.",
]

# One entry per scenario, unpacked by run_apply as
# (rule sentence, reading quoted in the scenario, action text looked for in
# the model's answer).
RULES = [
    (
        "If temperature is above 30C, turn on cooling.",
        "32",
        "cooling on",
    ),
    (
        "If stock price drops below $50, sell immediately.",
        "48",
        "sell",
    ),
    (
        "If pH is below 7, add base solution.",
        "6.2",
        "add base",
    ),
    (
        "If battery is below 20%, charge now.",
        "15",
        "charge now",
    ),
]
def _answer_matches(expected: str, answer: str) -> bool:
    """Return True when *answer* contains the expected action.

    A case-insensitive substring match is accepted first. Failing that, the
    answer still counts when every word of *expected* occurs in it as a whole
    word, in any order, so "Turn on cooling." satisfies the expected text
    "cooling on".
    """
    expected_lc = expected.lower()
    answer_lc = answer.lower()
    if expected_lc in answer_lc:
        return True
    expected_words = re.findall(r"\w+", expected_lc)
    if not expected_words:
        return False
    answer_words = set(re.findall(r"\w+", answer_lc))
    return all(word in answer_words for word in expected_words)


def run_apply(
    model_name: str,
    num_sentences: int,
    num_examples: int,
    out_dir: str,
    depths: List[float] = None,
) -> Dict[str, Any]:
    """Run the "apply" experiment: bury a rule in filler text and test its use.

    For every depth, ``num_examples`` prompts are built. Each prompt contains
    ``num_sentences`` randomly chosen filler sentences with one randomly
    chosen rule inserted at the relative position ``depth``, followed by a
    scenario quoting the rule's test reading. The model's reply is scored
    against the rule's expected action text.

    Args:
        model_name: Forwarded to ``generate_text`` as ``model_name``.
        num_sentences: Number of filler sentences per document.
        num_examples: Number of prompts generated per depth.
        out_dir: Directory passed to ``ensure_dir``; per-depth predictions
            and the summary are saved under it.
        depths: Relative insertion positions (0.0 = start, 1.0 = end).
            Defaults to nine evenly spaced values from 0.0 to 1.0. Values
            outside that range are clamped to the nearest end of the document.

    Returns:
        The summary dict that is also saved as ``apply_summary.json``: the
        experiment name, cognitive level, ``num_sentences``, ``num_examples``,
        a ``depths`` mapping of ``str(depth)`` to accuracy, and
        ``time_minutes``.
    """
    ensure_dir(out_dir)
    if depths is None:
        depths = [0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0]

    # Only the accuracy per depth is needed after the loop; predictions are
    # written to disk per depth rather than kept in memory.
    accuracies = {}
    # perf_counter is monotonic, so the duration is unaffected by clock changes.
    start = time.perf_counter()

    for depth in depths:
        logger.info(f"[APPLY] Depth {depth:.1%}")
        preds = []
        for _ in tqdm(range(num_examples), desc=f"Apply {depth:.1%}", leave=False):
            # Fillers are drawn before the rule, keeping the order of random
            # calls stable for seeded runs.
            sents = [random.choice(FILLERS) for _ in range(num_sentences)]
            rule, test_value, expected = random.choice(RULES)

            # Clamp so an out-of-range depth lands at the start or end instead
            # of being read by list.insert as an offset from the end.
            idx = min(max(int(depth * len(sents)), 0), len(sents))
            sents.insert(idx, f"Rule: {rule}")
            doc = " ".join(sents)

            prompt = (
                f"Read the rules below and apply them.\n\n"
                f"{doc}\n\n"
                f"Scenario: The current reading is {test_value}. What should you do? "
                f"Answer with only the action."
            )
            ans = generate_text(
                [{"role": "user", "content": prompt}],
                model_name=model_name,
                max_new_tokens=15,
            )

            correct = 1.0 if _answer_matches(expected, ans) else 0.0
            preds.append({
                "model_answer": ans,
                "correct": correct,
                "expected": expected,
                "depth": depth,
            })

        save_jsonl(os.path.join(out_dir, f"apply_depth_{depth}.jsonl"), preds)
        # NOTE(review): compute_accuracy presumably averages the "correct"
        # field; confirm in src.metrics.
        acc = compute_accuracy(preds)
        accuracies[depth] = acc
        logger.info(f"[APPLY] Depth {depth:.1%}: acc={acc:.3f}")

    # Compute the duration once so the saved and logged values agree.
    elapsed_minutes = (time.perf_counter() - start) / 60
    summary = {
        "experiment": "apply",
        "cognitive_level": "apply",
        "num_sentences": num_sentences,
        "num_examples": num_examples,
        "depths": {str(d): accuracies[d] for d in depths},
        "time_minutes": elapsed_minutes,
    }
    save_json(os.path.join(out_dir, "apply_summary.json"), summary)
    logger.info(f"[APPLY] Time={elapsed_minutes:.1f} min")
    return summary