# ── Part 2: Load data and BERT model ──
# Loads three artifacts used by the router evaluation:
#   [1] per-model SWE-bench traces (resolved flag, cost, problem statement),
#   [2] a fine-tuned BERT sequence classifier used as the learned router,
#   [3] the v11 XGBoost router bundle (per-tier classifiers + calibrators).
# NOTE(review): relies on `defaultdict`, `MODELS`, and `pickle` being
# imported/defined earlier in the file — confirm when viewing the full file.
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

print("\n[1] Loading SWE-Router data...")
# traces[instance_id][model] -> {'resolved', 'cost', 'problem'}
traces = defaultdict(dict)
for model in MODELS:
    try:
        ds = load_dataset(f'SWE-Router/swebench-verified-{model}', split='test')
        for row in ds:
            iid = row['instance_id']
            traces[iid][model] = {
                'resolved': row['resolved'],
                'cost': float(row['instance_cost']),
                'problem': row['problem_statement'],
            }
        print(f"  {model}: loaded")
    except Exception as e:
        # Best-effort: a missing/broken per-model dataset should not abort the
        # whole load — report it and continue with the remaining models.
        print(f"  {model}: FAILED - {e}")
print(f"  Total tasks: {len(traces)}")

print("\n[2] Loading BERT router...")
REPO = "narcolepticchicken/agent-cost-optimizer"
tokenizer = AutoTokenizer.from_pretrained(REPO, subfolder="router_models/bert_router")
bert_model = AutoModelForSequenceClassification.from_pretrained(
    REPO, subfolder="router_models/bert_router"
)
bert_model.eval()  # inference only — disable dropout etc.
print(f"  BERT model loaded, num_labels={bert_model.config.num_labels}")

print("\n[3] Loading v11 XGBoost router...")
from huggingface_hub import hf_hub_download
v11_path = hf_hub_download(REPO, "router_models/router_bundle_v11.pkl")
# Fix: use a context manager so the pickle file handle is closed promptly
# (the original `pickle.load(open(...))` leaked the handle).
# NOTE(review): pickle on downloaded data is only safe if the repo is trusted.
with open(v11_path, "rb") as fh:
    v11_bundle = pickle.load(fh)
# Bundle keys may deserialize as strings; normalize tier keys to int.
v11_tier_clfs = {int(k): v for k, v in v11_bundle["tier_clfs"].items()}
v11_tier_calibs = {int(k): v for k, v in v11_bundle["tier_calibrators"].items()}
v11_feat_keys = v11_bundle["feat_keys"]
print(f"  v11 loaded, features={len(v11_feat_keys)}")