from collections import defaultdict
import pickle

from datasets import load_dataset
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

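# NOTE (assumption): MODELS is not defined anywhere in this script, so a
# placeholder list is added here to keep it runnable. Replace the entries with
# the suffixes of the actual "SWE-Router/swebench-verified-<model>" trace
# datasets you want to evaluate.
MODELS = ["model-a", "model-b"]  # placeholder names only
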
| print("\n[1] Loading SWE-Router data...") |
| traces = defaultdict(dict) |
| for model in MODELS: |
| try: |
| ds = load_dataset(f'SWE-Router/swebench-verified-{model}', split='test') |
| for row in ds: |
| iid = row['instance_id'] |
| traces[iid][model] = { |
| 'resolved': row['resolved'], |
| 'cost': float(row['instance_cost']), |
| 'problem': row['problem_statement'], |
| } |
| print(f" {model}: loaded") |
| except Exception as e: |
| print(f" {model}: FAILED - {e}") |
|
|
| print(f" Total tasks: {len(traces)}") |
|
|
| print("\n[2] Loading BERT router...") |
| REPO = "narcolepticchicken/agent-cost-optimizer" |
| tokenizer = AutoTokenizer.from_pretrained(f"{REPO}", subfolder="router_models/bert_router") |
| bert_model = AutoModelForSequenceClassification.from_pretrained(f"{REPO}", subfolder="router_models/bert_router") |
| bert_model.eval() |
| print(f" BERT model loaded, num_labels={bert_model.config.num_labels}") |
|
|
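# Sanity check (sketch, assumption): score one dummy problem statement with the
# BERT router. The mapping from output label index to target model/tier depends
# on how the router was trained and is not specified in this script.
_dummy_problem = "Fix the off-by-one error in the pagination helper."
with torch.no_grad():
    _inputs = tokenizer(_dummy_problem, truncation=True, max_length=512, return_tensors="pt")
    _probs = torch.softmax(bert_model(**_inputs).logits, dim=-1).squeeze(0)
print(f" example routing distribution: {[round(p, 3) for p in _probs.tolist()]}")
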
| print("\n[3] Loading v11 XGBoost router...") |
| from huggingface_hub import hf_hub_download |
| v11_path = hf_hub_download(REPO, "router_models/router_bundle_v11.pkl") |
| v11_bundle = pickle.load(open(v11_path, "rb")) |
| v11_tier_clfs = {int(k):v for k,v in v11_bundle["tier_clfs"].items()} |
| v11_tier_calibs = {int(k):v for k,v in v11_bundle["tier_calibrators"].items()} |
| v11_feat_keys = v11_bundle["feat_keys"] |
| print(f" v11 loaded, features={len(v11_feat_keys)}") |
|
|
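
# Sketch (assumption): one way the per-tier classifiers and calibrators could be
# combined at inference time. The feature extraction behind v11_feat_keys is not
# shown in this script, so the helper below assumes a 1-D feature vector already
# ordered like v11_feat_keys, sklearn-style binary classifiers (predict_proba),
# and 1-D calibrators with a predict() method (e.g. isotonic regression).
import numpy as np

def v11_tier_solve_probs(feature_vector):
    """Return {tier: calibrated solve probability} for one task's features."""
    x = np.asarray(feature_vector, dtype=float).reshape(1, -1)
    out = {}
    for tier, clf in v11_tier_clfs.items():
        raw = float(clf.predict_proba(x)[0, 1])  # raw P(resolved) from the tier classifier
        out[tier] = float(v11_tier_calibs[tier].predict([raw])[0])  # calibrated probability
    return out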