Spaces:
Running
Running
| """Load saved models and run inference on a feature vector. | |
| v3: Supports FrankHall (HistGBT binary decomposition) models alongside mord. | |
| Computes engineered features on-the-fly if models require them. | |
| Uses per-target feature orders from training_config.json. | |
| """ | |
| import json | |
| from pathlib import Path | |
| import joblib | |
| import numpy as np | |
| # FrankHallOrdinal must be importable for joblib deserialization of v3 models | |
| from models.frank_hall import FrankHallOrdinal # noqa: F401 | |
# Location of serialized models/scalers, relative to the package root.
SAVE_DIR = Path(__file__).parent.parent / "saved_models"

# Lazily-populated registry of loaded artifacts, guarded by _loaded.
_models = {}
_loaded = False


def _product_of(a, b):
    """Factory: feature computed as the product of two base features."""
    return lambda f: f.get(a, 0) * f.get(b, 0)


def _ratio_of(num, den):
    """Factory: feature computed as num/den with a small-epsilon guard."""
    return lambda f: f.get(num, 0) / (f.get(den, 0) + 1e-6)


# Engineered feature computations (must match training notebook exactly)
ENGINEERED_FEATURES = {
    "pause_load": _product_of("pause_frequency_per_sec", "mean_pause_duration_sec"),
    "continuity_composite": _product_of("mlu", "speech_ratio"),
    "disruption_signal": _product_of("mid_clause_pause_ratio", "fa_speech_rate_cv"),
    "pause_severity_ratio": _ratio_of("long_pause_ratio", "short_pause_share"),
    "filled_pause_dominance": _ratio_of("fa_filled_pause_ratio", "pause_frequency_per_sec"),
}
def _load_all():
    """Populate the module-level _models registry from SAVE_DIR (runs once).

    Loads feature ordering, class mappings, optional per-target training
    config, the per-target ordinal model/scaler pairs, and the dominance
    ridge model/scaler pair. Idempotent via the _loaded flag.
    """
    global _models, _loaded
    if _loaded:
        return

    _models["feature_order"] = json.loads((SAVE_DIR / "feature_order.json").read_text())
    _models["class_mappings"] = json.loads((SAVE_DIR / "class_mappings.json").read_text())

    # Per-target training config (feature lists, model info) is optional.
    cfg_file = SAVE_DIR / "training_config.json"
    _models["training_config"] = json.loads(cfg_file.read_text()) if cfg_file.exists() else {}

    ordinal_targets = (
        "articulation_ordinal",
        "pause_freq_ordinal",
        "pause_dur_ordinal",
        "pause_place_ordinal",
        "cognitive_load_ordinal",
        "utterance_constraints_ordinal",
    )
    ordinal = {}
    for target in ordinal_targets:
        model_file = SAVE_DIR / f"ordinal_{target}.joblib"
        scaler_file = SAVE_DIR / f"scaler_{target}.joblib"
        # Skip targets whose artifacts were not saved.
        if not (model_file.exists() and scaler_file.exists()):
            continue
        target_cfg = _models["training_config"].get(target, {})
        raw_map = _models["class_mappings"].get(target, {})
        ordinal[target] = {
            "model": joblib.load(model_file),
            "scaler": joblib.load(scaler_file),
            # JSON keys arrive as strings; predictions index with ints.
            "class_unmap": {int(key): value for key, value in raw_map.items()},
            # Fall back to the global feature order when no per-target list exists.
            "features": target_cfg.get("features", _models["feature_order"]),
        }
    _models["ordinal"] = ordinal

    ridge_file = SAVE_DIR / "dominance_ridge.joblib"
    ridge_scaler_file = SAVE_DIR / "scaler_dominance.joblib"
    if ridge_file.exists() and ridge_scaler_file.exists():
        _models["dominance"] = {
            "model": joblib.load(ridge_file),
            "scaler": joblib.load(ridge_scaler_file),
        }

    _loaded = True
| def _build_feature_vector(features: dict, feature_list: list) -> np.ndarray: | |
| """Build feature vector, computing engineered features on-the-fly if needed.""" | |
| vals = [] | |
| for f in feature_list: | |
| if f in features: | |
| vals.append(features[f]) | |
| elif f in ENGINEERED_FEATURES: | |
| vals.append(ENGINEERED_FEATURES[f](features)) | |
| else: | |
| vals.append(0.0) | |
| x = np.array([vals]) | |
| return np.nan_to_num(x, nan=0.0) | |
def predict(features: dict) -> dict:
    """Run all loaded models on a feature dict.

    Args:
        features: dict with keys matching GUA_ALL feature names.

    Returns:
        dict with per-target ordinal predictions and labels, plus
        planned/unplanned/neutral proportions when the dominance model
        is available.
    """
    _load_all()

    # Human-readable names for each ordinal class, per target.
    ordinal_labels = {
        "articulation_ordinal": {1: "Legato", 2: "Portato", 3: "Staccato+"},
        "pause_freq_ordinal": {1: "Low", 2: "Medium", 3: "High+"},
        "pause_dur_ordinal": {1: "Short", 2: "Medium", 3: "Long"},
        "pause_place_ordinal": {1: "Boundary", 2: "Mixed", 3: "Mid-Constituent"},
        "cognitive_load_ordinal": {1: "Low", 2: "Medium", 3: "High"},
        "utterance_constraints_ordinal": {1: "Low", 2: "Medium", 3: "High"},
    }

    out = {}

    # Ordinal predictions (v3: per-target feature lists, FrankHall + mord compatible).
    for target, bundle in _models.get("ordinal", {}).items():
        feat_names = bundle.get("features", _models["feature_order"])
        scaled = bundle["scaler"].transform(_build_feature_vector(features, feat_names))
        raw_class = int(bundle["model"].predict(scaled)[0])
        # Map the model's contiguous class index back to the original class id.
        original_class = bundle["class_unmap"].get(raw_class, raw_class)
        out[f"{target}_pred"] = original_class
        out[f"{target}_label"] = ordinal_labels.get(target, {}).get(
            original_class, str(original_class)
        )

    # Dominance regression predicts ALR coordinates; invert to proportions.
    # (Always uses the base feature order, not per-target lists.)
    dominance = _models.get("dominance")
    if dominance:
        base = _build_feature_vector(features, _models["feature_order"])
        alr = dominance["model"].predict(dominance["scaler"].transform(base))[0]
        e_unplanned, e_planned = np.exp(alr[0]), np.exp(alr[1])
        total = 1 + e_unplanned + e_planned
        out["prop_unplanned_pred"] = round(float(e_unplanned / total), 4)
        out["prop_planned_pred"] = round(float(e_planned / total), 4)
        out["prop_neutral_pred"] = round(float(1.0 / total), 4)

    return out