# fluency-benchmark / models / frank_hall.py
# Uploaded by syt20
# Commit message: Replace with fluency_app_v3: updated models, new pipeline modules, experiments
# Commit 63fae5b (verified)
"""Frank & Hall (2001) ordinal decomposition with balanced HistGBT.
This class must be importable for joblib to deserialize v3 FrankHall models.
The class definition must match exactly what was used during training.
"""
import numpy as np
from sklearn.ensemble import HistGradientBoostingClassifier
class FrankHallOrdinal:
    """Ordinal classifier using the K-1 binary decomposition.

    For the K sorted class labels seen during ``fit``, one binary
    classifier is trained per threshold ``k`` (every label except the
    largest) to estimate ``P(Y > k)``.  Each binary classifier is a
    ``HistGradientBoostingClassifier`` created with
    ``class_weight='balanced'`` and ``random_state=42``.

    Attributes are kept exactly as ``max_depth``, ``learning_rate``,
    ``max_iter``, ``clfs`` and ``classes_`` so that previously pickled
    instances remain loadable.
    """

    def __init__(self, max_depth=4, learning_rate=0.1, max_iter=200):
        """Store the hyper-parameters forwarded to every binary classifier."""
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        # One fitted binary classifier per threshold, in ascending label order.
        self.clfs = []
        # Sorted unique labels seen in fit(); None until fit() is called.
        self.classes_ = None

    def fit(self, X, y, sample_weight=None):
        """Fit one ``P(Y > k)`` classifier per threshold label.

        Args:
            X: Feature matrix, passed unchanged to each binary classifier.
            y: Ordinal labels; any array-like (lists are accepted).
            sample_weight: Optional per-sample weights, forwarded to every
                binary classifier's ``fit``.

        Returns:
            ``self``, to allow call chaining.
        """
        # Convert once so that ``y > k`` is an element-wise comparison even
        # when a plain Python list is supplied.
        y = np.asarray(y)
        # np.unique already returns the labels in sorted order.
        self.classes_ = np.unique(y)
        self.clfs = []
        # The largest label needs no classifier: P(Y > max) is always 0.
        for k in self.classes_[:-1]:
            binary_y = (y > k).astype(int)
            clf = HistGradientBoostingClassifier(
                max_depth=self.max_depth,
                learning_rate=self.learning_rate,
                max_iter=self.max_iter,
                class_weight='balanced',
                random_state=42,
            )
            clf.fit(X, binary_y, sample_weight=sample_weight)
            self.clfs.append(clf)
        return self

    def predict_proba(self, X):
        """Return per-class probabilities derived from the cumulative estimates.

        With ``c_j = P(Y > classes_[j])`` from the j-th binary classifier:
        the first class gets ``1 - c_0``, interior class ``k`` gets
        ``c_{k-1} - c_k``, and the last class gets ``c_{K-2}``.

        The binary classifiers are trained independently, so the cumulative
        estimates are not guaranteed to be monotone; negative differences
        are clipped to 0.  Rows are NOT renormalised afterwards, so a row
        can sum to more than 1 when clipping occurs.

        Args:
            X: Feature matrix with one row per sample.

        Returns:
            Array of shape ``(n_samples, n_classes)``, columns ordered like
            ``classes_``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if self.classes_ is None or len(self.classes_) == 0:
            raise ValueError(
                "FrankHallOrdinal is not fitted yet; call fit() first."
            )
        n_classes = len(self.classes_)
        if n_classes == 1:
            # Only one label was seen in training, so no binary classifier
            # exists; that label is predicted with certainty.
            return np.ones((np.shape(X)[0], 1))

        # Column j holds P(Y > classes_[j]) for every sample.
        cum = np.column_stack([clf.predict_proba(X)[:, 1] for clf in self.clfs])
        proba = np.empty((cum.shape[0], n_classes))
        proba[:, 0] = 1 - cum[:, 0]
        # Interior classes: P(Y > k-1) - P(Y > k), computed for all columns
        # at once (an empty slice when there are exactly two classes).
        proba[:, 1:-1] = cum[:, :-1] - cum[:, 1:]
        proba[:, -1] = cum[:, -1]
        return np.clip(proba, 0, 1)

    def predict(self, X):
        """Return, for each sample, the label with the highest probability."""
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]