| """ |
| Gradio demo for the Watsonx Docs Type Classifier. |
| Loads the best trained model from models/ and serves predictions. |
| """ |
|
|
| from pathlib import Path |
|
|
| import gradio as gr |
| import joblib |
| import numpy as np |
| from sentence_transformers import SentenceTransformer |
|
|
| LABELS = ["conceptual", "how-to"] |
|
|
| model_name = (Path("models") / "best_model_name.txt").read_text().strip() |
| embedder = SentenceTransformer(model_name) |
| clf = joblib.load(Path("models") / "best_model.joblib") |
|
|
|
|
| def softmax(x): |
| e = np.exp(x - np.max(x)) |
| return e / e.sum() |
|
|
|
|
| def predict(text: str) -> dict: |
| if not text.strip(): |
| return {label: 0.0 for label in LABELS} |
| embedding = embedder.encode([text], convert_to_numpy=True) |
| if hasattr(clf, "predict_proba"): |
| probs = clf.predict_proba(embedding)[0] |
| else: |
| scores = clf.decision_function(embedding)[0] |
| |
| if np.ndim(scores) == 0: |
| scores = np.array([-scores, scores]) |
| probs = softmax(scores) |
| return {label: float(p) for label, p in zip(LABELS, probs)} |
|
|
|
|
| demo = gr.Interface( |
| fn=predict, |
| inputs=gr.Textbox( |
| label="Document text (title + body)", |
| lines=8, |
| placeholder="Paste the title and opening text of a Watsonx documentation page here.", |
| ), |
| outputs=gr.Label(num_top_classes=2, label="Predicted document type"), |
| title="Watsonx Docs Type Classifier", |
| description=( |
| "Predicts whether a Watsonx documentation page is **conceptual** or **how-to**. " |
| "Paste the page title and opening text below." |
| ), |
| ) |
|
|
| if __name__ == "__main__": |
| demo.launch(share=False) |
|
|