# itsjhuang's picture
# Upload app.py with huggingface_hub
# eb4875c verified
"""
Gradio demo for the Watsonx Docs Type Classifier.
Loads the best trained model from models/ and serves predictions.
"""
from pathlib import Path
import gradio as gr
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer
# Output class names; predict() pairs them positionally with the
# classifier's per-class scores.
LABELS = [
    "conceptual",
    "how-to",
]

# The text file stores the identifier that is handed to SentenceTransformer.
model_name = Path("models", "best_model_name.txt").read_text().strip()
embedder = SentenceTransformer(model_name)

# NOTE: joblib.load unpickles the file, so models/ must only ever contain
# trusted artifacts.
clf = joblib.load(Path("models", "best_model.joblib"))
def softmax(x, axis=None):
    """Return the numerically stable softmax of ``x``.

    Args:
        x: Array-like of scores (NumPy array or plain sequence of numbers).
        axis: Axis along which to normalise. ``None`` (the default)
            normalises over every element, which is what the original
            single-argument version did.

    Returns:
        An array shaped like ``x`` whose entries are non-negative and sum
        to 1 along ``axis`` (or over the whole array when ``axis`` is None).

    Raises:
        ValueError: If ``x`` is empty (propagated from ``numpy.max``).
    """
    x = np.asarray(x)
    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=axis, keepdims=True)
def predict(text: str) -> dict:
    """Score a documentation snippet against each label in ``LABELS``.

    Args:
        text: Raw text entered by the user. ``None``, empty, and
            whitespace-only values are all treated as "no input".

    Returns:
        A dict mapping each entry of ``LABELS`` to a float score. Every
        score is 0.0 when there is no input. Otherwise the scores come from
        the classifier's ``predict_proba`` output or, when the classifier
        has no such method, from a softmax over its ``decision_function``
        scores.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError instead of returning the empty result.
    if text is None or not text.strip():
        return dict.fromkeys(LABELS, 0.0)

    embedding = embedder.encode([text], convert_to_numpy=True)

    if hasattr(clf, "predict_proba"):
        probs = clf.predict_proba(embedding)[0]
    else:
        scores = clf.decision_function(embedding)[0]
        # LinearSVC returns a scalar for binary problems; expand it into a
        # two-element score vector so softmax yields one value per label.
        if np.ndim(scores) == 0:
            scores = np.array([-scores, scores])
        probs = softmax(scores)

    # NOTE(review): pairing by position assumes the classifier's class order
    # matches LABELS -- confirm against the training code.
    return {label: float(p) for label, p in zip(LABELS, probs)}
# Multi-line text box that receives the document text to classify.
_text_input = gr.Textbox(
    lines=8,
    label="Document text (title + body)",
    placeholder="Paste the title and opening text of a Watsonx documentation page here.",
)

# Label widget that displays the per-class scores returned by predict().
_label_output = gr.Label(label="Predicted document type", num_top_classes=2)

# Wire the input and output widgets to the prediction function.
demo = gr.Interface(
    fn=predict,
    inputs=_text_input,
    outputs=_label_output,
    title="Watsonx Docs Type Classifier",
    description=(
        "Predicts whether a Watsonx documentation page is **conceptual** or **how-to**. "
        "Paste the page title and opening text below."
    ),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=False)