| from transformers import Pipeline |
| import tensorflow as tf |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
| import json |
| import os |
|
|
def load_tokenizer(tokenizer_path):
    """Read the JSON file at *tokenizer_path* and return its parsed content.

    Args:
        tokenizer_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (for example a ``dict``
        or a ``str``, depending on how the file was written).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # locale encoding, which can mis-decode non-ASCII vocabulary entries.
    with open(tokenizer_path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
|
class NewsClassificationPipeline(Pipeline):
    """Label news text as ``"foxnews"`` or ``"nbc"`` using a bundled Keras model.

    The model (``news_classifier.h5``) and the tokenizer data
    (``tokenizer.json``) are loaded from the directory containing this file.
    """

    # Artifacts expected next to this module.
    MODEL_FILENAME = 'news_classifier.h5'
    TOKENIZER_FILENAME = 'tokenizer.json'
    # Length every token sequence is padded/truncated to before prediction.
    MAX_SEQUENCE_LENGTH = 128
    # Scores strictly above this value are labelled "foxnews".
    DECISION_THRESHOLD = 0.5

    def __init__(self, model=None, tokenizer=None, **kwargs):
        """Load the bundled model and tokenizer.

        Args:
            model: Accepted for signature compatibility; not used. The model
                is always loaded from ``news_classifier.h5``.
            tokenizer: Accepted for signature compatibility; not used. The
                tokenizer is always loaded from ``tokenizer.json``.
            **kwargs: Forwarded unchanged to the base class initializer.
        """
        # NOTE(review): `model` and `tokenizer` are not forwarded to the base
        # initializer; confirm this matches the `Pipeline.__init__` signature
        # of the installed transformers version.
        super().__init__(**kwargs)

        base_dir = os.path.dirname(__file__)
        self.model = tf.keras.models.load_model(
            os.path.join(base_dir, self.MODEL_FILENAME)
        )

        self.tokenizer_config = load_tokenizer(
            os.path.join(base_dir, self.TOKENIZER_FILENAME)
        )
        # __call__ needs an object exposing `texts_to_sequences`; the raw
        # JSON data alone cannot tokenize. `tokenizer_from_json` expects a
        # JSON string, so re-serialize when the file parsed to a dict.
        # NOTE(review): assumes tokenizer.json was produced by a Keras
        # `Tokenizer.to_json()` export -- confirm against the training code.
        if isinstance(self.tokenizer_config, str):
            serialized = self.tokenizer_config
        else:
            serialized = json.dumps(self.tokenizer_config)
        self.tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
            serialized
        )

    def __call__(self, texts, **kwargs):
        """Classify one text or a collection of texts.

        Args:
            texts: A single string, or an iterable of strings.
            **kwargs: Accepted for call compatibility; not used.

        Returns:
            For a single string, one ``{"label": str, "score": float}`` dict.
            For an iterable, a list of such dicts in input order (an empty
            list for an empty iterable).
        """
        # Record the input kind *before* wrapping it in a list; otherwise the
        # single-string case can no longer be detected when returning.
        single_input = isinstance(texts, str)
        batch = [texts] if single_input else list(texts)
        if not batch:
            # Nothing to classify; skip the model call entirely.
            return []

        sequences = self.tokenizer.texts_to_sequences(batch)
        padded = pad_sequences(sequences, maxlen=self.MAX_SEQUENCE_LENGTH)

        predictions = self.model.predict(padded)

        results = []
        for pred in predictions:
            # NOTE(review): assumes the model emits one value per row in
            # [0, 1], where higher means "foxnews" -- confirm.
            positive = pred[0]
            label = "foxnews" if positive > self.DECISION_THRESHOLD else "nbc"
            # Report the score of the predicted label, not always the
            # "foxnews" score.
            score = float(positive if label == "foxnews" else 1 - positive)
            results.append({"label": label, "score": score})

        return results[0] if single_input else results