| # pipeline.py | |
| from transformers import Pipeline | |
| import tensorflow as tf | |
| from tensorflow.keras.models import load_model | |
| from tensorflow.keras.preprocessing.text import tokenizer_from_json | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| import json | |
| import numpy as np | |
class NewsClassifierPipeline(Pipeline):
    """Classify news text as ``'foxnews'`` or ``'nbc'`` with a Keras model.

    The model is loaded from ``news_classifier.h5`` and the tokenizer from
    ``tokenizer.json``; both paths are resolved relative to the current
    working directory.
    """

    def __init__(self):
        # NOTE(review): transformers' Pipeline.__init__ normally expects a
        # ``model`` argument -- confirm this zero-argument call works with
        # the installed transformers version.
        super().__init__()
        self.model = load_model('news_classifier.h5')
        # JSON files are UTF-8 by convention; do not rely on the locale default.
        with open('tokenizer.json', encoding='utf-8') as f:
            tokenizer_data = json.load(f)
        self.tokenizer = tokenizer_from_json(tokenizer_data)

    def _sanitize_parameters(self, **kwargs):
        """Return empty parameter dicts for preprocess, forward and postprocess.

        This pipeline takes no call-time options, so nothing is routed to any
        of the three stages.
        """
        return {}, {}, {}

    def preprocess(self, text):
        """Tokenize ``text`` and return it as a padded batch containing one row.

        Args:
            text: The text to classify.

        Returns:
            The array produced by ``pad_sequences`` for the single sequence.
        """
        sequence = self.tokenizer.texts_to_sequences([text])
        # No ``maxlen`` is given, so the row keeps the length of the sequence.
        return pad_sequences(sequence)

    def _forward(self, texts):
        """Run the model and return one ``{'label', 'score'}`` dict per row.

        Args:
            texts: Either the padded array returned by ``preprocess`` or a raw
                text, which is preprocessed here first.

        Returns:
            A list with one dict per input row. ``label`` is ``'foxnews'``
            when the highest-scoring class index is 0 and ``'nbc'`` otherwise;
            ``score`` is the softmax value of that class as a ``float``.
        """
        # The pipeline call chain hands _forward the output of preprocess(),
        # so an array must not be tokenized a second time.
        if isinstance(texts, np.ndarray):
            model_inputs = texts
        else:
            model_inputs = self.preprocess(texts)

        predictions = self.model.predict(model_inputs)
        scores = np.asarray(tf.nn.softmax(predictions, axis=1))
        # Reduce per row (axis=1) so each input gets its own class and score
        # instead of a single value taken over the flattened batch.
        predicted_classes = np.argmax(predictions, axis=1)

        return [
            {
                'label': 'foxnews' if class_index == 0 else 'nbc',
                'score': float(row_scores[class_index]),
            }
            for row_scores, class_index in zip(scores, predicted_classes)
        ]

    def postprocess(self, model_outputs):
        """Return ``model_outputs`` unchanged; ``_forward`` already formats them."""
        return model_outputs