File size: 1,304 Bytes
b2de734
 
de435e1
7532efb
b2de734
de435e1
b2de734
 
 
13ad768
b2de734
 
 
c356db2
b2de734
 
c356db2
b2de734
c356db2
b2de734
 
 
c356db2
b2de734
 
 
13ad768
c356db2
 
b2de734
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from transformers import Pipeline
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import json
import os

def load_tokenizer(tokenizer_path):
    """Parse the JSON file at *tokenizer_path* and return its contents.

    Args:
        tokenizer_path: Path of the JSON file to read.

    Returns:
        The decoded top-level JSON value, exactly as produced by
        ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8; being explicit keeps non-ASCII vocabulary entries
    # from being decoded with a platform-dependent locale encoding.
    with open(tokenizer_path, encoding="utf-8") as f:
        return json.load(f)

class NewsClassificationPipeline(Pipeline):
    """Classify news text as ``"foxnews"`` or ``"nbc"`` with a Keras model.

    The model (``news_classifier.h5``) and the tokenizer definition
    (``tokenizer.json``) are loaded from the directory containing this module.
    """

    def __init__(self, model=None, tokenizer=None, **kwargs):
        """Load the Keras model and rebuild the tokenizer from disk.

        Args:
            model: Accepted for signature compatibility; not used. The model
                is always loaded from ``news_classifier.h5``.
            tokenizer: Accepted for signature compatibility; not used. The
                tokenizer is always rebuilt from ``tokenizer.json``.
            **kwargs: Forwarded unchanged to the base class initializer.
        """
        # NOTE(review): the base initializer is invoked without a model, as in
        # the original code -- confirm the installed transformers version
        # accepts that.
        super().__init__(**kwargs)

        base_dir = os.path.dirname(__file__)
        self.model = tf.keras.models.load_model(
            os.path.join(base_dir, 'news_classifier.h5')
        )

        self.tokenizer_config = load_tokenizer(
            os.path.join(base_dir, 'tokenizer.json')
        )
        # __call__ needs an object exposing texts_to_sequences(), so turn the
        # stored configuration back into a Keras tokenizer.
        # tokenizer_from_json expects a JSON *string*; the file may decode to
        # either that string (double-encoded) or to the already-parsed value.
        if isinstance(self.tokenizer_config, str):
            tokenizer_json = self.tokenizer_config
        else:
            tokenizer_json = json.dumps(self.tokenizer_config)
        self.tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
            tokenizer_json
        )

    def __call__(self, texts, **kwargs):
        """Classify one text or a batch of texts.

        Args:
            texts: A single string, or an iterable of strings.
            **kwargs: Accepted but not used.

        Returns:
            For a single string, one ``{"label": ..., "score": ...}`` dict.
            For an iterable, a list of such dicts in input order (an empty
            list for empty input).
        """
        # Remember the input kind *before* wrapping it in a list; checking
        # afterwards would always see a list and never unwrap the result.
        single_input = isinstance(texts, str)
        batch = [texts] if single_input else list(texts)
        if not batch:
            return []

        sequences = self.tokenizer.texts_to_sequences(batch)
        # presumably 128 matches the sequence length used in training -- TODO confirm
        padded = pad_sequences(sequences, maxlen=128)

        predictions = self.model.predict(padded)

        results = []
        for pred in predictions:
            # pred[0] is treated as the "foxnews" score; the reported score is
            # always that of the chosen label.
            label = "foxnews" if pred[0] > 0.5 else "nbc"
            score = float(pred[0] if label == "foxnews" else 1 - pred[0])
            results.append({"label": label, "score": score})

        return results[0] if single_input else results