import os
import pickle
from typing import Dict, List, Any

import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow import keras
class PreTrainedPipeline:
    """Generate a caption for an image with an encoder / decoder model pair.

    The pipeline extracts InceptionV3 feature maps from the image, passes them
    through the saved ``encoder`` model and then samples words one at a time
    from the saved ``decoder`` model until ``<end>`` is produced or
    ``MAX_CAPTION_LENGTH`` tokens have been drawn.
    """

    # Upper bound on the number of tokens sampled for one caption.
    MAX_CAPTION_LENGTH = 46
    # Second dimension of the all-zeros initial ``hidden`` tensor handed to the
    # decoder. Presumably the decoder's unit count -- confirm against the
    # saved model.
    HIDDEN_SIZE = 512
    # Height/width every image is resized to before InceptionV3 preprocessing.
    IMAGE_SIZE = (299, 299)

    def __init__(self, path=""):
        """Load the models and the tokenizer stored under ``path``.

        Args:
            path: Directory containing the ``decoder`` and ``encoder`` saved
                models and ``tokenizer.pickle``. The default ``""`` resolves
                everything relative to the current working directory.
        """
        self.decoder = keras.models.load_model(os.path.join(path, "decoder"))
        self.encoder = keras.models.load_model(os.path.join(path, "encoder"))

        # Headless InceptionV3: the output of its last layer is used as the
        # image feature map.
        image_model = tf.keras.applications.InceptionV3(
            include_top=False, weights="imagenet"
        )
        self.image_features_extract_model = tf.keras.Model(
            image_model.input, image_model.layers[-1].output
        )

        # Resolve the tokenizer next to the models instead of relying on the
        # process working directory.
        # NOTE(security): unpickling executes arbitrary code; only load a
        # tokenizer file that ships with the trusted model artifacts.
        with open(os.path.join(path, "tokenizer.pickle"), "rb") as handle:
            self.tokenizer = pickle.load(handle)

    @staticmethod
    def load_image(img):
        """Turn an image into a preprocessed InceptionV3 input tensor.

        Args:
            img: Either a ``PIL.Image.Image`` or the encoded JPEG bytes /
                string tensor accepted by ``tf.io.decode_jpeg``.

        Returns:
            A float tensor of shape ``(299, 299, 3)`` that has been passed
            through ``inception_v3.preprocess_input``.
        """
        if isinstance(img, Image.Image):
            # Force three channels so grayscale / RGBA / palette images work.
            pixels = tf.convert_to_tensor(np.asarray(img.convert("RGB")))
        else:
            pixels = tf.io.decode_jpeg(img, channels=3)
        pixels = tf.image.resize(pixels, PreTrainedPipeline.IMAGE_SIZE)
        return tf.keras.applications.inception_v3.preprocess_input(pixels)

    @staticmethod
    def _build_response(words):
        """Join the sampled words into one caption and wrap it for the caller.

        Returns:
            A single-item list ``[{"label": <caption>, "score": 1.0}]``.
        """
        # NOTE(review): the score is a constant placeholder -- a sampled
        # caption has no classification confidence. Confirm this is what the
        # serving harness expects.
        return [{"label": " ".join(words), "score": 1.0}]

    def __call__(self, inputs: "Image.Image") -> List[Dict[str, Any]]:
        """Caption ``inputs``.

        Args:
            inputs (:obj:`PIL.Image`):
                The raw image as PIL. All required transformations (RGB
                conversion, resizing, InceptionV3 preprocessing) happen here.

        Returns:
            A :obj:`list` holding one dict of the form
            ``{"label": "<caption>", "score": 1.0}``. Words are sampled with
            ``tf.random.categorical``, so repeated calls on the same image may
            produce different captions.
        """
        image_batch = tf.expand_dims(self.load_image(inputs), 0)
        feature_map = self.image_features_extract_model(image_batch)
        # Collapse the two spatial axes of the rank-4 feature map into one so
        # the encoder receives (batch, locations, channels).
        feature_map = tf.reshape(
            feature_map, (feature_map.shape[0], -1, feature_map.shape[3])
        )
        features = self.encoder(feature_map)

        hidden = tf.zeros((1, self.HIDDEN_SIZE))
        dec_input = tf.expand_dims([self.tokenizer.word_index["<start>"]], 0)

        words = []
        for _ in range(self.MAX_CAPTION_LENGTH):
            predictions, hidden, _attention = self.decoder(
                dec_input, features, hidden
            )
            # Sample (rather than argmax) the next token id from the logits.
            predicted_id = int(tf.random.categorical(predictions, 1)[0][0].numpy())
            word = self.tokenizer.index_word[predicted_id]
            if word == "<end>":
                # The end marker terminates decoding and is not part of the
                # caption text.
                break
            words.append(word)
            # Feed the sampled token back in as the next decoder input.
            dec_input = tf.expand_dims([predicted_id], 0)

        return self._build_response(words)
|
|