| import tensorflow as tf |
|
|
| |
def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in an image from filename, decodes it into a 3-channel tensor and
    resizes it to (img_shape, img_shape, 3).

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): size to resize target image to, default 224
    scale (bool): whether to divide pixel values by 255 (range 0-1 for
        8-bit images), default True

    Returns
    -------
    The resized image tensor of shape (img_shape, img_shape, 3); divided by
    255 when `scale` is True, otherwise left in the decoded value range.
    """
    # Read the raw encoded bytes from disk
    img = tf.io.read_file(filename)

    # channels=3 forces RGB output even for grayscale/RGBA files so the
    # documented shape holds; expand_animations=False keeps the result 3-D
    # (a GIF would otherwise decode to 4-D and break the resize below).
    img = tf.io.decode_image(img, channels=3, expand_animations=False)

    # Resize to a square of the requested side length
    img = tf.image.resize(img, [img_shape, img_shape])

    if scale:
        # Rescale 0-255 pixel values to 0-1
        return img / 255.
    return img
|
|
| |
| |
| import itertools |
| import matplotlib.pyplot as plt |
| import numpy as np |
| from sklearn.metrics import confusion_matrix |
|
|
| |
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
    """Makes a labelled confusion matrix comparing predictions and ground truth labels.

    If classes is passed, confusion matrix will be labelled, if not, integer class values
    will be used.

    Args:
      y_true: Array of truth labels (must be same shape as y_pred).
      y_pred: Array of predicted labels (must be same shape as y_true).
      classes: Array of class labels (e.g. string form). If `None` (or empty), integer labels are used.
      figsize: Size of output figure (default=(10, 10)).
      text_size: Size of output figure text (default=15).
      norm: if True, each cell also shows its row-normalized percentage (default=False).
      savefig: save confusion matrix to "confusion_matrix.png" (default=False).

    Returns:
      None. The confusion matrix is drawn on a new matplotlib figure (and
      optionally written to disk when `savefig` is True).

    Example usage:
      make_confusion_matrix(y_true=test_labels, # ground truth test labels
                            y_pred=y_preds, # predicted labels
                            classes=class_names, # array of class label names
                            figsize=(15, 15),
                            text_size=10)
    """
    # Create the confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]

    cm_norm = None
    if norm:
        # Normalize each row by its total. Rows that sum to zero (a class that
        # is predicted but never appears in y_true) are left at 0 instead of
        # producing NaN and a divide-by-zero warning.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_norm = np.divide(cm.astype("float"), row_totals,
                            out=np.zeros(cm.shape, dtype=float),
                            where=row_totals != 0)

    # Plot the matrix; colour intensity encodes the raw counts
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)
    fig.colorbar(cax)

    # Explicit None/length check rather than truthiness: a NumPy array of
    # class names raises ValueError when used directly in a boolean context.
    if classes is not None and len(classes) > 0:
        labels = classes
    else:
        labels = np.arange(n_classes)

    # Label the axes
    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),
           yticks=np.arange(n_classes),
           xticklabels=labels,
           yticklabels=labels)

    # matshow puts the x-axis on top by default; move it to the bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    # Cells darker than the midpoint get white text so they stay readable
    threshold = (cm.max() + cm.min()) / 2.

    # Write the count (and optional percentage) into every cell
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if norm:
            cell_text = f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)"
        else:
            cell_text = f"{cm[i, j]}"
        # Draw on `ax` directly so the text cannot land on another axes
        ax.text(j, i, cell_text,
                horizontalalignment="center",
                color="white" if cm[i, j] > threshold else "black",
                size=text_size)

    # Save the figure to the current working directory
    if savefig:
        fig.savefig("confusion_matrix.png")
| |
| |
def pred_and_plot(model, filename, class_names):
    """
    Loads the image at `filename`, runs `model` on it and shows the image
    with the predicted class name as the plot title.

    Args:
      model: object exposing a `predict` method (called on a batch of one image).
      filename: path of the image, passed to `load_and_prep_image`.
      class_names: indexable collection mapping a class index to its name.
    """
    # Load and preprocess with the helper's defaults
    image = load_and_prep_image(filename)

    # The model is given a batch, so add a leading batch dimension
    batch = tf.expand_dims(image, axis=0)
    probabilities = model.predict(batch)

    # More than one output value -> pick the argmax;
    # a single output value -> round it to 0 or 1.
    has_multiple_outputs = len(probabilities[0]) > 1
    if has_multiple_outputs:
        class_index = probabilities.argmax()
    else:
        class_index = int(tf.round(probabilities)[0][0])
    predicted_label = class_names[class_index]

    # Show the image with the prediction as its title, axes hidden
    plt.imshow(image)
    plt.title(f"Prediction: {predicted_label}")
    plt.axis(False)
| |
| import datetime |
|
|
def create_tensorboard_callback(dir_name, experiment_name):
    """
    Builds a TensorBoard callback that writes its logs to a timestamped folder.

    The log directory is "dir_name/experiment_name/YYYYmmdd-HHMMSS", where the
    last component is the current local date and time.

    Args:
      dir_name: target directory to store TensorBoard log files
      experiment_name: name of experiment directory (e.g. efficientnet_model_1)

    Returns:
      The created `tf.keras.callbacks.TensorBoard` instance.
    """
    experiment_dir = dir_name + "/" + experiment_name
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = experiment_dir + "/" + timestamp

    callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    print(f"Saving TensorBoard log files to: {log_dir}")
    return callback
|
|
| |
| import matplotlib.pyplot as plt |
|
|
def plot_loss_curves(history):
    """
    Plots training vs. validation loss, then training vs. validation accuracy
    on a second figure.

    Args:
      history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History).
        Its `history` dict must contain the keys 'loss', 'val_loss',
        'accuracy' and 'val_accuracy'.
    """
    # Pull every series up front so a missing key fails before anything is drawn
    train_loss = history.history['loss']
    valid_loss = history.history['val_loss']
    train_acc = history.history['accuracy']
    valid_acc = history.history['val_accuracy']

    # One x position per recorded epoch
    epoch_axis = range(len(history.history['loss']))

    # Loss curves (drawn on whichever figure is current)
    for series, series_label in ((train_loss, 'training_loss'), (valid_loss, 'val_loss')):
        plt.plot(epoch_axis, series, label=series_label)
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Accuracy curves on a fresh figure
    plt.figure()
    for series, series_label in ((train_acc, 'training_accuracy'), (valid_acc, 'val_accuracy')):
        plt.plot(epoch_axis, series, label=series_label)
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
|
|
def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Plots the metrics of two consecutive training runs as one continuous curve.

    Accuracy is drawn in the top panel and loss in the bottom panel; a vertical
    line at x = initial_epochs - 1 marks where the second run begins.

    Args:
      original_history: History object from original model (before new_history)
      new_history: History object from continued model training (after original_history)
      initial_epochs: Number of epochs in original_history (new_history plot starts from here)
    """
    # Metrics recorded during the first run
    first_acc = original_history.history["accuracy"]
    first_loss = original_history.history["loss"]
    first_val_acc = original_history.history["val_accuracy"]
    first_val_loss = original_history.history["val_loss"]

    # Append the second run's metrics to the first run's
    joined_acc = first_acc + new_history.history["accuracy"]
    joined_loss = first_loss + new_history.history["loss"]
    joined_val_acc = first_val_acc + new_history.history["val_accuracy"]
    joined_val_loss = first_val_loss + new_history.history["val_loss"]

    def draw_panel(row, train_series, val_series, train_label, val_label, legend_loc, title):
        """Draw one training/validation pair plus the fine-tuning marker."""
        plt.subplot(2, 1, row)
        plt.plot(train_series, label=train_label)
        plt.plot(val_series, label=val_label)
        # Vertical marker spanning the current y-limits of this panel
        marker_x = initial_epochs - 1
        plt.plot([marker_x, marker_x],
                 plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(title)

    plt.figure(figsize=(8, 8))
    draw_panel(1, joined_acc, joined_val_acc,
               'Training Accuracy', 'Validation Accuracy',
               'lower right', 'Training and Validation Accuracy')
    draw_panel(2, joined_loss, joined_val_loss,
               'Training Loss', 'Validation Loss',
               'upper right', 'Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()
| |
| |
| |
| import zipfile |
|
|
def unzip_data(filename):
    """
    Unzips filename into the current working directory.

    Args:
      filename (str): a filepath to a target zip folder to be unzipped.

    Raises:
      Whatever `zipfile.ZipFile` raises for a missing or invalid archive
      (e.g. FileNotFoundError, zipfile.BadZipFile).
    """
    # The context manager closes the archive even if extraction fails,
    # so the file handle is never leaked.
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()
| |
| |
| |
| import zipfile |
| import requests |
| import os |
|
|
def download_and_unzip(url, target_folder):
    """
    Downloads the zip archive at url into target_folder and extracts it there.

    The archive is saved as target_folder/<last path component of url> and is
    left on disk after extraction.

    Args:
      url (str): address of the zip file to download.
      target_folder (str): directory to store the archive and its extracted
        contents; created if it does not exist.

    Raises:
      requests.HTTPError: if the server answers with an error status.
      zipfile.BadZipFile: if the downloaded file is not a valid zip archive.
    """
    # Make sure the destination exists (an empty string means the current directory)
    if target_folder:
        os.makedirs(target_folder, exist_ok=True)
    filename = os.path.join(target_folder, os.path.basename(url))

    # Download before opening the output file so a failed request does not
    # leave an empty or bogus archive behind, and fail loudly on HTTP errors
    # instead of writing an error page to disk.
    r = requests.get(url)
    r.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(r.content)

    # Extract the downloaded archive next to it
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(target_folder)
|
|
| |
| |
| import os |
|
|
def walk_through_dir(dir_path):
    """
    Prints a one-line summary for dir_path and every directory beneath it.

    Args:
      dir_path (str): target directory

    Returns:
      None. For each directory visited by os.walk, prints:
        number of subdirectories it contains
        number of files it contains (reported as "images")
        path of the directory
    """
    for current_dir, subdirs, files in os.walk(dir_path):
        n_dirs = len(subdirs)
        n_files = len(files)
        print(f"There are {n_dirs} directories and {n_files} images in '{current_dir}'.")
| |
| |
| from sklearn.metrics import accuracy_score, precision_recall_fscore_support |
|
|
def calculate_results(y_true, y_pred):
    """
    Computes accuracy plus weighted-average precision, recall and F1 score
    for a set of predictions.

    Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array

    Returns a dictionary with keys "accuracy" (scaled by 100, i.e. a
    percentage), "precision", "recall" and "f1" (as returned by
    scikit-learn with average="weighted").
    """
    # Accuracy is reported as a percentage
    accuracy_pct = accuracy_score(y_true, y_pred) * 100

    # Fourth element (support) is not used
    precision, recall, f1, _support = precision_recall_fscore_support(
        y_true, y_pred, average="weighted"
    )

    return {
        "accuracy": accuracy_pct,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }
|
|