NeuroScan / evaluate.py
Shoaib-33's picture
Deployment Added
f16e7e0
# evaluate.py — confusion matrix, classification report, per-class metrics
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from utils import get_logger, load_config
from src.data_loader import get_data_generators
logger = get_logger("evaluate")
def evaluate_model(model, test_data, class_names: list,
                   model_name: str = "model", save_dir: str = "./logs"):
    """Evaluate ``model`` on ``test_data`` and save a report and confusion matrix.

    Predictions are obtained with ``model.predict`` and compared against
    ``test_data.classes``. A text classification report and a confusion-matrix
    PNG are written into ``save_dir`` using ``model_name`` as the file prefix.

    Args:
        model: Object exposing ``predict(test_data, verbose=1)`` that returns a
            2-D array of per-class scores (argmax is taken over axis 1).
        test_data: Object exposing ``reset()`` and a ``classes`` sequence of
            integer labels.
            NOTE(review): label/prediction alignment presumably requires the
            generator to be built with shuffling disabled — confirm in the
            data loader.
        class_names: Display names, indexed by integer class id.
        model_name: Prefix for the output files and log messages.
        save_dir: Directory for the outputs; created if it does not exist.

    Returns:
        dict with keys ``"test_accuracy"`` (float), ``"y_true"`` and
        ``"y_pred"`` (lists of integer class ids, truncated to equal length).

    Raises:
        ValueError: If a true or predicted class id falls outside
            ``range(len(class_names))``.
    """
    os.makedirs(save_dir, exist_ok=True)
    logger.info(f"Evaluating {model_name} on test set ...")

    # Reset generator so it always starts from the beginning
    test_data.reset()
    y_pred_prob = model.predict(test_data, verbose=1)
    y_pred = np.argmax(y_pred_prob, axis=1)
    # Coerce to ndarray so slicing, comparison and .tolist() work even when
    # the generator exposes ``classes`` as a plain list.
    y_true = np.asarray(test_data.classes)

    # Align lengths — generator may yield slightly more due to batch rounding
    min_len = min(len(y_true), len(y_pred))
    y_true = y_true[:min_len]
    y_pred = y_pred[:min_len]

    # Pin the label set to the full class list. Without this, sklearn infers
    # the labels from the data: a class that never appears in y_true/y_pred
    # makes classification_report fail on the target_names size mismatch and
    # shrinks the confusion matrix so its ticks no longer match class_names.
    label_ids = list(range(len(class_names)))
    observed = np.union1d(y_true, y_pred)
    if observed.size and (observed.min() < 0 or observed.max() >= len(class_names)):
        # Keep failing loudly (as the unpinned call did) instead of silently
        # dropping samples whose id has no corresponding name.
        raise ValueError(
            f"Class ids {observed.tolist()} do not fit the "
            f"{len(class_names)} entries of class_names"
        )

    # Classification report
    report = classification_report(
        y_true, y_pred,
        labels=label_ids, target_names=class_names, digits=4,
    )
    logger.info(f"\nClassification Report - {model_name}:\n{report}")

    report_path = os.path.join(save_dir, f"{model_name}_report.txt")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(f"Classification Report - {model_name}\n")
        f.write("=" * 60 + "\n")
        f.write(report)
    logger.info(f"Report saved -> {report_path}")

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    cm_path = os.path.join(save_dir, f"{model_name}_confusion_matrix.png")
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(
            cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names,
            linewidths=0.5, ax=ax,
        )
        ax.set_title(f"Confusion Matrix - {model_name}")
        ax.set_ylabel("True Label")
        ax.set_xlabel("Predicted Label")
        fig.tight_layout()
        fig.savefig(cm_path, dpi=100, bbox_inches="tight")
        plt.show()
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    logger.info(f"Confusion matrix saved -> {cm_path}")

    # Accuracy is computed directly from the aligned predictions rather than
    # via model.evaluate, so it matches the report and matrix above.
    test_acc = float((y_pred == y_true).mean())
    logger.info(f"{model_name} - Test accuracy: {test_acc:.4f}")

    return {
        "test_accuracy" : test_acc,
        "y_true" : y_true.tolist(),
        "y_pred" : y_pred.tolist(),
    }
def evaluate_all_models(models_dict: dict, test_data, class_names: list,
                        save_dir: str = "./logs"):
    """Run ``evaluate_model`` on each entry of ``models_dict`` and log a summary.

    Each model is evaluated under a file-safe name (lower-cased, spaces
    replaced by underscores); results are keyed by the original name.

    Args:
        models_dict: Mapping of display name -> model.
        test_data: Test data passed through to ``evaluate_model``.
        class_names: Class display names passed through to ``evaluate_model``.
        save_dir: Output directory passed through to ``evaluate_model``.

    Returns:
        dict mapping each original model name to the dict returned by
        ``evaluate_model`` for that model.
    """
    summary = {
        label: evaluate_model(
            net, test_data, class_names,
            model_name=label.lower().replace(" ", "_"),
            save_dir=save_dir,
        )
        for label, net in models_dict.items()
    }

    # Accuracy table: header, one row per model, closing rule.
    divider = "=" * 45
    logger.info("\n" + divider)
    logger.info(f"{'Model':<25} {'Test Acc':>10}")
    logger.info(divider)
    for label, outcome in summary.items():
        logger.info(f"{label:<25} {outcome['test_accuracy']:>10.4f}")
    logger.info(divider)

    return summary
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Script entry point: load the configuration and the test generator,
    # restore the saved best model, and evaluate it on the test set.
    cfg = load_config("config.yaml")
    _, _, test_data = get_data_generators(cfg)
    class_names = cfg["data"]["classes"]
    save_dir = cfg["models"]["save_dir"]
    logs_dir = cfg["models"]["logs_dir"]
    os.makedirs(logs_dir, exist_ok=True)

    # Load best saved model
    best_model_path = os.path.join(save_dir, "ft_best.h5")
    if not os.path.exists(best_model_path):
        logger.error(f"Model not found at {best_model_path}. Run train.py first.")
        # Raise SystemExit directly: the ``exit`` builtin is injected by the
        # ``site`` module for interactive use and is not guaranteed to exist
        # (e.g. ``python -S`` or frozen/embedded deployments).
        raise SystemExit(1)

    logger.info(f"Loading model from {best_model_path}")
    # compile=False avoids optimizer state errors when loading
    # MobileNetV2 or EfficientNetB0 models in TF 2.10
    model = tf.keras.models.load_model(best_model_path, compile=False)
    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    results = evaluate_model(
        model, test_data, class_names,
        model_name="best_model", save_dir=logs_dir,
    )
    logger.info("Evaluation complete.")