{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9e49735d",
   "metadata": {},
   "source": [
    "# Model Evaluation\n",
    "Generate summaries for the manual evaluation set and compare model outputs.\n",
    "\n",
    "1. **Spot-check** — summarize one sample description with each registered model.\n",
    "2. **Aggregate** — score the saved model outputs and build a per-model metrics table."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7e10c01",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports and configuration live in one cell so the notebook survives\n",
    "# Restart Kernel -> Run All (no cell depends on hidden state).\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "from src.evaluate.eval import evaluate_outputs\n",
    "from src.evaluate.tables import aggregate_metrics_table\n",
    "from src.models.registry import summarize_text\n",
    "\n",
    "DATA_DIR = Path('../data/processed')\n",
    "EVAL_PATH = DATA_DIR / 'eval_set_with_refs.csv'\n",
    "OUTPUTS_PATH = DATA_DIR / 'model_outputs.csv'\n",
    "MODEL_NAMES = ['lead1', 'textrank', 'flan_t5_small']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7e10c02",
   "metadata": {},
   "source": [
    "## Spot-check a single example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c44453f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Qualitative sanity check: summarize the first evaluation example with every model.\n",
    "eval_df = pd.read_csv(EVAL_PATH)\n",
    "sample_text = eval_df['Description'].iloc[0]\n",
    "print('Source:', sample_text)\n",
    "for model_name in MODEL_NAMES:\n",
    "    summary = summarize_text(sample_text, model_name)\n",
    "    print('\\n', model_name, ':', summary)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7e10c03",
   "metadata": {},
   "source": [
    "## Aggregate metrics over saved model outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "235752ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score each (model, example) pair, then aggregate to one row per model.\n",
    "# Ending with the bare name renders the table via the rich DataFrame display.\n",
    "outputs_df = pd.read_csv(OUTPUTS_PATH)\n",
    "detailed = evaluate_outputs(outputs_df)\n",
    "aggregate = aggregate_metrics_table(detailed)\n",
    "aggregate"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}