Rajeev Ranjan Pandey committed
Commit · 7976e9d
Parent(s): 5378bb9
fix: restore working copy/save buttons in dark mode and elevate model prompt styling
frontend/src/components/SummarizerWidget.jsx
CHANGED

@@ -179,13 +179,13 @@ export default function SummarizerWidget({
       <div className="mt-5 flex flex-wrap gap-3 border-t border-white/5 pt-4">
         <button
           onClick={handleCopy}
-          className="flex-1 flex justify-center items-center gap-2 rounded-xl bg-slate-100 dark:bg-white/…
+          className="flex-1 flex justify-center items-center gap-2 rounded-xl bg-slate-100 dark:bg-white/[0.08] hover:bg-slate-200 dark:hover:bg-white/[0.12] py-3 text-sm font-bold text-slate-700 dark:text-white transition border border-slate-200 dark:border-white/10"
         >
           {copied ? <CheckCircle2 size={16}/> : <Copy size={16} />} {copied ? "Copied" : "Copy Summary"}
         </button>
         <button
           onClick={() => downloadTextFile(`summary_${modelChoice}.txt`, summary)}
-          className="flex-1 flex justify-center items-center gap-2 rounded-xl border border-slate-200 dark:border-white/10 bg-white dark:bg-transparent py-3 text-sm font-bold text-slate-600 dark:text-slate-300 transition hover:bg-white/…
+          className="flex-1 flex justify-center items-center gap-2 rounded-xl border border-slate-200 dark:border-white/10 bg-white dark:bg-transparent py-3 text-sm font-bold text-slate-600 dark:text-slate-300 transition hover:bg-slate-50 dark:hover:bg-white/[0.05]"
         >
           <Download size={16} /> Save
         </button>
frontend/src/pages/Home.jsx
CHANGED

@@ -13,7 +13,11 @@ const FALLBACK_TEXT = {
 };
 
 export default function Home() {
-  const [isDark, setIsDark] = useState(…
+  const [isDark, setIsDark] = useState(() => {
+    // Apply immediately (before first paint) so dark: variants work on load
+    document.documentElement.classList.add("dark");
+    return true;
+  });
   const [datasetTrack, setDatasetTrack] = useState("gcc");
   const [text, setText] = useState(FALLBACK_TEXT.gcc);
   const [modelChoice, setModelChoice] = useState("bart_large_cnn");

@@ -29,9 +33,9 @@
   useEffect(() => { datasetTrackRef.current = datasetTrack; }, [datasetTrack]);
   useEffect(() => { modelChoiceRef.current = modelChoice; }, [modelChoice]);
 
+  // Keep dark class in sync when user toggles
   useEffect(() => {
-    if (isDark) document.documentElement.classList.add("dark");
-    else document.documentElement.classList.remove("dark");
+    document.documentElement.classList.toggle("dark", isDark);
   }, [isDark]);
 
   useEffect(() => {
src/models/abstractive.py
CHANGED

@@ -7,20 +7,23 @@ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from src.data.utils import load_config
 
 # ── Per-model instruction prefixes ──────────────────────────────────────────
-# …
+# Changed prompting to be highly professional, requesting a "classy",
+# high-impact executive tone suitable for official intelligence reports.
 _MODEL_PROMPTS: dict[str, str] = {
     "bart_large_cnn": (
-        "…
-        "…
-        "…
+        "Re-write the following traffic event into a highly professional executive "
+        "incident brief. Focus on creating an impactful, formal summary highlighting "
+        "severity and operational disruption: "
     ),
     "flan_t5_small": (
-        "…
-        "incident …
-        "…
+        "Task: Create a professional, high-impact Executive Traffic Intelligence Brief "
+        "from the following incident. Emphasize severity, exact location, and direct "
+        "consequences in a formal tone. "
+        "Incident details: "
     ),
     "pegasus_cnn": (
-        "…
+        "Generate a formal, impactful Traffic Intelligence Report summarizing the key "
+        "operational facts from this incident: "
     ),
 }
 
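These prefixes are prepended to the raw incident text to form the source_text that generate_summary tokenizes (see the hunks below). A minimal sketch of that wiring, assuming plain string concatenation; build_source_text is a hypothetical helper, not part of this commit, and the real code may instead route the prefix through config.yaml's prompt_prefix:

    # Hypothetical helper illustrating how a per-model prefix would be applied.
    def build_source_text(text: str, model_name: str) -> str:
        prefix = _MODEL_PROMPTS.get(model_name, "")  # empty prefix for unknown models
        return prefix + text.strip()

    # e.g. build_source_text("M25 closed at J10 after a multi-vehicle collision.", "pegasus_cnn")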
@@ -55,7 +58,7 @@ def build_generation_config(model_name: str, config_path: str = "config.yaml"):
         min_new_tokens=gen_cfg["default_min_new_tokens"],
         max_new_tokens=gen_cfg["default_max_new_tokens"],
         num_beams=gen_cfg["num_beams"],
-        length_penalty=…
+        length_penalty=1.0,  # Reverted length_penalty to 1.0 (default) for natural flow
         no_repeat_ngram_size=gen_cfg["no_repeat_ngram_size"],
         early_stopping=gen_cfg["early_stopping"],
         prompt_prefix=model_cfg.get("prompt_prefix", ""),
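For context on why 1.0 reads as neutral here: Hugging Face beam search ranks finished hypotheses by their summed log-probability divided by length ** length_penalty. Because summed log-probabilities are negative, an exponent above 1.0 makes longer hypotheses score relatively better, a smaller one favors shorter ones, and 1.0 is plain length normalization. A toy illustration of that scoring rule:

    def beam_score(sum_log_probs: float, length: int, length_penalty: float) -> float:
        # Mirrors BeamHypotheses-style scoring: higher (less negative) is better.
        return sum_log_probs / (length ** length_penalty)

    print(beam_score(-12.0, 40, 1.0))  # -0.3
    print(beam_score(-12.0, 40, 2.0))  # -0.0075: long outputs penalized far less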
@@ -73,10 +76,9 @@ def generate_summary(text: str, model_name: str, config_path: str = "config.yaml
     encoded = tokenizer(source_text, truncation=True, max_length=gen.max_input_tokens, return_tensors="pt")
     encoded = {k: v.to(get_device()) for k, v in encoded.items()}
 
-    # …
-    …
-    …
-    actual_max_tokens = max_new_tokens or dynamic_max
+    # Limit to the configured max_tokens. The previous dynamic strict limit forced the
+    # models to behave weirdly or copy; instead, let the model use its own stopping logic.
+    actual_max_tokens = max_new_tokens or gen.max_new_tokens
 
     with torch.inference_mode():
         output_ids = model.generate(
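One note on the actual_max_tokens fallback above: Python's `or` treats every falsy value as missing, so an explicit max_new_tokens=0 from a caller would silently fall back to the config default. A stricter variant (hypothetical, not in this commit) tests for None explicitly:

    # Only substitute the config default when the caller passed nothing at all.
    actual_max_tokens = gen.max_new_tokens if max_new_tokens is None else max_new_tokens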
@@ -84,12 +86,22 @@
             min_new_tokens=gen.min_new_tokens,
             max_new_tokens=actual_max_tokens,
             num_beams=gen.num_beams,
-            length_penalty=…
-            no_repeat_ngram_size=…
+            length_penalty=gen.length_penalty,
+            no_repeat_ngram_size=gen.no_repeat_ngram_size,
             early_stopping=True,
         )
     output_text = " ".join(tokenizer.decode(output_ids[0], skip_special_tokens=True).split())
 
+    # Strip the instruction template echo
+    for prefix in _MODEL_PROMPTS.values():
+        if output_text.lower().startswith(prefix.replace("Task: ", "").lower().strip()[:20]):
+            output_text = output_text[len(prefix):].strip()
+
+    # Generic stripping of prefixes the models sometimes generate
+    output_text = output_text.replace("Executive Incident Brief:", "")
+    output_text = output_text.replace("Traffic Intelligence Report:", "")
+    output_text = output_text.replace("Incident report:", "")
+
     # Strip known hallucinations
     hallucinations = [
         "For confidential support call the Samaritans in the UK on 08457 90 90 90, visit a local Samaritans branch or click here for details.",
@@ -103,7 +115,6 @@
 
     return " ".join(output_text.split())
 
-
 def available_abstractive_models(config_path: str = "config.yaml") -> List[str]:
     cfg = load_config(config_path)
     return [name for name, meta in cfg["models"].items() if meta.get("enabled", False)]
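available_abstractive_models simply filters config.yaml for enabled entries. A toy illustration of the same filter with an assumed config shape (keys mirror the code above):

    cfg = {"models": {
        "bart_large_cnn": {"enabled": True},
        "flan_t5_small": {"enabled": False},
        "pegasus_cnn": {"enabled": True},
    }}
    print([name for name, meta in cfg["models"].items() if meta.get("enabled", False)])
    # ['bart_large_cnn', 'pegasus_cnn']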