Rajeev Ranjan Pandey committed
Commit 7976e9d · 1 Parent(s): 5378bb9

fix: restore working copy/save buttons in dark mode and elevate model prompt styling

frontend/src/components/SummarizerWidget.jsx CHANGED
@@ -179,13 +179,13 @@ export default function SummarizerWidget({
   <div className="mt-5 flex flex-wrap gap-3 border-t border-white/5 pt-4">
     <button
       onClick={handleCopy}
-      className="flex-1 flex justify-center items-center gap-2 rounded-xl bg-slate-100 dark:bg-white/8 hover:bg-slate-200 dark:hover:bg-white/12 py-3 text-sm font-bold text-slate-700 dark:text-white transition border border-slate-200 dark:border-white/10"
+      className="flex-1 flex justify-center items-center gap-2 rounded-xl bg-slate-100 dark:bg-white/[0.08] hover:bg-slate-200 dark:hover:bg-white/[0.12] py-3 text-sm font-bold text-slate-700 dark:text-white transition border border-slate-200 dark:border-white/10"
     >
       {copied ? <CheckCircle2 size={16}/> : <Copy size={16} />} {copied ? "Copied" : "Copy Summary"}
     </button>
     <button
       onClick={() => downloadTextFile(`summary_${modelChoice}.txt`, summary)}
-      className="flex-1 flex justify-center items-center gap-2 rounded-xl border border-slate-200 dark:border-white/10 bg-white dark:bg-transparent py-3 text-sm font-bold text-slate-600 dark:text-slate-300 transition hover:bg-white/5"
+      className="flex-1 flex justify-center items-center gap-2 rounded-xl border border-slate-200 dark:border-white/10 bg-white dark:bg-transparent py-3 text-sm font-bold text-slate-600 dark:text-slate-300 transition hover:bg-slate-50 dark:hover:bg-white/[0.05]"
     >
       <Download size={16} /> Save
     </button>
 
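Why this restores the buttons: 8 and 12 are not steps on Tailwind's default opacity scale, so on Tailwind v3 (which this project appears to use) dark:bg-white/8 and dark:hover:bg-white/12 compile to nothing and the buttons rendered with no dark-mode background at all. The arbitrary-value forms white/[0.08] and white/[0.12] always generate CSS. The Save button also trades the barely-visible hover:bg-white/5 for a proper pair of hover states (hover:bg-slate-50 in light mode, dark:hover:bg-white/[0.05] in dark).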
frontend/src/pages/Home.jsx CHANGED
@@ -13,7 +13,11 @@ const FALLBACK_TEXT = {
 };
 
 export default function Home() {
-  const [isDark, setIsDark] = useState(true);
+  const [isDark, setIsDark] = useState(() => {
+    // Apply immediately, before first paint, so dark: variants work on load
+    document.documentElement.classList.add("dark");
+    return true;
+  });
   const [datasetTrack, setDatasetTrack] = useState("gcc");
   const [text, setText] = useState(FALLBACK_TEXT.gcc);
   const [modelChoice, setModelChoice] = useState("bart_large_cnn");
@@ -29,9 +33,9 @@ export default function Home() {
   useEffect(() => { datasetTrackRef.current = datasetTrack; }, [datasetTrack]);
   useEffect(() => { modelChoiceRef.current = modelChoice; }, [modelChoice]);
 
+  // Keep the dark class in sync when the user toggles
   useEffect(() => {
-    if (isDark) document.documentElement.classList.add("dark");
-    else document.documentElement.classList.remove("dark");
+    document.documentElement.classList.toggle("dark", isDark);
   }, [isDark]);
 
   useEffect(() => {
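Why the initializer trick works: useEffect runs after the browser has painted, so with the old code the first frame rendered before the dark class was on <html> and every dark: variant missed. The side effect inside the useState initializer runs synchronously during the first render, ahead of paint; the effect below then only has to keep the class in sync with later toggles.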
src/models/abstractive.py CHANGED
@@ -7,20 +7,23 @@ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from src.data.utils import load_config
 
 # ── Per-model instruction prefixes ────────────────────────────────────────────
-# Prepended to raw incident text so models rewrite instead of echo.
+# Changed prompting to be highly professional, requesting a "classy",
+# high-impact executive tone suitable for official intelligence reports.
 _MODEL_PROMPTS: dict[str, str] = {
     "bart_large_cnn": (
-        "Generate a concise traffic incident summary. "
-        "Report only: location, incident type, severity, and road impact. "
-        "Be brief. Incident report: "
+        "Re-write the following traffic event into a highly professional executive "
+        "incident brief. Focus on creating an impactful, formal summary highlighting "
+        "severity and operational disruption: "
     ),
     "flan_t5_small": (
-        "Write a one-sentence traffic incident summary covering location, "
-        "incident type, severity level, and road impact in under 35 words. "
-        "Traffic report: "
+        "Task: Create a professional, high-impact Executive Traffic Intelligence Brief "
+        "from the following incident. Emphasize severity, exact location, and direct "
+        "consequences in a formal tone. "
+        "Incident details: "
     ),
     "pegasus_cnn": (
-        "Summarize the key facts from this traffic incident in one compact sentence: "
+        "Generate a formal, impactful Traffic Intelligence Report summarizing the key "
+        "operational facts from this incident: "
     ),
 }
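For context, a minimal sketch of how a prefix presumably reaches the model; build_source_text is hypothetical (the diff only shows source_text being consumed further down), and _MODEL_PROMPTS is the dict from the hunk above:

    # Hypothetical plumbing: prepend the per-model instruction prefix to the
    # cleaned incident text before tokenization (names assumed, not from the diff).
    def build_source_text(clean_text: str, model_name: str) -> str:
        prefix = _MODEL_PROMPTS.get(model_name, "")  # empty prefix if unknown model
        return prefix + clean_text

    incident = "I-95 NB closed at Exit 12 after a multi-vehicle collision."
    print(build_source_text(incident, "flan_t5_small"))
    # -> "Task: Create a professional, high-impact Executive Traffic Intelligence
    #     Brief ... Incident details: I-95 NB closed at Exit 12 ..."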
 
@@ -55,7 +58,7 @@ def build_generation_config(model_name: str, config_path: str = "config.yaml"):
         min_new_tokens=gen_cfg["default_min_new_tokens"],
         max_new_tokens=gen_cfg["default_max_new_tokens"],
         num_beams=gen_cfg["num_beams"],
-        length_penalty=gen_cfg["length_penalty"],
+        length_penalty=1.0,  # Reverted length_penalty to 1.0 (the default) for natural flow
         no_repeat_ngram_size=gen_cfg["no_repeat_ngram_size"],
         early_stopping=gen_cfg["early_stopping"],
         prompt_prefix=model_cfg.get("prompt_prefix", ""),
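Worth spelling out what this revert does: in Hugging Face beam search, a finished beam's score is its summed log-probability divided by length ** length_penalty. Log-probabilities are negative, so exponents above 1.0 shrink long beams' scores toward zero and therefore favor longer candidates, not shorter ones. A small sketch of the arithmetic (scores are illustrative):

    # Beam scoring as Hugging Face applies length_penalty (illustrative numbers).
    def beam_score(sum_logprob: float, length: int, length_penalty: float) -> float:
        return sum_logprob / (length ** length_penalty)

    for lp in (1.0, 3.0):
        short = beam_score(-8.0, length=20, length_penalty=lp)
        long = beam_score(-20.0, length=45, length_penalty=lp)
        print(lp, "short wins" if short > long else "long wins")
    # 1.0 -> short wins (-0.400 vs -0.444)
    # 3.0 -> long wins  (-1.0e-03 vs -2.2e-04)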
@@ -73,10 +76,9 @@ def generate_summary(text: str, model_name: str, config_path: str = "config.yaml
     encoded = tokenizer(source_text, truncation=True, max_length=gen.max_input_tokens, return_tensors="pt")
     encoded = {k: v.to(get_device()) for k, v in encoded.items()}
 
-    # Dynamic cap: limit output to 50 % of raw input token count to force compression.
-    raw_len = tokenizer(clean_text, return_tensors="pt")["input_ids"].shape[-1]
-    dynamic_max = max(gen.min_new_tokens, min(int(raw_len * 0.50), gen.max_new_tokens))
-    actual_max_tokens = max_new_tokens or dynamic_max
+    # Cap at the configured max_new_tokens. The previous dynamic cap forced the
+    # models to behave oddly or copy; instead, let the model use its own stopping logic.
+    actual_max_tokens = max_new_tokens or gen.max_new_tokens
 
     with torch.inference_mode():
         output_ids = model.generate(
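A quick worked example of the cap being removed, with illustrative config values:

    # Old behaviour: a 120-token report was capped at 60 generated tokens.
    min_new_tokens, max_new_tokens = 20, 128   # illustrative config values
    raw_len = 120                              # token count of the raw input

    dynamic_max = max(min_new_tokens, min(int(raw_len * 0.50), max_new_tokens))
    print(dynamic_max)       # 60

    # New behaviour: the configured ceiling applies regardless of input length.
    print(max_new_tokens)    # 128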
@@ -84,12 +86,22 @@ def generate_summary(text: str, model_name: str, config_path: str = "config.yaml
             min_new_tokens=gen.min_new_tokens,
             max_new_tokens=actual_max_tokens,
             num_beams=gen.num_beams,
-            length_penalty=3.0,       # strongly prefers concise outputs
-            no_repeat_ngram_size=4,   # blocks 4-gram copying from input
+            length_penalty=gen.length_penalty,
+            no_repeat_ngram_size=gen.no_repeat_ngram_size,
             early_stopping=True,
         )
     output_text = " ".join(tokenizer.decode(output_ids[0], skip_special_tokens=True).split())
 
+    # Strip the instruction template echo
+    for prefix in _MODEL_PROMPTS.values():
+        if output_text.lower().startswith(prefix.replace("Task: ", "").lower().strip()[:20]):
+            output_text = output_text[len(prefix):].strip()
+
+    # Generic stripping of prefixes the models sometimes generate
+    output_text = output_text.replace("Executive Incident Brief:", "")
+    output_text = output_text.replace("Traffic Intelligence Report:", "")
+    output_text = output_text.replace("Incident report:", "")
+
     # Strip known hallucinations
     hallucinations = [
         "For confidential support call the Samaritans in the UK on 08457 90 90 90, visit a local Samaritans branch or click here for details.",
@@ -103,7 +115,6 @@ def generate_summary(text: str, model_name: str, config_path: str = "config.yaml
 
     return " ".join(output_text.split())
 
-
 def available_abstractive_models(config_path: str = "config.yaml") -> List[str]:
     cfg = load_config(config_path)
     return [name for name, meta in cfg["models"].items() if meta.get("enabled", False)]
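A quick trace of the echo-stripping heuristic added above (the echoed output string is invented): the match runs on only the first 20 characters of the normalized prefix, which tolerates small truncations, and a hit removes the full prefix length. _MODEL_PROMPTS is the dict defined at the top of this file:

    prefix = _MODEL_PROMPTS["bart_large_cnn"]
    probe = prefix.replace("Task: ", "").lower().strip()[:20]
    print(probe)             # "re-write the followi"

    echoed = prefix + "I-95 NB closed at Exit 12 after a multi-vehicle collision."
    if echoed.lower().startswith(probe):
        echoed = echoed[len(prefix):].strip()
    print(echoed)            # "I-95 NB closed at Exit 12 after a multi-vehicle collision."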
 