Rajeev Ranjan Pandey committed on
Commit
d703e0b
1 Parent(s): 5fe6799

fix: dynamic compression ratio, CUD badge in navbar, slate-300 borders, matched output text size

Browse files
frontend/src/components/BatchUpload.jsx CHANGED
@@ -2,7 +2,7 @@ import { UploadCloud, Database } from "lucide-react";
2
 
3
  export default function DatasetLoader() {
4
  return (
5
- <div className="rounded-2xl border border-slate-200 dark:border-white/[0.06] bg-white dark:bg-[#0d1326] p-5 shadow-sm dark:shadow-xl">
6
  <div className="flex items-center gap-2 mb-1 text-[10px] font-bold uppercase tracking-[0.2em] text-slate-400 dark:text-slate-500">
7
  <Database size={12}/> Load Your Dataset
8
  </div>
 
2
 
3
  export default function DatasetLoader() {
4
  return (
5
+ <div className="rounded-2xl border border-slate-300 dark:border-white/[0.07] bg-white dark:bg-[#0d1326] p-5 shadow-sm dark:shadow-xl">
6
  <div className="flex items-center gap-2 mb-1 text-[10px] font-bold uppercase tracking-[0.2em] text-slate-400 dark:text-slate-500">
7
  <Database size={12}/> Load Your Dataset
8
  </div>
frontend/src/components/DatasetToggle.jsx CHANGED
@@ -7,7 +7,7 @@ export default function DatasetToggle({ value, onChange }) {
7
  ];
8
 
9
  return (
10
- <div className="rounded-2xl border border-slate-200 dark:border-white/[0.06] bg-white dark:bg-[#0d1326] p-5 shadow-sm dark:shadow-xl">
11
  <div className="flex items-center gap-2 mb-4 text-[10px] font-bold uppercase tracking-[0.2em] text-slate-400 dark:text-slate-500">
12
  <Database size={12}/> Analysis Dataset
13
  </div>
 
7
  ];
8
 
9
  return (
10
+ <div className="rounded-2xl border border-slate-300 dark:border-white/[0.07] bg-white dark:bg-[#0d1326] p-5 shadow-sm dark:shadow-xl">
11
  <div className="flex items-center gap-2 mb-4 text-[10px] font-bold uppercase tracking-[0.2em] text-slate-400 dark:text-slate-500">
12
  <Database size={12}/> Analysis Dataset
13
  </div>
frontend/src/components/SummarizerWidget.jsx CHANGED
@@ -116,7 +116,7 @@ export default function SummarizerWidget({
116
 
117
  return (
118
  <div className="w-full h-full flex flex-col">
119
- <div className="rounded-2xl border border-slate-200 dark:border-white/[0.06] bg-white dark:bg-[#0d1326] shadow-sm dark:shadow-2xl flex-1 flex flex-col">
120
 
121
  {/* Input and Output Split Area */}
122
  <div className="grid gap-0 lg:grid-cols-2 flex-1">
@@ -158,7 +158,7 @@ export default function SummarizerWidget({
158
  <div className="flex flex-col custom-scroll">
159
  {summary ? (
160
  <>
161
- <p className="text-[15px] leading-[1.85] text-slate-800 dark:text-slate-100 whitespace-pre-wrap">{summary.replace(/<n>/gi, '\n\n').replace(/[ \t]+/g, ' ').trim()}</p>
162
  {extractedTags.length > 0 && (
163
  <div className="mt-3 flex flex-wrap gap-1.5">
164
  {extractedTags.map((tag, idx) => (
 
116
 
117
  return (
118
  <div className="w-full h-full flex flex-col">
119
+ <div className="rounded-2xl border border-slate-300 dark:border-white/[0.07] bg-white dark:bg-[#0d1326] shadow-sm dark:shadow-2xl flex-1 flex flex-col">
120
 
121
  {/* Input and Output Split Area */}
122
  <div className="grid gap-0 lg:grid-cols-2 flex-1">
 
158
  <div className="flex flex-col custom-scroll">
159
  {summary ? (
160
  <>
161
+ <p className="text-lg leading-[1.85] text-slate-800 dark:text-slate-100 whitespace-pre-wrap">{summary.replace(/<n>/gi, '\n\n').replace(/[ \t]+/g, ' ').trim()}</p>
162
  {extractedTags.length > 0 && (
163
  <div className="mt-3 flex flex-wrap gap-1.5">
164
  {extractedTags.map((tag, idx) => (
frontend/src/pages/Home.jsx CHANGED
@@ -84,9 +84,9 @@ export default function Home() {
84
  <span className="font-black text-base tracking-tight text-slate-900 dark:text-white uppercase">Traffic</span>
85
  <span className="text-orange-400 font-black text-base uppercase">Intel</span>
86
  </div>
87
- <span className="hidden sm:inline-flex ml-2 text-[10px] font-bold tracking-widest uppercase px-2.5 py-1 rounded-full border border-orange-500/30 bg-orange-500/10 text-orange-400">
88
- LLM Summarization Demo
89
- </span>
90
  </div>
91
  <div className="flex items-center gap-3">
92
  <span className="hidden md:flex items-center gap-1.5 text-xs font-medium text-slate-500">
@@ -107,11 +107,7 @@ export default function Home() {
107
 
108
  {/* Hero Banner */}
109
  <div className="mb-8 flex flex-col sm:flex-row sm:items-end sm:justify-between gap-4">
110
- <div className="space-y-3">
111
- <span className="inline-flex items-center gap-2 rounded-full border border-orange-500/30 bg-orange-500/10 px-4 py-1.5 text-xs font-bold uppercase tracking-widest text-orange-400">
112
- <span className="h-1.5 w-1.5 rounded-full bg-orange-500 dot-glow"></span>
113
- CUD · AAI Midterm Project
114
- </span>
115
  <h1 className="text-4xl lg:text-5xl xl:text-6xl font-black tracking-tight text-slate-900 dark:text-white leading-none">
116
  Turn Traffic Chaos{" "}
117
  <span className="text-transparent bg-clip-text bg-gradient-to-r from-orange-400 via-orange-500 to-amber-400">
@@ -128,7 +124,7 @@ export default function Home() {
128
  { label: "GCC Samples", value: "250+" },
129
  { label: "US Records", value: "5K+" }
130
  ].map(s => (
131
- <div key={s.label} className="text-center bg-slate-100 dark:bg-white/5 border border-slate-200 dark:border-white/[0.06] rounded-xl px-5 py-3">
132
  <div className="text-2xl font-black text-slate-900 dark:text-white">{s.value}</div>
133
  <div className="text-[10px] uppercase tracking-widest font-bold text-slate-500 dark:text-slate-500">{s.label}</div>
134
  </div>
@@ -137,7 +133,7 @@ export default function Home() {
137
  </div>
138
 
139
  {/* 3-Column Main Grid */}
140
- <div className="grid grid-cols-1 xl:grid-cols-[1fr_340px_300px] gap-5 items-stretch">
141
 
142
  {/* Column 1: Main Summarizer Widget */}
143
  <div className="h-full min-h-0">
@@ -154,7 +150,7 @@ export default function Home() {
154
 
155
  {/* Column 2: Dataset Preview */}
156
  <div className="h-full flex flex-col min-h-0">
157
- <div className="rounded-2xl border border-slate-200 dark:border-white/[0.06] bg-white dark:bg-[#0d1326] shadow-sm dark:shadow-2xl flex flex-col flex-1 overflow-hidden">
158
  <div className="flex items-center justify-between px-5 pt-5 pb-3 border-b border-slate-100 dark:border-white/[0.05]">
159
  <h3 className="text-xs font-bold uppercase tracking-[0.2em] text-slate-400 dark:text-slate-400">Dataset Preview</h3>
160
  <span className="rounded-full bg-slate-100 dark:bg-white/5 px-3 py-1 text-[11px] font-bold text-slate-500 dark:text-slate-400 border border-slate-200 dark:border-white/[0.08]">
 
84
  <span className="font-black text-base tracking-tight text-slate-900 dark:text-white uppercase">Traffic</span>
85
  <span className="text-orange-400 font-black text-base uppercase">Intel</span>
86
  </div>
87
+ <div className="hidden sm:flex flex-col ml-2 justify-center">
88
+ <span className="text-[9px] font-bold tracking-widest uppercase text-slate-400 dark:text-slate-500">CUD · AAI Midterm Project</span>
89
+ </div>
90
  </div>
91
  <div className="flex items-center gap-3">
92
  <span className="hidden md:flex items-center gap-1.5 text-xs font-medium text-slate-500">
 
107
 
108
  {/* Hero Banner */}
109
  <div className="mb-8 flex flex-col sm:flex-row sm:items-end sm:justify-between gap-4">
110
+ <div className="space-y-2">
 
 
 
 
111
  <h1 className="text-4xl lg:text-5xl xl:text-6xl font-black tracking-tight text-slate-900 dark:text-white leading-none">
112
  Turn Traffic Chaos{" "}
113
  <span className="text-transparent bg-clip-text bg-gradient-to-r from-orange-400 via-orange-500 to-amber-400">
 
124
  { label: "GCC Samples", value: "250+" },
125
  { label: "US Records", value: "5K+" }
126
  ].map(s => (
127
+ <div key={s.label} className="text-center bg-slate-100 dark:bg-white/5 border border-slate-300 dark:border-white/[0.07] rounded-xl px-5 py-3">
128
  <div className="text-2xl font-black text-slate-900 dark:text-white">{s.value}</div>
129
  <div className="text-[10px] uppercase tracking-widest font-bold text-slate-500 dark:text-slate-500">{s.label}</div>
130
  </div>
 
133
  </div>
134
 
135
  {/* 3-Column Main Grid */}
136
+ <div className="grid grid-cols-1 xl:grid-cols-[1fr_340px_300px] gap-5 items-stretch border-t border-slate-200 dark:border-white/[0.05] pt-6">
137
 
138
  {/* Column 1: Main Summarizer Widget */}
139
  <div className="h-full min-h-0">
 
150
 
151
  {/* Column 2: Dataset Preview */}
152
  <div className="h-full flex flex-col min-h-0">
153
+ <div className="rounded-2xl border border-slate-300 dark:border-white/[0.07] bg-white dark:bg-[#0d1326] shadow-sm dark:shadow-2xl flex flex-col flex-1 overflow-hidden">
154
  <div className="flex items-center justify-between px-5 pt-5 pb-3 border-b border-slate-100 dark:border-white/[0.05]">
155
  <h3 className="text-xs font-bold uppercase tracking-[0.2em] text-slate-400 dark:text-slate-400">Dataset Preview</h3>
156
  <span className="rounded-full bg-slate-100 dark:bg-white/5 px-3 py-1 text-[11px] font-bold text-slate-500 dark:text-slate-400 border border-slate-200 dark:border-white/[0.08]">
src/models/abstractive.py CHANGED
@@ -49,21 +49,26 @@ def generate_summary(text: str, model_name: str, config_path: str = "config.yaml
49
  source_text = f"{gen.prompt_prefix}{' '.join(str(text).split())}"
50
  encoded = tokenizer(source_text, truncation=True, max_length=gen.max_input_tokens, return_tensors="pt")
51
  encoded = {k: v.to(get_device()) for k, v in encoded.items()}
52
- actual_max_tokens = max_new_tokens or gen.max_new_tokens
53
- actual_min_tokens = gen.min_new_tokens
54
-
 
 
 
 
55
  with torch.inference_mode():
56
  output_ids = model.generate(
57
  **encoded,
58
- min_new_tokens=actual_min_tokens,
59
  max_new_tokens=actual_max_tokens,
60
  num_beams=gen.num_beams,
61
- length_penalty=gen.length_penalty,
62
- no_repeat_ngram_size=gen.no_repeat_ngram_size,
63
- early_stopping=bool(gen.early_stopping),
64
  )
65
  output_text = " ".join(tokenizer.decode(output_ids[0], skip_special_tokens=True).split())
66
- # Post-processing filters for common model hallucinations
 
67
  hallucinations = [
68
  "For confidential support call the Samaritans in the UK on 08457 90 90 90, visit a local Samaritans branch or click here for details.",
69
  "For confidential support call the Samaritans",
@@ -73,8 +78,10 @@ def generate_summary(text: str, model_name: str, config_path: str = "config.yaml
73
  ]
74
  for h in hallucinations:
75
  output_text = output_text.replace(h, "").strip()
 
76
  return " ".join(output_text.split())
77
 
 
78
  def available_abstractive_models(config_path: str = "config.yaml") -> List[str]:
79
  cfg = load_config(config_path)
80
  return [name for name, meta in cfg["models"].items() if meta.get("enabled", False)]
 
49
  source_text = f"{gen.prompt_prefix}{' '.join(str(text).split())}"
50
  encoded = tokenizer(source_text, truncation=True, max_length=gen.max_input_tokens, return_tensors="pt")
51
  encoded = {k: v.to(get_device()) for k, v in encoded.items()}
52
+
53
+ # Dynamically cap max_new_tokens at 55 % of input length to enforce compression.
54
+ # This prevents the model from echoing short inputs verbatim.
55
+ input_len = encoded["input_ids"].shape[-1]
56
+ dynamic_max = max(gen.min_new_tokens, min(int(input_len * 0.55), gen.max_new_tokens))
57
+ actual_max_tokens = max_new_tokens or dynamic_max
58
+
59
  with torch.inference_mode():
60
  output_ids = model.generate(
61
  **encoded,
62
+ min_new_tokens=gen.min_new_tokens,
63
  max_new_tokens=actual_max_tokens,
64
  num_beams=gen.num_beams,
65
+ length_penalty=2.0, # NOTE: in HF beam search, values > 0 favor longer outputs; use a value < 0 to favor shorter ones
66
+ no_repeat_ngram_size=4, # blocks 4-gram repetition / copy
67
+ early_stopping=True,
68
  )
69
  output_text = " ".join(tokenizer.decode(output_ids[0], skip_special_tokens=True).split())
70
+
71
+ # Post-processing: strip known hallucinations
72
  hallucinations = [
73
  "For confidential support call the Samaritans in the UK on 08457 90 90 90, visit a local Samaritans branch or click here for details.",
74
  "For confidential support call the Samaritans",
 
78
  ]
79
  for h in hallucinations:
80
  output_text = output_text.replace(h, "").strip()
81
+
82
  return " ".join(output_text.split())
83
 
84
+
85
  def available_abstractive_models(config_path: str = "config.yaml") -> List[str]:
86
  cfg = load_config(config_path)
87
  return [name for name, meta in cfg["models"].items() if meta.get("enabled", False)]