Spaces:
Running
Running
File size: 6,870 Bytes
e078b1d ae84ddd abf7059 e078b1d 55729b3 ae84ddd e078b1d ae84ddd e078b1d 55729b3 e078b1d 55729b3 e078b1d abf7059 ae84ddd abf7059 ae84ddd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 | from __future__ import annotations
from pathlib import Path
import pandas as pd
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from src.app.schemas import (
CompareRequest,
CompareResponse,
CompareResponseItem,
SampleItem,
SamplesResponse,
SummarizeRequest,
SummarizeResponse,
)
from src.app.services import summarize_with_model
from src.data.prepare import prepare_dataset
from src.data.utils import load_config
def _safe_int(val) -> int | None:
    """Cast *val* to ``int`` via ``float``, returning ``None`` when it cannot be cast.

    Going through ``float`` first means numeric strings such as ``"3.0"`` and
    float values such as ``3.0`` are accepted; ``int`` then truncates any
    fractional part.

    Args:
        val: Any value, e.g. a number, a numeric string, ``None`` or NaN.

    Returns:
        The truncated integer, or ``None`` if the value is of an unsupported
        type, is not numeric text, or is NaN / infinite.
    """
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. None, list).
        # ValueError: non-numeric text, or NaN passed to int().
        # OverflowError: infinity, or an int too large to become a float.
        return None
# CORS settings handed to the middleware below: every origin, method and
# header is accepted, and credentials are allowed.
# NOTE(review): the FastAPI CORS documentation says wildcard origins should not
# be combined with allow_credentials=True — confirm whether credentials are
# actually needed by the frontend before tightening this.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}

# The ASGI application object that every route in this module is attached to.
app = FastAPI(title="Traffic Incident Summarization API", version="0.4.0")
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
# NOTE(review): FastAPI documents ``on_event`` as deprecated in favour of
# lifespan handlers — consider migrating when the app construction is touched.
@app.on_event("startup")
def startup_prepare() -> None:
    """Build the combined corpus at startup when it is not on disk yet.

    Reads the ``combined_corpus_csv`` path from the loaded config and, if that
    file does not exist, runs ``prepare_dataset`` for the ``gcc`` source.
    A failure during preparation is logged and otherwise ignored so that the
    API still starts.
    """
    import logging  # function-scope import keeps this block self-contained

    cfg = load_config()
    combined = Path(cfg["paths"]["combined_corpus_csv"])
    if combined.exists():
        return

    try:
        prepare_dataset(source="gcc", config_path="config.yaml")
    except Exception:
        # Keep startup resilient, especially when Kaggle credentials are
        # absent, but leave a trace instead of swallowing the error silently.
        logging.getLogger(__name__).warning(
            "Dataset preparation failed during startup; continuing without %s",
            combined,
            exc_info=True,
        )
@app.get("/health")
def health():
    """Liveness probe: return a fixed status, service name and version."""
    payload = dict(
        status="ok",
        service="traffic-incident-summarization",
        version="0.4.0",
    )
    return payload
# Human-readable label for each numeric severity code.
_SEVERITY_LABELS = {1: "Low", 2: "Medium", 3: "High", 4: "Critical"}
# Preview order: High, Medium, Critical, Low — most interesting first.
_SEVERITY_PREVIEW_ORDER = (3, 2, 4, 1)
# Columns tried, in this order, when composing a sample's title.
_LOCATION_COLUMNS = ("road_name", "Street", "district", "City", "State", "emirate")
# Upper bound on rows added by the fill step of the preview.
_MAX_SAMPLES = 5


def _clean_cell(val) -> str:
    """Return *val* as stripped text, mapping missing / NaN-like values to ``""``."""
    text = str(val).strip() if pd.notna(val) else ""
    return "" if text.lower() in ("nan", "none", "") else text


def _pick_sample_rows(df: pd.DataFrame, limit: int = _MAX_SAMPLES) -> list[pd.Series]:
    """Pick one row per severity level first, then top rows up to *limit*."""
    picked: list[pd.Series] = []
    if "Severity" in df.columns:
        # Single pass over the column: remember the position of the first row
        # seen for each wanted severity and stop once all of them are found.
        wanted = set(_SEVERITY_PREVIEW_ORDER)
        first_pos: dict[int, int] = {}
        for pos, raw in enumerate(df["Severity"]):
            code = _safe_int(raw)
            if code in wanted and code not in first_pos:
                first_pos[code] = pos
                if len(first_pos) == len(wanted):
                    break
        picked = [
            df.iloc[first_pos[code]]
            for code in _SEVERITY_PREVIEW_ORDER
            if code in first_pos
        ]

    # Fill remaining slots from rows not yet selected, in frame order.
    used_labels = {row.name for row in picked}
    for _, row in df.iterrows():
        if len(picked) >= limit:
            break
        if row.name not in used_labels:
            picked.append(row)
            used_labels.add(row.name)
    return picked


def _sample_item_from_row(row: pd.Series, track: str) -> SampleItem:
    """Turn one corpus row into the ``SampleItem`` returned by ``/samples``."""
    # NOTE(review): ``row.name`` is the row's index label in the loaded frame,
    # not its position in the preview, so ids need not be 1..N — confirm that
    # is what the frontend expects.
    idx = row.name

    location_parts = [_clean_cell(row.get(col)) for col in _LOCATION_COLUMNS]
    title = (
        " · ".join([part for part in location_parts if part][:3])
        or f"Sample incident {idx + 1}"
    )

    desc = _clean_cell(row.get("Description", "")) or "No description available."
    # Append a severity sentence only when the code is usable and the text
    # does not already mention severity; unknown codes are labelled "Medium".
    sev_int = _safe_int(row.get("Severity", ""))
    if sev_int is not None and "severity" not in desc.lower():
        sev_str = _SEVERITY_LABELS.get(sev_int, "Medium")
        desc = f"{desc} Classified as {sev_str} severity."

    src_lbl = _clean_cell(row.get("source_label", ""))
    if not src_lbl:
        src_lbl = "US Accidents" if track == "us" else "GCC sample"

    return SampleItem(
        id=str(idx + 1),
        dataset_track=track,
        title=title,
        text=desc,
        source_label=src_lbl,
    )


@app.get("/samples", response_model=SamplesResponse)
def get_samples(track: str = "gcc"):
    """Return a small, severity-diverse preview of incidents for *track*.

    Loads the combined corpus CSV (preparing it first when the file is
    missing), keeps rows whose ``dataset_track`` equals *track* (missing
    values count as ``"us"``), orders them newest first when ``Start_Time``
    is present, and picks one row per severity level plus filler rows.

    Args:
        track: Dataset track to sample from.

    Returns:
        A ``SamplesResponse`` holding the selected items.

    Raises:
        HTTPException: 404 when no rows remain for *track*.
    """
    cfg = load_config()
    combined_path = Path(cfg["paths"]["combined_corpus_csv"])
    if not combined_path.exists():
        prepare_dataset(source="gcc", config_path="config.yaml")
    df = pd.read_csv(combined_path)

    if "dataset_track" in df.columns:
        df = df[df["dataset_track"].fillna("us") == track]
    if df.empty:
        raise HTTPException(status_code=404, detail=f"No samples found for dataset track: {track}")

    if "Start_Time" in df.columns:
        # ``assign`` builds a new frame, so no column is written onto a
        # filtered slice of the original data.
        df = df.assign(Start_Time=pd.to_datetime(df["Start_Time"], errors="coerce"))
        df = df.sort_values(by="Start_Time", ascending=False)

    # Pick one representative sample per severity level for a compact, diverse preview
    items = [_sample_item_from_row(row, track) for row in _pick_sample_rows(df)]
    return SamplesResponse(items=items)
@app.post("/summarize", response_model=SummarizeResponse)
def summarize(request: SummarizeRequest):
    """Summarize ``request.text`` with the model named in the request.

    Any exception raised while generating the summary or building the
    response is re-raised as an HTTP 500 whose detail is the error message.
    """
    try:
        generated = summarize_with_model(
            request.text,
            request.model_choice,
            request.max_length,
        )
        response = SummarizeResponse(
            model_name=request.model_choice,
            summary=generated,
            dataset_track=request.dataset_track,
            word_count=len(generated.split()),
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return response
@app.post("/compare", response_model=CompareResponse)
def compare(request: CompareRequest):
    """Summarize ``request.text`` once with each model in ``request.model_choices``.

    Each model is invoked a single time; the reported word count is computed
    from that same summary, so the two always agree.

    Raises:
        HTTPException: 500, with the error message as detail, when any model
            call or response construction fails.
    """
    try:
        items = []
        for model_name in request.model_choices:
            # One call per model: a second call only to count words would
            # double the work and could count a different output.
            summary = summarize_with_model(request.text, model_name, request.max_length)
            items.append(
                CompareResponseItem(
                    model_name=model_name,
                    summary=summary,
                    word_count=len(summary.split()),
                )
            )
        return CompareResponse(dataset_track=request.dataset_track, items=items)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
# ── Serve React Frontend (Single-Container Deployment e.g., Hugging Face) ──
# Built frontend locations, anchored two directory levels above this file.
_DIST_PATH = Path(__file__).parent.parent.joinpath("frontend", "dist")
_INDEX_HTML = _DIST_PATH.joinpath("index.html")
_ASSETS_PATH = _DIST_PATH.joinpath("assets")

# Mount static assets only when the build produced an assets directory
# (is_dir() is already False for a path that does not exist).
if _ASSETS_PATH.is_dir():
    app.mount(
        "/assets",
        StaticFiles(directory=str(_ASSETS_PATH)),
        name="assets",
    )
def _resolve_inside_dist(relative: str) -> Path | None:
    """Resolve *relative* under ``_DIST_PATH``; ``None`` if it escapes or is invalid."""
    try:
        root = _DIST_PATH.resolve()
        candidate = (root / relative).resolve()
        # Raises ValueError when the resolved path is not below the dist root,
        # e.g. for "../" segments or an absolute path in the URL.
        candidate.relative_to(root)
    except (OSError, RuntimeError, ValueError):
        # Also covers unresolvable input such as embedded NUL bytes or
        # symlink loops.
        return None
    return candidate


# Catch-all SPA route — always registered so HF Spaces never gets a 404 at root
@app.get("/{full_path:path}")
async def serve_frontend(full_path: str):
    """Serve a built frontend file, falling back to ``index.html`` or a JSON notice.

    Args:
        full_path: The request path after the leading slash. It comes straight
            from the URL, so it is only served when it resolves to a regular
            file inside the dist directory.

    Returns:
        The matching file, else ``index.html`` when it exists, else a 200 JSON
        payload stating that only the API is available.
    """
    # Try to serve an exact file from dist (e.g. favicon.ico, robots.txt)
    static_file = _resolve_inside_dist(full_path)
    if static_file is not None and static_file.is_file():
        return FileResponse(static_file)

    # SPA fallback: always return index.html so React Router handles the path
    if _INDEX_HTML.exists():
        return FileResponse(_INDEX_HTML)

    # Frontend build not found — return diagnostic JSON instead of 404
    return JSONResponse(
        status_code=200,
        content={
            "status": "api_only",
            "message": "Frontend build not found. API is running.",
            "api_docs": "/docs",
            "health": "/health",
        },
    )
|