CUD-Traffic-AI / src /data /gcc_generator.py
Rajeev Ranjan Pandey
Refine UI and fix model selection bugs
e078b1d
from __future__ import annotations
from pathlib import Path
import pandas as pd
from src.data.utils import load_config
def _safe(value: object, fallback: str = "unknown") -> str:
if value is None:
return fallback
text = str(value).strip()
return text if text and text.lower() != "nan" else fallback
def structured_row_to_narrative(row: pd.Series) -> str:
emirate = _safe(row.get("emirate"), "the UAE")
road = _safe(row.get("road_name"), "an urban corridor")
district = _safe(row.get("district"), "a metropolitan district")
incident_type = _safe(row.get("incident_type"), "traffic incident")
vehicles = _safe(row.get("vehicles_involved"), "multiple")
injuries = _safe(row.get("injury_level"), "unknown injuries")
lane_status = _safe(row.get("lane_status"), "traffic disruption")
timestamp = _safe(row.get("event_time"), "an unspecified time")
weather = _safe(row.get("weather"), "normal road conditions")
severity = _safe(row.get("severity"), "moderate")
consequence = _safe(row.get("consequence"), "traffic delays")
return (
f"A {incident_type.lower()} was recorded in {district}, {emirate}, on {road} at {timestamp}. "
f"The event involved {vehicles} vehicle(s) and was categorized as {severity.lower()} severity under {weather.lower()}. "
f"Responders reported {injuries.lower()} with {lane_status.lower()}, resulting in {consequence.lower()}."
)
def generate_gcc_narratives(structured_df: pd.DataFrame, config_path: str | Path = "config.yaml") -> pd.DataFrame:
cfg = load_config(config_path)
rows = structured_df.copy()
rows["Description"] = rows.apply(structured_row_to_narrative, axis=1)
rows["dataset_track"] = "gcc"
rows["text_len"] = rows["Description"].str.len()
cols = [
"source_id",
"source_label",
"official_url",
"incident_id",
"country",
"emirate",
"district",
"road_name",
"event_time",
"incident_type",
"severity",
"injury_level",
"lane_status",
"consequence",
"Description",
"text_len",
"dataset_track",
]
for col in cols:
if col not in rows.columns:
rows[col] = None
return rows[cols].reset_index(drop=True)