from difflib import SequenceMatcher
from typing import Optional

# Keywords and short phrases associated with each detected category name.
# Description words are compared against these lists by fuzzy_match_word().
CATEGORY_KEYWORDS: dict[str, list[str]] = {
    "Damaged Road Issues": [
        "road", "damage", "damaged", "broken", "crack", "cracked",
        "pavement", "asphalt", "street", "highway", "lane", "surface",
        "rough", "uneven",
    ],
    "Pothole Issues": [
        "pothole", "hole", "pit", "crater", "dip", "depression", "bump",
        "cavity", "hollow", "gap", "broken road", "road",
    ],
    "Illegal Parking Issues": [
        "parking", "parked", "car", "vehicle", "illegal", "wrong",
        "blocking", "obstruction", "no parking", "double park", "sidewalk",
    ],
    "Broken Road Sign Issues": [
        "sign", "signboard", "traffic sign", "road sign", "broken sign",
        "fallen sign", "damaged sign", "missing sign", "bent",
    ],
    "Fallen Trees": [
        "tree", "fallen", "branch", "trunk", "uprooted", "collapsed",
        "blocking", "storm", "wind", "timber",
    ],
    "Littering/Garbage on Public Places": [
        "garbage", "trash", "litter", "waste", "rubbish", "dump", "dirty",
        "filth", "debris", "plastic", "pile", "mess", "junk", "disposal",
    ],
    "Dead Animal Pollution": [
        "dead", "animal", "carcass", "body", "corpse", "rotting", "smell",
        "stink", "dog", "cat", "bird", "cow", "roadkill",
    ],
    "Damaged Concrete Structures": [
        "concrete", "structure", "wall", "pillar", "bridge", "flyover",
        "footpath", "sidewalk", "curb", "crack", "broken",
        "Damaged Concrete",
    ],
    "Damaged Electric Wires and Poles": [
        "electric", "wire", "pole", "cable", "power", "electricity",
        "hanging", "exposed", "sparking", "transformer", "light pole",
    ],
}


def normalize_text(text: str) -> str:
    """Return *text* lower-cased with leading/trailing whitespace removed."""
    return text.lower().strip()


def calculate_similarity(s1: str, s2: str) -> float:
    """Return the case-insensitive ``SequenceMatcher`` ratio of two strings.

    The result is a float between 0.0 (nothing in common) and 1.0
    (identical after lower-casing).
    """
    return SequenceMatcher(None, s1.lower(), s2.lower()).ratio()


def fuzzy_match_word(word: str, keywords: list[str], threshold: float = 0.7) -> bool:
    """Return True if *word* loosely matches any entry of *keywords*.

    A keyword matches when, after case normalization, either

    * the keyword is contained in the word or the word is contained in the
      keyword (this also covers exact equality), or
    * the word has at least 4 characters and its similarity ratio with the
      keyword is at least *threshold*.

    Args:
        word: The candidate word; it is lower-cased and stripped first.
        keywords: Keywords or phrases to compare against.
        threshold: Minimum similarity ratio for the fuzzy comparison.

    Returns:
        True on the first matching keyword, otherwise False. An empty or
        whitespace-only *word* never matches.
    """
    word = normalize_text(word)
    if not word:
        # "" is a substring of every string, so without this guard an empty
        # word would match every keyword list.
        return False

    for keyword in keywords:
        # Compare case-insensitively, consistent with calculate_similarity();
        # otherwise mixed-case keywords could never match by substring.
        keyword = keyword.lower()
        if not keyword:
            # An empty keyword would be a substring of every word.
            continue
        if keyword in word or word in keyword:
            return True
        if len(word) >= 4 and calculate_similarity(word, keyword) >= threshold:
            return True
    return False
def match_description_to_category(
    description: Optional[str],
    detected_category: str,
    threshold: float = 0.6,
) -> tuple[bool, float, list[str]]:
    """Check whether a free-text description agrees with a detected category.

    The description is normalized, commas and periods are turned into
    spaces, and the text is split on whitespace. Every token of at least
    3 characters is tested against the category's keyword list with
    ``fuzzy_match_word``.

    Args:
        description: User-supplied text; ``None`` or empty yields no match.
        detected_category: Key looked up in ``CATEGORY_KEYWORDS``.
        threshold: Score at or above which the result counts as a match
            even without considering the number of matched tokens.

    Returns:
        ``(is_match, match_score, matched_words)`` where ``match_score`` is
        the number of matched tokens divided by the total number of tokens
        (short tokens included in the total), and ``is_match`` is True when
        at least one token matched or the score reaches *threshold*.
        ``(False, 0.0, [])`` is returned when the description is empty, the
        category has no keywords, or the description contains no tokens.
    """
    if not description:
        return False, 0.0, []

    category_terms = CATEGORY_KEYWORDS.get(detected_category, [])
    if not category_terms:
        return False, 0.0, []

    # Treat commas and periods as token separators.
    separators_to_spaces = str.maketrans(",.", "  ")
    tokens = normalize_text(description).translate(separators_to_spaces).split()
    if not tokens:
        return False, 0.0, []

    # Tokens shorter than 3 characters are never tested, but they still
    # count towards the denominator of the score below.
    hits = [
        token
        for token in tokens
        if len(token) >= 3 and fuzzy_match_word(token, category_terms)
    ]

    score = len(hits) / len(tokens)
    matched = bool(hits) or score >= threshold
    return matched, score, hits


def auto_validate_issue(
    description: Optional[str],
    detected_categories: list[str],
    confidence_threshold: float = 0.5,
) -> tuple[bool, str]:
    """Try to validate an issue by matching its description to detections.

    Categories are checked in order with ``match_description_to_category``
    (using that function's default threshold); the first one that matches
    validates the issue.

    Args:
        description: User-supplied text describing the issue.
        detected_categories: Category names to compare the text against.
        confidence_threshold: Accepted for interface compatibility; this
            function does not currently read it.

    Returns:
        ``(validated, message)``. ``validated`` is True as soon as one
        category matches; the message names the category and the matched
        words. Otherwise it is False with a message explaining that input
        was missing or that manual verification is required.
    """
    if not (description and detected_categories):
        return False, "No description or no detections for auto-validation"

    for category in detected_categories:
        matched, _, matched_words = match_description_to_category(
            description, category
        )
        if not matched:
            continue
        return True, f"Auto-validated: '{category}' matched with keywords: {matched_words}"

    return False, f"Manual verification required: no match between description and detected categories {detected_categories}"