Spaces:

junaid17
/

DamageLens

Sleeping

App Files Files Community

junaid17 committed 20 days ago

Commit

1782395

verified ·

1 Parent(s): f80a3ac

Upload 3 files

Browse files

Files changed (3) hide show

scripts/gradcam.py +96 -0
scripts/prediction_helper.py +172 -0
scripts/yolo.py +47 -0

scripts/gradcam.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import cv2
+import numpy as np
+from PIL import Image
def get_resnet_gradcam(image_path, predictor, output_path):
    """Write a Grad-CAM overlay for the ResNet classifier's top prediction.

    The image is run through ``predictor.model``. The gradient of the
    highest-scoring output with respect to the last ``layer4`` block is
    averaged per channel and used to weight that block's activation maps.
    The resulting heat map is blended onto the original image and written
    to ``output_path`` with OpenCV.

    Args:
        image_path: Path of the image to explain.
        predictor: Object exposing ``model`` (whose ``model.layer4`` is
            indexable), ``device`` and ``test_transforms``.
        output_path: Destination file for the blended overlay.

    Returns:
        ``True`` when OpenCV reports that the overlay was written,
        ``False`` otherwise.
    """
    model = predictor.model
    device = predictor.device
    model.eval()

    activations, gradients = [], []

    def forward_hook(module, inputs, output):
        activations.append(output)

    def backward_hook(module, grad_in, grad_out):
        gradients.append(grad_out[0])

    # Load the image before touching the model so a bad path cannot leave
    # hooks behind, and close the file handle as soon as pixels are decoded.
    with Image.open(image_path) as source:
        original_img = source.convert("RGB")

    target_layer = model.model.layer4[-1]
    handle_fw = target_layer.register_forward_hook(forward_hook)
    handle_bw = target_layer.register_full_backward_hook(backward_hook)
    try:
        input_tensor = predictor.test_transforms(original_img).unsqueeze(0).to(device)
        model.zero_grad()
        output = model(input_tensor)
        pred_class_idx = output.argmax(dim=1).item()
        output[0, pred_class_idx].backward()
    finally:
        # The model is shared between calls: always detach the hooks, even
        # when the forward or backward pass raises.
        handle_fw.remove()
        handle_bw.remove()

    # Drop the batch axis; axis 0 is then treated as the channel axis.
    acts = activations[0].detach().cpu().numpy()[0]
    grads = gradients[0].detach().cpu().numpy()[0]

    # Channel weights are the spatially averaged gradients; the map is the
    # weighted sum of the activation channels, clipped at zero.
    weights = grads.mean(axis=(1, 2))
    cam = (weights[:, None, None] * acts).sum(axis=0).astype(np.float32, copy=False)
    cam = np.maximum(cam, 0)

    cam = cv2.resize(cam, (original_img.width, original_img.height))
    # The epsilon keeps a constant map from dividing by zero.
    cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam) + 1e-8)
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)

    # OpenCV works in BGR, so convert the PIL (RGB) pixels before blending.
    original_bgr = cv2.cvtColor(np.array(original_img), cv2.COLOR_RGB2BGR)
    overlay = cv2.addWeighted(original_bgr, 0.6, heatmap, 0.4, 0)

    # Report the real write status instead of an unconditional True.
    return bool(cv2.imwrite(output_path, overlay))
def get_deit_gradcam(image_path, predictor, output_path):
    """Write a Grad-CAM style overlay for the DeiT classifier's top prediction.

    Activations and gradients are captured at ``layernorm_before`` of the
    last encoder layer. Their element-wise product is summed over the
    feature axis to give one relevance value per token. The first two
    tokens (CLS and distillation) are dropped, and the remaining tokens are
    reshaped to a square grid and blended onto the original image.

    Args:
        image_path: Path of the image to explain.
        predictor: Object exposing ``model``, ``processor`` and ``device``.
            If it also exposes ``transform``, that is applied first.
        output_path: Destination file for the blended overlay.

    Returns:
        ``True`` when OpenCV reports that the overlay was written,
        ``False`` otherwise.
    """
    model = predictor.model
    processor = predictor.processor
    device = predictor.device
    model.eval()

    activations, gradients = [], []

    def forward_hook(module, inputs, output):
        activations.append(output)

    def backward_hook(module, grad_in, grad_out):
        gradients.append(grad_out[0])

    # Load the image before touching the model so a bad path cannot leave
    # hooks behind, and close the file handle as soon as pixels are decoded.
    with Image.open(image_path) as source:
        original_img = source.convert("RGB")

    # DeitCarDamagePredictor.deit_predict applies `transform` before the
    # processor; do the same here so the explained input matches the scored
    # one. Predictors without that attribute keep the previous behaviour.
    pre_resize = getattr(predictor, "transform", None)
    model_input = original_img if pre_resize is None else pre_resize(original_img)

    target_layer = model.deit.encoder.layer[-1].layernorm_before
    handle_fw = target_layer.register_forward_hook(forward_hook)
    handle_bw = target_layer.register_full_backward_hook(backward_hook)
    try:
        inputs = processor(images=model_input, return_tensors="pt").to(device)
        model.zero_grad()
        outputs = model(**inputs)
        pred_class_idx = outputs.logits.argmax(dim=1).item()
        outputs.logits[0, pred_class_idx].backward()
    finally:
        # The model is shared between calls: always detach the hooks, even
        # when the forward or backward pass raises.
        handle_fw.remove()
        handle_bw.remove()

    # Drop the batch axis; rows are tokens, the last axis is the feature axis.
    acts = activations[0].detach().cpu().numpy()[0]
    grads = gradients[0].detach().cpu().numpy()[0]

    cam = (grads * acts).sum(axis=-1)
    cam = cam[2:]  # Remove CLS and Distillation tokens
    grid_size = int(np.sqrt(cam.shape[0]))
    cam = cam.reshape(grid_size, grid_size)
    cam = np.maximum(cam, 0)

    cam = cv2.resize(cam, (original_img.width, original_img.height))
    # The epsilon keeps a constant map from dividing by zero.
    cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam) + 1e-8)
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)

    # OpenCV works in BGR, so convert the PIL (RGB) pixels before blending.
    original_bgr = cv2.cvtColor(np.array(original_img), cv2.COLOR_RGB2BGR)
    overlay = cv2.addWeighted(original_bgr, 0.6, heatmap, 0.4, 0)

    # Report the real write status instead of an unconditional True.
    return bool(cv2.imwrite(output_path, overlay))

scripts/prediction_helper.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import os
+import torch
+import torch.nn as nn
+from torchvision import transforms, models
+from PIL import Image, UnidentifiedImageError
+from transformers import DeiTForImageClassification, DeiTImageProcessor
+# ================================ ResNet-18 Classifier ================================
class Car_Classifier_Resnet(nn.Module):
    """ResNet-18 backbone with a small fully connected classification head.

    All backbone parameters are frozen except ``layer3`` and ``layer4``.
    The original ``fc`` layer is replaced by
    Dropout -> Linear(256) -> ReLU -> Dropout -> Linear(num_classes).

    Args:
        num_classes: Number of output logits.
        weights: Forwarded to ``torchvision.models.resnet18``. The default
            ``"DEFAULT"`` keeps the previous behaviour. Pass ``None`` to
            build the architecture without fetching pretrained weights,
            e.g. when a checkpoint is loaded immediately afterwards.
    """

    def __init__(self, num_classes, weights="DEFAULT"):
        super().__init__()
        self.model = models.resnet18(weights=weights)

        # Freeze everything, then re-enable only the two deepest stages.
        for param in self.model.parameters():
            param.requires_grad = False
        for stage in (self.model.layer3, self.model.layer4):
            for param in stage.parameters():
                param.requires_grad = True

        # Replace FC head (new layers are trainable by default). The layer
        # order must stay as is so existing state-dict keys still match.
        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return the logits produced by the wrapped ResNet for ``x``."""
        return self.model(x)
class ResnetCarDamagePredictor:
    """Load a fine-tuned ``Car_Classifier_Resnet`` and score single images.

    Args:
        checkpoint_path: File readable by ``torch.load`` that holds a
            mapping with a ``"model_state_dict"`` entry.
        class_map: Container indexable by the integer class ids
            ``0 .. len(class_map) - 1``, giving each class name.

    Raises:
        RuntimeError: If the model cannot be built or the checkpoint
            cannot be loaded.
    """

    def __init__(self, checkpoint_path, class_map):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_map = class_map
        # Inference preprocessing: fixed 128x128 resize, tensor conversion,
        # then per-channel normalisation with the constants below.
        self.test_transforms = transforms.Compose([
            transforms.Resize((128, 128)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
        try:
            self.model = Car_Classifier_Resnet(num_classes=len(class_map))
            # NOTE(review): torch.load unpickles the file; only point this
            # at checkpoints from a trusted source.
            checkpoint = torch.load(checkpoint_path, map_location=self.device)
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.model.to(self.device)
            self.model.eval()
        except Exception as e:
            raise RuntimeError(f"Failed to load ResNet model: {str(e)}") from e

    def resnet_predict(self, image_input):
        """Return class probabilities for one image, highest first.

        Args:
            image_input: A file path (``str`` or ``os.PathLike``) or an
                already opened ``PIL.Image.Image``.

        Returns:
            Dict mapping each class name to its softmax probability,
            ordered from most to least likely.

        Raises:
            ValueError: If the file is not a recognisable image.
            RuntimeError: For any other failure, including an unsupported
                ``image_input`` type.
        """
        try:
            if isinstance(image_input, (str, os.PathLike)):
                # The context manager closes the file handle once the
                # pixels have been decoded by convert().
                with Image.open(image_input) as source:
                    image = source.convert("RGB")
            elif isinstance(image_input, Image.Image):
                image = image_input.convert("RGB")
            else:
                raise TypeError("image_input must be a file path or PIL.Image")

            batch = self.test_transforms(image).unsqueeze(0).to(self.device)
            with torch.no_grad():
                outputs = self.model(batch)
            probs = torch.nn.functional.softmax(outputs, dim=1)[0]

            class_probs = {
                self.class_map[i]: float(probs[i].item())
                for i in range(len(self.class_map))
            }
            return dict(sorted(class_probs.items(), key=lambda x: x[1], reverse=True))
        except UnidentifiedImageError as e:
            raise ValueError("Invalid image file provided") from e
        except Exception as e:
            raise RuntimeError(f"ResNet prediction failed: {str(e)}") from e
+# ================================ DeiT Classifier ================================
class DeitCarDamagePredictor:
    """Load a fine-tuned DeiT image classifier and score single images.

    The architecture and image processor come from
    ``facebook/deit-base-distilled-patch16-224``. The classification head is
    resized to ``len(class_map)`` labels before the checkpoint is loaded.

    Args:
        checkpoint_path: File readable by ``torch.load`` that holds a
            mapping with a ``"model_state_dict"`` entry.
        class_map: Container indexable by the integer class ids
            ``0 .. len(class_map) - 1``, giving each class name.

    Raises:
        RuntimeError: If the processor, model or checkpoint cannot be loaded.
    """

    def __init__(self, checkpoint_path, class_map):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_map = class_map
        self.checkpoint_path = checkpoint_path
        # Applied before the Hugging Face processor in deit_predict.
        self.transform = transforms.Compose([transforms.Resize((224, 224))])
        model_name = "facebook/deit-base-distilled-patch16-224"
        try:
            self.processor = DeiTImageProcessor.from_pretrained(model_name)
            self.model = DeiTForImageClassification.from_pretrained(
                model_name,
                num_labels=len(class_map),
                ignore_mismatched_sizes=True
            )
            # NOTE(review): torch.load unpickles the file; only point this
            # at checkpoints from a trusted source.
            checkpoint = torch.load(self.checkpoint_path, map_location=self.device)
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.model.to(self.device)
            self.model.eval()
        except Exception as e:
            raise RuntimeError(f"Failed to load DeiT model: {str(e)}") from e

    def deit_predict(self, image_input):
        """Return class probabilities for one image, highest first.

        Args:
            image_input: A file path (``str`` or ``os.PathLike``) or an
                already opened ``PIL.Image.Image``.

        Returns:
            Dict mapping each class name to its softmax probability,
            ordered from most to least likely.

        Raises:
            ValueError: If the file is not a recognisable image.
            RuntimeError: For any other failure, including an unsupported
                ``image_input`` type.
        """
        try:
            if isinstance(image_input, (str, os.PathLike)):
                # The context manager closes the file handle once the
                # pixels have been decoded by convert().
                with Image.open(image_input) as source:
                    image = source.convert("RGB")
            elif isinstance(image_input, Image.Image):
                image = image_input.convert("RGB")
            else:
                raise TypeError("image_input must be a file path or PIL.Image")

            image = self.transform(image)
            inputs = self.processor(image, return_tensors="pt").to(self.device)
            with torch.no_grad():
                outputs = self.model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=1)[0]

            class_probs = {
                self.class_map[i]: float(probs[i].item())
                for i in range(len(self.class_map))
            }
            return dict(sorted(class_probs.items(), key=lambda x: x[1], reverse=True))
        except UnidentifiedImageError as e:
            raise ValueError("Invalid image file provided") from e
        except Exception as e:
            raise RuntimeError(f"DeiT prediction failed: {str(e)}") from e
+# ================================ Fusion Predictor ================================
class FusionCarDamagePredictor:
    """Combine ResNet and DeiT class probabilities with a weighted average.

    Args:
        resnet_predictor: Object with ``resnet_predict(image_input)``
            returning a ``{class_name: probability}`` dict.
        deit_predictor: Object with ``deit_predict(image_input)`` returning
            a ``{class_name: probability}`` dict.
        resnet_weight: Non-negative relative weight of the ResNet output.
        deit_weight: Non-negative relative weight of the DeiT output.

    The two weights are normalised so that they sum to 1.

    Raises:
        ValueError: If a weight is negative or both weights are zero.
    """

    def __init__(self, resnet_predictor, deit_predictor, resnet_weight=0.5, deit_weight=0.5):
        if resnet_weight < 0 or deit_weight < 0:
            raise ValueError("Weights must be non-negative")
        total = resnet_weight + deit_weight
        if total == 0:
            raise ValueError("At least one weight must be greater than 0")
        self.resnet_predictor = resnet_predictor
        self.deit_predictor = deit_predictor
        self.resnet_weight = resnet_weight / total
        self.deit_weight = deit_weight / total

    def fuse_predict(self, image_input):
        """Run both predictors on ``image_input`` and fuse their outputs.

        A class missing from one predictor's output counts as probability
        ``0.0`` for that predictor. When fused probabilities tie, classes
        keep first-seen order (ResNet's output order, then classes only DeiT
        reported), so the result does not depend on string hashing.

        Returns:
            Dict with ``resnet_output``, ``deit_output``, ``fused_output``
            (sorted from most to least likely), ``final_prediction`` and
            ``final_confidence`` (rounded to 4 decimals).

        Raises:
            RuntimeError: If either predictor fails or no class
                probabilities are returned.
        """
        try:
            resnet_output = self.resnet_predictor.resnet_predict(image_input)
            deit_output = self.deit_predictor.deit_predict(image_input)

            # dict.fromkeys de-duplicates while preserving insertion order;
            # a set would make tie-breaking vary between interpreter runs.
            all_classes = dict.fromkeys([*resnet_output, *deit_output])
            fused_output = {
                cls: float(
                    self.resnet_weight * resnet_output.get(cls, 0.0)
                    + self.deit_weight * deit_output.get(cls, 0.0)
                )
                for cls in all_classes
            }
            if not fused_output:
                raise ValueError("both predictors returned no class probabilities")

            # sorted() is stable, also with reverse=True, so ties keep the
            # deterministic order established above.
            fused_output = dict(sorted(fused_output.items(), key=lambda x: x[1], reverse=True))
            final_class = next(iter(fused_output))
            final_confidence = fused_output[final_class]
            return {
                "resnet_output": resnet_output,
                "deit_output": deit_output,
                "fused_output": fused_output,
                "final_prediction": final_class,
                "final_confidence": round(final_confidence, 4)
            }
        except Exception as e:
            raise RuntimeError(f"Fusion prediction failed: {str(e)}") from e

scripts/yolo.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import cv2
+import numpy as np
+from PIL import Image
+from ultralytics import YOLO
# The detector weights are loaded once at import time and shared by every call.
yolo_model = YOLO("checkpoints/damage_detector.pt")


def get_yolo_damage_boxes(image_path, output_path, conf=0.05, imgsz=640):
    """Detect damage regions with YOLO and save the annotated image.

    Args:
        image_path: Path of the image to analyse.
        output_path: Destination file for the image with boxes drawn on it.
        conf: Minimum confidence passed to ``yolo_model.predict``.
        imgsz: Inference image size passed to ``yolo_model.predict``.

    Returns:
        Dict with ``detections`` (a list of ``label``, ``confidence`` and
        ``box`` as ``[x1, y1, x2, y2]`` integers), ``total_detections`` and
        a human-readable ``message``.

    Raises:
        RuntimeError: If loading, inference or writing the output fails.
    """
    try:
        # The context manager closes the file handle once the pixels have
        # been decoded by convert().
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        results = yolo_model.predict(
            source=image,
            conf=conf,
            imgsz=imgsz,
            verbose=False
        )
        result = results[0]
        boxes = result.boxes

        detections = []
        if boxes is not None:
            for box in boxes:
                score = float(box.conf[0])
                cls_id = int(box.cls[0])
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                detections.append({
                    "label": yolo_model.names[cls_id],
                    "confidence": round(score, 4),
                    "box": [x1, y1, x2, y2]
                })

        # result.plot() yields a BGR array, which is the channel order
        # cv2.imwrite expects. Reversing it to RGB first would save the
        # image with red and blue swapped.
        plotted_bgr = result.plot()
        if not cv2.imwrite(output_path, plotted_bgr):
            raise OSError(f"could not write annotated image to {output_path}")

        return {
            "detections": detections,
            "total_detections": len(detections),
            "message": "Detections found" if detections else "No damage detected"
        }
    except Exception as e:
        raise RuntimeError(f"YOLO failed: {str(e)}") from e