| """ |
| Image Forensics for Scientific Figure Manipulation Detection |
| Specialized for western blots, gel electrophoresis, microscopy images. |
| """ |
|
|
| import torch |
| import torch.nn as nn |
| import numpy as np |
| from PIL import Image |
| import cv2 |
|
|
|
|
class ForensicFilterBank(nn.Module):
    """Bank of forensic filters for detecting image manipulation in scientific figures.

    Three feature groups are computed and concatenated along the channel axis:

    * ``srm_filters``: 9 fixed 5x5 kernels (five first-order and four
      second-order pixel differences) applied to the luminance channel.
    * ``bayar_filter``: 3 fixed first-order difference kernels (left, up and
      right neighbour minus centre) applied to the luminance channel.
    * ``ela_conv``: a trainable two-layer 3x3 conv stack applied to the
      3-channel input, producing 16 channels.

    The result has 9 + 3 + 16 = 28 channels at the input resolution.
    """

    def __init__(self):
        super().__init__()

        # Fixed residual kernels. Every kernel sums to zero, so flat regions
        # map to zero and only local pixel differences remain.
        self.srm_filters = nn.Conv2d(1, 9, kernel_size=5, padding=2, bias=False)
        srm_kernels = torch.tensor([
            [[0, 0, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, -1, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 1, -1, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, -1, 1, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 0, 0, 1, 0], [0, 0, -1, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, -1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 1, -2, 1, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, -2, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 0, 0, 1, 0], [0, 0, -2, 0, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 0]],
            [[0, 0, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, -2, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 0, 0]],
        ], dtype=torch.float32).unsqueeze(1)
        # Stored as a frozen Parameter (not a buffer) so state_dict keys stay
        # the same as before.
        self.srm_filters.weight = nn.Parameter(srm_kernels, requires_grad=False)

        # Fixed first-order differences: (row, col) of the +1 tap for each of
        # the three output channels; the centre tap is always -1.
        self.bayar_filter = nn.Conv2d(1, 3, kernel_size=5, padding=2, bias=False)
        bayar_kernel = torch.zeros(3, 1, 5, 5)
        for channel, (row, col) in enumerate([(2, 1), (1, 2), (2, 3)]):
            bayar_kernel[channel, 0, 2, 2] = -1
            bayar_kernel[channel, 0, row, col] = 1
        self.bayar_filter.weight = nn.Parameter(bayar_kernel, requires_grad=False)

        # The only trainable part of the bank.
        self.ela_conv = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
            nn.Conv2d(16, 16, 3, padding=1), nn.ReLU(),
        )

    def forward(self, image):
        """Compute the 28-channel forensic feature map.

        Args:
            image: Float tensor of shape ``(batch, channels, height, width)``
                with 3 channels, or 1 channel (grayscale), which is expanded
                to three identical channels.

        Returns:
            Tensor of shape ``(batch, 28, height, width)``: 9 SRM channels,
            3 difference channels, then 16 ``ela_conv`` channels.

        Raises:
            ValueError: If ``image`` is not 4-D or has a channel count other
                than 1 or 3.
        """
        if image.dim() != 4:
            raise ValueError(
                f"expected a 4-D (batch, channels, height, width) tensor, "
                f"got {image.dim()}-D input"
            )
        channels = image.shape[1]
        if channels == 1:
            # Grayscale scans are common for blots and gels; replicate the
            # single channel so both the luminance mix and ela_conv work.
            image = image.expand(-1, 3, -1, -1)
        elif channels != 3:
            raise ValueError(f"expected 1 or 3 channels, got {channels}")

        # ITU-R 601 luma weights.
        gray = 0.299 * image[:, 0:1] + 0.587 * image[:, 1:2] + 0.114 * image[:, 2:3]

        # All convolutions use stride 1 with "same" padding, so every output
        # already matches the input's spatial size and no resampling is needed.
        srm_out = self.srm_filters(gray)
        bayar_out = self.bayar_filter(gray)
        ela_out = self.ela_conv(image)
        return torch.cat([srm_out, bayar_out, ela_out], dim=1)
|
|
|
|
class WesternBlotAnalyzer:
    """Specialized analyzer for western blot image manipulation detection.

    All public methods are static and accept a file path (``str``), a PIL
    image, or a NumPy array. Inputs are reduced to a single grayscale array
    by ``_load_grayscale`` so every check sees the same pixel values.
    """

    @staticmethod
    def _load_grayscale(image):
        """Return *image* as a 2-D grayscale array.

        Args:
            image: File path, an object exposing ``convert`` (PIL image), or
                an array-like. A 3-D array is treated as RGB-ordered.

        Returns:
            2-D NumPy array. A 2-D ``ndarray`` input is returned unchanged.

        Raises:
            ValueError: If a path is given and OpenCV cannot decode it.
        """
        if isinstance(image, str):
            img = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread reports failure by returning None, not by raising.
                raise ValueError(f"Could not read image: {image!r}")
            return img
        if hasattr(image, "convert"):
            return np.array(image.convert('L'))
        arr = np.asarray(image)
        if arr.ndim == 3:
            return cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)
        return arr

    @staticmethod
    def _count_similar_regions(img, tolerance=1.0):
        """Count pairs of non-overlapping windows with near-equal mean intensity.

        Square windows of side ``min(h, w) // 4`` are slid over *img* with a
        stride of half the window side. Images whose window side would be 20
        pixels or less yield 0.

        Args:
            img: 2-D array.
            tolerance: Two windows match when their means differ by less
                than this value.

        Returns:
            Number of matching window pairs as an ``int``.
        """
        h, w = img.shape
        sub_size = min(h, w) // 4
        if sub_size <= 20:
            return 0
        step = sub_size // 2
        # "+ 1" so a window flush with the bottom/right edge is included.
        windows = [
            (top, left, float(np.mean(img[top:top + sub_size, left:left + sub_size])))
            for top in range(0, h - sub_size + 1, step)
            for left in range(0, w - sub_size + 1, step)
        ]
        pairs = 0
        for idx, (top_a, left_a, mean_a) in enumerate(windows):
            for top_b, left_b, mean_b in windows[idx + 1:]:
                # Overlapping windows share pixels, so similar means between
                # them carry no information about duplicated content.
                if abs(top_a - top_b) < sub_size and abs(left_a - left_b) < sub_size:
                    continue
                if abs(mean_a - mean_b) < tolerance:
                    pairs += 1
        return pairs

    @staticmethod
    def detect_band_duplication(image, threshold=0.95, min_area=50):
        """Find pairs of bands whose pixel patterns are almost identical.

        The image is Otsu-thresholded (inverted), split into 8-connected
        components, and every pair of components larger than *min_area*
        pixels is resized to a common size and compared by Pearson
        correlation.

        Args:
            image: File path, PIL image, or array (see ``_load_grayscale``).
            threshold: Minimum correlation for a pair to be reported.
            min_area: Components with an area of this many pixels or fewer
                are ignored.

        Returns:
            List of ``(label_a, label_b, similarity)`` tuples, where the
            labels are connected-component indices.

        Raises:
            ValueError: If *image* is a path that cannot be read.
        """
        img = WesternBlotAnalyzer._load_grayscale(image)
        _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        num_labels, _, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)

        # Label 0 is skipped, as in the original loop starting at 1.
        bands = []
        for label in range(1, num_labels):
            x, y, w, h, area = stats[label]
            if area > min_area:
                bands.append((label, img[y:y + h, x:x + w]))

        duplicates = []
        for idx, (label_a, band_a) in enumerate(bands):
            for label_b, band_b in bands[idx + 1:]:
                # cv2.resize takes (width, height).
                size = (
                    max(band_a.shape[1], band_b.shape[1]),
                    max(band_a.shape[0], band_b.shape[0]),
                )
                flat_a = cv2.resize(band_a, size).astype(np.float64).ravel()
                flat_b = cv2.resize(band_b, size).astype(np.float64).ravel()
                if flat_a.std() == 0 or flat_b.std() == 0:
                    # Correlation is undefined for a constant patch; skipping
                    # avoids a NaN result and NumPy's RuntimeWarning.
                    continue
                similarity = float(np.corrcoef(flat_a, flat_b)[0, 1])
                if similarity > threshold:
                    duplicates.append((label_a, label_b, similarity))
        return duplicates

    @staticmethod
    def detect_splicing_artifacts(image, noise_threshold=5, strip_height=5):
        """Flag long near-horizontal edges with mismatched noise on either side.

        Canny edges are fed to a probabilistic Hough transform. For every
        detected segment that is more horizontal than vertical, the pixel
        standard deviation in a strip above the segment is compared with the
        strip below it.

        NOTE: only near-horizontal segments are examined; vertical boundaries
        are not checked by this method.

        Args:
            image: File path, PIL image, or array (see ``_load_grayscale``).
            noise_threshold: Minimum absolute difference in standard
                deviation for a segment to be reported.
            strip_height: Height in pixels of the strips compared above and
                below each segment.

        Returns:
            List of dicts with keys ``'line'`` (``(x1, y1, x2, y2)``) and
            ``'noise_diff'``.

        Raises:
            ValueError: If *image* is a path that cannot be read.
        """
        # Paths are decoded straight to grayscale. cv2.imread returns BGR for
        # colour reads, so converting such data with COLOR_RGB2GRAY would
        # swap the red and blue weights.
        gray = WesternBlotAnalyzer._load_grayscale(image)
        edges = cv2.Canny(gray, 50, 150)
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=100, maxLineGap=10)

        artifacts = []
        if lines is None:
            return artifacts

        for segment in lines.reshape(-1, 4):
            x1, y1, x2, y2 = (int(v) for v in segment)
            if abs(x2 - x1) <= abs(y2 - y1):
                continue
            y = (y1 + y2) // 2
            left, right = min(x1, x2), max(x1, x2)
            region_above = gray[max(0, y - strip_height):y, left:right]
            region_below = gray[y:min(gray.shape[0], y + strip_height), left:right]
            if region_above.size == 0 or region_below.size == 0:
                continue
            noise_diff = abs(
                np.std(region_above.astype(float)) - np.std(region_below.astype(float))
            )
            if noise_diff > noise_threshold:
                artifacts.append({'line': (x1, y1, x2, y2), 'noise_diff': noise_diff})
        return artifacts

    @staticmethod
    def compute_manipulation_score(image):
        """Combine the individual checks into one score in ``[0, 1]``.

        Contributions: duplicated bands (0.1 per pair, at most 0.4),
        splicing artifacts (0.05 per boundary, at most 0.3) and similar
        non-overlapping regions (0.01 per pair, at most 0.3).

        Args:
            image: File path, PIL image, or array (see ``_load_grayscale``).

        Returns:
            ``(score, reasons)`` where *reasons* lists a human-readable
            string for every check that contributed to the score.

        Raises:
            ValueError: If *image* is a path that cannot be read.
        """
        # Decode once and hand the array to each check.
        img = WesternBlotAnalyzer._load_grayscale(image)
        score = 0.0
        reasons = []

        duplicates = WesternBlotAnalyzer.detect_band_duplication(img)
        if duplicates:
            score += min(0.4, len(duplicates) * 0.1)
            reasons.append(f"Band duplication detected: {len(duplicates)} pairs")

        artifacts = WesternBlotAnalyzer.detect_splicing_artifacts(img)
        if artifacts:
            score += min(0.3, len(artifacts) * 0.05)
            reasons.append(f"Splicing artifacts: {len(artifacts)} boundaries")

        similar_pairs = WesternBlotAnalyzer._count_similar_regions(img)
        if similar_pairs:
            # Capped like the other terms; uncapped, a uniform image alone
            # would drive the score to 1.0.
            score += min(0.3, similar_pairs * 0.01)
            reasons.append(f"Similar regions: {similar_pairs} pairs")

        return min(score, 1.0), reasons
|
|
|
|
class ScientificImageForensics(nn.Module):
    """Complete forensics pipeline for scientific images.

    A ``ForensicFilterBank`` turns the input into a 28-channel feature map,
    which a small CNN reduces to one sigmoid-activated value per sample.
    """

    def __init__(self):
        super().__init__()
        self.filter_bank = ForensicFilterBank()

        # Convolutional trunk: three 3x3 conv stages, the first two followed
        # by 2x2 max-pooling. The 28 input channels match the filter bank's
        # output.
        trunk = [
            nn.Conv2d(28, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
        ]
        # Head: pool to a fixed 4x4 grid so the linear layer's input size
        # does not depend on the image size, then classify.
        head = [
            nn.AdaptiveAvgPool2d((4, 4)),
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        # One flat Sequential, with the same layer order and indices.
        self.manipulation_cnn = nn.Sequential(*trunk, *head)

    def forward(self, image):
        """Run the filter bank and the classifier.

        Args:
            image: Batched image tensor accepted by ``ForensicFilterBank``.

        Returns:
            ``(manipulation_prob, forensic_features)``: the sigmoid output of
            the classifier and the feature map produced by the filter bank.
        """
        features = self.filter_bank(image)
        probability = self.manipulation_cnn(features)
        return probability, features
|
|