Update server.py
Browse files
server.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from typing import Optional, Tuple
|
| 2 |
-
from fastapi import FastAPI, UploadFile, File
|
| 3 |
from fastapi.responses import FileResponse
|
| 4 |
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
from PIL import Image, ExifTags
|
|
@@ -9,10 +9,18 @@ import httpx
|
|
| 9 |
import os
|
| 10 |
import base64
|
| 11 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# ---------------- API KEYS ----------------
|
| 14 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
|
|
|
| 15 |
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# ---------------- APP ----------------
|
| 18 |
app = FastAPI()
|
|
@@ -53,15 +61,39 @@ def calculate_metadata_risk(image: Image.Image):
|
|
| 53 |
|
| 54 |
|
| 55 |
def fusion_score(model_score: float, metadata_risk: float):
|
| 56 |
-
final = 0.9 * model_score + 0.
|
| 57 |
authenticity = (1 - final) * 100
|
| 58 |
fake = final * 100
|
| 59 |
return authenticity, fake
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# ==============================
|
| 63 |
-
# GROQ VISION
|
| 64 |
-
# now returns (score, reasoning) tuple
|
| 65 |
# ==============================
|
| 66 |
|
| 67 |
async def call_groq_vision(contents: bytes) -> Tuple[Optional[float], str]:
|
|
@@ -121,7 +153,6 @@ fake_probability must be between 0.0 (definitely real) and 1.0 (definitely AI/fa
|
|
| 121 |
data = response.json()
|
| 122 |
text = data["choices"][0]["message"]["content"]
|
| 123 |
print(f"Groq vision response: {text}")
|
| 124 |
-
|
| 125 |
clean = text.strip().replace("```json", "").replace("```", "")
|
| 126 |
result = json.loads(clean)
|
| 127 |
return float(result["fake_probability"]), result.get("reasoning", "")
|
|
@@ -131,6 +162,91 @@ fake_probability must be between 0.0 (definitely real) and 1.0 (definitely AI/fa
|
|
| 131 |
return None, ""
|
| 132 |
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
# ==============================
|
| 135 |
# ANALYZERS
|
| 136 |
# ==============================
|
|
@@ -138,7 +254,11 @@ fake_probability must be between 0.0 (definitely real) and 1.0 (definitely AI/fa
|
|
| 138 |
async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
|
| 139 |
image = Image.open(io.BytesIO(contents)).convert("RGB")
|
| 140 |
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
combined_model_score = score if score is not None else 0.5
|
| 144 |
models_used = ["Groq_Llama4"] if score is not None else []
|
|
@@ -146,14 +266,11 @@ async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
|
|
| 146 |
metadata_risk = calculate_metadata_risk(image)
|
| 147 |
authenticity, fake = fusion_score(combined_model_score, metadata_risk)
|
| 148 |
|
| 149 |
-
diff = abs(authenticity - fake)
|
| 150 |
-
confidence_level = "low" if diff < 20 else "medium" if diff < 40 else "high"
|
| 151 |
-
|
| 152 |
return {
|
| 153 |
"type": "image",
|
| 154 |
"authenticity": round(authenticity, 2),
|
| 155 |
"fake": round(fake, 2),
|
| 156 |
-
"confidence_level":
|
| 157 |
"models_used": models_used,
|
| 158 |
"details": {
|
| 159 |
"groq_score": round(score, 4) if score is not None else "unavailable",
|
|
@@ -163,13 +280,7 @@ async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
|
|
| 163 |
}
|
| 164 |
|
| 165 |
|
| 166 |
-
import cv2
|
| 167 |
-
import numpy as np
|
| 168 |
-
import tempfile
|
| 169 |
-
import asyncio
|
| 170 |
-
|
| 171 |
async def analyze_video(contents: bytes):
|
| 172 |
-
# write to temp file since cv2 needs a path
|
| 173 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
|
| 174 |
f.write(contents)
|
| 175 |
tmp_path = f.name
|
|
@@ -180,7 +291,6 @@ async def analyze_video(contents: bytes):
|
|
| 180 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 181 |
duration = round(frame_count / fps, 1) if fps > 0 else 0
|
| 182 |
|
| 183 |
-
# sample 5 frames evenly across the video
|
| 184 |
sample_indices = [int(frame_count * i / 5) for i in range(5)]
|
| 185 |
frames = []
|
| 186 |
|
|
@@ -188,7 +298,6 @@ async def analyze_video(contents: bytes):
|
|
| 188 |
cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
|
| 189 |
ret, frame = cap.read()
|
| 190 |
if ret:
|
| 191 |
-
# convert frame to jpeg bytes
|
| 192 |
pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 193 |
buf = io.BytesIO()
|
| 194 |
pil_img.save(buf, format="JPEG", quality=85)
|
|
@@ -213,7 +322,6 @@ async def analyze_video(contents: bytes):
|
|
| 213 |
}
|
| 214 |
}
|
| 215 |
|
| 216 |
-
# analyze each frame with Groq — 2s delay between calls to avoid rate limit
|
| 217 |
scores = []
|
| 218 |
reasonings = []
|
| 219 |
for i, frame_bytes in enumerate(frames):
|
|
@@ -223,28 +331,20 @@ async def analyze_video(contents: bytes):
|
|
| 223 |
scores.append(score)
|
| 224 |
reasonings.append(f"Frame {i+1}: {reasoning}")
|
| 225 |
if i < len(frames) - 1:
|
| 226 |
-
await asyncio.sleep(2)
|
| 227 |
|
| 228 |
-
if scores
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
groq_reasoning = " | ".join(reasonings)
|
| 232 |
-
else:
|
| 233 |
-
combined_model_score = 0.5
|
| 234 |
-
models_used = []
|
| 235 |
-
groq_reasoning = "All frame analyses failed."
|
| 236 |
|
| 237 |
authenticity = round((1 - combined_model_score) * 100, 2)
|
| 238 |
fake = round(combined_model_score * 100, 2)
|
| 239 |
|
| 240 |
-
diff = abs(authenticity - fake)
|
| 241 |
-
confidence_level = "low" if diff < 20 else "medium" if diff < 40 else "high"
|
| 242 |
-
|
| 243 |
return {
|
| 244 |
"type": "video",
|
| 245 |
"authenticity": authenticity,
|
| 246 |
"fake": fake,
|
| 247 |
-
"confidence_level":
|
| 248 |
"models_used": models_used,
|
| 249 |
"details": {
|
| 250 |
"groq_score": round(combined_model_score, 4),
|
|
@@ -275,21 +375,150 @@ async def analyze_video(contents: bytes):
|
|
| 275 |
}
|
| 276 |
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
# ==============================
|
| 279 |
# ROUTER
|
| 280 |
# ==============================
|
| 281 |
|
| 282 |
@app.post("/analyze")
|
| 283 |
-
async def analyze(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
contents = await file.read()
|
| 285 |
sha256 = calculate_sha256(contents)
|
| 286 |
|
| 287 |
if file.content_type.startswith("image/"):
|
| 288 |
result = await analyze_image(contents, file.content_type)
|
| 289 |
elif file.content_type.startswith("video/"):
|
| 290 |
-
result = await analyze_video(contents)
|
|
|
|
|
|
|
| 291 |
else:
|
| 292 |
return {"error": "Unsupported file type"}
|
| 293 |
|
| 294 |
result["sha256"] = sha256
|
| 295 |
-
return result
|
|
|
|
|
|
|
|
|
| 1 |
from typing import Optional, Tuple
|
| 2 |
+
from fastapi import FastAPI, UploadFile, File, Form
|
| 3 |
from fastapi.responses import FileResponse
|
| 4 |
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
from PIL import Image, ExifTags
|
|
|
|
| 9 |
import os
|
| 10 |
import base64
|
| 11 |
import json
|
| 12 |
+
import asyncio
|
| 13 |
+
import cv2
|
| 14 |
+
import tempfile
|
| 15 |
+
import fitz # pymupdf
|
| 16 |
+
import pypdf
|
| 17 |
|
| 18 |
# ---------------- API KEYS ----------------
|
| 19 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 20 |
+
HF_API_KEY = os.getenv("HF_API_KEY")
|
| 21 |
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
|
| 22 |
+
ROBERTA_FAKE_NEWS_URL = "https://router.huggingface.co/hf-inference/models/hamzab/roberta-fake-news-classification"
|
| 23 |
+
ROBERTA_AI_TEXT_URL = "https://router.huggingface.co/hf-inference/models/openai-community/roberta-base-openai-detector"
|
| 24 |
|
| 25 |
# ---------------- APP ----------------
|
| 26 |
app = FastAPI()
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
def fusion_score(model_score: float, metadata_risk: float):
    """Blend the model verdict with the metadata heuristic into percentages.

    Weighted 90/10 in favour of the model. Returns the pair
    (authenticity_pct, fake_pct); the two always sum to 100.
    """
    blended = 0.9 * model_score + 0.1 * metadata_risk
    authenticity_pct = (1 - blended) * 100
    fake_pct = blended * 100
    return authenticity_pct, fake_pct
|
| 68 |
|
| 69 |
|
| 70 |
+
def normalize_output(label_prob_dict: dict) -> float:
    """Collapse a classifier's {label: probability} map into one fake-score.

    Labels containing a fake/AI keyword count fully, labels containing a
    real/human keyword count zero, and unrecognised labels contribute 40%
    of their probability mass. The result is clamped to [0, 1].
    """
    fake_terms = ("fake", "ai", "generated", "manipulated", "deepfake",
                  "artificial", "synthetic", "machine")
    real_terms = ("real", "authentic", "genuine", "human", "original")

    total_fake = 0.0
    total_unknown = 0.0
    for raw_label, probability in label_prob_dict.items():
        name = raw_label.lower()
        if any(term in name for term in fake_terms):
            total_fake += probability
        elif not any(term in name for term in real_terms):
            # Unknown label: treated as weak evidence of fakery.
            total_unknown += probability

    return min(total_fake + 0.4 * total_unknown, 1.0)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def make_confidence(authenticity, fake):
|
| 91 |
+
diff = abs(authenticity - fake)
|
| 92 |
+
return "low" if diff < 20 else "medium" if diff < 40 else "high"
|
| 93 |
+
|
| 94 |
+
|
| 95 |
# ==============================
|
| 96 |
+
# GROQ VISION (images)
|
|
|
|
| 97 |
# ==============================
|
| 98 |
|
| 99 |
async def call_groq_vision(contents: bytes) -> Tuple[Optional[float], str]:
|
|
|
|
| 153 |
data = response.json()
|
| 154 |
text = data["choices"][0]["message"]["content"]
|
| 155 |
print(f"Groq vision response: {text}")
|
|
|
|
| 156 |
clean = text.strip().replace("```json", "").replace("```", "")
|
| 157 |
result = json.loads(clean)
|
| 158 |
return float(result["fake_probability"]), result.get("reasoning", "")
|
|
|
|
| 162 |
return None, ""
|
| 163 |
|
| 164 |
|
| 165 |
+
# ==============================
# GROQ TEXT (for AI writing detection)
# ==============================

async def call_groq_text(text: str) -> Tuple[Optional[float], str]:
    """Ask a Groq-hosted LLM whether *text* is AI-generated or forged.

    Returns (fake_probability, reasoning). Returns (None, "") when the
    API key is missing or the request/JSON parsing fails — callers
    treat None as "this model was unavailable".
    """
    if not GROQ_API_KEY:
        return None, ""
    try:
        payload = {
            "model": "llama-3.3-70b-versatile",
            "messages": [
                {
                    "role": "user",
                    # The prompt pins an exact JSON reply shape so it can be
                    # parsed with json.loads below. Input is capped at 4000
                    # chars to bound token usage.
                    "content": f"""You are a forensic text analyst. Analyze the following text and determine if it is AI-generated or written by a human. Also check if it could be a forged government document or fake news.

Look for:
- Overly formal or repetitive sentence structure typical of LLMs
- Lack of personal voice or human inconsistencies
- Suspiciously perfect grammar with no natural errors
- Generic phrasing commonly used by AI models
- For government documents: inconsistent terminology, wrong formats, suspicious clauses
- For news: sensational language, lack of credible sources, misleading framing

Text to analyze:
\"\"\"
{text[:4000]}
\"\"\"

Respond ONLY in this exact JSON format, nothing else:
{{"fake_probability": 0.0, "reasoning": "brief reason"}}

fake_probability must be between 0.0 (definitely human/authentic) and 1.0 (definitely AI-generated/forged)."""
                }
            ],
            "max_tokens": 200,
            "temperature": 0.1
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                GROQ_API_URL,
                headers={
                    "Authorization": f"Bearer {GROQ_API_KEY}",
                    "Content-Type": "application/json"
                },
                json=payload
            )
            response.raise_for_status()
            data = response.json()
            text_response = data["choices"][0]["message"]["content"]
            print(f"Groq text response: {text_response}")
            # Strip markdown code fences the model sometimes wraps around
            # its JSON despite the "ONLY this format" instruction.
            clean = text_response.strip().replace("```json", "").replace("```", "")
            result = json.loads(clean)
            return float(result["fake_probability"]), result.get("reasoning", "")

    except Exception as e:
        # Best-effort: any network/HTTP/parse error degrades to "unavailable".
        print(f"Groq text failed: {e}")
        return None, ""
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# ==============================
|
| 226 |
+
# ROBERTA (for fake news + AI text)
|
| 227 |
+
# ==============================
|
| 228 |
+
|
| 229 |
+
async def call_roberta(url: str, text: str, name: str) -> Optional[float]:
    """POST *text* to a HF-hosted RoBERTa classifier and return its fake-score.

    *name* is only used for logging. Returns None when no HF_API_KEY is
    configured or when the request fails.
    """
    if not HF_API_KEY:
        print(f"No HF_API_KEY, skipping {name}")
        return None
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            reply = await client.post(
                url,
                headers={"Authorization": f"Bearer {HF_API_KEY}"},
                json={"inputs": text[:512]},
            )
            reply.raise_for_status()
            data = reply.json()
            print(f"{name} response: {data}")
            # data[0] holds the per-label scores for the single input.
            return normalize_output({item["label"]: item["score"] for item in data[0]})
    except Exception as e:
        print(f"{name} failed: {e}")
        return None
|
| 248 |
+
|
| 249 |
+
|
| 250 |
# ==============================
|
| 251 |
# ANALYZERS
|
| 252 |
# ==============================
|
|
|
|
| 254 |
async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
|
| 255 |
image = Image.open(io.BytesIO(contents)).convert("RGB")
|
| 256 |
|
| 257 |
+
if len(contents) > 20 * 1024 * 1024:
|
| 258 |
+
print("Image too large for Groq")
|
| 259 |
+
score, reasoning = None, "Image too large for analysis"
|
| 260 |
+
else:
|
| 261 |
+
score, reasoning = await call_groq_vision(contents)
|
| 262 |
|
| 263 |
combined_model_score = score if score is not None else 0.5
|
| 264 |
models_used = ["Groq_Llama4"] if score is not None else []
|
|
|
|
| 266 |
metadata_risk = calculate_metadata_risk(image)
|
| 267 |
authenticity, fake = fusion_score(combined_model_score, metadata_risk)
|
| 268 |
|
|
|
|
|
|
|
|
|
|
| 269 |
return {
|
| 270 |
"type": "image",
|
| 271 |
"authenticity": round(authenticity, 2),
|
| 272 |
"fake": round(fake, 2),
|
| 273 |
+
"confidence_level": make_confidence(authenticity, fake),
|
| 274 |
"models_used": models_used,
|
| 275 |
"details": {
|
| 276 |
"groq_score": round(score, 4) if score is not None else "unavailable",
|
|
|
|
| 280 |
}
|
| 281 |
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
async def analyze_video(contents: bytes):
|
|
|
|
| 284 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
|
| 285 |
f.write(contents)
|
| 286 |
tmp_path = f.name
|
|
|
|
| 291 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 292 |
duration = round(frame_count / fps, 1) if fps > 0 else 0
|
| 293 |
|
|
|
|
| 294 |
sample_indices = [int(frame_count * i / 5) for i in range(5)]
|
| 295 |
frames = []
|
| 296 |
|
|
|
|
| 298 |
cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
|
| 299 |
ret, frame = cap.read()
|
| 300 |
if ret:
|
|
|
|
| 301 |
pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 302 |
buf = io.BytesIO()
|
| 303 |
pil_img.save(buf, format="JPEG", quality=85)
|
|
|
|
| 322 |
}
|
| 323 |
}
|
| 324 |
|
|
|
|
| 325 |
scores = []
|
| 326 |
reasonings = []
|
| 327 |
for i, frame_bytes in enumerate(frames):
|
|
|
|
| 331 |
scores.append(score)
|
| 332 |
reasonings.append(f"Frame {i+1}: {reasoning}")
|
| 333 |
if i < len(frames) - 1:
|
| 334 |
+
await asyncio.sleep(2)
|
| 335 |
|
| 336 |
+
combined_model_score = sum(scores) / len(scores) if scores else 0.5
|
| 337 |
+
models_used = ["Groq_Llama4"] if scores else []
|
| 338 |
+
groq_reasoning = " | ".join(reasonings) if reasonings else "All frame analyses failed."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
authenticity = round((1 - combined_model_score) * 100, 2)
|
| 341 |
fake = round(combined_model_score * 100, 2)
|
| 342 |
|
|
|
|
|
|
|
|
|
|
| 343 |
return {
|
| 344 |
"type": "video",
|
| 345 |
"authenticity": authenticity,
|
| 346 |
"fake": fake,
|
| 347 |
+
"confidence_level": make_confidence(authenticity, fake),
|
| 348 |
"models_used": models_used,
|
| 349 |
"details": {
|
| 350 |
"groq_score": round(combined_model_score, 4),
|
|
|
|
| 375 |
}
|
| 376 |
|
| 377 |
|
| 378 |
+
async def analyze_text(text: str):
    """Run all three text detectors concurrently and fuse their verdicts."""
    # all 3 in parallel
    fake_news, ai_text, (groq_prob, groq_why) = await asyncio.gather(
        call_roberta(ROBERTA_FAKE_NEWS_URL, text, "RoBERTa_FakeNews"),
        call_roberta(ROBERTA_AI_TEXT_URL, text, "RoBERTa_AIDetector"),
        call_groq_text(text)
    )

    # Keep only the detectors that actually answered.
    available = [(prob, label) for prob, label in (
        (fake_news, "RoBERTa_FakeNews"),
        (ai_text, "RoBERTa_AIDetector"),
        (groq_prob, "Groq_Llama3"),
    ) if prob is not None]

    if available:
        combined = sum(prob for prob, _ in available) / len(available)
    else:
        combined = 0.5  # nothing answered: stay neutral

    authenticity = round((1 - combined) * 100, 2)
    fake = round(combined * 100, 2)

    def _fmt(prob):
        # Failed detectors are reported as "unavailable" rather than a number.
        return round(prob, 4) if prob is not None else "unavailable"

    return {
        "type": "text",
        "authenticity": authenticity,
        "fake": fake,
        "confidence_level": make_confidence(authenticity, fake),
        "models_used": [label for _, label in available],
        "details": {
            "groq_score": _fmt(groq_prob),
            "roberta_fakenews_score": _fmt(fake_news),
            "roberta_aidetector_score": _fmt(ai_text),
            "groq_reasoning": groq_why,
            "metadata_risk": 0.0,
        }
    }
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
async def analyze_pdf(contents: bytes):
    """Analyze a PDF's extracted text plus up to 3 embedded images.

    Text goes through both RoBERTa classifiers and the Groq text model;
    embedded images go through Groq vision. All resulting fake-scores
    are averaged into one verdict. Falls back to a neutral 0.5 when
    nothing could be analyzed.
    """
    scores = []
    reasonings = []

    try:
        # extract text
        reader = pypdf.PdfReader(io.BytesIO(contents))
        full_text = ""
        for page in reader.pages:
            # extract_text() may return None for image-only pages.
            full_text += page.extract_text() or ""

        if full_text.strip():
            print(f"Extracted {len(full_text)} chars from PDF")
            # All three text detectors in parallel.
            text_results = await asyncio.gather(
                call_roberta(ROBERTA_FAKE_NEWS_URL, full_text, "RoBERTa_FakeNews"),
                call_roberta(ROBERTA_AI_TEXT_URL, full_text, "RoBERTa_AIDetector"),
                call_groq_text(full_text)
            )
            s1 = text_results[0]
            s2 = text_results[1]
            s3, text_reasoning = text_results[2]

            # Only detectors that answered contribute to the average.
            if s1 is not None:
                scores.append(s1)
                reasonings.append(f"RoBERTa FakeNews: {round(s1*100)}% fake")
            if s2 is not None:
                scores.append(s2)
                reasonings.append(f"RoBERTa AI Detector: {round(s2*100)}% AI-generated")
            if s3 is not None:
                scores.append(s3)
                reasonings.append(f"Groq text: {text_reasoning}")

        # extract and analyze images inside PDF
        doc = fitz.open(stream=contents, filetype="pdf")
        image_count = 0
        for page in doc:
            for img in page.get_images():
                # Hard cap of 3 images to bound latency and API usage.
                if image_count >= 3:
                    break
                xref = img[0]
                base_image = doc.extract_image(xref)
                img_bytes = base_image["image"]
                # Spacing out vision calls — presumably to avoid API rate
                # limits, matching the 2s delay used for video frames.
                await asyncio.sleep(2)
                img_score, img_reasoning = await call_groq_vision(img_bytes)
                if img_score is not None:
                    scores.append(img_score)
                    reasonings.append(f"Image {image_count+1}: {img_reasoning}")
                image_count += 1
        doc.close()

    except Exception as e:
        # Best-effort: a parse failure still yields the neutral fallback below.
        print(f"PDF analysis error: {e}")

    combined = sum(scores) / len(scores) if scores else 0.5
    # NOTE(review): this lists all three engines whenever ANY score exists,
    # even if some of them failed — unlike analyze_text, which reports only
    # the engines that answered. Confirm whether that is intentional.
    models_used = ["RoBERTa_FakeNews", "RoBERTa_AIDetector", "Groq_Llama3+Vision"] if scores else []

    authenticity = round((1 - combined) * 100, 2)
    fake = round(combined * 100, 2)

    return {
        "type": "pdf",
        "authenticity": authenticity,
        "fake": fake,
        "confidence_level": make_confidence(authenticity, fake),
        "models_used": models_used,
        "details": {
            "groq_score": "see breakdown",
            "groq_reasoning": " | ".join(reasonings) if reasonings else "No content extracted",
            "metadata_risk": 0.0,
        }
    }
|
| 489 |
+
|
| 490 |
+
|
| 491 |
# ==============================
|
| 492 |
# ROUTER
|
| 493 |
# ==============================
|
| 494 |
|
| 495 |
@app.post("/analyze")
async def analyze(
    file: Optional[UploadFile] = File(None),
    text: Optional[str] = Form(None)
):
    """Single entry point: route text, image, video, or PDF to its analyzer.

    Accepts either a multipart file upload or a plain-text form field.
    The response is the analyzer's result dict plus a "sha256" of the
    input, or an {"error": ...} dict for unsupported/missing input.
    """
    # plain text input
    if text and not file:
        result = await analyze_text(text)
        result["sha256"] = hashlib.sha256(text.encode()).hexdigest()
        return result

    if not file:
        return {"error": "No file or text provided"}

    contents = await file.read()
    sha256 = calculate_sha256(contents)

    # UploadFile.content_type is Optional — a client may omit the header.
    # Guard against None so we return "Unsupported file type" instead of
    # raising AttributeError on .startswith().
    content_type = file.content_type or ""

    if content_type.startswith("image/"):
        result = await analyze_image(contents, content_type)
    elif content_type.startswith("video/"):
        result = await analyze_video(contents)
    elif content_type == "application/pdf":
        result = await analyze_pdf(contents)
    else:
        return {"error": "Unsupported file type"}

    result["sha256"] = sha256
    return result
|
| 523 |
+
|
| 524 |
+
|