Jdp-08 committed on
Commit
452ae25
·
verified ·
1 Parent(s): 0f0de7e

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +264 -35
server.py CHANGED
@@ -1,5 +1,5 @@
1
  from typing import Optional, Tuple
2
- from fastapi import FastAPI, UploadFile, File
3
  from fastapi.responses import FileResponse
4
  from fastapi.middleware.cors import CORSMiddleware
5
  from PIL import Image, ExifTags
@@ -9,10 +9,18 @@ import httpx
9
  import os
10
  import base64
11
  import json
 
 
 
 
 
12
 
13
  # ---------------- API KEYS ----------------
14
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
15
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
 
 
16
 
17
  # ---------------- APP ----------------
18
  app = FastAPI()
@@ -53,15 +61,39 @@ def calculate_metadata_risk(image: Image.Image):
53
 
54
 
55
  def fusion_score(model_score: float, metadata_risk: float):
56
- final = 0.9 * model_score + 0.2 * metadata_risk
57
  authenticity = (1 - final) * 100
58
  fake = final * 100
59
  return authenticity, fake
60
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # ==============================
63
- # GROQ VISION
64
- # now returns (score, reasoning) tuple
65
  # ==============================
66
 
67
  async def call_groq_vision(contents: bytes) -> Tuple[Optional[float], str]:
@@ -121,7 +153,6 @@ fake_probability must be between 0.0 (definitely real) and 1.0 (definitely AI/fa
121
  data = response.json()
122
  text = data["choices"][0]["message"]["content"]
123
  print(f"Groq vision response: {text}")
124
-
125
  clean = text.strip().replace("```json", "").replace("```", "")
126
  result = json.loads(clean)
127
  return float(result["fake_probability"]), result.get("reasoning", "")
@@ -131,6 +162,91 @@ fake_probability must be between 0.0 (definitely real) and 1.0 (definitely AI/fa
131
  return None, ""
132
 
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # ==============================
135
  # ANALYZERS
136
  # ==============================
@@ -138,7 +254,11 @@ fake_probability must be between 0.0 (definitely real) and 1.0 (definitely AI/fa
138
  async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
139
  image = Image.open(io.BytesIO(contents)).convert("RGB")
140
 
141
- score, reasoning = await call_groq_vision(contents)
 
 
 
 
142
 
143
  combined_model_score = score if score is not None else 0.5
144
  models_used = ["Groq_Llama4"] if score is not None else []
@@ -146,14 +266,11 @@ async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
146
  metadata_risk = calculate_metadata_risk(image)
147
  authenticity, fake = fusion_score(combined_model_score, metadata_risk)
148
 
149
- diff = abs(authenticity - fake)
150
- confidence_level = "low" if diff < 20 else "medium" if diff < 40 else "high"
151
-
152
  return {
153
  "type": "image",
154
  "authenticity": round(authenticity, 2),
155
  "fake": round(fake, 2),
156
- "confidence_level": confidence_level,
157
  "models_used": models_used,
158
  "details": {
159
  "groq_score": round(score, 4) if score is not None else "unavailable",
@@ -163,13 +280,7 @@ async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
163
  }
164
 
165
 
166
- import cv2
167
- import numpy as np
168
- import tempfile
169
- import asyncio
170
-
171
  async def analyze_video(contents: bytes):
172
- # write to temp file since cv2 needs a path
173
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
174
  f.write(contents)
175
  tmp_path = f.name
@@ -180,7 +291,6 @@ async def analyze_video(contents: bytes):
180
  fps = cap.get(cv2.CAP_PROP_FPS)
181
  duration = round(frame_count / fps, 1) if fps > 0 else 0
182
 
183
- # sample 5 frames evenly across the video
184
  sample_indices = [int(frame_count * i / 5) for i in range(5)]
185
  frames = []
186
 
@@ -188,7 +298,6 @@ async def analyze_video(contents: bytes):
188
  cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
189
  ret, frame = cap.read()
190
  if ret:
191
- # convert frame to jpeg bytes
192
  pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
193
  buf = io.BytesIO()
194
  pil_img.save(buf, format="JPEG", quality=85)
@@ -213,7 +322,6 @@ async def analyze_video(contents: bytes):
213
  }
214
  }
215
 
216
- # analyze each frame with Groq — 2s delay between calls to avoid rate limit
217
  scores = []
218
  reasonings = []
219
  for i, frame_bytes in enumerate(frames):
@@ -223,28 +331,20 @@ async def analyze_video(contents: bytes):
223
  scores.append(score)
224
  reasonings.append(f"Frame {i+1}: {reasoning}")
225
  if i < len(frames) - 1:
226
- await asyncio.sleep(2) # avoid rate limit
227
 
228
- if scores:
229
- combined_model_score = sum(scores) / len(scores)
230
- models_used = ["Groq_Llama4"]
231
- groq_reasoning = " | ".join(reasonings)
232
- else:
233
- combined_model_score = 0.5
234
- models_used = []
235
- groq_reasoning = "All frame analyses failed."
236
 
237
  authenticity = round((1 - combined_model_score) * 100, 2)
238
  fake = round(combined_model_score * 100, 2)
239
 
240
- diff = abs(authenticity - fake)
241
- confidence_level = "low" if diff < 20 else "medium" if diff < 40 else "high"
242
-
243
  return {
244
  "type": "video",
245
  "authenticity": authenticity,
246
  "fake": fake,
247
- "confidence_level": confidence_level,
248
  "models_used": models_used,
249
  "details": {
250
  "groq_score": round(combined_model_score, 4),
@@ -275,21 +375,150 @@ async def analyze_video(contents: bytes):
275
  }
276
 
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  # ==============================
279
  # ROUTER
280
  # ==============================
281
 
282
  @app.post("/analyze")
283
- async def analyze(file: UploadFile = File(...)):
 
 
 
 
 
 
 
 
 
 
 
 
284
  contents = await file.read()
285
  sha256 = calculate_sha256(contents)
286
 
287
  if file.content_type.startswith("image/"):
288
  result = await analyze_image(contents, file.content_type)
289
  elif file.content_type.startswith("video/"):
290
- result = await analyze_video(contents) # ← await added
 
 
291
  else:
292
  return {"error": "Unsupported file type"}
293
 
294
  result["sha256"] = sha256
295
- return result
 
 
 
1
  from typing import Optional, Tuple
2
+ from fastapi import FastAPI, UploadFile, File, Form
3
  from fastapi.responses import FileResponse
4
  from fastapi.middleware.cors import CORSMiddleware
5
  from PIL import Image, ExifTags
 
9
  import os
10
  import base64
11
  import json
12
+ import asyncio
13
+ import cv2
14
+ import tempfile
15
+ import fitz # pymupdf
16
+ import pypdf
17
 
18
  # ---------------- API KEYS ----------------
19
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
20
+ HF_API_KEY = os.getenv("HF_API_KEY")
21
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
22
+ ROBERTA_FAKE_NEWS_URL = "https://router.huggingface.co/hf-inference/models/hamzab/roberta-fake-news-classification"
23
+ ROBERTA_AI_TEXT_URL = "https://router.huggingface.co/hf-inference/models/openai-community/roberta-base-openai-detector"
24
 
25
  # ---------------- APP ----------------
26
  app = FastAPI()
 
61
 
62
 
63
def fusion_score(model_score: float, metadata_risk: float):
    """Blend the model's fake-probability with the metadata risk signal.

    Returns a (authenticity_pct, fake_pct) pair, each on a 0-100 scale.
    The model verdict dominates (weight 0.9); metadata only nudges (0.1).
    """
    blended = model_score * 0.9 + metadata_risk * 0.1
    return (1 - blended) * 100, blended * 100
68
 
69
 
70
def normalize_output(label_prob_dict: dict) -> float:
    """Collapse a classifier's label->probability map into one fake score.

    Labels containing a fake-ish keyword count fully toward the fake score,
    real-ish labels count as zero, and labels matching neither list count at
    40% weight (ambiguous labels add mild suspicion). Result is capped at 1.0.
    """
    fake_keywords = ("fake", "ai", "generated", "manipulated", "deepfake",
                     "artificial", "synthetic", "machine")
    real_keywords = ("real", "authentic", "genuine", "human", "original")

    fake_total = 0.0
    unknown_total = 0.0
    for raw_label, prob in label_prob_dict.items():
        label = raw_label.lower()
        if any(word in label for word in fake_keywords):
            fake_total += prob
        elif not any(word in label for word in real_keywords):
            unknown_total += prob

    fake_total += 0.4 * unknown_total
    return min(fake_total, 1.0)
88
+
89
+
90
def make_confidence(authenticity, fake):
    """Bucket the spread between the two percentages into a confidence label.

    A small spread means the scores were near 50/50 (low confidence);
    a large spread means the verdict was decisive (high confidence).
    """
    spread = abs(authenticity - fake)
    if spread < 20:
        return "low"
    if spread < 40:
        return "medium"
    return "high"
93
+
94
+
95
  # ==============================
96
+ # GROQ VISION (images)
 
97
  # ==============================
98
 
99
  async def call_groq_vision(contents: bytes) -> Tuple[Optional[float], str]:
 
153
  data = response.json()
154
  text = data["choices"][0]["message"]["content"]
155
  print(f"Groq vision response: {text}")
 
156
  clean = text.strip().replace("```json", "").replace("```", "")
157
  result = json.loads(clean)
158
  return float(result["fake_probability"]), result.get("reasoning", "")
 
162
  return None, ""
163
 
164
 
165
+ # ==============================
166
+ # GROQ TEXT (for AI writing detection)
167
+ # ==============================
168
+
169
async def call_groq_text(text: str) -> Tuple[Optional[float], str]:
    """Ask Groq's LLM whether *text* looks AI-generated or forged.

    Returns a (fake_probability, reasoning) pair. Best-effort by design:
    returns (None, "") when the API key is missing or the request/parse
    fails, so callers can simply skip this model.
    """
    if not GROQ_API_KEY:
        return None, ""

    # Prompt is truncated to 4000 chars to stay within model limits.
    prompt = f"""You are a forensic text analyst. Analyze the following text and determine if it is AI-generated or written by a human. Also check if it could be a forged government document or fake news.

Look for:
- Overly formal or repetitive sentence structure typical of LLMs
- Lack of personal voice or human inconsistencies
- Suspiciously perfect grammar with no natural errors
- Generic phrasing commonly used by AI models
- For government documents: inconsistent terminology, wrong formats, suspicious clauses
- For news: sensational language, lack of credible sources, misleading framing

Text to analyze:
\"\"\"
{text[:4000]}
\"\"\"

Respond ONLY in this exact JSON format, nothing else:
{{"fake_probability": 0.0, "reasoning": "brief reason"}}

fake_probability must be between 0.0 (definitely human/authentic) and 1.0 (definitely AI-generated/forged)."""

    request_body = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 200,
        "temperature": 0.1,
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                GROQ_API_URL,
                headers={
                    "Authorization": f"Bearer {GROQ_API_KEY}",
                    "Content-Type": "application/json",
                },
                json=request_body,
            )
            resp.raise_for_status()
            data = resp.json()

        text_response = data["choices"][0]["message"]["content"]
        print(f"Groq text response: {text_response}")
        # Model sometimes wraps the JSON in markdown fences; strip them.
        clean = text_response.strip().replace("```json", "").replace("```", "")
        result = json.loads(clean)
        return float(result["fake_probability"]), result.get("reasoning", "")

    except Exception as e:
        print(f"Groq text failed: {e}")
        return None, ""
223
+
224
+
225
+ # ==============================
226
+ # ROBERTA (for fake news + AI text)
227
+ # ==============================
228
+
229
async def call_roberta(url: str, text: str, name: str) -> Optional[float]:
    """Query a HuggingFace-hosted RoBERTa classifier at *url*.

    *name* is only used for logging. Input is truncated to 512 chars for
    the model's context limit. Returns a normalized fake-probability in
    [0, 1], or None when the HF key is missing or the call fails.
    """
    if not HF_API_KEY:
        print(f"No HF_API_KEY, skipping {name}")
        return None

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                url,
                headers={"Authorization": f"Bearer {HF_API_KEY}"},
                json={"inputs": text[:512]},
            )
            resp.raise_for_status()
            data = resp.json()

        print(f"{name} response: {data}")
        # HF returns [[{"label": ..., "score": ...}, ...]]; fold into a map.
        scores_by_label = {entry["label"]: entry["score"] for entry in data[0]}
        return normalize_output(scores_by_label)
    except Exception as e:
        print(f"{name} failed: {e}")
        return None
248
+
249
+
250
  # ==============================
251
  # ANALYZERS
252
  # ==============================
 
254
  async def analyze_image(contents: bytes, content_type: str = "image/jpeg"):
255
  image = Image.open(io.BytesIO(contents)).convert("RGB")
256
 
257
+ if len(contents) > 20 * 1024 * 1024:
258
+ print("Image too large for Groq")
259
+ score, reasoning = None, "Image too large for analysis"
260
+ else:
261
+ score, reasoning = await call_groq_vision(contents)
262
 
263
  combined_model_score = score if score is not None else 0.5
264
  models_used = ["Groq_Llama4"] if score is not None else []
 
266
  metadata_risk = calculate_metadata_risk(image)
267
  authenticity, fake = fusion_score(combined_model_score, metadata_risk)
268
 
 
 
 
269
  return {
270
  "type": "image",
271
  "authenticity": round(authenticity, 2),
272
  "fake": round(fake, 2),
273
+ "confidence_level": make_confidence(authenticity, fake),
274
  "models_used": models_used,
275
  "details": {
276
  "groq_score": round(score, 4) if score is not None else "unavailable",
 
280
  }
281
 
282
 
 
 
 
 
 
283
  async def analyze_video(contents: bytes):
 
284
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
285
  f.write(contents)
286
  tmp_path = f.name
 
291
  fps = cap.get(cv2.CAP_PROP_FPS)
292
  duration = round(frame_count / fps, 1) if fps > 0 else 0
293
 
 
294
  sample_indices = [int(frame_count * i / 5) for i in range(5)]
295
  frames = []
296
 
 
298
  cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
299
  ret, frame = cap.read()
300
  if ret:
 
301
  pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
302
  buf = io.BytesIO()
303
  pil_img.save(buf, format="JPEG", quality=85)
 
322
  }
323
  }
324
 
 
325
  scores = []
326
  reasonings = []
327
  for i, frame_bytes in enumerate(frames):
 
331
  scores.append(score)
332
  reasonings.append(f"Frame {i+1}: {reasoning}")
333
  if i < len(frames) - 1:
334
+ await asyncio.sleep(2)
335
 
336
+ combined_model_score = sum(scores) / len(scores) if scores else 0.5
337
+ models_used = ["Groq_Llama4"] if scores else []
338
+ groq_reasoning = " | ".join(reasonings) if reasonings else "All frame analyses failed."
 
 
 
 
 
339
 
340
  authenticity = round((1 - combined_model_score) * 100, 2)
341
  fake = round(combined_model_score * 100, 2)
342
 
 
 
 
343
  return {
344
  "type": "video",
345
  "authenticity": authenticity,
346
  "fake": fake,
347
+ "confidence_level": make_confidence(authenticity, fake),
348
  "models_used": models_used,
349
  "details": {
350
  "groq_score": round(combined_model_score, 4),
 
375
  }
376
 
377
 
378
async def analyze_text(text: str):
    """Score *text* with both RoBERTa detectors and Groq, fused by average.

    All three detectors run concurrently; any that fail (return None) are
    simply excluded from the average. With no working detector the score
    falls back to 0.5 (undecided).
    """
    fakenews_score, aidetect_score, groq_result = await asyncio.gather(
        call_roberta(ROBERTA_FAKE_NEWS_URL, text, "RoBERTa_FakeNews"),
        call_roberta(ROBERTA_AI_TEXT_URL, text, "RoBERTa_AIDetector"),
        call_groq_text(text),
    )
    groq_score, reasoning = groq_result

    candidates = [
        (fakenews_score, "RoBERTa_FakeNews"),
        (aidetect_score, "RoBERTa_AIDetector"),
        (groq_score, "Groq_Llama3"),
    ]
    usable = [(s, n) for s, n in candidates if s is not None]

    combined = sum(s for s, _ in usable) / len(usable) if usable else 0.5
    models_used = [n for _, n in usable]

    authenticity = round((1 - combined) * 100, 2)
    fake = round(combined * 100, 2)

    return {
        "type": "text",
        "authenticity": authenticity,
        "fake": fake,
        "confidence_level": make_confidence(authenticity, fake),
        "models_used": models_used,
        "details": {
            "groq_score": round(groq_score, 4) if groq_score is not None else "unavailable",
            "roberta_fakenews_score": round(fakenews_score, 4) if fakenews_score is not None else "unavailable",
            "roberta_aidetector_score": round(aidetect_score, 4) if aidetect_score is not None else "unavailable",
            "groq_reasoning": reasoning,
            "metadata_risk": 0.0,
        },
    }
416
+
417
+
418
async def analyze_pdf(contents: bytes):
    """Analyze a PDF's text layer and up to three embedded images.

    Text (extracted via pypdf) goes through both RoBERTa detectors and the
    Groq text model in parallel; embedded images (extracted via PyMuPDF)
    go through Groq vision, throttled to avoid rate limits. All successful
    scores are averaged; with no usable content the score is 0.5 (undecided).

    Fixes over the previous version:
    - ``models_used`` now lists only the models that actually returned a
      score (it previously claimed all of them unconditionally).
    - the 3-image cap now also stops the outer page loop (previously only
      the inner per-page loop broke, so every page was still scanned).
    - the PyMuPDF document is closed in a ``finally`` so it is released
      even if a vision call raises mid-scan.
    """
    scores = []
    reasonings = []
    models_used = []  # only models that actually contributed a score

    try:
        # ---- text layer ----
        reader = pypdf.PdfReader(io.BytesIO(contents))
        full_text = ""
        for page in reader.pages:
            full_text += page.extract_text() or ""

        if full_text.strip():
            print(f"Extracted {len(full_text)} chars from PDF")
            s1, s2, (s3, text_reasoning) = await asyncio.gather(
                call_roberta(ROBERTA_FAKE_NEWS_URL, full_text, "RoBERTa_FakeNews"),
                call_roberta(ROBERTA_AI_TEXT_URL, full_text, "RoBERTa_AIDetector"),
                call_groq_text(full_text),
            )

            if s1 is not None:
                scores.append(s1)
                reasonings.append(f"RoBERTa FakeNews: {round(s1*100)}% fake")
                models_used.append("RoBERTa_FakeNews")
            if s2 is not None:
                scores.append(s2)
                reasonings.append(f"RoBERTa AI Detector: {round(s2*100)}% AI-generated")
                models_used.append("RoBERTa_AIDetector")
            if s3 is not None:
                scores.append(s3)
                reasonings.append(f"Groq text: {text_reasoning}")
                models_used.append("Groq_Llama3")

        # ---- embedded images (max 3, rate-limited) ----
        doc = fitz.open(stream=contents, filetype="pdf")
        try:
            image_count = 0
            for page in doc:
                if image_count >= 3:
                    break
                for img in page.get_images():
                    if image_count >= 3:
                        break
                    base_image = doc.extract_image(img[0])
                    await asyncio.sleep(2)  # throttle Groq vision calls
                    img_score, img_reasoning = await call_groq_vision(base_image["image"])
                    if img_score is not None:
                        scores.append(img_score)
                        reasonings.append(f"Image {image_count+1}: {img_reasoning}")
                        if "Groq_Vision" not in models_used:
                            models_used.append("Groq_Vision")
                    image_count += 1
        finally:
            doc.close()

    except Exception as e:
        print(f"PDF analysis error: {e}")

    combined = sum(scores) / len(scores) if scores else 0.5
    authenticity = round((1 - combined) * 100, 2)
    fake = round(combined * 100, 2)

    return {
        "type": "pdf",
        "authenticity": authenticity,
        "fake": fake,
        "confidence_level": make_confidence(authenticity, fake),
        "models_used": models_used,
        "details": {
            "groq_score": "see breakdown",
            "groq_reasoning": " | ".join(reasonings) if reasonings else "No content extracted",
            "metadata_risk": 0.0,
        },
    }
489
+
490
+
491
  # ==============================
492
  # ROUTER
493
  # ==============================
494
 
495
@app.post("/analyze")
async def analyze(
    file: Optional[UploadFile] = File(None),
    text: Optional[str] = Form(None)
):
    """Route an upload (image/video/PDF) or raw text to the right analyzer.

    Exactly one of *file* / *text* is expected; when both are sent the file
    wins. The response always carries a sha256 of the analyzed payload.

    Fix: ``file.content_type`` can be ``None`` for some clients; it is now
    defaulted to "" so the type checks fall through to the unsupported-type
    branch instead of raising AttributeError.
    """
    # Text-only request (no file attached).
    if text and not file:
        result = await analyze_text(text)
        result["sha256"] = hashlib.sha256(text.encode()).hexdigest()
        return result

    if not file:
        return {"error": "No file or text provided"}

    contents = await file.read()
    sha256 = calculate_sha256(contents)

    content_type = file.content_type or ""
    if content_type.startswith("image/"):
        result = await analyze_image(contents, content_type)
    elif content_type.startswith("video/"):
        result = await analyze_video(contents)
    elif content_type == "application/pdf":
        result = await analyze_pdf(contents)
    else:
        return {"error": "Unsupported file type"}

    result["sha256"] = sha256
    return result
523
+
524
+