# FORENSIQ / export.py
# anky2002's picture
# Upload export.py with huggingface_hub
# d6d0dff verified
"""
FORENSIQ — Export Module
Generates professional forensic reports in PDF, DOCX, TXT, MD formats.
LLM polishing (Qwen2.5-72B-Instruct text model) is currently disabled, so reports are exported as provided.
"""
import os
import io
import re
import json
import tempfile
import datetime
from typing import Optional
# ─── LLM Formatter ──────────────────────────────────────────────────
def _format_with_llm(raw_report: str, format_type: str) -> Optional[str]:
    """Placeholder for the optional LLM polishing pass; always declines.

    The reports produced by explanation.py are already professionally
    formatted, so exports use the raw data and finish instantly.

    Args:
        raw_report: Markdown report text (ignored).
        format_type: Human-readable name of the target format (ignored).

    Returns:
        Always ``None``, which callers treat as "use the raw report".
    """
    polished: Optional[str] = None  # LLM formatting is intentionally skipped
    return polished
# ─── Strip Markdown Helper ──────────────────────────────────────────
def _strip_md(text: str) -> str:
    """Convert markdown to plain text.

    Removes markdown syntax (heading markers, bold/italic markers, code
    fences, inline-code backticks, table pipes and emoji) while keeping the
    wording. A line made only of dashes becomes a 60-character ``=`` divider
    and runs of blank lines are collapsed to a single blank line.

    Args:
        text: Markdown source. ``None`` or an empty string yields ``""``.

    Returns:
        The plain-text rendering with leading/trailing whitespace trimmed.
    """
    if not text:
        return ""

    # Code fences are handled before inline code: the inline rule would
    # otherwise pair up the fence's own backticks and leave stray "``".
    # Only the fence markers are removed; the enclosed text is kept so that
    # fenced content is not lost from the export.
    text = re.sub(r'^[ \t]*`{3,}[^`\n]*$\n?', '', text, flags=re.MULTILINE)
    text = re.sub(r'`{3,}', '', text)

    # Heading markers only count at the start of a line, so a "#" inside a
    # sentence ("Frame #12") is left alone.
    text = re.sub(r'^[ \t]*#{1,6}[ \t]+', '', text, flags=re.MULTILINE)

    text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)  # Bold
    # Italic must open and close on non-space text on one line; this keeps
    # "* item" bullet markers from being paired up across lines.
    text = re.sub(r'\*(?!\s)([^*\n]+?)(?<!\s)\*', r'\1', text)
    text = re.sub(r'`([^`]+)`', r'\1', text)  # Inline code

    # Table separator rows (|---|:---:|) carry no content: drop them before
    # the pipes are blanked so they are not mistaken for horizontal rules.
    text = re.sub(r'^[ \t]*\|[ \t:|-]*-{3,}[ \t:|-]*\|[ \t]*$\n?', '', text,
                  flags=re.MULTILINE)
    text = re.sub(r'\|[^\n]+\|', lambda m: m.group().replace('|', ' '), text)  # Tables

    # Horizontal rule: a line consisting solely of three or more dashes.
    text = re.sub(r'^[ \t]*-{3,}[ \t]*$', '=' * 60, text, flags=re.MULTILINE)

    # Emoji, written as code-point escapes so the pattern cannot be damaged
    # by a wrong source encoding: pictographs U+1F300-U+1FAFF plus the scales,
    # white circle, check mark and the emoji variation selector. Ordinary
    # punctuation and Greek/math characters are left untouched.
    text = re.sub('[\U0001F300-\U0001FAFF\u2696\u26AA\u2705\uFE0F]+[ \t]*', '', text)

    text = re.sub(r'\n{3,}', '\n\n', text)  # Multiple newlines
    return text.strip()
# ─── PDF Export ──────────────────────────────────────────────────────
# Line prefixes (after markdown stripping) that are rendered as bold section
# headings in the main report.
_PDF_HEADING_PREFIXES = (
    'Overall Verdict', 'Key Evidence', 'Agent-by-Agent', 'Bayesian',
    'Methodology', 'I.', 'II.', 'III.', 'IV.', 'V.', 'VI.',
)


def _pdf_safe(text: str) -> str:
    """Return *text* with every character outside Latin-1 replaced by '?'."""
    return text.encode('latin-1', 'replace').decode('latin-1')


def _pdf_section(pdf, title: str) -> None:
    """Start a new page headed by a 16pt bold title and a horizontal line."""
    pdf.add_page()
    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 12, title, ln=True)
    pdf.line(10, pdf.get_y(), 200, pdf.get_y())
    pdf.cell(0, 5, "", ln=True)


def _pdf_body_line(pdf, text: str, height: float, fallback_chars: int) -> None:
    """Write one line of body text, wrapped to the page width.

    Args:
        pdf: The FPDF document being built.
        text: Latin-1-safe text to write.
        height: Line height passed to the cell calls.
        fallback_chars: Maximum characters kept if wrapping fails and the
            text has to be written as a single non-wrapping cell.
    """
    # Newer fpdf2 releases leave the cursor to the right of a multi_cell by
    # default; returning to the left margin keeps consecutive calls from
    # running out of horizontal space.
    pdf.set_x(pdf.l_margin)
    try:
        # multi_cell wraps long text itself, so the line is passed in full
        # rather than truncated.
        pdf.multi_cell(0, height, text)
    except Exception:
        # Deliberate best-effort: a line that cannot be laid out is written
        # truncated instead of aborting the whole export.
        pdf.set_x(pdf.l_margin)
        pdf.cell(0, height, text[:fallback_chars], ln=True)


def export_pdf(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Generate a professional PDF forensic report.

    The document has a cover page, the main report, and - when provided -
    the court brief and the reasoning tree, each starting on its own page.
    Markdown is reduced to plain text and characters outside Latin-1 are
    replaced by '?' before being written.

    Args:
        report_md: Main report in markdown (``None`` is treated as empty).
        court_brief_md: Court brief in markdown; section skipped when empty.
        reasoning_tree_md: Reasoning tree in markdown; skipped when empty.

    Returns:
        Path of the newly created temporary ``.pdf`` file. The file is not
        deleted automatically.
    """
    from fpdf import FPDF

    # Try LLM formatting first
    formatted = _format_with_llm(report_md, "PDF document")
    content = _strip_md(formatted if formatted else (report_md or ""))
    court_content = _strip_md(court_brief_md) if court_brief_md else ""
    tree_content = _strip_md(reasoning_tree_md) if reasoning_tree_md else ""

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_left_margin(15)
    pdf.set_right_margin(15)

    # -- Cover Page --
    # The label says UTC, so the timestamp is taken in UTC, not local time.
    generated = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    pdf.add_page()
    pdf.set_font("Helvetica", "B", 28)
    pdf.cell(0, 60, "", ln=True)
    pdf.cell(0, 15, "FORENSIQ", ln=True, align="C")
    pdf.set_font("Helvetica", "", 14)
    pdf.cell(0, 10, "Forensic Analysis Report", ln=True, align="C")
    pdf.set_font("Helvetica", "", 10)
    pdf.cell(0, 8, "", ln=True)
    pdf.cell(0, 8, f"Generated: {generated}", ln=True, align="C")
    pdf.cell(0, 8, "Physics-Based Multi-Agent Forensic Framework", ln=True, align="C")
    pdf.cell(0, 8, "for Explainable Deepfake Detection", ln=True, align="C")

    # -- Main Report --
    _pdf_section(pdf, "FORENSIC ANALYSIS REPORT")
    pdf.set_font("Helvetica", "", 9)
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            pdf.cell(0, 4, "", ln=True)
        elif line.startswith(_PDF_HEADING_PREFIXES):
            # Section header: blank spacer, then one bold non-wrapping line
            # (capped at 85 characters because cell() does not wrap).
            pdf.set_font("Helvetica", "B", 11)
            pdf.cell(0, 8, "", ln=True)
            pdf.cell(0, 8, _pdf_safe(line[:85]), ln=True)
            pdf.set_font("Helvetica", "", 9)
        else:
            _pdf_body_line(pdf, _pdf_safe(line), 5, 80)

    # -- Court Brief --
    if court_content:
        _pdf_section(pdf, "EXPERT FORENSIC ANALYSIS BRIEF")
        pdf.set_font("Helvetica", "", 9)
        for raw_line in court_content.split('\n'):
            line = raw_line.strip()
            if not line:
                pdf.cell(0, 4, "", ln=True)
            else:
                _pdf_body_line(pdf, _pdf_safe(line), 5, 80)

    # -- Reasoning Tree --
    if tree_content:
        _pdf_section(pdf, "REASONING TREE")
        pdf.set_font("Courier", "", 8)
        for raw_line in tree_content.split('\n'):
            # Lines are not stripped here: leading indentation is part of
            # the tree layout.
            if not raw_line.strip():
                pdf.cell(0, 4, "", ln=True)
            else:
                _pdf_body_line(pdf, _pdf_safe(raw_line), 4, 60)

    # Save. mkstemp creates the file atomically; mktemp only returns a name
    # that another process could claim before it is written.
    fd, path = tempfile.mkstemp(suffix=".pdf", prefix="FORENSIQ_Report_")
    os.close(fd)  # output() opens the path itself
    pdf.output(path)
    return path
# ─── DOCX Export ─────────────────────────────────────────────────────
def export_docx(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Generate a professional DOCX forensic report.

    The document starts with a title page, followed by the main report and -
    when provided - the court brief and the reasoning tree, each after a
    page break. A small subset of markdown is translated: ``#`` headings,
    ``---`` rules, ``-``/``*`` bullets, pipe tables (as plain text rows),
    ``**bold**`` spans and fenced code blocks (monospace, indentation kept).

    Args:
        report_md: Main report in markdown (``None`` is treated as empty).
        court_brief_md: Court brief in markdown; section skipped when empty.
        reasoning_tree_md: Reasoning tree in markdown; skipped when empty.

    Returns:
        Path of the newly created temporary ``.docx`` file. The file is not
        deleted automatically.
    """
    from docx import Document
    from docx.shared import Pt, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    # Try LLM formatting
    formatted = _format_with_llm(report_md, "Word document")
    content = formatted if formatted else (report_md or "")

    # Emoji are written as code-point escapes so the pattern cannot be
    # damaged by a wrong source encoding: pictographs U+1F300-U+1FAFF plus
    # scales, white circle, check mark and the emoji variation selector.
    emoji_re = re.compile('[\U0001F300-\U0001FAFF\u2696\u26AA\u2705\uFE0F]')
    heading_re = re.compile(r'(#{1,6})\s+(.*)')
    bold_re = re.compile(r'\*\*([^*]+)\*\*')
    fence_re = re.compile(r'`{3,}[^`]*')
    table_sep_re = re.compile(r':?-{3,}:?')

    doc = Document()

    # -- Title --
    title = doc.add_heading('FORENSIQ', level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    subtitle = doc.add_paragraph()
    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = subtitle.add_run('Forensic Analysis Report')
    run.font.size = Pt(14)
    run.font.color.rgb = RGBColor(100, 100, 100)

    # The label says UTC, so the timestamp is taken in UTC, not local time.
    generated = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    date_para = doc.add_paragraph()
    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = date_para.add_run(f'Generated: {generated}')
    run.font.size = Pt(10)
    run.font.color.rgb = RGBColor(150, 150, 150)

    doc.add_page_break()

    # -- Parse and add content --
    def add_runs(paragraph, text, size=None):
        """Append *text* to *paragraph*, rendering **bold** spans as bold runs."""
        # re.split with one capture group alternates plain / bold pieces,
        # so odd indexes are the bold spans.
        for idx, part in enumerate(bold_re.split(text)):
            if not part:
                continue
            piece = paragraph.add_run(part)
            if idx % 2 == 1:
                piece.bold = True
            if size is not None:
                piece.font.size = size

    def add_md_content(md_text):
        """Translate markdown lines into paragraphs/headings on ``doc``."""
        in_code = False
        for line in md_text.split('\n'):
            stripped = line.strip()
            # A fence line toggles code mode and is not emitted itself.
            if fence_re.fullmatch(stripped):
                in_code = not in_code
                continue
            if not stripped:
                continue
            if in_code:
                # Keep leading indentation and use a monospace font so
                # tree/ASCII layouts stay aligned.
                code_run = doc.add_paragraph().add_run(line.rstrip())
                code_run.font.name = 'Courier New'
                code_run.font.size = Pt(8)
                continue

            heading = heading_re.match(stripped)
            if heading:
                heading_text = emoji_re.sub('', heading.group(2)).strip()
                doc.add_heading(heading_text, level=len(heading.group(1)))
            elif stripped.startswith('---'):
                doc.add_paragraph('_' * 50)
            elif stripped.startswith(('- ', '* ')):
                add_runs(doc.add_paragraph(style='List Bullet'), stripped[2:])
            elif stripped.startswith('|') and '|' in stripped[1:]:
                # Table row - just add as plain text; separator cells such
                # as '---', '-----' or ':---:' carry no content.
                cells = [c.strip() for c in stripped.split('|')]
                cells = [c for c in cells if c and not table_sep_re.fullmatch(c)]
                if cells:
                    doc.add_paragraph(' | '.join(cells))
            else:
                # Regular paragraph
                add_runs(doc.add_paragraph(), stripped, size=Pt(10))

    doc.add_heading('Forensic Analysis Report', level=1)
    add_md_content(content)

    if court_brief_md:
        doc.add_page_break()
        doc.add_heading('Expert Forensic Analysis Brief', level=1)
        add_md_content(court_brief_md)

    if reasoning_tree_md:
        doc.add_page_break()
        doc.add_heading('Reasoning Tree', level=1)
        add_md_content(reasoning_tree_md)

    # mkstemp creates the file atomically; mktemp only returns a name that
    # another process could claim before it is written.
    fd, path = tempfile.mkstemp(suffix=".docx", prefix="FORENSIQ_Report_")
    os.close(fd)  # save() opens the path itself
    doc.save(path)
    return path
# ─── TXT Export ──────────────────────────────────────────────────────
def export_txt(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Generate a plain text forensic report.

    The file starts with a banner (title and generation time in UTC),
    followed by the main report and - when provided - the court brief and
    the reasoning tree, each under its own banner. All markdown is reduced
    to plain text.

    Args:
        report_md: Main report in markdown (``None`` is treated as empty).
        court_brief_md: Court brief in markdown; section skipped when empty.
        reasoning_tree_md: Reasoning tree in markdown; skipped when empty.

    Returns:
        Path of the newly created temporary UTF-8 ``.txt`` file. The file
        is not deleted automatically.
    """
    # Try LLM formatting
    formatted = _format_with_llm(report_md, "plain text document")
    content = _strip_md(formatted if formatted else (report_md or ""))

    rule = "=" * 60
    # The label says UTC, so the timestamp is taken in UTC, not local time.
    generated = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')

    parts = [
        rule + "\n",
        # Em dash written as an escape so it survives any source re-encoding.
        " FORENSIQ \u2014 FORENSIC ANALYSIS REPORT\n",
        f" Generated: {generated}\n",
        rule + "\n\n",
        content,
    ]

    if court_brief_md:
        parts += [
            "\n\n" + rule + "\n",
            " EXPERT FORENSIC ANALYSIS BRIEF (FRE 702)\n",
            rule + "\n\n",
            _strip_md(court_brief_md),
        ]

    if reasoning_tree_md:
        parts += [
            "\n\n" + rule + "\n",
            " REASONING TREE\n",
            rule + "\n\n",
            _strip_md(reasoning_tree_md),
        ]

    # mkstemp creates the file atomically; mktemp only returns a name that
    # another process could claim before it is written.
    fd, path = tempfile.mkstemp(suffix=".txt", prefix="FORENSIQ_Report_")
    with os.fdopen(fd, 'w', encoding='utf-8') as f:
        f.write("".join(parts))
    return path
# ─── MD Export ───────────────────────────────────────────────────────
def export_md(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Export as Markdown file.

    The main report is written first; the court brief and the reasoning
    tree follow when non-empty, each separated from the preceding text by a
    horizontal rule (``---``).

    Args:
        report_md: Main report in markdown (``None`` is treated as empty).
        court_brief_md: Court brief in markdown; skipped when empty.
        reasoning_tree_md: Reasoning tree in markdown; skipped when empty.

    Returns:
        Path of the newly created temporary UTF-8 ``.md`` file. The file is
        not deleted automatically.
    """
    sections = [report_md or ""]
    sections += [extra for extra in (court_brief_md, reasoning_tree_md) if extra]

    # mkstemp creates the file atomically; mktemp only returns a name that
    # another process could claim before it is written.
    fd, path = tempfile.mkstemp(suffix=".md", prefix="FORENSIQ_Report_")
    with os.fdopen(fd, 'w', encoding='utf-8') as f:
        f.write("\n\n---\n\n".join(sections))
    return path