Spaces:

anky2002
/

FORENSIQ

Running

App Files Files Community

anky2002 committed 16 days ago

Commit

894d051

verified ·

1 Parent(s): 8d94c3d

Upload export.py with huggingface_hub

Browse files

Files changed (1) hide show

export.py +302 -0

export.py ADDED Viewed

	@@ -0,0 +1,302 @@

+"""
+FORENSIQ — Export Module
+Generates professional forensic reports in PDF, DOCX, TXT, MD formats.
+Uses Qwen2.5-72B-Instruct (text model) for polished formatting when available.
+"""
+import os
+import io
+import re
+import json
+import tempfile
+import datetime
+from typing import Optional
+# ─── LLM Formatter ──────────────────────────────────────────────────
+def _format_with_llm(raw_report: str, format_type: str) -> Optional[str]:
+    """Use Qwen2.5-72B-Instruct to polish the report into professional format."""
+    token = os.environ.get("HF_TOKEN", "")
+    if not token:
+        return None
+    try:
+        from openai import OpenAI
+        client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=token)
+        system = f"""You are a professional forensic report formatter. Take the raw FORENSIQ analysis data and reformat it into a clean, professional {format_type} document.
+Rules:
+- Preserve ALL data, scores, and findings exactly as given — do not invent or modify any values
+- Organize with clear headers, tables, and sections
+- Use professional forensic language
+- Include the verdict, probability, all agent findings, and methodology statement
+- For PDF/DOCX: structure with numbered sections (I, II, III...)
+- For TXT: use clean ASCII formatting with consistent indentation
+- Keep it concise but complete — every data point must appear"""
+        resp = client.chat.completions.create(
+            model="Qwen/Qwen2.5-72B-Instruct",
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": f"Format this forensic report for {format_type} export:\n\n{raw_report[:8000]}"}
+            ],
+            max_tokens=4000,
+            temperature=0.1,
+        )
+        return resp.choices[0].message.content
+    except Exception as e:
+        return None
+# ─── Strip Markdown Helper ──────────────────────────────────────────
+def _strip_md(text: str) -> str:
+    """Convert markdown to plain text."""
+    text = re.sub(r'#{1,6}\s*', '', text)  # Headers
+    text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)  # Bold
+    text = re.sub(r'\*([^*]+)\*', r'\1', text)  # Italic
+    text = re.sub(r'`([^`]+)`', r'\1', text)  # Inline code
+    text = re.sub(r'```[^`]*```', '', text, flags=re.DOTALL)  # Code blocks
+    text = re.sub(r'\|[^\n]+\|', lambda m: m.group().replace('|', '  '), text)  # Tables
+    text = re.sub(r'---+', '=' * 60, text)  # Horizontal rules
+    text = re.sub(r'[🔬🔴🟢🟡🟠⚪✅⚖️📊📋🌳🔍📐]', '', text)  # Emojis
+    text = re.sub(r'\n{3,}', '\n\n', text)  # Multiple newlines
+    return text.strip()
+# ─── PDF Export ──────────────────────────────────────────────────────
+def export_pdf(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
+    """Generate a professional PDF forensic report."""
+    from fpdf import FPDF
+    # Try LLM formatting first
+    formatted = _format_with_llm(report_md, "PDF document")
+    content = _strip_md(formatted if formatted else report_md)
+    court_content = _strip_md(court_brief_md) if court_brief_md else ""
+    tree_content = _strip_md(reasoning_tree_md) if reasoning_tree_md else ""
+    pdf = FPDF()
+    pdf.set_auto_page_break(auto=True, margin=15)
+    pdf.set_left_margin(15)
+    pdf.set_right_margin(15)
+    # ── Cover Page ────────────────────────────────────────────────
+    pdf.add_page()
+    pdf.set_font("Helvetica", "B", 28)
+    pdf.cell(0, 60, "", ln=True)
+    pdf.cell(0, 15, "FORENSIQ", ln=True, align="C")
+    pdf.set_font("Helvetica", "", 14)
+    pdf.cell(0, 10, "Forensic Analysis Report", ln=True, align="C")
+    pdf.set_font("Helvetica", "", 10)
+    pdf.cell(0, 8, "", ln=True)
+    pdf.cell(0, 8, f"Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}", ln=True, align="C")
+    pdf.cell(0, 8, "Physics-Based Multi-Agent Forensic Framework", ln=True, align="C")
+    pdf.cell(0, 8, "for Explainable Deepfake Detection", ln=True, align="C")
+    # ── Main Report ───────────────────────────────────────────────
+    pdf.add_page()
+    pdf.set_font("Helvetica", "B", 16)
+    pdf.cell(0, 12, "FORENSIC ANALYSIS REPORT", ln=True)
+    pdf.line(10, pdf.get_y(), 200, pdf.get_y())
+    pdf.cell(0, 5, "", ln=True)
+    pdf.set_font("Helvetica", "", 9)
+    for line in content.split('\n'):
+        line = line.strip()
+        if not line:
+            pdf.cell(0, 4, "", ln=True)
+            continue
+        # Detect section headers
+        if line.startswith('Overall Verdict') or line.startswith('Key Evidence') or \
+           line.startswith('Agent-by-Agent') or line.startswith('Bayesian') or \
+           line.startswith('Methodology') or line.startswith('I.') or line.startswith('II.') or \
+           line.startswith('III.') or line.startswith('IV.') or line.startswith('V.') or \
+           line.startswith('VI.'):
+            pdf.set_font("Helvetica", "B", 11)
+            pdf.cell(0, 8, "", ln=True)
+            safe = line[:85].encode('latin-1', 'replace').decode('latin-1')
+            pdf.cell(0, 8, safe, ln=True)
+            pdf.set_font("Helvetica", "", 9)
+        else:
+            safe_line = line.encode('latin-1', 'replace').decode('latin-1')
+            # Truncate very long lines to fit page width
+            try:
+                pdf.multi_cell(0, 5, safe_line[:180])
+            except Exception:
+                pdf.cell(0, 5, safe_line[:80], ln=True)
+    # ── Court Brief ───────────────────────────────────────────────
+    if court_content:
+        pdf.add_page()
+        pdf.set_font("Helvetica", "B", 16)
+        pdf.cell(0, 12, "EXPERT FORENSIC ANALYSIS BRIEF", ln=True)
+        pdf.line(10, pdf.get_y(), 200, pdf.get_y())
+        pdf.cell(0, 5, "", ln=True)
+        pdf.set_font("Helvetica", "", 9)
+        for line in court_content.split('\n'):
+            line = line.strip()
+            if not line:
+                pdf.cell(0, 4, "", ln=True)
+            else:
+                safe_line = line.encode('latin-1', 'replace').decode('latin-1')
+                try:
+                    pdf.multi_cell(0, 5, safe_line[:180])
+                except Exception:
+                    pdf.cell(0, 5, safe_line[:80], ln=True)
+    # ── Reasoning Tree ────────────────────────────────────────────
+    if tree_content:
+        pdf.add_page()
+        pdf.set_font("Helvetica", "B", 16)
+        pdf.cell(0, 12, "REASONING TREE", ln=True)
+        pdf.line(10, pdf.get_y(), 200, pdf.get_y())
+        pdf.cell(0, 5, "", ln=True)
+        pdf.set_font("Courier", "", 8)
+        for line in tree_content.split('\n'):
+            safe_line = line.encode('latin-1', 'replace').decode('latin-1')
+            try:
+                pdf.multi_cell(0, 4, safe_line[:120])
+            except Exception:
+                pdf.cell(0, 4, safe_line[:60], ln=True)
+    # Save
+    path = tempfile.mktemp(suffix=".pdf", prefix="FORENSIQ_Report_")
+    pdf.output(path)
+    return path
+# ─── DOCX Export ─────────────────────────────────────────────────────
+def export_docx(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
+    """Generate a professional DOCX forensic report."""
+    from docx import Document
+    from docx.shared import Inches, Pt, RGBColor
+    from docx.enum.text import WD_ALIGN_PARAGRAPH
+    # Try LLM formatting
+    formatted = _format_with_llm(report_md, "Word document")
+    content = formatted if formatted else report_md
+    doc = Document()
+    # ── Title ─────────────────────────────────────────────────────
+    title = doc.add_heading('FORENSIQ', level=0)
+    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    subtitle = doc.add_paragraph()
+    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    run = subtitle.add_run('Forensic Analysis Report')
+    run.font.size = Pt(14)
+    run.font.color.rgb = RGBColor(100, 100, 100)
+    date_para = doc.add_paragraph()
+    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    run = date_para.add_run(f'Generated: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC")}')
+    run.font.size = Pt(10)
+    run.font.color.rgb = RGBColor(150, 150, 150)
+    doc.add_page_break()
+    # ── Parse and add content ─────────────────────────────────────
+    def add_md_content(md_text, doc):
+        for line in md_text.split('\n'):
+            stripped = line.strip()
+            if not stripped:
+                continue
+            # Headings
+            if stripped.startswith('# '):
+                doc.add_heading(re.sub(r'[🔬🔴🟢🟡🟠⚪✅⚖️📊📋🌳🔍📐]', '', stripped[2:]).strip(), level=1)
+            elif stripped.startswith('## '):
+                doc.add_heading(re.sub(r'[🔬🔴🟢🟡🟠⚪✅⚖️📊📋🌳🔍📐]', '', stripped[3:]).strip(), level=2)
+            elif stripped.startswith('### '):
+                doc.add_heading(re.sub(r'[🔬🔴🟢🟡🟠⚪✅⚖️📊📋🌳🔍📐]', '', stripped[4:]).strip(), level=3)
+            elif stripped.startswith('---'):
+                doc.add_paragraph('_' * 50)
+            elif stripped.startswith('- ') or stripped.startswith('* '):
+                doc.add_paragraph(stripped[2:], style='List Bullet')
+            elif stripped.startswith('|') and '|' in stripped[1:]:
+                # Table row — just add as plain text
+                cells = [c.strip() for c in stripped.split('|') if c.strip() and c.strip() != '---']
+                if cells:
+                    doc.add_paragraph('  |  '.join(cells))
+            else:
+                # Regular paragraph
+                p = doc.add_paragraph()
+                # Handle bold
+                parts = re.split(r'\*\*([^*]+)\*\*', stripped)
+                for i, part in enumerate(parts):
+                    if not part:
+                        continue
+                    run = p.add_run(part)
+                    if i % 2 == 1:
+                        run.bold = True
+                    run.font.size = Pt(10)
+    doc.add_heading('Forensic Analysis Report', level=1)
+    add_md_content(content, doc)
+    if court_brief_md:
+        doc.add_page_break()
+        doc.add_heading('Expert Forensic Analysis Brief', level=1)
+        add_md_content(court_brief_md, doc)
+    if reasoning_tree_md:
+        doc.add_page_break()
+        doc.add_heading('Reasoning Tree', level=1)
+        add_md_content(reasoning_tree_md, doc)
+    path = tempfile.mktemp(suffix=".docx", prefix="FORENSIQ_Report_")
+    doc.save(path)
+    return path
+# ─── TXT Export ──────────────────────────────────────────────────────
+def export_txt(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
+    """Generate a plain text forensic report."""
+    # Try LLM formatting
+    formatted = _format_with_llm(report_md, "plain text document")
+    content = _strip_md(formatted if formatted else report_md)
+    full = "=" * 60 + "\n"
+    full += "  FORENSIQ — FORENSIC ANALYSIS REPORT\n"
+    full += f"  Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}\n"
+    full += "=" * 60 + "\n\n"
+    full += content
+    if court_brief_md:
+        full += "\n\n" + "=" * 60 + "\n"
+        full += "  EXPERT FORENSIC ANALYSIS BRIEF (FRE 702)\n"
+        full += "=" * 60 + "\n\n"
+        full += _strip_md(court_brief_md)
+    if reasoning_tree_md:
+        full += "\n\n" + "=" * 60 + "\n"
+        full += "  REASONING TREE\n"
+        full += "=" * 60 + "\n\n"
+        full += _strip_md(reasoning_tree_md)
+    path = tempfile.mktemp(suffix=".txt", prefix="FORENSIQ_Report_")
+    with open(path, 'w', encoding='utf-8') as f:
+        f.write(full)
+    return path
+# ─── MD Export ───────────────────────────────────────────────────────
+def export_md(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
+    """Export as Markdown file."""
+    full = report_md or ""
+    if court_brief_md:
+        full += "\n\n---\n\n" + court_brief_md
+    if reasoning_tree_md:
+        full += "\n\n---\n\n" + reasoning_tree_md
+    path = tempfile.mktemp(suffix=".md", prefix="FORENSIQ_Report_")
+    with open(path, 'w', encoding='utf-8') as f:
+        f.write(full)
+    return path