anky2002 committed on
Commit
894d051
·
verified ·
1 Parent(s): 8d94c3d

Upload export.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. export.py +302 -0
export.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FORENSIQ — Export Module
3
+ Generates professional forensic reports in PDF, DOCX, TXT, MD formats.
4
+ Uses Qwen2.5-72B-Instruct (text model) for polished formatting when available.
5
+ """
6
+
7
+ import os
8
+ import io
9
+ import re
10
+ import json
11
+ import tempfile
12
+ import datetime
13
+ from typing import Optional
14
+
15
+
16
+ # ─── LLM Formatter ──────────────────────────────────────────────────
17
+
18
+ def _format_with_llm(raw_report: str, format_type: str) -> Optional[str]:
19
+ """Use Qwen2.5-72B-Instruct to polish the report into professional format."""
20
+ token = os.environ.get("HF_TOKEN", "")
21
+ if not token:
22
+ return None
23
+
24
+ try:
25
+ from openai import OpenAI
26
+ client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=token)
27
+
28
+ system = f"""You are a professional forensic report formatter. Take the raw FORENSIQ analysis data and reformat it into a clean, professional {format_type} document.
29
+
30
+ Rules:
31
+ - Preserve ALL data, scores, and findings exactly as given β€” do not invent or modify any values
32
+ - Organize with clear headers, tables, and sections
33
+ - Use professional forensic language
34
+ - Include the verdict, probability, all agent findings, and methodology statement
35
+ - For PDF/DOCX: structure with numbered sections (I, II, III...)
36
+ - For TXT: use clean ASCII formatting with consistent indentation
37
+ - Keep it concise but complete β€” every data point must appear"""
38
+
39
+ resp = client.chat.completions.create(
40
+ model="Qwen/Qwen2.5-72B-Instruct",
41
+ messages=[
42
+ {"role": "system", "content": system},
43
+ {"role": "user", "content": f"Format this forensic report for {format_type} export:\n\n{raw_report[:8000]}"}
44
+ ],
45
+ max_tokens=4000,
46
+ temperature=0.1,
47
+ )
48
+ return resp.choices[0].message.content
49
+ except Exception as e:
50
+ return None
51
+
52
+
53
+ # ─── Strip Markdown Helper ──────────────────────────────────────────
54
+
55
+ def _strip_md(text: str) -> str:
56
+ """Convert markdown to plain text."""
57
+ text = re.sub(r'#{1,6}\s*', '', text) # Headers
58
+ text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text) # Bold
59
+ text = re.sub(r'\*([^*]+)\*', r'\1', text) # Italic
60
+ text = re.sub(r'`([^`]+)`', r'\1', text) # Inline code
61
+ text = re.sub(r'```[^`]*```', '', text, flags=re.DOTALL) # Code blocks
62
+ text = re.sub(r'\|[^\n]+\|', lambda m: m.group().replace('|', ' '), text) # Tables
63
+ text = re.sub(r'---+', '=' * 60, text) # Horizontal rules
64
+ text = re.sub(r'[πŸ”¬πŸ”΄πŸŸ’πŸŸ‘πŸŸ βšͺβœ…βš–οΈπŸ“ŠπŸ“‹πŸŒ³πŸ”πŸ“]', '', text) # Emojis
65
+ text = re.sub(r'\n{3,}', '\n\n', text) # Multiple newlines
66
+ return text.strip()
67
+
68
+
69
+ # ─── PDF Export ──────────────────────────────────────────────────────
70
+
71
# Line prefixes that mark a section heading in the main report body.
_PDF_SECTION_PREFIXES = (
    'Overall Verdict', 'Key Evidence', 'Agent-by-Agent', 'Bayesian',
    'Methodology', 'I.', 'II.', 'III.', 'IV.', 'V.', 'VI.',
)


def _pdf_section_title(pdf, title):
    """Start a new page with a 16pt bold title underlined by a rule."""
    pdf.add_page()
    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 12, title, ln=True)
    pdf.line(10, pdf.get_y(), 200, pdf.get_y())
    pdf.cell(0, 5, "", ln=True)


def _pdf_write_text(pdf, text, line_height, blank_height, fallback_chars):
    """Write one logical line, wrapped to the page width, in the current font."""
    if not text.strip():
        pdf.cell(0, blank_height, "", ln=True)
        return
    # The built-in Helvetica/Courier fonts only cover latin-1; anything else
    # is replaced with '?' rather than raising.
    safe = text.encode('latin-1', 'replace').decode('latin-1')
    # Always start at the left margin so the wrap width does not depend on
    # where the previous call left the cursor.
    # NOTE(review): newer fpdf2 releases appear to leave the cursor to the right
    # of a multi_cell by default - confirm against the installed version.
    pdf.set_x(pdf.l_margin)
    try:
        pdf.multi_cell(0, line_height, safe)
    except Exception:
        # Best-effort fallback: emit the text in fixed-size chunks instead of
        # truncating it, so no report content is lost.
        for start in range(0, len(safe), fallback_chars):
            pdf.set_x(pdf.l_margin)
            pdf.cell(0, line_height, safe[start:start + fallback_chars], ln=True)


def export_pdf(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Generate a professional PDF forensic report.

    Args:
        report_md: Main report in markdown; polished by the LLM formatter
            when that is available, otherwise used as-is.
        court_brief_md: Optional court brief in markdown; skipped when empty.
        reasoning_tree_md: Optional reasoning tree in markdown; skipped when
            empty. Rendered in a monospace font with indentation preserved.

    Returns:
        Path of the generated ``.pdf`` file in the system temp directory.
        The caller owns the file and is responsible for deleting it.
    """
    from fpdf import FPDF

    report = report_md or ""
    # Try LLM formatting first (never for an empty report).
    formatted = _format_with_llm(report, "PDF document") if report else None
    content = _strip_md(formatted if formatted else report)
    court_content = _strip_md(court_brief_md) if court_brief_md else ""
    tree_content = _strip_md(reasoning_tree_md) if reasoning_tree_md else ""

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_left_margin(15)
    pdf.set_right_margin(15)

    # -- Cover page ----------------------------------------------------
    # The label says UTC, so take the time in UTC rather than local time.
    generated = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    pdf.add_page()
    pdf.set_font("Helvetica", "B", 28)
    pdf.cell(0, 60, "", ln=True)
    pdf.cell(0, 15, "FORENSIQ", ln=True, align="C")
    pdf.set_font("Helvetica", "", 14)
    pdf.cell(0, 10, "Forensic Analysis Report", ln=True, align="C")
    pdf.set_font("Helvetica", "", 10)
    pdf.cell(0, 8, "", ln=True)
    pdf.cell(0, 8, f"Generated: {generated}", ln=True, align="C")
    pdf.cell(0, 8, "Physics-Based Multi-Agent Forensic Framework", ln=True, align="C")
    pdf.cell(0, 8, "for Explainable Deepfake Detection", ln=True, align="C")

    # -- Main report ---------------------------------------------------
    _pdf_section_title(pdf, "FORENSIC ANALYSIS REPORT")
    pdf.set_font("Helvetica", "", 9)
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if line.startswith(_PDF_SECTION_PREFIXES):
            # Section heading: spacer, bold 11pt text, then back to body font.
            pdf.set_font("Helvetica", "B", 11)
            pdf.cell(0, 8, "", ln=True)
            _pdf_write_text(pdf, line, 8, 4, 85)
            pdf.set_font("Helvetica", "", 9)
        else:
            _pdf_write_text(pdf, line, 5, 4, 80)

    # -- Court brief ---------------------------------------------------
    if court_content:
        _pdf_section_title(pdf, "EXPERT FORENSIC ANALYSIS BRIEF")
        pdf.set_font("Helvetica", "", 9)
        for raw_line in court_content.split('\n'):
            _pdf_write_text(pdf, raw_line.strip(), 5, 4, 80)

    # -- Reasoning tree ------------------------------------------------
    if tree_content:
        _pdf_section_title(pdf, "REASONING TREE")
        pdf.set_font("Courier", "", 8)
        for raw_line in tree_content.split('\n'):
            # Not stripped: leading whitespace carries the tree structure.
            _pdf_write_text(pdf, raw_line, 4, 4, 60)

    # Save. mkstemp creates the file atomically, unlike the deprecated,
    # race-prone mktemp; only the path is needed, so the handle is closed.
    fd, path = tempfile.mkstemp(suffix=".pdf", prefix="FORENSIQ_Report_")
    os.close(fd)
    pdf.output(path)
    return path
169
+
170
+
171
+ # ─── DOCX Export ─────────────────────────────────────────────────────
172
+
173
def export_docx(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Generate a professional DOCX forensic report.

    Args:
        report_md: Main report in markdown; polished by the LLM formatter
            when that is available, otherwise used as-is.
        court_brief_md: Optional court brief in markdown; skipped when empty.
        reasoning_tree_md: Optional reasoning tree in markdown; skipped when
            empty.

    Returns:
        Path of the generated ``.docx`` file in the system temp directory.
        The caller owns the file and is responsible for deleting it.
    """
    from docx import Document
    from docx.shared import Pt, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    # Decorative emoji removed from headings, written as escapes so the set
    # survives re-encoding of this file.
    # NOTE(review): the last two entries (magnifying glass, memo) were
    # reconstructed from a mis-encoded copy of this file - confirm.
    emoji_re = re.compile(
        "[\U0001F52C\U0001F534\U0001F7E2\U0001F7E1\U0001F7E0\u26AA\u2705\u2696\uFE0F"
        "\U0001F4CA\U0001F4CB\U0001F333\U0001F50D\U0001F4DD]"
    )
    bold_re = re.compile(r'\*\*([^*]+)\*\*')
    heading_re = re.compile(r'(#{1,6})\s+(.*)')
    fence_re = re.compile(r'```[\w+-]*')
    table_rule_re = re.compile(r':?-{3,}:?')

    report = report_md or ""
    # Try LLM formatting (never for an empty report).
    formatted = _format_with_llm(report, "Word document") if report else None
    content = formatted if formatted else report

    document = Document()

    # -- Title ---------------------------------------------------------
    title = document.add_heading('FORENSIQ', level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    subtitle = document.add_paragraph()
    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = subtitle.add_run('Forensic Analysis Report')
    run.font.size = Pt(14)
    run.font.color.rgb = RGBColor(100, 100, 100)

    # The label says UTC, so take the time in UTC rather than local time.
    generated = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    date_para = document.add_paragraph()
    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = date_para.add_run(f'Generated: {generated}')
    run.font.size = Pt(10)
    run.font.color.rgb = RGBColor(150, 150, 150)

    document.add_page_break()

    # -- Parse and add content -----------------------------------------
    def add_runs(paragraph, text, size):
        """Append ``text`` to ``paragraph``, rendering **bold** spans as bold runs."""
        # re.split with one capture group alternates plain / bold pieces, so
        # odd indexes are the bold spans.
        for index, piece in enumerate(bold_re.split(text)):
            if not piece:
                continue
            piece_run = paragraph.add_run(piece)
            if index % 2 == 1:
                piece_run.bold = True
            if size is not None:
                piece_run.font.size = size

    def add_md_content(md_text, doc):
        """Render a markdown string into ``doc``, one paragraph per source line."""
        in_fence = False
        for line in md_text.split('\n'):
            stripped = line.strip()

            # A fence delimiter toggles code mode and is not itself printed.
            if fence_re.fullmatch(stripped):
                in_fence = not in_fence
                continue
            if not stripped:
                continue
            if in_fence:
                # Code / tree lines: keep indentation and use a monospace font.
                code_run = doc.add_paragraph().add_run(line.rstrip())
                code_run.font.name = 'Courier New'
                code_run.font.size = Pt(9)
                continue

            heading = heading_re.match(stripped)
            if heading:
                # '#' count maps directly to the heading level (1-6).
                doc.add_heading(
                    emoji_re.sub('', heading.group(2)).strip(),
                    level=len(heading.group(1)),
                )
            elif stripped.startswith('---'):
                doc.add_paragraph('_' * 50)
            elif stripped.startswith(('- ', '* ')):
                add_runs(doc.add_paragraph(style='List Bullet'), stripped[2:], None)
            elif stripped.startswith('|') and '|' in stripped[1:]:
                # Table row - added as plain text; separator cells are dropped.
                cells = [c.strip() for c in stripped.split('|')]
                cells = [c for c in cells if c and not table_rule_re.fullmatch(c)]
                if cells:
                    doc.add_paragraph(' | '.join(cells))
            else:
                # Regular paragraph
                add_runs(doc.add_paragraph(), stripped, Pt(10))

    document.add_heading('Forensic Analysis Report', level=1)
    add_md_content(content, document)

    if court_brief_md:
        document.add_page_break()
        document.add_heading('Expert Forensic Analysis Brief', level=1)
        add_md_content(court_brief_md, document)

    if reasoning_tree_md:
        document.add_page_break()
        document.add_heading('Reasoning Tree', level=1)
        add_md_content(reasoning_tree_md, document)

    # mkstemp creates the file atomically, unlike the deprecated, race-prone
    # mktemp; only the path is needed, so the handle is closed.
    fd, path = tempfile.mkstemp(suffix=".docx", prefix="FORENSIQ_Report_")
    os.close(fd)
    document.save(path)
    return path
255
+
256
+
257
+ # ─── TXT Export ──────────────────────────────────────────────────────
258
+
259
def export_txt(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Generate a plain text forensic report.

    Args:
        report_md: Main report in markdown; polished by the LLM formatter
            when that is available, otherwise used as-is.
        court_brief_md: Optional court brief in markdown; skipped when empty.
        reasoning_tree_md: Optional reasoning tree in markdown; skipped when
            empty.

    Returns:
        Path of the generated UTF-8 ``.txt`` file in the system temp
        directory. The caller owns the file and is responsible for deleting it.
    """
    report = report_md or ""
    # Try LLM formatting (never for an empty report).
    formatted = _format_with_llm(report, "plain text document") if report else None
    content = _strip_md(formatted if formatted else report)

    rule = "=" * 60
    # The label says UTC, so take the time in UTC rather than local time.
    generated = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')

    parts = [
        f"{rule}\n",
        " FORENSIQ — FORENSIC ANALYSIS REPORT\n",
        f" Generated: {generated}\n",
        f"{rule}\n\n",
        content,
    ]

    if court_brief_md:
        parts.append(f"\n\n{rule}\n")
        parts.append(" EXPERT FORENSIC ANALYSIS BRIEF (FRE 702)\n")
        parts.append(f"{rule}\n\n")
        parts.append(_strip_md(court_brief_md))

    if reasoning_tree_md:
        parts.append(f"\n\n{rule}\n")
        parts.append(" REASONING TREE\n")
        parts.append(f"{rule}\n\n")
        parts.append(_strip_md(reasoning_tree_md))

    # NamedTemporaryFile(delete=False) creates the file atomically, unlike
    # the deprecated, race-prone mktemp.
    with tempfile.NamedTemporaryFile(
        'w', suffix=".txt", prefix="FORENSIQ_Report_", delete=False, encoding='utf-8'
    ) as f:
        f.write("".join(parts))
        path = f.name
    return path
287
+
288
+
289
+ # ─── MD Export ───────────────────────────────────────────────────────
290
+
291
def export_md(report_md: str, court_brief_md: str, reasoning_tree_md: str) -> str:
    """Export as Markdown file.

    The three sections are written verbatim, separated by a markdown
    horizontal rule (``---``); empty or ``None`` sections are skipped.

    Args:
        report_md: Main report in markdown (``None`` is treated as empty).
        court_brief_md: Optional court brief in markdown.
        reasoning_tree_md: Optional reasoning tree in markdown.

    Returns:
        Path of the generated UTF-8 ``.md`` file in the system temp
        directory. The caller owns the file and is responsible for deleting it.
    """
    full = report_md or ""
    if court_brief_md:
        full += "\n\n---\n\n" + court_brief_md
    if reasoning_tree_md:
        full += "\n\n---\n\n" + reasoning_tree_md

    # NamedTemporaryFile(delete=False) creates the file atomically, unlike
    # the deprecated, race-prone mktemp.
    with tempfile.NamedTemporaryFile(
        'w', suffix=".md", prefix="FORENSIQ_Report_", delete=False, encoding='utf-8'
    ) as f:
        f.write(full)
        path = f.name
    return path
302
+ return path