| |
| """Simple Gradio demo for the PDF attacker tools |
| |
| Allows entering text, choosing attack type, and downloading the generated PDF. |
| """ |
import os
import time
from typing import Optional, Tuple

import gradio as gr
import PyPDF2

from pdf_attacker import PDFAttacker
|
|
|
|
| def _resolve_font_path(choice: str, uploaded_file) -> str: |
| """Return a font path given a dropdown choice or uploaded file. |
| |
| If choice is 'auto' return None so PDFAttacker will pick a reasonable default. |
| """ |
| if choice == 'auto' or not choice: |
| return None |
|
|
| |
| presets = { |
| 'DejaVu Serif': [ |
| '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf', |
| ], |
| 'Liberation Serif': [ |
| '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf', |
| ], |
| 'FreeSerif': [ |
| '/usr/share/fonts/truetype/freefont/FreeSerif.ttf', |
| ], |
| 'DejaVu Sans': [ |
| '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', |
| ], |
| 'Arial': [ |
| '/usr/share/fonts/truetype/msttcorefonts/Arial.ttf', |
| '/usr/share/fonts/truetype/msttcorefonts/arial.ttf', |
| '/usr/share/fonts/truetype/arial/arial.ttf', |
| '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', |
| ], |
| 'Helvetica': [ |
| '/usr/share/fonts/truetype/urw-base35/Helvetica.ttf', |
| '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf', |
| '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', |
| ], |
| 'Times New Roman': [ |
| '/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman.ttf', |
| '/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman.ttf', |
| '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf', |
| ], |
| 'Roboto': [ |
| '/usr/share/fonts/truetype/roboto/Roboto-Regular.ttf', |
| '/usr/share/fonts/truetype/roboto/Roboto-Regular.ttf', |
| ], |
| 'Courier': [ |
| '/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf', |
| '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf', |
| ], |
| 'Times': [ |
| '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf', |
| ], |
| } |
|
|
| if choice in presets: |
| for p in presets[choice]: |
| if os.path.exists(p): |
| return p |
| return None |
|
|
| |
| if choice == 'Custom' and uploaded_file: |
| |
| if isinstance(uploaded_file, dict) and 'name' in uploaded_file: |
| return uploaded_file['name'] |
| return uploaded_file |
|
|
| return None |
|
|
| |
# Font stacks for the UI: a GoogleFont entry first, then plain family names.
_body_font_stack = [
    gr.themes.GoogleFont("IBM Plex Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]
_mono_font_stack = [
    gr.themes.GoogleFont("IBM Plex Mono"),
    "ui-monospace",
    "Consolas",
    "monospace",
]

# Module-level theme object passed to gr.Blocks when the demo is built.
theme = gr.themes.Soft(
    font=_body_font_stack,
    font_mono=_mono_font_stack,
    primary_hue="fuchsia",
    secondary_hue="cyan",
    neutral_hue="gray",
    radius_size="none",
)
|
|
|
|
| def _ensure_tmp_dir() -> str: |
| """Ensure tmp dir exists and return its path""" |
| path = os.path.join(os.getcwd(), "tmp") |
| os.makedirs(path, exist_ok=True) |
| return path |
|
|
|
|
| def _extract_text_from_pdf(pdf_path: str) -> str: |
| """Extract text from a PDF file for preview""" |
| try: |
| with open(pdf_path, 'rb') as f: |
| reader = PyPDF2.PdfReader(f) |
| text = "" |
| for page in reader.pages: |
| page_text = page.extract_text() |
| if page_text: |
| text += page_text |
| return text.strip() |
| except Exception as e: |
| return f"Error extracting text: {e}" |
|
|
|
|
def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
    font_choice: str = 'auto',
    uploaded_font=None,
    wrap_on_words: bool = True,
) -> Tuple[str, str, str]:
    """Generate the selected PDF and return ``(pdf_path, extracted_text, status)``.

    Args:
        text: Input text; runs of whitespace are collapsed to single spaces
            before rendering. ``None`` is treated as empty.
        mode: One of ``'normal'``, ``'attacked'`` or ``'targeted'``.
        attack_factor: Passed to ``create_attacked_pdf`` in attacked mode.
        target_text: Passed to ``create_targeted_pdf`` in targeted mode.
        font_choice: Font dropdown value, resolved via ``_resolve_font_path``.
        uploaded_font: Uploaded font, used when ``font_choice`` is 'Custom'.
        wrap_on_words: Assigned to the attacker's ``wrap_on_words`` attribute.

    Returns:
        A 3-tuple. On success: the path of the generated PDF, the text
        extracted back out of it, and a multi-line status string. On failure
        (unknown mode or an exception while generating) the path is ``""``,
        the second element carries a short error message, and the status
        string describes the error.
    """
    # Validate the mode before touching the filesystem: it is embedded in the
    # output filename, and every return path must yield three values because
    # callers unpack three.
    if mode not in ('normal', 'attacked', 'targeted'):
        return "", f"Unknown mode: {mode}", f"Error: unknown mode {mode!r}"

    tmp_dir = _ensure_tmp_dir()
    timestamp = int(time.time() * 1000)
    output_path = os.path.join(tmp_dir, f"{mode}_{timestamp}.pdf")

    # Collapse newlines/tabs/repeated spaces so the attacker lays out one
    # continuous run of text.
    clean_text = " ".join((text or "").split())

    font_path = _resolve_font_path(choice=font_choice, uploaded_file=uploaded_font)
    resolved_font = font_path or "(auto/default)"
    status_lines = [f"Font resolved to: {resolved_font}", f"Wrap on words: {wrap_on_words}"]

    try:
        # Construction is inside the try so that an unusable font file is
        # reported in the status box rather than raised to the caller.
        attacker = PDFAttacker(font_path=font_path)
        attacker.wrap_on_words = wrap_on_words

        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:  # 'targeted' -- the only remaining validated mode
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Top-level UI boundary: surface the failure instead of crashing.
        status_lines.append(f"Error: {e}")
        return "", f"Error generating PDF: {e}", "\n".join(status_lines)

    extracted = _extract_text_from_pdf(output_path)

    return output_path, extracted, "\n".join(status_lines)
|
|
|
|
def build_demo():
    """Construct and return the Gradio Blocks demo.

    Builds the input widgets (text, mode, attack factor, target text, font
    selection, wrap toggle), the output widgets (downloadable PDF, extracted
    text preview, status), and wires the 'Generate PDF' button to
    ``generate_pdf``.

    Returns:
        The ``demo`` object bound by ``with gr.Blocks(theme=theme) as demo``.
    """
    with gr.Blocks(theme=theme) as demo:
        gr.Markdown("# PDF Humanizer: Attack demo\nGenerate PDFs that look normal but extract differently when copied")

        # NOTE(review): the nesting of the widgets below (which ones sit in
        # the Row vs. the Column) is reconstructed -- confirm against the
        # intended layout.
        with gr.Row():
            txt = gr.Textbox(lines=8, label="Input text", value="Enter or paste text here...")
            with gr.Column():
                mode = gr.Radio(choices=['normal', 'attacked', 'targeted'], value='attacked', label='Mode')
                attack_factor = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.7, label='Attack factor (attacked mode)')
                target_text = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate = gr.Button('Generate PDF')

                # Font selection: 'auto', a named preset, or 'Custom' together
                # with an uploaded font file.
                font_choice = gr.Dropdown(choices=['auto', 'DejaVu Serif', 'Liberation Serif', 'FreeSerif', 'Arial', 'Helvetica', 'Times New Roman', 'Roboto', 'Courier', 'Custom'], value='auto', label='Font')
                upload_font = gr.File(label='Upload TTF/OTF (optional)', file_count='single')
                wrap_on_words = gr.Checkbox(label='Wrap on words', value=True)

        # Output widgets, filled in by the click handler below.
        download_file = gr.File(label='Download generated PDF')
        extracted_preview = gr.Textbox(lines=8, label='Extracted text preview')
        status_box = gr.Textbox(lines=4, label='Status')

        def _on_generate(text, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words):
            """Click handler: run generate_pdf and map its result onto the outputs."""
            path, extracted, status = generate_pdf(text=text, mode=mode, attack_factor=attack_factor, target_text=target_text, font_choice=font_choice, uploaded_font=upload_font, wrap_on_words=wrap_on_words)
            if not path:
                # An empty path means no PDF was produced; hand None to the
                # download widget instead of an empty string.
                return None, extracted, status
            return path, extracted, status

        # Input order must match _on_generate's parameters; output order must
        # match its returned tuple.
        generate.click(fn=_on_generate, inputs=[txt, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words], outputs=[download_file, extracted_preview, status_box])

    return demo
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the UI and serve it on all interfaces,
    # port 7860.
    app = build_demo()
    launch_options = {'server_name': '0.0.0.0', 'server_port': 7860}
    app.launch(**launch_options)
|
|