mobenta committed on
Commit
49c4d8f
·
verified ·
1 Parent(s): 0f4597d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ from docx2pdf import convert
4
+ import os
5
+ import zipfile
6
+ import shutil
7
+ from pathlib import Path
8
+
9
# Working directories, relative to the process's current directory.
# Uploaded documents are copied into "input"; generated PDFs land in "output".
TEMP_DIR = Path("./temp_files")
TEMP_INPUT_DIR = TEMP_DIR / "input"
TEMP_OUTPUT_DIR = TEMP_DIR / "output"

# Create both directories at import time so the first request finds them.
# exist_ok=True makes a restart with leftover directories harmless.
for d in (TEMP_INPUT_DIR, TEMP_OUTPUT_DIR):
    d.mkdir(parents=True, exist_ok=True)
17
+
18
def _convert_with_libreoffice(source_path, pdf_path):
    """Convert one document to PDF with a headless LibreOffice, if installed.

    Returns True when LibreOffice was found and ``pdf_path`` exists afterwards,
    False when no LibreOffice executable is on PATH or no PDF was produced.
    Raises ``subprocess.CalledProcessError`` / ``subprocess.TimeoutExpired``
    if the LibreOffice process fails or hangs.
    """
    import subprocess  # local import: only needed on the fallback path

    office = shutil.which("soffice") or shutil.which("libreoffice")
    if office is None:
        return False

    # LibreOffice names the result "<input stem>.pdf" inside --outdir, which is
    # exactly how the caller builds pdf_path.
    subprocess.run(
        [
            office,
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(pdf_path.parent),
            str(source_path),
        ],
        check=True,
        capture_output=True,
        timeout=300,
    )
    return pdf_path.exists()


def _convert_single(source_path, pdf_path):
    """Convert one file, preferring docx2pdf and falling back to LibreOffice.

    Re-raises the original docx2pdf error when the fallback is unavailable or
    produced no output, so the caller can report the failure.
    """
    try:
        convert(source_path, pdf_path)
    except Exception as primary_error:
        # docx2pdf drives Microsoft Word and is documented as Windows/macOS
        # only, so it is expected to fail on Linux hosts and (presumably) on
        # legacy .doc input. Try LibreOffice before giving up.
        if not _convert_with_libreoffice(source_path, pdf_path):
            raise primary_error


def convert_docs_to_pdf(doc_files):
    """Convert uploaded Word documents to PDF and bundle them into one ZIP.

    Args:
        doc_files: The uploads handed over by the Gradio ``File`` component.
            Each item may be a path string or an object exposing the path as
            ``.name`` (which one depends on the Gradio version / ``type``).

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the ZIP file's
        path as a string, or ``None`` when nothing was uploaded, the working
        directories could not be prepared, or every conversion failed.
    """
    if not doc_files:
        return None, "Please upload one or more .docx or .doc files."

    # 1. Start from empty working directories so results of an earlier run
    #    never leak into this run's ZIP.
    try:
        for directory in (TEMP_INPUT_DIR, TEMP_OUTPUT_DIR):
            if directory.exists():
                shutil.rmtree(directory)
        for directory in (TEMP_INPUT_DIR, TEMP_OUTPUT_DIR):
            directory.mkdir(parents=True)
    except Exception as e:
        return None, f"Error preparing directories: {e}"

    success_count = 0
    failed = []

    # 2. Convert each file independently; one bad file must not abort the rest.
    for file_obj in doc_files:
        # Accept both plain path strings and file-like wrappers with `.name`.
        original_filepath = getattr(file_obj, "name", file_obj)
        filename = Path(original_filepath).name
        output_filepath = TEMP_OUTPUT_DIR / (Path(filename).stem + ".pdf")

        try:
            # Work on a copy under our own directory rather than on Gradio's
            # temporary upload path.
            input_file_copy = TEMP_INPUT_DIR / filename
            shutil.copy(original_filepath, input_file_copy)
            _convert_single(input_file_copy, output_filepath)
            success_count += 1
        except Exception as e:
            print(f"Error converting {filename}: {e}")
            failed.append(filename)

    if success_count == 0:
        return None, "No files were converted successfully. Ensure they are valid .docx or .doc files."

    # 3. Zip the results. Mode "w" truncates any archive left by a previous
    #    run, so no explicit delete is needed.
    zip_filename = TEMP_DIR / "converted_pdfs.zip"
    with zipfile.ZipFile(zip_filename, "w") as zipf:
        for pdf_file in sorted(TEMP_OUTPUT_DIR.iterdir()):
            zipf.write(pdf_file, arcname=pdf_file.name)

    message = f"Successfully converted {success_count} files and zipped them."
    if failed:
        # Surface partial failures to the user instead of only logging them.
        message += f" Failed to convert: {', '.join(failed)}."

    # Gradio serves the file at this path as the download.
    return str(zip_filename), message
80
+
81
def _convert_and_reveal(doc_files):
    """Run the conversion and reveal the download component only on success.

    Wraps ``convert_docs_to_pdf`` so the hidden ``download_zip`` component
    receives both its new value and its visibility in a single update.
    """
    zip_path, message = convert_docs_to_pdf(doc_files)
    return gr.update(value=zip_path, visible=zip_path is not None), message


# --- Gradio Interface Definition ---
# Use gr.Blocks for a more flexible layout
with gr.Blocks(title="Multi DOC/DOCX to PDF Converter") as demo:
    gr.Markdown(
        """
        # Multi DOC/DOCX to PDF Converter 📄➡️📜
        Upload multiple Microsoft Word files (.doc or .docx) and get them all converted to PDF in a single downloadable ZIP file.

        **Note:** This app relies on the `docx2pdf` library and LibreOffice on the backend for accurate formatting preservation.
        """
    )

    with gr.Row():
        # Input component: File component set to accept multiple files
        file_input = gr.File(
            file_count="multiple",
            label="Upload Word Files (.docx or .doc)",
            file_types=[".doc", ".docx"]
        )

        # Output components; the download slot stays hidden until a ZIP exists
        with gr.Column():
            download_zip = gr.File(label="Download Converted PDFs (ZIP)", visible=False)
            status_message = gr.Textbox(label="Status", value="Upload your files and click Convert.", interactive=False)

    convert_button = gr.Button("Convert to PDF", variant="primary")

    # Connect the button click to the conversion function.
    # `postprocess` on an event listener is a boolean flag, not a list of
    # callbacks, so visibility is driven by the wrapper's gr.update() instead.
    convert_button.click(
        fn=_convert_and_reveal,
        inputs=[file_input],
        outputs=[download_zip, status_message],
    )
116
+
117
# Start the Gradio server only when this file is executed as a script,
# so importing the module does not launch the app.
if __name__ == "__main__":
    demo.launch()