ChrisSacrumCor committed on
Commit
ea94616
·
verified ·
1 Parent(s): 5f59087

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ import gradio as gr
3
+
4
def split_pdf(file, batch_size=100):
    """Split an uploaded PDF into consecutive chunks of at most ``batch_size`` pages.

    Args:
        file: The uploaded PDF. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            NOTE(review): which of the two Gradio passes is assumed to depend
            on the installed Gradio version; both are accepted here.
            ``None`` (nothing uploaded) produces an empty result.
        batch_size: Maximum number of pages per chunk. Must be at least 1.

    Returns:
        A list of paths to the chunk PDFs that were written, in page order.
        The files are named ``chunk_1.pdf``, ``chunk_2.pdf``, ... and live in
        a temporary directory created for this call. A PDF with no pages
        yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import os
    import tempfile

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Nothing was uploaded: there is nothing to split.
    if file is None:
        return []

    # Accept a bare path as well as an upload object carrying it in `.name`.
    if isinstance(file, (str, os.PathLike)):
        source_path = file
    else:
        source_path = file.name

    # Load the uploaded PDF file
    pdf_reader = PyPDF2.PdfReader(source_path)
    total_pages = len(pdf_reader.pages)

    # Write into a directory of its own so that separate calls never
    # overwrite each other's identically named chunk files.
    output_dir = tempfile.mkdtemp(prefix="pdf_chunks_")

    # Stepping the start page by batch_size yields exactly
    # ceil(total_pages / batch_size) chunks, so no empty trailing chunk is
    # produced when the page count is an exact multiple of the batch size.
    chunk_files = []
    for chunk_number, start_page in enumerate(
        range(0, total_pages, batch_size), start=1
    ):
        end_page = min(start_page + batch_size, total_pages)

        # Add pages in this batch to the writer
        writer = PyPDF2.PdfWriter()
        for page_index in range(start_page, end_page):
            writer.add_page(pdf_reader.pages[page_index])

        # Save the batch to a separate PDF file
        batch_filename = os.path.join(output_dir, f'chunk_{chunk_number}.pdf')
        with open(batch_filename, 'wb') as output_file:
            writer.write(output_file)

        chunk_files.append(batch_filename)

    return chunk_files
35
+
36
# Create the Gradio interface. `demo` stays a module-level name so it remains
# available to anything that imports this module.
demo = gr.Interface(
    fn=split_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.File(label="Chunked PDFs"),
    title="PDF Splitter",
    description="Upload a PDF file to split it into chunks of 100 pages each."
)

# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()