Spaces:
Running
Running
Upload 8 files
Browse files
- main.py +5 -8
- phase0102_chunker_aggregator_2.py +6 -1
main.py
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
import os
|
| 6 |
import asyncio
|
| 7 |
import json
|
| 8 |
-
from fastapi import FastAPI, UploadFile, File, BackgroundTasks
|
| 9 |
from fastapi.responses import HTMLResponse, StreamingResponse
|
| 10 |
from fastapi.staticfiles import StaticFiles
|
| 11 |
from fastapi import Form # Add Form to your imports
|
|
@@ -56,12 +56,10 @@ async def stream_updates():
|
|
| 56 |
return StreamingResponse(event_generator(), media_type="text/event-stream")
|
| 57 |
|
| 58 |
|
| 59 |
-
from fastapi import Form
|
| 60 |
-
|
| 61 |
@app.post("/upload")
|
| 62 |
async def handle_upload(
|
| 63 |
file: UploadFile = File(...),
|
| 64 |
-
whole: str = Form("false"),
|
| 65 |
start: str = Form("20"),
|
| 66 |
end: str = Form("30")
|
| 67 |
):
|
|
@@ -69,13 +67,12 @@ async def handle_upload(
|
|
| 69 |
with open(temp_path, "wb") as buffer:
|
| 70 |
shutil.copyfileobj(file.file, buffer)
|
| 71 |
|
| 72 |
-
# Convert strings to proper
|
| 73 |
is_whole = whole.lower() == "true"
|
| 74 |
s_page = int(start)
|
| 75 |
e_page = int(end)
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
asyncio.create_task(run_chunking_process(
|
| 80 |
temp_path,
|
| 81 |
progress_queue,
|
|
@@ -83,7 +80,7 @@ async def handle_upload(
|
|
| 83 |
start_p=s_page,
|
| 84 |
end_p=e_page
|
| 85 |
))
|
| 86 |
-
return {"status": "Processing"}
|
| 87 |
|
| 88 |
|
| 89 |
|
|
|
|
| 5 |
import os
|
| 6 |
import asyncio
|
| 7 |
import json
|
| 8 |
+
from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
|
| 9 |
from fastapi.responses import HTMLResponse, StreamingResponse
|
| 10 |
from fastapi.staticfiles import StaticFiles
|
| 11 |
from fastapi import Form # Add Form to your imports
|
|
|
|
| 56 |
return StreamingResponse(event_generator(), media_type="text/event-stream")
|
| 57 |
|
| 58 |
|
|
|
|
|
|
|
| 59 |
@app.post("/upload")
|
| 60 |
async def handle_upload(
|
| 61 |
file: UploadFile = File(...),
|
| 62 |
+
whole: str = Form("false"),
|
| 63 |
start: str = Form("20"),
|
| 64 |
end: str = Form("30")
|
| 65 |
):
|
|
|
|
| 67 |
with open(temp_path, "wb") as buffer:
|
| 68 |
shutil.copyfileobj(file.file, buffer)
|
| 69 |
|
| 70 |
+
# Fix: Convert strings to proper types
|
| 71 |
is_whole = whole.lower() == "true"
|
| 72 |
s_page = int(start)
|
| 73 |
e_page = int(end)
|
| 74 |
|
| 75 |
+
# Start the task with explicit parameters
|
|
|
|
| 76 |
asyncio.create_task(run_chunking_process(
|
| 77 |
temp_path,
|
| 78 |
progress_queue,
|
|
|
|
| 80 |
start_p=s_page,
|
| 81 |
end_p=e_page
|
| 82 |
))
|
| 83 |
+
return {"status": "Processing started"}
|
| 84 |
|
| 85 |
|
| 86 |
|
phase0102_chunker_aggregator_2.py
CHANGED
|
@@ -123,7 +123,8 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
|
|
| 123 |
prompt = f"Context: {context_buffer['latest_summary']} | Prev: {context_buffer['predecessor'][:200]}...\nExtract a self-sufficient Jungian chunk. JSON keys: 'break_text', 'rewritten_text', 'filename'."
|
| 124 |
|
| 125 |
try:
|
| 126 |
-
|
|
|
|
| 127 |
|
| 128 |
# Semantic Jump Logic
|
| 129 |
break_text = result.get('break_text', "")
|
|
@@ -143,6 +144,8 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
|
|
| 143 |
await queue.put(new_chunk)
|
| 144 |
|
| 145 |
context_buffer["predecessor"] = new_chunk["content"]
|
|
|
|
|
|
|
| 146 |
cursor += relative_break
|
| 147 |
|
| 148 |
# PHASE II: AGGREGATION
|
|
@@ -180,6 +183,8 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
|
|
| 180 |
|
| 181 |
continue
|
| 182 |
|
|
|
|
|
|
|
| 183 |
# Final Save
|
| 184 |
timestamp = datetime.datetime.now().strftime("%m%d%Y_%H%M")
|
| 185 |
final_data = {"leaves": all_leaves, "summaries": summary_blocks}
|
|
|
|
| 123 |
prompt = f"Context: {context_buffer['latest_summary']} | Prev: {context_buffer['predecessor'][:200]}...\nExtract a self-sufficient Jungian chunk. JSON keys: 'break_text', 'rewritten_text', 'filename'."
|
| 124 |
|
| 125 |
try:
|
| 126 |
+
# Note: Ensure call_groq_json is an async function or run in executor
|
| 127 |
+
result = await call_groq_json(prompt, lookahead)
|
| 128 |
|
| 129 |
# Semantic Jump Logic
|
| 130 |
break_text = result.get('break_text', "")
|
|
|
|
| 144 |
await queue.put(new_chunk)
|
| 145 |
|
| 146 |
context_buffer["predecessor"] = new_chunk["content"]
|
| 147 |
+
# Throttling to stay under 6000 TPM limit
|
| 148 |
+
await asyncio.sleep(7)
|
| 149 |
cursor += relative_break
|
| 150 |
|
| 151 |
# PHASE II: AGGREGATION
|
|
|
|
| 183 |
|
| 184 |
continue
|
| 185 |
|
| 186 |
+
if queue: await queue.put("DONE")
|
| 187 |
+
|
| 188 |
# Final Save
|
| 189 |
timestamp = datetime.datetime.now().strftime("%m%d%Y_%H%M")
|
| 190 |
final_data = {"leaves": all_leaves, "summaries": summary_blocks}
|