prashantmatlani committed on
Commit
41dcb8c
·
verified ·
1 Parent(s): 290ec63

Upload 8 files

Browse files
Files changed (2) hide show
  1. main.py +5 -8
  2. phase0102_chunker_aggregator_2.py +6 -1
main.py CHANGED
@@ -5,7 +5,7 @@
5
  import os
6
  import asyncio
7
  import json
8
- from fastapi import FastAPI, UploadFile, File, BackgroundTasks
9
  from fastapi.responses import HTMLResponse, StreamingResponse
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi import Form # Add Form to your imports
@@ -56,12 +56,10 @@ async def stream_updates():
56
  return StreamingResponse(event_generator(), media_type="text/event-stream")
57
 
58
 
59
- from fastapi import Form
60
-
61
  @app.post("/upload")
62
  async def handle_upload(
63
  file: UploadFile = File(...),
64
- whole: str = Form("false"), # Form data often comes as strings
65
  start: str = Form("20"),
66
  end: str = Form("30")
67
  ):
@@ -69,13 +67,12 @@ async def handle_upload(
69
  with open(temp_path, "wb") as buffer:
70
  shutil.copyfileobj(file.file, buffer)
71
 
72
- # Convert strings to proper Python types
73
  is_whole = whole.lower() == "true"
74
  s_page = int(start)
75
  e_page = int(end)
76
 
77
- print(f"DEBUG: whole={is_whole}, start={s_page}, end={e_page}")
78
-
79
  asyncio.create_task(run_chunking_process(
80
  temp_path,
81
  progress_queue,
@@ -83,7 +80,7 @@ async def handle_upload(
83
  start_p=s_page,
84
  end_p=e_page
85
  ))
86
- return {"status": "Processing"}
87
 
88
 
89
 
 
5
  import os
6
  import asyncio
7
  import json
8
+ from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
9
  from fastapi.responses import HTMLResponse, StreamingResponse
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi import Form # Add Form to your imports
 
56
  return StreamingResponse(event_generator(), media_type="text/event-stream")
57
 
58
 
 
 
59
  @app.post("/upload")
60
  async def handle_upload(
61
  file: UploadFile = File(...),
62
+ whole: str = Form("false"),
63
  start: str = Form("20"),
64
  end: str = Form("30")
65
  ):
 
67
  with open(temp_path, "wb") as buffer:
68
  shutil.copyfileobj(file.file, buffer)
69
 
70
+ # Fix: Convert strings to proper types
71
  is_whole = whole.lower() == "true"
72
  s_page = int(start)
73
  e_page = int(end)
74
 
75
+ # Start the task with explicit parameters
 
76
  asyncio.create_task(run_chunking_process(
77
  temp_path,
78
  progress_queue,
 
80
  start_p=s_page,
81
  end_p=e_page
82
  ))
83
+ return {"status": "Processing started"}
84
 
85
 
86
 
phase0102_chunker_aggregator_2.py CHANGED
@@ -123,7 +123,8 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
123
  prompt = f"Context: {context_buffer['latest_summary']} | Prev: {context_buffer['predecessor'][:200]}...\nExtract a self-sufficient Jungian chunk. JSON keys: 'break_text', 'rewritten_text', 'filename'."
124
 
125
  try:
126
- result = call_groq_json(prompt, lookahead)
 
127
 
128
  # Semantic Jump Logic
129
  break_text = result.get('break_text', "")
@@ -143,6 +144,8 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
143
  await queue.put(new_chunk)
144
 
145
  context_buffer["predecessor"] = new_chunk["content"]
 
 
146
  cursor += relative_break
147
 
148
  # PHASE II: AGGREGATION
@@ -180,6 +183,8 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
180
 
181
  continue
182
 
 
 
183
  # Final Save
184
  timestamp = datetime.datetime.now().strftime("%m%d%Y_%H%M")
185
  final_data = {"leaves": all_leaves, "summaries": summary_blocks}
 
123
  prompt = f"Context: {context_buffer['latest_summary']} | Prev: {context_buffer['predecessor'][:200]}...\nExtract a self-sufficient Jungian chunk. JSON keys: 'break_text', 'rewritten_text', 'filename'."
124
 
125
  try:
126
+ # Note: Ensure call_groq_json is an async function or run in executor
127
+ result = await call_groq_json(prompt, lookahead)
128
 
129
  # Semantic Jump Logic
130
  break_text = result.get('break_text', "")
 
144
  await queue.put(new_chunk)
145
 
146
  context_buffer["predecessor"] = new_chunk["content"]
147
+ # Throttling to stay under 6000 TPM limit
148
+ await asyncio.sleep(7)
149
  cursor += relative_break
150
 
151
  # PHASE II: AGGREGATION
 
183
 
184
  continue
185
 
186
+ if queue: await queue.put("DONE")
187
+
188
  # Final Save
189
  timestamp = datetime.datetime.now().strftime("%m%d%Y_%H%M")
190
  final_data = {"leaves": all_leaves, "summaries": summary_blocks}