Spaces:
Running
Running
Upload phase0102_chunker_aggregator_2.py
Browse files
phase0102_chunker_aggregator_2.py
CHANGED
|
@@ -280,10 +280,33 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
|
|
| 280 |
|
| 281 |
if queue: await queue.put("DONE")
|
| 282 |
|
| 283 |
-
|
| 284 |
# Helper for summary
|
| 285 |
async def generate_summary_block(chunks):
    """Request a single Level-1 summary for a group of chunks.

    Each chunk is rendered as ``"<filename>: <content>"`` and the rendered
    chunks are separated by blank lines before being handed, together with
    the instruction prompt, to ``call_groq_json``.

    Args:
        chunks: Iterable of mappings providing ``'filename'`` and
            ``'content'`` entries.

    Returns:
        The awaited result of ``call_groq_json``; the prompt asks for JSON
        with the keys ``'summary_name'`` and ``'synthesis'``.
    """
    instruction = "Synthesize these Jungian chunks into a single high-density Level-1 summary. JSON keys: 'summary_name', 'synthesis'."
    rendered = (f"{c['filename']}: {c['content']}" for c in chunks)
    payload = "\n\n".join(rendered)
    return await call_groq_json(instruction, payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
if queue: await queue.put("DONE")
|
| 282 |
|
| 283 |
+
"""
|
| 284 |
# Helper for summary
|
| 285 |
async def generate_summary_block(chunks):
|
| 286 |
combined = "\n\n".join([f"{c['filename']}: {c['content']}" for c in chunks])
|
| 287 |
prompt = "Synthesize these Jungian chunks into a single high-density Level-1 summary. JSON keys: 'summary_name', 'synthesis'."
|
| 288 |
|
| 289 |
return await call_groq_json(prompt, combined)
|
| 290 |
+
"""
|
| 291 |
+
|
| 292 |
+
# 'label' (second parameter, defaults to "Level-1 Cluster") names the summary level used in the prompt
|
| 293 |
+
async def generate_summary_block(chunks_to_summarize, label="Level-1 Cluster"):
    """Ask the LLM for one dense JSON summary of the given chunks.

    Every chunk is rendered as ``"Source: <name>\\n<content>"``; the rendered
    chunks are joined with blank lines and sent, together with a system
    prompt that mentions ``label``, to ``call_groq_json``.

    Args:
        chunks_to_summarize: Iterable of mappings. Each must provide
            ``'content'`` and a source label under ``'name'``. The key
            ``'filename'`` is accepted as a fallback because the earlier
            version of this helper read that key.
        label: Name of the summary level, interpolated into the prompt.

    Returns:
        The awaited result of ``call_groq_json``; the prompt asks for a JSON
        object with the keys ``'summary_name'`` and ``'synthesis'``.

    Raises:
        KeyError: If a chunk lacks ``'content'``, or has neither ``'name'``
            nor ``'filename'``.
    """

    def _source_name(chunk):
        # Prefer the current key; fall back to the key the previous
        # implementation used so older chunk dicts keep working.
        return chunk["name"] if "name" in chunk else chunk["filename"]

    combined_content = "\n\n".join(
        f"Source: {_source_name(c)}\n{c['content']}" for c in chunks_to_summarize
    )

    # The label tells the LLM which level of the tree it is summarizing.
    # Doubled braces are literal braces in the f-string (the JSON example).
    system_prompt = f"""
You are creating a '{label}' for a Knowledge Tree of Carl Jung's work.

TASK:
Synthesize the provided content into a single, high-density summary.
- DO NOT say 'This section covers...'.
- DO say 'Psychological concepts in this section include...'
- Maintain the information density of the original inputs.

RESPONSE FORMAT (JSON):
{{
"summary_name": "thematic_cluster_name",
"synthesis": "the dense summary text"
}}
"""
    return await call_groq_json(system_prompt, combined_content)
|