Spaces:
Running
Running
Upload 8 files
Browse files- index.html +27 -2
- main.py +12 -1
- phase0102_chunker_aggregator_2.py +5 -5
index.html
CHANGED
|
@@ -40,8 +40,15 @@
|
|
| 40 |
</div>
|
| 41 |
|
| 42 |
<button onclick="uploadFile()" style="margin-top:10px; width:100%;">Process Jungian Tree</button>
|
|
|
|
| 43 |
<div id="status" style="font-size: 0.8em; margin-top: 10px; color: var(--accent);">Status: Idle</div>
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
<div id="tree-container"></div>
|
| 46 |
|
| 47 |
</div>
|
|
@@ -56,13 +63,19 @@ async function uploadFile() {
|
|
| 56 |
const isWhole = document.querySelector('input[name="mode"]:checked').value === 'whole';
|
| 57 |
const status = document.getElementById('status');
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
if (fileInput.files.length === 0) return alert("Please select a PDF");
|
| 60 |
|
| 61 |
const formData = new FormData();
|
| 62 |
formData.append('file', fileInput.files[0]);
|
| 63 |
formData.append('whole', isWhole);
|
| 64 |
-
formData.append('start',
|
| 65 |
-
formData.append('end',
|
|
|
|
|
|
|
| 66 |
|
| 67 |
status.innerText = "Status: Uploading...";
|
| 68 |
await fetch('/upload', { method: 'POST', body: formData });
|
|
@@ -71,10 +84,18 @@ async function uploadFile() {
|
|
| 71 |
listenToStream();
|
| 72 |
}
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
function listenToStream() {
|
| 75 |
const eventSource = new EventSource('/stream');
|
| 76 |
const container = document.getElementById('tree-container');
|
| 77 |
const status = document.getElementById('status');
|
|
|
|
| 78 |
container.innerHTML = "";
|
| 79 |
|
| 80 |
eventSource.onmessage = (event) => {
|
|
@@ -82,6 +103,8 @@ function listenToStream() {
|
|
| 82 |
|
| 83 |
if (data.type === 'done') {
|
| 84 |
status.innerText = "✅ EXTRACTION FINISHED!";
|
|
|
|
|
|
|
| 85 |
eventSource.close();
|
| 86 |
return;
|
| 87 |
}
|
|
@@ -90,11 +113,13 @@ function listenToStream() {
|
|
| 90 |
const node = document.createElement('div');
|
| 91 |
node.className = data.type === 'summary' ? "tree-node summary-block" : "tree-node leaf-node";
|
| 92 |
node.innerHTML = (data.type === 'summary' ? "⭐ " : "∟ ") + (data.name || data.filename);
|
|
|
|
| 93 |
node.onclick = () => view(data.name || data.filename, data.content);
|
| 94 |
container.appendChild(node);
|
| 95 |
status.innerText = `Status: Created ${data.type}...`;
|
| 96 |
};
|
| 97 |
}
|
|
|
|
| 98 |
</script>
|
| 99 |
|
| 100 |
|
|
|
|
| 40 |
</div>
|
| 41 |
|
| 42 |
<button onclick="uploadFile()" style="margin-top:10px; width:100%;">Process Jungian Tree</button>
|
| 43 |
+
|
| 44 |
<div id="status" style="font-size: 0.8em; margin-top: 10px; color: var(--accent);">Status: Idle</div>
|
| 45 |
|
| 46 |
+
<!-- Download Button -->
|
| 47 |
+
<button id="downloadBtn" style="display:none; margin-top:10px; background:#2aa198; color:white; border:none; padding:10px; cursor:pointer; width:100%;">
|
| 48 |
+
📥 Download Knowledge Tree
|
| 49 |
+
</button>
|
| 50 |
+
</div>
|
| 51 |
+
|
| 52 |
<div id="tree-container"></div>
|
| 53 |
|
| 54 |
</div>
|
|
|
|
| 63 |
const isWhole = document.querySelector('input[name="mode"]:checked').value === 'whole';
|
| 64 |
const status = document.getElementById('status');
|
| 65 |
|
| 66 |
+
// Explicitly grab the values from the inputs
|
| 67 |
+
const startVal = document.getElementById('startP').value;
|
| 68 |
+
const endVal = document.getElementById('endP').value;
|
| 69 |
+
|
| 70 |
if (fileInput.files.length === 0) return alert("Please select a PDF");
|
| 71 |
|
| 72 |
const formData = new FormData();
|
| 73 |
formData.append('file', fileInput.files[0]);
|
| 74 |
formData.append('whole', isWhole);
|
| 75 |
+
formData.append('start', startVal || 20); // Fallback to 20 if empty
|
| 76 |
+
formData.append('end', endVal || 30); // Fallback to 30 if empty
|
| 77 |
+
|
| 78 |
+
console.log("Sending Range:", startVal, "to", endVal); // Check browser console
|
| 79 |
|
| 80 |
status.innerText = "Status: Uploading...";
|
| 81 |
await fetch('/upload', { method: 'POST', body: formData });
|
|
|
|
| 84 |
listenToStream();
|
| 85 |
}
|
| 86 |
|
| 87 |
+
// --- VIEW function ---
|
| 88 |
+
/**
 * Show one tree node's title and body in the `content-display` pane.
 *
 * Both arguments are handed over from the click handler of a streamed tree
 * node, i.e. they originate from server-sent data rather than from this
 * page. They are therefore inserted as plain text (textContent) instead of
 * being interpolated into innerHTML, so markup inside a chunk cannot inject
 * HTML or script into the page.
 *
 * @param {string} title - Heading to display (node name or filename).
 * @param {string} text  - Body text; line breaks are preserved via pre-wrap.
 */
function view(title, text) {
    const display = document.getElementById('content-display');
    if (!display) return; // pane is not in the DOM: nothing to render into

    const heading = document.createElement('h2');
    heading.textContent = title;

    const body = document.createElement('div');
    body.style.whiteSpace = 'pre-wrap';
    body.textContent = text;

    // Drop whatever was shown before, then attach the freshly built nodes.
    display.textContent = '';
    display.appendChild(heading);
    display.appendChild(body);
}
|
| 92 |
+
|
| 93 |
+
// --- listenToStream function ---
|
| 94 |
function listenToStream() {
|
| 95 |
const eventSource = new EventSource('/stream');
|
| 96 |
const container = document.getElementById('tree-container');
|
| 97 |
const status = document.getElementById('status');
|
| 98 |
+
const dlBtn = document.getElementById('downloadBtn');
|
| 99 |
container.innerHTML = "";
|
| 100 |
|
| 101 |
eventSource.onmessage = (event) => {
|
|
|
|
| 103 |
|
| 104 |
if (data.type === 'done') {
|
| 105 |
status.innerText = "✅ EXTRACTION FINISHED!";
|
| 106 |
+
dlBtn.style.display = "block"; // Show the button
|
| 107 |
+
dlBtn.onclick = () => window.location.href = '/download-latest';
|
| 108 |
eventSource.close();
|
| 109 |
return;
|
| 110 |
}
|
|
|
|
| 113 |
const node = document.createElement('div');
|
| 114 |
node.className = data.type === 'summary' ? "tree-node summary-block" : "tree-node leaf-node";
|
| 115 |
node.innerHTML = (data.type === 'summary' ? "⭐ " : "∟ ") + (data.name || data.filename);
|
| 116 |
+
// Pass the content to the view function when clicked
|
| 117 |
node.onclick = () => view(data.name || data.filename, data.content);
|
| 118 |
container.appendChild(node);
|
| 119 |
status.innerText = `Status: Created ${data.type}...`;
|
| 120 |
};
|
| 121 |
}
|
| 122 |
+
|
| 123 |
</script>
|
| 124 |
|
| 125 |
|
main.py
CHANGED
|
@@ -6,10 +6,12 @@ import os
|
|
| 6 |
import asyncio
|
| 7 |
import json
|
| 8 |
from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
|
| 9 |
-
from fastapi.responses import HTMLResponse, StreamingResponse
|
| 10 |
from fastapi.staticfiles import StaticFiles
|
| 11 |
from fastapi import Form # Add Form to your imports
|
| 12 |
import shutil
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Import chunking logic from the existing combined script
|
| 15 |
# Note: Ensure script functions are wrap-able or callable
|
|
@@ -82,6 +84,15 @@ async def handle_upload(
|
|
| 82 |
))
|
| 83 |
return {"status": "Processing started"}
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
|
| 87 |
if __name__ == "__main__":
|
|
|
|
| 6 |
import asyncio
|
| 7 |
import json
|
| 8 |
from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
|
| 9 |
+
from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse # Added FileResponse
|
| 10 |
from fastapi.staticfiles import StaticFiles
|
| 11 |
from fastapi import Form # Add Form to your imports
|
| 12 |
import shutil
|
| 13 |
+
import glob
|
| 14 |
+
|
| 15 |
|
| 16 |
# Import chunking logic from the existing combined script
|
| 17 |
# Note: Ensure script functions are wrap-able or callable
|
|
|
|
| 84 |
))
|
| 85 |
return {"status": "Processing started"}
|
| 86 |
|
| 87 |
+
@app.get("/download-latest")
async def download_latest():
    """Serve the newest ``knowledge_tree_*.json`` file as a download.

    The glob pattern is relative, so files are looked up in the process's
    current working directory.

    Returns:
        A ``FileResponse`` for the newest matching file, offered under its
        own base name. If no file matches, a JSON body with an ``"error"``
        key and HTTP status 404, so clients can tell the failure apart from
        a successful download.
    """
    # Local import keeps this block self-contained; the file's top-level
    # import of fastapi.responses does not include JSONResponse.
    from fastapi.responses import JSONResponse

    # Look for files matching our pattern
    files = glob.glob("knowledge_tree_*.json")
    if not files:
        return JSONResponse(
            status_code=404,
            content={"error": "No JSON files found yet. Finish an extraction first."},
        )

    # Pick the file with the greatest ctime. NOTE: os.path.getctime is the
    # creation time on Windows but the last metadata-change time on Unix.
    latest_file = max(files, key=os.path.getctime)
    return FileResponse(path=latest_file, filename=os.path.basename(latest_file))
|
| 96 |
|
| 97 |
|
| 98 |
if __name__ == "__main__":
|
phase0102_chunker_aggregator_2.py
CHANGED
|
@@ -99,7 +99,7 @@ async def call_groq_json(system_prompt, user_content):
|
|
| 99 |
)
|
| 100 |
return json.loads(completion.choices[0].message.content)
|
| 101 |
"""
|
| 102 |
-
|
| 103 |
#async def run_chunking_process(pdf_path, queue=None, whole=False, start_p=20, end_p=30):
|
| 104 |
async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_PAGE, end_p=END_PAGE):
|
| 105 |
"""
|
|
@@ -170,7 +170,7 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
|
|
| 170 |
# PHASE II: AGGREGATION
|
| 171 |
if len(temp_group) >= CHUNK_GROUP_SIZE:
|
| 172 |
from phase0102_chunker_aggregator_2 import generate_summary_block # Ensure helper is available
|
| 173 |
-
summary_res = generate_summary_block(temp_group)
|
| 174 |
|
| 175 |
summary_node = {
|
| 176 |
"type": "summary",
|
|
@@ -214,7 +214,7 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
|
|
| 214 |
await queue.put("DONE")
|
| 215 |
|
| 216 |
# Helper for summary
|
| 217 |
-
def generate_summary_block(chunks):
|
| 218 |
combined = "\n\n".join([f"{c['filename']}: {c['content']}" for c in chunks])
|
| 219 |
-
prompt = "Synthesize these Jungian chunks into a
|
| 220 |
-
return call_groq_json(prompt, combined)
|
|
|
|
| 99 |
)
|
| 100 |
return json.loads(completion.choices[0].message.content)
|
| 101 |
"""
|
| 102 |
+
|
| 103 |
#async def run_chunking_process(pdf_path, queue=None, whole=False, start_p=20, end_p=30):
|
| 104 |
async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_PAGE, end_p=END_PAGE):
|
| 105 |
"""
|
|
|
|
| 170 |
# PHASE II: AGGREGATION
|
| 171 |
if len(temp_group) >= CHUNK_GROUP_SIZE:
|
| 172 |
from phase0102_chunker_aggregator_2 import generate_summary_block # Ensure helper is available
|
| 173 |
+
summary_res = await generate_summary_block(temp_group)
|
| 174 |
|
| 175 |
summary_node = {
|
| 176 |
"type": "summary",
|
|
|
|
| 214 |
await queue.put("DONE")
|
| 215 |
|
| 216 |
# Helper for summary
|
| 217 |
+
async def generate_summary_block(chunks):
    """Ask the model for one Level-1 summary covering a group of chunks.

    Args:
        chunks: Iterable of mappings, each providing ``'filename'`` and
            ``'content'`` entries.

    Returns:
        Whatever ``call_groq_json`` yields for the request (presumably the
        parsed JSON object with ``'summary_name'`` and ``'synthesis'`` keys
        that the prompt asks for -- confirm against that helper).
    """
    # One "<filename>: <content>" entry per chunk, separated by blank lines.
    labelled = []
    for chunk in chunks:
        labelled.append(f"{chunk['filename']}: {chunk['content']}")
    user_content = "\n\n".join(labelled)

    system_prompt = "Synthesize these Jungian chunks into a single high-density Level-1 summary. JSON keys: 'summary_name', 'synthesis'."
    return await call_groq_json(system_prompt, user_content)
|