prashantmatlani committed on
Commit
d537c1b
·
verified ·
1 Parent(s): fa0ef29

Upload 8 files

Browse files
Files changed (3) hide show
  1. index.html +27 -2
  2. main.py +12 -1
  3. phase0102_chunker_aggregator_2.py +5 -5
index.html CHANGED
@@ -40,8 +40,15 @@
40
  </div>
41
 
42
  <button onclick="uploadFile()" style="margin-top:10px; width:100%;">Process Jungian Tree</button>
 
43
  <div id="status" style="font-size: 0.8em; margin-top: 10px; color: var(--accent);">Status: Idle</div>
44
 
 
 
 
 
 
 
45
  <div id="tree-container"></div>
46
 
47
  </div>
@@ -56,13 +63,19 @@ async function uploadFile() {
56
  const isWhole = document.querySelector('input[name="mode"]:checked').value === 'whole';
57
  const status = document.getElementById('status');
58
 
 
 
 
 
59
  if (fileInput.files.length === 0) return alert("Please select a PDF");
60
 
61
  const formData = new FormData();
62
  formData.append('file', fileInput.files[0]);
63
  formData.append('whole', isWhole);
64
- formData.append('start', document.getElementById('startP').value || 20);
65
- formData.append('end', document.getElementById('endP').value || 30);
 
 
66
 
67
  status.innerText = "Status: Uploading...";
68
  await fetch('/upload', { method: 'POST', body: formData });
@@ -71,10 +84,18 @@ async function uploadFile() {
71
  listenToStream();
72
  }
73
 
 
 
 
 
 
 
 
74
  function listenToStream() {
75
  const eventSource = new EventSource('/stream');
76
  const container = document.getElementById('tree-container');
77
  const status = document.getElementById('status');
 
78
  container.innerHTML = "";
79
 
80
  eventSource.onmessage = (event) => {
@@ -82,6 +103,8 @@ function listenToStream() {
82
 
83
  if (data.type === 'done') {
84
  status.innerText = "✅ EXTRACTION FINISHED!";
 
 
85
  eventSource.close();
86
  return;
87
  }
@@ -90,11 +113,13 @@ function listenToStream() {
90
  const node = document.createElement('div');
91
  node.className = data.type === 'summary' ? "tree-node summary-block" : "tree-node leaf-node";
92
  node.innerHTML = (data.type === 'summary' ? "⭐ " : "∟ ") + (data.name || data.filename);
 
93
  node.onclick = () => view(data.name || data.filename, data.content);
94
  container.appendChild(node);
95
  status.innerText = `Status: Created ${data.type}...`;
96
  };
97
  }
 
98
  </script>
99
 
100
 
 
40
  </div>
41
 
42
  <button onclick="uploadFile()" style="margin-top:10px; width:100%;">Process Jungian Tree</button>
43
+
44
  <div id="status" style="font-size: 0.8em; margin-top: 10px; color: var(--accent);">Status: Idle</div>
45
 
46
+ <!-- Download Button -->
47
+ <button id="downloadBtn" style="display:none; margin-top:10px; background:#2aa198; color:white; border:none; padding:10px; cursor:pointer; width:100%;">
48
+ 📥 Download Knowledge Tree
49
+ </button>
50
+ </div>
51
+
52
  <div id="tree-container"></div>
53
 
54
  </div>
 
63
  const isWhole = document.querySelector('input[name="mode"]:checked').value === 'whole';
64
  const status = document.getElementById('status');
65
 
66
+ // Explicitly grab the values from the inputs
67
+ const startVal = document.getElementById('startP').value;
68
+ const endVal = document.getElementById('endP').value;
69
+
70
  if (fileInput.files.length === 0) return alert("Please select a PDF");
71
 
72
  const formData = new FormData();
73
  formData.append('file', fileInput.files[0]);
74
  formData.append('whole', isWhole);
75
+ formData.append('start', startVal || 20); // Fallback to 20 if empty
76
+ formData.append('end', endVal || 30); // Fallback to 30 if empty
77
+
78
+ console.log("Sending Range:", startVal, "to", endVal); // Check browser console
79
 
80
  status.innerText = "Status: Uploading...";
81
  await fetch('/upload', { method: 'POST', body: formData });
 
84
  listenToStream();
85
  }
86
 
87
+ // --- VIEW function ---
88
+ function view(title, text) {
89
+ const display = document.getElementById('content-display');
90
+ display.innerHTML = `<h2>${title}</h2><div style="white-space: pre-wrap;">${text}</div>`;
91
+ }
92
+
93
+ // --- listenToStream function ---
94
  function listenToStream() {
95
  const eventSource = new EventSource('/stream');
96
  const container = document.getElementById('tree-container');
97
  const status = document.getElementById('status');
98
+ const dlBtn = document.getElementById('downloadBtn');
99
  container.innerHTML = "";
100
 
101
  eventSource.onmessage = (event) => {
 
103
 
104
  if (data.type === 'done') {
105
  status.innerText = "✅ EXTRACTION FINISHED!";
106
+ dlBtn.style.display = "block"; // Show the button
107
+ dlBtn.onclick = () => window.location.href = '/download-latest';
108
  eventSource.close();
109
  return;
110
  }
 
113
  const node = document.createElement('div');
114
  node.className = data.type === 'summary' ? "tree-node summary-block" : "tree-node leaf-node";
115
  node.innerHTML = (data.type === 'summary' ? "⭐ " : "∟ ") + (data.name || data.filename);
116
+ // Pass the content to the view function when clicked
117
  node.onclick = () => view(data.name || data.filename, data.content);
118
  container.appendChild(node);
119
  status.innerText = `Status: Created ${data.type}...`;
120
  };
121
  }
122
+
123
  </script>
124
 
125
 
main.py CHANGED
@@ -6,10 +6,12 @@ import os
6
  import asyncio
7
  import json
8
  from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
9
- from fastapi.responses import HTMLResponse, StreamingResponse
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi import Form # Add Form to your imports
12
  import shutil
 
 
13
 
14
  # Import chunking logic from the existing combined script
15
  # Note: Ensure script functions are wrap-able or callable
@@ -82,6 +84,15 @@ async def handle_upload(
82
  ))
83
  return {"status": "Processing started"}
84
 
 
 
 
 
 
 
 
 
 
85
 
86
 
87
  if __name__ == "__main__":
 
6
  import asyncio
7
  import json
8
  from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
9
+ from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse # Added FileResponse
10
  from fastapi.staticfiles import StaticFiles
11
  from fastapi import Form # Add Form to your imports
12
  import shutil
13
+ import glob
14
+
15
 
16
  # Import chunking logic from the existing combined script
17
  # Note: Ensure script functions are wrap-able or callable
 
84
  ))
85
  return {"status": "Processing started"}
86
 
87
+ @app.get("/download-latest")
88
+ async def download_latest():
89
+ # Look for files matching our pattern
90
+ files = glob.glob("knowledge_tree_*.json")
91
+ if not files:
92
+ return {"error": "No JSON files found yet. Finish an extraction first."}
93
+ # Sort by creation time to get the newest one
94
+ latest_file = max(files, key=os.path.getctime)
95
+ return FileResponse(path=latest_file, filename=os.path.basename(latest_file))
96
 
97
 
98
  if __name__ == "__main__":
phase0102_chunker_aggregator_2.py CHANGED
@@ -99,7 +99,7 @@ async def call_groq_json(system_prompt, user_content):
99
  )
100
  return json.loads(completion.choices[0].message.content)
101
  """
102
-
103
  #async def run_chunking_process(pdf_path, queue=None, whole=False, start_p=20, end_p=30):
104
  async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_PAGE, end_p=END_PAGE):
105
  """
@@ -170,7 +170,7 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
170
  # PHASE II: AGGREGATION
171
  if len(temp_group) >= CHUNK_GROUP_SIZE:
172
  from phase0102_chunker_aggregator_2 import generate_summary_block # Ensure helper is available
173
- summary_res = generate_summary_block(temp_group)
174
 
175
  summary_node = {
176
  "type": "summary",
@@ -214,7 +214,7 @@ async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_
214
  await queue.put("DONE")
215
 
216
  # Helper for summary
217
- def generate_summary_block(chunks):
218
  combined = "\n\n".join([f"{c['filename']}: {c['content']}" for c in chunks])
219
- prompt = "Synthesize these Jungian chunks into a dense Level-1 summary. JSON keys: 'summary_name', 'synthesis'."
220
- return call_groq_json(prompt, combined)
 
99
  )
100
  return json.loads(completion.choices[0].message.content)
101
  """
102
+
103
  #async def run_chunking_process(pdf_path, queue=None, whole=False, start_p=20, end_p=30):
104
  async def run_chunking_process(pdf_path, queue=None, whole=WHOLE, start_p=START_PAGE, end_p=END_PAGE):
105
  """
 
170
  # PHASE II: AGGREGATION
171
  if len(temp_group) >= CHUNK_GROUP_SIZE:
172
  from phase0102_chunker_aggregator_2 import generate_summary_block # Ensure helper is available
173
+ summary_res = await generate_summary_block(temp_group)
174
 
175
  summary_node = {
176
  "type": "summary",
 
214
  await queue.put("DONE")
215
 
216
  # Helper for summary
217
+ async def generate_summary_block(chunks):
218
  combined = "\n\n".join([f"{c['filename']}: {c['content']}" for c in chunks])
219
+ prompt = "Synthesize these Jungian chunks into a single high-density Level-1 summary. JSON keys: 'summary_name', 'synthesis'."
220
+ return await call_groq_json(prompt, combined)