Bc-AI commited on
Commit
7955aea
Β·
verified Β·
1 Parent(s): a36bbdf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +524 -0
app.py ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SharePUTER Worker Node β€” Serverless (HF Space Docker / any container)
3
+ Executes task fragments assigned by the head node.
4
+ Supports node types: RAM, CPU, GPU
5
+ """
6
+
7
+ from fastapi import FastAPI, Request
8
+ from fastapi.responses import HTMLResponse, JSONResponse
9
+ import httpx
10
+ import asyncio
11
+ import uuid
12
+ import os
13
+ import sys
14
+ import io
15
+ import json
16
+ import time
17
+ import traceback
18
+ import multiprocessing
19
+ import psutil
20
+ from datetime import datetime
21
+ from contextlib import redirect_stdout, redirect_stderr
22
+ from typing import Optional
23
+
24
+ app = FastAPI(title="SharePUTER Worker Node")
25
+
26
+ # ─── Configuration ──────────────────────────────────────────────────────────
27
+ HEAD_NODE_URL = os.environ.get("SACCP_HEAD_URL", "http://localhost:7860")
28
+ NODE_TYPE = os.environ.get("SACCP_NODE_TYPE", "CPU").upper() # RAM, CPU, GPU
29
+ NODE_OWNER = os.environ.get("SACCP_NODE_OWNER", "anonymous")
30
+ NODE_ID: Optional[str] = None
31
+ REGISTERED = False
32
+
33
+ # Auto-detect specs
34
+ def detect_specs():
35
+ specs = {
36
+ "cpu_count": multiprocessing.cpu_count(),
37
+ "ram_total_gb": round(psutil.virtual_memory().total / (1024**3), 2),
38
+ "ram_available_gb": round(psutil.virtual_memory().available / (1024**3), 2),
39
+ "platform": sys.platform,
40
+ "python_version": sys.version.split()[0],
41
+ }
42
+
43
+ # Check for GPU
44
+ try:
45
+ import torch
46
+ if torch.cuda.is_available():
47
+ specs["gpu_available"] = True
48
+ specs["gpu_name"] = torch.cuda.get_device_name(0)
49
+ specs["gpu_memory_gb"] = round(torch.cuda.get_device_properties(0).total_mem / (1024**3), 2)
50
+ else:
51
+ specs["gpu_available"] = False
52
+ except ImportError:
53
+ specs["gpu_available"] = False
54
+
55
+ return specs
56
+
57
+
58
+ SPECS = detect_specs()
59
+
60
+
61
+ def safe_execute(code: str, timeout: int = 300) -> dict:
62
+ """Safely execute Python code in an isolated namespace with timeout."""
63
+ result = {
64
+ "status": "completed",
65
+ "result": None,
66
+ "stdout": "",
67
+ "stderr": "",
68
+ "error": None,
69
+ "execution_time": 0
70
+ }
71
+
72
+ stdout_capture = io.StringIO()
73
+ stderr_capture = io.StringIO()
74
+ namespace = {"__builtins__": __builtins__}
75
+
76
+ start_time = time.time()
77
+ try:
78
+ with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
79
+ exec(code, namespace)
80
+
81
+ result["execution_time"] = round(time.time() - start_time, 3)
82
+ result["stdout"] = stdout_capture.getvalue()
83
+ result["stderr"] = stderr_capture.getvalue()
84
+
85
+ # Try to capture meaningful result
86
+ # Look for common result patterns
87
+ for var_name in ["result", "output", "answer", "ret", "__fragment_results__"]:
88
+ if var_name in namespace:
89
+ val = namespace[var_name]
90
+ try:
91
+ json.dumps(val) # Check JSON serializable
92
+ result["result"] = val
93
+ except (TypeError, ValueError):
94
+ result["result"] = str(val)
95
+ break
96
+
97
+ # If no named result, capture last expression result via stdout
98
+ if result["result"] is None and result["stdout"]:
99
+ result["result"] = result["stdout"].strip()
100
+
101
+ except Exception as e:
102
+ result["status"] = "failed"
103
+ result["error"] = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
104
+ result["execution_time"] = round(time.time() - start_time, 3)
105
+ result["stdout"] = stdout_capture.getvalue()
106
+ result["stderr"] = stderr_capture.getvalue()
107
+
108
+ return result
109
+
110
+
111
+ # ─── Homepage ───────────────────────────────────────────────────────────────
112
+ @app.get("/", response_class=HTMLResponse)
113
+ async def homepage():
114
+ gpu_info = ""
115
+ if SPECS.get("gpu_available"):
116
+ gpu_info = f"""
117
+ <div class="spec-item">
118
+ <span class="spec-label">GPU</span>
119
+ <span class="spec-value">{SPECS.get('gpu_name', 'Unknown')}</span>
120
+ </div>
121
+ <div class="spec-item">
122
+ <span class="spec-label">GPU Memory</span>
123
+ <span class="spec-value">{SPECS.get('gpu_memory_gb', 0)} GB</span>
124
+ </div>"""
125
+
126
+ type_colors = {"RAM": "#ff6b6b", "CPU": "#00aaff", "GPU": "#aa66ff"}
127
+ type_color = type_colors.get(NODE_TYPE, "#00ff88")
128
+
129
+ current_ram = round(psutil.virtual_memory().available / (1024**3), 2)
130
+ cpu_percent = psutil.cpu_percent(interval=0.5)
131
+
132
+ html = f"""<!DOCTYPE html>
133
+ <html lang="en">
134
+ <head>
135
+ <meta charset="UTF-8">
136
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
137
+ <title>SharePUTERβ„’ Worker Node</title>
138
+ <style>
139
+ @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;700&family=Inter:wght@300;400;600;700;900&display=swap');
140
+ * {{ margin:0; padding:0; box-sizing:border-box; }}
141
+ body {{
142
+ font-family:'Inter',sans-serif;
143
+ background:#0a0a0f;
144
+ color:#e0e0e0;
145
+ min-height:100vh;
146
+ display:flex;
147
+ align-items:center;
148
+ justify-content:center;
149
+ }}
150
+ .bg {{
151
+ position:fixed; top:0; left:0; right:0; bottom:0;
152
+ background-image:
153
+ radial-gradient(circle at 20% 30%, {type_color}11 0%, transparent 50%),
154
+ radial-gradient(circle at 80% 70%, {type_color}08 0%, transparent 50%);
155
+ }}
156
+ .card {{
157
+ position:relative; z-index:1;
158
+ background:rgba(255,255,255,0.02);
159
+ border:1px solid rgba(255,255,255,0.06);
160
+ border-radius:24px;
161
+ padding:48px;
162
+ max-width:560px;
163
+ width:90%;
164
+ backdrop-filter:blur(20px);
165
+ }}
166
+ .node-type {{
167
+ display:inline-block;
168
+ padding:6px 20px;
169
+ border-radius:30px;
170
+ font-family:'JetBrains Mono',monospace;
171
+ font-size:0.85rem;
172
+ font-weight:700;
173
+ letter-spacing:2px;
174
+ background:{type_color}22;
175
+ color:{type_color};
176
+ border:1px solid {type_color}44;
177
+ margin-bottom:20px;
178
+ }}
179
+ h1 {{
180
+ font-size:2rem;
181
+ font-weight:900;
182
+ margin-bottom:4px;
183
+ background:linear-gradient(135deg,#fff,{type_color});
184
+ -webkit-background-clip:text;
185
+ -webkit-text-fill-color:transparent;
186
+ }}
187
+ .subtitle {{ color:#888; font-size:0.9rem; margin-bottom:32px; }}
188
+ .status {{
189
+ display:flex; align-items:center; gap:10px;
190
+ margin-bottom:32px;
191
+ font-family:'JetBrains Mono',monospace;
192
+ font-size:0.85rem;
193
+ }}
194
+ .pulse {{
195
+ width:10px; height:10px; border-radius:50%;
196
+ background:#00ff88;
197
+ animation:pulse 2s ease-in-out infinite;
198
+ }}
199
+ @keyframes pulse {{
200
+ 0%,100% {{ box-shadow:0 0 0 0 rgba(0,255,136,0.4); }}
201
+ 50% {{ box-shadow:0 0 0 8px rgba(0,255,136,0); }}
202
+ }}
203
+ .specs {{
204
+ background:rgba(255,255,255,0.02);
205
+ border:1px solid rgba(255,255,255,0.04);
206
+ border-radius:12px;
207
+ padding:20px;
208
+ margin-bottom:24px;
209
+ }}
210
+ .specs h3 {{
211
+ font-size:0.75rem;
212
+ text-transform:uppercase;
213
+ letter-spacing:2px;
214
+ color:#888;
215
+ margin-bottom:16px;
216
+ }}
217
+ .spec-item {{
218
+ display:flex; justify-content:space-between;
219
+ padding:8px 0;
220
+ border-bottom:1px solid rgba(255,255,255,0.03);
221
+ }}
222
+ .spec-item:last-child {{ border-bottom:none; }}
223
+ .spec-label {{ color:#888; font-size:0.9rem; }}
224
+ .spec-value {{
225
+ color:{type_color};
226
+ font-family:'JetBrains Mono',monospace;
227
+ font-weight:600;
228
+ font-size:0.9rem;
229
+ }}
230
+ .config {{
231
+ background:rgba(255,255,255,0.02);
232
+ border:1px solid rgba(255,255,255,0.04);
233
+ border-radius:12px;
234
+ padding:20px;
235
+ }}
236
+ .config h3 {{
237
+ font-size:0.75rem;
238
+ text-transform:uppercase;
239
+ letter-spacing:2px;
240
+ color:#888;
241
+ margin-bottom:16px;
242
+ }}
243
+ .config label {{ display:block; color:#aaa; font-size:0.85rem; margin-bottom:4px; }}
244
+ .config input, .config select {{
245
+ width:100%;
246
+ padding:10px 14px;
247
+ border-radius:8px;
248
+ border:1px solid rgba(255,255,255,0.1);
249
+ background:rgba(255,255,255,0.04);
250
+ color:#fff;
251
+ font-family:'JetBrains Mono',monospace;
252
+ font-size:0.85rem;
253
+ margin-bottom:14px;
254
+ outline:none;
255
+ }}
256
+ .config input:focus, .config select:focus {{
257
+ border-color:{type_color}88;
258
+ }}
259
+ .btn {{
260
+ width:100%;
261
+ padding:12px;
262
+ border:none;
263
+ border-radius:10px;
264
+ background:linear-gradient(135deg,{type_color},{type_color}aa);
265
+ color:#000;
266
+ font-weight:700;
267
+ font-size:0.95rem;
268
+ cursor:pointer;
269
+ transition:all 0.2s;
270
+ font-family:'Inter',sans-serif;
271
+ }}
272
+ .btn:hover {{ transform:translateY(-2px); box-shadow:0 4px 20px {type_color}44; }}
273
+ .footer {{ text-align:center; margin-top:24px; color:#555; font-size:0.75rem; }}
274
+ #configResult {{
275
+ margin-top:12px; padding:10px; border-radius:8px;
276
+ font-family:'JetBrains Mono',monospace; font-size:0.8rem;
277
+ display:none;
278
+ }}
279
+ </style>
280
+ </head>
281
+ <body>
282
+ <div class="bg"></div>
283
+ <div class="card">
284
+ <div class="node-type">{NODE_TYPE} NODE</div>
285
+ <h1>SharePUTERβ„’ Worker</h1>
286
+ <p class="subtitle">SACCP Network Compute Node</p>
287
+
288
+ <div class="status">
289
+ <div class="pulse"></div>
290
+ <span style="color:#00ff88;">{'REGISTERED & WORKING' if REGISTERED else 'ONLINE β€” NOT REGISTERED'}</span>
291
+ </div>
292
+
293
+ <div class="specs">
294
+ <h3>System Specifications</h3>
295
+ <div class="spec-item">
296
+ <span class="spec-label">Node ID</span>
297
+ <span class="spec-value">{NODE_ID or 'Not Registered'}</span>
298
+ </div>
299
+ <div class="spec-item">
300
+ <span class="spec-label">CPU Cores</span>
301
+ <span class="spec-value">{SPECS['cpu_count']}</span>
302
+ </div>
303
+ <div class="spec-item">
304
+ <span class="spec-label">CPU Usage</span>
305
+ <span class="spec-value">{cpu_percent}%</span>
306
+ </div>
307
+ <div class="spec-item">
308
+ <span class="spec-label">Total RAM</span>
309
+ <span class="spec-value">{SPECS['ram_total_gb']} GB</span>
310
+ </div>
311
+ <div class="spec-item">
312
+ <span class="spec-label">Available RAM</span>
313
+ <span class="spec-value">{current_ram} GB</span>
314
+ </div>
315
+ <div class="spec-item">
316
+ <span class="spec-label">Python</span>
317
+ <span class="spec-value">{SPECS['python_version']}</span>
318
+ </div>
319
+ {gpu_info}
320
+ </div>
321
+
322
+ <div class="config">
323
+ <h3>Node Configuration</h3>
324
+ <label>Head Node URL</label>
325
+ <input type="text" id="headUrl" value="{HEAD_NODE_URL}" />
326
+ <label>Node Type</label>
327
+ <select id="nodeType">
328
+ <option value="RAM" {"selected" if NODE_TYPE=="RAM" else ""}>RAM β€” Provide memory</option>
329
+ <option value="CPU" {"selected" if NODE_TYPE=="CPU" else ""}>CPU β€” Provide compute</option>
330
+ <option value="GPU" {"selected" if NODE_TYPE=="GPU" else ""}>GPU β€” Provide GPU</option>
331
+ </select>
332
+ <label>Owner Username</label>
333
+ <input type="text" id="owner" value="{NODE_OWNER}" />
334
+ <button class="btn" onclick="configure()">
335
+ {'Reconfigure' if REGISTERED else 'Register & Start Working'}
336
+ </button>
337
+ <div id="configResult"></div>
338
+ </div>
339
+
340
+ <div class="footer">SharePUTERβ„’ SACCP Network β€” Worker Node v1.0</div>
341
+ </div>
342
+
343
+ <script>
344
+ async function configure() {{
345
+ const headUrl = document.getElementById('headUrl').value;
346
+ const nodeType = document.getElementById('nodeType').value;
347
+ const owner = document.getElementById('owner').value;
348
+ const resultDiv = document.getElementById('configResult');
349
+
350
+ resultDiv.style.display = 'block';
351
+ resultDiv.style.background = 'rgba(0,136,255,0.1)';
352
+ resultDiv.style.color = '#00aaff';
353
+ resultDiv.innerText = 'Registering with head node...';
354
+
355
+ try {{
356
+ const resp = await fetch('/configure', {{
357
+ method: 'POST',
358
+ headers: {{'Content-Type': 'application/json'}},
359
+ body: JSON.stringify({{head_url: headUrl, node_type: nodeType, owner: owner}})
360
+ }});
361
+ const data = await resp.json();
362
+ if (resp.ok) {{
363
+ resultDiv.style.background = 'rgba(0,255,136,0.1)';
364
+ resultDiv.style.color = '#00ff88';
365
+ resultDiv.innerText = 'βœ“ ' + (data.message || 'Registered!') + '\\nNode ID: ' + (data.node_id || 'N/A');
366
+ setTimeout(() => location.reload(), 2000);
367
+ }} else {{
368
+ resultDiv.style.background = 'rgba(255,68,68,0.1)';
369
+ resultDiv.style.color = '#ff4444';
370
+ resultDiv.innerText = 'βœ— ' + (data.detail || data.error || 'Failed');
371
+ }}
372
+ }} catch (e) {{
373
+ resultDiv.style.background = 'rgba(255,68,68,0.1)';
374
+ resultDiv.style.color = '#ff4444';
375
+ resultDiv.innerText = 'βœ— Network error: ' + e.message;
376
+ }}
377
+ }}
378
+ </script>
379
+ </body>
380
+ </html>"""
381
+ return HTMLResponse(content=html)
382
+
383
+
384
+ @app.post("/configure")
385
+ async def configure_node(data: dict):
386
+ global HEAD_NODE_URL, NODE_TYPE, NODE_OWNER, NODE_ID, REGISTERED
387
+
388
+ HEAD_NODE_URL = data.get("head_url", HEAD_NODE_URL)
389
+ NODE_TYPE = data.get("node_type", NODE_TYPE).upper()
390
+ NODE_OWNER = data.get("owner", NODE_OWNER)
391
+
392
+ # Register with head node
393
+ try:
394
+ async with httpx.AsyncClient(timeout=15.0) as client:
395
+ resp = await client.post(f"{HEAD_NODE_URL}/api/register_node", json={
396
+ "node_type": NODE_TYPE,
397
+ "node_url": os.environ.get("SPACE_HOST", f"http://localhost:7861"),
398
+ "owner": NODE_OWNER,
399
+ "specs": SPECS
400
+ })
401
+ if resp.status_code == 200:
402
+ result = resp.json()
403
+ NODE_ID = result.get("node_id")
404
+ REGISTERED = True
405
+ return {"message": "Registered successfully", "node_id": NODE_ID, "pay_rate": result.get("pay_rate")}
406
+ else:
407
+ return JSONResponse(content={"error": resp.text}, status_code=500)
408
+ except Exception as e:
409
+ return JSONResponse(content={"error": str(e)}, status_code=500)
410
+
411
+
412
+ @app.get("/health")
413
+ async def health():
414
+ return {
415
+ "status": "online",
416
+ "node_id": NODE_ID,
417
+ "node_type": NODE_TYPE,
418
+ "registered": REGISTERED,
419
+ "specs": SPECS
420
+ }
421
+
422
+
423
+ # ─── Worker Loop (runs in background) ──────────────────────────────────────
424
+ async def worker_loop():
425
+ """Continuously poll head node for work and execute fragments."""
426
+ global NODE_ID, REGISTERED
427
+
428
+ # Wait for registration
429
+ while not REGISTERED:
430
+ await asyncio.sleep(5)
431
+ # Auto-register if env vars are set
432
+ if os.environ.get("SACCP_AUTO_REGISTER", "false").lower() == "true":
433
+ try:
434
+ async with httpx.AsyncClient(timeout=15.0) as client:
435
+ resp = await client.post(f"{HEAD_NODE_URL}/api/register_node", json={
436
+ "node_type": NODE_TYPE,
437
+ "node_url": os.environ.get("SPACE_HOST", "http://localhost:7861"),
438
+ "owner": NODE_OWNER,
439
+ "specs": SPECS
440
+ })
441
+ if resp.status_code == 200:
442
+ result = resp.json()
443
+ NODE_ID = result.get("node_id")
444
+ REGISTERED = True
445
+ print(f"[SACCP] Auto-registered as {NODE_ID}")
446
+ except Exception as e:
447
+ print(f"[SACCP] Auto-register failed: {e}")
448
+
449
+ print(f"[SACCP] Worker loop started β€” Node: {NODE_ID} Type: {NODE_TYPE}")
450
+
451
+ heartbeat_interval = 30
452
+ last_heartbeat = 0
453
+
454
+ while True:
455
+ try:
456
+ # Heartbeat
457
+ if time.time() - last_heartbeat > heartbeat_interval:
458
+ try:
459
+ async with httpx.AsyncClient(timeout=10.0) as client:
460
+ await client.post(f"{HEAD_NODE_URL}/api/node_heartbeat", json={
461
+ "node_id": NODE_ID,
462
+ "status": "online",
463
+ "current_load": {
464
+ "cpu_percent": psutil.cpu_percent(),
465
+ "ram_available_gb": round(psutil.virtual_memory().available / (1024**3), 2)
466
+ }
467
+ })
468
+ last_heartbeat = time.time()
469
+ except Exception:
470
+ pass
471
+
472
+ # Get work
473
+ async with httpx.AsyncClient(timeout=15.0) as client:
474
+ resp = await client.get(f"{HEAD_NODE_URL}/api/get_work?node_id={NODE_ID}")
475
+ if resp.status_code != 200:
476
+ await asyncio.sleep(5)
477
+ continue
478
+
479
+ data = resp.json()
480
+ work = data.get("work")
481
+
482
+ if not work:
483
+ await asyncio.sleep(3)
484
+ continue
485
+
486
+ fragment_id = work["fragment_id"]
487
+ code = work.get("code", "")
488
+ frag_type = work.get("fragment_type", "compute")
489
+
490
+ print(f"[SACCP] Executing fragment {fragment_id} (type: {frag_type})")
491
+
492
+ if frag_type == "setup":
493
+ # Setup fragments just validate imports
494
+ exec_result = safe_execute(code, timeout=60)
495
+ elif frag_type == "aggregate":
496
+ exec_result = {"status": "completed", "result": "aggregation_placeholder", "stdout": "", "error": None}
497
+ else:
498
+ exec_result = safe_execute(code, timeout=300)
499
+
500
+ # Submit result
501
+ await client.post(f"{HEAD_NODE_URL}/api/submit_result", json={
502
+ "fragment_id": fragment_id,
503
+ "node_id": NODE_ID,
504
+ "status": exec_result["status"],
505
+ "result": exec_result.get("result"),
506
+ "error": exec_result.get("error"),
507
+ "stdout": exec_result.get("stdout", "")
508
+ })
509
+
510
+ print(f"[SACCP] Fragment {fragment_id} -> {exec_result['status']}")
511
+
512
+ except Exception as e:
513
+ print(f"[SACCP] Worker error: {e}")
514
+ await asyncio.sleep(5)
515
+
516
+
517
+ @app.on_event("startup")
518
+ async def startup():
519
+ asyncio.create_task(worker_loop())
520
+
521
+
522
+ if __name__ == "__main__":
523
+ import uvicorn
524
+ uvicorn.run(app, host="0.0.0.0", port=7861)