fix(low-mem): prune daemon stack — Space OOM after Round 9+10 added too much
After Round 10 (continuous-discoverer + tighter idle) the Space hit the 16GB
limit again. An audit found 3 redundant heavy daemons stacking on cpu-basic:
- hf-dataset-discoverer.sh: REPLACED by Round 10 continuous-discoverer.sh
(same job, lower memory footprint)
- skill-synthesis-daemon.sh: heavy LLM calls — anchor's voyager-skills.py
covers via local Ollama
- agentic-crawler.sh × 2: web crawling already covered by Round 7
aggressive-harvester (cron) + Round 10 continuous-discoverer (boot daemon)
LOW_MEM tightened defaults:
- BULK_WORKERS: 1 → 0 (full-download too heavy on 16GB; anchor handles)
- STREAM_WORKERS: 2 (kept — streaming is lighter)
- agentic-crawler: SKIPPED on LOW_MEM
- skill-synthesis: SKIPPED on LOW_MEM
- hf-dataset-discoverer: SKIPPED always (deprecated)
cpu-basic now runs:
redis + status-server + 2 streaming workers + continuous-discoverer +
github-agentic-crawler + scrape-daemon parallel=2 + master cron loop
= est. ~3-5 GB peak (the previous stack attempted ~25-30 GB).
Once anchor (A1.Flex 24GB) is up, anchor takes bulk + skill-synthesis +
heavy enrichment via local Ollama. The HF Space stays a light orchestrator.
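
The ~3-5 GB figure is an estimate. A quick sanity check on the running Space is to sum the resident set size (RSS) of the surviving daemons; the sketch below assumes the process names match the scripts this commit keeps on cpu-basic, and RSS over-counts shared pages, so treat the total as an upper bound.

# Sanity-check the post-prune memory estimate (sketch; name pattern is an assumption).
PATTERN='redis-server|status-server|streaming-mirror-worker|continuous-discoverer|github-agentic-crawler|scrape-daemon'
total_kb=0
while read -r rss args; do
  total_kb=$(( total_kb + rss ))
  printf '%8s KB  %s\n' "$rss" "$args"
done < <(ps -eo rss=,args= | grep -E "$PATTERN" | grep -v grep)
echo "daemon stack RSS ≈ $(( total_kb / 1024 )) MB   # RSS double-counts shared pages"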
@@ -264,18 +264,25 @@ chmod +x /tmp/scrape-daemon.sh
 nohup /tmp/scrape-daemon.sh > "$LOG_DIR/scrape-daemon.log" 2>&1 &
 echo "[$(date +%H:%M:%S)] scrape daemon parallel=${SCRAPE_PARALLEL} (LOW_MEM=$LOW_MEM)" >> "$LOG_DIR/boot.log"

-# ── 7b. Agentic crawler ─────
-
-
-
+# ── 7b. Agentic crawler — DISABLED on LOW_MEM (anchor takes this load) ─────
+if [[ "$LOW_MEM" != "1" ]]; then
+  CRAWLER_PARALLEL="${CRAWLER_PARALLEL:-6}"
+  nohup bash ~/.surrogate/bin/agentic-crawler.sh "$CRAWLER_PARALLEL" \
+    > "$LOG_DIR/agentic-crawler.log" 2>&1 &
+  echo "[$(date +%H:%M:%S)] agentic crawler parallel=$CRAWLER_PARALLEL" >> "$LOG_DIR/boot.log"
+else
+  echo "[$(date +%H:%M:%S)] ⚠ agentic-crawler SKIPPED (LOW_MEM); anchor handles" >> "$LOG_DIR/boot.log"
+fi

-# ── 7b2. GitHub-specific agentic crawler (lightweight
+# ── 7b2. GitHub-specific agentic crawler (lightweight, keep on always) ─────
 nohup bash ~/.surrogate/bin/github-agentic-crawler.sh > "$LOG_DIR/github-agentic-crawler.log" 2>&1 &
 echo "[$(date +%H:%M:%S)] github-agentic-crawler started" >> "$LOG_DIR/boot.log"

-# ── 7b3. HF Dataset Discoverer ─
-
-
+# ── 7b3. HF Dataset Discoverer — DISABLED (replaced by continuous-discoverer) ─
+# Round 10 (a27499d): bin/v2/continuous-discoverer.sh covers HF + arxiv +
+# Stack Exchange + GH trending in one daemon. Old hf-dataset-discoverer.sh
+# is now redundant + memory pressure on cpu-basic.
+echo "[$(date +%H:%M:%S)] ⚠ hf-dataset-discoverer SKIPPED (replaced by continuous-discoverer)" >> "$LOG_DIR/boot.log"

 # ── 7e. auto-orchestrate-continuous — DISABLED on LOW_MEM (cron handles it) ─
 if [[ "$LOW_MEM" != "1" ]]; then

@@ -323,9 +330,13 @@ echo "[$(date +%H:%M:%S)] bulk-ingest-parallel started (6 shards, 293M total cap
 PARQUET_PARALLEL=2 nohup bash ~/.surrogate/bin/parquet-direct-ingest.sh > "$LOG_DIR/parquet-direct-ingest.log" 2>&1 &
 echo "[$(date +%H:%M:%S)] parquet-direct-ingest started (2 parallel DLs)" >> "$LOG_DIR/boot.log"

-# ── 7c. Skill-synthesis daemon
-
-
+# ── 7c. Skill-synthesis daemon — DISABLED on LOW_MEM (heavy LLM calls) ────
+if [[ "$LOW_MEM" != "1" ]]; then
+  nohup bash ~/.surrogate/bin/skill-synthesis-daemon.sh > "$LOG_DIR/skill-synthesis.log" 2>&1 &
+  echo "[$(date +%H:%M:%S)] skill-synthesis daemon started" >> "$LOG_DIR/boot.log"
+else
+  echo "[$(date +%H:%M:%S)] ⚠ skill-synthesis SKIPPED (LOW_MEM); anchor's voyager-skills.py covers" >> "$LOG_DIR/boot.log"
+fi

 # ── 7d. Bulk mirror coordinator + 4 parallel workers ────────────────────────
 # User feedback 2026-04-29: "ทุก agent ทำงานร่วมกัน และไม่ไปที่ซ้ำๆ".

@@ -338,7 +349,10 @@ python3 ~/.surrogate/bin/v2/bulk-mirror-coordinator.py seed >> "$LOG_DIR/bulk-mi
 # Two worker types share the same coordinator queue:
 #   bulk-mirror-worker.sh — full-download, suits small/medium datasets
 #   streaming-mirror-worker.sh — HF datasets streaming, suits trillion-token
-
+# LOW_MEM tuned for cpu-basic 16GB after Round 9+10 OOM:
+#   0 bulk (full-download too heavy) + 2 streaming (lighter) on LOW_MEM
+#   Anchor handles bulk via 24GB ARM headroom.
+BULK_WORKERS="${BULK_WORKERS:-$([[ "$LOW_MEM" == "1" ]] && echo 0 || echo 4)}"
 STREAM_WORKERS="${STREAM_WORKERS:-$([[ "$LOW_MEM" == "1" ]] && echo 2 || echo 4)}"

 for i in $(seq 1 "$BULK_WORKERS"); do
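
The LOW_MEM gate now wraps three separate launches (agentic-crawler, auto-orchestrate-continuous, skill-synthesis) with the same if/else + boot.log pattern. If more daemons get gated later, the pattern could be factored into a helper; the sketch below is not part of this commit and only reuses the script's existing $LOW_MEM, $LOG_DIR and boot.log conventions.

# Hypothetical helper (not in this commit): start a daemon unless LOW_MEM=1.
# Usage: run_unless_low_mem <name> <command> [args...]
run_unless_low_mem() {
  local name="$1"; shift
  if [[ "$LOW_MEM" != "1" ]]; then
    nohup "$@" > "$LOG_DIR/$name.log" 2>&1 &
    echo "[$(date +%H:%M:%S)] $name started" >> "$LOG_DIR/boot.log"
  else
    echo "[$(date +%H:%M:%S)] ⚠ $name SKIPPED (LOW_MEM); anchor handles" >> "$LOG_DIR/boot.log"
  fi
}
# e.g.: run_unless_low_mem skill-synthesis bash ~/.surrogate/bin/skill-synthesis-daemon.sh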