#!/usr/bin/env bash
# Surrogate-1 V14 — Mac one-shot bundle + upload.
# I/O ONLY (no LLM compute on Mac, allowed per ~/.claude/memory/feedback_train_into_surrogate.md).
#
# Bundles owner's 715+ knowledge artifacts + filtered conversation pairs into
# a single tar.gz, uploads to axentx/surrogate-1-v10-source-bundle on HF.
# Then the unified V14 Kaggle kernel pulls + distills + trains all in ONE run.
#
# Usage:
#   bash bin/v3/bundle-upload.sh

set -uo pipefail
[[ -f "$HOME/.hermes/.env" ]] && { set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a; }

WORK="$HOME/.surrogate/state/v10-bundle"
mkdir -p "$WORK"
BUNDLE="$WORK/bundle"
mkdir -p "$BUNDLE"/{vault,patterns,memory,skills,agents,arkship-decisions,axentx-decisions,conversations,feature-builds}

log() { echo "[$(date '+%Y-%m-%dT%H:%M:%S')] $*"; }

log "═══ V14 source bundle ═══"

# 1. Obsidian Vault (knowledge + patterns) — file copy, ~14 MB
log "── copy Vault knowledge + patterns ──"
cp -r "$HOME/Documents/Obsidian Vault/AI-Hub/knowledge"/*.md "$BUNDLE/vault/" 2>/dev/null || true
find "$HOME/Documents/Obsidian Vault/AI-Hub/knowledge" -type d -name "trends-2026" -exec cp -r {} "$BUNDLE/vault/" \; 2>/dev/null
[[ -d "$HOME/Documents/Obsidian Vault/AI-Hub/patterns" ]] && cp -r "$HOME/Documents/Obsidian Vault/AI-Hub/patterns"/* "$BUNDLE/patterns/" 2>/dev/null || true
log "  vault:    $(find "$BUNDLE/vault" -name "*.md" 2>/dev/null | wc -l | tr -d ' ') files"
log "  patterns: $(find "$BUNDLE/patterns" -name "*.md" 2>/dev/null | wc -l | tr -d ' ') files"

# 2. .claude/memory — 27 files, 444 KB
log "── copy .claude/memory ──"
cp "$HOME/.claude/memory"/*.md "$BUNDLE/memory/" 2>/dev/null || true
log "  memory: $(ls "$BUNDLE/memory" 2>/dev/null | wc -l | tr -d ' ') files"

# 3. SKILL.md mirror — anthropic + community + local
log "── copy SKILL.md ──"
i=0
{
  find "$HOME/Documents/Obsidian Vault/AI-Hub/skills" -name "SKILL.md" -type f 2>/dev/null
  find "$HOME/.claude/skills" -name "SKILL.md" -type f 2>/dev/null
  find "$HOME/.claude/plugins/cache" -name "SKILL.md" -type f 2>/dev/null
} | while read -r f; do
  # NB: the while loop runs in the pipeline's subshell, so $i increments
  # per file within it but is invisible after `done` — fine for naming.
  name="skill-$i-$(basename "$(dirname "$f")").md"
  cp "$f" "$BUNDLE/skills/$name" 2>/dev/null
  i=$((i+1))
done
log "  skills: $(ls "$BUNDLE/skills" 2>/dev/null | wc -l | tr -d ' ') files"

# 4. Claude agents (user + plugin)
log "── copy agents ──"
cp "$HOME/.claude/agents"/*.md "$BUNDLE/agents/" 2>/dev/null || true
i=0
find "$HOME/.claude/plugins" -path "*agents*" -name "*.md" -type f 2>/dev/null | while read -r f; do
  cp "$f" "$BUNDLE/agents/plugin-$i-$(basename "$f")" 2>/dev/null
  i=$((i+1))
done
log "  agents: $(ls "$BUNDLE/agents" 2>/dev/null | wc -l | tr -d ' ') files"

# 5. arkship decisions
log "── copy arkship decisions ──"
cp "$HOME/axentx/arkship/decisions"/*.md "$BUNDLE/arkship-decisions/" 2>/dev/null || true
log "  arkship-decisions: $(ls "$BUNDLE/arkship-decisions" 2>/dev/null | wc -l | tr -d ' ') files"

# 6. axentx project decisions (Costinel/Vanguard/etc.)
log "── copy axentx project decisions ──"
find "$HOME/axentx" -maxdepth 4 -name "decisions" -type d 2>/dev/null | while read -r d; do
  proj="$(basename "$(dirname "$d")")"
  find "$d" -name "*.md" -type f 2>/dev/null | while read -r f; do
    cp "$f" "$BUNDLE/axentx-decisions/${proj}-$(basename "$f")" 2>/dev/null
  done
done
log "  axentx-decisions: $(ls "$BUNDLE/axentx-decisions" 2>/dev/null | wc -l | tr -d ' ') files"
# 7. Conversations — extract feature-build pairs ONLY (filter, far lighter than the 647 MB raw)
log "── extract feature-build pairs from 748 conversations ──"
python3 - <<'PYEOF'
import json, re, os
from pathlib import Path

PROJ = Path.home() / ".claude/projects"
OUT = Path.home() / ".surrogate/state/v10-bundle/bundle/conversations"
OUT.mkdir(parents=True, exist_ok=True)

# English + Thai cues that mark a user turn as a feature request.
FEATURE_PATTERNS = [
    r"\b(build|make|add|create|implement|wire|bake|train|ingest|distill)\s+\w+",
    r"(ทำ|สร้าง|เพิ่ม|implement|train|เทรน|fine[-_]?tune)\s+",   # Thai: do/make, create, add, train
    r"(I want|I need|I'd like|ผมอยาก|อยาก|ต้องการ)",              # Thai: I want, want, need
]

files = sorted(PROJ.rglob("*.jsonl"))
print(f"  scanning {len(files)} session files...")
n_total = 0
for j, fp in enumerate(files):
    pairs = []
    last_user = None
    try:
        for line in fp.read_text(errors="replace").splitlines():
            try:
                ev = json.loads(line)
            except Exception:
                continue
            msg = ev.get("message", {})
            role = msg.get("role")
            c = msg.get("content", "")
            if isinstance(c, list):
                c = "\n".join(b.get("text", "") for b in c if isinstance(b, dict) and b.get("type") == "text")
            if not isinstance(c, str):
                c = str(c)
            if role == "user":
                last_user = c
            elif role == "assistant" and last_user:
                if 50 < len(last_user) < 4000 and 100 < len(c) < 8000:
                    if any(re.search(p, last_user, re.I) for p in FEATURE_PATTERNS):
                        pairs.append((last_user, c))
                last_user = None
    except Exception:
        continue
    if pairs:
        out_md = OUT / (fp.parent.name + "__" + fp.stem + ".md")
        with out_md.open("w") as f:
            f.write(f"# Session {fp.stem}\n\n")
            for u, a in pairs[:30]:
                f.write(f"## User\n{u[:3000]}\n\n## Assistant\n{a[:6000]}\n\n---\n\n")
        n_total += len(pairs)
    if (j + 1) % 100 == 0:
        print(f"  [{j+1}/{len(files)}] {n_total} pairs extracted")
print(f"  total: {n_total} feature-build pairs from {len(files)} sessions")
PYEOF

# Mirror conversations into feature-builds (same content, different "kind" tag in distiller)
cp -r "$BUNDLE/conversations"/* "$BUNDLE/feature-builds/" 2>/dev/null || true

# Tar + push
log ""
log "── tar + push to HF ──"
TARBALL="$WORK/bundle.tar.gz"
( cd "$WORK" && tar -czf bundle.tar.gz bundle/ )
SIZE=$(du -sh "$TARBALL" | awk '{print $1}')
log "  bundle: $TARBALL ($SIZE)"

# Prefer HF_TOKEN_PRO_WRITE — has write scope + a dedicated rate-limit pool.
# HF_TOKEN often hits the 2500 req/5min ceiling from research agents.
HF_USE_TOKEN="${HF_TOKEN_PRO_WRITE:-${HF_TOKEN_PRO:-${HF_TOKEN:-}}}"
if [[ -z "$HF_USE_TOKEN" ]]; then
  log "  ✗ no HF token set in ~/.hermes/.env"
  exit 1
fi
export HF_USE_TOKEN
python3 - <<'PYEOF'
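# NOTE: the original upload body was truncated in the source. What follows is a
# minimal sketch, assuming the standard huggingface_hub HfApi: the repo id and
# HF_USE_TOKEN env var come from this script's own comments, while path_in_repo
# and the create_repo call are assumptions, not the original code.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_USE_TOKEN"])
# Create the dataset repo if it doesn't exist yet, so re-runs stay idempotent (assumed).
api.create_repo("axentx/surrogate-1-v10-source-bundle", repo_type="dataset", exist_ok=True)
api.upload_file(
    path_or_fileobj=os.path.expanduser("~/.surrogate/state/v10-bundle/bundle.tar.gz"),
    path_in_repo="bundle.tar.gz",  # assumed target name inside the repo
    repo_id="axentx/surrogate-1-v10-source-bundle",
    repo_type="dataset",
)
print("  pushed bundle.tar.gz → axentx/surrogate-1-v10-source-bundle")
PYEOF

log "═══ done: V14 Kaggle kernel can now pull + distill + train ═══"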