# surrogate-1/bin/v3/bundle-upload.sh
# Author: Ashira Pitchayapakayakul
# Commit: fix(bundle): prefer HF_TOKEN_PRO_WRITE to dodge HF_TOKEN 2500-req/5min cap (f17fac0)
# (VCS header residue commented out so the file remains a runnable script.)
#!/usr/bin/env bash
# Surrogate-1 V14 — Mac one-shot bundle + upload.
# I/O ONLY (no LLM compute on Mac, allowed per ~/.claude/memory/feedback_train_into_surrogate.md).
#
# Bundles owner's 715+ knowledge artifacts + filtered conversation pairs into
# a single tar.gz, uploads to axentx/surrogate-1-v10-source-bundle on HF.
# Then the unified V14 Kaggle kernel pulls + distills + trains all in ONE run.
#
# Usage:
# bash bin/v3/bundle-upload.sh
# Strict-ish mode: -u (unset vars fatal) + pipefail. -e is deliberately
# omitted — the copy steps below are best-effort and rely on || true.
set -uo pipefail

# Pull HF tokens / secrets from the hermes env file when present.
# set -a auto-exports everything the sourced file assigns.
if [[ -f "$HOME/.hermes/.env" ]]; then
  set -a
  source "$HOME/.hermes/.env" 2>/dev/null
  set +a
fi

# Staging area: $WORK holds the tarball, $BUNDLE the tree to be packed.
WORK="$HOME/.surrogate/state/v10-bundle"
BUNDLE="$WORK/bundle"
# One mkdir -p creates WORK, BUNDLE and every category subdir in one shot.
mkdir -p "$BUNDLE"/{vault,patterns,memory,skills,agents,arkship-decisions,axentx-decisions,conversations,feature-builds}

# log MESSAGE... — timestamped progress line on stdout.
log() { printf '[%s] %s\n' "$(date '+%Y-%m-%dT%H:%M:%S')" "$*"; }
log "═══ V14 source bundle ═══"

# 1. Obsidian Vault (knowledge + patterns) — plain file copy, ~14 MB.
log "── copy Vault knowledge + patterns ──"
HUB="$HOME/Documents/Obsidian Vault/AI-Hub"
# Top-level knowledge notes; best-effort, a missing dir is not fatal.
cp -r "$HUB/knowledge"/*.md "$BUNDLE/vault/" 2>/dev/null || true
# Also grab any trends-2026 subtree nested inside knowledge/.
find "$HUB/knowledge" -type d -name "trends-2026" -exec cp -r {} "$BUNDLE/vault/" \; 2>/dev/null
# Patterns directory is optional.
if [[ -d "$HUB/patterns" ]]; then
  cp -r "$HUB/patterns"/* "$BUNDLE/patterns/" 2>/dev/null || true
fi
# md_count DIR — number of *.md entries under DIR (0 if DIR missing).
md_count() { find "$1" -name "*.md" 2>/dev/null | wc -l | tr -d ' '; }
log " vault: $(md_count "$BUNDLE/vault") files"
log " patterns: $(md_count "$BUNDLE/patterns") files"

# 2. ~/.claude/memory — small markdown working notes.
log "── copy .claude/memory ──"
cp "$HOME/.claude/memory"/*.md "$BUNDLE/memory/" 2>/dev/null || true
log " memory: $(ls "$BUNDLE/memory" 2>/dev/null | wc -l | tr -d ' ') files"
# 3. SKILL.md mirror — anthropic + community + local skill definitions.
# Each SKILL.md is renamed skill-<n>-<parent-dir>.md so files from
# different trees cannot clobber each other.
log "── copy SKILL.md ──"
i=0
{
find "$HOME/Documents/Obsidian Vault/AI-Hub/skills" -name "SKILL.md" -type f 2>/dev/null
find "$HOME/.claude/skills" -name "SKILL.md" -type f 2>/dev/null
find "$HOME/.claude/plugins/cache" -name "SKILL.md" -type f 2>/dev/null
} | while IFS= read -r f; do
# IFS= keeps leading/trailing whitespace in paths intact (plain
# `read -r f` would trim it and feed a mangled path to dirname).
# NOTE: this loop runs in a pipeline subshell — $i persists across
# iterations but not after the loop; fine, it's only for unique names.
name="skill-$i-$(basename "$(dirname "$f")").md"
cp "$f" "$BUNDLE/skills/$name" 2>/dev/null
i=$((i+1))
done
log " skills: $(ls "$BUNDLE/skills" 2>/dev/null | wc -l | tr -d ' ') files"
# 4. Claude agents — user-level plus any shipped inside plugins.
log "── copy agents ──"
cp "$HOME/.claude/agents"/*.md "$BUNDLE/agents/" 2>/dev/null || true
i=0
# IFS= preserves whitespace in paths; -r keeps backslashes literal.
# plugin-<n>- prefix avoids name collisions between plugins.
find "$HOME/.claude/plugins" -path "*agents*" -name "*.md" -type f 2>/dev/null | while IFS= read -r f; do
cp "$f" "$BUNDLE/agents/plugin-$i-$(basename "$f")" 2>/dev/null
i=$((i+1))
done
log " agents: $(ls "$BUNDLE/agents" 2>/dev/null | wc -l | tr -d ' ') files"
# 5. arkship decision records.
log "── copy arkship decisions ──"
cp "$HOME/axentx/arkship/decisions"/*.md "$BUNDLE/arkship-decisions/" 2>/dev/null || true
log " arkship-decisions: $(ls "$BUNDLE/arkship-decisions" 2>/dev/null | wc -l | tr -d ' ') files"

# 6. Per-project decision dirs under ~/axentx (Costinel/Vanguard/etc).
# Output files get a <project>- prefix (the dir above decisions/) so
# same-named decision files from different projects don't collide.
# (Removed an unused i=0 counter that this section never referenced.)
log "── copy axentx project decisions ──"
find "$HOME/axentx" -maxdepth 4 -name "decisions" -type d 2>/dev/null | while IFS= read -r d; do
proj="$(basename "$(dirname "$d")")"
find "$d" -name "*.md" -type f 2>/dev/null | while IFS= read -r f; do
cp "$f" "$BUNDLE/axentx-decisions/${proj}-$(basename "$f")" 2>/dev/null
done
done
log " axentx-decisions: $(ls "$BUNDLE/axentx-decisions" 2>/dev/null | wc -l | tr -d ' ') files"
# 7. Conversations — extract feature-build request/response pairs ONLY.
# Filtering here keeps the bundle small (vs ~647 MB of raw session logs).
log "── extract feature-build pairs from conversations ──"
python3 - <<'PYEOF'
import json, re, os
from pathlib import Path

PROJ = Path.home() / ".claude/projects"
OUT = Path.home() / ".surrogate/state/v10-bundle/bundle/conversations"
OUT.mkdir(parents=True, exist_ok=True)

# A user turn counts as a feature-build request if it matches any of
# these (English + Thai imperative verbs / intent phrases).
FEATURE_PATTERNS = [
    r"\b(build|make|add|create|implement|wire|bake|train|ingest|distill)\s+\w+",
    r"(ทำ|สร้าง|เพิ่ม|implement|train|เทรน|fine[-_]?tune)\s+",
    r"(I want|I need|I'd like|ผมอยาก|อยาก|ต้องการ)",
]

files = sorted(PROJ.rglob("*.jsonl"))
print(f" scanning {len(files)} session files...")
n_total = 0
for j, fp in enumerate(files):
    pairs = []
    last_user = None
    try:
        for line in fp.read_text(errors="replace").splitlines():
            try:
                ev = json.loads(line)
            except Exception:  # malformed JSONL row — skip (was a bare except)
                continue
            msg = ev.get("message", {})
            role = msg.get("role")
            c = msg.get("content", "")
            if isinstance(c, list):
                # Structured content blocks: keep only the text parts.
                c = "\n".join(b.get("text", "") for b in c
                              if isinstance(b, dict) and b.get("type") == "text")
            if not isinstance(c, str):
                c = str(c)
            if role == "user":
                last_user = c
            elif role == "assistant" and last_user:
                # Length gates drop trivial acks and giant dumps.
                if 50 < len(last_user) < 4000 and 100 < len(c) < 8000:
                    if any(re.search(p, last_user, re.I) for p in FEATURE_PATTERNS):
                        pairs.append((last_user, c))
                last_user = None
    except Exception:
        continue  # unreadable session file — skip it entirely
    if pairs:
        out_md = OUT / (fp.parent.name + "__" + fp.stem + ".md")
        with out_md.open("w") as f:
            f.write(f"# Session {fp.stem}\n\n")
            # Cap at 30 pairs/session and truncate turns to bound size.
            for u, a in pairs[:30]:
                f.write(f"## User\n{u[:3000]}\n\n## Assistant\n{a[:6000]}\n\n---\n\n")
        n_total += len(pairs)
    if (j+1) % 100 == 0:
        print(f" [{j+1}/{len(files)}] {n_total} pairs extracted")
print(f" total: {n_total} feature-build pairs from {len(files)} sessions")
PYEOF
# Mirror conversations into feature-builds/ — identical content; the
# distiller tags the two directories with different "kind" labels.
cp -r "$BUNDLE/conversations"/* "$BUNDLE/feature-builds/" 2>/dev/null || true

# Pack the whole bundle tree into one gzip'd tarball.
log ""
log "── tar + push to HF ──"
TARBALL="$WORK/bundle.tar.gz"
# -C changes into $WORK first so archive paths stay relative (bundle/...).
tar -czf "$TARBALL" -C "$WORK" bundle/
# du -sh output is "<size>\t<path>"; take the size column.
SIZE=$(du -sh "$TARBALL" | cut -f1)
log " bundle: $TARBALL ($SIZE)"
# Token preference: HF_TOKEN_PRO_WRITE (write scope + dedicated
# rate-limit pool) → HF_TOKEN_PRO → HF_TOKEN. Plain HF_TOKEN often hits
# the 2500 req/5min ceiling from research agents.
if [[ -n "${HF_TOKEN_PRO_WRITE:-}" ]]; then
  HF_USE_TOKEN="$HF_TOKEN_PRO_WRITE"
elif [[ -n "${HF_TOKEN_PRO:-}" ]]; then
  HF_USE_TOKEN="$HF_TOKEN_PRO"
else
  HF_USE_TOKEN="${HF_TOKEN:-}"
fi
if [[ -z "$HF_USE_TOKEN" ]]; then
  log " ✗ no HF token set in ~/.hermes/.env"
  exit 1
fi
export HF_USE_TOKEN
# Upload the tarball to the HF dataset repo. The Python body is a quoted
# heredoc (no shell expansion inside it); the tarball path travels via
# the environment instead of being spliced into a Python string literal,
# which would break on quotes/backslashes in $HOME.
export TARBALL
python3 - <<'PYEOF'
import os
from huggingface_hub import HfApi, create_repo

api = HfApi(token=os.environ["HF_USE_TOKEN"])
repo = "axentx/surrogate-1-v10-source-bundle"
# Repo may already exist — exist_ok handles that; log anything else.
try:
    create_repo(repo, repo_type="dataset", exist_ok=True, private=False)
except Exception as e:
    print(f" create_repo: {e}")
api.upload_file(path_or_fileobj=os.environ["TARBALL"], path_in_repo="bundle.tar.gz",
                repo_id=repo, repo_type="dataset",
                commit_message="V14 source bundle — owner artifacts + conversation feature-pairs")
print(f" ✓ pushed → https://huggingface.co/datasets/{repo}")
PYEOF
log ""
log "═══ done — bundle on HF, ready for V14 Kaggle kernel ═══"
log "Next: upload ~/Desktop/surrogate-1-train-v14-unified.py to Kaggle UI Replace File → Save Version"