# Surrogate-1 V14 — Mac one-shot bundle + upload.
# I/O ONLY (no LLM compute on Mac, allowed per ~/.claude/memory/feedback_train_into_surrogate.md).
#
# Bundles owner's 715+ knowledge artifacts + filtered conversation pairs into
# a single tar.gz, uploads to axentx/surrogate-1-v10-source-bundle on HF.
# Then the unified V14 Kaggle kernel pulls + distills + trains all in ONE run.
#
# Usage:
#   bash bin/v3/bundle-upload.sh
set -uo pipefail
[[ -f "$HOME/.hermes/.env" ]] && { set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a; }
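# ~/.hermes/.env is expected to provide the HF tokens checked before upload
# (HF_TOKEN_PRO_WRITE / HF_TOKEN_PRO / HF_TOKEN); a missing file is tolerated here,
# but a missing token aborts the push step below.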
WORK="$HOME/.surrogate/state/v10-bundle"
mkdir -p "$WORK"
BUNDLE="$WORK/bundle"
mkdir -p "$BUNDLE"/{vault,patterns,memory,skills,agents,arkship-decisions,axentx-decisions,conversations,feature-builds}
log() { echo "[$(date '+%Y-%m-%dT%H:%M:%S')] $*"; }
log "═══ V14 source bundle ═══"
# 1. Obsidian Vault (knowledge + patterns) — file copy, ~14 MB
log "── copy Vault knowledge + patterns ──"
cp -r "$HOME/Documents/Obsidian Vault/AI-Hub/knowledge"/*.md "$BUNDLE/vault/" 2>/dev/null || true
find "$HOME/Documents/Obsidian Vault/AI-Hub/knowledge" -type d -name "trends-2026" -exec cp -r {} "$BUNDLE/vault/" \; 2>/dev/null
[[ -d "$HOME/Documents/Obsidian Vault/AI-Hub/patterns" ]] && cp -r "$HOME/Documents/Obsidian Vault/AI-Hub/patterns"/* "$BUNDLE/patterns/" 2>/dev/null || true
log " vault: $(find "$BUNDLE/vault" -name "*.md" 2>/dev/null | wc -l | tr -d ' ') files"
log " patterns: $(find "$BUNDLE/patterns" -name "*.md" 2>/dev/null | wc -l | tr -d ' ') files"
# 2. .claude/memory — 27 files, 444 KB
log "── copy .claude/memory ──"
cp "$HOME/.claude/memory"/*.md "$BUNDLE/memory/" 2>/dev/null || true
log " memory: $(ls "$BUNDLE/memory" 2>/dev/null | wc -l | tr -d ' ') files"
# 3. SKILL.md mirror — anthropic + community + local
log "── copy SKILL.md ──"
i=0
{
  find "$HOME/Documents/Obsidian Vault/AI-Hub/skills" -name "SKILL.md" -type f 2>/dev/null
  find "$HOME/.claude/skills" -name "SKILL.md" -type f 2>/dev/null
  find "$HOME/.claude/plugins/cache" -name "SKILL.md" -type f 2>/dev/null
} | while read -r f; do
  name="skill-$i-$(basename "$(dirname "$f")").md"
  cp "$f" "$BUNDLE/skills/$name" 2>/dev/null
  i=$((i+1))
done
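# (the while loop runs in a pipeline subshell: it inherits i=0, and its increments
# never escape the pipeline; harmless, since i is only used to prefix filenames)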
| log " skills: $(ls "$BUNDLE/skills" 2>/dev/null | wc -l | tr -d ' ') files" | |
| # 4. Claude agents (user + plugin) | |
| log "── copy agents ──" | |
| cp "$HOME/.claude/agents"/*.md "$BUNDLE/agents/" 2>/dev/null || true | |
| i=0 | |
| find "$HOME/.claude/plugins" -path "*agents*" -name "*.md" -type f 2>/dev/null | while read -r f; do | |
| cp "$f" "$BUNDLE/agents/plugin-$i-$(basename "$f")" 2>/dev/null | |
| i=$((i+1)) | |
| done | |
| log " agents: $(ls "$BUNDLE/agents" 2>/dev/null | wc -l | tr -d ' ') files" | |
| # 5. arkship decisions | |
| log "── copy arkship decisions ──" | |
| cp "$HOME/axentx/arkship/decisions"/*.md "$BUNDLE/arkship-decisions/" 2>/dev/null || true | |
| log " arkship-decisions: $(ls "$BUNDLE/arkship-decisions" 2>/dev/null | wc -l | tr -d ' ') files" | |
| # 6. axentx project decisions (Costinel/Vanguard/etc) | |
| log "── copy axentx project decisions ──" | |
| i=0 | |
| find "$HOME/axentx" -maxdepth 4 -name "decisions" -type d 2>/dev/null | while read -r d; do | |
| proj="$(basename "$(dirname "$d")")" | |
| find "$d" -name "*.md" -type f 2>/dev/null | while read -r f; do | |
| cp "$f" "$BUNDLE/axentx-decisions/${proj}-$(basename "$f")" 2>/dev/null | |
| done | |
| done | |
| log " axentx-decisions: $(ls "$BUNDLE/axentx-decisions" 2>/dev/null | wc -l | tr -d ' ') files" | |
| # 7. Conversations — extract feature-build pairs ONLY (filter, lighter than 647MB raw) | |
| log "── extract feature-build pairs from 748 conversations ──" | |
| python3 - <<'PYEOF' | |
import json, re
from pathlib import Path
PROJ = Path.home() / ".claude/projects"
OUT = Path.home() / ".surrogate/state/v10-bundle/bundle/conversations"
OUT.mkdir(parents=True, exist_ok=True)
# Patterns that mark a "feature build" request (English + Thai).
FEATURE_PATTERNS = [
    r"\b(build|make|add|create|implement|wire|bake|train|ingest|distill)\s+\w+",
    # Thai verbs: do / create / add, plus "train" (transliterated)
    r"(ทำ|สร้าง|เพิ่ม|implement|train|เทรน|fine[-_]?tune)\s+",
    # "I want / I need / I'd like" in English and Thai
    r"(I want|I need|I'd like|ผมอยาก|อยาก|ต้องการ)",
]
files = sorted(PROJ.rglob("*.jsonl"))
print(f" scanning {len(files)} session files...")
n_total = 0
for j, fp in enumerate(files):
    pairs = []
    last_user = None
    try:
        for line in fp.read_text(errors="replace").splitlines():
            try:
                ev = json.loads(line)
            except json.JSONDecodeError:
                continue
            msg = ev.get("message", {})
            role = msg.get("role")
            c = msg.get("content", "")
            if isinstance(c, list):
                c = "\n".join(b.get("text", "") for b in c if isinstance(b, dict) and b.get("type") == "text")
            if not isinstance(c, str):
                c = str(c)
            if role == "user":
                last_user = c
            elif role == "assistant" and last_user:
                # keep mid-length exchanges only, and only feature-build requests
                if 50 < len(last_user) < 4000 and 100 < len(c) < 8000:
                    if any(re.search(p, last_user, re.I) for p in FEATURE_PATTERNS):
                        pairs.append((last_user, c))
                last_user = None
    except Exception:
        continue
    if pairs:
        out_md = OUT / (fp.parent.name + "__" + fp.stem + ".md")
        with out_md.open("w") as f:
            f.write(f"# Session {fp.stem}\n\n")
            # cap at 30 pairs per session; truncate long turns to bound bundle size
            for u, a in pairs[:30]:
                f.write(f"## User\n{u[:3000]}\n\n## Assistant\n{a[:6000]}\n\n---\n\n")
        n_total += len(pairs)
    if (j+1) % 100 == 0:
        print(f" [{j+1}/{len(files)}] {n_total} pairs extracted")
print(f" total: {n_total} feature-build pairs from {len(files)} sessions")
PYEOF
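log " conversations: $(ls "$BUNDLE/conversations" 2>/dev/null | wc -l | tr -d ' ') files"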
# Mirror conversations into feature-builds (same content, different "kind" tag in distiller)
cp -r "$BUNDLE/conversations"/* "$BUNDLE/feature-builds/" 2>/dev/null || true
# Tar + push
log ""
log "── tar + push to HF ──"
TARBALL="$WORK/bundle.tar.gz"
( cd "$WORK" && tar -czf bundle.tar.gz bundle/ )
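# (cd first so the archive is rooted at bundle/; the Kaggle kernel can then extract
# into a predictable top-level directory)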
SIZE=$(du -sh "$TARBALL" | awk '{print $1}')
log " bundle: $TARBALL ($SIZE)"
# Prefer HF_TOKEN_PRO_WRITE — has write scope + a dedicated rate-limit pool.
# HF_TOKEN often hits the 2500 req/5min ceiling from research agents.
HF_USE_TOKEN="${HF_TOKEN_PRO_WRITE:-${HF_TOKEN_PRO:-${HF_TOKEN:-}}}"
if [[ -z "$HF_USE_TOKEN" ]]; then
  log " ✗ no HF token set in ~/.hermes/.env"
  exit 1
fi
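# export (not just assign) so the inline Python below can read it via os.environ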
export HF_USE_TOKEN
# Unquoted heredoc: bash substitutes the tarball path into the Python source below.
python3 - <<PYEOF
import os
from huggingface_hub import HfApi, create_repo
api = HfApi(token=os.environ["HF_USE_TOKEN"])
repo = "axentx/surrogate-1-v10-source-bundle"
try:
    create_repo(repo, repo_type="dataset", exist_ok=True, private=False)
except Exception as e:
    print(f" create_repo: {e}")
api.upload_file(path_or_fileobj="$TARBALL", path_in_repo="bundle.tar.gz",
                repo_id=repo, repo_type="dataset",
                commit_message="V14 source bundle — owner artifacts + conversation feature-pairs")
print(f" ✓ pushed → https://huggingface.co/datasets/{repo}")
PYEOF
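# The Kaggle side is then expected to pull this bundle roughly like so
# (sketch only; the real logic lives in surrogate-1-train-v14-unified.py, not shown here):
#   from huggingface_hub import hf_hub_download
#   tar = hf_hub_download("axentx/surrogate-1-v10-source-bundle", "bundle.tar.gz",
#                         repo_type="dataset")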
| log "" | |
| log "═══ done — bundle on HF, ready for V14 Kaggle kernel ═══" | |
| log "Next: upload ~/Desktop/surrogate-1-train-v14-unified.py to Kaggle UI Replace File → Save Version" | |