Spaces:
Runtime error
Runtime error
fix(audit-2026-04-29): throttle gh-actions-ticker 60s→600s + harden /data symlink recreation
Browse files
Audit findings:
1. gh-actions-ticker, firing every 60s, piled up 200 jobs against the free-tier
cap of 20 concurrent jobs → 17h queue depth, 0 completions. Throttling to 600s lets the queue drain.
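2. Daemon writes to /data/logs were failing with Errno 5 (I/O error) because of a stale symlink. Defensively
mkdir the backing directory on each boot and recreate the symlink whenever it is missing or broken. Add a marker-write probe that logs a FATAL message
when the persistent mount is truly broken.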
- bin/gh-actions-ticker.sh +4 -3
- start.sh +12 -1
bin/gh-actions-ticker.sh
CHANGED
|
@@ -17,9 +17,10 @@ set -uo pipefail
|
|
| 17 |
LOG="$HOME/.surrogate/logs/gh-actions-ticker.log"
|
| 18 |
mkdir -p "$(dirname "$LOG")"
|
| 19 |
|
| 20 |
-
TICK_SEC="${GH_TICK_SEC:-
|
| 21 |
-
#
|
| 22 |
-
#
|
|
|
|
| 23 |
|
| 24 |
dispatch() {
|
| 25 |
local repo="$1"
|
|
|
|
| 17 |
LOG="$HOME/.surrogate/logs/gh-actions-ticker.log"
|
| 18 |
mkdir -p "$(dirname "$LOG")"
|
| 19 |
|
| 20 |
+
TICK_SEC="${GH_TICK_SEC:-600}" # Throttled 60s→600s (audit 2026-04-29):
|
| 21 |
+
# 60s burst piled 200 jobs vs free-tier 20-concurrent
|
| 22 |
+
# cap, 17h queue depth, zero completions. 600s lets
|
| 23 |
+
# the queue actually drain. Override via GH_TICK_SEC.
|
| 24 |
|
| 25 |
dispatch() {
|
| 26 |
local repo="$1"
|
start.sh
CHANGED
|
@@ -39,10 +39,21 @@ if [[ -d "$DATA" ]] && [[ -w "$DATA" ]]; then
|
|
| 39 |
target="${spec%%:*}"
|
| 40 |
link="${spec##*:}"
|
| 41 |
mkdir -p "$(dirname "$target")"
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
rm -rf "$target" 2>/dev/null
|
| 44 |
ln -sfn "$link" "$target"
|
| 45 |
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
done
|
| 47 |
|
| 48 |
# training-pairs.jsonl — single file persistence
|
|
|
|
| 39 |
target="${spec%%:*}"
|
| 40 |
link="${spec##*:}"
|
| 41 |
mkdir -p "$(dirname "$target")"
|
| 42 |
+
# Always ensure backing directory exists + writable. If the persistent
|
| 43 |
+
# /data mount becomes unavailable mid-run, daemon writes to symlinked
|
| 44 |
+
# path fail with Errno 5 I/O error (audit 2026-04-29). Recreating the
|
| 45 |
+
# link defensively each boot fixes stale-symlink cases.
|
| 46 |
+
mkdir -p "$link" 2>/dev/null || true
|
| 47 |
+
if [[ ! -L "$target" ]] || [[ ! -d "$target/" ]]; then
|
| 48 |
+
# Either not-a-symlink OR broken symlink (target unreachable)
|
| 49 |
rm -rf "$target" 2>/dev/null
|
| 50 |
ln -sfn "$link" "$target"
|
| 51 |
fi
|
| 52 |
+
# Final sanity probe — write a marker; if it fails, the persistent
|
| 53 |
+
# mount is broken regardless of the symlink, so log loudly.
|
| 54 |
+
if ! touch "$target/.boot-marker" 2>/dev/null; then
|
| 55 |
+
echo "[$(date +%H:%M:%S)] ⚠ FATAL: $target/ not writable — daemon log writes will Errno 5"
|
| 56 |
+
fi
|
| 57 |
done
|
| 58 |
|
| 59 |
# training-pairs.jsonl — single file persistence
|