Spaces:
Paused
Paused
siddeshwar-kagatikar committed on
Commit ·
8828fdd
1
Parent(s): 274f638
Fix training rc=127 by using python -m fallback and tee logs to stdout
Browse files
- scripts/space_start.sh +24 -7
scripts/space_start.sh
CHANGED
|
@@ -50,6 +50,19 @@ _stop_children() {
|
|
| 50 |
# (otherwise a crashed training run would tear down uvicorn too).
|
| 51 |
trap '_stop_children; exit 0' INT TERM
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
_run_training_supervised() {
|
| 54 |
if [ -n "${TRAIN_OUTPUT_DIR}" ]; then
|
| 55 |
OUTPUT_ARG="--train-output-dir ${TRAIN_OUTPUT_DIR}"
|
|
@@ -57,19 +70,23 @@ _run_training_supervised() {
|
|
| 57 |
OUTPUT_ARG=""
|
| 58 |
fi
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
if _is_true "$DRY_RUN_FLAG"; then
|
| 61 |
-
echo "[space_start] Running self-play in dry-run mode (logs: ${TRAIN_LOG_PATH})."
|
| 62 |
# shellcheck disable=SC2086
|
| 63 |
-
|
| 64 |
-
> "${TRAIN_LOG_PATH}"
|
| 65 |
else
|
| 66 |
-
echo "[space_start] Running self-play training in background (logs: ${TRAIN_LOG_PATH})."
|
| 67 |
# shellcheck disable=SC2086
|
| 68 |
-
|
| 69 |
-
> "${TRAIN_LOG_PATH}"
|
| 70 |
fi
|
| 71 |
TRAIN_PID=$!
|
| 72 |
-
echo "[space_start] training pid=${TRAIN_PID}"
|
| 73 |
|
| 74 |
# Watcher subshell: if training exits with non-zero status, log the
|
| 75 |
# failure but do NOT propagate it to the parent script. Uvicorn must
|
|
|
|
| 50 |
# (otherwise a crashed training run would tear down uvicorn too).
|
| 51 |
trap '_stop_children; exit 0' INT TERM
|
| 52 |
|
| 53 |
+
# Choose the command prefix used to launch training and store it in TRAIN_CMD.
#
# Globals:
#   TRAIN_CMD (written) - command prefix. It may contain several words
#                         (e.g. "python -m osint_env.cli"), so callers must
#                         expand it unquoted to let it split into argv words.
# Outputs:
#   Prints a notice to stdout when the console script is not on PATH.
# Returns:
#   0 always.
#
# The installed `osint-env` console script is preferred. When it is not on
# PATH (e.g. the user-site bin dir is missing) we fall back to running the
# module with the interpreter, which avoids rc=127 ("command not found").
# The interpreter name is probed too: some images only ship `python3`, and a
# hard-coded `python` would fail with the same rc=127 this fallback is meant
# to prevent. If neither name resolves, `python` is kept as the default.
_resolve_train_cmd() {
  if command -v osint-env >/dev/null 2>&1; then
    TRAIN_CMD="osint-env"
    return 0
  fi

  # Use `python3` only when `python` is absent AND `python3` is present.
  if command -v python >/dev/null 2>&1 || ! command -v python3 >/dev/null 2>&1; then
    TRAIN_CMD="python -m osint_env.cli"
  else
    TRAIN_CMD="python3 -m osint_env.cli"
  fi

  echo "[space_start] 'osint-env' not on PATH; falling back to '${TRAIN_CMD}'."
}
|
| 65 |
+
|
| 66 |
_run_training_supervised() {
|
| 67 |
if [ -n "${TRAIN_OUTPUT_DIR}" ]; then
|
| 68 |
OUTPUT_ARG="--train-output-dir ${TRAIN_OUTPUT_DIR}"
|
|
|
|
| 70 |
OUTPUT_ARG=""
|
| 71 |
fi
|
| 72 |
|
| 73 |
+
_resolve_train_cmd
|
| 74 |
+
|
| 75 |
+
: > "${TRAIN_LOG_PATH}" || true
|
| 76 |
+
|
| 77 |
if _is_true "$DRY_RUN_FLAG"; then
|
| 78 |
+
echo "[space_start] Running self-play in dry-run mode (logs: ${TRAIN_LOG_PATH}; mirrored to stdout)."
|
| 79 |
# shellcheck disable=SC2086
|
| 80 |
+
${TRAIN_CMD} train-self-play --config "${ENV_CONFIG_PATH}" --train-config "${TRAIN_CONFIG_PATH}" ${OUTPUT_ARG} --dry-run \
|
| 81 |
+
2>&1 | tee -a "${TRAIN_LOG_PATH}" &
|
| 82 |
else
|
| 83 |
+
echo "[space_start] Running self-play training in background (logs: ${TRAIN_LOG_PATH}; mirrored to stdout)."
|
| 84 |
# shellcheck disable=SC2086
|
| 85 |
+
${TRAIN_CMD} train-self-play --config "${ENV_CONFIG_PATH}" --train-config "${TRAIN_CONFIG_PATH}" ${OUTPUT_ARG} \
|
| 86 |
+
2>&1 | tee -a "${TRAIN_LOG_PATH}" &
|
| 87 |
fi
|
| 88 |
TRAIN_PID=$!
|
| 89 |
+
echo "[space_start] training pid=${TRAIN_PID} (cmd: ${TRAIN_CMD} train-self-play)"
|
| 90 |
|
| 91 |
# Watcher subshell: if training exits with non-zero status, log the
|
| 92 |
# failure but do NOT propagate it to the parent script. Uvicorn must
|