Spaces:

Imsachin010
/

salespath-env

Runtime error

Imsachin010 committed 16 days ago

Commit

9e54e20

1 Parent(s): c783ce8

Fix HF Space crashing: Resolve port mismatch, fix CRLF line endings, and force Python unbuffered output

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -21,7 +21,7 @@ COPY salespath_env/ ./salespath_env/
 # Copy and set permissions for the training script
 COPY run_hf_training.sh ./run_hf_training.sh
-RUN chmod +x ./run_hf_training.sh
 # Health check
 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \

 # Copy and set permissions for the training script
 COPY run_hf_training.sh ./run_hf_training.sh
+RUN sed -i 's/\r$//' ./run_hf_training.sh && chmod +x ./run_hf_training.sh
 # Health check
 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \

run_hf_training.sh CHANGED Viewed

@@ -1,16 +1,17 @@
 #!/bin/bash
-# Start the environment server in the background
 echo "Starting SalesPath environment server..."
-uvicorn salespath_env.server.app:app --host 0.0.0.0 --port 8000 &
 # Give the server a few seconds to start up completely
 sleep 5
 # Start the GRPO Training using standard HuggingFace (PEFT)
 echo "Starting 7B GRPO Training..."
-PYTORCH_ALLOC_CONF=expandable_segments:True python -m training.grpo_train \
     --mode grpo \
     --model-name Qwen/Qwen2.5-7B-Instruct \
     --grpo-steps 150 \
     --grpo-dataset-size 128 \

 #!/bin/bash
+# Start the environment server in the background (HF Spaces default port 7860)
 echo "Starting SalesPath environment server..."
+uvicorn salespath_env.server.app:app --host 0.0.0.0 --port 7860 &
 # Give the server a few seconds to start up completely
 sleep 5
 # Start the GRPO Training using standard HuggingFace (PEFT)
 echo "Starting 7B GRPO Training..."
+PYTORCH_ALLOC_CONF=expandable_segments:True python -u -m training.grpo_train \
     --mode grpo \
+    --env-url http://127.0.0.1:7860 \
     --model-name Qwen/Qwen2.5-7B-Instruct \
     --grpo-steps 150 \
     --grpo-dataset-size 128 \