Spaces:

vimalk78
/

abc123

Running

vimalk78 committed on Jan 16

Commit

4a0fccf

1 Parent(s): 00980d4

fix: add CUDA warmup and memory config for Jetson GPU support

- Add CUDA context warmup before heavy model load to avoid allocator crash
- Set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True for unified memory
- Make model configurable via THEMATIC_MODEL_NAME env var
- Document smaller model option (all-MiniLM-L6-v2) as fallback

Files changed (3) hide show

Dockerfile.jetson +5 -0
crossword-app/backend-py/src/services/thematic_word_service.py +8 -0
run-jetson.sh +14 -1

Dockerfile.jetson CHANGED Viewed

@@ -59,5 +59,10 @@ ENV CACHE_DIR=/app/backend-py/cache
 ENV NLTK_DATA=/app/backend-py/cache/nltk_data
 ENV VOCAB_SOURCE=norvig
 ENV NORVIG_VOCAB_PATH=/app/backend-py/words/norvig/count_1w100k.txt
 CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

 ENV NLTK_DATA=/app/backend-py/cache/nltk_data
 ENV VOCAB_SOURCE=norvig
 ENV NORVIG_VOCAB_PATH=/app/backend-py/words/norvig/count_1w100k.txt
+# CUDA memory allocation config for Jetson unified memory
+ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+# Model: all-mpnet-base-v2 (420MB, best quality) or all-MiniLM-L6-v2 (90MB, faster)
+# Set THEMATIC_MODEL_NAME=all-MiniLM-L6-v2 if you encounter GPU memory issues
+ENV THEMATIC_MODEL_NAME=all-mpnet-base-v2
 CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

crossword-app/backend-py/src/services/thematic_word_service.py CHANGED Viewed

@@ -470,6 +470,14 @@ class ThematicWordService:
             logger.info(f"🖥️ Using device: {device}")
             self.device = device  # Store device for later use
             # Load model on CPU first, then move to target device
             # This works around CUDA initialization issues on Jetson unified memory
             logger.info(f"📥 Loading model on CPU first...")

             logger.info(f"🖥️ Using device: {device}")
             self.device = device  # Store device for later use
+            # CUDA warmup for Jetson - initialize CUDA context before heavy model load
+            if device == 'cuda':
+                logger.info(f"🔥 CUDA warmup - initializing context...")
+                warmup_tensor = torch.zeros(1, device='cuda')
+                del warmup_tensor
+                torch.cuda.empty_cache()
+                logger.info(f"✅ CUDA warmup complete")
             # Load model on CPU first, then move to target device
             # This works around CUDA initialization issues on Jetson unified memory
             logger.info(f"📥 Loading model on CPU first...")

run-jetson.sh CHANGED Viewed

@@ -14,15 +14,28 @@ show_usage() {
     echo "  shell    - Run with bash shell for debugging"
     echo "  test     - Test GPU access in container"
     echo ""
 }
 IMAGE_NAME="crossword-app:jetson"
 # GPU access for Jetson requires --runtime nvidia (not --gpus all)
 DOCKER_ARGS="--rm -p 7860:7860 --runtime nvidia \
     -e ENABLE_DEBUG_TAB=true \
     -e VOCAB_SOURCE=norvig \
-    -e DIFFICULTY_WEIGHT=0.2"
 build_image() {
     echo "🔨 Building Jetson Docker image..."

     echo "  shell    - Run with bash shell for debugging"
     echo "  test     - Test GPU access in container"
     echo ""
+    echo "Environment variables:"
+    echo "  THEMATIC_MODEL_NAME  - Model to use (default: all-mpnet-base-v2)"
+    echo "                         Use all-MiniLM-L6-v2 for lower GPU memory usage"
+    echo ""
+    echo "Example with smaller model:"
+    echo "  THEMATIC_MODEL_NAME=all-MiniLM-L6-v2 $0 run"
+    echo ""
 }
 IMAGE_NAME="crossword-app:jetson"
+# Model options:
+#   all-mpnet-base-v2 (420MB, best quality, default)
+#   all-MiniLM-L6-v2 (90MB, faster, use if GPU memory issues)
+MODEL_NAME="${THEMATIC_MODEL_NAME:-all-mpnet-base-v2}"
 # GPU access for Jetson requires --runtime nvidia (not --gpus all)
 DOCKER_ARGS="--rm -p 7860:7860 --runtime nvidia \
     -e ENABLE_DEBUG_TAB=true \
     -e VOCAB_SOURCE=norvig \
+    -e DIFFICULTY_WEIGHT=0.2 \
+    -e THEMATIC_MODEL_NAME=$MODEL_NAME"
 build_image() {
     echo "🔨 Building Jetson Docker image..."