vimalk78 committed on
Commit
4a0fccf
·
1 Parent(s): 00980d4

fix: add CUDA warmup and memory config for Jetson GPU support

Browse files

- Add CUDA context warmup before heavy model load to avoid allocator crash
- Set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True for unified memory
- Make model configurable via THEMATIC_MODEL_NAME env var
- Document smaller model option (all-MiniLM-L6-v2) as fallback

Dockerfile.jetson CHANGED
@@ -59,5 +59,10 @@ ENV CACHE_DIR=/app/backend-py/cache
59
  ENV NLTK_DATA=/app/backend-py/cache/nltk_data
60
  ENV VOCAB_SOURCE=norvig
61
  ENV NORVIG_VOCAB_PATH=/app/backend-py/words/norvig/count_1w100k.txt
 
 
 
 
 
62
 
63
  CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
 
59
  ENV NLTK_DATA=/app/backend-py/cache/nltk_data
60
  ENV VOCAB_SOURCE=norvig
61
  ENV NORVIG_VOCAB_PATH=/app/backend-py/words/norvig/count_1w100k.txt
62
+ # CUDA memory allocation config for Jetson unified memory
63
+ ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
64
+ # Model: all-mpnet-base-v2 (420MB, best quality) or all-MiniLM-L6-v2 (90MB, faster)
65
+ # Set THEMATIC_MODEL_NAME=all-MiniLM-L6-v2 if you encounter GPU memory issues
66
+ ENV THEMATIC_MODEL_NAME=all-mpnet-base-v2
67
 
68
  CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
crossword-app/backend-py/src/services/thematic_word_service.py CHANGED
@@ -470,6 +470,14 @@ class ThematicWordService:
470
  logger.info(f"🖥️ Using device: {device}")
471
  self.device = device # Store device for later use
472
 
 
 
 
 
 
 
 
 
473
  # Load model on CPU first, then move to target device
474
  # This works around CUDA initialization issues on Jetson unified memory
475
  logger.info(f"📥 Loading model on CPU first...")
 
470
  logger.info(f"🖥️ Using device: {device}")
471
  self.device = device # Store device for later use
472
 
473
+ # CUDA warmup for Jetson - initialize CUDA context before heavy model load
474
+ if device == 'cuda':
475
+ logger.info(f"🔥 CUDA warmup - initializing context...")
476
+ warmup_tensor = torch.zeros(1, device='cuda')
477
+ del warmup_tensor
478
+ torch.cuda.empty_cache()
479
+ logger.info(f"✅ CUDA warmup complete")
480
+
481
  # Load model on CPU first, then move to target device
482
  # This works around CUDA initialization issues on Jetson unified memory
483
  logger.info(f"📥 Loading model on CPU first...")
run-jetson.sh CHANGED
@@ -14,15 +14,28 @@ show_usage() {
14
  echo " shell - Run with bash shell for debugging"
15
  echo " test - Test GPU access in container"
16
  echo ""
 
 
 
 
 
 
 
17
  }
18
 
19
  IMAGE_NAME="crossword-app:jetson"
20
 
 
 
 
 
 
21
  # GPU access for Jetson requires --runtime nvidia (not --gpus all)
22
  DOCKER_ARGS="--rm -p 7860:7860 --runtime nvidia \
23
  -e ENABLE_DEBUG_TAB=true \
24
  -e VOCAB_SOURCE=norvig \
25
- -e DIFFICULTY_WEIGHT=0.2"
 
26
 
27
  build_image() {
28
  echo "🔨 Building Jetson Docker image..."
 
14
  echo " shell - Run with bash shell for debugging"
15
  echo " test - Test GPU access in container"
16
  echo ""
17
+ echo "Environment variables:"
18
+ echo " THEMATIC_MODEL_NAME - Model to use (default: all-mpnet-base-v2)"
19
+ echo " Use all-MiniLM-L6-v2 for lower GPU memory usage"
20
+ echo ""
21
+ echo "Example with smaller model:"
22
+ echo " THEMATIC_MODEL_NAME=all-MiniLM-L6-v2 $0 run"
23
+ echo ""
24
  }
25
 
26
  IMAGE_NAME="crossword-app:jetson"
27
 
28
+ # Model options:
29
+ # all-mpnet-base-v2 (420MB, best quality, default)
30
+ # all-MiniLM-L6-v2 (90MB, faster, use if GPU memory issues)
31
+ MODEL_NAME="${THEMATIC_MODEL_NAME:-all-mpnet-base-v2}"
32
+
33
  # GPU access for Jetson requires --runtime nvidia (not --gpus all)
34
  DOCKER_ARGS="--rm -p 7860:7860 --runtime nvidia \
35
  -e ENABLE_DEBUG_TAB=true \
36
  -e VOCAB_SOURCE=norvig \
37
+ -e DIFFICULTY_WEIGHT=0.2 \
38
+ -e THEMATIC_MODEL_NAME=$MODEL_NAME"
39
 
40
  build_image() {
41
  echo "🔨 Building Jetson Docker image..."