nothex committed on
Commit
b2e76a8
·
1 Parent(s): ffa9f99

fix: avoid hugging face build-time model download

Browse files
.env.example CHANGED
@@ -41,6 +41,9 @@ SUPABASE_JWT_SECRET=****
41
  COHERE_API_KEY=****
42
 
43
  HF_HUB_DISABLE_SYMLINKS_WARNING=1
 
 
 
44
  # Hugging Face
45
  HF_TOKEN=****
46
 
 
41
  COHERE_API_KEY=****
42
 
43
  HF_HUB_DISABLE_SYMLINKS_WARNING=1
44
+ HF_HUB_DISABLE_XET=1
45
+ HF_HUB_ETAG_TIMEOUT=10
46
+ HF_HUB_DOWNLOAD_TIMEOUT=60
47
  # Hugging Face
48
  HF_TOKEN=****
49
 
ARCHITECTURE.md CHANGED
@@ -645,6 +645,7 @@ Login
645
  | `DOCS_ENABLED` | Enable /docs and /redoc (set `false` in prod) | No |
646
  | `LOG_LEVEL` | Logging verbosity (`INFO` or `DEBUG`) | No |
647
  | `AUTO_START_CELERY` | Auto-spawn Celery subprocess on startup | No |
 
648
 
649
  ---
650
 
 
645
  | `DOCS_ENABLED` | Enable /docs and /redoc (set `false` in prod) | No |
646
  | `LOG_LEVEL` | Logging verbosity (`INFO` or `DEBUG`) | No |
647
  | `AUTO_START_CELERY` | Auto-spawn Celery subprocess on startup | No |
648
+ | `HF_HUB_DISABLE_XET` | Disable Xet-backed model downloads during build/runtime | No |
649
 
650
  ---
651
 
Dockerfile CHANGED
@@ -16,6 +16,9 @@ RUN apt-get update && apt-get install -y \
16
  RUN useradd -m -u 1000 user
17
  USER user
18
  ENV PATH="/home/user/.local/bin:$PATH"
 
 
 
19
 
20
  # 4. Set the working directory
21
  WORKDIR /app
@@ -27,9 +30,10 @@ RUN pip install --no-cache-dir -r requirements.txt
27
  # 6. Copy the rest of your project files
28
  COPY --chown=user:user . .
29
 
30
- # 7. Pre-build ML assets (downloads models to cache, trains intent classifier)
31
- ARG PREBUILD_ML_ASSETS=1
32
- ARG MORPHEUS_BUILD_ASSETS_MODE=light
 
33
  RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
34
  MORPHEUS_BUILD_ASSETS_MODE=$MORPHEUS_BUILD_ASSETS_MODE python -m backend.core.build_ml_assets ; \
35
  else \
@@ -38,4 +42,4 @@ RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
38
 
39
  # 8. Start FastAPI (7860 is the HF standard, but Railway uses $PORT)
40
  ENV PORT=7860
41
- CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT}"]
 
16
  RUN useradd -m -u 1000 user
17
  USER user
18
  ENV PATH="/home/user/.local/bin:$PATH"
19
+ ENV HF_HUB_DISABLE_XET=1 \
20
+ HF_HUB_ETAG_TIMEOUT=10 \
21
+ HF_HUB_DOWNLOAD_TIMEOUT=60
22
 
23
  # 4. Set the working directory
24
  WORKDIR /app
 
30
  # 6. Copy the rest of your project files
31
  COPY --chown=user:user . .
32
 
33
+ # 7. Optional ML asset pre-build. Disabled by default so HF builds do not hang
34
+ # on unauthenticated model downloads; runtime code falls back safely if needed.
35
+ ARG PREBUILD_ML_ASSETS=0
36
+ ARG MORPHEUS_BUILD_ASSETS_MODE=skip
37
  RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
38
  MORPHEUS_BUILD_ASSETS_MODE=$MORPHEUS_BUILD_ASSETS_MODE python -m backend.core.build_ml_assets ; \
39
  else \
 
42
 
43
  # 8. Start FastAPI (7860 is the HF standard, but Railway uses $PORT)
44
  ENV PORT=7860
45
+ CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT}"]
backend/core/build_ml_assets.py CHANGED
@@ -25,10 +25,17 @@ def build_assets():
25
  # Keep train/upload logic local-only in that case.
26
  os.environ.setdefault("SUPABASE_URL", "")
27
  os.environ.setdefault("SUPABASE_SERVICE_KEY", "")
 
 
 
28
 
29
  mode = os.getenv("MORPHEUS_BUILD_ASSETS_MODE", "light").strip().lower()
30
  log.info("Build asset mode: %s", mode)
31
 
 
 
 
 
32
  # 1. Optional pre-download sentence-transformers (used by Intent Classifier)
33
  if mode in {"light", "full"}:
34
  log.info("Downloading all-MiniLM-L6-v2 embedding model...")
 
25
  # Keep train/upload logic local-only in that case.
26
  os.environ.setdefault("SUPABASE_URL", "")
27
  os.environ.setdefault("SUPABASE_SERVICE_KEY", "")
28
+ os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
29
+ os.environ.setdefault("HF_HUB_ETAG_TIMEOUT", "10")
30
+ os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60")
31
 
32
  mode = os.getenv("MORPHEUS_BUILD_ASSETS_MODE", "light").strip().lower()
33
  log.info("Build asset mode: %s", mode)
34
 
35
+ if mode in {"skip", "none", "off"}:
36
+ log.info("Skipping ML asset pre-build.")
37
+ return
38
+
39
  # 1. Optional pre-download sentence-transformers (used by Intent Classifier)
40
  if mode in {"light", "full"}:
41
  log.info("Downloading all-MiniLM-L6-v2 embedding model...")
backend/core/pipeline.py CHANGED
@@ -3976,7 +3976,25 @@ def analyse_intent(
3976
  "intent_model_version": getattr(intent_classifier, "status", lambda: {})().get("model_version"),
3977
  }
3978
 
3979
- result = intent_classifier.predict(query, has_category, has_history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3980
  needs_clar = result["needs_clarification"]
3981
  confidence = result["confidence"]
3982
  model_version = result.get("model_version")
 
3976
  "intent_model_version": getattr(intent_classifier, "status", lambda: {})().get("model_version"),
3977
  }
3978
 
3979
+ try:
3980
+ result = intent_classifier.predict(query, has_category, has_history)
3981
+ except Exception as exc:
3982
+ log.warning(
3983
+ "Intent classifier prediction unavailable; proceeding without clarification: %s",
3984
+ exc,
3985
+ )
3986
+ return {
3987
+ "is_clear": True,
3988
+ "enriched_query": query,
3989
+ "clarification_question": None,
3990
+ "route_class": route_class,
3991
+ "route_reason": route_decision.route_reason,
3992
+ "intent_needs_clarification": False,
3993
+ "intent_confidence": None,
3994
+ "intent_model_version": getattr(
3995
+ intent_classifier, "status", lambda: {}
3996
+ )().get("model_version"),
3997
+ }
3998
  needs_clar = result["needs_clarification"]
3999
  confidence = result["confidence"]
4000
  model_version = result.get("model_version")
requirements.txt CHANGED
@@ -11,6 +11,9 @@ unstructured[pdf]
11
  pdf2image
12
  pdfminer.six
13
  numpy==1.26.4
 
 
 
14
  unstructured[paddlepaddle]
15
  paddleocr==2.7.3
16
  paddlepaddle==2.6.2
 
11
  pdf2image
12
  pdfminer.six
13
  numpy==1.26.4
14
+ --extra-index-url https://download.pytorch.org/whl/cpu
15
+ torch==2.5.1+cpu
16
+ torchvision==0.20.1+cpu
17
  unstructured[paddlepaddle]
18
  paddleocr==2.7.3
19
  paddlepaddle==2.6.2