nothex committed on
Commit
b2e76a8
·
1 Parent(s): ffa9f99

fix: avoid hugging face build-time model download

Browse files
.env.example CHANGED
@@ -41,6 +41,9 @@ SUPABASE_JWT_SECRET=****
41
  COHERE_API_KEY=****
42
 
43
  HF_HUB_DISABLE_SYMLINKS_WARNING=1
 
 
 
44
  # Hugging Face
45
  HF_TOKEN=****
46
 
 
41
  COHERE_API_KEY=****
42
 
43
  HF_HUB_DISABLE_SYMLINKS_WARNING=1
44
+ HF_HUB_DISABLE_XET=1
45
+ HF_HUB_ETAG_TIMEOUT=10
46
+ HF_HUB_DOWNLOAD_TIMEOUT=60
47
  # Hugging Face
48
  HF_TOKEN=****
49
 
ARCHITECTURE.md CHANGED
@@ -645,6 +645,7 @@ Login
645
  | `DOCS_ENABLED` | Enable /docs and /redoc (set `false` in prod) | No |
646
  | `LOG_LEVEL` | Logging verbosity (`INFO` or `DEBUG`) | No |
647
  | `AUTO_START_CELERY` | Auto-spawn Celery subprocess on startup | No |
 
648
 
649
  ---
650
 
 
645
  | `DOCS_ENABLED` | Enable /docs and /redoc (set `false` in prod) | No |
646
  | `LOG_LEVEL` | Logging verbosity (`INFO` or `DEBUG`) | No |
647
  | `AUTO_START_CELERY` | Auto-spawn Celery subprocess on startup | No |
648
+ | `HF_HUB_DISABLE_XET` | Disable Xet-backed model downloads during build/runtime | No |
649
 
650
  ---
651
 
Dockerfile CHANGED
@@ -16,6 +16,9 @@ RUN apt-get update && apt-get install -y \
16
  RUN useradd -m -u 1000 user
17
  USER user
18
  ENV PATH="/home/user/.local/bin:$PATH"
 
 
 
19
 
20
  # 4. Set the working directory
21
  WORKDIR /app
@@ -27,9 +30,10 @@ RUN pip install --no-cache-dir -r requirements.txt
27
  # 6. Copy the rest of your project files
28
  COPY --chown=user:user . .
29
 
30
- # 7. Pre-build ML assets (downloads models to cache, trains intent classifier)
31
- ARG PREBUILD_ML_ASSETS=1
32
- ARG MORPHEUS_BUILD_ASSETS_MODE=light
 
33
  RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
34
  MORPHEUS_BUILD_ASSETS_MODE=$MORPHEUS_BUILD_ASSETS_MODE python -m backend.core.build_ml_assets ; \
35
  else \
@@ -38,4 +42,4 @@ RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
38
 
39
  # 8. Start FastAPI (7860 is the HF standard, but Railway uses $PORT)
40
  ENV PORT=7860
41
- CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT}"]
 
16
  RUN useradd -m -u 1000 user
17
  USER user
18
  ENV PATH="/home/user/.local/bin:$PATH"
19
+ ENV HF_HUB_DISABLE_XET=1 \
20
+ HF_HUB_ETAG_TIMEOUT=10 \
21
+ HF_HUB_DOWNLOAD_TIMEOUT=60
22
 
23
  # 4. Set the working directory
24
  WORKDIR /app
 
30
  # 6. Copy the rest of your project files
31
  COPY --chown=user:user . .
32
 
33
+ # 7. Optional ML asset pre-build. Disabled by default so HF builds do not hang
34
+ # on unauthenticated model downloads; runtime code falls back safely if needed.
35
+ ARG PREBUILD_ML_ASSETS=0
36
+ ARG MORPHEUS_BUILD_ASSETS_MODE=skip
37
  RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
38
  MORPHEUS_BUILD_ASSETS_MODE=$MORPHEUS_BUILD_ASSETS_MODE python -m backend.core.build_ml_assets ; \
39
  else \
 
42
 
43
  # 8. Start FastAPI (7860 is the HF standard, but Railway uses $PORT)
44
  ENV PORT=7860
45
+ CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT}"]
backend/core/build_ml_assets.py CHANGED
@@ -25,10 +25,17 @@ def build_assets():
25
  # Keep train/upload logic local-only in that case.
26
  os.environ.setdefault("SUPABASE_URL", "")
27
  os.environ.setdefault("SUPABASE_SERVICE_KEY", "")
 
 
 
28
 
29
  mode = os.getenv("MORPHEUS_BUILD_ASSETS_MODE", "light").strip().lower()
30
  log.info("Build asset mode: %s", mode)
31
 
 
 
 
 
32
  # 1. Optional pre-download sentence-transformers (used by Intent Classifier)
33
  if mode in {"light", "full"}:
34
  log.info("Downloading all-MiniLM-L6-v2 embedding model...")
 
25
  # Keep train/upload logic local-only in that case.
26
  os.environ.setdefault("SUPABASE_URL", "")
27
  os.environ.setdefault("SUPABASE_SERVICE_KEY", "")
28
+ os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
29
+ os.environ.setdefault("HF_HUB_ETAG_TIMEOUT", "10")
30
+ os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60")
31
 
32
  mode = os.getenv("MORPHEUS_BUILD_ASSETS_MODE", "light").strip().lower()
33
  log.info("Build asset mode: %s", mode)
34
 
35
+ if mode in {"skip", "none", "off"}:
36
+ log.info("Skipping ML asset pre-build.")
37
+ return
38
+
39
  # 1. Optional pre-download sentence-transformers (used by Intent Classifier)
40
  if mode in {"light", "full"}:
41
  log.info("Downloading all-MiniLM-L6-v2 embedding model...")
backend/core/pipeline.py CHANGED
@@ -3976,7 +3976,25 @@ def analyse_intent(
3976
  "intent_model_version": getattr(intent_classifier, "status", lambda: {})().get("model_version"),
3977
  }
3978
 
3979
- result = intent_classifier.predict(query, has_category, has_history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3980
  needs_clar = result["needs_clarification"]
3981
  confidence = result["confidence"]
3982
  model_version = result.get("model_version")
 
3976
  "intent_model_version": getattr(intent_classifier, "status", lambda: {})().get("model_version"),
3977
  }
3978
 
3979
+ try:
3980
+ result = intent_classifier.predict(query, has_category, has_history)
3981
+ except Exception as exc:
3982
+ log.warning(
3983
+ "Intent classifier prediction unavailable; proceeding without clarification: %s",
3984
+ exc,
3985
+ )
3986
+ return {
3987
+ "is_clear": True,
3988
+ "enriched_query": query,
3989
+ "clarification_question": None,
3990
+ "route_class": route_class,
3991
+ "route_reason": route_decision.route_reason,
3992
+ "intent_needs_clarification": False,
3993
+ "intent_confidence": None,
3994
+ "intent_model_version": getattr(
3995
+ intent_classifier, "status", lambda: {}
3996
+ )().get("model_version"),
3997
+ }
3998
  needs_clar = result["needs_clarification"]
3999
  confidence = result["confidence"]
4000
  model_version = result.get("model_version")
requirements.txt CHANGED
@@ -11,6 +11,9 @@ unstructured[pdf]
11
  pdf2image
12
  pdfminer.six
13
  numpy==1.26.4
 
 
 
14
  unstructured[paddlepaddle]
15
  paddleocr==2.7.3
16
  paddlepaddle==2.6.2
 
11
  pdf2image
12
  pdfminer.six
13
  numpy==1.26.4
14
+ --extra-index-url https://download.pytorch.org/whl/cpu
15
+ torch==2.5.1+cpu
16
+ torchvision==0.20.1+cpu
17
  unstructured[paddlepaddle]
18
  paddleocr==2.7.3
19
  paddlepaddle==2.6.2