# Voice Cloner - Docker image for Render (CPU)
FROM python:3.12-slim

WORKDIR /app

# System deps: espeak-ng for phonemizer, libsndfile for soundfile.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        espeak-ng \
        libespeak-ng-dev \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime user with a fixed UID/GID. Its home is /app, so any
# per-user cache a library writes under $HOME stays inside a writable tree.
# The directories the app expects (samples, Hugging Face cache) are created
# here, while /app is still empty, so the recursive chown is cheap.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home-dir /app \
        --shell /usr/sbin/nologin app \
    && mkdir -p samples .hf_cache/hub \
    && chown -R app:app /app

# Point phonemizer at espeak-ng (Debian/Ubuntu path).
# NOTE(review): this path is specific to x86_64 images; it must change if the
# image is ever built for another architecture.
ENV PHONEMIZER_ESPEAK_LIBRARY=/usr/lib/x86_64-linux-gnu/libespeak-ng.so

# Hugging Face cache (writable at runtime)
ENV HF_HOME=/app/.hf_cache \
    HUGGINGFACE_HUB_CACHE=/app/.hf_cache/hub \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1

# Torch/Python runtime settings:
#  - CUDA_VISIBLE_DEVICES is empty because there is no GPU on Render.
#  - PYTHONUNBUFFERED makes Render logs show startup output immediately.
ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
    CUDA_VISIBLE_DEVICES="" \
    PYTHONUNBUFFERED=1

# Install the CPU-only torch stack first, then the project deps.
# This avoids CUDA-linked wheels that fail with libcudart.so.* on CPU-only hosts.
# Only requirements.txt is copied at this point, so this layer stays cached
# until the dependency list itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
        torch==2.8.0 torchaudio==2.8.0 \
    && pip install --no-cache-dir -r requirements.txt

# Application source, owned by the runtime user so no follow-up chown layer
# is needed.
COPY --chown=app:app . .

# Drop root privileges for everything that runs in the container.
USER app

# Render sets PORT; app reads it and binds to 0.0.0.0
EXPOSE 7860

CMD ["python", "app.py"]