File size: 3,028 Bytes

b2c2640

#!/usr/bin/env bash
# MiniCPM-o 4.5 evaluation environment setup.
#
# Creates a separate conda env 'minicpmo' because MiniCPM-o has its own
# dependency stack (librosa, decord, sentencepiece pin, etc.) that may conflict
# with the Qwen3-Omni 'video' env. Safer to keep them isolated.
#
# Usage:
#   bash setup_env.sh
#
# Strict mode: abort on command failure, on use of an unset variable, and on
# failure of any pipeline stage.
set -o errexit -o nounset -o pipefail

# Settings overridable from the caller's environment; each default is applied
# only when the variable is unset or empty.
: "${CONDA_ENV:=minicpmo}"              # name of the conda env to create/activate
: "${PYTHON_VER:=3.12}"                 # python version requested for a new env
: "${INSTALL_DIR:=${HOME}/anaconda3}"   # where conda's profile script is looked up

# Print a progress message on stdout, prefixed with "[setup_env] ".
# All arguments are joined with single spaces into one line.
log() {
  printf '[setup_env] %s\n' "$*"
}

log "Bootstrapping conda..."
# When no `conda` command is visible, fall back to the profile script under
# INSTALL_DIR; if that is missing too there is nothing to work with, so abort.
if ! command -v conda &>/dev/null; then
  if [[ -f "${INSTALL_DIR}/etc/profile.d/conda.sh" ]]; then
    # shellcheck source=/dev/null
    source "${INSTALL_DIR}/etc/profile.d/conda.sh"
  else
    # Diagnostics belong on stderr so they survive stdout redirection.
    echo "Error: conda not found. Install Anaconda first (see CleverHans-Evaluation/setup_env.sh)." >&2
    exit 1
  fi
fi
# Capture the hook before evaluating it: a failing `conda shell.bash hook`
# then aborts the script under `set -e` instead of being masked inside the
# argument to eval (which would silently evaluate an empty string).
conda_hook="$(conda shell.bash hook)"
# eval of conda's own hook output is the documented way to enable
# `conda activate` in a non-interactive shell.
eval "${conda_hook}"

log "Creating conda env '${CONDA_ENV}' (python=${PYTHON_VER})..."
# Look for an exact match of the env name in the first column of
# `conda env list`. awk consumes the listing to EOF before exiting, so no
# upstream stage can be killed by SIGPIPE. With `grep -q` (which exits on the
# first match) plus `pipefail`, such a SIGPIPE would make the pipeline report
# failure and an existing env would be handed to `conda create -y`.
# The `""` concatenation forces a string (not numeric) comparison.
if conda env list \
  | awk -v want="${CONDA_ENV}" '($1 "") == (want "") { found = 1 } END { exit !found }'; then
  log "Env '${CONDA_ENV}' already exists; activating."
else
  conda create -n "${CONDA_ENV}" "python=${PYTHON_VER}" -y
fi
# Both paths end with the env active for the installs that follow.
conda activate "${CONDA_ENV}"

log "Installing FFmpeg 6 (for audio/video decoding)..."
# Best-effort: a failed install is reported as a warning and setup continues.
if ! conda install -y -c conda-forge "ffmpeg>=6,<7"; then
  log "Warning: conda-forge ffmpeg failed."
fi

log "Installing PyTorch 2.6 (MiniCPM-o stable target; newer torch may work)..."
# Refresh pip itself first, then pull the pinned torch trio from the CUDA 12.4
# wheel index.
pip install --upgrade pip
torch_specs=(torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0)
torch_index_url="https://download.pytorch.org/whl/cu124"
pip install "${torch_specs[@]}" --index-url "${torch_index_url}"

log "Installing MiniCPM-o core dependencies..."
# One entry per pip invocation, in install order. Each entry is split on
# whitespace into requirement specs and installed with its own `pip install`,
# so every group is resolved separately.
# NOTE(review): because groups resolve independently, a later pin can replace a
# version chosen by an earlier group — confirm huggingface_hub==0.26.5 still
# satisfies the installed transformers release.
core_dep_groups=(
  # MiniCPM-o 4.5 uses Qwen3Config (needs transformers >=4.52).
  'transformers>=4.52,<4.58 accelerate==0.33.0'
  'Pillow==10.4.0'
  'sentencepiece==0.2.0'
  'decord==0.6.0 librosa==0.10.2 soundfile==0.12.1 moviepy==1.0.3'
  'vocos==0.1.0'
  'huggingface_hub==0.26.5'
  'einops==0.8.0'
  'tqdm openai'
)
for dep_group in "${core_dep_groups[@]}"; do
  read -r -a dep_specs <<<"${dep_group}"
  pip install "${dep_specs[@]}"
done

# Data-loader requirements of the CleverHans-Evaluation scripts that the
# MiniCPM-o eval scripts import (via _common.ch):
#   eval_worldsense.py -> pandas + pyarrow (parquet)
#   eval_videomme.py   -> datasets (lmms-lab/Video-MME)
#   eval_lvbench.py    -> datasets (lmms-lab/LVBench)
log "Installing eval data-loader deps (datasets, pandas, pyarrow)..."
loader_pkgs=(datasets pandas pyarrow)
pip install "${loader_pkgs[@]}"

# The MiniCPM-o 4.5 custom modeling file imports the 'minicpmo' PyPI package
# for its TTS utilities. That package pulls in cosyvoice + stepaudio2, which in
# turn need the extra downstream packages installed right after it.
pip install minicpmo==0.1.2
tts_extra_pkgs=(onnx onnxruntime hyperpyyaml diffusers)
pip install "${tts_extra_pkgs[@]}"

log "Patching MiniCPM-o modeling file for transformers>=4.52 compatibility..."
# Directory containing this script. CDPATH is emptied for the `cd` so that an
# inherited CDPATH can neither redirect a relative path to another directory
# nor make `cd` print the target into the captured output; `--` keeps a path
# that starts with '-' from being parsed as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# Best-effort: a failing patch is reported as a warning and setup continues.
if ! python "${SCRIPT_DIR}/scripts/patch_minicpmo.py"; then
  log "Warning: patch_minicpmo.py failed (non-fatal; see errors above)."
fi

log "Done."
# Closing summary: the env name, the python currently first on PATH, and the
# commands to run next. Each argument is printed on its own line.
printf '%s\n' \
  "" \
  "  Active env:     ${CONDA_ENV}" \
  "  Python:         $(command -v python)" \
  "" \
  "Next: conda activate ${CONDA_ENV}" \
  "      Then try: python scripts/test_minicpmo.py"