helcig's picture
Add coding-50-nonuniform
e90f769 verified
"""Applied automatically in every Python process on PYTHONPATH.
vLLM spawns worker subprocesses that re-import modules; without this file
the monkey-patch would be missing in workers and weight loading would fail
for the heterogeneous per-layer expert counts.
"""
import os
# Apply the vLLM pruned-model monkey-patch at interpreter start-up.
# Setting VLLM_PRUNED_PATCH_DISABLE=1 in the environment opts out entirely.
if os.environ.get('VLLM_PRUNED_PATCH_DISABLE') != '1':
    try:
        import vllm_pruned_patch
        vllm_pruned_patch.apply()
    except Exception as exc:
        # Best-effort: a failure here must never raise out of this module,
        # but swallowing it silently would hide why the patch is missing
        # later on, so report it once on stderr instead.
        import sys

        # sys.stderr can be None (e.g. no console attached); skip reporting then.
        if sys.stderr is not None:
            print(
                f"vllm_pruned_patch: patch not applied "
                f"({type(exc).__name__}: {exc})",
                file=sys.stderr,
            )