# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "accelerate>=1.13.0",
#     "flash-linear-attention>=0.4.2",
#     "hf-xet>=1.4.3",
#     "huggingface-hub>=1.8.0",
#     "onnx>=1.21.0",
#     "onnx-ir>=0.2.0",
#     "onnxruntime>=1.24.4",
#     "onnxruntime-genai>=0.13.1",
#     "optimum>=2.1.0",
#     "sentencepiece>=0.2.1",
#     "tiktoken>=0.12.0",
#     "torch>=2.11.0",
#     "transformers==5.7.0",
# ]
# ///
"""Download a Hugging Face model and convert it to ONNX Runtime GenAI format.

Layout (all relative to the current working directory):
  ./model  - Hugging Face model files (downloaded here with --download)
  ./onnx   - converted ONNX GenAI output
  ./cache  - builder cache
"""

import argparse
from pathlib import Path

from huggingface_hub import snapshot_download
from onnxruntime_genai.models.builder import create_model


def main() -> None:
    """Parse CLI args, optionally fetch the model, then run the ONNX GenAI builder."""
    parser = argparse.ArgumentParser(
        description="Convert a Hugging Face model to ONNX Runtime GenAI format."
    )
    parser.add_argument(
        "--name",
        default=None,
        help="Hugging Face repo id (e.g. 'microsoft/Phi-3-mini-4k-instruct'); "
        "required when --download is given",
    )
    parser.add_argument(
        "--token",
        default=None,
        help="Hugging Face access token for gated/private repos",
    )
    parser.add_argument(
        "--download",
        action="store_true",
        help="Download the model from the Hub into ./model before converting "
        "(default: assume the model files are already in ./model)",
    )
    # NOTE(review): the onnxruntime-genai builder rejects fp16 on the CPU
    # execution provider (fp16 requires cuda/dml). The defaults below mirror
    # the original script; pass --precision fp32 for a CPU build.
    parser.add_argument(
        "--precision",
        default="fp16",
        choices=["fp32", "fp16", "int8", "int4"],
        help="Output precision (fp16 generally requires --ep cuda or dml)",
    )
    parser.add_argument(
        "--ep",
        default="cpu",
        choices=["cpu", "cuda", "dml"],
        help="Execution provider to build the model for",
    )
    args = parser.parse_args()

    pwd = Path.cwd()
    model_dir = pwd / "model"
    onnx_dir = pwd / "onnx"
    cache_dir = pwd / "cache"
    for directory in (model_dir, onnx_dir, cache_dir):
        directory.mkdir(exist_ok=True)

    # ===== STEP 1: DOWNLOAD (HF Hub; Xet backend used automatically if installed) =====
    if args.download:
        if not args.name:
            parser.error("--download requires --name (the Hugging Face repo id)")
        print(">> Downloading model via huggingface_hub (Xet enabled if installed)...")
        local_path = snapshot_download(
            repo_id=args.name,
            local_dir=str(model_dir),
            token=args.token,
        )
        print(f"Model downloaded to: {local_path}")
    else:
        # Be honest about skipping the download instead of printing a
        # "Downloading..." message with no download (the original behavior).
        print(f">> Skipping download; expecting model files in {model_dir}")

    # ===== STEP 2: CONVERT USING ONNX GENAI BUILDER =====
    print(">> Converting to ONNX (GenAI format)...")
    create_model(
        model_name=args.name,          # may be None when converting a local dir
        input_path=str(model_dir),     # HF model directory
        output_dir=str(onnx_dir),      # ONNX output
        precision=args.precision,      # fp32 | fp16 | int8 | int4
        execution_provider=args.ep,    # cpu | cuda | dml
        cache_dir=str(cache_dir),      # builder cache
        extra_options={},
    )

    print("\n✅ Done")
    print(f"ONNX model at: {onnx_dir}")


if __name__ == "__main__":
    main()