How to Export and Run

  • Export via the following command
# Export the qwen3-0_6b decoder as an ExecuTorch program for the SM8550 SoC.
# --compile_only builds the .pte without running on a device, so --prompt is a
# placeholder here. "hybrid" mode combines a prefill graph (--prefill_ar_len 128
# tokens per step) with a KV-cache decode graph, sharing --max_seq_len 1024.
# Outputs are written under --artifact (./qwen3_06b_sm8550_hybrid).
python examples/qualcomm/oss_scripts/llama/llama.py \
    -b build-android \
    -m SM8550 \
    --compile_only \
    --decoder_model qwen3-0_6b \
    --prompt "dummy" \
    --model_mode hybrid \
    --max_seq_len 1024 \
    --prefill_ar_len 128 \
    --temperature 0 \
    --dtype-override fp32 \
    --range_setting minmax \
    --artifact ./qwen3_06b_sm8550_hybrid

Run

  • Use the following commands
# Stage the runner, the QNN backend libraries, the exported model, and the
# tokenizer on the device, then run inference and print the generated text.
# Requires EXECUTORCH_ROOT and QNN_SDK_ROOT in the environment and a device
# reachable via adb. Fail fast if either root is missing.
: "${EXECUTORCH_ROOT:?EXECUTORCH_ROOT must be set}"
: "${QNN_SDK_ROOT:?QNN_SDK_ROOT must be set}"

export DEVICE_DIR=/data/local/tmp/executorch_qualcomm_tutorial/
adb shell "mkdir -p ${DEVICE_DIR}"

# Runner binary and the ExecuTorch QNN backend. Expansions are quoted so
# paths containing spaces survive word-splitting (ShellCheck SC2086).
adb push "${EXECUTORCH_ROOT}/build-android/examples/qualcomm/oss_scripts/llama/qnn_llama_runner" "${DEVICE_DIR}"
adb push "${EXECUTORCH_ROOT}/build-android/lib/libqnn_executorch_backend.so" "${DEVICE_DIR}"

# QNN HTP libraries: shared core libs, plus a per-Hexagon-version pair —
# the CPU-side Stub and the DSP-side Skel — for v69/v73/v75/v79.
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so" "${DEVICE_DIR}"
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so" "${DEVICE_DIR}"
for v in 69 73 75 79; do
  adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV${v}Stub.so" "${DEVICE_DIR}"
  adb push "${QNN_SDK_ROOT}/lib/hexagon-v${v}/unsigned/libQnnHtpV${v}Skel.so" "${DEVICE_DIR}"
done

# Artifacts from the export step (run from the directory containing them).
adb push hybrid_llama_qnn.pte "${DEVICE_DIR}"
adb push tokenizer.json "${DEVICE_DIR}"

# Run the model on-device, then dump the generated output.
adb shell "cd ${DEVICE_DIR} && ./qnn_llama_runner --decoder_model_version qwen3 --tokenizer_path tokenizer.json --model_path hybrid_llama_qnn.pte --prompt 'who are you' --seq_len 512 --kv_updater SmartMask --eval_mode 1 --temperature 0.8 && cat outputs.txt"
Downloads last month
1
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support