How to export
- Export the model via the following command:
# Export Qwen3-0.6B through the ExecuTorch Qualcomm AOT flow for SM8550.
# --compile_only skips on-device execution; --model_mode hybrid builds a
# prefill (AR len 128) + KV-cache decode program with max sequence 1024.
# Output is written under the --artifact directory.
# NOTE(review): assumes the working directory is the ExecuTorch repo root
# and that the build-android tree passed via -b already exists — confirm
# against the ExecuTorch Qualcomm setup instructions.
# (No inline per-flag comments: a '#' line would break the '\' continuation.)
python examples/qualcomm/oss_scripts/llama/llama.py \
-b build-android \
-m SM8550 \
--compile_only \
--decoder_model qwen3-0_6b \
--prompt "dummy" \
--model_mode hybrid \
--max_seq_len 1024 \
--prefill_ar_len 128 \
--temperature 0 \
--dtype-override fp32 \
--range_setting minmax \
--artifact ./qwen3_06b_sm8550_hybrid
Run
- You can use the following commands:
# Push the runner, QNN backend libraries, model, and tokenizer to the
# device, then execute the runner and print its output.
# Requires: EXECUTORCH_ROOT (ExecuTorch checkout with the android build)
# and QNN_SDK_ROOT (Qualcomm AI Engine Direct SDK) in the environment.
: "${EXECUTORCH_ROOT:?EXECUTORCH_ROOT must be set}"
: "${QNN_SDK_ROOT:?QNN_SDK_ROOT must be set}"
export DEVICE_DIR=/data/local/tmp/executorch_qualcomm_tutorial/
adb shell "mkdir -p ${DEVICE_DIR}"
adb push "${EXECUTORCH_ROOT}/build-android/examples/qualcomm/oss_scripts/llama/qnn_llama_runner" "${DEVICE_DIR}"
adb push "${EXECUTORCH_ROOT}/build-android/lib/libqnn_executorch_backend.so" "${DEVICE_DIR}"
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so" "${DEVICE_DIR}"
adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so" "${DEVICE_DIR}"
# One Stub (CPU side) + Skel (DSP side) pair per supported Hexagon arch;
# only the pair matching the device's SoC is actually loaded at runtime.
for v in 69 73 75 79; do
  adb push "${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV${v}Stub.so" "${DEVICE_DIR}"
  adb push "${QNN_SDK_ROOT}/lib/hexagon-v${v}/unsigned/libQnnHtpV${v}Skel.so" "${DEVICE_DIR}"
done
# NOTE(review): the export step writes its output under --artifact
# (./qwen3_06b_sm8550_hybrid); adjust the .pte path below if needed.
adb push hybrid_llama_qnn.pte "${DEVICE_DIR}"
adb push tokenizer.json "${DEVICE_DIR}"
adb shell "cd ${DEVICE_DIR} && ./qnn_llama_runner --decoder_model_version qwen3 --tokenizer_path tokenizer.json --model_path hybrid_llama_qnn.pte --prompt 'who are you' --seq_len 512 --kv_updater SmartMask --eval_mode 1 --temperature 0.8 && cat outputs.txt"
- Downloads last month
- 1
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support