Update README.md
Browse files
README.md
CHANGED
|
@@ -16,7 +16,6 @@ Quantization
|
|
| 16 |
- --qembedding 8w: embeddings use 8-bit weights
|
| 17 |
|
| 18 |
## Export
|
| 19 |
-
|
| 20 |
```
|
| 21 |
optimum-cli export executorch \
|
| 22 |
--model "google/gemma-3-4b-it" \
|
|
@@ -31,15 +30,16 @@ optimum-cli export executorch \
|
|
| 31 |
```
|
| 32 |
|
| 33 |
## Run
|
| 34 |
-
|
| 35 |
-
Build the runner from the ExecuTorch repo root:
|
| 36 |
```
|
| 37 |
make gemma3-cpu
|
| 38 |
```
|
| 39 |
-
|
| 40 |
-
|
| 41 |
curl -L https://huggingface.co/google/gemma-3-4b-it/resolve/main/tokenizer.json -o tokenizer.json
|
| 42 |
```
|
|
|
|
|
|
|
| 43 |
./cmake-out/examples/models/gemma3/gemma3_e2e_runner \
|
| 44 |
--model_path "model.pte" \
|
| 45 |
--tokenizer_path "tokenizer.json" \
|
|
|
|
| 16 |
- --qembedding 8w: embeddings use 8-bit weights
|
| 17 |
|
| 18 |
## Export
|
|
|
|
| 19 |
```
|
| 20 |
optimum-cli export executorch \
|
| 21 |
--model "google/gemma-3-4b-it" \
|
|
|
|
| 30 |
```
|
| 31 |
|
| 32 |
## Run
|
| 33 |
+
Build the runner from the ExecuTorch repo root:
|
|
|
|
| 34 |
```
|
| 35 |
make gemma3-cpu
|
| 36 |
```
|
| 37 |
+
Download the tokenizer:
|
| 38 |
+
```
|
| 39 |
curl -L https://huggingface.co/google/gemma-3-4b-it/resolve/main/tokenizer.json -o tokenizer.json
|
| 40 |
```
|
| 41 |
+
Run the model:
|
| 42 |
+
```
|
| 43 |
./cmake-out/examples/models/gemma3/gemma3_e2e_runner \
|
| 44 |
--model_path "model.pte" \
|
| 45 |
--tokenizer_path "tokenizer.json" \
|