mweinbach1 committed on
Commit
2a74c80
·
verified ·
1 Parent(s): 5dbef79

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ pplx-embed-v1-0.6b/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # AI-PC Benchmarks Embedding Intel OpenVINO
2
+
3
+ Contains the runnable `pplx-embed-v1-0.6b` tokenizer files and the static 1x512 (batch size 1, sequence length 512) OpenVINO export used by the Intel embedding benchmark.
4
+
5
+ Layout:
6
+ - `pplx-embed-v1-0.6b/tokenizer.json`
7
+ - `pplx-embed-v1-0.6b/openvino/pplx-embed-v1-0.6b-static-1x512/model.xml`
8
+ - `pplx-embed-v1-0.6b/openvino/pplx-embed-v1-0.6b-static-1x512/model.bin`
pplx-embed-v1-0.6b/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
pplx-embed-v1-0.6b/README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ pipeline_tag: feature-extraction
4
+ tags:
5
+ - feature-extraction
6
+ - sentence-similarity
7
+ - mteb
8
+ - sentence-transformers
9
+ language:
10
+ - multilingual
11
+ ---
12
+
13
+
14
+ <p align="center">
15
+ <img src="assets/logo.svg" alt="Perplexity Logo" width="400">
16
+ </p>
17
+
18
+ <p align="center">pplx-embed-v1: Diffusion-Pretrained Dense and Contextual Embeddings</p>
19
+
20
+ `pplx-embed-v1` and `pplx-embed-context-v1` are state-of-the-art text embedding models optimized for real-world, web-scale retrieval tasks.
21
+
22
+ - Use **`pplx-embed-v1`** for independent text embedding (queries, documents, semantic search)
23
+ - Use **`pplx-embed-context-v1`** for document chunks in RAG systems where surrounding context matters
24
+
25
+ > [!IMPORTANT]
26
+ > `pplx-embed-v1` and `pplx-embed-context-v1` natively produce *unnormalized* int8-quantized embeddings. Ensure that you compare them via *cosine similarity*; because the vectors are not unit-length, a plain dot product does not give the same ranking.
27
+
28
+
29
+ ![diag.png](assets/diag.png)
30
+
31
+ ## Models
32
+
33
+ | Model | Dimensions | Context | MRL | Quantization | Instruction | Pooling |
34
+ |:-----:|:----------:|:-------:|:---:|:------------:|:-----------:|:-------:|
35
+ | `pplx-embed-v1-0.6B` | 1024 | 32K | Yes | INT8/BINARY | No | Mean |
36
+ | `pplx-embed-v1-4B` | 2560 | 32K | Yes | INT8/BINARY | No | Mean |
37
+ | `pplx-embed-context-v1-0.6B` | 1024 | 32K | Yes | INT8/BINARY | No | Mean |
38
+ | `pplx-embed-context-v1-4B` | 2560 | 32K | Yes | INT8/BINARY | No | Mean |
39
+
40
+ <sub>All models are built on diffusion continued pre-trained Qwen3 at Perplexity AI.</sub>
41
+
42
+ <sub>Many modern embedding models rely on instruction tuning, where users prepend an instruction string to the text being embedded. This can yield a 2%-3% lift on benchmarks, but it also introduces prompt-selection overhead and can make indexing pipelines brittle (small instruction changes can shift embedding space). We deliberately **avoid** this requirement: you can embed the text you want to index directly, without having to choose or maintain an instruction prefix.</sub>
43
+
44
+ ## Usage
45
+
46
+ <details>
47
+ <summary>Via API</summary>
48
+
49
+ ```bash
50
+ curl -X POST https://api.perplexity.ai/v1/embeddings \
51
+ -H "Authorization: Bearer YOUR_API_KEY" \
52
+ -H "Content-Type: application/json" \
53
+ -d '{
54
+ "input": [
55
+ "Scientists explore the universe driven by curiosity.",
56
+ "Children learn through curious exploration.",
57
+ "Historical discoveries began with curious questions.",
58
+ "Animals use curiosity to adapt and survive.",
59
+ "Philosophy examines the nature of curiosity."
60
+ ],
61
+ "model": "pplx-embed-v1-0.6b"
62
+ }'
63
+ ```
64
+
65
+ </details>
66
+
67
+
68
+ <details>
69
+ <summary>Using SentenceTransformers</summary>
70
+
71
+ ```python
72
+ from sentence_transformers import SentenceTransformer
73
+
74
+ model = SentenceTransformer(
75
+ "perplexity-ai/pplx-embed-v1-0.6B",
76
+ trust_remote_code=True
77
+ )
78
+
79
+ texts = [
80
+ "Scientists explore the universe driven by curiosity.",
81
+ "Children learn through curious exploration.",
82
+ "Historical discoveries began with curious questions.",
83
+ "Animals use curiosity to adapt and survive.",
84
+ "Philosophy examines the nature of curiosity.",
85
+ ]
86
+
87
+ embeddings = model.encode(texts) # Shape: (5, 1024), quantized to int8
88
+ embeddings = model.encode(texts, quantization="binary") # Shape: (5, 1024), quantized to binary
89
+ ```
90
+
91
+ </details>
92
+
93
+ <details>
94
+ <summary> Using ONNX models </summary>
95
+
96
+ ```python
97
+
98
+ import onnxruntime as ort
99
+ from transformers import AutoTokenizer
100
+ import numpy as np
101
+
102
+ tokenizer = AutoTokenizer.from_pretrained("perplexity-ai/pplx-embed-v1-0.6b", trust_remote_code=True)
103
+ session = ort.InferenceSession("onnx/model.onnx")
104
+
105
+
106
+ texts = [
107
+ "Scientists explore the universe driven by curiosity.",
108
+ "Children learn through curious exploration.",
109
+ "Historical discoveries began with curious questions.",
110
+ "Animals use curiosity to adapt and survive.",
111
+ "Philosophy examines the nature of curiosity.",
112
+ ]
113
+
114
+ tokenized = tokenizer(
115
+ texts,
116
+ padding=True,
117
+ truncation=True,
118
+ return_tensors="np"
119
+ )
120
+
121
+ onnx_inputs = {
122
+ "input_ids": tokenized["input_ids"].astype(np.int64),
123
+ "attention_mask": tokenized["attention_mask"].astype(np.int64),
124
+ }
125
+
126
+ # Run inference
127
+ onnx_embeddings = session.run([out.name for out in session.get_outputs()], onnx_inputs)
128
+
129
+ # ONNX produces both int8 and binary precision embeddings:
130
+ int8_embeddings = onnx_embeddings[2]
131
+ binary_embeddings = onnx_embeddings[3]
132
+ packed_embeddings = np.packbits(binary_embeddings != -1, axis=-1)
133
+ ```
134
+
135
+ </details>
136
+
137
+ <details>
138
+ <summary>Using Text Embeddings Inference (TEI)</summary>
139
+
140
+ > [!NOTE]
141
+ > Text Embeddings Inference v1.9.2+ is required.
142
+
143
+ > [!IMPORTANT]
144
+ > Currently, only int8-quantized embeddings are available via TEI. Remember to use cosine similarity with unnormalized int8 embeddings.
145
+
146
+ - CPU w/ Candle:
147
+
148
+ ```bash
149
+ docker run -p 8080:80 ghcr.io/huggingface/text-embeddings-inference:cpu-1.9 --model-id perplexity-ai/pplx-embed-v1-0.6B --dtype float32
150
+ ```
151
+
152
+ - CPU w/ ORT (ONNX Runtime):
153
+
154
+ ```bash
155
+ docker run -p 8080:80 ghcr.io/huggingface/text-embeddings-inference:cpu-1.9 --model-id onnx-community/pplx-embed-v1-0.6B --dtype float32
156
+ ```
157
+
158
+ - GPU w/ CUDA:
159
+
160
+ ```bash
161
+ docker run --gpus all --shm-size 1g -p 8080:80 ghcr.io/huggingface/text-embeddings-inference:cuda-1.9 --model-id perplexity-ai/pplx-embed-v1-0.6B --dtype float32
162
+ ```
163
+
164
+ > If you hit OOM during warmup, lower `--max-batch-tokens` and `--max-client-batch-size`. Set `--max-batch-tokens` to `max_sequence_length × batch_size` (e.g., 2048 tokens × 8 sequences = 16384).
165
+
166
+ > Alternatively, when running on CUDA you can use a container built for your GPU's architecture /
167
+ > compute capability instead of the generic `cuda-1.9` image. The generic image bundles the binaries for Turing,
168
+ > Ampere, Hopper and Blackwell, so a dedicated container (e.g., `ampere-1.9`) will be lighter.
169
+
170
+ And then you can send requests to it via cURL to `/embed`:
171
+
172
+ ```bash
173
+ curl http://0.0.0.0:8080/embed \
174
+ -H "Content-Type: application/json" \
175
+ -d '{
176
+ "inputs": [
177
+ "Scientists explore the universe driven by curiosity.",
178
+ "Children learn through curious exploration.",
179
+ "Historical discoveries began with curious questions.",
180
+ "Animals use curiosity to adapt and survive.",
181
+ "Philosophy examines the nature of curiosity."
182
+ ],
183
+ "normalize": false
184
+ }'
185
+ ```
186
+ </details>
187
+
188
+
189
+ ## Technical Details
190
+
191
+ For comprehensive technical details and evaluation results, see our paper on arXiv: <https://arxiv.org/abs/2602.11151>
pplx-embed-v1-0.6b/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
pplx-embed-v1-0.6b/config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "PPLXQwen3Model"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration.PPLXQwen3Config",
9
+ "AutoModel": "modeling.PPLXQwen3Model"
10
+ },
11
+ "bos_token_id": 151643,
12
+ "dtype": "float32",
13
+ "eos_token_id": 151643,
14
+ "head_dim": 128,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 1024,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "layer_types": [
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention"
48
+ ],
49
+ "max_position_embeddings": 32768,
50
+ "max_window_layers": 28,
51
+ "model_type": "bidirectional_pplx_qwen3",
52
+ "num_attention_heads": 16,
53
+ "num_hidden_layers": 28,
54
+ "num_key_value_heads": 8,
55
+ "rms_norm_eps": 1e-06,
56
+ "rope_parameters": {
57
+ "rope_theta": 1000000,
58
+ "rope_type": "default"
59
+ },
60
+ "rope_theta": 1000000,
61
+ "sliding_window": null,
62
+ "tie_word_embeddings": true,
63
+ "transformers_version": "5.0.0.dev0",
64
+ "transformers.js_config": {
65
+ "use_external_data_format": {
66
+ "model.onnx": 2,
67
+ "model_quantized.onnx": 1,
68
+ "model_q4.onnx": 1
69
+ }
70
+ },
71
+ "use_cache": false,
72
+ "use_sliding_window": false,
73
+ "vocab_size": 151936,
74
+ "attn_implementation": "sdpa",
75
+ "use_bidirectional_attention": true
76
+ }
pplx-embed-v1-0.6b/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pplx-embed-v1-0.6b/modules.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "",
18
+ "type": "st_quantize.FlexibleQuantizer",
19
+ "kwargs": ["quantization"]
20
+ }
21
+ ]
pplx-embed-v1-0.6b/openvino/pplx-embed-v1-0.6b-static-1x512/export_metadata.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_dir": "D:\\OpenVino_Convert\\pplx-embed-v1-0.6b",
3
+ "output_xml": "D:\\OpenVino_Convert\\pplx-embed-v1-0.6b\\openvino\\pplx-embed-v1-0.6b-static-1x512\\model.xml",
4
+ "output_bin": "D:\\OpenVino_Convert\\pplx-embed-v1-0.6b\\openvino\\pplx-embed-v1-0.6b-static-1x512\\model.bin",
5
+ "staging_onnx": "D:\\OpenVino_Convert\\pplx-embed-v1-0.6b\\openvino\\pplx-embed-v1-0.6b-static-1x512\\model.standard.onnx",
6
+ "source_note": "Direct conversion from onnx/model_quantized.onnx is blocked by ONNX Runtime custom ops, so this IR is exported from local PyTorch weights and validated against the quantized ONNX outputs.",
7
+ "static_shape": {
8
+ "input_ids": [
9
+ 1,
10
+ 512
11
+ ],
12
+ "attention_mask": [
13
+ 1,
14
+ 512
15
+ ]
16
+ },
17
+ "input_dtype": "int32",
18
+ "output_names": [
19
+ "last_hidden_state",
20
+ "pooler_output",
21
+ "pooler_output_int8",
22
+ "pooler_output_binary"
23
+ ]
24
+ }
pplx-embed-v1-0.6b/openvino/pplx-embed-v1-0.6b-static-1x512/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5421bd566e3632788d855ce9d47c9e8d970adff564095d26d118840d9be6d14
3
+ size 2384732488
pplx-embed-v1-0.6b/openvino/pplx-embed-v1-0.6b-static-1x512/model.xml ADDED
The diff for this file is too large to render. See raw diff
 
pplx-embed-v1-0.6b/openvino/pplx-embed-v1-0.6b-static-1x512/validation_report.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ir_xml": "D:\\OpenVino_Convert\\pplx-embed-v1-0.6b\\openvino\\pplx-embed-v1-0.6b-static-1x512\\model.xml",
3
+ "onnx_path": "D:\\OpenVino_Convert\\pplx-embed-v1-0.6b\\onnx\\model_quantized.onnx",
4
+ "available_devices": [
5
+ "CPU",
6
+ "GPU",
7
+ "NPU"
8
+ ],
9
+ "validated_devices": {
10
+ "CPU": {
11
+ "status": "ok",
12
+ "compile_config": {},
13
+ "max_pooler_abs_diff": 0.026273906230926514,
14
+ "max_pooler_mean_abs_diff": 0.0028607603162527084,
15
+ "min_pooler_cosine_similarity": 0.9996875178276693,
16
+ "min_int8_match_ratio": 0.638671875,
17
+ "min_binary_match_ratio": 0.9921875,
18
+ "per_text": [
19
+ {
20
+ "pooler_max_abs_diff": 0.025037307292222977,
21
+ "pooler_mean_abs_diff": 0.0028607603162527084,
22
+ "pooler_cosine_similarity": 0.999762744326066,
23
+ "int8_match_ratio": 0.646484375,
24
+ "binary_match_ratio": 0.9931640625,
25
+ "text": "OpenVINO runs the embedding model at a fixed 512 token context."
26
+ },
27
+ {
28
+ "pooler_max_abs_diff": 0.024769730865955353,
29
+ "pooler_mean_abs_diff": 0.0028401503805071115,
30
+ "pooler_cosine_similarity": 0.9996875178276693,
31
+ "int8_match_ratio": 0.638671875,
32
+ "binary_match_ratio": 0.994140625,
33
+ "text": "CPU, GPU, and NPU parity matters more than nominal compilation."
34
+ },
35
+ {
36
+ "pooler_max_abs_diff": 0.026273906230926514,
37
+ "pooler_mean_abs_diff": 0.0028125871904194355,
38
+ "pooler_cosine_similarity": 0.9998286070704168,
39
+ "int8_match_ratio": 0.6826171875,
40
+ "binary_match_ratio": 0.9921875,
41
+ "text": "This validation checks the quantized ONNX reference against the exported IR."
42
+ }
43
+ ]
44
+ },
45
+ "GPU": {
46
+ "status": "ok",
47
+ "compile_config": {
48
+ "INFERENCE_PRECISION_HINT": "f32"
49
+ },
50
+ "max_pooler_abs_diff": 0.026273980736732483,
51
+ "max_pooler_mean_abs_diff": 0.002860757987946272,
52
+ "min_pooler_cosine_similarity": 0.9996875182875262,
53
+ "min_int8_match_ratio": 0.638671875,
54
+ "min_binary_match_ratio": 0.9921875,
55
+ "per_text": [
56
+ {
57
+ "pooler_max_abs_diff": 0.025037672370672226,
58
+ "pooler_mean_abs_diff": 0.002860757987946272,
59
+ "pooler_cosine_similarity": 0.9997627431941519,
60
+ "int8_match_ratio": 0.646484375,
61
+ "binary_match_ratio": 0.9931640625,
62
+ "text": "OpenVINO runs the embedding model at a fixed 512 token context."
63
+ },
64
+ {
65
+ "pooler_max_abs_diff": 0.024769596755504608,
66
+ "pooler_mean_abs_diff": 0.002840139903128147,
67
+ "pooler_cosine_similarity": 0.9996875182875262,
68
+ "int8_match_ratio": 0.638671875,
69
+ "binary_match_ratio": 0.994140625,
70
+ "text": "CPU, GPU, and NPU parity matters more than nominal compilation."
71
+ },
72
+ {
73
+ "pooler_max_abs_diff": 0.026273980736732483,
74
+ "pooler_mean_abs_diff": 0.0028125974349677563,
75
+ "pooler_cosine_similarity": 0.9998286065004919,
76
+ "int8_match_ratio": 0.6826171875,
77
+ "binary_match_ratio": 0.9921875,
78
+ "text": "This validation checks the quantized ONNX reference against the exported IR."
79
+ }
80
+ ]
81
+ },
82
+ "NPU": {
83
+ "status": "ok",
84
+ "compile_config": {},
85
+ "max_pooler_abs_diff": 0.03023737668991089,
86
+ "max_pooler_mean_abs_diff": 0.005323002114892006,
87
+ "min_pooler_cosine_similarity": 0.9994450849593192,
88
+ "min_int8_match_ratio": 0.431640625,
89
+ "min_binary_match_ratio": 0.9853515625,
90
+ "per_text": [
91
+ {
92
+ "pooler_max_abs_diff": 0.03023737668991089,
93
+ "pooler_mean_abs_diff": 0.004826287738978863,
94
+ "pooler_cosine_similarity": 0.9994450849593192,
95
+ "int8_match_ratio": 0.4677734375,
96
+ "binary_match_ratio": 0.9853515625,
97
+ "text": "OpenVINO runs the embedding model at a fixed 512 token context."
98
+ },
99
+ {
100
+ "pooler_max_abs_diff": 0.025254197418689728,
101
+ "pooler_mean_abs_diff": 0.002866966649889946,
102
+ "pooler_cosine_similarity": 0.9996840558552473,
103
+ "int8_match_ratio": 0.6435546875,
104
+ "binary_match_ratio": 0.9931640625,
105
+ "text": "CPU, GPU, and NPU parity matters more than nominal compilation."
106
+ },
107
+ {
108
+ "pooler_max_abs_diff": 0.024172306060791016,
109
+ "pooler_mean_abs_diff": 0.005323002114892006,
110
+ "pooler_cosine_similarity": 0.9996117906426041,
111
+ "int8_match_ratio": 0.431640625,
112
+ "binary_match_ratio": 0.990234375,
113
+ "text": "This validation checks the quantized ONNX reference against the exported IR."
114
+ }
115
+ ]
116
+ }
117
+ }
118
+ }
pplx-embed-v1-0.6b/special_tokens_map.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "mask_token": {
25
+ "content": "â½Ĺ",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "pad_token": {
32
+ "content": "<|endoftext|>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ "sep_token": {
39
+ "content": "<|endoftext|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ }
45
+ }
pplx-embed-v1-0.6b/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6fb5c5bbba5fa5f8332edfb6d8aa67bd7fb3d75365b1765f108201698eaebf5
3
+ size 11422837
pplx-embed-v1-0.6b/tokenizer_config.json ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151642": {
6
+ "content": "â½Ĺ",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151643": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151644": {
22
+ "content": "<|im_start|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151645": {
30
+ "content": "<|im_end|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151646": {
38
+ "content": "<|object_ref_start|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151647": {
46
+ "content": "<|object_ref_end|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151648": {
54
+ "content": "<|box_start|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151649": {
62
+ "content": "<|box_end|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151650": {
70
+ "content": "<|quad_start|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151651": {
78
+ "content": "<|quad_end|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151652": {
86
+ "content": "<|vision_start|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151653": {
94
+ "content": "<|vision_end|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151654": {
102
+ "content": "<|vision_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151655": {
110
+ "content": "<|image_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151656": {
118
+ "content": "<|video_pad|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": true
124
+ },
125
+ "151657": {
126
+ "content": "<tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151658": {
134
+ "content": "</tool_call>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151659": {
142
+ "content": "<|fim_prefix|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151660": {
150
+ "content": "<|fim_middle|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151661": {
158
+ "content": "<|fim_suffix|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151662": {
166
+ "content": "<|fim_pad|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151663": {
174
+ "content": "<|repo_name|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151664": {
182
+ "content": "<|file_sep|>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151665": {
190
+ "content": "<tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151666": {
198
+ "content": "</tool_response>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151667": {
206
+ "content": "<think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ },
213
+ "151668": {
214
+ "content": "</think>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": false
220
+ }
221
+ },
222
+ "additional_special_tokens": [
223
+ "<|im_start|>",
224
+ "<|im_end|>",
225
+ "<|object_ref_start|>",
226
+ "<|object_ref_end|>",
227
+ "<|box_start|>",
228
+ "<|box_end|>",
229
+ "<|quad_start|>",
230
+ "<|quad_end|>",
231
+ "<|vision_start|>",
232
+ "<|vision_end|>",
233
+ "<|vision_pad|>",
234
+ "<|image_pad|>",
235
+ "<|video_pad|>"
236
+ ],
237
+ "bos_token": null,
238
+ "clean_up_tokenization_spaces": false,
239
+ "eos_token": "<|endoftext|>",
240
+ "errors": "replace",
241
+ "extra_special_tokens": {},
242
+ "mask_token": "â½Ĺ",
243
+ "model_max_length": 131072,
244
+ "pad_token": "<|endoftext|>",
245
+ "sep_token": "<|endoftext|>",
246
+ "split_special_tokens": false,
247
+ "tokenizer_class": "Qwen2Tokenizer",
248
+ "unk_token": null
249
+ }
pplx-embed-v1-0.6b/vocab.json ADDED
The diff for this file is too large to render. See raw diff