Update README.md
Browse files
README.md
CHANGED
|
@@ -52,18 +52,19 @@ vllm bench serve --backend openai-chat --endpoint /v1/chat/completions \
|
|
| 52 |
--temperature 0 --save-result --save-detailed
|
| 53 |
```
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
| 58 |
-
|:--------:|:--------------:|
|
| 59 |
-
|
|
| 60 |
-
| 1 |
|
| 61 |
-
| 2 |
|
| 62 |
-
|
|
| 63 |
-
|
|
| 64 |
-
| 5 |
|
| 65 |
-
| 6 | 9.
|
| 66 |
-
|
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
|
|
|
|
| 52 |
--temperature 0 --save-result --save-detailed
|
| 53 |
```
|
| 54 |
|
| 55 |
+
**Per-Position Acceptance Rate**
|
| 56 |
+
|
| 57 |
+
| Dataset | Pos 0 | Pos 1 | Pos 2 | Pos 3 | Pos 4 | Pos 5 | Pos 6 | Pos 7 | Avg. Length |
|
| 58 |
+
|:-------|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:----------:|
|
| 59 |
+
| HumanEval | 85.8% | 72.1% | 60.3% | 50.4% | 41.8% | 34.3% | 26.9% | 19.6% | 4.91 |
|
| 60 |
+
| math_reasoning | 88.7% | 76.1% | 64.8% | 54.9% | 45.5% | 36.5% | 28.8% | 21.5% | 5.17 |
|
| 61 |
+
| qa | 67.5% | 41.0% | 23.8% | 13.8% | 8.1% | 4.5% | 2.6% | 1.3% | 2.63 |
|
| 62 |
+
| question | 75.1% | 51.1% | 34.7% | 24.5% | 17.9% | 13.0% | 9.4% | 6.5% | 3.32 |
|
| 63 |
+
| rag | 76.1% | 54.8% | 39.8% | 28.7% | 19.9% | 12.9% | 7.0% | 3.8% | 3.43 |
|
| 64 |
+
| summarization | 67.3% | 39.9% | 22.3% | 12.0% | 6.4% | 3.1% | 1.5% | 0.7% | 2.53 |
|
| 65 |
+
| tool_call | 65.7% | 45.7% | 31.6% | 21.7% | 15.0% | 9.6% | 6.2% | 3.6% | 2.99 |
|
| 66 |
+
| translation | 73.4% | 51.4% | 35.3% | 23.6% | 15.6% | 9.3% | 5.4% | 2.6% | 3.17 |
|
| 67 |
+
| writing | 75.3% | 51.6% | 35.1% | 24.5% | 17.8% | 13.0% | 9.4% | 6.5% | 3.33 |
|
| 68 |
|
| 69 |
|
| 70 |
|