narcolepticchicken commited on
Commit
196bab0
·
verified ·
1 Parent(s): 2462002

Upload eval/bfcl_results.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. eval/bfcl_results.json +31 -0
eval/bfcl_results.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_tasks": 800,
3
+ "savings_opportunity_pct": 84.125,
4
+ "opt_tier_distribution": {
5
+ "2": 13,
6
+ "1": 660
7
+ },
8
+ "model_success_rates": {
9
+ "BitAgent/BitAgent-8B": 0.385,
10
+ "NousResearch/Hermes-2-Pro-Llama-3-8B": 0.02375,
11
+ "NousResearch/Hermes-2-Pro-Mistral-7B": 0.02625,
12
+ "Qwen/QwQ-32B-Preview": 0.0,
13
+ "Qwen/Qwen2-1.5B-Instruct": 0.005,
14
+ "Qwen/Qwen2-7B-Instruct": 0.0325,
15
+ "Qwen/Qwen2.5-1.5B-Instruct": 0.01125,
16
+ "Qwen/Qwen2.5-7B-Instruct": 0.07625,
17
+ "THUDM/glm-4-9b-chat": 0.035,
18
+ "Team-ACE/ToolACE-8B": 0.0775,
19
+ "ZJared/Haha-7B": 0.10375,
20
+ "claude-3-5-sonnet-20241022": 0.075,
21
+ "claude-3-opus-20240229": 0.07125,
22
+ "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.00125,
23
+ "gemini-1.5-flash-001": 0.195,
24
+ "gemini-1.5-flash-002": 0.125,
25
+ "gemini-1.5-pro-001-FC": 0.16,
26
+ "gemini-1.5-pro-001": 0.18875,
27
+ "gemini-1.5-pro-002-FC": 0.21625,
28
+ "gemini-2.0-flash-001-FC": 0.17875
29
+ },
30
+ "tool_error_rate": 1.8579889572641257
31
+ }