Add files using upload-large-folder tool
Browse files- .gitattributes +16 -0
- delta_net-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
- delta_net-1.3B-100B/.hfd/last_download_command +1 -0
- delta_net-1.3B-100B/.hfd/repo_metadata.json +1 -0
- delta_net-1.3B-100B/based_drop/results.json +51 -0
- delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_fda.jsonl +3 -0
- delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_fda.jsonl +3 -0
- delta_net-1.3B-100B/based_fda/results.json +51 -0
- delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_nq_2048.jsonl +3 -0
- delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_nq_2048.jsonl +3 -0
- delta_net-1.3B-100B/based_nq_2048/results.json +51 -0
- delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_squad.jsonl +3 -0
- delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_squad.jsonl +3 -0
- delta_net-1.3B-100B/based_squad/results.json +55 -0
- delta_net-1.3B-100B/based_swde/results.json +51 -0
- delta_net-1.3B-100B/based_triviaqa/results.json +55 -0
- gla-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
- gla-1.3B-100B/.hfd/last_download_command +1 -0
- gla-1.3B-100B/.hfd/repo_metadata.json +1 -0
- gla-1.3B-100B/based_drop/results.json +51 -0
- gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_fda.jsonl +3 -0
- gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_fda.jsonl +3 -0
- gla-1.3B-100B/based_fda/results.json +51 -0
- gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_nq_2048.jsonl +3 -0
- gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_nq_2048.jsonl +3 -0
- gla-1.3B-100B/based_nq_2048/results.json +51 -0
- gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_squad.jsonl +3 -0
- gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_squad.jsonl +3 -0
- gla-1.3B-100B/based_squad/results.json +55 -0
- gla-1.3B-100B/based_swde/results.json +51 -0
- gla-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_triviaqa.jsonl +0 -0
- gla-1.3B-100B/based_triviaqa/results.json +55 -0
- hgrn2-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
- hgrn2-1.3B-100B/based_drop/results.json +51 -0
- hgrn2-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_fda.jsonl +3 -0
- hgrn2-1.3B-100B/based_fda/results.json +51 -0
- hgrn2-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_nq_2048.jsonl +3 -0
- hgrn2-1.3B-100B/based_nq_2048/results.json +51 -0
- hgrn2-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_squad.jsonl +3 -0
- hgrn2-1.3B-100B/based_swde/results.json +51 -0
- retnet-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_fda.jsonl +3 -0
- transformer-1.3B-100B/config.json +32 -0
- transformer-1.3B-100B/generation_config.json +6 -0
- transformer-1.3B-100B/model.safetensors +3 -0
- transformer-1.3B-100B/results +15 -0
- transformer-1.3B-100B/special_tokens_map.json +23 -0
- transformer-1.3B-100B/tokenizer.model +3 -0
- transformer-1.3B-100B/tokenizer_config.json +42 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
delta_net-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
delta_net-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
delta_net-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
delta_net-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
delta_net-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
delta_net-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
gla-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
gla-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
gla-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
gla-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
gla-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
gla-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
hgrn2-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
hgrn2-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
hgrn2-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
retnet-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
|
delta_net-1.3B-100B/.hfd/aria2c_urls.txt
ADDED
|
File without changes
|
delta_net-1.3B-100B/.hfd/last_download_command
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
REPO_ID=fla-hub/delta_net-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
|
delta_net-1.3B-100B/.hfd/repo_metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_id":"6746ac966e604a92959d3cad","id":"fla-hub/delta_net-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"transformers","tags":["transformers","safetensors","delta_net","text-generation","arxiv:1910.09700","autotrain_compatible","endpoints_compatible","region:us"],"downloads":171,"likes":0,"modelId":"fla-hub/delta_net-1.3B-100B","author":"fla-hub","sha":"b4dcbbafd4fde802717bdec3008d4aba9cb3a1f8","lastModified":"2025-02-09T16:57:54.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"model-index":null,"config":{"architectures":["DeltaNetForCausalLM"],"model_type":"delta_net","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":"</s>","unk_token":"<unk>","use_default_system_prompt":false}},"cardData":{"library_name":"transformers","tags":[]},"transformersInfo":{"auto_model":"AutoModelForCausalLM","pipeline_tag":"text-generation"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-11-27T05:22:30.000Z","safetensors":{"parameters":{"BF16":1365677056},"total":1365677056},"usedStorage":5463274107}
|
delta_net-1.3B-100B/based_drop/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_drop": {
|
| 4 |
+
"contains,none": 0.2103497843794921,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_drop"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_drop": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_drop": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_drop": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_fda.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1ea6d2a565d3cffe65ec49c5c2e4df4e7d0c7e786aa1e7bf2b8000ee85b7620
|
| 3 |
+
size 14490105
|
delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_fda.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21736891ba901ee13665d654ec200bddaf68ceb549143319ef2768b4988f0945
|
| 3 |
+
size 14196557
|
delta_net-1.3B-100B/based_fda/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_fda": {
|
| 4 |
+
"contains,none": 0.4250681198910082,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_fda"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_fda": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_fda": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_fda": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_nq_2048.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2aa46583998ffa92fe9e454db0da7a3f4a22437e83e0959da3861aa95e1f80e
|
| 3 |
+
size 41955055
|
delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_nq_2048.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e80805be830453ff52a403b20493742a76b3a4d2d2172d5d424c83c03988142
|
| 3 |
+
size 41035265
|
delta_net-1.3B-100B/based_nq_2048/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_nq_2048": {
|
| 4 |
+
"contains,none": 0.2454862210959772,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_nq_2048"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_nq_2048": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_nq_2048": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_nq_2048": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_squad.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0539b8aed62ac4c7601069e37f0a4697d6600279a84e4365e752604fd33ce9e4
|
| 3 |
+
size 14997314
|
delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_squad.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc7950e0973468e7b536214c453ab275cdf5033b67dd513bba2c5cb4e9a6c6c4
|
| 3 |
+
size 14995544
|
delta_net-1.3B-100B/based_squad/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_squad": {
|
| 4 |
+
"exact,none": 4.098085320792745,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 14.662322521108223,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.3432986227746053,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_squad"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_squad": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": 0
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_squad": 0
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_squad": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
delta_net-1.3B-100B/based_swde/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_swde": {
|
| 4 |
+
"contains,none": 0.36363636363636365,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_swde"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_swde": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_swde": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_swde": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
delta_net-1.3B-100B/based_triviaqa/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_triviaqa": {
|
| 4 |
+
"exact,none": 1.0071090047393365,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 7.6236297885650695,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.5687203791469194,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_triviaqa"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_triviaqa": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": "default"
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_triviaqa": "default"
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_triviaqa": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
gla-1.3B-100B/.hfd/aria2c_urls.txt
ADDED
|
File without changes
|
gla-1.3B-100B/.hfd/last_download_command
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
REPO_ID=fla-hub/gla-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
|
gla-1.3B-100B/.hfd/repo_metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_id":"6608d28ec50f8393c5c2932f","id":"fla-hub/gla-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"transformers","tags":["transformers","safetensors","gla","text-generation","en","dataset:cerebras/SlimPajama-627B","arxiv:2312.06635","arxiv:2507.06457","license:mit","endpoints_compatible","region:us"],"downloads":3991,"likes":1,"modelId":"fla-hub/gla-1.3B-100B","author":"fla-hub","sha":"46b15820a4df269e99aed9d709e017677c15d24b","lastModified":"2025-09-09T10:26:33.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"model-index":null,"config":{"architectures":["GLAForCausalLM"],"model_type":"gla","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":null,"unk_token":"<unk>","use_default_system_prompt":false}},"cardData":{"datasets":["cerebras/SlimPajama-627B"],"language":["en"],"library_name":"transformers","license":"mit","pipeline_tag":"text-generation","tags":["text-generation","gla"]},"transformersInfo":{"auto_model":"AutoModelForCausalLM","pipeline_tag":"text-generation"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-03-31T03:03:42.000Z","safetensors":{"parameters":{"BF16":1365514240},"total":1365514240},"usedStorage":7725801115}
|
gla-1.3B-100B/based_drop/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_drop": {
|
| 4 |
+
"contains,none": 0.19357930043124102,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_drop"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_drop": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_drop": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_drop": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_fda.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a7b523167023d68fb8e5f1dda52b28b16efdbec9f1b92b7ced47e5700e51163
|
| 3 |
+
size 14196379
|
gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_fda.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaa543c02364891c58079791e403b11f1523a98378102e45b6b3eafb329c92ac
|
| 3 |
+
size 14492347
|
gla-1.3B-100B/based_fda/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_fda": {
|
| 4 |
+
"contains,none": 0.27157129881925524,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_fda"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_fda": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_fda": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_fda": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_nq_2048.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8ec94d98967baa6d1e274280f486787d8dc836e49d120a36bd3017de6102252
|
| 3 |
+
size 41005103
|
gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_nq_2048.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f77e057a5310c128fa6ce85fc4f7793de2a6961f63a8cf4057a057c0c5efe9a9
|
| 3 |
+
size 41926367
|
gla-1.3B-100B/based_nq_2048/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_nq_2048": {
|
| 4 |
+
"contains,none": 0.22204624643649035,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_nq_2048"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_nq_2048": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_nq_2048": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_nq_2048": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_squad.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:646834fc08075cd6ca756f7df597ed1a9a1bd899f781ec9bce71d3a13cf70a0c
|
| 3 |
+
size 14983274
|
gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_squad.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66a1ec7f9c455f30e00ddca4edfcd6cf8750fbcb405285f9a45b0c80abcf2acb
|
| 3 |
+
size 14979950
|
gla-1.3B-100B/based_squad/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_squad": {
|
| 4 |
+
"exact,none": 4.467584816929795,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 15.399587310201976,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.349009069533087,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_squad"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_squad": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": 0
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_squad": 0
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_squad": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
gla-1.3B-100B/based_swde/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_swde": {
|
| 4 |
+
"contains,none": 0.3083411433926898,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_swde"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_swde": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_swde": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_swde": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
gla-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gla-1.3B-100B/based_triviaqa/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_triviaqa": {
|
| 4 |
+
"exact,none": 1.3033175355450237,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 7.748952759132068,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.5574644549763034,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_triviaqa"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_triviaqa": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": "default"
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_triviaqa": "default"
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_triviaqa": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
hgrn2-1.3B-100B/.hfd/aria2c_urls.txt
ADDED
|
File without changes
|
hgrn2-1.3B-100B/based_drop/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_drop": {
|
| 4 |
+
"contains,none": 0.19262098706276953,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_drop"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_drop": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_drop": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_drop": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
hgrn2-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_fda.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5ba7eb94d0e3b885116fc2cf3b54d79acfb12a040b87ae3cdccd8144fc21253
|
| 3 |
+
size 14188153
|
hgrn2-1.3B-100B/based_fda/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_fda": {
|
| 4 |
+
"contains,none": 0.1362397820163488,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_fda"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_fda": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_fda": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_fda": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
hgrn2-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_nq_2048.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3046421ad0497413cf63c3c4bd2ab6efac858886b16bd1496609b467a99964ba
|
| 3 |
+
size 40978363
|
hgrn2-1.3B-100B/based_nq_2048/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_nq_2048": {
|
| 4 |
+
"contains,none": 0.1954387076338296,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_nq_2048"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_nq_2048": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_nq_2048": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_nq_2048": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
hgrn2-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_squad.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:362e6d9731e716c5c66cbe12143f687d9df9fce1bcc464f5d5daa984bd65ab5b
|
| 3 |
+
size 15095350
|
hgrn2-1.3B-100B/based_swde/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_swde": {
|
| 4 |
+
"contains,none": 0.2268041237113402,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_swde"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_swde": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_swde": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_swde": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
retnet-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_fda.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3bc28d86e2f2ccf2ab55361cac74d5404697de31688a64623a3ab532fd56188
|
| 3 |
+
size 14186601
|
transformer-1.3B-100B/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "fla-hub/transformer-1.3B-100B",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"TransformerForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": false,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"elementwise_affine": true,
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"fuse_cross_entropy": true,
|
| 11 |
+
"fuse_norm": true,
|
| 12 |
+
"fuse_swiglu": true,
|
| 13 |
+
"hidden_act": "swish",
|
| 14 |
+
"hidden_ratio": 4,
|
| 15 |
+
"hidden_size": 2048,
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": null,
|
| 18 |
+
"max_position_embeddings": 204800,
|
| 19 |
+
"model_type": "transformer",
|
| 20 |
+
"norm_eps": 1e-06,
|
| 21 |
+
"num_heads": 32,
|
| 22 |
+
"num_hidden_layers": 24,
|
| 23 |
+
"num_kv_heads": null,
|
| 24 |
+
"rms_norm_eps": 1e-06,
|
| 25 |
+
"rope_theta": 10000.0,
|
| 26 |
+
"tie_word_embeddings": false,
|
| 27 |
+
"torch_dtype": "bfloat16",
|
| 28 |
+
"transformers_version": "4.48.2",
|
| 29 |
+
"use_cache": true,
|
| 30 |
+
"vocab_size": 32000,
|
| 31 |
+
"window_size": null
|
| 32 |
+
}
|
transformer-1.3B-100B/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"eos_token_id": 2,
|
| 5 |
+
"transformers_version": "4.48.2"
|
| 6 |
+
}
|
transformer-1.3B-100B/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:780b0295e9a6bd686c004e281bcec94130dc5a3e1d111ba440f2cd5f601fe790
|
| 3 |
+
size 2728619696
|
transformer-1.3B-100B/results
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|----------------|------:|------|-----:|---------------|---|------:|---|------|
|
| 2 |
+
|arc_challenge | 1|none | 0|acc |↑ | 0.2372|± |0.0124|
|
| 3 |
+
| | |none | 0|acc_norm |↑ | 0.2782|± |0.0131|
|
| 4 |
+
|arc_easy | 1|none | 0|acc |↑ | 0.5497|± |0.0102|
|
| 5 |
+
| | |none | 0|acc_norm |↑ | 0.4949|± |0.0103|
|
| 6 |
+
|hellaswag | 1|none | 0|acc |↑ | 0.3864|± |0.0049|
|
| 7 |
+
| | |none | 0|acc_norm |↑ | 0.4917|± |0.0050|
|
| 8 |
+
|lambada_standard| 1|none | 0|acc |↑ | 0.4077|± |0.0068|
|
| 9 |
+
| | |none | 0|perplexity |↓ |19.3234|± |0.6254|
|
| 10 |
+
|piqa | 1|none | 0|acc |↑ | 0.7029|± |0.0107|
|
| 11 |
+
| | |none | 0|acc_norm |↑ | 0.7040|± |0.0107|
|
| 12 |
+
|wikitext | 2|none | 0|bits_per_byte |↓ | 0.7738|± | N/A|
|
| 13 |
+
| | |none | 0|byte_perplexity|↓ | 1.7097|± | N/A|
|
| 14 |
+
| | |none | 0|word_perplexity|↓ |17.6031|± | N/A|
|
| 15 |
+
|winogrande | 1|none | 0|acc |↑ | 0.5556|± |0.0140|
|
transformer-1.3B-100B/special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"unk_token": {
|
| 17 |
+
"content": "<unk>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
}
|
| 23 |
+
}
|
transformer-1.3B-100B/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
| 3 |
+
size 493443
|
transformer-1.3B-100B/tokenizer_config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"0": {
|
| 6 |
+
"content": "<unk>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"1": {
|
| 14 |
+
"content": "<s>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"2": {
|
| 22 |
+
"content": "</s>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"additional_special_tokens": [],
|
| 31 |
+
"bos_token": "<s>",
|
| 32 |
+
"clean_up_tokenization_spaces": false,
|
| 33 |
+
"eos_token": "</s>",
|
| 34 |
+
"legacy": true,
|
| 35 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 36 |
+
"pad_token": null,
|
| 37 |
+
"sp_model_kwargs": {},
|
| 38 |
+
"spaces_between_special_tokens": false,
|
| 39 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 40 |
+
"unk_token": "<unk>",
|
| 41 |
+
"use_default_system_prompt": false
|
| 42 |
+
}
|