msj19 commited on
Commit
92a859c
·
verified ·
1 Parent(s): 84124bb

Add files using upload-large-folder tool

Browse files
Files changed (48) hide show
  1. .gitattributes +16 -0
  2. delta_net-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
  3. delta_net-1.3B-100B/.hfd/last_download_command +1 -0
  4. delta_net-1.3B-100B/.hfd/repo_metadata.json +1 -0
  5. delta_net-1.3B-100B/based_drop/results.json +51 -0
  6. delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_fda.jsonl +3 -0
  7. delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_fda.jsonl +3 -0
  8. delta_net-1.3B-100B/based_fda/results.json +51 -0
  9. delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_nq_2048.jsonl +3 -0
  10. delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_nq_2048.jsonl +3 -0
  11. delta_net-1.3B-100B/based_nq_2048/results.json +51 -0
  12. delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_squad.jsonl +3 -0
  13. delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_squad.jsonl +3 -0
  14. delta_net-1.3B-100B/based_squad/results.json +55 -0
  15. delta_net-1.3B-100B/based_swde/results.json +51 -0
  16. delta_net-1.3B-100B/based_triviaqa/results.json +55 -0
  17. gla-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
  18. gla-1.3B-100B/.hfd/last_download_command +1 -0
  19. gla-1.3B-100B/.hfd/repo_metadata.json +1 -0
  20. gla-1.3B-100B/based_drop/results.json +51 -0
  21. gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_fda.jsonl +3 -0
  22. gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_fda.jsonl +3 -0
  23. gla-1.3B-100B/based_fda/results.json +51 -0
  24. gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_nq_2048.jsonl +3 -0
  25. gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_nq_2048.jsonl +3 -0
  26. gla-1.3B-100B/based_nq_2048/results.json +51 -0
  27. gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_squad.jsonl +3 -0
  28. gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_squad.jsonl +3 -0
  29. gla-1.3B-100B/based_squad/results.json +55 -0
  30. gla-1.3B-100B/based_swde/results.json +51 -0
  31. gla-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_triviaqa.jsonl +0 -0
  32. gla-1.3B-100B/based_triviaqa/results.json +55 -0
  33. hgrn2-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
  34. hgrn2-1.3B-100B/based_drop/results.json +51 -0
  35. hgrn2-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_fda.jsonl +3 -0
  36. hgrn2-1.3B-100B/based_fda/results.json +51 -0
  37. hgrn2-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_nq_2048.jsonl +3 -0
  38. hgrn2-1.3B-100B/based_nq_2048/results.json +51 -0
  39. hgrn2-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_squad.jsonl +3 -0
  40. hgrn2-1.3B-100B/based_swde/results.json +51 -0
  41. retnet-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_fda.jsonl +3 -0
  42. transformer-1.3B-100B/config.json +32 -0
  43. transformer-1.3B-100B/generation_config.json +6 -0
  44. transformer-1.3B-100B/model.safetensors +3 -0
  45. transformer-1.3B-100B/results +15 -0
  46. transformer-1.3B-100B/special_tokens_map.json +23 -0
  47. transformer-1.3B-100B/tokenizer.model +3 -0
  48. transformer-1.3B-100B/tokenizer_config.json +42 -0
.gitattributes CHANGED
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ delta_net-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ delta_net-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ delta_net-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
39
+ delta_net-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
40
+ delta_net-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
41
+ delta_net-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
42
+ gla-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ gla-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
44
+ gla-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
45
+ gla-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
46
+ gla-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
47
+ gla-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
48
+ hgrn2-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
49
+ hgrn2-1.3B-100B/based_nq_2048/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_nq_2048.jsonl filter=lfs diff=lfs merge=lfs -text
50
+ hgrn2-1.3B-100B/based_squad/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_squad.jsonl filter=lfs diff=lfs merge=lfs -text
51
+ retnet-1.3B-100B/based_fda/[[:space:]]checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_fda.jsonl filter=lfs diff=lfs merge=lfs -text
delta_net-1.3B-100B/.hfd/aria2c_urls.txt ADDED
File without changes
delta_net-1.3B-100B/.hfd/last_download_command ADDED
@@ -0,0 +1 @@
 
 
1
+ REPO_ID=fla-hub/delta_net-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
delta_net-1.3B-100B/.hfd/repo_metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_id":"6746ac966e604a92959d3cad","id":"fla-hub/delta_net-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"transformers","tags":["transformers","safetensors","delta_net","text-generation","arxiv:1910.09700","autotrain_compatible","endpoints_compatible","region:us"],"downloads":171,"likes":0,"modelId":"fla-hub/delta_net-1.3B-100B","author":"fla-hub","sha":"b4dcbbafd4fde802717bdec3008d4aba9cb3a1f8","lastModified":"2025-02-09T16:57:54.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"model-index":null,"config":{"architectures":["DeltaNetForCausalLM"],"model_type":"delta_net","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":"</s>","unk_token":"<unk>","use_default_system_prompt":false}},"cardData":{"library_name":"transformers","tags":[]},"transformersInfo":{"auto_model":"AutoModelForCausalLM","pipeline_tag":"text-generation"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-11-27T05:22:30.000Z","safetensors":{"parameters":{"BF16":1365677056},"total":1365677056},"usedStorage":5463274107}
delta_net-1.3B-100B/based_drop/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_drop": {
4
+ "contains,none": 0.2103497843794921,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_drop"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_drop": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": "default"
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_drop": "default"
31
+ },
32
+ "n-shot": {
33
+ "based_drop": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_fda.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ea6d2a565d3cffe65ec49c5c2e4df4e7d0c7e786aa1e7bf2b8000ee85b7620
3
+ size 14490105
delta_net-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_fda.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21736891ba901ee13665d654ec200bddaf68ceb549143319ef2768b4988f0945
3
+ size 14196557
delta_net-1.3B-100B/based_fda/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_fda": {
4
+ "contains,none": 0.4250681198910082,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_fda"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_fda": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": 0
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_fda": 0
31
+ },
32
+ "n-shot": {
33
+ "based_fda": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_nq_2048.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2aa46583998ffa92fe9e454db0da7a3f4a22437e83e0959da3861aa95e1f80e
3
+ size 41955055
delta_net-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_nq_2048.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e80805be830453ff52a403b20493742a76b3a4d2d2172d5d424c83c03988142
3
+ size 41035265
delta_net-1.3B-100B/based_nq_2048/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_nq_2048": {
4
+ "contains,none": 0.2454862210959772,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_nq_2048"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_nq_2048": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": "default"
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_nq_2048": "default"
31
+ },
32
+ "n-shot": {
33
+ "based_nq_2048": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_squad.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0539b8aed62ac4c7601069e37f0a4697d6600279a84e4365e752604fd33ce9e4
3
+ size 14997314
delta_net-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_squad.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7950e0973468e7b536214c453ab275cdf5033b67dd513bba2c5cb4e9a6c6c4
3
+ size 14995544
delta_net-1.3B-100B/based_squad/results.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_squad": {
4
+ "exact,none": 4.098085320792745,
5
+ "exact_stderr,none": "N/A",
6
+ "f1,none": 14.662322521108223,
7
+ "f1_stderr,none": "N/A",
8
+ "contains,none": 0.3432986227746053,
9
+ "contains_stderr,none": "N/A",
10
+ "alias": "based_squad"
11
+ }
12
+ },
13
+ "configs": {
14
+ "based_squad": {
15
+ "description": "",
16
+ "target_delimiter": " ",
17
+ "fewshot_delimiter": "\n\n",
18
+ "num_fewshot": 0,
19
+ "output_type": "generate_until",
20
+ "generation_kwargs": {
21
+ "until": [
22
+ "\n\n"
23
+ ],
24
+ "do_sample": false
25
+ },
26
+ "repeats": 1,
27
+ "should_decontaminate": false,
28
+ "metadata": {
29
+ "version": 0
30
+ }
31
+ }
32
+ },
33
+ "versions": {
34
+ "based_squad": 0
35
+ },
36
+ "n-shot": {
37
+ "based_squad": 0
38
+ },
39
+ "config": {
40
+ "model": "lm_eval_model",
41
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
42
+ "batch_size": "8",
43
+ "batch_sizes": [],
44
+ "device": "cuda:0",
45
+ "use_cache": null,
46
+ "limit": null,
47
+ "bootstrap_iters": 100000,
48
+ "gen_kwargs": null
49
+ },
50
+ "git_hash": null,
51
+ "context_length": 2000,
52
+ "answer_length": 48,
53
+ "cutting_context": true,
54
+ "decode_mode": "default"
55
+ }
delta_net-1.3B-100B/based_swde/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_swde": {
4
+ "contains,none": 0.36363636363636365,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_swde"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_swde": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": 0
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_swde": 0
31
+ },
32
+ "n-shot": {
33
+ "based_swde": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
delta_net-1.3B-100B/based_triviaqa/results.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_triviaqa": {
4
+ "exact,none": 1.0071090047393365,
5
+ "exact_stderr,none": "N/A",
6
+ "f1,none": 7.6236297885650695,
7
+ "f1_stderr,none": "N/A",
8
+ "contains,none": 0.5687203791469194,
9
+ "contains_stderr,none": "N/A",
10
+ "alias": "based_triviaqa"
11
+ }
12
+ },
13
+ "configs": {
14
+ "based_triviaqa": {
15
+ "description": "",
16
+ "target_delimiter": " ",
17
+ "fewshot_delimiter": "\n\n",
18
+ "num_fewshot": 0,
19
+ "output_type": "generate_until",
20
+ "generation_kwargs": {
21
+ "until": [
22
+ "\n\n"
23
+ ],
24
+ "do_sample": false
25
+ },
26
+ "repeats": 1,
27
+ "should_decontaminate": false,
28
+ "metadata": {
29
+ "version": "default"
30
+ }
31
+ }
32
+ },
33
+ "versions": {
34
+ "based_triviaqa": "default"
35
+ },
36
+ "n-shot": {
37
+ "based_triviaqa": 0
38
+ },
39
+ "config": {
40
+ "model": "lm_eval_model",
41
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/delta_net-1.3B-100B",
42
+ "batch_size": "8",
43
+ "batch_sizes": [],
44
+ "device": "cuda:0",
45
+ "use_cache": null,
46
+ "limit": null,
47
+ "bootstrap_iters": 100000,
48
+ "gen_kwargs": null
49
+ },
50
+ "git_hash": null,
51
+ "context_length": 2000,
52
+ "answer_length": 48,
53
+ "cutting_context": true,
54
+ "decode_mode": "default"
55
+ }
gla-1.3B-100B/.hfd/aria2c_urls.txt ADDED
File without changes
gla-1.3B-100B/.hfd/last_download_command ADDED
@@ -0,0 +1 @@
 
 
1
+ REPO_ID=fla-hub/gla-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
gla-1.3B-100B/.hfd/repo_metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_id":"6608d28ec50f8393c5c2932f","id":"fla-hub/gla-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"transformers","tags":["transformers","safetensors","gla","text-generation","en","dataset:cerebras/SlimPajama-627B","arxiv:2312.06635","arxiv:2507.06457","license:mit","endpoints_compatible","region:us"],"downloads":3991,"likes":1,"modelId":"fla-hub/gla-1.3B-100B","author":"fla-hub","sha":"46b15820a4df269e99aed9d709e017677c15d24b","lastModified":"2025-09-09T10:26:33.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"model-index":null,"config":{"architectures":["GLAForCausalLM"],"model_type":"gla","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":null,"unk_token":"<unk>","use_default_system_prompt":false}},"cardData":{"datasets":["cerebras/SlimPajama-627B"],"language":["en"],"library_name":"transformers","license":"mit","pipeline_tag":"text-generation","tags":["text-generation","gla"]},"transformersInfo":{"auto_model":"AutoModelForCausalLM","pipeline_tag":"text-generation"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-03-31T03:03:42.000Z","safetensors":{"parameters":{"BF16":1365514240},"total":1365514240},"usedStorage":7725801115}
gla-1.3B-100B/based_drop/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_drop": {
4
+ "contains,none": 0.19357930043124102,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_drop"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_drop": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": "default"
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_drop": "default"
31
+ },
32
+ "n-shot": {
33
+ "based_drop": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_fda.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7b523167023d68fb8e5f1dda52b28b16efdbec9f1b92b7ced47e5700e51163
3
+ size 14196379
gla-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_fda.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa543c02364891c58079791e403b11f1523a98378102e45b6b3eafb329c92ac
3
+ size 14492347
gla-1.3B-100B/based_fda/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_fda": {
4
+ "contains,none": 0.27157129881925524,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_fda"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_fda": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": 0
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_fda": 0
31
+ },
32
+ "n-shot": {
33
+ "based_fda": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_nq_2048.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ec94d98967baa6d1e274280f486787d8dc836e49d120a36bd3017de6102252
3
+ size 41005103
gla-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_nq_2048.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77e057a5310c128fa6ce85fc4f7793de2a6961f63a8cf4057a057c0c5efe9a9
3
+ size 41926367
gla-1.3B-100B/based_nq_2048/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_nq_2048": {
4
+ "contains,none": 0.22204624643649035,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_nq_2048"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_nq_2048": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": "default"
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_nq_2048": "default"
31
+ },
32
+ "n-shot": {
33
+ "based_nq_2048": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_squad.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646834fc08075cd6ca756f7df597ed1a9a1bd899f781ec9bce71d3a13cf70a0c
3
+ size 14983274
gla-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_squad.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a1ec7f9c455f30e00ddca4edfcd6cf8750fbcb405285f9a45b0c80abcf2acb
3
+ size 14979950
gla-1.3B-100B/based_squad/results.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_squad": {
4
+ "exact,none": 4.467584816929795,
5
+ "exact_stderr,none": "N/A",
6
+ "f1,none": 15.399587310201976,
7
+ "f1_stderr,none": "N/A",
8
+ "contains,none": 0.349009069533087,
9
+ "contains_stderr,none": "N/A",
10
+ "alias": "based_squad"
11
+ }
12
+ },
13
+ "configs": {
14
+ "based_squad": {
15
+ "description": "",
16
+ "target_delimiter": " ",
17
+ "fewshot_delimiter": "\n\n",
18
+ "num_fewshot": 0,
19
+ "output_type": "generate_until",
20
+ "generation_kwargs": {
21
+ "until": [
22
+ "\n\n"
23
+ ],
24
+ "do_sample": false
25
+ },
26
+ "repeats": 1,
27
+ "should_decontaminate": false,
28
+ "metadata": {
29
+ "version": 0
30
+ }
31
+ }
32
+ },
33
+ "versions": {
34
+ "based_squad": 0
35
+ },
36
+ "n-shot": {
37
+ "based_squad": 0
38
+ },
39
+ "config": {
40
+ "model": "lm_eval_model",
41
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
42
+ "batch_size": "8",
43
+ "batch_sizes": [],
44
+ "device": "cuda:0",
45
+ "use_cache": null,
46
+ "limit": null,
47
+ "bootstrap_iters": 100000,
48
+ "gen_kwargs": null
49
+ },
50
+ "git_hash": null,
51
+ "context_length": 2000,
52
+ "answer_length": 48,
53
+ "cutting_context": true,
54
+ "decode_mode": "default"
55
+ }
gla-1.3B-100B/based_swde/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_swde": {
4
+ "contains,none": 0.3083411433926898,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_swde"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_swde": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": 0
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_swde": 0
31
+ },
32
+ "n-shot": {
33
+ "based_swde": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
gla-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_triviaqa.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
gla-1.3B-100B/based_triviaqa/results.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_triviaqa": {
4
+ "exact,none": 1.3033175355450237,
5
+ "exact_stderr,none": "N/A",
6
+ "f1,none": 7.748952759132068,
7
+ "f1_stderr,none": "N/A",
8
+ "contains,none": 0.5574644549763034,
9
+ "contains_stderr,none": "N/A",
10
+ "alias": "based_triviaqa"
11
+ }
12
+ },
13
+ "configs": {
14
+ "based_triviaqa": {
15
+ "description": "",
16
+ "target_delimiter": " ",
17
+ "fewshot_delimiter": "\n\n",
18
+ "num_fewshot": 0,
19
+ "output_type": "generate_until",
20
+ "generation_kwargs": {
21
+ "until": [
22
+ "\n\n"
23
+ ],
24
+ "do_sample": false
25
+ },
26
+ "repeats": 1,
27
+ "should_decontaminate": false,
28
+ "metadata": {
29
+ "version": "default"
30
+ }
31
+ }
32
+ },
33
+ "versions": {
34
+ "based_triviaqa": "default"
35
+ },
36
+ "n-shot": {
37
+ "based_triviaqa": 0
38
+ },
39
+ "config": {
40
+ "model": "lm_eval_model",
41
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/gla-1.3B-100B",
42
+ "batch_size": "8",
43
+ "batch_sizes": [],
44
+ "device": "cuda:0",
45
+ "use_cache": null,
46
+ "limit": null,
47
+ "bootstrap_iters": 100000,
48
+ "gen_kwargs": null
49
+ },
50
+ "git_hash": null,
51
+ "context_length": 2000,
52
+ "answer_length": 48,
53
+ "cutting_context": true,
54
+ "decode_mode": "default"
55
+ }
hgrn2-1.3B-100B/.hfd/aria2c_urls.txt ADDED
File without changes
hgrn2-1.3B-100B/based_drop/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_drop": {
4
+ "contains,none": 0.19262098706276953,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_drop"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_drop": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": "default"
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_drop": "default"
31
+ },
32
+ "n-shot": {
33
+ "based_drop": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
hgrn2-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_fda.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5ba7eb94d0e3b885116fc2cf3b54d79acfb12a040b87ae3cdccd8144fc21253
3
+ size 14188153
hgrn2-1.3B-100B/based_fda/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_fda": {
4
+ "contains,none": 0.1362397820163488,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_fda"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_fda": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": 0
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_fda": 0
31
+ },
32
+ "n-shot": {
33
+ "based_fda": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
hgrn2-1.3B-100B/based_nq_2048/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_nq_2048.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3046421ad0497413cf63c3c4bd2ab6efac858886b16bd1496609b467a99964ba
3
+ size 40978363
hgrn2-1.3B-100B/based_nq_2048/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_nq_2048": {
4
+ "contains,none": 0.1954387076338296,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_nq_2048"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_nq_2048": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": "default"
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_nq_2048": "default"
31
+ },
32
+ "n-shot": {
33
+ "based_nq_2048": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
hgrn2-1.3B-100B/based_squad/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_squad.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:362e6d9731e716c5c66cbe12143f687d9df9fce1bcc464f5d5daa984bd65ab5b
3
+ size 15095350
hgrn2-1.3B-100B/based_swde/results.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "based_swde": {
4
+ "contains,none": 0.2268041237113402,
5
+ "contains_stderr,none": "N/A",
6
+ "alias": "based_swde"
7
+ }
8
+ },
9
+ "configs": {
10
+ "based_swde": {
11
+ "description": "",
12
+ "target_delimiter": " ",
13
+ "fewshot_delimiter": "\n\n",
14
+ "num_fewshot": 0,
15
+ "output_type": "generate_until",
16
+ "generation_kwargs": {
17
+ "until": [
18
+ "\n\n"
19
+ ],
20
+ "do_sample": false
21
+ },
22
+ "repeats": 1,
23
+ "should_decontaminate": false,
24
+ "metadata": {
25
+ "version": 0
26
+ }
27
+ }
28
+ },
29
+ "versions": {
30
+ "based_swde": 0
31
+ },
32
+ "n-shot": {
33
+ "based_swde": 0
34
+ },
35
+ "config": {
36
+ "model": "lm_eval_model",
37
+ "model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
38
+ "batch_size": "8",
39
+ "batch_sizes": [],
40
+ "device": "cuda:0",
41
+ "use_cache": null,
42
+ "limit": null,
43
+ "bootstrap_iters": 100000,
44
+ "gen_kwargs": null
45
+ },
46
+ "git_hash": null,
47
+ "context_length": 2000,
48
+ "answer_length": 48,
49
+ "cutting_context": true,
50
+ "decode_mode": "default"
51
+ }
retnet-1.3B-100B/based_fda/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_fda.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3bc28d86e2f2ccf2ab55361cac74d5404697de31688a64623a3ab532fd56188
3
+ size 14186601
transformer-1.3B-100B/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "fla-hub/transformer-1.3B-100B",
3
+ "architectures": [
4
+ "TransformerForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "bos_token_id": 1,
8
+ "elementwise_affine": true,
9
+ "eos_token_id": 2,
10
+ "fuse_cross_entropy": true,
11
+ "fuse_norm": true,
12
+ "fuse_swiglu": true,
13
+ "hidden_act": "swish",
14
+ "hidden_ratio": 4,
15
+ "hidden_size": 2048,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": null,
18
+ "max_position_embeddings": 204800,
19
+ "model_type": "transformer",
20
+ "norm_eps": 1e-06,
21
+ "num_heads": 32,
22
+ "num_hidden_layers": 24,
23
+ "num_kv_heads": null,
24
+ "rms_norm_eps": 1e-06,
25
+ "rope_theta": 10000.0,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.48.2",
29
+ "use_cache": true,
30
+ "vocab_size": 32000,
31
+ "window_size": null
32
+ }
transformer-1.3B-100B/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.48.2"
6
+ }
transformer-1.3B-100B/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780b0295e9a6bd686c004e281bcec94130dc5a3e1d111ba440f2cd5f601fe790
3
+ size 2728619696
transformer-1.3B-100B/results ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |----------------|------:|------|-----:|---------------|---|------:|---|------|
2
+ |arc_challenge | 1|none | 0|acc |↑ | 0.2372|± |0.0124|
3
+ | | |none | 0|acc_norm |↑ | 0.2782|± |0.0131|
4
+ |arc_easy | 1|none | 0|acc |↑ | 0.5497|± |0.0102|
5
+ | | |none | 0|acc_norm |↑ | 0.4949|± |0.0103|
6
+ |hellaswag | 1|none | 0|acc |↑ | 0.3864|± |0.0049|
7
+ | | |none | 0|acc_norm |↑ | 0.4917|± |0.0050|
8
+ |lambada_standard| 1|none | 0|acc |↑ | 0.4077|± |0.0068|
9
+ | | |none | 0|perplexity |↓ |19.3234|± |0.6254|
10
+ |piqa | 1|none | 0|acc |↑ | 0.7029|± |0.0107|
11
+ | | |none | 0|acc_norm |↑ | 0.7040|± |0.0107|
12
+ |wikitext | 2|none | 0|bits_per_byte |↓ | 0.7738|± | N/A|
13
+ | | |none | 0|byte_perplexity|↓ | 1.7097|± | N/A|
14
+ | | |none | 0|word_perplexity|↓ |17.6031|± | N/A|
15
+ |winogrande | 1|none | 0|acc |↑ | 0.5556|± |0.0140|
transformer-1.3B-100B/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
transformer-1.3B-100B/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
transformer-1.3B-100B/tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "additional_special_tokens": [],
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": true,
35
+ "model_max_length": 1000000000000000019884624838656,
36
+ "pad_token": null,
37
+ "sp_model_kwargs": {},
38
+ "spaces_between_special_tokens": false,
39
+ "tokenizer_class": "LlamaTokenizer",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }