Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- 2b_l10/outputs_add_free/f2+l2/lang/264/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/101/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/102/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/116/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/125/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/128/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/13/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/132/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/133/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/137/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/139/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/145/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/147/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/151/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/158/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/159/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/167/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/174/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/176/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/185/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/188/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/193/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/196/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/199/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/2/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/213/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/228/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/231/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/234/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/237/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/241/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/242/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/245/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/246/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/247/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/257/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/258/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/262/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/267/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/269/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/270/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/272/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/28/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/284/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/285/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/286/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/29/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/292/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/298/config.json +12 -0
- 2b_l10/outputs_add_free/f2+l2/simpo/300/config.json +12 -0
2b_l10/outputs_add_free/f2+l2/lang/264/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/101/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/102/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/116/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/125/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/128/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/13/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/132/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/133/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/137/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/139/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/145/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/147/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/151/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/158/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/159/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/167/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/174/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/176/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/185/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/188/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/193/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/196/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/199/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/2/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/213/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/228/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/231/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/234/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/237/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/241/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/242/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/245/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/246/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/247/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/257/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/258/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/262/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/267/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/269/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/270/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/272/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/28/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/284/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/285/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/286/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/29/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/292/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/298/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|
2b_l10/outputs_add_free/f2+l2/simpo/300/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"layer": 10,
|
| 4 |
+
"embed_dim": 2304,
|
| 5 |
+
"low_rank_dim": 1,
|
| 6 |
+
"target_module": "model.layers.10",
|
| 7 |
+
"intervention_type": "AdditionFreeIntervention",
|
| 8 |
+
"factor_init_scale": 4.0,
|
| 9 |
+
"vector_init_scale": 8.0,
|
| 10 |
+
"alpha": 0.0
|
| 11 |
+
}
|
| 12 |
+
]
|