Upload folder using huggingface_hub
Browse files
- .gitattributes +8 -0
- ms-swift-data/image_sft_full_v800k.json +3 -0
- ms-swift-data/image_sft_full_v800k_sharegpt.json +3 -0
- ms-swift-data/image_sft_small_10pct.json +3 -0
- ms-swift-data/image_sft_small_10pct_sharegpt.json +3 -0
- ms-swift-data/sft_mixed_config_full_v800k.yaml +14 -0
- ms-swift-data/sft_mixed_config_small_10pct.yaml +14 -0
- ms-swift-data/video_sft_full_v800k.json +3 -0
- ms-swift-data/video_sft_full_v800k_sharegpt.json +3 -0
- ms-swift-data/video_sft_small_10pct.json +3 -0
- ms-swift-data/video_sft_small_10pct_sharegpt.json +3 -0
.gitattributes
CHANGED
|
@@ -45,3 +45,11 @@ video_mllm_swift/s2_declip_siglip2_qwen3_1.7b_10pct/checkpoint-1000/tokenizer.js
|
|
| 45 |
video_mllm_swift/s2_image_only_10pct/v1-20260316-135215/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
video_mllm_swift/s2_image_only_10pct/v1-20260316-135215/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 47 |
video_mllm_swift/s2_siglip2_qwen3_1.7b_10pct/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
video_mllm_swift/s2_image_only_10pct/v1-20260316-135215/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
video_mllm_swift/s2_image_only_10pct/v1-20260316-135215/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 47 |
video_mllm_swift/s2_siglip2_qwen3_1.7b_10pct/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
ms-swift-data/image_sft_full_v800k.json filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
ms-swift-data/image_sft_full_v800k_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
ms-swift-data/image_sft_small_10pct.json filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
ms-swift-data/image_sft_small_10pct_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
ms-swift-data/video_sft_full_v800k.json filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
ms-swift-data/video_sft_full_v800k_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
ms-swift-data/video_sft_small_10pct.json filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
ms-swift-data/video_sft_small_10pct_sharegpt.json filter=lfs diff=lfs merge=lfs -text
|
ms-swift-data/image_sft_full_v800k.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87de3a26e50fd5da292bb5e500f396c34b2b9d5fb140d7e0c88bffa3490e46c7
|
| 3 |
+
size 844452579
|
ms-swift-data/image_sft_full_v800k_sharegpt.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3967ea19856093beecc453f6b9e74658e4597247abb9f139fa27bf7ca18c8bc8
|
| 3 |
+
size 856799857
|
ms-swift-data/image_sft_small_10pct.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f117881a7030a4130f3aed844aa1cafc8a3ef3763ef18c11c55d7a407fb4ef0b
|
| 3 |
+
size 84075636
|
ms-swift-data/image_sft_small_10pct_sharegpt.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43d97f4bb681d44a203f8e5cb577ec8de62a83ecb5c583a68bc49ae6d55ec1f9
|
| 3 |
+
size 85309394
|
ms-swift-data/sft_mixed_config_full_v800k.yaml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 混合 SFT 数据配置(采样版)
|
| 2 |
+
# 自动生成 by sample_sft_data.py
|
| 3 |
+
# Image: 738,590 条
|
| 4 |
+
# Video: 800,001 条
|
| 5 |
+
# 总计: 1,538,591 条
|
| 6 |
+
|
| 7 |
+
datasets:
|
| 8 |
+
# ===== Image =====
|
| 9 |
+
- json_path: /mnt/bn/strategy-mllm-train/common/datasets/image_sft_full_v800k.json
|
| 10 |
+
  sampling_strategy: all
|
| 11 |
+
|
| 12 |
+
# ===== Video =====
|
| 13 |
+
- json_path: /mnt/bn/strategy-mllm-train/common/datasets/video_sft_full_v800k.json
|
| 14 |
+
  sampling_strategy: all
|
ms-swift-data/sft_mixed_config_small_10pct.yaml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 混合 SFT 数据配置(采样版)
|
| 2 |
+
# 自动生成 by sample_sft_data.py
|
| 3 |
+
# Image: 73,859 条
|
| 4 |
+
# Video: 113,616 条
|
| 5 |
+
# 总计: 187,475 条
|
| 6 |
+
|
| 7 |
+
datasets:
|
| 8 |
+
# ===== Image =====
|
| 9 |
+
- json_path: /mnt/bn/strategy-mllm-train/common/datasets/image_sft_small_10pct.json
|
| 10 |
+
  sampling_strategy: all
|
| 11 |
+
|
| 12 |
+
# ===== Video =====
|
| 13 |
+
- json_path: /mnt/bn/strategy-mllm-train/common/datasets/video_sft_small_10pct.json
|
| 14 |
+
  sampling_strategy: all
|
ms-swift-data/video_sft_full_v800k.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8b58b0d78a882368659d2190bd72d3caae8fe5b262d1fe094f22356b0917bf5
|
| 3 |
+
size 1162509258
|
ms-swift-data/video_sft_full_v800k_sharegpt.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b64809179037badac20e4a4a5536a83a5eacda7d0d8135a9148fe5f191b05a2c
|
| 3 |
+
size 1167713206
|
ms-swift-data/video_sft_small_10pct.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79efd9c203c518d8971a0881239b6eeca3be70c16aa74104856e3073a0789fd6
|
| 3 |
+
size 171369919
|
ms-swift-data/video_sft_small_10pct_sharegpt.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec98fb3c559bfb9fb6306b324ada908dc33b56e094241443de8c5a77996f660c
|
| 3 |
+
size 171915278
|