cleardusk committed on
Commit
44d057a
·
1 Parent(s): 424b3d9

docs: sync Chinese README

Browse files
Files changed (1) hide show
  1. README_zh.md +60 -43
README_zh.md CHANGED
@@ -1,32 +1,41 @@
1
  <div align="center">
2
- <img src="assets/logo/lance-logo.webp" alt="Lance logo" width="300">
3
-
4
- <h1 align="center"><sup>Lance: Unified Multimodal Modeling by Multi-Task Synergy</sup></h1>
5
- <p>
6
- <strong>
7
- <a href="https://scholar.google.com.hk/citations?user=FXxoQlsAAAAJ&hl=zh-CN&oi=ao" style="text-decoration: none; color: inherit;">Fengyi Fu</a><sup>*</sup>,
8
- <a href="https://corleone-huang.github.io/" style="text-decoration: none; color: inherit;">Mengqi Huang</a><sup>*,✉</sup>,
9
- <a href="https://scholar.google.com.hk/citations?user=9ER6nVkAAAAJ&hl=zh-CN&oi=ao" style="text-decoration: none; color: inherit;">Shaojin Wu</a><sup>*</sup>,
10
- Yunsheng Jiang<sup>*</sup>,
11
- Yufei Huo,
12
- <a href="https://guojianzhu.com/" style="text-decoration: none; color: inherit;">Jianzhu Guo</a><sup>✉,§</sup>
13
- </strong><br>
14
- Hao Li,
15
- Yinghang Song,
16
- Fei Ding,
17
- Qian He,
18
- Zheren Fu,
19
- Zhendong Mao,
 
 
 
 
 
 
20
  Yongdong Zhang
21
- <br>
22
- <em>ByteDance</em>
23
- <br>
24
- <sup>*</sup> 共同一作 &nbsp;&nbsp; <sup>✉</sup> 通讯作者 &nbsp;&nbsp; <sup>§</sup> Project lead
25
- </p>
 
 
 
26
  <p>
27
- <a href="https://lance-project.github.io/" style="text-decoration: none; margin: 0 8px;"><img src="https://img.shields.io/badge/Website-Lance-blue?style=flat-square&logo=github" alt="Website"></a>
28
- <a href="http://arxiv.org/abs/2605.18678" style="text-decoration: none; margin: 0 8px;"><img src="https://img.shields.io/badge/Paper-arXiv-red?style=flat-square&logo=arxiv" alt="arXiv"></a>
29
- <a href="https://github.com/bytedance/Lance" style="text-decoration: none; margin: 0 8px;"><img src="https://img.shields.io/badge/Lance-Codebase-536af5?color=536af5&logo=github" alt="Github"></a>
30
  <br>
31
  <a href="./README.md"><ins>English</ins></a> | 简体中文
32
  </p>
@@ -349,11 +358,11 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
349
  #### DPG-Bench 评测
350
 
351
  <div align="center">
352
- <table>
353
  <thead>
354
  <tr>
355
  <th align="left">模型</th>
356
- <th align="center"># Params.</th>
357
  <th align="center">Global</th>
358
  <th align="center">Entity</th>
359
  <th align="center">Attribute</th>
@@ -364,7 +373,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
364
  </thead>
365
  <tbody>
366
  <tr>
367
- <td align="left" colspan="8"><i>仅生成模型</i></td>
368
  </tr>
369
  <tr>
370
  <td align="left">SDXL</td><td align="center">3.5B</td><td align="center">83.27</td><td align="center">82.43</td><td align="center">80.91</td><td align="center">86.76</td><td align="center">80.41</td><td align="center">74.65</td>
@@ -382,7 +391,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
382
  <td align="left">Qwen-Image</td><td align="center">20B</td><td align="center">91.32</td><td align="center">91.56</td><td align="center">92.02</td><td align="center">94.31</td><td align="center">92.73</td><td align="center">88.32</td>
383
  </tr>
384
  <tr>
385
- <td align="left" colspan="8"><i>统一模型</i></td>
386
  </tr>
387
  <tr>
388
  <td align="left">Janus-Pro-7B</td><td align="center">7B</td><td align="center">86.90</td><td align="center">88.90</td><td align="center">89.40</td><td align="center">89.32</td><td align="center">89.48</td><td align="center">84.19</td>
@@ -394,7 +403,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
394
  <td align="left">Show-o2</td><td align="center">7B</td><td align="center">89.00</td><td align="center"><b>91.78</b></td><td align="center">89.96</td><td align="center">91.81</td><td align="center"><b>91.64</b></td><td align="center">86.14</td>
395
  </tr>
396
  <tr>
397
- <td align="left">BAGEL</td><td align="center">7B</td><td align="center">88.94</td><td align="center">90.37</td><td align="center"><u>91.29</u></td><td align="center">90.82</td><td align="center">88.67</td><td align="center">85.07</td>
398
  </tr>
399
  <tr>
400
  <td align="left">InternVL-U</td><td align="center">1.7B</td><td align="center"><u>90.39</u></td><td align="center">90.78</td><td align="center">90.68</td><td align="center">90.29</td><td align="center">88.77</td><td align="center">85.18</td>
@@ -412,14 +421,16 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
412
  </table>
413
  </div>
414
 
 
 
415
  #### GenEval 评测
416
 
417
  <div align="center">
418
- <table>
419
  <thead>
420
  <tr>
421
  <th align="left">模型</th>
422
- <th align="center"># Params.</th>
423
  <th align="center">1-Obj.</th>
424
  <th align="center">2-Obj.</th>
425
  <th align="center">Count</th>
@@ -431,7 +442,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
431
  </thead>
432
  <tbody>
433
  <tr>
434
- <td align="left" colspan="9"><i>仅生成模型</i></td>
435
  </tr>
436
  <tr>
437
  <td align="left">SDXL</td><td align="center">3.5B</td><td align="center">0.98</td><td align="center">0.74</td><td align="center">0.39</td><td align="center">0.85</td><td align="center">0.15</td><td align="center">0.23</td><td align="center">0.55</td>
@@ -449,7 +460,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
449
  <td align="left">Qwen-Image</td><td align="center">20B</td><td align="center">0.99</td><td align="center">0.92</td><td align="center">0.89</td><td align="center">0.88</td><td align="center">0.76</td><td align="center">0.77</td><td align="center">0.87</td>
450
  </tr>
451
  <tr>
452
- <td align="left" colspan="9"><i>统一模型</i></td>
453
  </tr>
454
  <tr>
455
  <td align="left">Janus-Pro-7B</td><td align="center">7B</td><td align="center"><u>0.99</u></td><td align="center">0.89</td><td align="center">0.59</td><td align="center">0.90</td><td align="center">0.79</td><td align="center">0.66</td><td align="center">0.80</td>
@@ -461,7 +472,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
461
  <td align="left">Show-o2</td><td align="center">7B</td><td align="center"><b>1.00</b></td><td align="center">0.87</td><td align="center">0.58</td><td align="center">0.92</td><td align="center">0.52</td><td align="center">0.62</td><td align="center">0.76</td>
462
  </tr>
463
  <tr>
464
- <td align="left">BAGEL</td><td align="center">7B</td><td align="center">0.98</td><td align="center">0.95</td><td align="center"><b>0.84</b></td><td align="center"><u>0.95</u></td><td align="center">0.78</td><td align="center">0.77</td><td align="center">0.88</td>
465
  </tr>
466
  <tr>
467
  <td align="left">Mogao</td><td align="center">7B</td><td align="center"><b>1.00</b></td><td align="center"><b>0.97</b></td><td align="center"><u>0.83</u></td><td align="center">0.93</td><td align="center">0.84</td><td align="center">0.80</td><td align="center"><u>0.89</u></td>
@@ -482,14 +493,16 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
482
  </table>
483
  </div>
484
 
 
 
485
  #### GEdit-Bench 评测
486
 
487
  <div align="center">
488
- <table>
489
  <thead>
490
  <tr>
491
  <th align="left">模型</th>
492
- <th align="center"># Params.</th>
493
  <th align="center">BC</th>
494
  <th align="center">CA</th>
495
  <th align="center">MM</th>
@@ -506,7 +519,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
506
  </thead>
507
  <tbody>
508
  <tr>
509
- <td align="left" colspan="14"><i>仅生成模型</i></td>
510
  </tr>
511
  <tr>
512
  <td align="left">Gemini 2.0</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">6.32</td>
@@ -518,7 +531,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
518
  <td align="left">Qwen-Image-Edit</td><td align="center">20B</td><td align="center">8.23</td><td align="center">8.30</td><td align="center">7.33</td><td align="center">8.05</td><td align="center">7.49</td><td align="center">6.74</td><td align="center">8.57</td><td align="center">8.09</td><td align="center">8.29</td><td align="center">8.48</td><td align="center">8.50</td><td align="center">8.01</td>
519
  </tr>
520
  <tr>
521
- <td align="left" colspan="14"><i>统一模型</i></td>
522
  </tr>
523
  <tr>
524
  <td align="left">Lumina-DiMOO</td><td align="center">8B</td><td align="center">3.43</td><td align="center">4.27</td><td align="center">3.08</td><td align="center">2.77</td><td align="center">4.74</td><td align="center">5.19</td><td align="center">4.44</td><td align="center">3.80</td><td align="center">4.38</td><td align="center">2.68</td><td align="center">4.20</td><td align="center">3.91</td>
@@ -545,12 +558,12 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
545
  #### VBench 评测(视频生成)
546
 
547
  <div align="center">
548
- <table>
549
  <thead>
550
  <tr>
551
  <th align="left">类型</th>
552
  <th align="left">模型</th>
553
- <th align="center"># Params.</th>
554
  <th align="center">Total Score ↑</th>
555
  </tr>
556
  </thead>
@@ -609,7 +622,7 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
609
  <td align="left">TUNA</td><td align="center">1.5B</td><td align="center"><u>84.06</u></td>
610
  </tr>
611
  <tr bgcolor="#f4e6ff">
612
- <td align="left">🌟 <b>Lance (Ours)</b><sup>†</sup></td><td align="center"><b>3B</b></td><td align="center"><b>85.11</b></td>
613
  </tr>
614
  </tbody>
615
  </table>
@@ -631,6 +644,10 @@ python lance_gradio_t2v_v2t.py --gpus 0 --server-port 7860
631
 
632
  Copyright 2025 Bytedance Ltd. and/or its affiliates.
633
 
 
 
 
 
634
  ## 💖 引用
635
 
636
  如果 Lance 对您的项目或研究有帮助,欢迎 🌟 本仓库,并使用以下 BibTeX 引用我们的工作:
 
1
  <div align="center">
2
+ <img src="assets/logo/lance-logo.webp" alt="Lance logo" width="450">
3
+
4
+ <h1 align="center">
5
+ Lance: Unified Multimodal Modeling by Multi-Task Synergy
6
+ </h1>
7
+
8
+ <p style="line-height: 1.5; margin: 0;">
9
+ <span>
10
+ <a href="https://scholar.google.com.hk/citations?user=FXxoQlsAAAAJ&amp;hl=zh-CN&amp;oi=ao" style="text-decoration: none; color: inherit; font-weight: 700 !important; display: inline;">Fengyi Fu</a><sup>*</sup>,
11
+ <a href="https://corleone-huang.github.io/" style="text-decoration: none; color: inherit; font-weight: 700 !important; display: inline;">Mengqi Huang</a><sup>*,✉</sup>,
12
+ <a href="https://scholar.google.com.hk/citations?user=9ER6nVkAAAAJ&amp;hl=zh-CN&amp;oi=ao" style="text-decoration: none; color: inherit; font-weight: 700 !important; display: inline;">Shaojin Wu</a><sup>*</sup>,
13
+ <span style="font-weight: 700 !important;">Yunsheng Jiang</span><sup>*</sup>,
14
+ <span style="font-weight: 700 !important;">Yufei Huo</span>,
15
+ <a href="https://guojianzhu.com/" style="text-decoration: none; color: inherit; font-weight: 700 !important; display: inline;">Jianzhu Guo</a><sup>✉,§</sup>
16
+ </span>
17
+ <br>
18
+
19
+ <span style="display: inline-block; margin-top: 0.4em;">
20
+ Hao Li,
21
+ Yinghang Song,
22
+ Fei Ding,
23
+ Qian He,
24
+ Zheren Fu,
25
+ Zhendong Mao,
26
  Yongdong Zhang
27
+ </span>
28
+ <br>
29
+ <em>ByteDance</em>
30
+ <br>
31
+ <sup>*</sup> 共同一作 &nbsp;&nbsp;
32
+ <sup>✉</sup> 通讯作者 &nbsp;&nbsp;
33
+ <sup>§</sup> 项目负责人
34
+ </p>
35
  <p>
36
+ <a href="https://lance-project.github.io/" style="text-decoration: none; margin: 0 8px;"><img src="https://img.shields.io/badge/Homepage-Lance-blue?style=flat" alt="Homepage"></a>
37
+ <a href="http://arxiv.org/abs/2605.18678" style="text-decoration: none; margin: 0 8px;"><img src="https://img.shields.io/badge/Paper-arXiv-red?style=flat&logo=arxiv" alt="arXiv"></a>
38
+ <a href="https://github.com/bytedance/Lance" style="text-decoration: none; margin: 0 8px;"><img src="https://img.shields.io/badge/Code-GitHub-536af5?color=536af5&logo=github" alt="GitHub"></a>
39
  <br>
40
  <a href="./README.md"><ins>English</ins></a> | 简体中文
41
  </p>
 
358
  #### DPG-Bench 评测
359
 
360
  <div align="center">
361
+ <table align="center">
362
  <thead>
363
  <tr>
364
  <th align="left">模型</th>
365
+ <th align="center">#&nbsp;Params.</th>
366
  <th align="center">Global</th>
367
  <th align="center">Entity</th>
368
  <th align="center">Attribute</th>
 
373
  </thead>
374
  <tbody>
375
  <tr>
376
+ <td align="center" colspan="8"><i>仅生成模型</i></td>
377
  </tr>
378
  <tr>
379
  <td align="left">SDXL</td><td align="center">3.5B</td><td align="center">83.27</td><td align="center">82.43</td><td align="center">80.91</td><td align="center">86.76</td><td align="center">80.41</td><td align="center">74.65</td>
 
391
  <td align="left">Qwen-Image</td><td align="center">20B</td><td align="center">91.32</td><td align="center">91.56</td><td align="center">92.02</td><td align="center">94.31</td><td align="center">92.73</td><td align="center">88.32</td>
392
  </tr>
393
  <tr>
394
+ <td align="center" colspan="8"><i>统一模型</i></td>
395
  </tr>
396
  <tr>
397
  <td align="left">Janus-Pro-7B</td><td align="center">7B</td><td align="center">86.90</td><td align="center">88.90</td><td align="center">89.40</td><td align="center">89.32</td><td align="center">89.48</td><td align="center">84.19</td>
 
403
  <td align="left">Show-o2</td><td align="center">7B</td><td align="center">89.00</td><td align="center"><b>91.78</b></td><td align="center">89.96</td><td align="center">91.81</td><td align="center"><b>91.64</b></td><td align="center">86.14</td>
404
  </tr>
405
  <tr>
406
+ <td align="left">BAGEL<sup>†</sup></td><td align="center">7B</td><td align="center">88.94</td><td align="center">90.37</td><td align="center"><u>91.29</u></td><td align="center">90.82</td><td align="center">88.67</td><td align="center">85.07</td>
407
  </tr>
408
  <tr>
409
  <td align="left">InternVL-U</td><td align="center">1.7B</td><td align="center"><u>90.39</u></td><td align="center">90.78</td><td align="center">90.68</td><td align="center">90.29</td><td align="center">88.77</td><td align="center">85.18</td>
 
421
  </table>
422
  </div>
423
 
424
+ <p align="center"><em><sup>†</sup> 表示该方法在生成前使用 LLM rewriter 进行 prompt rewriting。</em></p>
425
+
426
  #### GenEval 评测
427
 
428
  <div align="center">
429
+ <table align="center">
430
  <thead>
431
  <tr>
432
  <th align="left">模型</th>
433
+ <th align="center">#&nbsp;Params.</th>
434
  <th align="center">1-Obj.</th>
435
  <th align="center">2-Obj.</th>
436
  <th align="center">Count</th>
 
442
  </thead>
443
  <tbody>
444
  <tr>
445
+ <td align="center" colspan="9"><i>仅生成模型</i></td>
446
  </tr>
447
  <tr>
448
  <td align="left">SDXL</td><td align="center">3.5B</td><td align="center">0.98</td><td align="center">0.74</td><td align="center">0.39</td><td align="center">0.85</td><td align="center">0.15</td><td align="center">0.23</td><td align="center">0.55</td>
 
460
  <td align="left">Qwen-Image</td><td align="center">20B</td><td align="center">0.99</td><td align="center">0.92</td><td align="center">0.89</td><td align="center">0.88</td><td align="center">0.76</td><td align="center">0.77</td><td align="center">0.87</td>
461
  </tr>
462
  <tr>
463
+ <td align="center" colspan="9"><i>统一模型</i></td>
464
  </tr>
465
  <tr>
466
  <td align="left">Janus-Pro-7B</td><td align="center">7B</td><td align="center"><u>0.99</u></td><td align="center">0.89</td><td align="center">0.59</td><td align="center">0.90</td><td align="center">0.79</td><td align="center">0.66</td><td align="center">0.80</td>
 
472
  <td align="left">Show-o2</td><td align="center">7B</td><td align="center"><b>1.00</b></td><td align="center">0.87</td><td align="center">0.58</td><td align="center">0.92</td><td align="center">0.52</td><td align="center">0.62</td><td align="center">0.76</td>
473
  </tr>
474
  <tr>
475
+ <td align="left">BAGEL<sup>†</sup></td><td align="center">7B</td><td align="center">0.98</td><td align="center">0.95</td><td align="center"><b>0.84</b></td><td align="center"><u>0.95</u></td><td align="center">0.78</td><td align="center">0.77</td><td align="center">0.88</td>
476
  </tr>
477
  <tr>
478
  <td align="left">Mogao</td><td align="center">7B</td><td align="center"><b>1.00</b></td><td align="center"><b>0.97</b></td><td align="center"><u>0.83</u></td><td align="center">0.93</td><td align="center">0.84</td><td align="center">0.80</td><td align="center"><u>0.89</u></td>
 
493
  </table>
494
  </div>
495
 
496
+ <p align="center"><em><sup>†</sup> 表示该方法在生成前使用 LLM rewriter 进行 prompt rewriting。</em></p>
497
+
498
  #### GEdit-Bench 评测
499
 
500
  <div align="center">
501
+ <table align="center">
502
  <thead>
503
  <tr>
504
  <th align="left">模型</th>
505
+ <th align="center">#&nbsp;Params.</th>
506
  <th align="center">BC</th>
507
  <th align="center">CA</th>
508
  <th align="center">MM</th>
 
519
  </thead>
520
  <tbody>
521
  <tr>
522
+ <td align="center" colspan="14"><i>仅生成模型</i></td>
523
  </tr>
524
  <tr>
525
  <td align="left">Gemini 2.0</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">-</td><td align="center">6.32</td>
 
531
  <td align="left">Qwen-Image-Edit</td><td align="center">20B</td><td align="center">8.23</td><td align="center">8.30</td><td align="center">7.33</td><td align="center">8.05</td><td align="center">7.49</td><td align="center">6.74</td><td align="center">8.57</td><td align="center">8.09</td><td align="center">8.29</td><td align="center">8.48</td><td align="center">8.50</td><td align="center">8.01</td>
532
  </tr>
533
  <tr>
534
+ <td align="center" colspan="14"><i>统一模型</i></td>
535
  </tr>
536
  <tr>
537
  <td align="left">Lumina-DiMOO</td><td align="center">8B</td><td align="center">3.43</td><td align="center">4.27</td><td align="center">3.08</td><td align="center">2.77</td><td align="center">4.74</td><td align="center">5.19</td><td align="center">4.44</td><td align="center">3.80</td><td align="center">4.38</td><td align="center">2.68</td><td align="center">4.20</td><td align="center">3.91</td>
 
558
  #### VBench 评测(视频生成)
559
 
560
  <div align="center">
561
+ <table align="center">
562
  <thead>
563
  <tr>
564
  <th align="left">类型</th>
565
  <th align="left">模型</th>
566
+ <th align="center">#&nbsp;Params.</th>
567
  <th align="center">Total Score ↑</th>
568
  </tr>
569
  </thead>
 
622
  <td align="left">TUNA</td><td align="center">1.5B</td><td align="center"><u>84.06</u></td>
623
  </tr>
624
  <tr bgcolor="#f4e6ff">
625
+ <td align="left">🌟 <b>Lance (Ours)</b></td><td align="center"><b>3B</b></td><td align="center"><b>85.11</b></td>
626
  </tr>
627
  </tbody>
628
  </table>
 
644
 
645
  Copyright 2025 Bytedance Ltd. and/or its affiliates.
646
 
647
+ ## 🙏 致谢
648
+
649
+ 感谢 [BAGEL](https://github.com/ByteDance-Seed/bagel)、[Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) 和 [Wan2.2](https://github.com/Wan-Video/Wan2.2) 的贡献者们公开研究成果并推动社区发展。
650
+
651
  ## 💖 引用
652
 
653
  如果 Lance 对您的项目或研究有帮助,欢迎 🌟 本仓库,并使用以下 BibTeX 引用我们的工作: