Azure99 committed on
Commit
6a53ad3
·
verified ·
1 Parent(s): f0d830f

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +20 -1
  2. config.json +1 -1
  3. conversation.py +25 -0
README.md CHANGED
@@ -53,7 +53,8 @@ model-index:
53
 
54
  [**📄 Technical Report**](https://arxiv.org/abs/2603.13398) |
55
  [**🖥️ Qianfan Platform**](https://cloud.baidu.com/product-s/qianfan_home) |
56
- [**💻 GitHub**](https://github.com/baidubce/Qianfan-VL)
 
57
 
58
  </div>
59
 
@@ -317,6 +318,24 @@ print(response)
317
  vllm serve baidu/Qianfan-OCR --trust-remote-code
318
  ```
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  ## Citation
321
 
322
  ```bibtex
 
53
 
54
  [**📄 Technical Report**](https://arxiv.org/abs/2603.13398) |
55
  [**🖥️ Qianfan Platform**](https://cloud.baidu.com/product-s/qianfan_home) |
56
+ [**💻 GitHub**](https://github.com/baidubce/Qianfan-VL) |
57
+ [**🧩 Skill**](https://github.com/baidubce/skills/tree/develop/skills/qianfanocr-document-intelligence)
58
 
59
  </div>
60
 
 
318
  vllm serve baidu/Qianfan-OCR --trust-remote-code
319
  ```
320
 
321
+ ## Skill
322
+
323
+ We provide a [Qianfan OCR Document Intelligence](https://github.com/baidubce/skills/tree/develop/skills/qianfanocr-document-intelligence) skill for image and PDF understanding workflows.
324
+
325
+ It works with OpenClaw, Claude Code, Codex, and other assistants that support this skill format.
326
+
327
+ This skill packages reusable instructions, scripts, and references so the agent can automatically apply Qianfan-powered document intelligence to tasks such as:
328
+
329
+ - document parsing to Markdown
330
+ - layout analysis
331
+ - element recognition
332
+ - general OCR
333
+ - key information extraction
334
+ - chart understanding
335
+ - document VQA
336
+
337
+ The skill is designed for visual understanding tasks over images and PDFs, and includes the execution flow needed to prepare inputs, choose the right analysis mode, and call the bundled CLI tools.
338
+
339
  ## Citation
340
 
341
  ```bibtex
config.json CHANGED
@@ -51,7 +51,7 @@
51
  "pad_token_id": 151643,
52
  "ps_version": "v2",
53
  "select_layer": -1,
54
- "template": "internvl2_5",
55
  "tie_word_embeddings": false,
56
  "torch_dtype": "bfloat16",
57
  "transformers_version": null,
 
51
  "pad_token_id": 151643,
52
  "ps_version": "v2",
53
  "select_layer": -1,
54
+ "template": "qianfanvl",
55
  "tie_word_embeddings": false,
56
  "torch_dtype": "bfloat16",
57
  "transformers_version": null,
conversation.py CHANGED
@@ -33,6 +33,7 @@ class SeparatorStyle(IntEnum):
33
  CHATGLM3 = auto()
34
  INTERNVL_ZH = auto()
35
  MPT = auto()
 
36
 
37
 
38
  @dataclasses.dataclass
@@ -247,6 +248,18 @@ class Conversation:
247
  else:
248
  ret += role
249
  return ret
 
 
 
 
 
 
 
 
 
 
 
 
250
  else:
251
  raise ValueError(f'Invalid style: {self.sep_style}')
252
 
@@ -389,3 +402,15 @@ register_conv_template(
389
  sep='<|im_end|>\n',
390
  )
391
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  CHATGLM3 = auto()
34
  INTERNVL_ZH = auto()
35
  MPT = auto()
36
+ QIANFANVL = auto()
37
 
38
 
39
  @dataclasses.dataclass
 
248
  else:
249
  ret += role
250
  return ret
251
+ elif self.sep_style == SeparatorStyle.QIANFANVL:
252
+ ret = ''
253
+ if self.system_message:
254
+ ret = system_prompt + self.sep
255
+ for role, message in self.messages:
256
+ if message:
257
+ if type(message) is tuple:
258
+ message, _, _ = message
259
+ ret += role + message + self.sep
260
+ else:
261
+ ret += role
262
+ return ret
263
  else:
264
  raise ValueError(f'Invalid style: {self.sep_style}')
265
 
 
402
  sep='<|im_end|>\n',
403
  )
404
  )
405
+
406
+
407
+ register_conv_template(
408
+ Conversation(
409
+ name='qianfanvl',
410
+ system_template='<|im_start|>system\n{system_message}',
411
+ system_message='',
412
+ roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
413
+ sep_style=SeparatorStyle.QIANFANVL,
414
+ sep='<|im_end|>\n',
415
+ )
416
+ )