SorenDreano committed on
Commit
b7361da
·
verified ·
1 Parent(s): 1955e61

Upload folder using huggingface_hub

Browse files
chat_template.jinja CHANGED
@@ -61,14 +61,31 @@
61
  {{- '【instructions_start】' + instructions + '【instructions_end】\n'-}}
62
  {%- endif -%}
63
  {# Examples Section (only for extraction tasks) #}
64
- {%- if examples -%}
65
- {{- '【examples_start】\n' -}}
66
- {%- for example in examples -%}
67
- {{- '【example_input_start】' + render_content(example.input, true)|trim + '【example_input_end】\n' -}}
68
- {{- '【example_output_start】' + example.output|trim + '【example_output_end】\n' -}}
69
- {%- endfor -%}
70
- {{- '【examples_end】\n' -}}
71
- {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  {# Previous Output Section #}
73
  {%- if previous_output -%}
74
  {{- '【previous_output_start】' + previous_output + '【previous_output_end】\n' -}}
@@ -85,7 +102,9 @@
85
  {%- set content = render_content(message.content, true)|trim %}
86
  {{- content + '\n' -}}
87
  {%- elif message.role == 'assistant' and not loop.last %}
88
- {{- raise_exception('Assistant message must be at the end.') }}
 
 
89
  {%- endif %}
90
  {%- endfor -%}
91
  {{- '【document_end】<|im_end|>\n' -}}
@@ -117,4 +136,4 @@
117
  {%- else %}
118
  {{- '<think>\n' -}}
119
  {%- endif %}
120
- {%- endif -%}
 
61
  {{- '【instructions_start】' + instructions + '【instructions_end】\n'-}}
62
  {%- endif -%}
63
  {# Examples Section (only for extraction tasks) #}
64
+ {%- for message in messages -%}
65
+ {%- if message.role == 'developer' and 'content' in message -%}
66
+ {# Each developer message is one example: every content part but the last is the input, the last part is the expected output. A message with a single part has no input and is skipped. #}
67
+ {%- set example_inputs = message.content[:-1] -%}
68
+ {%- set example_output_part = message.content[-1] -%}
69
+ {%- if example_inputs|length > 0 -%}
70
+ {%- if not has_examples.flag -%}
71
+ {{- '【examples_start】\n' -}}
72
+ {%- set has_examples.flag = true -%}
73
+ {%- endif -%}
74
+ {{- '【example_input_start】' + render_content(example_inputs, true)|trim + '【example_input_end】\n' -}}
75
+ {# Example output: use the part itself when it is a string, otherwise its .text field; any other shape renders as an empty output #}
76
+ {%- set output_text = '' -%}
77
+ {%- if example_output_part is string -%}
78
+ {%- set output_text = example_output_part -%}
79
+ {%- elif example_output_part.text is defined -%}
80
+ {%- set output_text = example_output_part.text -%}
81
+ {%- endif -%}
82
+ {{- '【example_output_start】' + output_text|trim + '【example_output_end】\n' -}}
83
+ {%- endif -%}
84
+ {%- endif -%}
85
+ {%- endfor -%}
86
+ {# Close the section after the loop: testing loop.last inside the developer branch dropped the end marker whenever the final message was not a developer example. Assumes has_examples is a namespace created earlier in the template (TODO confirm). #}
87
+ {%- if has_examples.flag -%}{{- '【examples_end】\n' -}}
88
+ {%- endif -%}
89
  {# Previous Output Section #}
90
  {%- if previous_output -%}
91
  {{- '【previous_output_start】' + previous_output + '【previous_output_end】\n' -}}
 
102
  {%- set content = render_content(message.content, true)|trim %}
103
  {{- content + '\n' -}}
104
  {%- elif message.role == 'assistant' and not loop.last %}
105
+ {# llama.cpp renders a synthetic init example with an assistant turn in
106
+ the middle; ignore it so valid NuExtract prompts render unchanged.
107
+ {{- raise_exception('Assistant message must be at the end.') }} #}
108
  {%- endif %}
109
  {%- endfor -%}
110
  {{- '【document_end】<|im_end|>\n' -}}
 
136
  {%- else %}
137
  {{- '<think>\n' -}}
138
  {%- endif %}
139
+ {%- endif -%}
processor_config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "image_processor": {
3
- "data_format": "channels_first",
4
  "do_convert_rgb": true,
5
  "do_normalize": true,
6
  "do_rescale": true,
@@ -10,7 +9,7 @@
10
  0.5,
11
  0.5
12
  ],
13
- "image_processor_type": "Qwen2VLImageProcessorFast",
14
  "image_std": [
15
  0.5,
16
  0.5,
@@ -28,8 +27,6 @@
28
  },
29
  "processor_class": "Qwen3VLProcessor",
30
  "video_processor": {
31
- "data_format": "channels_first",
32
- "default_to_square": true,
33
  "do_convert_rgb": true,
34
  "do_normalize": true,
35
  "do_rescale": true,
 
1
  {
2
  "image_processor": {
 
3
  "do_convert_rgb": true,
4
  "do_normalize": true,
5
  "do_rescale": true,
 
9
  0.5,
10
  0.5
11
  ],
12
+ "image_processor_type": "Qwen2VLImageProcessor",
13
  "image_std": [
14
  0.5,
15
  0.5,
 
27
  },
28
  "processor_class": "Qwen3VLProcessor",
29
  "video_processor": {
 
 
30
  "do_convert_rgb": true,
31
  "do_normalize": true,
32
  "do_rescale": true,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06b9509352d2af50381ab2247e083b80d32d5c0aba91c272ca9ff729b6a0e523
3
- size 19989325
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
3
+ size 19989343
tokenizer_config.json CHANGED
@@ -10,6 +10,7 @@
10
  "errors": "replace",
11
  "image_token": "<|image_pad|>",
12
  "is_local": false,
 
13
  "model_max_length": 262144,
14
  "model_specific_special_tokens": {
15
  "audio_bos_token": "<|audio_start|>",
@@ -24,7 +25,7 @@
24
  "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
25
  "processor_class": "Qwen3VLProcessor",
26
  "split_special_tokens": false,
27
- "tokenizer_class": "Qwen2Tokenizer",
28
  "unk_token": null,
29
  "video_token": "<|video_pad|>",
30
  "vision_bos_token": "<|vision_start|>",
 
10
  "errors": "replace",
11
  "image_token": "<|image_pad|>",
12
  "is_local": false,
13
+ "local_files_only": false,
14
  "model_max_length": 262144,
15
  "model_specific_special_tokens": {
16
  "audio_bos_token": "<|audio_start|>",
 
25
  "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
  "processor_class": "Qwen3VLProcessor",
27
  "split_special_tokens": false,
28
+ "tokenizer_class": "TokenizersBackend",
29
  "unk_token": null,
30
  "video_token": "<|video_pad|>",
31
  "vision_bos_token": "<|vision_start|>",