Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/added_tokens.json +28 -0
- code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/chat_template.jinja +61 -0
- code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/config.json +68 -0
- code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/merges.txt +0 -0
- code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/special_tokens_map.json +31 -0
- code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/vocab.json +0 -0
- code/RL_model/models/converted_model/v1/added_tokens.json +28 -0
- code/RL_model/models/converted_model/v1/chat_template.jinja +61 -0
- code/RL_model/models/converted_model/v1/config.json +68 -0
- code/RL_model/models/converted_model/v1/generation_config.json +13 -0
- code/RL_model/models/converted_model/v1/merges.txt +0 -0
- code/RL_model/models/converted_model/v1/model.safetensors.index.json +407 -0
- code/RL_model/models/converted_model/v1/special_tokens_map.json +31 -0
- code/RL_model/models/converted_model/v1/tokenizer_config.json +239 -0
- code/RL_model/models/converted_model/v1/vocab.json +0 -0
- code/finetune-inference/old/api_call.py +125 -0
- code/finetune-inference/old/api_call_vllm.py +135 -0
- code/finetune-inference/old/attribution_reasoning.py +198 -0
- code/finetune-inference/old/completeness_conciseness_attribution_cal.py +151 -0
- code/finetune-inference/old/completeness_reasoning_v1.py +186 -0
- code/finetune-inference/old/completeness_reasoning_v2.py +186 -0
- code/finetune-inference/old/completeness_reasoning_v3.py +171 -0
- code/finetune-inference/old/extracting_subclaims.py +196 -0
- code/finetune-inference/old/extracting_subclaims_v2.py +170 -0
- code/finetune-inference/old/extracting_subclaims_v3.py +175 -0
- code/finetune-inference/old/inference.py +91 -0
- code/finetune-inference/old/inferenceV2_without_context.py +137 -0
- code/finetune-inference/old/inferenceV3.py +161 -0
- code/finetune-inference/old/inferenceV3_temp.py +144 -0
- code/finetune-inference/old/inferenceV4.py +154 -0
- code/finetune-inference/old/inference_extract_subclaims.py +162 -0
- code/finetune-inference/old/inference_extract_subclaims_v2.py +179 -0
- code/finetune-inference/old/inference_extract_subclaims_v3.py +182 -0
- code/finetune-inference/old/nemotran_inference.py +174 -0
- code/finetune-inference/old/prompt_generate.py +254 -0
- code/finetune-inference/old/statistics.ipynb +400 -0
- code/finetune-inference/subclaim_support/readctrl_model.code-workspace +13 -0
- code/finetune-inference/subclaim_support_extraction/inference_extract_subclaims_gpt5.py +206 -0
- code/finetune-inference/subclaim_support_extraction/inference_extract_subclaims_v4.py +180 -0
- code/finetune-inference/subclaim_support_extraction/inference_extract_subclaims_vllm.py +163 -0
- code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal.py +248 -0
- code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_tesing_v2.py +203 -0
- code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v2.py +304 -0
- code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v3.py +256 -0
- code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v4.py +309 -0
- code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v5.py +281 -0
- code/finetune-inference/subclaim_support_extraction/readctrl_model.code-workspace +18 -0
- code/finetune-inference/subclaim_support_extraction/subclaim_support_cal_tesing.py +199 -0
- code/finetune-inference/subclaim_support_extraction/subclaim_support_cal_tesing_v2.py +138 -0
- code/finetune-inference/subclaim_support_extraction/subclaim_support_cal_tesing_v3.py +131 -0
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/chat_template.jinja
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0].role == 'system' %}
|
| 4 |
+
{{- messages[0].content + '\n\n' }}
|
| 5 |
+
{%- endif %}
|
| 6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 7 |
+
{%- for tool in tools %}
|
| 8 |
+
{{- "\n" }}
|
| 9 |
+
{{- tool | tojson }}
|
| 10 |
+
{%- endfor %}
|
| 11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 12 |
+
{%- else %}
|
| 13 |
+
{%- if messages[0].role == 'system' %}
|
| 14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
+
{%- endif %}
|
| 16 |
+
{%- endif %}
|
| 17 |
+
{%- for message in messages %}
|
| 18 |
+
{%- if message.content is string %}
|
| 19 |
+
{%- set content = message.content %}
|
| 20 |
+
{%- else %}
|
| 21 |
+
{%- set content = '' %}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 24 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 25 |
+
{%- elif message.role == "assistant" %}
|
| 26 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 27 |
+
{%- if message.tool_calls %}
|
| 28 |
+
{%- for tool_call in message.tool_calls %}
|
| 29 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 30 |
+
{{- '\n' }}
|
| 31 |
+
{%- endif %}
|
| 32 |
+
{%- if tool_call.function %}
|
| 33 |
+
{%- set tool_call = tool_call.function %}
|
| 34 |
+
{%- endif %}
|
| 35 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 36 |
+
{{- tool_call.name }}
|
| 37 |
+
{{- '", "arguments": ' }}
|
| 38 |
+
{%- if tool_call.arguments is string %}
|
| 39 |
+
{{- tool_call.arguments }}
|
| 40 |
+
{%- else %}
|
| 41 |
+
{{- tool_call.arguments | tojson }}
|
| 42 |
+
{%- endif %}
|
| 43 |
+
{{- '}\n</tool_call>' }}
|
| 44 |
+
{%- endfor %}
|
| 45 |
+
{%- endif %}
|
| 46 |
+
{{- '<|im_end|>\n' }}
|
| 47 |
+
{%- elif message.role == "tool" %}
|
| 48 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 49 |
+
{{- '<|im_start|>user' }}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{{- '\n<tool_response>\n' }}
|
| 52 |
+
{{- content }}
|
| 53 |
+
{{- '\n</tool_response>' }}
|
| 54 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 55 |
+
{{- '<|im_end|>\n' }}
|
| 56 |
+
{%- endif %}
|
| 57 |
+
{%- endif %}
|
| 58 |
+
{%- endfor %}
|
| 59 |
+
{%- if add_generation_prompt %}
|
| 60 |
+
{{- '<|im_start|>assistant\n' }}
|
| 61 |
+
{%- endif %}
|
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 2560,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 9728,
|
| 14 |
+
"layer_types": [
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention"
|
| 51 |
+
],
|
| 52 |
+
"max_position_embeddings": 262144,
|
| 53 |
+
"max_window_layers": 36,
|
| 54 |
+
"model_type": "qwen3",
|
| 55 |
+
"num_attention_heads": 32,
|
| 56 |
+
"num_hidden_layers": 36,
|
| 57 |
+
"num_key_value_heads": 8,
|
| 58 |
+
"pad_token_id": 151643,
|
| 59 |
+
"rms_norm_eps": 1e-06,
|
| 60 |
+
"rope_scaling": null,
|
| 61 |
+
"rope_theta": 5000000,
|
| 62 |
+
"sliding_window": null,
|
| 63 |
+
"tie_word_embeddings": true,
|
| 64 |
+
"transformers_version": "4.56.1",
|
| 65 |
+
"use_cache": true,
|
| 66 |
+
"use_sliding_window": false,
|
| 67 |
+
"vocab_size": 151936
|
| 68 |
+
}
|
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
code/RL_model/models/RL_model_only_subclaim_test/global_step_60/actor/huggingface/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/RL_model/models/converted_model/v1/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
code/RL_model/models/converted_model/v1/chat_template.jinja
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0].role == 'system' %}
|
| 4 |
+
{{- messages[0].content + '\n\n' }}
|
| 5 |
+
{%- endif %}
|
| 6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 7 |
+
{%- for tool in tools %}
|
| 8 |
+
{{- "\n" }}
|
| 9 |
+
{{- tool | tojson }}
|
| 10 |
+
{%- endfor %}
|
| 11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 12 |
+
{%- else %}
|
| 13 |
+
{%- if messages[0].role == 'system' %}
|
| 14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
+
{%- endif %}
|
| 16 |
+
{%- endif %}
|
| 17 |
+
{%- for message in messages %}
|
| 18 |
+
{%- if message.content is string %}
|
| 19 |
+
{%- set content = message.content %}
|
| 20 |
+
{%- else %}
|
| 21 |
+
{%- set content = '' %}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 24 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 25 |
+
{%- elif message.role == "assistant" %}
|
| 26 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 27 |
+
{%- if message.tool_calls %}
|
| 28 |
+
{%- for tool_call in message.tool_calls %}
|
| 29 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 30 |
+
{{- '\n' }}
|
| 31 |
+
{%- endif %}
|
| 32 |
+
{%- if tool_call.function %}
|
| 33 |
+
{%- set tool_call = tool_call.function %}
|
| 34 |
+
{%- endif %}
|
| 35 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 36 |
+
{{- tool_call.name }}
|
| 37 |
+
{{- '", "arguments": ' }}
|
| 38 |
+
{%- if tool_call.arguments is string %}
|
| 39 |
+
{{- tool_call.arguments }}
|
| 40 |
+
{%- else %}
|
| 41 |
+
{{- tool_call.arguments | tojson }}
|
| 42 |
+
{%- endif %}
|
| 43 |
+
{{- '}\n</tool_call>' }}
|
| 44 |
+
{%- endfor %}
|
| 45 |
+
{%- endif %}
|
| 46 |
+
{{- '<|im_end|>\n' }}
|
| 47 |
+
{%- elif message.role == "tool" %}
|
| 48 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 49 |
+
{{- '<|im_start|>user' }}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{{- '\n<tool_response>\n' }}
|
| 52 |
+
{{- content }}
|
| 53 |
+
{{- '\n</tool_response>' }}
|
| 54 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 55 |
+
{{- '<|im_end|>\n' }}
|
| 56 |
+
{%- endif %}
|
| 57 |
+
{%- endif %}
|
| 58 |
+
{%- endfor %}
|
| 59 |
+
{%- if add_generation_prompt %}
|
| 60 |
+
{{- '<|im_start|>assistant\n' }}
|
| 61 |
+
{%- endif %}
|
code/RL_model/models/converted_model/v1/config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"dtype": "bfloat16",
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 2560,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 9728,
|
| 14 |
+
"layer_types": [
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention"
|
| 51 |
+
],
|
| 52 |
+
"max_position_embeddings": 262144,
|
| 53 |
+
"max_window_layers": 36,
|
| 54 |
+
"model_type": "qwen3",
|
| 55 |
+
"num_attention_heads": 32,
|
| 56 |
+
"num_hidden_layers": 36,
|
| 57 |
+
"num_key_value_heads": 8,
|
| 58 |
+
"pad_token_id": 151643,
|
| 59 |
+
"rms_norm_eps": 1e-06,
|
| 60 |
+
"rope_scaling": null,
|
| 61 |
+
"rope_theta": 5000000,
|
| 62 |
+
"sliding_window": null,
|
| 63 |
+
"tie_word_embeddings": true,
|
| 64 |
+
"transformers_version": "4.56.1",
|
| 65 |
+
"use_cache": true,
|
| 66 |
+
"use_sliding_window": false,
|
| 67 |
+
"vocab_size": 151936
|
| 68 |
+
}
|
code/RL_model/models/converted_model/v1/generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"temperature": 0.7,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"top_p": 0.8,
|
| 12 |
+
"transformers_version": "4.56.1"
|
| 13 |
+
}
|
code/RL_model/models/converted_model/v1/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/RL_model/models/converted_model/v1/model.safetensors.index.json
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_parameters": 4411424256,
|
| 4 |
+
"total_size": 8822848512
|
| 5 |
+
},
|
| 6 |
+
"weight_map": {
|
| 7 |
+
"lm_head.weight": "model-00001-of-00002.safetensors",
|
| 8 |
+
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
| 9 |
+
"model.layers.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 10 |
+
"model.layers.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 11 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 12 |
+
"model.layers.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 13 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 14 |
+
"model.layers.0.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 15 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 16 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 17 |
+
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 18 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 19 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 20 |
+
"model.layers.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 21 |
+
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 22 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 23 |
+
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 24 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 25 |
+
"model.layers.1.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 26 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 27 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 28 |
+
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 29 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 30 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 31 |
+
"model.layers.10.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 32 |
+
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 33 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 34 |
+
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 35 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 36 |
+
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 37 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 38 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 39 |
+
"model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 40 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 41 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 42 |
+
"model.layers.11.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 43 |
+
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 44 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 45 |
+
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 46 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 47 |
+
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 48 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 49 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 50 |
+
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 51 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 52 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 53 |
+
"model.layers.12.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 54 |
+
"model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 55 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 56 |
+
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 57 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 58 |
+
"model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 59 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 60 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 61 |
+
"model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 62 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 63 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 64 |
+
"model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 65 |
+
"model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 66 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 67 |
+
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 68 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 69 |
+
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 70 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 71 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 72 |
+
"model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 73 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 74 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 75 |
+
"model.layers.14.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 76 |
+
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 77 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 78 |
+
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 79 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 80 |
+
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 81 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 82 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 83 |
+
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 84 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 85 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 86 |
+
"model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 87 |
+
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 88 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 89 |
+
"model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 90 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 91 |
+
"model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 92 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 93 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 94 |
+
"model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 95 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 96 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 97 |
+
"model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 98 |
+
"model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 99 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 100 |
+
"model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 101 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 102 |
+
"model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 103 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 104 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 105 |
+
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 106 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 107 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 108 |
+
"model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 109 |
+
"model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 110 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 111 |
+
"model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 112 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 113 |
+
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 114 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 115 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 116 |
+
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 117 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 118 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 119 |
+
"model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 120 |
+
"model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 121 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 122 |
+
"model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 123 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 124 |
+
"model.layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 125 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 126 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 127 |
+
"model.layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 128 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 129 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 130 |
+
"model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 131 |
+
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 132 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 133 |
+
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 134 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 135 |
+
"model.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 136 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 137 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 138 |
+
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 139 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 140 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 141 |
+
"model.layers.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 142 |
+
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 143 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 144 |
+
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 145 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 146 |
+
"model.layers.2.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 147 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 148 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 149 |
+
"model.layers.2.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 150 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 151 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 152 |
+
"model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 153 |
+
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 154 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 155 |
+
"model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 156 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 157 |
+
"model.layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 158 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 159 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 160 |
+
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 161 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 162 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 163 |
+
"model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 164 |
+
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 165 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 166 |
+
"model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 167 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 168 |
+
"model.layers.21.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 169 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 170 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 171 |
+
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 172 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 173 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 174 |
+
"model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 175 |
+
"model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 176 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 177 |
+
"model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 178 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 179 |
+
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 180 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 181 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 182 |
+
"model.layers.22.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 183 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 184 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 185 |
+
"model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 186 |
+
"model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 187 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 188 |
+
"model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 189 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 190 |
+
"model.layers.23.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 191 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 192 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 193 |
+
"model.layers.23.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 194 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 195 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 196 |
+
"model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 197 |
+
"model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 198 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 199 |
+
"model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 200 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 201 |
+
"model.layers.24.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 202 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 203 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 204 |
+
"model.layers.24.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 205 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 206 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 207 |
+
"model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 208 |
+
"model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 209 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 210 |
+
"model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 211 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 212 |
+
"model.layers.25.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 213 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 214 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 215 |
+
"model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 216 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 217 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 218 |
+
"model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 219 |
+
"model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 220 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 221 |
+
"model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 222 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 223 |
+
"model.layers.26.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 224 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 225 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 226 |
+
"model.layers.26.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 227 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 228 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 229 |
+
"model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 230 |
+
"model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 231 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 232 |
+
"model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 233 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 234 |
+
"model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 235 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 236 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 237 |
+
"model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 238 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 239 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 240 |
+
"model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 241 |
+
"model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 242 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 243 |
+
"model.layers.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 244 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 245 |
+
"model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 246 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 247 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 248 |
+
"model.layers.28.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 249 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 250 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 251 |
+
"model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 252 |
+
"model.layers.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 253 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 254 |
+
"model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 255 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 256 |
+
"model.layers.29.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 257 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 258 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 259 |
+
"model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 260 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 261 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 262 |
+
"model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 263 |
+
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 264 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 265 |
+
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 266 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 267 |
+
"model.layers.3.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 268 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 269 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 270 |
+
"model.layers.3.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 271 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 272 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 273 |
+
"model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 274 |
+
"model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 275 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 276 |
+
"model.layers.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 277 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 278 |
+
"model.layers.30.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 279 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 280 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 281 |
+
"model.layers.30.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 282 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 283 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 284 |
+
"model.layers.31.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 285 |
+
"model.layers.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 286 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 287 |
+
"model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 288 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 289 |
+
"model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 290 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 291 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 292 |
+
"model.layers.31.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 293 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 294 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 295 |
+
"model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 296 |
+
"model.layers.32.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 297 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 298 |
+
"model.layers.32.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 299 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 300 |
+
"model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 301 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 302 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 303 |
+
"model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 304 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 305 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 306 |
+
"model.layers.33.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 307 |
+
"model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 308 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 309 |
+
"model.layers.33.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 310 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 311 |
+
"model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 312 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 313 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 314 |
+
"model.layers.33.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 315 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 316 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 317 |
+
"model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 318 |
+
"model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 319 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 320 |
+
"model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 321 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 322 |
+
"model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 323 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 324 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 325 |
+
"model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 326 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 327 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 328 |
+
"model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 329 |
+
"model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 330 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 331 |
+
"model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 332 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 333 |
+
"model.layers.35.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 334 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 335 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 336 |
+
"model.layers.35.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 337 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 338 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 339 |
+
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 340 |
+
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 341 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 342 |
+
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 343 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 344 |
+
"model.layers.4.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 345 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 346 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 347 |
+
"model.layers.4.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 348 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 349 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 350 |
+
"model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 351 |
+
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 352 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 353 |
+
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 354 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 355 |
+
"model.layers.5.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 356 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 357 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 358 |
+
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 359 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 360 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 361 |
+
"model.layers.6.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 362 |
+
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 363 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 364 |
+
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 365 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 366 |
+
"model.layers.6.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 367 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 368 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 369 |
+
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 370 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 371 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 372 |
+
"model.layers.7.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 373 |
+
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 374 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 375 |
+
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 376 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 377 |
+
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 378 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 379 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 380 |
+
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 381 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 382 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 383 |
+
"model.layers.8.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 384 |
+
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 385 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 386 |
+
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 387 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 388 |
+
"model.layers.8.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 389 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 390 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 391 |
+
"model.layers.8.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 392 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 393 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 394 |
+
"model.layers.9.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 395 |
+
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 396 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 397 |
+
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 398 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 399 |
+
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 400 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 401 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 402 |
+
"model.layers.9.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 403 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 404 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 405 |
+
"model.norm.weight": "model-00001-of-00002.safetensors"
|
| 406 |
+
}
|
| 407 |
+
}
|
code/RL_model/models/converted_model/v1/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
code/RL_model/models/converted_model/v1/tokenizer_config.json
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
+
"model_max_length": 1010000,
|
| 235 |
+
"pad_token": "<|endoftext|>",
|
| 236 |
+
"split_special_tokens": false,
|
| 237 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 238 |
+
"unk_token": null
|
| 239 |
+
}
|
code/RL_model/models/converted_model/v1/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/finetune-inference/old/api_call.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
client = OpenAI()
|
| 5 |
+
|
| 6 |
+
# --- Fernández Huerta formula ---
|
| 7 |
+
def fernandez_huerta_score(text: str) -> float:
    """Approximate the Fernandez Huerta readability index for Spanish text.

    Syllables are approximated by counting vowels (accented ones included),
    so diphthongs count as two syllables.  Sentence and word counts are
    floored at 1 so empty input never divides by zero.
    """
    raw_sentences = re.split(r'[.!?]+', text)
    sentence_count = max(1, sum(1 for part in raw_sentences if part.strip()))

    tokens = text.split()
    word_count = max(1, len(tokens))

    vowel_set = set("aeiouáéíóúüAEIOUÁÉÍÓÚÜ")
    syllable_count = sum(1 for token in tokens for ch in token if ch in vowel_set)

    return 206.84 - 0.60 * (syllable_count / word_count * 100) - 1.02 * (word_count / sentence_count)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# --- Prompt templates for each label ---
|
| 22 |
+
# Prompt templates keyed by readability label ("easy" / "intermediate" / "hard").
# Each template exposes a single {original_text} placeholder filled via
# str.format; instructions are in Spanish because the pipeline rewrites
# Spanish medical text.  All three share the same "keep meaning, add nothing,
# drop nothing" constraints and differ only in the target difficulty.
LABEL_PROMPTS = {
    "easy": """Texto original:
{original_text}

Reescribe el texto en un lenguaje muy simple, frases cortas y vocabulario fácil, adecuado para estudiantes de 5º a 7º grado.
El resultado debe seguir lógicamente el texto original y mantener el mismo significado.
No añadas información nueva, no elimines detalles importantes ni cambies los hechos.
""",
    "intermediate": """Texto original:
{original_text}

Reescribe el texto con una complejidad moderada, frases más largas y vocabulario variado, adecuado para secundaria/bachillerato (8º a 12º grado).
El resultado debe seguir lógicamente el texto original y mantener el mismo significado.
No añadas información nueva, no elimines detalles importantes ni cambies los hechos.
""",
    "hard": """Texto original:
{original_text}

Reescribe el texto con lenguaje técnico, detallado y especializado, adecuado para universidad o profesionales.
El resultado debe seguir lógicamente el texto original y mantener el mismo significado.
No añadas información nueva, no elimines detalles importantes ni cambies los hechos.
"""
}
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# --- Generate text for a label ---
|
| 48 |
+
def generate_label_text(original_text: str, label: str) -> str:
    """Rewrite *original_text* at the readability level named by *label*.

    Fills the matching LABEL_PROMPTS template and sends it as a single user
    message; returns the stripped model reply.
    """
    filled_prompt = LABEL_PROMPTS[label].format(original_text=original_text)
    completion = client.chat.completions.create(
        model="gpt-5-mini",  # first attempt uses the cheaper model
        messages=[{"role": "user", "content": filled_prompt}],
    )
    return completion.choices[0].message.content.strip()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# --- Regenerate if FH score is out of range ---
|
| 58 |
+
def regenerate_label_text(original_text: str, old_text: str, label: str, target_range: tuple) -> str:
    """Ask a stronger model to rewrite *old_text* so its Fernandez Huerta
    score lands inside *target_range* for readability level *label*.

    Called only after a first generation scored outside the target range.
    Note: the regenerated text is not re-scored here; the caller decides
    whether to validate it again.
    """
    prompt = f"""Texto original:
{original_text}

Texto generado (necesita ajuste):
{old_text}

El texto anterior no cumple con el rango de legibilidad {target_range}.
Reescribe nuevamente el texto en el nivel "{label}", ajustando la dificultad
para que el puntaje de Fernández Huerta quede dentro del rango {target_range}.
El resultado debe seguir lógicamente el texto original y mantener el mismo significado.
No añadas información nueva, no elimines detalles importantes ni cambies los hechos.
"""
    response = client.chat.completions.create(
        model="gpt-5",  # use stronger model for regeneration
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# --- Target ranges for FH ---
|
| 80 |
+
# Target Fernandez Huerta score band per readability label.
# Higher score = easier text; bands meet at the 50 and 70 thresholds.
RANGES = {
    "easy": (70, 100),
    "intermediate": (50, 70),
    "hard": (0, 50)
}
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# --- Full pipeline for one topic ---
|
| 88 |
+
def generate_synthetic_data(original_text: str, original_language: str, topic: str, data_id: int):
    """Generate easy/intermediate/hard rewrites of *original_text*.

    For each readability label: generate once with the cheaper model, score
    the result with the Fernandez Huerta formula, and regenerate once with a
    stronger model when the score falls outside the label's target range.
    The score actually achieved is stored with each version so callers can
    audit range compliance (a single regeneration is not guaranteed to land
    inside the range either).

    Returns a dict with id/language/topic metadata and one entry per label
    under "readability_versions".
    """
    results = {
        "id": data_id,
        "original_text_language": original_language,
        "source_topic": topic,
        "readability_versions": {}
    }

    for label, target_range in RANGES.items():
        # Step 1: generate a first draft.
        text = generate_label_text(original_text, label)

        # Step 2: score it; regenerate once if outside the target band.
        score = fernandez_huerta_score(text)
        if not (target_range[0] <= score <= target_range[1]):
            text = regenerate_label_text(original_text, text, label, target_range)
            # Re-score so the recorded value reflects the final text
            # (the original code kept the stale pre-regeneration score implicit).
            score = fernandez_huerta_score(text)

        # Step 3: save the version together with the score it achieved.
        results["readability_versions"][label] = {
            "readability_level": label,
            "fernandez_huerta_range": f"{target_range[0]}-{target_range[1]}",
            "fernandez_huerta_score": score,
            "target_audience": (
                "Estudiantes de primaria/media (5º a 7º grado)" if label == "easy" else
                "Secundaria/Bachillerato (8º a 12º grado)" if label == "intermediate" else
                "Profesionales / Universidad o posgrado"
            ),
            "text": text
        }

    return results
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
# --- Example usage ---
|
| 122 |
+
# --- Example usage ---
if __name__ == "__main__":
    sample_text = "Se diagnosticó osteoartritis bilateral en un paciente de 61 años con dolor en la ingle."
    synthetic = generate_synthetic_data(sample_text, "es", "Osteoartritis de cadera", 1)
    print(synthetic)
|
code/finetune-inference/old/api_call_vllm.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
# Ensure this matches the model path used in your run_vllm.sh script
|
| 11 |
+
# Ensure this matches the model path used in your run_vllm.sh script
MODEL_NAME = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims_BF16_merged"
# Local vLLM OpenAI-compatible endpoint; the port must match the server launch flags.
API_URL = "http://localhost:8015/v1"
API_KEY = "EMPTY"  # vLLM requires a key, but it can be anything if not set on server

# Initialize Client — speaks the OpenAI API against the local vLLM server.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 17 |
+
|
| 18 |
+
# -----------------------------
|
| 19 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 20 |
+
# -----------------------------
|
| 21 |
+
def extraction_prompt(medical_text: str) -> str:
    """Return the subclaim-extraction prompt with *medical_text* spliced in."""
    template = """
You are an expert medical annotator. Your task is to extract granular, factual subclaims from medical text.
A subclaim is the smallest standalone factual unit that can be independently verified.
Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text.
4. Do not add, guess, or infer information.
5. Each subclaim should be short, specific, and verifiable.
6. Return ONLY a Python-style list of strings.
Medical Text:
{medical_text}
Return your output in JSON list format, like:
[
"subclaim 1",
"subclaim 2",
...
]
"""
    return template.format(medical_text=medical_text)
|
| 42 |
+
|
| 43 |
+
# -----------------------------
|
| 44 |
+
# INFERENCE FUNCTION (vLLM)
|
| 45 |
+
# -----------------------------
|
| 46 |
+
def infer_subclaims(medical_text: str, temperature: float = 0.2) -> str:
    """Sends prompt to vLLM server and returns generated text, or None on failure."""
    request_prompt = extraction_prompt(medical_text)

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": request_prompt}],
            max_tokens=1000,  # cap generation length
            temperature=temperature,
            top_p=0.9,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        answer = completion.choices[0].message.content.strip()
        # Keep only the text after any chain-of-thought closing tag.
        return answer.split("</think>")[-1].strip()
    except Exception as e:
        print(f"Error during API call: {e}")
        return None
|
| 71 |
+
|
| 72 |
+
# -----------------------------
|
| 73 |
+
# MAIN EXECUTION
|
| 74 |
+
# -----------------------------
|
| 75 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True,
                        help="Path to the input JSON file containing medical texts.")
    args = parser.parse_args()

    INPUT_FILE = args.input_file
    file_name = os.path.basename(INPUT_FILE).split(".json")[0]

    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    os.makedirs(SAVE_FOLDER, exist_ok=True)
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"extracted_subclaims_{file_name}.json")

    # Load input dataset
    with open(INPUT_FILE, "r") as f:
        data = json.load(f)

    # Load existing results (resume mode)
    result = []
    if os.path.exists(OUTPUT_FILE):
        with open(OUTPUT_FILE, "r") as f:
            try:
                result = json.load(f)
            except json.JSONDecodeError:
                result = []

    existing_ids = {item["id"] for item in result}

    print(f"Starting inference on {len(data)} items using vLLM server...")

    # --------------------------------------------------------
    # PROCESS EACH MEDICAL TEXT
    # --------------------------------------------------------
    for item in tqdm.tqdm(data):
        if item["id"] in existing_ids:
            continue

        medical_text = item.get("fulltext", "")

        # Call the vLLM inference function
        extracted = infer_subclaims(medical_text)

        result.append({
            "id": item["id"],
            "medical_text": medical_text,
            "subclaims": extracted,
            "summary": item.get("summary", "")
        })

        # Checkpoint every 20 entries.
        # BUG FIX: the previous version gated json.dump behind a `save` flag
        # that was always False, so each open(..., "w") truncated the output
        # file without ever writing results (destroying resume state too).
        if len(result) % 20 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(result, f, indent=4, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

    print(f"Extraction completed. Saved to {OUTPUT_FILE}")
|
code/finetune-inference/old/attribution_reasoning.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import sys
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
import ast,os
|
| 5 |
+
# ===========================
|
| 6 |
+
# CONFIGURATION
|
| 7 |
+
# ===========================
|
| 8 |
+
# Path to the merged fine-tuned reasonableness-check model served by vLLM.
MODEL_NAME = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1_BF16_merged"
VLLM_API_URL = "http://localhost:8004/v1"
VLLM_API_KEY = "EMPTY"  # vLLM ignores the key unless the server enforces one

# Initialize Client — OpenAI-compatible client pointed at the local vLLM server.
client = OpenAI(
    base_url=VLLM_API_URL,
    api_key=VLLM_API_KEY,
)
|
| 17 |
+
|
| 18 |
+
# ===========================
|
| 19 |
+
# INFERENCE FUNCTION
|
| 20 |
+
# ===========================
|
| 21 |
+
def infer_reasonableness(
    fulltext: str,
    generated_summary: str,
    readability_level: str,
    subclaim_text: str,
    result: int,
):
    """
    Predict reasonableness using the local vLLM server.
    No error handling: validation or connection errors will raise exceptions.

    Args:
        fulltext: Reference source document.
        generated_summary: Model-generated summary being audited.
        readability_level: "easy" / "intermediate" / "hard".
        subclaim_text: The subclaim under evaluation.
        result: 1 = supported by the summary, 0 = unsupported.

    Returns:
        The parsed dict from the model's JSON output when parsing succeeds,
        otherwise the raw (post-</think>) output string.
    """

    # ---- Build inference prompt ----
    # NOTE(review): the task section asks for "reasonable addition /
    # unnecessary but harmless / misleading-hallucinated" but the output
    # schema lists "reasonable | partially_reasonable | unreasonable" —
    # confirm which label set downstream consumers actually expect.
    prompt = f"""
### **SYSTEM / ROLE INSTRUCTION**

You are a **medical factuality and attribution evaluator**.
You will assess whether the **unsupported subclaim** in a generated summary (when `"result": 0"`) is a *reasonable addition* given the readability level (*easy / intermediate / hard*).

The goal is to decide whether this **extra piece of information** is an acceptable simplification or a *hallucination* that reduces factual faithfulness.

---

### **READABILITY & ATTRIBUTION GUIDELINES**

| Level | Audience | Linguistic & Stylistic Profile | Content Goal | Allowable Additions |
| :-- | :-- | :-- | :-- | :-- |
| **Easy (FH 70–100, grade 5–7)** | General public; early secondary readers | Short, direct sentences using common vocabulary and concrete ideas. Avoid subordinate clauses and technical terms. Tone should be explanatory, lively, and highly accessible. | Simplify and clarify events and outcomes without introducing technical or diagnostic details. | General background context or plain-language explanations are acceptable; **no new facts, data, or inferred medical claims.** |
| **Intermediate (FH 50–69, grade 8–12)** | Educated layperson / medical student | Moderate sentence length and complexity. Vocabulary suitable for high-school or introductory science readers. May include limited domain terms with brief clarification. | Present essential medical content with clear logic and limited detail, ensuring readability for non-experts. | Brief clarifications, definitions, or causal links consistent with the source are allowed; **avoid speculative or unconfirmed data.** |
| **Hard (FH 0–49, university / professional)** | Medical professionals / technical audience | Long, multi-clause sentences; formal academic tone. Incorporate precise domain vocabulary, causal and analytical connectors (e.g., *por consiguiente*, *sin embargo*, *en virtud de*, *dado que*), at least one definition, one process description, and one statement of implications or challenges. | Preserve full factual accuracy, diagnostic precision, and interpretive nuance expected in professional discourse. | Additions are **not permitted**; every statement must be directly supported by the reference text. Parenthetical clarifications or relative clauses may be used for cohesion, not new content. |

---

### **Input**

```
Readability Level: {readability_level}

Reference Full Text:
{fulltext}

Generated Summary:
{generated_summary}

Subclaim: "{subclaim_text}"
Result: {result} # 1 = supported (included), 0 = unsupported
```

---

### **TASK INSTRUCTIONS**

If `"result": 0"`, judge whether including this subclaim is **reasonable** for the given readability level.
Choose one of: `"reasonable addition"`, `"unnecessary but harmless"`, `"misleading / hallucinated"`.
Provide a **1–2 sentence justification** describing your reasoning.

---

### **Output Format**

Return structured JSON:

```json
{{
  "evaluation": {{
    "reasonableness": "<reasonable | partially_reasonable | unreasonable>",
    "justification": "<short explanation>"
  }}
}}
```
""".strip()

    messages = [{"role": "user", "content": prompt}]

    # ---- Call vLLM Server ----
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=0.2,
        max_tokens=200,
        top_p=0.8,
    )

    output_text = response.choices[0].message.content

    # ---- Clean Output (Handle Thinking & Markdown) ----
    try:
        # Drop any chain-of-thought prefix before the closing </think> tag.
        if "</think>" in output_text:
            output_text = output_text.split("</think>")[1]

        # Strip markdown code fences so only the JSON payload remains.
        clean_text = output_text.strip().replace("```json", "").replace("```", "").strip()
        # import ipdb; ipdb.set_trace()
        # ast.literal_eval parses the JSON-like dict; on any parse failure we
        # fall through to the except and return the raw text instead.
        t=ast.literal_eval(clean_text)

        return t
    except Exception as e:
        return output_text
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
# ===========================
|
| 122 |
+
# MAIN EXECUTION
|
| 123 |
+
# ===========================
|
| 124 |
+
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, required=True,
                        help="Path to the JSON file containing evaluation data.")
    args = parser.parse_args()
    data_path = args.data_path
    file_name = os.path.basename(data_path)

    # Open file directly (Will raise FileNotFoundError if missing)
    with open(data_path, 'r') as f:
        dataset = json.load(f)

    save_path = f'/home/mshahidul/readctrl/data/attribution_reasoning_result/{file_name}'
    os.makedirs('/home/mshahidul/readctrl/data/attribution_reasoning_result/', exist_ok=True)
    full_results = []
    if os.path.exists(save_path):
        with open(save_path, 'r') as f:
            full_results = json.load(f)

    # PERF FIX: the previous version rescanned full_results for every dataset
    # item (O(n^2)); keep processed ids in a set for O(1) resume checks.
    done_ids = {d['id'] for d in full_results}

    import tqdm
    for item in tqdm.tqdm(dataset):
        if item['id'] in done_ids:
            continue
        fulltext = item['fulltext']
        per_level = {}
        for label in ['easy', 'intermediate', 'hard']:
            generated_summary = item[f'{label}_text']
            subclaim_list = item['metrics'][f'{label}']['attribution']['details']
            judged = []
            for subclaim in subclaim_list:
                # 1 = the verifier marked the subclaim as supported.
                result = 1 if subclaim['label'] == 'supported' else 0

                if result == 0:
                    # Only unsupported subclaims need a reasonableness judgment.
                    output = infer_reasonableness(
                        fulltext=fulltext,
                        generated_summary=generated_summary,
                        readability_level=label,
                        subclaim_text=subclaim['subclaim'],
                        result=result,
                    )
                    judged.append({
                        'subclaim': subclaim['subclaim'],
                        'output': output
                    })
                else:
                    # Supported subclaims are trivially reasonable; skip the model call.
                    judged.append({
                        'subclaim': subclaim['subclaim'],
                        'output': {
                            'reasonableness': 'reasonable',
                            'justification': 'The subclaim is included in the generated summary, hence it is reasonable.'
                        }
                    })

            per_level[label] = {
                'results': judged
            }
        full_results.append({
            'id': item['id'],
            'completeness': per_level
        })
        done_ids.add(item['id'])
        # Checkpoint every 10 processed items.
        if len(full_results) % 10 == 0:
            with open(save_path, 'w') as f:
                json.dump(full_results, f, indent=2, ensure_ascii=False)

    # Final save.
    with open(save_path, 'w') as f:
        json.dump(full_results, f, indent=2, ensure_ascii=False)
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
|
code/finetune-inference/old/completeness_conciseness_attribution_cal.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 3 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
|
| 4 |
+
import torch
|
| 5 |
+
from unsloth import FastLanguageModel
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
# Optional: wrap model/tokenizer in a singleton pattern for repeated use
|
| 9 |
+
# Module-level singleton so repeated calls reuse the loaded model/tokenizer.
# NOTE(review): the cache is not keyed on the path — the first model_path
# wins and later calls with a different path silently get the cached model.
_model_cache = {"model": None, "tokenizer": None}

def load_finetuned_model(model_path: str):
    """Load and cache your fine-tuned model + tokenizer.

    Loads from *model_path* on the first call only; afterwards the cached
    (model, tokenizer) pair is returned regardless of the path argument.
    """
    if _model_cache["model"] is not None:
        return _model_cache["model"], _model_cache["tokenizer"]


    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=4092,  # NOTE(review): likely meant 4096 — confirm
        load_in_4bit=False,
        load_in_8bit=False,
        full_finetuning=False,
    )
    _model_cache["model"], _model_cache["tokenizer"] = model, tokenizer
    return model, tokenizer
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def infer_subclaim(text: str, subclaim: str, model_path: str = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-verifier_lora_nonreasoning", cuda_device: str = "0") -> str:
    """
    Given a medical text and a subclaim, returns '1' if the text supports the subclaim, otherwise '0'.

    Args:
        text: Source medical text used as evidence.
        subclaim: Candidate factual statement to verify against *text*.
        model_path: Fine-tuned verifier checkpoint (loaded once via load_finetuned_model).
        cuda_device: Unused; device selection happens through
            CUDA_VISIBLE_DEVICES at module import time.
    """
    model, tokenizer = load_finetuned_model(model_path)

    # Build prompt (the same structure you trained on)
    prompt = f"""
Given the following medical text and subclaim, decide if the text supports the subclaim.
Text: {text}
Subclaim: {subclaim}
Respond only with 1 if the text supports the subclaim, otherwise 0.
""".strip()

    messages = [{"role": "user", "content": prompt + "\n"}]

    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=10,
            temperature=0.1,
            top_p=0.8,
            top_k=5,
        )
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
    # BUG FIX: split("</think>")[1] raised IndexError whenever the output
    # contained no "</think>" tag (thinking is disabled above).  [-1] is
    # identical when the tag is present and degrades gracefully when absent
    # — same pattern the vLLM extraction script uses.
    return output_text.split("</think>")[-1].strip()
|
| 63 |
+
|
| 64 |
+
if __name__ == "__main__":
    # example_text = (
    #     "Una niña nacida a las 34 semanas de gestación precisó intubación y ventilación al nacer..."
    # )
    # example_subclaim = "La paciente es una recién nacida prematura."

    def _score_subclaims(metric, version, example_text, example_subclaims):
        """Verify each subclaim against example_text and aggregate 0/1 verdicts.

        Shared engine for the three metrics below: calls infer_subclaim per
        subclaim, counts only responses containing a clear "1"/"0" verdict
        toward `total`, and reports percentage accuracy.
        """
        res = []
        total = 0
        correct = 0
        for example_subclaim in example_subclaims:
            result = infer_subclaim(example_text, example_subclaim)
            # "1" is checked first, so a response containing both digits is
            # counted as supported (matches the original per-metric loops).
            if "1" in result:
                correct += 1
                total += 1
            elif "0" in result:
                total += 1
            res.append({
                "subclaim": example_subclaim,
                "result": result
            })
        return {
            "metric": metric,
            "version": version,
            "input_text": example_text,
            "results": res,
            "total": total,
            "correct": correct,
            "accuracy": (correct / total) * 100 if total > 0 else 0,
        }

    def process_completeness(example, version):
        """Completeness: are reference-summary subclaims supported by the generated text?"""
        return _score_subclaims(
            "completeness",
            version,
            example["readability_versions"][version]['text'],
            example['ref_summary']["subclaims"],
        )

    def process_conciseness(example, version):
        """Conciseness: are generated-version subclaims supported by the reference summary?"""
        return _score_subclaims(
            "conciseness",
            version,
            example["ref_summary"]['text'],
            example["readability_versions"][version]["subclaims"],
        )

    def process_attribution(example, version):
        """Attribution: are generated-version subclaims supported by the full source text?"""
        return _score_subclaims(
            "attribution",
            version,
            example['full_text'],
            example["readability_versions"][version]["subclaims"],
        )

    with open("/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    import tqdm
    full_data_results = []
    save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
    for item in tqdm.tqdm(data):
        print(f"Processing item ID: {item['id']}")
        for version in ["easy", "intermediate", "hard"]:
            completeness = process_completeness(item, version)
            conciseness = process_conciseness(item, version)
            attribution = process_attribution(item, version)
            full_data_results.append({
                "id": item["id"],
                "version": version,
                "completeness": completeness,
                "conciseness": conciseness,
                "attribution": attribution
            })
            # Checkpoint partial results every 5 (id, version) pairs so a crash
            # does not lose the whole run.
            if len(full_data_results) % 5 == 0:
                with open(save_path, "w", encoding="utf-8") as f:
                    json.dump(full_data_results, f, indent=4, ensure_ascii=False)
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(full_data_results, f, indent=4, ensure_ascii=False)
|
code/finetune-inference/old/completeness_reasoning_v1.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 3 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
|
| 4 |
+
import torch
|
| 5 |
+
from unsloth import FastLanguageModel
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
# ===========================
|
| 9 |
+
# GPU SETTINGS
|
| 10 |
+
# ===========================
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ===========================
|
| 14 |
+
# MODEL LOADING (CACHED)
|
| 15 |
+
# ===========================
|
| 16 |
+
_model_cache = {"model": None, "tokenizer": None}
|
| 17 |
+
|
| 18 |
+
def load_finetuned_model(model_path: str):
    """Load and cache the fine-tuned model + tokenizer.

    The cache is keyed by ``model_path``: the original version returned the
    first model ever loaded regardless of the path requested, which silently
    served the wrong checkpoint when callers passed a different one (as
    ``__main__`` in this file does).
    """
    if _model_cache["model"] is not None and _model_cache.get("path") == model_path:
        return _model_cache["model"], _model_cache["tokenizer"]

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=4096,
        load_in_4bit=False,
        load_in_8bit=False,
        full_finetuning=False,
    )
    _model_cache["model"], _model_cache["tokenizer"] = model, tokenizer
    # Remember which checkpoint is cached so a different path triggers a reload.
    _model_cache["path"] = model_path
    return model, tokenizer
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ===========================
|
| 35 |
+
# INFERENCE FUNCTION
|
| 36 |
+
# ===========================
|
| 37 |
+
def infer_reasonableness(
    reference_summary: str,
    generated_summary: str,
    readability_level: str,
    subclaim_text: str,
    result: int,
    model_path: str = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-completeness_resonability_check_v2",
):
    """
    Given the reference summary, generated summary, readability level, subclaim, and its result (0/1),
    predict reasonableness: reasonable / partially_reasonable / unreasonable, plus justification.

    Returns the parsed JSON dict on success, or the raw model text if the
    output is not valid JSON.
    """
    model, tokenizer = load_finetuned_model(model_path)

    # ---- Build inference prompt (same structure as training) ----
    prompt = f"""
You are an impartial medical summarization evaluator.

Goal:
Decide whether the inclusion or omission of ONE specific subclaim from the reference summary is *reasonable*, given the readability level of the generated summary.

Readability Criteria:
- Easy: for non-medical readers; emphasize main story and outcomes; omit numerical data, anatomy, and test details.
- Intermediate: for general educated readers; keep main findings but simplify phrasing.
- Hard: for clinical or technical readers; maintain diagnostic accuracy and essential quantitative or anatomic content.

Judging rules:
* Base your decision strictly on what appears in the generated summary.
* If result = 0 (subclaim omitted) and the omitted detail is clearly technical or numerical for the given level, choose "reasonable".
* If result = 0 and the subclaim is essential to the main story, choose "unreasonable".
* Stay consistent between `result`, justification, and readability level.

### Inputs
Readability Level: {readability_level}
Reference Summary: {reference_summary}
Generated Summary: {generated_summary}
Subclaim: "{subclaim_text}"
Result: {result} # 1 = supported (included), 0 = omitted

### Task
Respond **only** with the following JSON object:

{{
"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
"justification": "<short clear explanation>"
}}
""".strip()

    messages = [{"role": "user", "content": prompt + "\n"}]

    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # important for Unsloth chat template
    )

    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

    # ---- Generate output ----
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=150,
            # Greedy decoding. The original also passed temperature/top_p/top_k,
            # which transformers ignores when do_sample=False.
            do_sample=False,
        )

    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
    # [-1] instead of [1]: when the model emits no "</think>" tag at all
    # (thinking disabled), [1] raised IndexError; [-1] is identical when the
    # tag is present and falls back to the whole text when it is not.
    output_text = output_text.split("</think>")[-1].strip()
    # ---- Extract model JSON output ----
    try:
        parsed = json.loads(output_text)
    except Exception:
        # Return the raw text so callers can inspect malformed output.
        parsed = output_text
    return parsed
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# ===========================
|
| 119 |
+
# EXAMPLE USAGE
|
| 120 |
+
# ===========================
|
| 121 |
+
if __name__ == "__main__":
    # reference_summary = "Una niña nacida a las 34 semanas de gestación precisó intubación..."
    # generated_summary = "Esta es la historia de una niña que nació antes de tiempo, a las 34 semanas..."
    # subclaim_text = "La paciente presentaba hiperinsulinismo en el período neonatal."
    # readability_level = "easy"
    # result = 0 # omitted
    import json
    # Gold-standard data: map case id -> reference summary and full clinical text.
    with open('/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_es.json', 'r') as f:
        multiclinsum_gs_train_es_data = json.load(f)
    ref_summaries={}
    fulltexts={}
    for item in multiclinsum_gs_train_es_data:
        ref_summaries[item['id']]=item['summary']
        fulltexts[item['id']]=item['fulltext']

    # Generated summaries keyed by (case id, readability version).
    generated_summaries = {}
    with open('/home/mshahidul/readctrl/data/hand_create_gpt5_other_model/synthetic_data_es_raw_592.json', 'r') as f:
        synthetic_data_es_raw_592 = json.load(f)
    for item in synthetic_data_es_raw_592:
        for version in ['easy', 'intermediate', 'hard']:
            generated_summaries[(item['id'], version)] = item['readability_versions'][version]['text']
    # /home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json
    # Per-subclaim 0/1 verdicts previously produced by the subclaim verifier.
    with open("/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json", 'r') as f:
        qwen3_32B_results = json.load(f)
    full_res = []
    save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/completeness_resonability_check_100_qwen3-32B_v3.json"
    import tqdm
    for idx, item in tqdm.tqdm(enumerate(qwen3_32B_results)):
        print(f"Processing item {idx + 1}/{len(qwen3_32B_results)}")
        reference_summary = ref_summaries[item['id']]
        fulltext = fulltexts[item['id']]
        generated_summary = generated_summaries[(item['id'], item['version'])]
        temp_res = []
        for item2 in item['completeness']['results']:
            subclaim_text = item2['subclaim']['subclaim']
            result = item2['result']
            # Supported subclaims ("1") need no reasonableness judgement;
            # only omissions are sent to the model.
            if result =="1":
                continue
            response = infer_reasonableness(
                reference_summary,
                generated_summary,
                item['version'],
                subclaim_text,
                result,
                # NOTE(review): this overrides infer_reasonableness's default
                # model path — confirm which checkpoint is intended.
                model_path="/home/mshahidul/readctrl_model/qwen3-32B_subclaims-completeness_resonability_check",
            )
            temp_res.append({
                'id':item2['subclaim']['id'],
                "subclaim": subclaim_text,
                "result": result,
                "reasonableness": response
            })
        full_res.append({
            "id": item['id'],
            "version": item['version'],
            "completeness": {
                "results": temp_res
            }
        })
        # Checkpoint partial results every 10 items.
        if len(full_res)%10==0:
            with open(save_path, 'w') as f:
                json.dump(full_res, f, indent=2, ensure_ascii=False)

    # Final write with whatever completed.
    with open(save_path, 'w') as f:
        json.dump(full_res, f, indent=2, ensure_ascii=False)
|
| 186 |
+
|
code/finetune-inference/old/completeness_reasoning_v2.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 3 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
|
| 4 |
+
import torch
|
| 5 |
+
from unsloth import FastLanguageModel
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
# ===========================
|
| 9 |
+
# GPU SETTINGS
|
| 10 |
+
# ===========================
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ===========================
|
| 14 |
+
# MODEL LOADING (CACHED)
|
| 15 |
+
# ===========================
|
| 16 |
+
_model_cache = {"model": None, "tokenizer": None}
|
| 17 |
+
|
| 18 |
+
def load_finetuned_model(model_path: str):
    """Load and cache the fine-tuned model + tokenizer.

    The cache is keyed by ``model_path``: the original version returned the
    first model ever loaded regardless of the path requested, which silently
    served the wrong checkpoint when callers passed a different one.
    """
    if _model_cache["model"] is not None and _model_cache.get("path") == model_path:
        return _model_cache["model"], _model_cache["tokenizer"]

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=4096,
        load_in_4bit=False,
        load_in_8bit=False,
        full_finetuning=False,
    )
    _model_cache["model"], _model_cache["tokenizer"] = model, tokenizer
    # Remember which checkpoint is cached so a different path triggers a reload.
    _model_cache["path"] = model_path
    return model, tokenizer
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ===========================
|
| 35 |
+
# INFERENCE FUNCTION
|
| 36 |
+
# ===========================
|
| 37 |
+
def infer_reasonableness(
    reference_summary: str,
    generated_summary: str,
    readability_level: str,
    subclaim_text: str,
    result: int,
    model_path: str = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-completeness_resonability_check_8kCtx_v3",
):
    """
    Given the reference summary, generated summary, readability level, subclaim, and its result (0/1),
    predict reasonableness: reasonable / partially_reasonable / unreasonable, plus justification.

    Returns the parsed JSON dict on success, or the raw model text if the
    output is not valid JSON.
    """
    model, tokenizer = load_finetuned_model(model_path)

    # ---- Build inference prompt (same structure as training) ----
    prompt = f"""
You are an impartial medical summarization evaluator.

Goal:
Decide whether the inclusion or omission of ONE specific subclaim from the reference summary is *reasonable*, given the readability level of the generated summary.

Readability Criteria:
- Easy: for non-medical readers; emphasize main story and outcomes; omit numerical data, anatomy, and test details.
- Intermediate: for general educated readers; keep main findings but simplify phrasing.
- Hard: for clinical or technical readers; maintain diagnostic accuracy and essential quantitative or anatomic content.

Judging rules:
* Base your decision strictly on what appears in the generated summary.
* If result = 0 (subclaim omitted) and the omitted detail is clearly technical or numerical for the given level, choose "reasonable".
* If result = 0 and the subclaim is essential to the main story, choose "unreasonable".
* Stay consistent between `result`, justification, and readability level.

### Inputs
Readability Level: {readability_level}
Reference Summary: {reference_summary}
Generated Summary: {generated_summary}
Subclaim: "{subclaim_text}"
Result: {result} # 1 = supported (included), 0 = omitted

### Task
Respond **only** with the following JSON object:

{{
"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
"justification": "<short clear explanation>"
}}
""".strip()

    messages = [{"role": "user", "content": prompt + "\n"}]

    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # important for Unsloth chat template
    )

    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

    # ---- Generate output ----
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=150,
            # Greedy decoding. The original also passed temperature/top_p/top_k,
            # which transformers ignores when do_sample=False.
            do_sample=False,
        )

    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
    # [-1] instead of [1]: when the model emits no "</think>" tag at all
    # (thinking disabled), [1] raised IndexError; [-1] is identical when the
    # tag is present and falls back to the whole text when it is not.
    # Also strip markdown code fences the model may wrap the JSON in.
    output_text = output_text.split("</think>")[-1].strip().replace("```json", "").replace("```", "")
    # ---- Extract model JSON output ----
    try:
        parsed = json.loads(output_text)
    except Exception:
        # Return the raw text so callers can inspect malformed output.
        parsed = output_text
    return parsed
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# ===========================
|
| 119 |
+
# EXAMPLE USAGE
|
| 120 |
+
# ===========================
|
| 121 |
+
if __name__ == "__main__":
    # reference_summary = "Una niña nacida a las 34 semanas de gestación precisó intubación..."
    # generated_summary = "Esta es la historia de una niña que nació antes de tiempo, a las 34 semanas..."
    # subclaim_text = "La paciente presentaba hiperinsulinismo en el período neonatal."
    # readability_level = "easy"
    # result = 0 # omitted
    import json
    # Gold-standard data: map case id -> reference summary and full clinical text.
    with open('/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_es.json', 'r') as f:
        multiclinsum_gs_train_es_data = json.load(f)
    ref_summaries={}
    fulltexts={}
    for item in multiclinsum_gs_train_es_data:
        ref_summaries[item['id']]=item['summary']
        fulltexts[item['id']]=item['fulltext']

    # Generated summaries keyed by (case id, readability version).
    generated_summaries = {}
    with open('/home/mshahidul/readctrl/data/hand_create_gpt5_other_model/synthetic_data_es_raw_592.json', 'r') as f:
        synthetic_data_es_raw_592 = json.load(f)
    for item in synthetic_data_es_raw_592:
        for version in ['easy', 'intermediate', 'hard']:
            generated_summaries[(item['id'], version)] = item['readability_versions'][version]['text']
    # /home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json
    # Per-subclaim 0/1 verdicts previously produced by the subclaim verifier.
    with open("/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json", 'r') as f:
        qwen3_32B_results = json.load(f)
    full_res = []
    save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/completeness_resonability_check_100_qwen3-32B_v4.json"
    import tqdm
    for idx, item in tqdm.tqdm(enumerate(qwen3_32B_results)):
        print(f"Processing item {idx + 1}/{len(qwen3_32B_results)}")
        reference_summary = ref_summaries[item['id']]
        fulltext = fulltexts[item['id']]
        generated_summary = generated_summaries[(item['id'], item['version'])]
        temp_res = []
        for item2 in item['completeness']['results']:
            subclaim_text = item2['subclaim']['subclaim']
            result = item2['result']
            # Supported subclaims ("1") need no reasonableness judgement;
            # only omissions are sent to the model.
            if result =="1":
                continue
            response = infer_reasonableness(
                reference_summary,
                generated_summary,
                item['version'],
                subclaim_text,
                result,
                model_path="/home/mshahidul/readctrl_model/qwen3-32B_subclaims-completeness_resonability_check_8kCtx_v3",
            )
            temp_res.append({
                'id':item2['subclaim']['id'],
                "subclaim": subclaim_text,
                "result": result,
                "reasonableness": response
            })
        full_res.append({
            "id": item['id'],
            "version": item['version'],
            "completeness": {
                "results": temp_res
            }
        })
        # Checkpoint partial results every 10 items.
        if len(full_res)%10==0:
            with open(save_path, 'w') as f:
                json.dump(full_res, f, indent=2, ensure_ascii=False)

    # Final write with whatever completed.
    with open(save_path, 'w') as f:
        json.dump(full_res, f, indent=2, ensure_ascii=False)
|
| 186 |
+
|
code/finetune-inference/old/completeness_reasoning_v3.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import sys
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
import ast,os
|
| 5 |
+
# ===========================
|
| 6 |
+
# CONFIGURATION
|
| 7 |
+
# ===========================
|
| 8 |
+
MODEL_NAME = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-completeness_resonability_check_8kCtx_v3_BF16_merged"
|
| 9 |
+
VLLM_API_URL = "http://localhost:8004/v1"
|
| 10 |
+
VLLM_API_KEY = "EMPTY"
|
| 11 |
+
|
| 12 |
+
# Initialize Client
|
| 13 |
+
client = OpenAI(
|
| 14 |
+
base_url=VLLM_API_URL,
|
| 15 |
+
api_key=VLLM_API_KEY,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# ===========================
|
| 19 |
+
# INFERENCE FUNCTION
|
| 20 |
+
# ===========================
|
| 21 |
+
def infer_reasonableness(
    reference_summary: str,
    generated_summary: str,
    readability_level: str,
    subclaim_text: str,
    result: int,
):
    """
    Predict reasonableness using the local vLLM server.

    Returns the parsed JSON object (dict) on success, or the raw model output
    (str) when parsing fails. Connection errors propagate to the caller.
    """

    # ---- Build inference prompt ----
    prompt = f"""
You are an impartial medical summarization evaluator.

Goal:
Decide whether the inclusion or omission of ONE specific subclaim from the reference summary is *reasonable*, given the readability level of the generated summary.

Readability Criteria:
- Easy: for non-medical readers; emphasize main story and outcomes; omit numerical data, anatomy, and test details.
- Intermediate: for general educated readers; keep main findings but simplify phrasing.
- Hard: for clinical or technical readers; maintain diagnostic accuracy and essential quantitative or anatomic content.

Judging rules:
* Base your decision strictly on what appears in the generated summary.
* If result = 0 (subclaim omitted) and the omitted detail is clearly technical or numerical for the given level, choose "reasonable".
* If result = 0 and the subclaim is essential to the main story, choose "unreasonable".
* Stay consistent between `result`, justification, and readability level.

### Inputs
Readability Level: {readability_level}
Reference Summary: {reference_summary}
Generated Summary: {generated_summary}
Subclaim: "{subclaim_text}"
Result: {result} # 1 = supported (included), 0 = omitted

### Task
Respond **only** with the following JSON object:

{{
"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
"justification": "<short clear explanation>"
}}
""".strip()

    messages = [{"role": "user", "content": prompt}]

    # ---- Call vLLM Server ----
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=0.2,
        max_tokens=200,
        top_p=0.8,
    )

    output_text = response.choices[0].message.content

    # ---- Clean Output (Handle Thinking & Markdown) ----
    try:
        # Drop any "<think>...</think>" reasoning block.
        if "</think>" in output_text:
            output_text = output_text.split("</think>")[1]

        # Strip markdown code fences the model may wrap the JSON in.
        clean_text = output_text.strip().replace("```json", "").replace("```", "").strip()

        # The model is instructed to emit JSON, so parse with json.loads first;
        # the original used ast.literal_eval, which rejects valid JSON tokens
        # such as true/false/null. Keep literal_eval as a fallback for
        # Python-literal-style output (single quotes etc.).
        try:
            return json.loads(clean_text)
        except (json.JSONDecodeError, ValueError):
            return ast.literal_eval(clean_text)
    except Exception:
        # Malformed output: hand back the raw text for the caller to inspect.
        return output_text
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# ===========================
|
| 96 |
+
# MAIN EXECUTION
|
| 97 |
+
# ===========================
|
| 98 |
+
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, required=True,
                        help="Path to the JSON file containing evaluation data.")
    args = parser.parse_args()
    data_path = args.data_path
    # data_path = '/home/mshahidul/readctrl/data/concise_complete_attr_cal_v3/evaluated_metrics_0_100.json'
    file_name=os.path.basename(data_path)

    # Open file directly (Will raise FileNotFoundError if missing)
    with open(data_path, 'r') as f:
        dataset = json.load(f)

    # print(f"Loaded {len(dataset)} examples. Starting inference...")
    # Output file mirrors the input file name under the results directory.
    save_path = f'/home/mshahidul/readctrl/data/completeness_resoning_result/{file_name}'
    full_results = []
    # Resume support: reload previously saved results so finished ids are skipped.
    if os.path.exists(save_path):
        with open(save_path, 'r') as f:
            full_results = json.load(f)

    import tqdm
    for item in tqdm.tqdm(dataset):
        # Skip items already present in the (possibly resumed) results.
        if any(d['id'] == item['id'] for d in full_results):
            continue
        reference_summary = item['summary']
        temp2={}
        for label in ['easy', 'intermediate', 'hard']:
            generated_summary = item[f'{label}_text']
            subclaim_list = item['metrics'][f'{label}']['completeness']['details']
            temp=[]
            for idx, subclaim in enumerate(subclaim_list):

                # Check status (assumes subclaim variable holds the status string)
                result = 1 if subclaim['label'] == 'supported' else 0

                # Only omitted subclaims are judged by the model; supported
                # ones are trivially "reasonable" (see else branch).
                if result ==0:
                    output = infer_reasonableness(
                        reference_summary=reference_summary,
                        generated_summary=generated_summary,
                        readability_level=label,
                        subclaim_text=subclaim['subclaim'],
                        result=result,
                    )

                    temp.append({
                        'subclaim': subclaim['subclaim'],
                        'output': output
                    })
                else:
                    temp.append({
                        'subclaim': subclaim['subclaim'],
                        'output': {
                            'reasonableness': 'reasonable',
                            'justification': 'The subclaim is included in the generated summary, hence it is reasonable.'
                        }
                    })

            temp2[label] = {
                'results': temp
            }
        full_results.append({
            'id': item['id'],
            'completeness': temp2
        })
        # Checkpoint partial results every 10 items.
        if len(full_results) % 10 == 0:
            with open(save_path, 'w') as f:
                json.dump(full_results, f, indent=2, ensure_ascii=False)

    # Final write with whatever completed.
    with open(save_path, 'w') as f:
        json.dump(full_results, f, indent=2, ensure_ascii=False)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
|
code/finetune-inference/old/extracting_subclaims.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
MODEL_NAME = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims_BF16_merged"
|
| 11 |
+
API_URL = "http://localhost:8015/v1"
|
| 12 |
+
API_KEY = "EMPTY"
|
| 13 |
+
|
| 14 |
+
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the subclaim-extraction prompt for *medical_text*.

    Returns a single user-message string containing the annotation rules,
    the source text, and the expected JSON-list output format.
    """
    # NOTE: the prompt body is flush-left inside the f-string so the model
    # receives it without leading whitespace.
    return f"""
You are an expert medical annotator. Extract granular, factual subclaims.
A subclaim is the smallest standalone factual unit that can be independently verified.

Rules:
- Use only information explicitly present in the text.
- Do not infer or hallucinate.
- Subclaims must be atomic and factual.
- Return ONLY a JSON list of strings.

Medical Text:
{medical_text}

Return output as:
[
"subclaim 1",
"subclaim 2",
...
]
"""
|
| 40 |
+
|
| 41 |
+
# -----------------------------
|
| 42 |
+
# INFERENCE FUNCTION
|
| 43 |
+
# -----------------------------
|
| 44 |
+
def infer_subclaims(medical_text: str, temperature: float = 0.2) -> list:
    """Extract subclaims from *medical_text* via the local OpenAI-compatible API.

    Args:
        medical_text: Source text; blank/None input short-circuits to [].
        temperature: Sampling temperature for the completion call.

    Returns:
        A list of subclaim strings parsed from the model's JSON reply.
        If the reply is not valid JSON the raw reply is wrapped in a
        one-element list; on API/transport failure returns [].
    """
    if not medical_text or medical_text.strip() == "":
        return []

    final_prompt = extraction_prompt(medical_text)

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": final_prompt}],
            max_tokens=1000,
            temperature=temperature,
            top_p=0.9,
        )
        res = response.choices[0].message.content.strip()
        # Drop any <think>...</think> reasoning prefix emitted by the model.
        res = res.split("</think>")[-1].strip()

        try:
            return json.loads(res)
        except json.JSONDecodeError:
            # Fix: was a bare `except:` returning the raw string, which
            # violated the declared `-> list` return type. Wrap the reply in
            # a list instead, matching the v2/v3 fallback behavior.
            return [res]

    except Exception as e:
        # Best-effort: log and keep the batch pipeline running on API errors.
        print(f"API error: {e}")
        return []
|
| 70 |
+
|
| 71 |
+
# -----------------------------
|
| 72 |
+
# MAIN
|
| 73 |
+
# -----------------------------
|
| 74 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--file1", type=str, required=True,
                        help="Path to synthetic_data_es_raw_592.json")
    parser.add_argument("--file2", type=str, required=True,
                        help="Path to multiclinsum_gs_train_es.json")

    parser.add_argument("--start_index", type=int, default=0,
                        help="Start index for processing")
    parser.add_argument("--end_index", type=int, default=-1,
                        help="End index for processing (exclusive). -1 = until end")

    args = parser.parse_args()

    FILE1 = args.file1
    FILE2 = args.file2

    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # Output filename includes the processed index range so parallel shards
    # never clobber each other's checkpoints.
    OUTPUT_FILE = os.path.join(
        SAVE_FOLDER,
        f"extracted_subclaims_{args.start_index}_{args.end_index}.json"
    )

    # -----------------------------
    # Load files (keyed by record id for O(1) joins)
    # -----------------------------
    print("Loading input files...")
    with open(FILE1, "r") as f:
        file1_data = {x["id"]: x for x in json.load(f)}

    with open(FILE2, "r") as f:
        file2_data = {x["id"]: x for x in json.load(f)}

    # -----------------------------
    # Merge and slice by range
    # -----------------------------
    # Union of ids from both files; sorting keeps shard boundaries stable
    # across runs. (Idiom fix: dropped the redundant list() inside sorted().)
    all_ids = sorted(set(file1_data.keys()) | set(file2_data.keys()))

    total_items = len(all_ids)

    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_items

    slice_ids = all_ids[start:end]

    print(f"Total IDs: {total_items}")
    print(f"Processing range: {start} → {end} (count={len(slice_ids)})")

    # -----------------------------
    # Resume mode: reuse a previous partial output if present
    # -----------------------------
    result = []
    if os.path.exists(OUTPUT_FILE):
        try:
            with open(OUTPUT_FILE, "r") as f:
                result = json.load(f)
        except (json.JSONDecodeError, OSError):
            # Fix: the original bare `except:` also swallowed KeyboardInterrupt
            # and SystemExit; only a corrupt/unreadable checkpoint should
            # trigger a fresh start.
            result = []

    existing_ids = {r["id"] for r in result}

    # -----------------------------
    # Process items
    # -----------------------------
    for _id in tqdm.tqdm(slice_ids):

        if _id in existing_ids:
            continue

        # FILE1: the three readability rewrites (record may be absent).
        easy_text = inter_text = hard_text = ""
        if _id in file1_data:
            rv = file1_data[_id]["readability_versions"]
            easy_text = rv.get("easy", {}).get("text", "")
            inter_text = rv.get("intermediate", {}).get("text", "")
            hard_text = rv.get("hard", {}).get("text", "")

        # FILE2: original full text and gold summary (record may be absent).
        fulltext = summary = ""
        if _id in file2_data:
            fulltext = file2_data[_id].get("fulltext", "")
            summary = file2_data[_id].get("summary", "")

        # One model call per text variant; blank variants short-circuit to [].
        easy_sub = infer_subclaims(easy_text)
        inter_sub = infer_subclaims(inter_text)
        hard_sub = infer_subclaims(hard_text)
        fulltext_sub = infer_subclaims(fulltext)
        summary_sub = infer_subclaims(summary)

        result.append({
            "id": _id,

            "easy_text": easy_text,
            "easy_subclaims": easy_sub,

            "intermediate_text": inter_text,
            "intermediate_subclaims": inter_sub,

            "hard_text": hard_text,
            "hard_subclaims": hard_sub,

            "fulltext": fulltext,
            "fulltext_subclaims": fulltext_sub,

            "summary": summary,
            "summary_subclaims": summary_sub
        })

        # Checkpoint every 20 processed items to survive crashes/preemption.
        if len(result) % 20 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(result, f, indent=4, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

    print(f"Done! Saved to: {OUTPUT_FILE}")
|
code/finetune-inference/old/extracting_subclaims_v2.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
MODEL_NAME = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx"
|
| 11 |
+
API_URL = "http://localhost:8004/v1"
|
| 12 |
+
API_KEY = "EMPTY"
|
| 13 |
+
|
| 14 |
+
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the subclaim-extraction prompt for *medical_text*.

    Returns a single user-message string containing numbered annotation
    instructions, the source text, and the expected list-of-strings output
    format.
    """
    # NOTE: the prompt body is flush-left inside the f-string so the model
    # receives it without leading whitespace.
    return f"""
You are an expert medical annotator. Extract granular, factual subclaims.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text.
4. Do not add, guess, or infer information.
5. Each subclaim should be short, specific, and verifiable.
6. Return ONLY a Python-style list of strings.

Medical Text:
{medical_text}

Return output as:
[
"subclaim 1",
"subclaim 2",
...
]
"""
|
| 42 |
+
|
| 43 |
+
# -----------------------------
|
| 44 |
+
# INFERENCE FUNCTION
|
| 45 |
+
# -----------------------------
|
| 46 |
+
def infer_subclaims(medical_text: str, temperature: float = 0.2) -> list:
    """Extract subclaims from *medical_text* via the local OpenAI-compatible API.

    Args:
        medical_text: Source text; blank/None input short-circuits to [].
        temperature: Sampling temperature for the completion call.

    Returns:
        A list of subclaim strings parsed from the model's JSON reply.
        If the reply is not valid JSON the raw reply is wrapped in a
        one-element list; on API/transport failure returns [].
    """
    if not medical_text or medical_text.strip() == "":
        return []

    final_prompt = extraction_prompt(medical_text)

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": final_prompt}],
            max_tokens=1000,
            temperature=temperature,
            top_p=0.9,
        )
        res = response.choices[0].message.content.strip()

        # Handle cases where the model might include <think> tags or markdown
        # code blocks around the JSON payload.
        if "</think>" in res:
            res = res.split("</think>")[-1].strip()

        if res.startswith("```json"):
            res = res.replace("```json", "").replace("```", "").strip()

        try:
            return json.loads(res)
        except json.JSONDecodeError:
            # Fix: was a bare `except:` (also caught KeyboardInterrupt).
            # Fallback if JSON parsing fails but some text is returned.
            return [res]

    except Exception as e:
        # Best-effort: log and keep the batch pipeline running on API errors.
        print(f"API error for text snippet: {e}")
        return []
|
| 78 |
+
|
| 79 |
+
# -----------------------------
|
| 80 |
+
# MAIN
|
| 81 |
+
# -----------------------------
|
| 82 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/classified_readability/classified_multiclinsum_test_en.json",
                        help="Path to input JSON file")
    parser.add_argument("--start_index", type=int, default=0,
                        help="Start index for processing")
    parser.add_argument("--end_index", type=int, default=-1,
                        help="End index for processing (exclusive). -1 = until end")

    args = parser.parse_args()

    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # Output filename encodes the source file and index range so parallel
    # shards write to distinct files.
    base_name = os.path.basename(args.input_file).replace(".json", "")
    OUTPUT_FILE = os.path.join(
        SAVE_FOLDER,
        f"subclaims_{base_name}_{args.start_index}_{args.end_index}.json"
    )

    # -----------------------------
    # Load data
    # -----------------------------
    print(f"Loading {args.input_file}...")
    with open(args.input_file, "r") as f:
        data = json.load(f)

    total_items = len(data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_items

    # Slice the data based on arguments
    work_items = data[start:end]

    print(f"Total records in file: {total_items}")
    print(f"Processing range: {start} → {end} (count={len(work_items)})")

    # -----------------------------
    # Resume mode: reuse a previous partial output if present
    # -----------------------------
    result = []
    if os.path.exists(OUTPUT_FILE):
        try:
            with open(OUTPUT_FILE, "r") as f:
                result = json.load(f)
            print(f"Resuming from existing file. {len(result)} items already processed.")
        except (json.JSONDecodeError, OSError):
            # Fix: the original bare `except:` also swallowed KeyboardInterrupt;
            # only a corrupt/unreadable checkpoint warrants a fresh start.
            result = []

    existing_ids = {r["id"] for r in result}

    # -----------------------------
    # Process items
    # -----------------------------
    for item in tqdm.tqdm(work_items):
        _id = item.get("id")

        if _id in existing_ids:
            continue

        fulltext = item.get("fulltext", "")
        summary = item.get("summary", "")

        # Run inference for both fields (blank fields short-circuit to []).
        fulltext_sub = infer_subclaims(fulltext)
        summary_sub = infer_subclaims(summary)

        # Build output object
        result.append({
            "id": _id,
            "fulltext": fulltext,
            "fulltext_subclaims": fulltext_sub,
            "summary": summary,
            "summary_subclaims": summary_sub,
            "readability_score": item.get("readability_score", None)
        })

        # Periodic save to prevent data loss
        if len(result) % 10 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(result, f, indent=4, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

    print(f"Success! Results saved to: {OUTPUT_FILE}")
|
code/finetune-inference/old/extracting_subclaims_v3.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
MODEL_NAME = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx"
|
| 11 |
+
API_URL = "http://localhost:8004/v1"
|
| 12 |
+
API_KEY = "EMPTY"
|
| 13 |
+
|
| 14 |
+
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the subclaim-extraction prompt for *medical_text*.

    Returns a single user-message string containing numbered annotation
    instructions, the source text, and the expected list-of-strings output
    format.
    """
    # NOTE: the prompt body is flush-left inside the f-string so the model
    # receives it without leading whitespace.
    return f"""
You are an expert medical annotator. Extract granular, factual subclaims.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text.
4. Do not add, guess, or infer information.
5. Each subclaim should be short, specific, and verifiable.
6. Return ONLY a Python-style list of strings.

Medical Text:
{medical_text}

Return output as:
[
"subclaim 1",
"subclaim 2",
...
]
"""
|
| 42 |
+
|
| 43 |
+
# -----------------------------
|
| 44 |
+
# INFERENCE FUNCTION
|
| 45 |
+
# -----------------------------
|
| 46 |
+
def infer_subclaims(medical_text: str, temperature: float = 0.2) -> list:
    """Extract subclaims from *medical_text* via the local OpenAI-compatible API.

    Args:
        medical_text: Source text; blank/None input short-circuits to [].
        temperature: Sampling temperature for the completion call.

    Returns:
        A list of subclaim strings parsed from the model's JSON reply.
        If the reply is not valid JSON the raw reply is wrapped in a
        one-element list; on API/transport failure returns [].
    """
    if not medical_text or medical_text.strip() == "":
        return []

    final_prompt = extraction_prompt(medical_text)

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": final_prompt}],
            max_tokens=1000,
            temperature=temperature,
            top_p=0.9,
        )
        res = response.choices[0].message.content.strip()

        # Handle cases where the model might include <think> tags or markdown
        # code blocks around the JSON payload.
        if "</think>" in res:
            res = res.split("</think>")[-1].strip()

        if res.startswith("```json"):
            res = res.replace("```json", "").replace("```", "").strip()

        try:
            return json.loads(res)
        except json.JSONDecodeError:
            # Fix: was a bare `except:` (also caught KeyboardInterrupt).
            # Fallback if JSON parsing fails but some text is returned.
            return [res]

    except Exception as e:
        # Best-effort: log and keep the batch pipeline running on API errors.
        print(f"API error for text snippet: {e}")
        return []
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ... (Configuration and extraction_prompt remain the same) ...
|
| 81 |
+
|
| 82 |
+
# -----------------------------
|
| 83 |
+
# MAIN
|
| 84 |
+
# -----------------------------
|
| 85 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_with_gs_summary_en.json",
                        help="Path to input JSON file")
    parser.add_argument("--start_index", type=int, default=0,
                        help="Start index for processing")
    parser.add_argument("--end_index", type=int, default=-1,
                        help="End index for processing (exclusive). -1 = until end")

    args = parser.parse_args()

    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    base_name = os.path.basename(args.input_file).replace(".json", "")
    OUTPUT_FILE = os.path.join(
        SAVE_FOLDER,
        f"subclaims_with_generated_{base_name}_{args.start_index}_{args.end_index}.json"
    )

    print(f"Loading {args.input_file}...")
    with open(args.input_file, "r") as f:
        data = json.load(f)

    total_items = len(data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_items
    work_items = data[start:end]

    # Resume mode: reuse a previous partial output if present.
    result = []
    if os.path.exists(OUTPUT_FILE):
        try:
            with open(OUTPUT_FILE, "r") as f:
                result = json.load(f)
            print(f"Resuming. {len(result)} items already processed.")
        except (json.JSONDecodeError, OSError):
            # Fix: the original bare `except:` also swallowed KeyboardInterrupt;
            # only a corrupt/unreadable checkpoint warrants a fresh start.
            result = []

    # Records may carry either "index" or "id" as their unique identifier.
    # Fix: the original `r.get("index") or r.get("id")` treated a legitimate
    # index of 0 as missing; use the same is-not-None rule as the loop below.
    existing_ids = {
        r.get("index") if r.get("index") is not None else r.get("id")
        for r in result
    }

    for item in tqdm.tqdm(work_items):
        # Handle different ID key names
        curr_id = item.get("index") if item.get("index") is not None else item.get("id")

        if curr_id in existing_ids:
            continue

        # 1. Process standard fields
        fulltext = item.get("fulltext", "")
        summary = item.get("summary", "")

        fulltext_sub = infer_subclaims(fulltext)
        summary_sub = infer_subclaims(summary)

        # 2. Process all generated texts (diff_label_texts): build a mirror
        #    dict that maps each readability label to its rewrite's subclaims.
        diff_label_subclaims = {}
        generated_texts = item.get("diff_label_texts", {})

        for label, text in generated_texts.items():
            if text:
                diff_label_subclaims[label] = infer_subclaims(text)
            else:
                diff_label_subclaims[label] = []

        # 3. Build output object
        output_item = {
            "index": curr_id,
            "fulltext": fulltext,
            "fulltext_subclaims": fulltext_sub,
            "summary": summary,
            "summary_subclaims": summary_sub,
            "diff_label_texts": generated_texts,
            "diff_label_subclaims": diff_label_subclaims,  # New field
            "readability_score": item.get("readability_score", None)
        }

        result.append(output_item)

        # Periodic save to prevent data loss
        if len(result) % 10 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(result, f, indent=4, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

    print(f"Success! Results saved to: {OUTPUT_FILE}")
|
code/finetune-inference/old/inference.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
import os
import json
import sys
# Make the shared GPU-selection helper importable from the home directory.
sys.path.append(os.path.abspath('/home/mshahidul/'))
from gpu_selection import _gpu_selection_
# 1. Argparse for path
parser = argparse.ArgumentParser(description="Translation Evaluation")
parser.add_argument("--path", type=str, default="/home/mshahidul/readctrl/generating_data/tik_ache/es_syntheticV3.json", help="Path to the JSON file")
parser.add_argument("--cuda", type=str, default="3", help="CUDA device id, e.g., '0' or '0,1' for multiple GPUs")
args = parser.parse_args()

# Pin the process to the requested GPU(s) BEFORE any CUDA library is imported;
# otherwise fall back to the shared auto-selection helper.
if args.cuda is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    print(f"🎮🎮 Using CUDA device: {args.cuda}")
else:
    _gpu_selection_()

# 2. Output directory and file
out_dir = "/home/mshahidul/readctrl/results/"
# NOTE(review): dirname() on a path with a trailing slash yields the same
# directory, so out_dir does get created — os.makedirs(out_dir, ...) would be
# clearer; confirm before changing.
os.makedirs(os.path.dirname(out_dir), exist_ok=True)
file_name = os.path.basename(args.path)
out_path = os.path.join(out_dir, file_name)

# 3. Load already evaluated results if exist (resume support keyed on the
# (article, gold_summary) pair).
results = []
completed_keys = set()
if os.path.exists(out_path):
    with open(out_path, "r", encoding="utf-8") as f:
        results = json.load(f)
    for r in results:
        completed_keys.add((r["article"], r["gold_summary"]))

# 4. Load dataset
with open(args.path, "r", encoding="utf-8") as f:
    dataset = json.load(f)
# Imported here, after CUDA_VISIBLE_DEVICES is set, so the CUDA runtime binds
# to the selected device.
from unsloth import FastLanguageModel
import torch
# 5. Load model (4-bit quantized LoRA checkpoint, no full finetuning)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/home/mshahidul/readctrl/finetuned_models/es_synthetic_data_creation_Qwen3_14B_v1",
    max_seq_length = 4092,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
)
from prompt_generate import generate_prompt
# 6. Evaluation loop: one generation per (item, readability band)
import tqdm
for item in tqdm.tqdm(dataset):
    key = (item["article"], item["gold_summary"])
    if key in completed_keys:
        continue

    for band in ["B1", "B2", "B3"]:
        prompt = generate_prompt(item['article'],item['gold_summary'],band,"es")

        messages = [{"role": "user", "content": prompt+"\n"}]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        inputs = tokenizer(text, return_tensors="pt").to("cuda")
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1000,
            temperature=0.1,
            top_p=0.8,
            top_k=5,
        )
        output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        #answer = output_text.split("</think>")[1].strip()

        results.append({
            "article": item["article"],
            "gold_summary": item["gold_summary"],
            "band": band,
            "lang": "es",
            "synthetic_summary": output_text,
        })
        # NOTE(review): indentation reconstructed from a mangled dump — the
        # add is assumed to sit inside the band loop (re-adding the same key
        # is harmless for a set); confirm against the original file.
        completed_keys.add(key)
        # Save every 30 results
        if len(results) % 30 == 0:
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(results, f, ensure_ascii=False, indent=2)

# 7. Final save
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)
|
code/finetune-inference/old/inferenceV2_without_context.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
import sys
|
| 5 |
+
sys.path.append(os.path.abspath('/home/mshahidul/'))
|
| 6 |
+
from gpu_selection import _gpu_selection_
|
| 7 |
+
# 1. Argparse for path
|
| 8 |
+
parser = argparse.ArgumentParser(description="Translation Evaluation")
|
| 9 |
+
# parser.add_argument("--out_path", type=str, default="/home/mshahidul/readctrl/generating_data/tik_ache/es_syntheticV3.json", help="Path to the JSON file")
|
| 10 |
+
parser.add_argument("--cuda", type=str, default="3", help="CUDA device id, e.g., '0' or '0,1' for multiple GPUs")
|
| 11 |
+
parser.add_argument("--model_name", type=str, default="/home/mshahidul/readctrl/finetuned_models/es_synthetic_data_creation_Qwen3_14B_v2", help="Path to the finetuned model")
|
| 12 |
+
parser.add_argument("--temperature", type=float, default=0.1, help="Generation temperature")
|
| 13 |
+
args = parser.parse_args()
|
| 14 |
+
# out_path = args.out_path
|
| 15 |
+
model_name = args.model_name
|
| 16 |
+
temperature = args.temperature
|
| 17 |
+
if args.cuda is not None:
|
| 18 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
|
| 19 |
+
print(f"🎮🎮 Using CUDA device: {args.cuda}")
|
| 20 |
+
else:
|
| 21 |
+
_gpu_selection_()
|
| 22 |
+
|
| 23 |
+
prompts={
|
| 24 |
+
"easy":'''
|
| 25 |
+
You are an assistant that rewrites Spanish texts to make them very simple and easy to understand.
|
| 26 |
+
Your goal is to rewrite the provided input text for younger readers (Fernández Huerta 70–100; grade 5–7).
|
| 27 |
+
Use short sentences, simple words, and friendly tone. Avoid technical or complex expressions.
|
| 28 |
+
Keep all important factual details, but remove jargon.
|
| 29 |
+
Return only the rewritten text without commentary.
|
| 30 |
+
''',
|
| 31 |
+
|
| 32 |
+
'intermediate':'''
|
| 33 |
+
You are an assistant specialized in rewriting Spanish texts with medium readability.
|
| 34 |
+
Your task is to rewrite the provided input text for general or high‑school‑level readers (Fernández Huerta 50–70; grade 8–12).
|
| 35 |
+
Use clear and complete sentences, moderately complex vocabulary, and structured narration.
|
| 36 |
+
Retain all relevant medical or factual information, but phrase it in accessible language.
|
| 37 |
+
Return only the rewritten text with no explanations.
|
| 38 |
+
''',
|
| 39 |
+
|
| 40 |
+
'hard':'''
|
| 41 |
+
You are an assistant that rewrites Spanish medical texts with professional, technical precision.
|
| 42 |
+
Rewrite the following input text using specialized, academic terminology and information‑dense phrasing.
|
| 43 |
+
The output must target a Fernández Huerta readability index between 0 and 50 (university/professional level).
|
| 44 |
+
Use clinical vocabulary, formal register, and detailed description of pathophysiology, procedures, and findings.
|
| 45 |
+
Return only the rewritten text.
|
| 46 |
+
'''
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
# 2. Output directory and file
path = "/home/mshahidul/readctrl/data/testing_data/multiclinsum_test_es.json"
out_dir = "/home/mshahidul/readctrl/results/v2_without_context"
os.makedirs(out_dir, exist_ok=True)

# A local path that exists is treated as a finetuned checkpoint; anything else
# is assumed to be a base/hub model name.
if os.path.exists(model_name):
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_finetuned.json")
else:
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_base.json")

# 3. Load already evaluated results if they exist (resume support).
results = []
completed_keys = set()
if os.path.exists(out_path):
    with open(out_path, "r", encoding="utf-8") as f:
        results = json.load(f)
    for r in results:
        completed_keys.add(r["fulltext"])

# 4. Load dataset (only the first 50 cases are processed).
with open(path, "r", encoding="utf-8") as f:
    dataset = json.load(f)
dataset = dataset[0:50]

from unsloth import FastLanguageModel
import torch

# 5. Load model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=4092,
    load_in_4bit=False,
    load_in_8bit=False,
    full_finetuning=False,
)

# Proportional output-length budget per readability band (loop-invariant,
# hoisted out of the generation loop).
LENGTH_FACTORS = {"easy": 0.5, "intermediate": 0.8, "hard": 1.1}

import tqdm
for item in tqdm.tqdm(dataset):
    key = item["fulltext"]
    if key in completed_keys:
        continue

    for band in ["easy", "intermediate", "hard"]:
        messages = [
            {"role": "system", "content": prompts[band].strip()},
            {"role": "user", "content": "Input text:\n" + item["fulltext"].strip()}
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        inputs = tokenizer(text, return_tensors="pt").to("cuda")
        input_len = inputs.input_ids.shape[1]

        # Adaptive max_new_tokens, clamped to [150, 1200].
        max_new_tokens = int(min(1200, max(150, input_len * LENGTH_FACTORS[band])))

        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,  # BUGFIX: without this, temperature/top_p/top_k were ignored (greedy decoding)
                temperature=temperature,
                top_p=0.9,
                top_k=45,
            )
        # BUGFIX: decode only the newly generated tokens; previously the full
        # sequence (prompt echo included) was stored as the summary.
        output_text = tokenizer.decode(output_ids[0][input_len:], skip_special_tokens=True)

        results.append({
            "fulltext": item["fulltext"],
            "band": band,
            "lang": "es",
            "synthetic_summary": output_text,
        })
        completed_keys.add(key)
        # Checkpoint every 3 results (one per band) so progress survives interruption.
        if len(results) % 3 == 0:
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(results, f, ensure_ascii=False, indent=2)

# 7. Final save
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)
|
code/finetune-inference/old/inferenceV3.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
import os
import json
import sys
sys.path.append(os.path.abspath('/home/mshahidul/'))
from gpu_selection import _gpu_selection_

parser = argparse.ArgumentParser(description="Readability Controlled Generation")
parser.add_argument("--cuda", type=str, default="3")
parser.add_argument("--model_name", type=str, default="/home/mshahidul/readctrl/finetuned_models/es_synthetic_data_creation_Qwen3_14B_v2")
parser.add_argument("--temperature", type=float, default=0.1)
args = parser.parse_args()

model_name = args.model_name
temperature = args.temperature

# Pin the GPU before torch/unsloth are imported so the selection takes effect.
if args.cuda is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    print(f"🎮🎮 Using CUDA device: {args.cuda}")
else:
    _gpu_selection_()

# One system prompt per readability band (Fernández Huerta index targets).
prompts = {
    "easy": '''
You are an assistant that rewrites Spanish texts to make them very simple and easy to understand.
Your goal is to rewrite the provided input text for younger readers (Fernández Huerta 70–100; grade 5–7).
Use short sentences, simple words, and friendly tone. Avoid technical or complex expressions.
Keep all important factual details, but remove jargon.
Return only the rewritten text without commentary.
''',
    "intermediate": '''
You are an assistant specialized in rewriting Spanish texts with medium readability.
Your task is to rewrite the provided input text for general or high‑school‑level readers (Fernández Huerta 50–70; grade 8–12).
Use clear and complete sentences, moderately complex vocabulary, and structured narration.
Retain all relevant medical or factual information, but phrase it in accessible language.
Return only the rewritten text with no explanations.
''',
    "hard": '''
You are an assistant that rewrites Spanish medical texts with professional, technical precision.
Rewrite the following input text using specialized, academic terminology and information‑dense phrasing.
The output must target a Fernández Huerta readability index between 0 and 50 (university/professional level).
Use clinical vocabulary, formal register, and detailed description of pathophysiology, procedures, and findings.
Return only the rewritten text.
'''
}

# -------- Load keyword–definition dataset ----------
kw_file = "/home/mshahidul/readctrl/data/kyw_def_train/kyw_gen_gpt5.json"
with open(kw_file, "r", encoding="utf-8") as f:
    definitions_data = json.load(f)

# Build quick lookup: id -> glossary text
def_map = {}
for obj in definitions_data:
    cid = obj.get("id")
    kwlist = obj.get("medical_keywords", [])
    defs_str = ""
    if kwlist:
        defs_lines = [f"• {d['term']} — {d['definition']}" for d in kwlist]
        defs_str = "Relevant medical definitions:\n" + "\n".join(defs_lines)
    def_map[cid] = defs_str
# --------------------------------------------------------------

path = "/home/mshahidul/readctrl/data/testing_data/multiclinsum_test_es.json"
out_dir = "/home/mshahidul/readctrl/results/v3_context"
os.makedirs(out_dir, exist_ok=True)

# A local path that exists is treated as a finetuned checkpoint.
if os.path.exists(model_name):
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_finetuned_with_defs.json")
else:
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_base_with_defs.json")

# Resume support: reload previous results and skip already-processed texts.
results, completed_keys = [], set()
if os.path.exists(out_path):
    with open(out_path, "r", encoding="utf-8") as f:
        results = json.load(f)
    for r in results:
        completed_keys.add(r["fulltext"])

# -------- Load main dataset (first 50 cases only) -----------
with open(path, "r", encoding="utf-8") as f:
    dataset = json.load(f)
dataset = dataset[0:50]

from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=4092,
    load_in_4bit=False,
    load_in_8bit=False,
    full_finetuning=False,
)

# Proportional output-length budget per band (hoisted out of the loop).
LENGTH_FACTORS = {"easy": 0.5, "intermediate": 0.8, "hard": 1.1}

import tqdm
for item in tqdm.tqdm(dataset):
    key = item["fulltext"]
    if key in completed_keys:
        continue
    item_id = item["id"]
    glossary = def_map.get(item_id, "")  # retrieve glossary if exists

    for band in ["easy", "intermediate", "hard"]:
        # Append definitions below the case text
        user_content = f"Input text:\n{item['fulltext'].strip()}"
        if glossary:
            user_content += "\n\n" + glossary

        messages = [
            {"role": "system", "content": prompts[band].strip()},
            {"role": "user", "content": user_content}
        ]

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )

        inputs = tokenizer(text, return_tensors="pt").to("cuda")
        input_len = inputs.input_ids.shape[1]
        max_new_tokens = int(min(1200, max(150, input_len * LENGTH_FACTORS[band])))

        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,  # BUGFIX: temperature/top_p/top_k were ignored under the greedy default
                temperature=temperature,
                top_p=0.9,
                top_k=45,
            )
        # BUGFIX: decode only the newly generated tokens, not the echoed prompt.
        output_text = tokenizer.decode(output_ids[0][input_len:], skip_special_tokens=True)

        results.append({
            "id": item_id,
            "fulltext": item["fulltext"],
            "band": band,
            "lang": "es",
            "synthetic_summary": output_text,
            "definitions_used": bool(glossary)  # track whether glossary applied
        })

        completed_keys.add(key)
        # Checkpoint every 3 results (one per band) for crash resilience.
        if len(results) % 3 == 0:
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(results, f, ensure_ascii=False, indent=2)

with open(out_path, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)


from notifier import send_notification
send_notification(
    "process-complete1507034",
    f"Finished inference with model {model_name} at temperature {temperature}. Results saved to {out_path}",
    title="Inference Complete",
    priority="default",
    tags="tada"
)
|
code/finetune-inference/old/inferenceV3_temp.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
import os
import json
import sys


parser = argparse.ArgumentParser(description="Readability Controlled Generation")
parser.add_argument("--model_name", type=str, default="/home/mshahidul/readctrl/finetuned_models/es_synthetic_data_creation_Qwen3_14B_v2")
parser.add_argument("--temperature", type=float, default=0.1)
args = parser.parse_args()

model_name = args.model_name
temperature = args.temperature


# One system prompt per readability band (Fernández Huerta index targets).
prompts = {
    "easy": '''
You are an assistant that rewrites Spanish texts to make them very simple and easy to understand.
Your goal is to rewrite the provided input text for younger readers (Fernández Huerta 70–100; grade 5–7).
Use short sentences, simple words, and friendly tone. Avoid technical or complex expressions.
Keep all important factual details, but remove jargon.
Return only the rewritten text without commentary.
''',
    "intermediate": '''
You are an assistant specialized in rewriting Spanish texts with medium readability.
Your task is to rewrite the provided input text for general or high‑school‑level readers (Fernández Huerta 50–70; grade 8–12).
Use clear and complete sentences, moderately complex vocabulary, and structured narration.
Retain all relevant medical or factual information, but phrase it in accessible language.
Return only the rewritten text with no explanations.
''',
    "hard": '''
You are an assistant that rewrites Spanish medical texts with professional, technical precision.
Rewrite the following input text using specialized, academic terminology and information‑dense phrasing.
The output must target a Fernández Huerta readability index between 0 and 50 (university/professional level).
Use clinical vocabulary, formal register, and detailed description of pathophysiology, procedures, and findings.
Return only the rewritten text.
'''
}

# -------- Load keyword–definition dataset ----------
kw_file = "/home/mshahidul/readctrl/data/kyw_def_train/kyw_gen_gpt5.json"
with open(kw_file, "r", encoding="utf-8") as f:
    definitions_data = json.load(f)

# Build quick lookup: id -> glossary text
def_map = {}
for obj in definitions_data:
    cid = obj.get("id")
    kwlist = obj.get("medical_keywords", [])
    defs_str = ""
    if kwlist:
        defs_lines = [f"• {d['term']} — {d['definition']}" for d in kwlist]
        defs_str = "Relevant medical definitions:\n" + "\n".join(defs_lines)
    def_map[cid] = defs_str
# --------------------------------------------------------------

path = "/home/mshahidul/readctrl/data/testing_data/multiclinsum_test_es.json"
out_dir = "/home/mshahidul/readctrl/results/v3"
os.makedirs(out_dir, exist_ok=True)

# A local path that exists is treated as a finetuned checkpoint.
if os.path.exists(model_name):
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_finetuned_with_defs.json")
else:
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_base_with_defs.json")

# Resume support: reload previous results and skip already-processed texts.
results, completed_keys = [], set()
if os.path.exists(out_path):
    with open(out_path, "r", encoding="utf-8") as f:
        results = json.load(f)
    for r in results:
        completed_keys.add(r["fulltext"])

# -------- Load main dataset (first 50 cases only) -----------
with open(path, "r", encoding="utf-8") as f:
    dataset = json.load(f)
dataset = dataset[0:50]

from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=4092,
    load_in_4bit=False,
    load_in_8bit=False,
    full_finetuning=False,
)

# Proportional output-length budget per band (hoisted out of the loop).
LENGTH_FACTORS = {"easy": 0.5, "intermediate": 0.8, "hard": 1.1}

import tqdm
for item in tqdm.tqdm(dataset):
    key = item["fulltext"]
    if key in completed_keys:
        continue
    item_id = item["id"]
    glossary = def_map.get(item_id, "")  # retrieve glossary if exists

    for band in ["easy", "intermediate", "hard"]:
        # Append definitions below the case text
        user_content = f"Input text:\n{item['fulltext'].strip()}"
        if glossary:
            user_content += "\n\n" + glossary

        messages = [
            {"role": "system", "content": prompts[band].strip()},
            {"role": "user", "content": user_content}
        ]

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )

        inputs = tokenizer(text, return_tensors="pt").to("cuda")
        input_len = inputs.input_ids.shape[1]
        max_new_tokens = int(min(1200, max(150, input_len * LENGTH_FACTORS[band])))

        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,  # BUGFIX: temperature/top_p/top_k were ignored under the greedy default
                temperature=temperature,
                top_p=0.9,
                top_k=45,
            )
        # BUGFIX: decode only the newly generated tokens, not the echoed prompt.
        output_text = tokenizer.decode(output_ids[0][input_len:], skip_special_tokens=True)

        results.append({
            "id": item_id,
            "fulltext": item["fulltext"],
            "band": band,
            "lang": "es",
            "synthetic_summary": output_text,
            "definitions_used": bool(glossary)  # track whether glossary applied
        })

        completed_keys.add(key)
        # Checkpoint every 3 results (one per band) for crash resilience.
        if len(results) % 3 == 0:
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(results, f, ensure_ascii=False, indent=2)

with open(out_path, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)
|
code/finetune-inference/old/inferenceV4.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
import os
import json
import sys
sys.path.append(os.path.abspath('/home/mshahidul/'))
from gpu_selection import _gpu_selection_

parser = argparse.ArgumentParser(description="Readability Controlled Generation")
parser.add_argument("--cuda", type=str, default="3")
parser.add_argument("--model_name", type=str, default="/home/mshahidul/readctrl/finetuned_models/es_synthetic_data_creation_Qwen3_14B_v2")
parser.add_argument("--temperature", type=float, default=0.1)
args = parser.parse_args()

model_name = args.model_name
temperature = args.temperature

# Pin the GPU before torch/unsloth are imported so the selection takes effect.
if args.cuda is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    print(f"🎮🎮 Using CUDA device: {args.cuda}")
else:
    _gpu_selection_()

# Spanish-language system prompts, one per readability band (FH score targets).
prompts = {
    "easy": '''
Reescribe el siguiente informe médico en español con un nivel de lectura fácil correspondiente a un puntaje FH entre 70 y 100 (texto muy comprensible).
Usa oraciones cortas y directas, vocabulario cotidiano, estructuras simples y explicaciones claras de términos médicos. El tono debe ser empático y accesible, como si estuvieras explicando la situación a un paciente o familiar sin conocimientos médicos.
Mantén los datos clínicos y resultados esenciales, pero reemplaza o aclara tecnicismos con frases simples. Evita abreviaturas o siglas sin explicación.
''',
    "intermediate": '''
Reformula el siguiente informe médico en español con un nivel de lectura intermedio, correspondiente a un puntaje FH entre 50 y 70 (texto de dificultad moderada).
Usa lenguaje formal pero comprensible, adecuado para lectores con educación general o estudiantes del área de salud. Mantén la precisión médica, pero agrega explicaciones breves tras los términos técnicos. Alterna oraciones simples y compuestas, con buena fluidez y cohesión.
El texto debe sonar profesional, informativo y claro, sin llegar a la densidad típica de lenguaje técnico especializado.
''',
    "hard": '''
Reescribe el siguiente informe médico en español con un nivel de lectura avanzado o técnico, correspondiente a un puntaje FH entre 0 y 50 (texto especializado).
Usa terminología médica precisa, estructuras sintácticas complejas y tono formal típico de documentos clínicos o publicaciones científicas. No simplifiques ni expliques los tecnicismos; conserva la exactitud conceptual y la nomenclatura profesional.
Refleja el razonamiento clínico, hallazgos y juicios médicos con lenguaje apropiado para médicos, especialistas o investigadores.
'''
}

# -------- Load keyword–definition dataset ----------
kw_file = "/home/mshahidul/readctrl/data/kyw_def_train/kyw_gen_gpt5.json"
with open(kw_file, "r", encoding="utf-8") as f:
    definitions_data = json.load(f)

# Build quick lookup: id -> glossary text
def_map = {}
for obj in definitions_data:
    cid = obj.get("id")
    kwlist = obj.get("medical_keywords", [])
    defs_str = ""
    if kwlist:
        defs_lines = [f"• {d['term']} — {d['definition']}" for d in kwlist]
        defs_str = "Relevant medical definitions:\n" + "\n".join(defs_lines)
    def_map[cid] = defs_str
# --------------------------------------------------------------

path = "/home/mshahidul/readctrl/data/testing_data/multiclinsum_test_es.json"
out_dir = "/home/mshahidul/readctrl/results/custom_promptsV1"
os.makedirs(out_dir, exist_ok=True)

# A local path that exists is treated as a finetuned checkpoint.
if os.path.exists(model_name):
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_finetuned_with_defs.json")
else:
    out_path = os.path.join(out_dir, f"temp{temperature}_qwen3-14B_base_with_defs.json")

# Resume support: reload previous results and skip already-processed texts.
results, completed_keys = [], set()
if os.path.exists(out_path):
    with open(out_path, "r", encoding="utf-8") as f:
        results = json.load(f)
    for r in results:
        completed_keys.add(r["fulltext"])

# -------- Load main dataset (first 50 cases only) -----------
with open(path, "r", encoding="utf-8") as f:
    dataset = json.load(f)
dataset = dataset[0:50]

from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=4092,
    load_in_4bit=False,
    load_in_8bit=False,
    full_finetuning=False,
)

# Proportional output-length budget per band (hoisted out of the loop).
LENGTH_FACTORS = {"easy": 0.5, "intermediate": 0.8, "hard": 1.1}

import tqdm
for item in tqdm.tqdm(dataset):
    key = item["fulltext"]
    if key in completed_keys:
        continue
    item_id = item["id"]
    glossary = def_map.get(item_id, "")  # retrieve glossary if exists

    for band in ["easy", "intermediate", "hard"]:
        user_content = f"Input text:\n{item['fulltext'].strip()}"
        # NOTE: in this prompt variant the glossary is deliberately NOT appended
        # to the user message; `definitions_used` below still records whether
        # one was available for the item.

        messages = [
            {"role": "system", "content": prompts[band].strip()},
            {"role": "user", "content": user_content}
        ]

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )

        inputs = tokenizer(text, return_tensors="pt").to("cuda")
        input_len = inputs.input_ids.shape[1]
        max_new_tokens = int(min(1200, max(150, input_len * LENGTH_FACTORS[band])))

        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,  # BUGFIX: temperature/top_p/top_k were ignored under the greedy default
                temperature=temperature,
                top_p=0.9,
                top_k=45,
            )
        # BUGFIX: decode only the newly generated tokens, not the echoed prompt.
        output_text = tokenizer.decode(output_ids[0][input_len:], skip_special_tokens=True)

        results.append({
            "id": item_id,
            "fulltext": item["fulltext"],
            "band": band,
            "lang": "es",
            "synthetic_summary": output_text,
            "definitions_used": bool(glossary)  # track whether glossary applied
        })

        completed_keys.add(key)
        # Checkpoint every 3 results (one per band) for crash resilience.
        if len(results) % 3 == 0:
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(results, f, ensure_ascii=False, indent=2)

with open(out_path, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)


from notifier import send_notification
send_notification(
    "process-complete1507034",
    f"Finished inference with model {model_name} at temperature {temperature}. Results saved to {out_path}",
    title="Inference Complete",
    priority="default",
    tags="tada"
)
|
code/finetune-inference/old/inference_extract_subclaims.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 3 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from unsloth import FastLanguageModel
|
| 7 |
+
import json
|
| 8 |
+
import tqdm
|
| 9 |
+
|
| 10 |
+
# -----------------------------
|
| 11 |
+
# MODEL CACHE
|
| 12 |
+
# -----------------------------
|
| 13 |
+
# -----------------------------
# MODEL CACHE
# -----------------------------
# Cached model/tokenizer plus the checkpoint path they were loaded from, so a
# call with a different path is not silently answered from the stale cache.
_model_cache = {"model": None, "tokenizer": None, "path": None}

def load_finetuned_model(model_path: str):
    """Load and cache the fine-tuned subclaim-extraction model + tokenizer.

    Returns the cached pair when *model_path* matches the cached checkpoint;
    otherwise (re)loads from disk. BUGFIX: the previous cache ignored
    *model_path* entirely, so a second call with a different checkpoint
    returned the first model.
    """
    if _model_cache["model"] is not None and _model_cache["path"] == model_path:
        return _model_cache["model"], _model_cache["tokenizer"]

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=8192,
        load_in_4bit=False,
        load_in_8bit=False,
        full_finetuning=False,
    )
    _model_cache["model"], _model_cache["tokenizer"] = model, tokenizer
    _model_cache["path"] = model_path
    return model, tokenizer
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# -----------------------------
|
| 32 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 33 |
+
# -----------------------------
|
| 34 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the instruction prompt for atomic subclaim extraction.

    Embeds *medical_text* verbatim and asks the model to return a
    JSON-style list of short, independently verifiable factual statements.
    """
    # NOTE: the template body is intentionally left-aligned inside the f-string
    # so the model sees the instructions without leading indentation.
    prompt = f"""
You are an expert medical annotator. Your task is to extract granular, factual subclaims from medical text.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text.
4. Do not add, guess, or infer information.
5. Each subclaim should be short, specific, and verifiable.
6. Return ONLY a Python-style list of strings.

Medical Text:
{medical_text}

Return your output in JSON list format, like:
[
"subclaim 1",
"subclaim 2",
...
]
"""
    return prompt
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# -----------------------------
|
| 61 |
+
# INFERENCE FUNCTION
|
| 62 |
+
# -----------------------------
|
| 63 |
+
def infer_subclaims(medical_text: str,
                    model_path: str,
                    temperature: float = 0.2) -> str:
    """Run the (cached) fine-tuned model on *medical_text* and return its raw output.

    Parameters:
        medical_text: the clinical text to decompose into subclaims.
        model_path: checkpoint passed through to load_finetuned_model().
        temperature: sampling temperature; 0 means greedy decoding.

    Returns the decoded completion (expected to be a JSON-style list of
    subclaim strings), with any `<think>...</think>` preamble stripped.
    """
    model, tokenizer = load_finetuned_model(model_path)

    prompt = extraction_prompt(medical_text)
    messages = [{"role": "user", "content": prompt}]

    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")
    input_len = inputs.input_ids.shape[1]

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            # BUGFIX: the old call passed do_sample=False together with
            # temperature/top_p/top_k, so the sampling arguments (and this
            # function's `temperature` parameter) were silently ignored.
            # Honor the parameter: sample when temperature > 0, else greedy.
            do_sample=temperature > 0,
            temperature=temperature,
            top_p=0.9,
            top_k=10,
        )

    # BUGFIX: decode only the newly generated tokens; previously the echoed
    # prompt was included in the returned text.
    output_text = tokenizer.decode(output_ids[0][input_len:], skip_special_tokens=True).strip()

    # Strip reasoning preamble if the model inserts `<think>...</think>`.
    if "</think>" in output_text:
        output_text = output_text.split("</think>")[-1].strip()

    return output_text
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# -----------------------------
|
| 102 |
+
# MAIN EXECUTION
|
| 103 |
+
# -----------------------------
|
| 104 |
+
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True,
                        help="Path to the input JSON file containing medical texts.")
    args = parser.parse_args()
    INPUT_FILE = args.input_file
    # Stem of the input file name; reused to name the output file below.
    file_name=os.path.basename(INPUT_FILE).split(".json")[0]
    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx"

    os.makedirs(SAVE_FOLDER, exist_ok=True)

    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"extracted_subclaims_{file_name}_en.json")

    # Load input dataset
    with open(INPUT_FILE, "r") as f:
        data = json.load(f)

    # Load existing results (resume mode)
    result = []
    if os.path.exists(OUTPUT_FILE):
        with open(OUTPUT_FILE, "r") as f:
            result = json.load(f)

    # IDs already present in the output file; those items are skipped below
    # so the script can resume after an interruption.
    existing_ids = {item["id"] for item in result}

    # --------------------------------------------------------
    # PROCESS EACH MEDICAL TEXT
    # --------------------------------------------------------
    for item in tqdm.tqdm(data):
        if item["id"] in existing_ids:
            continue

        medical_text = item.get("fulltext", "")

        # NOTE(review): the model path is passed on every call; presumably
        # load_finetuned_model caches the model after the first load —
        # confirm, otherwise each item reloads the checkpoint.
        extracted = infer_subclaims(
            medical_text,
            model_path=MODEL_PATH
        )

        result.append({
            "id": item["id"],
            "medical_text": medical_text,
            "subclaims": extracted,
            "summary": item.get("summary", "")
        })

        # Save every 20 entries
        if len(result) % 20 == 0:
            print(f"Saving intermediate results... Total processed: {len(result)}")
            with open(OUTPUT_FILE, "w") as f:
                json.dump(result, f, indent=4, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

    print("Extraction completed.")
|
code/finetune-inference/old/inference_extract_subclaims_v2.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
# Set GPU environment variables
# NOTE: these must be set before importing torch/unsloth so CUDA device
# selection takes effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import torch
from unsloth import FastLanguageModel
import json
import tqdm
import argparse



# -----------------------------
# MODEL CACHE
# -----------------------------
# Module-level singleton so the checkpoint is loaded at most once per process.
_model_cache = {"model": None, "tokenizer": None}
|
| 17 |
+
|
| 18 |
+
def load_finetuned_model(model_path: str):
    """Return the fine-tuned subclaim-extraction model and tokenizer.

    The pair is loaded once and memoised in the module-level
    ``_model_cache``; every later call returns the cached objects.
    """
    cached_model = _model_cache["model"]
    if cached_model is not None:
        return cached_model, _model_cache["tokenizer"]

    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=8192,
        load_in_4bit=False,
        load_in_8bit=False,
        full_finetuning=False,
    )
    _model_cache["model"] = loaded_model
    _model_cache["tokenizer"] = loaded_tokenizer
    return loaded_model, loaded_tokenizer
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# -----------------------------
|
| 35 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 36 |
+
# -----------------------------
|
| 37 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the subclaim-extraction instruction prompt around *medical_text*."""
    return f"""
You are an expert medical annotator. Your task is to extract granular, factual subclaims from medical text.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text.
4. Do not add, guess, or infer information.
5. Each subclaim should be short, specific, and verifiable.
6. Return ONLY a Python-style list of strings.

Medical Text:
{medical_text}

Return your output in JSON list format, like:
[
"subclaim 1",
"subclaim 2",
...
]
"""
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# -----------------------------
|
| 64 |
+
# INFERENCE FUNCTION
|
| 65 |
+
# -----------------------------
|
| 66 |
+
def infer_subclaims(medical_text: str,
                    model,
                    tokenizer,
                    temperature: float = 0.2) -> list:
    """Generate subclaims for *medical_text* and parse them into a list.

    Returns ``[]`` for empty input, a parsed JSON list on success, or the
    raw generated string when the output cannot be parsed (so callers can
    inspect or repair it).

    Note: ``do_sample=False`` forces greedy decoding, so *temperature*,
    ``top_p`` and ``top_k`` have no effect; kept for interface
    compatibility.
    """
    if not medical_text or medical_text.strip() == "":
        return []

    prompt = extraction_prompt(medical_text)
    messages = [{"role": "user", "content": prompt}]

    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1024,  # Increased to handle potentially longer list outputs
            temperature=temperature,
            top_p=0.9,
            top_k=10,
            do_sample=False,
        )

    # BUG FIX: decode only the generated continuation; decoding the full
    # sequence prepends the entire chat prompt to the output text, which
    # then breaks the JSON extraction below.
    prompt_len = inputs["input_ids"].shape[1]
    output_text = tokenizer.decode(
        output_ids[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Remove thinking if model inserts `<think>`
    if "</think>" in output_text:
        output_text = output_text.split("</think>")[-1].strip()

    # Try to parse as JSON list, return raw text if parsing fails
    try:
        # Locate the JSON list in case there is conversational filler.
        start_idx = output_text.find('[')
        end_idx = output_text.rfind(']') + 1
        # BUG FIX: `end_idx != -1` was always true (a failed rfind yields
        # -1 + 1 == 0); require the closing bracket after the opening one.
        if start_idx != -1 and end_idx > start_idx:
            return json.loads(output_text[start_idx:end_idx])
        return output_text
    except Exception:
        return output_text
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# -----------------------------
|
| 115 |
+
# MAIN EXECUTION
|
| 116 |
+
# -----------------------------
|
| 117 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True,
                        help="Path to the input JSON file containing medical texts.")
    args = parser.parse_args()

    INPUT_FILE = args.input_file
    # Stem of the input file name; reused to name the output file.
    file_name = os.path.basename(INPUT_FILE).split(".json")[0]
    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx"

    os.makedirs(SAVE_FOLDER, exist_ok=True)
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"extracted_subclaims_{file_name}_en.json")

    # Load Model once
    model, tokenizer = load_finetuned_model(MODEL_PATH)

    # Load input dataset
    with open(INPUT_FILE, "r") as f:
        data = json.load(f)

    # Load existing results (resume mode)
    result = []
    if os.path.exists(OUTPUT_FILE):
        with open(OUTPUT_FILE, "r") as f:
            result = json.load(f)

    # IDs already in the output file; their items are skipped below.
    existing_ids = {item["id"] for item in result}

    # --------------------------------------------------------
    # PROCESS EACH MEDICAL TEXT (Fulltext AND Summary)
    # --------------------------------------------------------
    for item in tqdm.tqdm(data):
        if item.get("id") in existing_ids:
            continue

        # Extract from Fulltext
        fulltext_content = item.get("fulltext", "")
        fulltext_subclaims = infer_subclaims(fulltext_content, model, tokenizer)

        # Extract from Summary
        summary_content = item.get("summary", "")
        summary_subclaims = infer_subclaims(summary_content, model, tokenizer)

        result.append({
            "id": item.get("id"),
            "fulltext": fulltext_content,
            "fulltext_subclaims": fulltext_subclaims,
            "summary": summary_content,
            "summary_subclaims": summary_subclaims,
            "readability_score": item.get("readability_score", None)
        })

        # Save intermediate results
        # (every 20 processed items; crash-safe checkpointing)
        if len(result) % 20 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(result, f, indent=4, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

    print(f"Extraction completed. Saved to {OUTPUT_FILE}")
|
code/finetune-inference/old/inference_extract_subclaims_v3.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
# Set GPU environment variables
# NOTE: must happen before torch/unsloth are imported so CUDA device
# selection takes effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import torch
from unsloth import FastLanguageModel
import json
import tqdm
import argparse

# -----------------------------
# MODEL CACHE
# -----------------------------
# Module-level singleton so the checkpoint is loaded at most once.
_model_cache = {"model": None, "tokenizer": None}
|
| 15 |
+
|
| 16 |
+
def load_finetuned_model(model_path: str):
    """Load (once) and return the fine-tuned model and tokenizer.

    Results are memoised in the module-level ``_model_cache`` so repeated
    calls are free.
    """
    if _model_cache["model"] is None:
        lm, tok = FastLanguageModel.from_pretrained(
            model_name=model_path,
            max_seq_length=8192,
            load_in_4bit=False,
            load_in_8bit=False,
            full_finetuning=False,
        )
        _model_cache["model"] = lm
        _model_cache["tokenizer"] = tok
    return _model_cache["model"], _model_cache["tokenizer"]
|
| 29 |
+
|
| 30 |
+
# -----------------------------
|
| 31 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 32 |
+
# -----------------------------
|
| 33 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the (v3, JSON-strict) subclaim-extraction prompt for *medical_text*."""
    return f"""
You are an expert medical annotator. Your task is to extract granular, factual subclaims from medical text.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text.
4. Return ONLY a valid JSON list of strings.

Medical Text:
{medical_text}

Return your output in JSON list format:
[
"subclaim 1",
"subclaim 2"
]
"""
|
| 54 |
+
|
| 55 |
+
# -----------------------------
|
| 56 |
+
# INFERENCE FUNCTION WITH REPAIR
|
| 57 |
+
# -----------------------------
|
| 58 |
+
def infer_subclaims(medical_text: str, model, tokenizer, temperature: float = 0.2, max_tokens: int = 2048) -> list:
    """Generate subclaims for *medical_text* and always return a list.

    Returns ``[]`` for empty input, the parsed JSON list on success, or
    ``[raw_output]`` when parsing fails (wrapped so callers can rely on a
    list type).

    Note: ``do_sample=False`` forces greedy decoding, so *temperature*
    has no effect; kept for interface compatibility.
    """
    if not medical_text or medical_text.strip() == "":
        return []

    prompt = extraction_prompt(medical_text)
    messages = [{"role": "user", "content": prompt}]

    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_tokens,  # Increased default
            temperature=temperature,
            do_sample=False,
        )

    # BUG FIX: decode only the generated continuation; decoding the full
    # sequence prepends the entire chat prompt to the output text.
    prompt_len = inputs["input_ids"].shape[1]
    output_text = tokenizer.decode(
        output_ids[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Remove reasoning/thinking if present
    if "</think>" in output_text:
        output_text = output_text.split("</think>")[-1].strip()

    # Attempt to parse
    try:
        start_idx = output_text.find('[')
        end_idx = output_text.rfind(']') + 1
        # BUG FIX: `end_idx != -1` was always true (failed rfind gives
        # -1 + 1 == 0); require the closing bracket after the opening one.
        if start_idx != -1 and end_idx > start_idx:
            parsed = json.loads(output_text[start_idx:end_idx])
            if isinstance(parsed, list):
                return parsed
        return [output_text]  # Wrap in list if it's just raw text
    except Exception:
        return [output_text]
|
| 98 |
+
|
| 99 |
+
# -----------------------------
|
| 100 |
+
# MAIN EXECUTION
|
| 101 |
+
# -----------------------------
|
| 102 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True)
    args = parser.parse_args()

    INPUT_FILE = args.input_file
    # Stem of the input file name; reused to name the output file.
    file_name = os.path.basename(INPUT_FILE).split(".json")[0]
    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx"

    os.makedirs(SAVE_FOLDER, exist_ok=True)
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"extracted_subclaims_{file_name}_en.json")

    model, tokenizer = load_finetuned_model(MODEL_PATH)

    # Load input dataset
    with open(INPUT_FILE, "r") as f:
        data = json.load(f)

    # Load existing results
    result = []
    if os.path.exists(OUTPUT_FILE):
        with open(OUTPUT_FILE, "r") as f:
            result = json.load(f)

    # Convert results to a dict for easy lookup/update
    processed_data = {item["id"]: item for item in result}

    for item in tqdm.tqdm(data):
        item_id = item.get("id")
        existing_entry = processed_data.get(item_id)

        # CHECK LOGIC:
        # If entry exists and subclaims are already valid lists, we skip.
        # If they are strings or missing, we re-run with higher tokens.

        # 1. Check Fulltext Subclaims
        fulltext_needs_update = (
            not existing_entry or
            not isinstance(existing_entry.get("fulltext_subclaims"), list) or
            len(existing_entry.get("fulltext_subclaims", [])) == 0
        )

        if fulltext_needs_update:
            f_sub = infer_subclaims(item.get("fulltext", ""), model, tokenizer, max_tokens=3072)
        else:
            f_sub = existing_entry["fulltext_subclaims"]

        # 2. Check Summary Subclaims
        summary_needs_update = (
            not existing_entry or
            not isinstance(existing_entry.get("summary_subclaims"), list) or
            len(existing_entry.get("summary_subclaims", [])) == 0
        )

        if summary_needs_update:
            s_sub = infer_subclaims(item.get("summary", ""), model, tokenizer, max_tokens=2048)
        else:
            s_sub = existing_entry["summary_subclaims"]

        # Update or Append
        new_entry = {
            "id": item_id,
            "fulltext": item.get("fulltext", ""),
            "fulltext_subclaims": f_sub,
            "summary": item.get("summary", ""),
            "summary_subclaims": s_sub,
            "readability_score": item.get("readability_score", None)
        }
        processed_data[item_id] = new_entry

        # Intermediate save
        # NOTE(review): keyed on len(processed_data), which stops growing
        # once all ids are known, so checkpointing may fire only once per
        # size milestone — confirm this is intended.
        if len(processed_data) % 20 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    print(f"Refinement completed. Total records: {len(processed_data)}")
|
code/finetune-inference/old/nemotran_inference.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json
# Must be set before importing torch so CUDA device selection takes effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# NOTE(review): os and json are imported twice (above and below); harmless
# but redundant.
import os
import json
import tqdm
import argparse
import torch
from unsloth import FastLanguageModel

# -----------------------------
# UNSLOTH MODEL CONFIGURATION
# -----------------------------
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/nemotron-3-nano-30b-a3b_subclaims-support-check-8b_ctx_v2-bf16"
max_seq_length = 2048  # Adjusted for medical text + reasoning context
dtype = None  # Auto-detection for A100 (will likely use bfloat16)
load_in_4bit = True  # To fit 32B model comfortably on A100

# Load model and tokenizer natively
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_PATH,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    trust_remote_code = True,
)

# Enable 2x faster native inference
FastLanguageModel.for_inference(model)
|
| 32 |
+
|
| 33 |
+
# -----------------------------
|
| 34 |
+
# VERIFICATION PROMPT
|
| 35 |
+
# -----------------------------
|
| 36 |
+
def inference_prompt(text, subclaim):
    """Return the strict-grounding auditor prompt for *subclaim* vs *text*."""
    # This remains the same as the clinical evidence auditor prompt.
    prompt = f"""You are a clinical evidence auditor. Your evaluation must be based STRICTLY and ONLY on the provided medical text.

### MANDATORY GROUNDING RULES:
1. NO OUTSIDE KNOWLEDGE: Do not use your internal medical knowledge. Even if a subclaim is "common sense" in medicine, if it is not explicitly in the TEXT, it is 'not_supported'.
2. NO LOGICAL LEAPS: Do not bridge gaps in logic. (e.g., If the text mentions "high blood sugar" but not the word "diabetes", you cannot support a claim of "diabetes").
3. EXACT NUMERICAL MATCHING: Any doses (e.g., 500mg), frequencies (e.g., twice daily), or durations (e.g., 10 days) mentioned in the subclaim must match the text perfectly. If they are missing or different in the text, label as 'not_supported'.
4. DEFAULT TO NOT SUPPORTED: If the text is vague, ambiguous, or only suggests a possibility, you MUST choose 'not_supported'.
5. CLOSED-WORLD REALITY: Treat the TEXT as the only information that exists in the world.

### Medical Text:
{text}

### Subclaim:
{subclaim}

Output exactly one word ('supported' or 'not_supported') based on the strict rules above:"""
    return prompt
|
| 54 |
+
|
| 55 |
+
# -----------------------------
|
| 56 |
+
# VERIFICATION LOGIC (UNSLOTH VERSION)
|
| 57 |
+
# -----------------------------
|
| 58 |
+
def check_support(text: str, subclaim: str, error_log=None) -> str:
    """Classify whether *subclaim* is supported by *text*.

    Returns 'supported', 'not_supported' or 'refuted'.  Empty input, an
    unparseable model answer, or an inference error all fall back to
    'not_supported' (fail-closed).  Errors are appended to *error_log*
    when a list is supplied.
    """
    if not text or not subclaim:
        return "not_supported"

    prompt_content = inference_prompt(text, subclaim)

    # Format for Chat Template (assuming Qwen3 uses IM_START/IM_END)
    messages = [{"role": "user", "content": prompt_content}]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True,
        return_tensors = "pt",
    ).to("cuda")

    try:
        # NOTE: without do_sample=True generation is greedy and
        # `temperature` has no effect; kept for parity with the previous
        # API-call configuration.
        outputs = model.generate(
            input_ids = inputs,
            max_new_tokens = 512,  # Kept from your max_tokens=512
            temperature = 0.1,     # Kept from your temperature=0.1
            use_cache = True,
        )

        # Extract only the continuation (tokens after the prompt).
        res = tokenizer.batch_decode(outputs[:, inputs.shape[1]:], skip_special_tokens=True)[0]
        res = res.strip().lower()

        # BUG FIX: take the text after the LAST </think> marker; the old
        # `[1]` index picked the wrong segment whenever the model emitted
        # more than one reasoning block.
        if "</think>" in res:
            res = res.split("</think>")[-1].strip().lower()

        # Order matters: 'supported' is a substring of 'not_supported',
        # so the negative label must be checked first.
        if "not_supported" in res:
            return "not_supported"
        elif "supported" in res:
            return "supported"
        elif "refuted" in res:
            return "refuted"
        else:
            return "not_supported"

    except Exception as e:
        if error_log is not None:
            error_details = {"subclaim": subclaim, "error_msg": str(e), "type": "LOCAL_INFERENCE_ERROR"}
            error_log.append(error_details)
        return "not_supported"
|
| 103 |
+
|
| 104 |
+
# -----------------------------
|
| 105 |
+
# MAIN (Processing logic remains largely identical)
|
| 106 |
+
# -----------------------------
|
| 107 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/model_validity_check/subclaims_support_validity_check_gt_gpt5(1-5).json")
    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/concise_complete_attr_testing")
    parser.add_argument("--start_index", type=int, default=0)
    parser.add_argument("--end_index", type=int, default=-1)

    args = parser.parse_args()

    INPUT_FILE = args.input_file
    SAVE_FOLDER = args.save_folder
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    with open(INPUT_FILE, "r") as f:
        all_data = json.load(f)

    # Clamp the requested [start, end) slice to the dataset bounds.
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len
    data_slice = all_data[start:min(end, total_len)]

    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"evaluated_metrics_{start}_{end}_nemotran-30B.json")

    # Resume support: reload previously evaluated entries if present.
    processed_results = []
    if os.path.exists(OUTPUT_FILE):
        # BUG FIX: the bare `except:` here also swallowed SystemExit and
        # KeyboardInterrupt; only recover from a corrupt/unreadable file.
        try:
            with open(OUTPUT_FILE, "r") as f:
                processed_results = json.load(f)
        except (json.JSONDecodeError, OSError):
            processed_results = []

    # NOTE(review): resume keys on the full medical text rather than an
    # id; duplicate texts in the dataset would be skipped as "processed".
    processed_ids = {item['medical_text'] for item in processed_results}
    global_error_log = []

    pbar = tqdm.tqdm(data_slice)

    for item in pbar:
        text = item.get('full_text', '')
        if text in processed_ids: continue  # Simple skip logic for resume

        subclaims = item.get('dat', {}).get('dat', [])

        for subclaim_obj in subclaims:
            subclaim_text = subclaim_obj.get('subclaim', '')
            label_gt = subclaim_obj.get('status', 'not_supported').strip().lower()

            label_gen = check_support(text, subclaim_text, error_log=global_error_log)

            correctness = (label_gen == label_gt)

            result_entry = {
                "medical_text": text,
                "subclaim": subclaim_text,
                "label_gt": label_gt,
                "label_gen": label_gen,
                "correctness": correctness
            }
            processed_results.append(result_entry)

        # Intermediate Save (rewrites the whole file once per item)
        with open(OUTPUT_FILE, "w") as f:
            json.dump(processed_results, f, indent=2, ensure_ascii=False)

    # Final Save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_results, f, indent=2, ensure_ascii=False)
code/finetune-inference/old/prompt_generate.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ALL_PROMPTS = {
|
| 2 |
+
"en": {
|
| 3 |
+
"B1": """You are a summarization assistant. Your single most important goal is to rewrite medical text for a first-grade reading level (ages 5-7, FKGL 1.0-4.0). Simplicity is more important than detail.
|
| 4 |
+
|
| 5 |
+
Core Mandate:
|
| 6 |
+
- TARGET AUDIENCE: A 6-year-old child.
|
| 7 |
+
- PRIMARY GOAL: Extreme simplicity. If you must choose between accuracy of detail and simplicity, ALWAYS choose simplicity.
|
| 8 |
+
|
| 9 |
+
Strict Rules You Must Follow:
|
| 10 |
+
- SENTENCE LENGTH: Keep almost all sentences under 10 words. Use very short, simple sentences.
|
| 11 |
+
- VOCABULARY: Use only very common, everyday words that a first-grader would know. Avoid any medical or scientific terms. Instead of 'femur', say 'thigh bone'. Instead of 'benign', say 'not harmful'.
|
| 12 |
+
- TONE: Be very gentle, calm, and reassuring. Like a kind doctor explaining something to a small child.
|
| 13 |
+
- STRUCTURE: Use short paragraphs, often just one or two sentences long.
|
| 14 |
+
- FOCUS: Only mention the most important one or two points from the original text. Omit all other details.
|
| 15 |
+
|
| 16 |
+
- Never use emojis.
|
| 17 |
+
- Do not explain pronunciation.
|
| 18 |
+
- DO NOT use any medical jargon.
|
| 19 |
+
""",
|
| 20 |
+
"B2": """You are a summarization assistant trained to rewrite medical summaries for a middle school reading level (ages 11–14, FKGL 6.0–9.0). Your goal is clarity for a teenager with a basic understanding of biology.
|
| 21 |
+
|
| 22 |
+
Core Mandate:
|
| 23 |
+
- TARGET AUDIENCE: A 14-year-old in a 9th-grade biology class.
|
| 24 |
+
- PRIMARY GOAL: Clarity and straightforward explanation.
|
| 25 |
+
|
| 26 |
+
Strict Rules You Must Follow:
|
| 27 |
+
- SENTENCE LENGTH: Vary sentence length, but aim for an average of 12-18 words. Avoid long, complex sentences.
|
| 28 |
+
- VOCABULARY: You can use basic medical terms (e.g., 'biopsy', 'cells', 'tumor'), but you MUST explain them in simple terms immediately. For example: "A biopsy, which is when a small piece of tissue is taken for testing...".
|
| 29 |
+
- TONE: Be empathetic but direct. Use an educational and informative tone, like a science teacher.
|
| 30 |
+
- STRUCTURE: Organize the summary into logical paragraphs. You can use simple headings if it helps clarity (e.g., "What They Found," "What It Means").
|
| 31 |
+
- FOCUS: Summarize the main findings and their implications. Omit minor or highly technical details.
|
| 32 |
+
|
| 33 |
+
- Never use emojis.
|
| 34 |
+
- Do not explain pronunciation.
|
| 35 |
+
""",
|
| 36 |
+
"B3": """You are a summarization assistant trained to rewrite medical summaries for an educated, non-medical adult (ages 17+, FKGL 12.0+). Your goal is to be precise, comprehensive, and clear for a college-level reader.
|
| 37 |
+
|
| 38 |
+
Core Mandate:
|
| 39 |
+
- TARGET AUDIENCE: A curious college student or adult with no medical training.
|
| 40 |
+
- PRIMARY GOAL: Precision and structured clarity.
|
| 41 |
+
|
| 42 |
+
Strict Rules You Must Follow:
|
| 43 |
+
- SENTENCE LENGTH: Use clear, well-constructed sentences. Complex sentences are acceptable if they enhance clarity and precision.
|
| 44 |
+
- VOCABULARY: Use correct medical terminology. You can assume the reader can understand terms from context or look them up, but for very specialized terms, provide a brief parenthetical explanation. For example: "...showed evidence of hyperplasia (an increase in the number of cells)."
|
| 45 |
+
- TONE: Maintain a professional, empathetic, and respectful tone. Be authoritative but not clinical or cold.
|
| 46 |
+
- STRUCTURE: Provide a detailed and structured summary. Use headings to organize information, such as "Background," "Key Findings," "Clinical Interpretation," and "Next Steps."
|
| 47 |
+
- FOCUS: Be comprehensive and faithful to the source summary. Include important details, test results, and differential diagnoses mentioned in the source.
|
| 48 |
+
|
| 49 |
+
- Never use emojis.
|
| 50 |
+
- Do not explain pronunciation.
|
| 51 |
+
"""
|
| 52 |
+
},
|
| 53 |
+
"es": {
|
| 54 |
+
"B1": """Eres un asistente de resumen. Tu único y más importante objetivo es reescribir texto médico para un nivel de lectura de primer grado (edades 5-7). La simplicidad es más importante que el detalle.
|
| 55 |
+
|
| 56 |
+
Mandato Principal:
|
| 57 |
+
- PÚBLICO OBJETIVO: Un niño de 6 años.
|
| 58 |
+
- OBJETIVO PRIMARIO: Simplicidad extrema. Si debes elegir entre la precisión del detalle y la simplicidad, SIEMPRE elige la simplicidad.
|
| 59 |
+
|
| 60 |
+
Reglas Estrictas que Debes Seguir:
|
| 61 |
+
- IDIOMA: El resumen DEBE estar escrito en español.
|
| 62 |
+
- LONGITUD DE LA ORACIÓN: Casi todas las oraciones deben tener menos de 10 palabras. Usa frases muy cortas y simples.
|
| 63 |
+
- VOCABULARIO: Usa solo palabras cotidianas y muy comunes que un niño de primer grado conocería. Evita cualquier término médico o científico. En lugar de 'fémur', di 'hueso del muslo'. En lugar de 'benigno', di 'que no es dañino'.
|
| 64 |
+
- TONO: Sé muy gentil, calmado y tranquilizador. Como un doctor amable explicándole algo a un niño pequeño.
|
| 65 |
+
- ESTRUCTURA: Usa párrafos cortos, a menudo de solo una o dos oraciones.
|
| 66 |
+
- ENFOQUE: Menciona solo el punto más importante o los dos puntos más importantes del texto original. Omite todos los demás detalles.
|
| 67 |
+
|
| 68 |
+
- Nunca uses emojis.
|
| 69 |
+
- No expliques la pronunciación.
|
| 70 |
+
- NO uses jerga médica.
|
| 71 |
+
""",
|
| 72 |
+
"B2": """Eres un asistente de resumen entrenado para reescribir resúmenes médicos para un nivel de lectura de secundaria (edades 11–14). Tu objetivo es la claridad para un adolescente con conocimientos básicos de biología.
|
| 73 |
+
|
| 74 |
+
Mandato Principal:
|
| 75 |
+
- PÚBLICO OBJETIVO: Un estudiante de 14 años en una clase de biología de secundaria.
|
| 76 |
+
- OBJETIVO PRIMARIO: Claridad y explicación directa.
|
| 77 |
+
|
| 78 |
+
Reglas Estrictas que Debes Seguir:
|
| 79 |
+
- IDIOMA: El resumen DEBE estar escrito en español.
|
| 80 |
+
- LONGITUD DE LA ORACIÓN: Varía la longitud de las oraciones, pero busca un promedio de 12-18 palabras. Evita las oraciones largas y complejas.
|
| 81 |
+
- VOCABULARIO: Puedes usar términos médicos básicos (ej., 'biopsia', 'células', 'tumor'), pero DEBES explicarlos en términos sencillos inmediatamente. Por ejemplo: "Una biopsia, que es cuando se toma un pequeño trozo de tejido para analizarlo...".
|
| 82 |
+
- TONO: Sé empático pero directo. Usa un tono educativo e informativo, como un profesor de ciencias.
|
| 83 |
+
- ESTRUCTURA: Organiza el resumen en párrafos lógicos. Puedes usar encabezados simples si ayuda a la claridad (ej., "Lo que Encontraron," "Qué Significa").
|
| 84 |
+
- ENFOQUE: Resume los hallazgos principales y sus implicaciones. Omite detalles menores o muy técnicos.
|
| 85 |
+
|
| 86 |
+
- Nunca uses emojis.
|
| 87 |
+
- No expliques la pronunciación.
|
| 88 |
+
""",
|
| 89 |
+
"B3": """Eres un asistente de resumen entrenado para reescribir resúmenes médicos para un adulto educado no médico (edades 17+). Tu objetivo es ser preciso, completo y claro para un lector de nivel universitario.
|
| 90 |
+
|
| 91 |
+
Mandato Principal:
|
| 92 |
+
- PÚBLICO OBJETIVO: Un estudiante universitario o un adulto curioso sin formación médica.
|
| 93 |
+
- OBJETIVO PRIMARIO: Precisión y claridad estructurada.
|
| 94 |
+
|
| 95 |
+
Reglas Estrictas que Debes Seguir:
|
| 96 |
+
- IDIOMA: El resumen DEBE estar escrito en español.
|
| 97 |
+
- LONGITUD DE LA ORACIÓN: Usa oraciones claras y bien construidas. Las oraciones complejas son aceptables si mejoran la claridad y la precisión.
|
| 98 |
+
- VOCABULARIO: Usa la terminología médica correcta. Puedes asumir que el lector puede entender los términos por el contexto o buscarlos, pero para términos muy especializados, proporciona una breve explicación entre paréntesis. Por ejemplo: "...mostró evidencia de hiperplasia (un aumento en el número de células)."
|
| 99 |
+
- TONO: Mantén un tono profesional, empático y respetuoso. Sé autoritario pero no clínico o frío.
|
| 100 |
+
- ESTRUCTURA: Proporciona un resumen detallado y estructurado. Usa encabezados para organizar la información, como "Contexto," "Hallazgos Clave," "Interpretación Clínica," y "Próximos Pasos."
|
| 101 |
+
- ENFOQUE: Sé completo y fiel al resumen original. Incluye detalles importantes, resultados de pruebas y diagnósticos diferenciales mencionados en la fuente.
|
| 102 |
+
|
| 103 |
+
- Nunca uses emojis.
|
| 104 |
+
- No expliques la pronunciación.
|
| 105 |
+
"""
|
| 106 |
+
},
|
| 107 |
+
"fr": {
|
| 108 |
+
"B1": """Vous êtes un assistant de résumé. Votre unique et plus important objectif est de réécrire un texte médical pour un niveau de lecture de cours préparatoire (âges 5-7). La simplicité est plus importante que le détail.
|
| 109 |
+
|
| 110 |
+
Mandat Principal :
|
| 111 |
+
- PUBLIC CIBLE : Un enfant de 6 ans.
|
| 112 |
+
- OBJECTIF PRINCIPAL : Simplicité extrême. Si vous devez choisir entre la précision des détails et la simplicité, choisissez TOUJOURS la simplicité.
|
| 113 |
+
|
| 114 |
+
Règles Strictes à Suivre Impérativement :
|
| 115 |
+
- LANGUE : Le résumé DOIT être rédigé en français.
|
| 116 |
+
- LONGUEUR DES PHRASES : Presque toutes les phrases doivent faire moins de 10 mots. Utilisez des phrases très courtes et simples.
|
| 117 |
+
- VOCABULAIRE : Utilisez uniquement des mots très courants et quotidiens qu'un enfant de cet âge connaîtrait. Évitez tout terme médical ou scientifique. Au lieu de 'fémur', dites 'l'os de la cuisse'. Au lieu de 'bénin', dites 'pas dangereux'.
|
| 118 |
+
- TON : Soyez très doux, calme et rassurant. Comme un médecin bienveillant qui explique quelque chose à un jeune enfant.
|
| 119 |
+
- STRUCTURE : Utilisez des paragraphes courts, souvent composés d'une ou deux phrases seulement.
|
| 120 |
+
- ENFOQUE : Mentionnez uniquement le ou les deux points les plus importants du texte original. Omettez tous les autres détails.
|
| 121 |
+
|
| 122 |
+
- N'utilisez jamais d'emojis.
|
| 123 |
+
- N'expliquez pas la prononciation.
|
| 124 |
+
- N'utilisez AUCUN jargon médical.
|
| 125 |
+
""",
|
| 126 |
+
"B2": """Vous êtes un assistant de résumé entraîné à réécrire des résumés médicaux pour un niveau de lecture de collège (âges 11–14). Votre objectif est la clarté pour un adolescent ayant une compréhension de base de la biologie.
|
| 127 |
+
|
| 128 |
+
Mandat Principal :
|
| 129 |
+
- PUBLIC CIBLE : Un adolescent de 14 ans en classe de biologie au collège.
|
| 130 |
+
- OBJECTIF PRINCIPAL : Clarté et explication directe.
|
| 131 |
+
|
| 132 |
+
Règles Strictes à Suivre Impérativement :
|
| 133 |
+
- LANGUE : Le résumé DOIT être rédigé en français.
|
| 134 |
+
- LONGUEUR DES PHRASES : Variez la longueur des phrases, mais visez une moyenne de 12-18 mots. Évitez les phrases longues et complexes.
|
| 135 |
+
- VOCABULAIRE : Vous pouvez utiliser des termes médicaux de base (ex: 'biopsie', 'cellules', 'tumeur'), mais vous DEVEZ les expliquer en termes simples immédiatement. Par exemple : "Une biopsie, c'est-à-dire quand on prélève un petit morceau de tissu pour l'analyser...".
|
| 136 |
+
- TON : Soyez empathique mais direct. Adoptez un ton pédagogique et informatif, comme un professeur de sciences.
|
| 137 |
+
- STRUCTURE : Organisez le résumé en paragraphes logiques. Vous pouvez utiliser des titres simples si cela améliore la clarté (ex: "Ce qu'ils ont trouvé", "Ce que cela signifie").
|
| 138 |
+
- ENFOQUE : Résumez les principales observations et leurs implications. Omettez les détails mineurs ou très techniques.
|
| 139 |
+
|
| 140 |
+
- N'utilisez jamais d'emojis.
|
| 141 |
+
- N'expliquez pas la prononciation.
|
| 142 |
+
""",
|
| 143 |
+
"B3": """Vous êtes un assistant de résumé entraîné à réécrire des résumés médicaux pour un adulte éduqué non-médecin (âges 17+). Votre objectif est d'être précis, complet et clair pour un lecteur de niveau universitaire.
|
| 144 |
+
|
| 145 |
+
Mandat Principal :
|
| 146 |
+
- PUBLIC CIBLE : Un étudiant ou un adulte curieux sans formation médicale.
|
| 147 |
+
- OBJECTIF PRINCIPAL : Précision et clarté structurée.
|
| 148 |
+
|
| 149 |
+
Règles Strictes à Suivre Impérativement :
|
| 150 |
+
- LANGUE : Le résumé DOIT être rédigé en français.
|
| 151 |
+
- LONGUEUR DES PHRASES : Utilisez des phrases claires et bien construites. Les phrases complexes sont acceptables si elles améliorent la clarté et la précision.
|
| 152 |
+
- VOCABULAIRE : Utilisez la terminologie médicale correcte. Vous pouvez supposer que le lecteur peut comprendre les termes par le contexte ou les rechercher, mais pour les termes très spécialisés, fournissez une brève explication entre parenthèses. Par exemple : "...montrait des signes d'hyperplasie (une augmentation du nombre de cellules)."
|
| 153 |
+
- TON : Maintenez un ton professionnel, empathique et respectueux. Soyez directif mais ni clinique ni froid.
|
| 154 |
+
- STRUCTURE : Fournissez un résumé détaillé et structuré. Utilisez des titres pour organiser l'information, tels que "Contexte", "Principales Observations", "Interprétation Clinique" et "Prochaines Étapes".
|
| 155 |
+
- ENFOQUE : Soyez complet et fidèle au résumé source. Incluez les détails importants, les résultats des tests et les diagnostics différentiels mentionnés dans la source.
|
| 156 |
+
|
| 157 |
+
- N'utilisez jamais d'emojis.
|
| 158 |
+
- N'expliquez pas la prononciation.
|
| 159 |
+
"""
|
| 160 |
+
},
|
| 161 |
+
|
| 162 |
+
"pt": {
|
| 163 |
+
"B1": """Você é um assistente de resumo. O seu único e mais importante objetivo é reescrever textos médicos para um nível de leitura da primeira série (idades 5-7). A simplicidade é mais importante que os detalhes.
|
| 164 |
+
|
| 165 |
+
Mandato Principal:
|
| 166 |
+
- PÚBLICO-ALVO: Uma criança de 6 anos.
|
| 167 |
+
- OBJETIVO PRINCIPAL: Simplicidade extrema. Se tiver que escolher entre a precisão dos detalhes e a simplicidade, ESCOLHA SEMPRE a simplicidade.
|
| 168 |
+
|
| 169 |
+
Regras Rígidas que Você Deve Seguir:
|
| 170 |
+
- IDIOMA: O resumo DEVE ser escrito em português.
|
| 171 |
+
- COMPRIMENTO DAS FRASES: Quase todas as frases devem ter menos de 10 palavras. Use frases muito curtas e simples.
|
| 172 |
+
- VOCABULÁRIO: Use apenas palavras quotidianas e muito comuns que uma criança da primeira série conheceria. Evite qualquer termo médico ou científico. Em vez de 'fêmur', diga 'o osso da coxa'. Em vez de 'benigno', diga 'que não faz mal'.
|
| 173 |
+
- TOM: Seja muito gentil, calmo e tranquilizador. Como um médico amável a explicar algo a uma criança pequena.
|
| 174 |
+
- ESTRUTURA: Use parágrafos curtos, muitas vezes com apenas uma ou duas frases.
|
| 175 |
+
- FOCO: Mencione apenas um ou dois dos pontos mais importantes do texto original. Omita todos os outros detalhes.
|
| 176 |
+
|
| 177 |
+
- Nunca use emojis.
|
| 178 |
+
- Não explique a pronúncia.
|
| 179 |
+
- NÃO use NENHUM jargão médico.
|
| 180 |
+
""",
|
| 181 |
+
"B2": """Você é um assistente de resumo treinado para reescrever resumos médicos para um nível de leitura do ensino fundamental II (idades 11–14). O seu objetivo é a clareza para um adolescente com conhecimentos básicos de biologia.
|
| 182 |
+
|
| 183 |
+
Mandato Principal:
|
| 184 |
+
- PÚBLICO-ALVO: Um adolescente de 14 anos numa aula de biologia.
|
| 185 |
+
- OBJETIVO PRINCIPAL: Clareza e explicação direta.
|
| 186 |
+
|
| 187 |
+
Regras Rígidas que Você Deve Seguir:
|
| 188 |
+
- IDIOMA: O resumo DEVE ser escrito em português.
|
| 189 |
+
- COMPRIMENTO DAS FRASES: Varie o comprimento das frases, mas procure uma média de 12 a 18 palavras. Evite frases longas e complexas.
|
| 190 |
+
- VOCABULÁRIO: Pode usar termos médicos básicos (ex: 'biópsia', 'células', 'tumor'), mas você DEVE explicá-los em termos simples imediatamente. Por exemplo: "Uma biópsia, que é quando um pequeno pedaço de tecido é retirado para ser analisado...".
|
| 191 |
+
- TOM: Seja empático, mas direto. Use um tom educativo e informativo, como um professor de ciências.
|
| 192 |
+
- ESTRUTURA: Organize o resumo em parágrafos lógicos. Pode usar títulos simples se isso ajudar na clareza (ex: "O que eles encontraram", "O que isso significa").
|
| 193 |
+
- FOCO: Resuma os principais achados e as suas implicações. Omita detalhes menores ou muito técnicos.
|
| 194 |
+
|
| 195 |
+
- Nunca use emojis.
|
| 196 |
+
- Não explique a pronúncia.
|
| 197 |
+
""",
|
| 198 |
+
"B3": """Você é um assistente de resumo treinado para reescrever resumos médicos para um adulto instruído, mas sem formação médica (idades 17+). O seu objetivo é ser preciso, abrangente e claro para um leitor de nível universitário.
|
| 199 |
+
|
| 200 |
+
Mandato Principal:
|
| 201 |
+
- PÚBLICO-ALVO: Um estudante universitário ou adulto curioso sem formação médica.
|
| 202 |
+
- OBJETIVO PRINCIPAL: Precisão e clareza estruturada.
|
| 203 |
+
|
| 204 |
+
Regras Rígidas que Você Deve Seguir:
|
| 205 |
+
- IDIOMA: O resumo DEVE ser escrito em português.
|
| 206 |
+
- COMPRIMENTO DAS FRASES: Use frases claras e bem construídas. Frases complexas são aceitáveis se melhorarem a clareza e a precisão.
|
| 207 |
+
- VOCABULÁRIO: Use a terminologia médica correta. Pode assumir que o leitor consegue entender os termos pelo contexto ou pesquisá-los, mas para termos muito especializados, forneça uma breve explicação entre parênteses. Por exemplo: "...mostrou evidência de hiperplasia (um aumento no número de células)."
|
| 208 |
+
- TOM: Mantenha um tom profissional, empático e respeitoso. Seja confiante, mas não clínico ou frio.
|
| 209 |
+
- ESTRUTURA: Forneça um resumo detalhado e estruturado. Use títulos para organizar a informação, como "Contexto", "Principais Achados", "Interpretação Clínica" e "Próximos Passos".
|
| 210 |
+
- FOCO: Seja abrangente e fiel ao resumo original. Inclua detalhes importantes, resultados de testes e diagnósticos diferenciais mencionados na fonte.
|
| 211 |
+
|
| 212 |
+
- Nunca use emojis.
|
| 213 |
+
- Não explique a pronúncia.
|
| 214 |
+
"""
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
}
|
| 218 |
+
# Per-language user prompt templates, one per supported language code
# ("en", "es", "fr", "pt" — mirroring the keys of ALL_PROMPTS).
# Each template carries two str.format placeholders, {article} and
# {gold_summary}, filled in by generate_prompt() below.
USER_PROMPT_TEMPLATES = {
    "en": """Please rewrite the following expert summary for the specified target audience. Use the full article for context if needed.
**Full Article Context:**
{article}
**Expert Summary to Rewrite:**
{gold_summary}
""",
    "es": """Por favor, reescribe el siguiente resumen de experto para el público objetivo especificado. Usa el artículo completo como contexto si es necesario.
**Contexto del Artículo Completo:**
{article}
**Resumen de Experto a Reescribir:**
{gold_summary}
""",
    "fr": """Veuillez réécrire le résumé d'expert suivant pour le public cible spécifié. Utilisez l'article complet comme contexte si nécessaire.
**Contexte de l'Article Complet :**
{article}
**Résumé d'Expert à Réécrire :**
{gold_summary}
""",
    "pt": """Por favor, reescreva o seguinte resumo de especialista para o público-alvo especificado. Use o artigo completo como contexto, se necessário.
**Contexto do Artigo Completo:**
{article}
**Resumo do Especialista a Ser Reescrito:**
{gold_summary}
"""
}
|
| 244 |
+
|
| 245 |
+
def generate_prompt(article, gold_summary, band, lang):
    """Build the combined system + user prompt for one readability band and language.

    Note: despite the module's API-calling context, this function performs no
    model call itself — it only assembles the prompt text.

    Args:
        article: Full source article, inserted into the user prompt for context.
        gold_summary: Expert (gold) summary to be rewritten.
        band: Readability band key (the visible prompt tables use "B1"-"B3").
        lang: Language code; must be a key of both ALL_PROMPTS and
            USER_PROMPT_TEMPLATES (e.g. "en", "es", "fr", "pt").

    Returns:
        A single string: the band-specific system prompt, a newline, then the
        filled-in user prompt.

    Raises:
        ValueError: If no prompts are configured for `lang`, or `lang` has no
            entry for `band`.
    """
    prompts_for_lang = ALL_PROMPTS.get(lang)
    user_prompt_template = USER_PROMPT_TEMPLATES.get(lang)
    if not prompts_for_lang or not user_prompt_template:
        raise ValueError(f"No prompts available for language: {lang}")

    # Fail with a descriptive error (consistent with the lang check above)
    # instead of a bare KeyError when the band is unknown.
    system_prompt = prompts_for_lang.get(band)
    if system_prompt is None:
        raise ValueError(f"No prompt for band '{band}' in language: {lang}")

    user_prompt = user_prompt_template.format(article=article, gold_summary=gold_summary)
    return system_prompt + "\n" + user_prompt
|
code/finetune-inference/old/statistics.ipynb
ADDED
|
@@ -0,0 +1,400 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"id": "1408eea5",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import json\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"with open('/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json', 'r') as f:\n",
|
| 13 |
+
" data_item = json.load(f)\n",
|
| 14 |
+
"data = []\n",
|
| 15 |
+
"for item in data_item:\n",
|
| 16 |
+
" attribution=item['attribution']['accuracy']\n",
|
| 17 |
+
" data.append(attribution)"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "code",
|
| 22 |
+
"execution_count": null,
|
| 23 |
+
"id": "c706e713",
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"source": [
|
| 27 |
+
"import numpy as np\n",
|
| 28 |
+
"import pandas as pd\n",
|
| 29 |
+
"import seaborn as sns\n",
|
| 30 |
+
"import matplotlib.pyplot as plt\n",
|
| 31 |
+
"from scipy import stats\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"# Example data list\n",
|
| 34 |
+
"# data = [12, 15, 14, 18, 19, 17, 21]\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"# Convert to a pandas Series for convenience\n",
|
| 37 |
+
"s = pd.Series(data)\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"# --- 1. Basic statistics ---\n",
|
| 40 |
+
"summary = s.describe()\n",
|
| 41 |
+
"print(\"Basic statistics:\")\n",
|
| 42 |
+
"print(summary)\n",
|
| 43 |
+
"\n",
|
| 44 |
+
"# Extra metrics\n",
|
| 45 |
+
"print(\"\\nAdditional info:\")\n",
|
| 46 |
+
"print(f\"Variance: {s.var():.2f}\")\n",
|
| 47 |
+
"print(f\"Skewness: {s.skew():.2f}\")\n",
|
| 48 |
+
"print(f\"Kurtosis: {s.kurt():.2f}\")\n",
|
| 49 |
+
"print(f\"Mode: {s.mode().tolist()}\")\n",
|
| 50 |
+
"\n",
|
| 51 |
+
"# --- 2. Visualization ---\n",
|
| 52 |
+
"plt.figure(figsize=(8, 5))\n",
|
| 53 |
+
"sns.histplot(s, bins=10, kde=True, color='skyblue', edgecolor='black')\n",
|
| 54 |
+
"plt.title(\"Distribution curve of data\")\n",
|
| 55 |
+
"plt.xlabel(\"Value\")\n",
|
| 56 |
+
"plt.ylabel(\"Frequency\")\n",
|
| 57 |
+
"plt.show()"
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"cell_type": "code",
|
| 62 |
+
"execution_count": null,
|
| 63 |
+
"id": "860aff4b",
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"outputs": [],
|
| 66 |
+
"source": [
|
| 67 |
+
"import pandas as pd\n",
|
| 68 |
+
"\n",
|
| 69 |
+
"s = pd.Series(data) # sample data with an outlier\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"# Compute IQR boundaries\n",
|
| 72 |
+
"Q1 = s.quantile(0.25)\n",
|
| 73 |
+
"Q3 = s.quantile(0.75)\n",
|
| 74 |
+
"IQR = Q3 - Q1\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"lower_lim = Q1 - 1.5 * IQR\n",
|
| 77 |
+
"upper_lim = Q3 + 1.5 * IQR\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"cleaned = s[(s >= lower_lim) & (s <= upper_lim)]\n",
|
| 80 |
+
"\n",
|
| 81 |
+
"print(\"Cleaned data:\")\n",
|
| 82 |
+
"print(len(cleaned.tolist()))\n",
|
| 83 |
+
"import seaborn as sns\n",
|
| 84 |
+
"import matplotlib.pyplot as plt\n",
|
| 85 |
+
"\n",
|
| 86 |
+
"sns.boxplot(x=s, color=\"lightblue\")\n",
|
| 87 |
+
"plt.title(\"Before cleaning\")\n",
|
| 88 |
+
"plt.show()\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"sns.boxplot(x=cleaned, color=\"lightgreen\")\n",
|
| 91 |
+
"plt.title(\"After IQR cleaning\")\n",
|
| 92 |
+
"plt.show()"
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "code",
|
| 97 |
+
"execution_count": null,
|
| 98 |
+
"id": "4b1f16b3",
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"import numpy as np\n",
|
| 103 |
+
"from scipy import stats\n",
|
| 104 |
+
"\n",
|
| 105 |
+
"z_scores = np.abs(stats.zscore(s))\n",
|
| 106 |
+
"threshold = 3 # commonly used threshold\n",
|
| 107 |
+
"cleaned_z = s[z_scores < threshold]\n",
|
| 108 |
+
"print(len(cleaned_z.tolist()))\n",
|
| 109 |
+
"import seaborn as sns\n",
|
| 110 |
+
"import matplotlib.pyplot as plt\n",
|
| 111 |
+
"print(\"Cleaned data (Z-score method):\")\n",
|
| 112 |
+
"sns.boxplot(x=s, color=\"lightblue\")\n",
|
| 113 |
+
"plt.title(\"Before cleaning\")\n",
|
| 114 |
+
"plt.show()\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"sns.boxplot(x=cleaned_z, color=\"lightgreen\")\n",
|
| 117 |
+
"plt.title(\"After Z-score cleaning\")\n",
|
| 118 |
+
"plt.show()"
|
| 119 |
+
]
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"cell_type": "code",
|
| 123 |
+
"execution_count": null,
|
| 124 |
+
"id": "4394d44c",
|
| 125 |
+
"metadata": {},
|
| 126 |
+
"outputs": [],
|
| 127 |
+
"source": []
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "code",
|
| 131 |
+
"execution_count": null,
|
| 132 |
+
"id": "8e24c8c2",
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [],
|
| 135 |
+
"source": []
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"execution_count": null,
|
| 140 |
+
"id": "f97f821e",
|
| 141 |
+
"metadata": {},
|
| 142 |
+
"outputs": [],
|
| 143 |
+
"source": [
|
| 144 |
+
"import json\n",
|
| 145 |
+
"import pandas as pd\n",
|
| 146 |
+
"import matplotlib.pyplot as plt\n",
|
| 147 |
+
"import seaborn as sns\n",
|
| 148 |
+
"\n",
|
| 149 |
+
"def analyze_doclens_results(file_path):\n",
|
| 150 |
+
" \"\"\"\n",
|
| 151 |
+
" Loads, parses, and analyzes the DOCLENS evaluation results from a JSON file.\n",
|
| 152 |
+
"\n",
|
| 153 |
+
" Args:\n",
|
| 154 |
+
" file_path (str): The path to the JSON results file.\n",
|
| 155 |
+
"\n",
|
| 156 |
+
" Returns:\n",
|
| 157 |
+
" pandas.DataFrame: A DataFrame with the aggregated mean scores.\n",
|
| 158 |
+
" \"\"\"\n",
|
| 159 |
+
" # Load the entire JSON file\n",
|
| 160 |
+
" try:\n",
|
| 161 |
+
" with open(file_path, 'r', encoding='utf-8') as f:\n",
|
| 162 |
+
" data = json.load(f)\n",
|
| 163 |
+
" except FileNotFoundError:\n",
|
| 164 |
+
" print(f\"Error: The file '{file_path}' was not found.\")\n",
|
| 165 |
+
" return None\n",
|
| 166 |
+
" except json.JSONDecodeError:\n",
|
| 167 |
+
" print(f\"Error: The file '{file_path}' is not a valid JSON file.\")\n",
|
| 168 |
+
" return None\n",
|
| 169 |
+
"\n",
|
| 170 |
+
" # Parse the nested data into a flat list of dictionaries\n",
|
| 171 |
+
" parsed_data = []\n",
|
| 172 |
+
" for record in data:\n",
|
| 173 |
+
" record_id = record.get(\"id\")\n",
|
| 174 |
+
" version = record.get(\"version\")\n",
|
| 175 |
+
" \n",
|
| 176 |
+
" # Extract accuracy scores safely\n",
|
| 177 |
+
" completeness_acc = record.get(\"completeness\", {}).get(\"accuracy\", 0)\n",
|
| 178 |
+
" conciseness_acc = record.get(\"conciseness\", {}).get(\"accuracy\", 0)\n",
|
| 179 |
+
" attribution_acc = record.get(\"attribution\", {}).get(\"accuracy\", 0)\n",
|
| 180 |
+
"\n",
|
| 181 |
+
" parsed_data.append({\n",
|
| 182 |
+
" \"id\": record_id,\n",
|
| 183 |
+
" \"version\": version,\n",
|
| 184 |
+
" \"completeness\": completeness_acc,\n",
|
| 185 |
+
" \"conciseness\": conciseness_acc,\n",
|
| 186 |
+
" \"attribution\": attribution_acc\n",
|
| 187 |
+
" })\n",
|
| 188 |
+
"\n",
|
| 189 |
+
" # Create a pandas DataFrame\n",
|
| 190 |
+
" df = pd.DataFrame(parsed_data)\n",
|
| 191 |
+
"\n",
|
| 192 |
+
" # Calculate the mean scores for each version\n",
|
| 193 |
+
" # The order is specified to ensure 'easy', 'intermediate', 'hard' are plotted correctly\n",
|
| 194 |
+
" version_order = ['easy', 'intermediate', 'hard']\n",
|
| 195 |
+
" df['version'] = pd.Categorical(df['version'], categories=version_order, ordered=True)\n",
|
| 196 |
+
" \n",
|
| 197 |
+
" agg_results = df.groupby('version')[['completeness', 'conciseness', 'attribution']].mean().reset_index()\n",
|
| 198 |
+
"\n",
|
| 199 |
+
" print(\"--- Aggregated Mean Scores ---\")\n",
|
| 200 |
+
" print(agg_results.to_string(index=False))\n",
|
| 201 |
+
" \n",
|
| 202 |
+
" return agg_results\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"def visualize_results(df):\n",
|
| 205 |
+
" \"\"\"\n",
|
| 206 |
+
" Generates and saves bar charts to visualize the aggregated results.\n",
|
| 207 |
+
" \"\"\"\n",
|
| 208 |
+
" if df is None or df.empty:\n",
|
| 209 |
+
" print(\"Cannot visualize results. DataFrame is empty.\")\n",
|
| 210 |
+
" return\n",
|
| 211 |
+
"\n",
|
| 212 |
+
" sns.set_style(\"whitegrid\")\n",
|
| 213 |
+
" fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)\n",
|
| 214 |
+
" fig.suptitle('Average Evaluation Metrics Across Summary Versions', fontsize=16)\n",
|
| 215 |
+
"\n",
|
| 216 |
+
" # Plot Completeness\n",
|
| 217 |
+
" sns.barplot(ax=axes[0], x='version', y='completeness', data=df, palette='Blues_d')\n",
|
| 218 |
+
" axes[0].set_title('Completeness (Claim Recall)')\n",
|
| 219 |
+
" axes[0].set_xlabel('Summary Version')\n",
|
| 220 |
+
" axes[0].set_ylabel('Average Accuracy (%)')\n",
|
| 221 |
+
"\n",
|
| 222 |
+
" # Plot Conciseness\n",
|
| 223 |
+
" sns.barplot(ax=axes[1], x='version', y='conciseness', data=df, palette='Greens_d')\n",
|
| 224 |
+
" axes[1].set_title('Conciseness (Claim Precision)')\n",
|
| 225 |
+
" axes[1].set_xlabel('Summary Version')\n",
|
| 226 |
+
" axes[1].set_ylabel('')\n",
|
| 227 |
+
"\n",
|
| 228 |
+
" # Plot Attribution\n",
|
| 229 |
+
" sns.barplot(ax=axes[2], x='version', y='attribution', data=df, palette='Oranges_d')\n",
|
| 230 |
+
" axes[2].set_title('Attribution')\n",
|
| 231 |
+
" axes[2].set_xlabel('Summary Version')\n",
|
| 232 |
+
" axes[2].set_ylabel('')\n",
|
| 233 |
+
" \n",
|
| 234 |
+
" # Improve layout and save the figure\n",
|
| 235 |
+
" plt.tight_layout(rect=[0, 0, 1, 0.96])\n",
|
| 236 |
+
" plt.savefig(\"doclens_evaluation_summary.png\", dpi=300)\n",
|
| 237 |
+
" print(\"\\nChart saved as 'doclens_evaluation_summary.png'\")\n",
|
| 238 |
+
" plt.show()\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"# --- Main Execution ---\n",
|
| 242 |
+
"# Replace 'your_results_file.json' with the actual path to your file\n",
|
| 243 |
+
"results_file = '/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json' \n",
|
| 244 |
+
"aggregated_data = analyze_doclens_results(results_file)\n",
|
| 245 |
+
"\n",
|
| 246 |
+
"if aggregated_data is not None:\n",
|
| 247 |
+
" visualize_results(aggregated_data)"
|
| 248 |
+
]
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"cell_type": "markdown",
|
| 252 |
+
"id": "b5afb981",
|
| 253 |
+
"metadata": {},
|
| 254 |
+
"source": [
|
| 255 |
+
"## Eliminate dataset"
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"cell_type": "code",
|
| 260 |
+
"execution_count": 18,
|
| 261 |
+
"id": "b29bcf30",
|
| 262 |
+
"metadata": {},
|
| 263 |
+
"outputs": [
|
| 264 |
+
{
|
| 265 |
+
"name": "stdout",
|
| 266 |
+
"output_type": "stream",
|
| 267 |
+
"text": [
|
| 268 |
+
"Rejected 15 items due to low attribution.\n",
|
| 269 |
+
"Rejected 9 additional items due to incorrect completeness trend.\n",
|
| 270 |
+
"\n",
|
| 271 |
+
"--- Filtering Summary ---\n",
|
| 272 |
+
"Total unique items analyzed: 100\n",
|
| 273 |
+
"Items kept (High Quality): 76\n",
|
| 274 |
+
"Items rejected (Low Quality): 24\n",
|
| 275 |
+
"Saved data to '/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B_clean.json'\n",
|
| 276 |
+
"Saved data to '/home/mshahidul/readctrl/results/dataset_quality_check/rejected_dataset.json'\n"
|
| 277 |
+
]
|
| 278 |
+
}
|
| 279 |
+
],
|
| 280 |
+
"source": [
|
| 281 |
+
"import json\n",
|
| 282 |
+
"import pandas as pd\n",
|
| 283 |
+
"\n",
|
| 284 |
+
"def filter_low_quality_data(file_path, attribution_threshold=80.0, completeness_trend_check=True):\n",
|
| 285 |
+
" \"\"\"\n",
|
| 286 |
+
" Loads DOCLENS results, filters out low-quality data, and returns clean/rejected data.\n",
|
| 287 |
+
" \"\"\"\n",
|
| 288 |
+
" try:\n",
|
| 289 |
+
" with open(file_path, 'r', encoding='utf-8') as f:\n",
|
| 290 |
+
" data = json.load(f)\n",
|
| 291 |
+
" except (FileNotFoundError, json.JSONDecodeError) as e:\n",
|
| 292 |
+
" print(f\"Error loading file: {e}\")\n",
|
| 293 |
+
" return None, None\n",
|
| 294 |
+
"\n",
|
| 295 |
+
" # --- FIX: Parse the nested JSON to extract numeric accuracy scores ---\n",
|
| 296 |
+
" # Create a flat list of dictionaries instead of a list of nested objects\n",
|
| 297 |
+
" parsed_data = []\n",
|
| 298 |
+
" for record in data:\n",
|
| 299 |
+
" parsed_data.append({\n",
|
| 300 |
+
" \"id\": record.get(\"id\"),\n",
|
| 301 |
+
" \"version\": record.get(\"version\"),\n",
|
| 302 |
+
" \"completeness\": record.get(\"completeness\", {}).get(\"accuracy\", 0),\n",
|
| 303 |
+
" \"conciseness\": record.get(\"conciseness\", {}).get(\"accuracy\", 0),\n",
|
| 304 |
+
" \"attribution\": record.get(\"attribution\", {}).get(\"accuracy\", 0)\n",
|
| 305 |
+
" })\n",
|
| 306 |
+
"\n",
|
| 307 |
+
" # Create DataFrame from the *parsed* data\n",
|
| 308 |
+
" df = pd.DataFrame(parsed_data)\n",
|
| 309 |
+
" # --------------------------------------------------------------------\n",
|
| 310 |
+
" \n",
|
| 311 |
+
" all_ids = set(df['id'].unique())\n",
|
| 312 |
+
" rejected_ids = set()\n",
|
| 313 |
+
"\n",
|
| 314 |
+
" # --- Pivot data for easier comparison across versions ---\n",
|
| 315 |
+
" # This part now works correctly because the columns are numeric\n",
|
| 316 |
+
" pivot_df = df.pivot_table(\n",
|
| 317 |
+
" index='id',\n",
|
| 318 |
+
" columns='version',\n",
|
| 319 |
+
" values=['completeness', 'conciseness', 'attribution']\n",
|
| 320 |
+
" )\n",
|
| 321 |
+
" pivot_df.columns = ['_'.join(map(str, col)).strip() for col in pivot_df.columns.values]\n",
|
| 322 |
+
" \n",
|
| 323 |
+
" # --- Filter 1: Low Attribution ---\n",
|
| 324 |
+
" low_attribution_mask = (pivot_df['attribution_easy'] < attribution_threshold) | \\\n",
|
| 325 |
+
" (pivot_df['attribution_intermediate'] < attribution_threshold) | \\\n",
|
| 326 |
+
" (pivot_df['attribution_hard'] < attribution_threshold)\n",
|
| 327 |
+
" rejected_attribution_ids = pivot_df[low_attribution_mask].index\n",
|
| 328 |
+
" rejected_ids.update(rejected_attribution_ids)\n",
|
| 329 |
+
" print(f\"Rejected {len(rejected_attribution_ids)} items due to low attribution.\")\n",
|
| 330 |
+
"\n",
|
| 331 |
+
" # --- Filter 2: Incorrect Completeness Trend ---\n",
|
| 332 |
+
" if completeness_trend_check:\n",
|
| 333 |
+
" bad_trend_mask = pivot_df['completeness_easy'] > pivot_df['completeness_hard']\n",
|
| 334 |
+
" rejected_trend_ids = pivot_df[bad_trend_mask].index\n",
|
| 335 |
+
" newly_rejected_count = len(rejected_trend_ids.difference(rejected_ids))\n",
|
| 336 |
+
" rejected_ids.update(rejected_trend_ids)\n",
|
| 337 |
+
" print(f\"Rejected {newly_rejected_count} additional items due to incorrect completeness trend.\")\n",
|
| 338 |
+
"\n",
|
| 339 |
+
" # --- Separate the data ---\n",
|
| 340 |
+
" clean_ids = all_ids - rejected_ids\n",
|
| 341 |
+
" \n",
|
| 342 |
+
" # We need to filter the original 'data' list, not the parsed one, to keep the full structure\n",
|
| 343 |
+
" original_df = pd.DataFrame(data)\n",
|
| 344 |
+
" clean_data = original_df[original_df['id'].isin(clean_ids)].to_dict('records')\n",
|
| 345 |
+
" rejected_data = original_df[original_df['id'].isin(rejected_ids)].to_dict('records')\n",
|
| 346 |
+
" \n",
|
| 347 |
+
" print(\"\\n--- Filtering Summary ---\")\n",
|
| 348 |
+
" print(f\"Total unique items analyzed: {len(all_ids)}\")\n",
|
| 349 |
+
" print(f\"Items kept (High Quality): {len(clean_ids)}\")\n",
|
| 350 |
+
" print(f\"Items rejected (Low Quality): {len(rejected_ids)}\")\n",
|
| 351 |
+
" \n",
|
| 352 |
+
" return clean_data, rejected_data\n",
|
| 353 |
+
"\n",
|
| 354 |
+
"def save_json(data, file_path):\n",
|
| 355 |
+
" \"\"\"Saves data to a JSON file.\"\"\"\n",
|
| 356 |
+
" with open(file_path, 'w', encoding='utf-8') as f:\n",
|
| 357 |
+
" json.dump(data, f, indent=4, ensure_ascii=False)\n",
|
| 358 |
+
" print(f\"Saved data to '{file_path}'\")\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"\n",
|
| 361 |
+
"# --- Main Execution ---\n",
|
| 362 |
+
"# Replace with your file paths and desired thresholds\n",
|
| 363 |
+
"RESULTS_FILE = '/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json' # Make sure this points to your file\n",
|
| 364 |
+
"# CLEAN_FILE_PATH = '/home/mshahidul/readctrl/results/dataset_quality_check/high_quality_dataset.json'\n",
|
| 365 |
+
"# REJECTED_FILE_PATH = '/home/mshahidul/readctrl/results/dataset_quality_check/rejected_dataset.json'\n",
|
| 366 |
+
"ATTRIBUTION_THRESHOLD = 80.0\n",
|
| 367 |
+
"\n",
|
| 368 |
+
"clean_dataset, rejected_dataset = filter_low_quality_data(\n",
|
| 369 |
+
" RESULTS_FILE,\n",
|
| 370 |
+
" attribution_threshold=ATTRIBUTION_THRESHOLD\n",
|
| 371 |
+
")\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"if clean_dataset is not None:\n",
|
| 374 |
+
" save_json(clean_dataset, '/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B_clean.json')\n",
|
| 375 |
+
" save_json(rejected_dataset, '/home/mshahidul/readctrl/results/dataset_quality_check/rejected_dataset.json')"
|
| 376 |
+
]
|
| 377 |
+
}
|
| 378 |
+
],
|
| 379 |
+
"metadata": {
|
| 380 |
+
"kernelspec": {
|
| 381 |
+
"display_name": "unsloth",
|
| 382 |
+
"language": "python",
|
| 383 |
+
"name": "python3"
|
| 384 |
+
},
|
| 385 |
+
"language_info": {
|
| 386 |
+
"codemirror_mode": {
|
| 387 |
+
"name": "ipython",
|
| 388 |
+
"version": 3
|
| 389 |
+
},
|
| 390 |
+
"file_extension": ".py",
|
| 391 |
+
"mimetype": "text/x-python",
|
| 392 |
+
"name": "python",
|
| 393 |
+
"nbconvert_exporter": "python",
|
| 394 |
+
"pygments_lexer": "ipython3",
|
| 395 |
+
"version": "3.11.11"
|
| 396 |
+
}
|
| 397 |
+
},
|
| 398 |
+
"nbformat": 4,
|
| 399 |
+
"nbformat_minor": 5
|
| 400 |
+
}
|
code/finetune-inference/subclaim_support/readctrl_model.code-workspace
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"folders": [
|
| 3 |
+
{
|
| 4 |
+
"path": "../../.."
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"path": "../../../../LLM_guard/CKA-Agent"
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"path": "../../../../readctrl_model"
|
| 11 |
+
}
|
| 12 |
+
]
|
| 13 |
+
}
|
code/finetune-inference/subclaim_support_extraction/inference_extract_subclaims_gpt5.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import List
|
| 7 |
+
|
| 8 |
+
import tqdm
|
| 9 |
+
from openai import OpenAI
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# -----------------------------
|
| 13 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 14 |
+
# -----------------------------
|
| 15 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the subclaim-extraction prompt for *medical_text*.

    The prompt instructs the model to emit ONLY a JSON list of strings,
    which downstream code parses with `_parse_json_list`.
    """
    # NOTE: the prompt text below is a runtime string consumed by the model;
    # its wording must stay in sync with the parsing logic in `_parse_json_list`.
    prompt = f"""
You are an expert medical annotator. Your task is to extract granular, factual subclaims from medical text.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text. Do not infer or add information.
4. Keep subclaims short, non-overlapping, and de-duplicated.
5. Preserve numbers, units, and dates exactly as written.
6. If the text is empty, return an empty JSON list.
7. Return ONLY a valid JSON list of strings (no extra text).

Medical Text:
{medical_text}

Return your output in JSON list format:
[
"subclaim 1",
"subclaim 2"
]
"""
    return prompt
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _load_openai_client() -> OpenAI:
    """Create an OpenAI client from the key stored in the local api_new.json file.

    Reads the JSON key store at a fixed path and uses its "openai" entry.
    Raises FileNotFoundError / KeyError if the store or key is missing.
    """
    key_store_path = "/home/mshahidul/api_new.json"
    with open(key_store_path, "r") as handle:
        key_store = json.load(handle)
    return OpenAI(api_key=key_store["openai"])
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _parse_json_list(text: str) -> List[str]:
|
| 49 |
+
cleaned = text.replace("```json", "").replace("```", "").strip()
|
| 50 |
+
start_idx = cleaned.find("[")
|
| 51 |
+
end_idx = cleaned.rfind("]") + 1
|
| 52 |
+
if start_idx == -1 or end_idx <= start_idx:
|
| 53 |
+
raise ValueError("No JSON list found")
|
| 54 |
+
parsed = json.loads(cleaned[start_idx:end_idx])
|
| 55 |
+
if not isinstance(parsed, list):
|
| 56 |
+
raise ValueError("Parsed JSON is not a list")
|
| 57 |
+
return parsed
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def infer_subclaims(
    medical_text: str,
    client: OpenAI,
    model: str = "gpt-5-mini",
    retries: int = 1,
) -> List[str]:
    """Ask the chat model to decompose *medical_text* into subclaims.

    Empty input short-circuits to []. Each attempt sends the extraction
    prompt and parses the reply as a JSON list; on any failure the call is
    retried after a short pause, up to *retries* additional times. When the
    budget is exhausted, a single-element error marker list is returned so
    the pipeline keeps running.
    """
    if not medical_text or medical_text.strip() == "":
        return []

    prompt = extraction_prompt(medical_text)
    last_error = None
    for attempt in range(retries + 1):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "Return only a valid JSON list of strings."},
                    {"role": "user", "content": prompt},
                ],
            )
            reply = response.choices[0].message.content.strip()
            return _parse_json_list(reply)
        except Exception as exc:
            last_error = exc
            if attempt < retries:
                # Brief back-off before the next attempt.
                time.sleep(1.5)
    return [f"ERROR: {str(last_error)}"]
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# -----------------------------
|
| 93 |
+
# MAIN EXECUTION
|
| 94 |
+
# -----------------------------
|
| 95 |
+
if __name__ == "__main__":
    # CLI wrapper: extract subclaims for every record of the input dataset,
    # with resume support (partial output is reloaded and re-used).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input_file",
        type=str,
        default="/home/mshahidul/readctrl/data/annotators_validate_data_(20_80)/combine/verified_combined_0-80.json",
    )
    parser.add_argument(
        "--save_folder",
        type=str,
        default="/home/mshahidul/readctrl/data/extracting_subclaim",
    )
    parser.add_argument("--model", type=str, default="gpt-5-mini")
    args = parser.parse_args()

    input_file = args.input_file
    save_folder = args.save_folder
    # Output name is derived from the input file's basename (without .json).
    file_name = os.path.basename(input_file).split(".json")[0]
    output_file = os.path.join(save_folder, f"extracted_subclaims_{file_name}.json")

    Path(save_folder).mkdir(parents=True, exist_ok=True)
    client = _load_openai_client()

    with open(input_file, "r") as f:
        data = json.load(f)

    # Resume: reload any prior partial output for this input file.
    result = []
    if os.path.exists(output_file):
        with open(output_file, "r") as f:
            result = json.load(f)

    def _item_key(obj: dict) -> str:
        """Derive a stable dedup key: index > id > doc_id_label > fallback."""
        if obj.get("index") is not None:
            return str(obj.get("index"))
        if obj.get("id") is not None:
            return str(obj.get("id"))
        if obj.get("doc_id") is not None and obj.get("label") is not None:
            return f"{obj.get('doc_id')}_{obj.get('label')}"
        return str(obj.get("doc_id") or obj.get("label") or "")

    # Map of already-processed items keyed by the derived id.
    processed_data = {_item_key(item): item for item in result}

    for item in tqdm.tqdm(data):
        item_id = _item_key(item)
        existing_entry = processed_data.get(item_id)

        # 1. Process Fulltext — re-run only when no valid cached list exists.
        if not existing_entry or not isinstance(existing_entry.get("fulltext_subclaims"), list):
            f_sub = infer_subclaims(
                item.get("fulltext", ""),
                client,
                model=args.model,
                retries=2,
            )
        else:
            f_sub = existing_entry["fulltext_subclaims"]

        # 2. Process Summary
        if not existing_entry or not isinstance(existing_entry.get("summary_subclaims"), list):
            s_sub = infer_subclaims(
                item.get("summary", ""),
                client,
                model=args.model,
                retries=1,
            )
        else:
            s_sub = existing_entry["summary_subclaims"]

        # 3. Process Generated Texts (diff_label_texts).
        # NOTE: the field may be either a dict of label->text or a single
        # string; both shapes are handled below.
        diff_label_texts = item.get("diff_label_texts", "")
        if isinstance(diff_label_texts, dict):
            diff_label_subclaims = existing_entry.get("diff_label_subclaims", {}) if existing_entry else {}
            for label, text in diff_label_texts.items():
                # Only (re)extract labels without a valid cached list.
                if label not in diff_label_subclaims or not isinstance(diff_label_subclaims[label], list):
                    diff_label_subclaims[label] = infer_subclaims(
                        text,
                        client,
                        model=args.model,
                        retries=1,
                    )
        else:
            if not existing_entry or not isinstance(existing_entry.get("diff_label_subclaims"), list):
                diff_label_subclaims = infer_subclaims(
                    diff_label_texts,
                    client,
                    model=args.model,
                    retries=1,
                )
            else:
                diff_label_subclaims = existing_entry["diff_label_subclaims"]

        # 4. Save the (possibly partially cached) entry back into the map.
        new_entry = {
            "doc_id": item.get("doc_id"),
            "label": item.get("label"),
            "fulltext": item.get("fulltext", ""),
            "fulltext_subclaims": f_sub,
            "summary": item.get("summary", ""),
            "summary_subclaims": s_sub,
            "diff_label_texts": diff_label_texts,
            "diff_label_subclaims": diff_label_subclaims,
        }
        processed_data[item_id] = new_entry

        # Checkpoint every 10 completed items so a crash loses little work.
        if len(processed_data) % 10 == 0:
            with open(output_file, "w") as f:
                json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    # Final save of everything processed so far.
    with open(output_file, "w") as f:
        json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    print(f"Extraction completed. File saved at: {output_file}")
|
code/finetune-inference/subclaim_support_extraction/inference_extract_subclaims_v4.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
# Pin GPU visibility BEFORE importing torch/unsloth — CUDA reads these
# variables at initialization time, so the order of these lines matters.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import torch
from unsloth import FastLanguageModel
import json
import tqdm
import argparse


# -----------------------------
# MODEL CACHE
# -----------------------------
# Module-level memo so the expensive model load happens at most once per
# process; populated by load_finetuned_model().
_model_cache = {"model": None, "tokenizer": None}
|
| 16 |
+
|
| 17 |
+
def load_finetuned_model(model_path: str):
    """Load the fine-tuned Unsloth model/tokenizer pair, reusing the cache.

    On the first call the weights are loaded from *model_path*; subsequent
    calls return the already-loaded objects from the module-level
    `_model_cache` regardless of the path argument.
    """
    cached_model = _model_cache["model"]
    if cached_model is not None:
        return cached_model, _model_cache["tokenizer"]

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=8192,
        load_in_4bit=False,
        load_in_8bit=False,
        full_finetuning=False,
    )
    _model_cache["model"] = model
    _model_cache["tokenizer"] = tokenizer
    return model, tokenizer
|
| 30 |
+
|
| 31 |
+
# -----------------------------
|
| 32 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 33 |
+
# -----------------------------
|
| 34 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the subclaim-extraction prompt for the local fine-tuned model.

    A shorter variant of the GPT prompt; the model must answer with ONLY a
    JSON list of strings, which `infer_subclaims` parses.
    """
    prompt = f"""
You are an expert medical annotator. Your task is to extract granular, factual subclaims from medical text.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the provided medical text.
2. Break it into clear, objective, atomic subclaims.
3. Each subclaim must come directly from the text.
4. Return ONLY a valid JSON list of strings.

Medical Text:
{medical_text}

Return your output in JSON list format:
[
"subclaim 1",
"subclaim 2"
]
"""
    return prompt
|
| 55 |
+
# -----------------------------
|
| 56 |
+
# INFERENCE FUNCTION WITH AUTO-RETRY
|
| 57 |
+
# -----------------------------
|
| 58 |
+
def infer_subclaims(medical_text: str, model, tokenizer, temperature: float = 0.2, max_tokens: int = 2048, retries: int = 1) -> list:
    """Extract subclaims from *medical_text* with the local fine-tuned model.

    Generates greedily, parses the first complete JSON list in the
    completion, and on parse failure/truncation retries with a larger token
    budget. As a last resort, returns the raw completion wrapped in a list
    so the calling pipeline never crashes.

    Args:
        medical_text: Source text to decompose; empty/blank input yields [].
        model: Generation model returned by load_finetuned_model.
        tokenizer: Matching tokenizer (must support apply_chat_template).
        temperature: Kept for interface compatibility; decoding uses
            do_sample=False (greedy), so it has no effect and is no longer
            forwarded to generate() (forwarding it only triggered warnings).
        max_tokens: max_new_tokens for this attempt; grows by 2048 on retry.
        retries: Remaining retry budget on parse failure.

    Returns:
        list: Parsed subclaims, or [raw_output] when parsing keeps failing.
    """
    if not medical_text or medical_text.strip() == "":
        return []

    prompt = extraction_prompt(medical_text)
    messages = [{"role": "user", "content": prompt}]
    chat_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=False
        )

    # BUG FIX: decode only the newly generated tokens. Decoding output_ids[0]
    # in full re-includes the chat-formatted prompt, and the prompt's JSON
    # example ("[", "subclaim 1", ... "]") would be matched by find('[')
    # below — corrupting the extracted span so json.loads failed every time.
    prompt_len = inputs["input_ids"].shape[1]
    output_text = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True).strip()

    # Remove reasoning if model is a "Thinker" model
    if "</think>" in output_text:
        output_text = output_text.split("</think>")[-1].strip()

    # JSON Parsing Logic
    try:
        start_idx = output_text.find('[')
        end_idx = output_text.rfind(']') + 1

        # Check if we have a complete bracketed pair
        if start_idx != -1 and end_idx > start_idx:
            parsed = json.loads(output_text[start_idx:end_idx])
            if isinstance(parsed, list):
                return parsed

        # Parsing failed or brackets were incomplete (likely truncation)
        raise ValueError("Incomplete JSON list")

    except (json.JSONDecodeError, ValueError):
        # If truncation happened and we have retries left, grow the budget.
        if retries > 0:
            new_max = max_tokens + 2048  # Increment by 2k tokens
            print(f"\n[Warning] Truncation detected. Retrying with {new_max} tokens...")
            return infer_subclaims(medical_text, model, tokenizer, temperature, max_tokens=new_max, retries=retries - 1)

        # Final fallback: return the raw text wrapped in a list so the pipeline doesn't crash
        return [output_text]
|
| 105 |
+
|
| 106 |
+
# -----------------------------
|
| 107 |
+
# MAIN EXECUTION
|
| 108 |
+
# -----------------------------
|
| 109 |
+
if __name__ == "__main__":
    # CLI wrapper around the local-model extractor, with resume support.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True)
    args = parser.parse_args()

    INPUT_FILE = args.input_file
    # Output name is derived from the input file's basename (without .json).
    file_name = os.path.basename(INPUT_FILE).split(".json")[0]
    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx"

    os.makedirs(SAVE_FOLDER, exist_ok=True)
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"extracted_subclaims_{file_name}.json")

    model, tokenizer = load_finetuned_model(MODEL_PATH)

    with open(INPUT_FILE, "r") as f:
        data = json.load(f)

    # Resume: reload any prior partial output for this input file.
    result = []
    if os.path.exists(OUTPUT_FILE):
        with open(OUTPUT_FILE, "r") as f:
            result = json.load(f)

    # NOTE(review): this comprehension keys on `index or id` (a falsy
    # index such as 0 falls through to id), whereas the loop below uses an
    # `is not None` check — the two can disagree for index == 0; verify.
    processed_data = {str(item.get("index") or item.get("id")): item for item in result}

    for item in tqdm.tqdm(data):
        item_id = str(item.get("index") if item.get("index") is not None else item.get("id"))
        existing_entry = processed_data.get(item_id)

        # 1. Process Fulltext (the longest field, high initial token count)
        if not existing_entry or not isinstance(existing_entry.get("fulltext_subclaims"), list):
            f_sub = infer_subclaims(item.get("fulltext", ""), model, tokenizer, max_tokens=3072, retries=2)
        else:
            f_sub = existing_entry["fulltext_subclaims"]

        # 2. Process Summary
        if not existing_entry or not isinstance(existing_entry.get("summary_subclaims"), list):
            s_sub = infer_subclaims(item.get("summary", ""), model, tokenizer, max_tokens=2048, retries=1)
        else:
            s_sub = existing_entry["summary_subclaims"]

        # 3. Process All Generated Texts (diff_label_texts)
        diff_label_texts = item.get("diff_label_texts", {})
        diff_label_subclaims = existing_entry.get("diff_label_subclaims", {}) if existing_entry else {}

        for label, text in diff_label_texts.items():
            # Only (re)extract labels without a valid cached list.
            if label not in diff_label_subclaims or not isinstance(diff_label_subclaims[label], list):
                # Generated texts are shorter, but we still allow 1 retry
                diff_label_subclaims[label] = infer_subclaims(text, model, tokenizer, max_tokens=1536, retries=1)

        # 4. Save the (possibly partially cached) entry back into the map.
        new_entry = {
            "index": item.get("index"),
            "id": item.get("id"),
            "fulltext": item.get("fulltext", ""),
            "fulltext_subclaims": f_sub,
            "summary": item.get("summary", ""),
            "summary_subclaims": s_sub,
            "diff_label_texts": diff_label_texts,
            "diff_label_subclaims": diff_label_subclaims,
            "readability_score": item.get("readability_score", None)
        }
        processed_data[item_id] = new_entry

        # Checkpoint every 10 completed items so a crash loses little work.
        if len(processed_data) % 10 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    # Final save of everything processed so far.
    with open(OUTPUT_FILE, "w") as f:
        json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    print(f"Extraction completed. File saved at: {OUTPUT_FILE}")
|
code/finetune-inference/subclaim_support_extraction/inference_extract_subclaims_vllm.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
# API CONFIGURATION
# -----------------------------
# Base URL of the locally hosted vLLM server exposing an OpenAI-compatible API.
LOCAL_API_URL = "http://172.16.34.29:8004/v1"
# vLLM identifies the served model by the filesystem path it was launched with.
LOCAL_MODEL_NAME = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-extraction-8b_ctx_fp16"

# Module-level client reused by infer_subclaims_api; the local server does
# not validate API keys, hence the "EMPTY" placeholder.
client = OpenAI(
    base_url=LOCAL_API_URL,
    api_key="EMPTY"
)
|
| 17 |
+
|
| 18 |
+
# -----------------------------
|
| 19 |
+
# SUBCLAIM EXTRACTION PROMPT
|
| 20 |
+
# -----------------------------
|
| 21 |
+
def extraction_prompt(medical_text: str) -> str:
    """Build the strict subclaim-extraction prompt for the vLLM-served model.

    The most constrained of the three prompt variants: it explicitly forbids
    rephrasing/inference and demands a bare JSON array of strings, which
    `infer_subclaims_api` parses.
    """
    return f"""
You are an expert medical annotator.

Your task is to extract granular, factual subclaims from the provided medical text.
A subclaim is the smallest standalone factual unit that can be independently verified.

Instructions:
1. Read the medical text carefully.
2. Extract factual statements explicitly stated in the text.
3. Each subclaim must:
- Contain exactly ONE factual assertion
- Come directly from the text (no inference or interpretation)
- Preserve original wording as much as possible
- Include any negation, uncertainty, or qualifier (e.g., "may", "not", "suggests")
4. Do NOT:
- Combine multiple facts into one subclaim
- Add new information
- Rephrase or normalize terminology
- Include opinions or recommendations
5. Return ONLY a valid JSON array of strings.
6. Use double quotes and valid JSON formatting only (no markdown, no commentary).

Medical Text:
{medical_text}

Return format:
[
"subclaim 1",
"subclaim 2"
]
""".strip()
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# -----------------------------
|
| 56 |
+
# INFERENCE FUNCTION (vLLM API)
|
| 57 |
+
# -----------------------------
|
| 58 |
+
def infer_subclaims_api(medical_text: str, temperature: float = 0.2, max_tokens: int = 2048, retries: int = 1) -> list:
    """Extract subclaims via the local vLLM OpenAI-compatible endpoint.

    Sends the extraction prompt, strips any "<think>…</think>" reasoning,
    and parses the first complete bracketed JSON list in the reply. On any
    error (API failure, truncation, malformed JSON) it retries with a
    +2048-token budget; after the budget is exhausted it returns the raw
    reply wrapped in a list (or [] if no reply was ever received).

    Args:
        medical_text: Source text to decompose; empty/blank input yields [].
        temperature: Sampling temperature forwarded to the server.
        max_tokens: Completion-token budget for this attempt.
        retries: Remaining retry budget.

    Returns:
        list: Parsed subclaims, [raw_text] fallback, or [].
    """
    if not medical_text or not medical_text.strip():
        return []

    prompt = extraction_prompt(medical_text)
    # Pre-bind so the fallback below never needs the fragile
    # `'output_text' in locals()` trick the original relied on.
    output_text = ""

    try:
        response = client.chat.completions.create(
            model=LOCAL_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )

        output_text = response.choices[0].message.content.strip()

        # Drop chain-of-thought emitted by "thinking" models.
        if "</think>" in output_text:
            output_text = output_text.split("</think>")[-1].strip()

        start_idx = output_text.find('[')
        end_idx = output_text.rfind(']') + 1

        if start_idx != -1 and end_idx > start_idx:
            parsed = json.loads(output_text[start_idx:end_idx])
            if isinstance(parsed, list):
                return parsed

        raise ValueError("Incomplete JSON list")

    # FIX: the original caught (json.JSONDecodeError, ValueError, Exception);
    # Exception already subsumes the other two, so catch it alone.
    except Exception as e:
        if retries > 0:
            new_max = max_tokens + 2048
            print(f"\n[Warning] API error/truncation: {e}. Retrying with {new_max} tokens...")
            return infer_subclaims_api(medical_text, temperature, max_tokens=new_max, retries=retries - 1)

        # Final fallback: raw text if we got any, else empty.
        return [output_text] if output_text else []
|
| 95 |
+
|
| 96 |
+
# -----------------------------
|
| 97 |
+
# MAIN EXECUTION
|
| 98 |
+
# -----------------------------
|
| 99 |
+
if __name__ == "__main__":
    # CLI: input file plus an optional [start, end) slice so several
    # workers can process disjoint ranges of the same dataset in parallel.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True)
    parser.add_argument("--start", type=int, default=0, help="Start index in the dataset")
    parser.add_argument("--end", type=int, default=None, help="End index (exclusive) in the dataset")
    args = parser.parse_args()

    INPUT_FILE = args.input_file
    file_name = os.path.basename(INPUT_FILE).split(".json")[0]
    SAVE_FOLDER = "/home/mshahidul/readctrl/data/extracting_subclaim"
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # Range-specific output naming helps if you want to run parallel jobs.
    # NOTE: computed before args.end is defaulted below, so an omitted --end
    # yields the literal suffix "_<start>_end".
    range_suffix = f"_{args.start}_{args.end if args.end is not None else 'end'}"
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"extracted_subclaims_{file_name}{range_suffix}.json")

    with open(INPUT_FILE, "r") as f:
        full_data = json.load(f)

    if args.end is None:
        args.end = len(full_data)

    # Slice the data based on user input
    data_subset = full_data[args.start:args.end]
    print(f"Processing range [{args.start} : {args.end if args.end else len(full_data)}]. Total: {len(data_subset)} items.")

    # Load existing progress if available (resume support keyed by item id).
    processed_data = {}
    if os.path.exists(OUTPUT_FILE):
        with open(OUTPUT_FILE, "r") as f:
            existing_list = json.load(f)
        processed_data = {str(item.get("id")): item for item in existing_list}

    for item in tqdm.tqdm(data_subset):
        item_id = str(item.get("id"))

        # Check if this item in the subset was already processed
        if item_id in processed_data:
            continue

        # 1. Process Fulltext (largest field -> larger budget, 2 retries)
        f_sub = infer_subclaims_api(item.get("fulltext", ""), max_tokens=3072, retries=2)

        # 2. Process Summary
        s_sub = infer_subclaims_api(item.get("summary", ""), max_tokens=2048, retries=1)

        # 3. Save Entry
        processed_data[item_id] = {
            "id": item_id,
            "fulltext": item.get("fulltext", ""),
            "fulltext_subclaims": f_sub,
            "summary": item.get("summary", ""),
            "summary_subclaims": s_sub
        }

        # Periodic checkpoint so a crash loses at most ~20 items of work.
        if len(processed_data) % 20 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    # Final Save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(list(processed_data.values()), f, indent=4, ensure_ascii=False)

    print(f"Range extraction completed. File saved at: {OUTPUT_FILE}")
|
code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
# Local checkpoint path of the fine-tuned verifier model; this same string is
# sent as the model identifier in every API request to the vLLM server.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-4b_ctx-bf16"
# OpenAI-compatible vLLM endpoint served on this machine.
API_URL = "http://localhost:8015/v1"
# vLLM does not check the key, but the client library requires a non-empty value.
API_KEY = "EMPTY"

# Single shared client reused for all requests in this script.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# VERIFICATION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def inference_prompt(text, subclaim):
    """Build the three-way classification prompt for the verifier model.

    Asks the model to label the relation between *text* and *subclaim* as
    exactly one of: supported, refuted, or not_supported.
    """
    return f"""
You are a medical evidence evaluator.

Determine the relationship between the following medical text and the subclaim.

Label definitions:
- supported: the text directly provides evidence for the subclaim
- refuted: the text contradicts the subclaim
- not_supported: the text is related to the subclaim but does not provide evidence


Medical Text:
{text}

Subclaim:
{subclaim}

Respond only with one label: supported, refuted, or not_supported.
Give output without extra explanation.
"""
|
| 40 |
+
|
| 41 |
+
# -----------------------------
|
| 42 |
+
# VERIFICATION LOGIC
|
| 43 |
+
# -----------------------------
|
| 44 |
+
def check_support(text: str, subclaim: str) -> str:
    """Classify the text/subclaim relation via the served verifier model.

    Returns one of 'supported', 'refuted', or 'not_supported'. Empty input
    or any API failure falls back to 'not_supported'.
    """
    # Without both pieces of text there is nothing to verify.
    if not text or not subclaim:
        return "not_supported"

    query = inference_prompt(text, subclaim)

    try:
        reply = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": query}],
            max_tokens=20,
            temperature=0.0,
        )
        verdict = reply.choices[0].message.content.strip().lower()
    except Exception as e:
        print(f"API error: {e}")
        return "not_supported"

    # Probe order matters: "supported" is a substring of "not_supported".
    for label in ("not_supported", "supported", "refuted"):
        if label in verdict:
            return label
    return "not_supported"
|
| 74 |
+
|
| 75 |
+
def calculate_metric(subclaims_list: list, reference_text: str, metric_name: str):
    """Score what fraction of *subclaims_list* is supported by *reference_text*.

    Each subclaim is labelled via check_support(); the score is
    supported_count / total. *metric_name* only names which metric this call
    computes (attribution / conciseness / completeness); it does not change
    the computation.

    Returns:
        dict with:
          - "score": float in [0, 1]; 0.0 for an empty subclaim list
          - "details": list of {"subclaim", "label"} dicts, one per subclaim
    """
    # Empty input: nothing to verify; score is defined as 0.0.
    if not subclaims_list:
        return {"score": 0.0, "details": []}

    details = [
        {"subclaim": sc, "label": check_support(reference_text, sc)}
        for sc in subclaims_list
    ]
    supported_count = sum(1 for d in details if d["label"] == "supported")

    # The early return above guarantees a non-zero denominator here
    # (the original's `if len(...) > 0 else 0.0` guard was dead code).
    return {
        "score": supported_count / len(subclaims_list),
        "details": details,
    }
|
| 100 |
+
|
| 101 |
+
# -----------------------------
|
| 102 |
+
# MAIN
|
| 103 |
+
# -----------------------------
|
| 104 |
+
# Driver: verify generated subclaims against reference texts over a dataset
# slice, with checkpointed, resumable JSON output.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_full_data.json",
                        help="Path to input JSON with subclaims")

    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/concise_complete_attr_cal_v2",
                        help="Folder to save results")

    # Range arguments
    parser.add_argument("--start_index", type=int, default=0, help="Start index")
    parser.add_argument("--end_index", type=int, default=-1, help="End index (exclusive). -1 for all.")

    args = parser.parse_args()

    INPUT_FILE = args.input_file
    SAVE_FOLDER = args.save_folder
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # -----------------------------
    # Load Data
    # -----------------------------
    print(f"Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r") as f:
        all_data = json.load(f)

    # -----------------------------
    # Slice Data based on Range
    # -----------------------------
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len

    # Ensure end doesn't exceed total length
    if end > total_len:
        end = total_len

    data_slice = all_data[start:end]

    print(f"Total dataset size: {total_len}")
    print(f"Processing range: {start} to {end}")
    print(f"Items in this batch: {len(data_slice)}")

    # -----------------------------
    # Output Filename (includes range)
    # -----------------------------
    # Filename format: evaluated_metrics_0_100.json — a separate file per
    # range so parallel range-jobs never clobber each other.
    OUTPUT_FILE = os.path.join(
        SAVE_FOLDER,
        f"evaluated_metrics_{start}_{end}.json"
    )

    # -----------------------------
    # Resume Logic
    # -----------------------------
    processed_results = []
    if os.path.exists(OUTPUT_FILE):
        print(f"Found existing output file: {OUTPUT_FILE}. Resuming...")
        try:
            with open(OUTPUT_FILE, "r") as f:
                processed_results = json.load(f)
        # NOTE(review): bare except silently restarts from scratch on a
        # corrupt/partial file — consider narrowing to json.JSONDecodeError.
        except:
            processed_results = []

    # Items already present in THIS range file are skipped on resume.
    processed_ids = {item['id'] for item in processed_results}

    # Filter only the sliced data
    to_process = [item for item in data_slice if item['id'] not in processed_ids]

    print(f"Already processed in this file: {len(processed_ids)}")
    print(f"Remaining to process: {len(to_process)}")

    # -----------------------------
    # Processing Loop
    # -----------------------------
    for item in tqdm.tqdm(to_process):

        # 1. Prepare Texts
        easy_text = item.get("easy_text", "")
        inter_text = item.get("intermediate_text", "")
        hard_text = item.get("hard_text", "")
        fulltext = item.get("fulltext", "")
        summary = item.get("summary", "")

        # 2. Prepare Subclaim Lists
        # Coerce anything non-list (None, str, dict) to []; NOTE(review):
        # redefined on every iteration — harmless, but could live at top level.
        def ensure_list(x): return x if isinstance(x, list) else []

        easy_subs = ensure_list(item.get("easy_subclaims", []))
        inter_subs = ensure_list(item.get("intermediate_subclaims", []))
        hard_subs = ensure_list(item.get("hard_subclaims", []))
        full_subs = ensure_list(item.get("fulltext_subclaims", []))
        summary_subs = ensure_list(item.get("summary_subclaims", []))

        # ---------------------------------------------------------
        # METRICS CALCULATION
        # ---------------------------------------------------------

        # Attribution: Generated Subclaims -> Full Text
        attr_easy = calculate_metric(easy_subs, fulltext, "attribution")
        attr_inter = calculate_metric(inter_subs, fulltext, "attribution")
        attr_hard = calculate_metric(hard_subs, fulltext, "attribution")

        # Conciseness: Generated Subclaims -> Summary Text
        conc_easy = calculate_metric(easy_subs, summary, "conciseness")
        conc_inter = calculate_metric(inter_subs, summary, "conciseness")
        conc_hard = calculate_metric(hard_subs, summary, "conciseness")

        # Completeness: summary Subclaims -> Generated Text
        comp_easy = calculate_metric(summary_subs, easy_text, "completeness")
        comp_inter = calculate_metric(summary_subs, inter_text, "completeness")
        comp_hard = calculate_metric(summary_subs, hard_text, "completeness")

        # Construct Output: original item plus a nested per-difficulty
        # metrics block.
        result_item = item.copy()
        result_item["metrics"] = {
            "easy": {
                "attribution": attr_easy,
                "conciseness": conc_easy,
                "completeness": comp_easy
            },
            "intermediate": {
                "attribution": attr_inter,
                "conciseness": conc_inter,
                "completeness": comp_inter
            },
            "hard": {
                "attribution": attr_hard,
                "conciseness": conc_hard,
                "completeness": comp_hard
            }
        }

        processed_results.append(result_item)

        # Save frequently (checkpoint every 20 completed items)
        if len(processed_results) % 20 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(processed_results, f, indent=4, ensure_ascii=False)

    # Final Save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_results, f, indent=4, ensure_ascii=False)

    print(f"Evaluation for range {start}:{end} complete. Saved to: {OUTPUT_FILE}")
|
code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_tesing_v2.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
# Local checkpoint path of the fine-tuned verifier model; this same string is
# sent as the model identifier in every API request to the vLLM server.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx-bf16"
# OpenAI-compatible vLLM endpoint on a remote host (note: hard-coded IP/port).
API_URL = "http://172.16.34.29:8004/v1"
# vLLM does not check the key, but the client library requires a non-empty value.
API_KEY = "EMPTY"

# Single shared client reused for all requests in this script.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# VERIFICATION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def inference_prompt(text, subclaim):
    """Build the strict, closed-world BINARY verification prompt.

    Unlike the v1 prompt, this one asks for only two labels
    ('supported' / 'not_supported') and forbids outside knowledge.
    """
    return f"""You are a clinical evidence auditor. Your evaluation must be based STRICTLY and ONLY on the provided medical text.

### MANDATORY GROUNDING RULES:
1. NO OUTSIDE KNOWLEDGE: Do not use your internal medical knowledge. Even if a subclaim is "common sense" in medicine, if it is not explicitly in the TEXT, it is 'not_supported'.
2. NO LOGICAL LEAPS: Do not bridge gaps in logic. (e.g., If the text mentions "high blood sugar" but not the word "diabetes", you cannot support a claim of "diabetes").
3. EXACT NUMERICAL MATCHING: Any doses (e.g., 500mg), frequencies (e.g., twice daily), or durations (e.g., 10 days) mentioned in the subclaim must match the text perfectly. If they are missing or different in the text, label as 'not_supported'.
4. DEFAULT TO NOT SUPPORTED: If the text is vague, ambiguous, or only suggests a possibility, you MUST choose 'not_supported'.
5. CLOSED-WORLD REALITY: Treat the TEXT as the only information that exists in the world.

### Medical Text:
{text}

### Subclaim:
{subclaim}

Output exactly one word ('supported' or 'not_supported') based on the strict rules above:"""
|
| 36 |
+
|
| 37 |
+
# -----------------------------
|
| 38 |
+
# VERIFICATION LOGIC
|
| 39 |
+
# -----------------------------
|
| 40 |
+
def check_support(text: str, subclaim: str, error_log=None) -> str:
    """Ask the served model whether *text* supports *subclaim*.

    Returns 'supported', 'refuted', or 'not_supported'. Empty input or any
    API failure yields 'not_supported'; failures are appended to *error_log*
    (a list) when one is provided. ('refuted' is kept as a defensive parse
    target even though this prompt only requests the two binary labels.)
    """
    # Without both pieces of text there is nothing to verify.
    if not text or not subclaim:
        return "not_supported"

    prompt = inference_prompt(text, subclaim)

    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=512,
            temperature=0.1,
        )
        res = response.choices[0].message.content
        # Reasoning models may emit a <think>...</think> preamble. Keep only
        # the text after the LAST closing tag: split(...)[-1] is robust if the
        # marker appears more than once, and is the identity when it is absent
        # (the original fetched the message a second time in that branch and
        # used [1], which breaks on repeated markers).
        res = res.split("</think>")[-1].strip().lower()

        # Check "not_supported" first: "supported" is a substring of it.
        if "not_supported" in res:
            return "not_supported"
        elif "supported" in res:
            return "supported"
        elif "refuted" in res:
            return "refuted"
        else:
            return "not_supported"

    except Exception as e:
        # --- ERROR TRACKING ---
        if error_log is not None:
            error_log.append({
                "subclaim": subclaim,
                "error_msg": str(e),
                "type": "API_ERROR",
            })
        # ----------------------
        return "not_supported"
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# -----------------------------
|
| 90 |
+
# MAIN
|
| 91 |
+
# -----------------------------
|
| 92 |
+
# Driver: compare the model's support labels against ground-truth labels
# (GT from GPT-5 annotations) for every subclaim in a dataset slice.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/model_validity_check/subclaims_support_validity_check_gt_gpt5(1-5).json",
                        help="Path to input JSON with subclaims")

    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/concise_complete_attr_testing",
                        help="Folder to save results")

    # Range arguments
    parser.add_argument("--start_index", type=int, default=0, help="Start index")
    parser.add_argument("--end_index", type=int, default=-1, help="End index (exclusive). -1 for all.")

    args = parser.parse_args()

    INPUT_FILE = args.input_file
    SAVE_FOLDER = args.save_folder
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # -----------------------------
    # Load Data
    # -----------------------------
    print(f"Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r") as f:
        all_data = json.load(f)

    # -----------------------------
    # Slice Data based on Range
    # -----------------------------
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len

    if end > total_len:
        end = total_len

    data_slice = all_data[start:end]

    print(f"Total dataset size: {total_len}")
    print(f"Processing range: {start} to {end}")
    print(f"Items in this batch: {len(data_slice)}")

    # -----------------------------
    # Output Files
    # -----------------------------
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"evaluated_metrics_{start}_{end}_qwen3_32B_v2.json")


    # -----------------------------
    # Resume Logic
    # -----------------------------
    processed_results = []
    if os.path.exists(OUTPUT_FILE):
        print(f"Found existing output file: {OUTPUT_FILE}. Resuming...")
        try:
            with open(OUTPUT_FILE, "r") as f:
                processed_results = json.load(f)
        # NOTE(review): bare except silently restarts on a corrupt file —
        # consider narrowing to json.JSONDecodeError.
        except:
            processed_results = []

    # Dedup key is the raw full_text string (this dataset has no 'id' field).
    processed_ids = {item['full_text'] for item in processed_results}
    to_process = [item for item in data_slice if item['full_text'] not in processed_ids]

    print(f"Already processed in this file: {len(processed_ids)}")
    print(f"Remaining to process: {len(to_process)}")

    # -----------------------------
    # Initialize Error Tracker
    # -----------------------------
    global_error_log = []

    # -----------------------------
    # Processing Loop
    # -----------------------------
    # Added tqdm postfix to show error count in real-time
    # NOTE(review): no postfix is actually set on pbar below — presumably
    # leftover from a planned feature; confirm or remove the comment.
    pbar = tqdm.tqdm(to_process)

    for item in pbar:
        text=item.get('full_text', '')
        # NOTE(review): .get('dat', []) followed by ['dat'] raises TypeError
        # when 'dat' is missing (the [] default has no 'dat' key) — the data
        # is assumed to always carry a nested {'dat': {'dat': [...]}}; verify.
        subclaims=item.get('dat', [])['dat']
        # import ipdb; ipdb.set_trace()
        for subclaim in subclaims:
            subclaim_text=subclaim.get('subclaim', '')
            label_gt=subclaim.get('status', 'not_supported').strip().lower()
            correctness=False

            label_gen=check_support(text, subclaim_text, error_log=global_error_log)
            # import ipdb; ipdb.set_trace()
            # Exact-match agreement on the two binary labels only.
            if "not_supported" == label_gen and "not_supported" == label_gt:
                correctness=True
            elif "supported" == label_gen and "supported" == label_gt:
                correctness=True
            else:
                # print(f"Mismatch:\nGT: {label_gt}\nGEN: {label_gen}\nSubclaim: {subclaim}\nText: {text}\n---")
                pass
            result_entry={
                "medical_text": text,
                "subclaim": subclaim,
                "label_gt": label_gt,
                "label_gen": label_gen,
                "correctness": correctness
            }
            processed_results.append(result_entry)
            # Checkpoint very frequently (every 2 entries) — each API call is
            # slow, so losing progress is costlier than the extra writes.
            if len(processed_results) % 2 == 0:
                # Save intermediate results
                with open(OUTPUT_FILE, "w") as f:
                    json.dump(processed_results, f, indent=2, ensure_ascii=False)


    # Final save of all accumulated results.
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_results, f, indent=2, ensure_ascii=False)
|
code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v2.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
# Local checkpoint path of the fine-tuned verifier model; this same string is
# sent as the model identifier in every API request to the vLLM server.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx-bf16"
# OpenAI-compatible vLLM endpoint served on this machine.
API_URL = "http://localhost:8015/v1"
# vLLM does not check the key, but the client library requires a non-empty value.
API_KEY = "EMPTY"

# Single shared client reused for all requests in this script.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# VERIFICATION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def inference_prompt(text, subclaim):
    """Build the conservative three-way verification prompt.

    Asks for exactly one of 'supported', 'refuted', or 'not_supported',
    with strict rules biased toward 'not_supported'. The text/subclaim may
    be in Spanish.
    """
    return f"""
You are a precise, conservative medical evidence evaluator.

Your task:
Determine the relationship between the following MEDICAL TEXT and the SUBCLAIM.

Use ONLY these labels (lowercase):
- supported → the TEXT clearly supports the SUBCLAIM. The information is
              explicitly stated or follows from a very direct and
              unambiguous medical inference (e.g., “fiebre de 39°C”
              supports “tenía fiebre”).
- refuted → the TEXT clearly contradicts the SUBCLAIM (e.g., the TEXT
            states the opposite, or provides mutually exclusive values:
            different drug, dose, duration, time point, diagnosis, etc.).
- not_supported → the TEXT is related to the SUBCLAIM but does NOT provide
                  enough evidence to mark it as supported or refuted
                  (e.g., missing or different dose, duration, timing,
                  route, frequency, or diagnosis; or the claim simply
                  is not mentioned).

Important instructions:
- Be STRICT and CONSERVATIVE:
  - If exact numerical details (dose, time, duration, frequency, age, etc.)
    in the SUBCLAIM are not explicitly stated or clearly implied in the TEXT,
    choose not_supported.
- Do NOT assume or infer information beyond what is clearly supported by
  the TEXT, even if it seems medically plausible.
- Use refuted ONLY when there is a clear contradiction between TEXT and
  SUBCLAIM.
- Ignore your external medical knowledge; base your decision ONLY on the TEXT.
- The TEXT and SUBCLAIM may be in Spanish; evaluate them as written.

Medical Text:
{text}

Subclaim:
{subclaim}

Respond with exactly ONE label:
supported
refuted
not_supported
"""
|
| 63 |
+
|
| 64 |
+
# -----------------------------
|
| 65 |
+
# VERIFICATION LOGIC
|
| 66 |
+
# -----------------------------
|
| 67 |
+
def check_support(text: str, subclaim: str, item_id=None, error_log=None) -> str:
    """Label the relation between *text* and *subclaim* via the served model.

    Returns one of 'supported', 'refuted', or 'not_supported'. Empty input
    or an API failure maps to 'not_supported'; failures are recorded in
    *error_log* (tagged with *item_id*) when a list is supplied.
    """
    # Without both pieces of text there is nothing to verify.
    if not text or not subclaim:
        return "not_supported"

    query = inference_prompt(text, subclaim)

    try:
        reply = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": query}],
            max_tokens=20,
            temperature=0.0,
        )
        verdict = reply.choices[0].message.content.strip().lower()
    except Exception as e:
        # --- ERROR TRACKING ---
        if error_log is not None:
            error_log.append({
                "id": item_id,
                "subclaim": subclaim,
                "error_msg": str(e),
                "type": "API_ERROR",
            })
        # ----------------------

        # Optional: Print to console so you see it happening live
        print(f"\n[!] Error on ID {item_id}: {e}")
        return "not_supported"

    # Probe order matters: "supported" is a substring of "not_supported".
    for label in ("not_supported", "supported", "refuted"):
        if label in verdict:
            return label
    return "not_supported"
|
| 110 |
+
|
| 111 |
+
def calculate_metric(subclaims_list: list, reference_text: str, metric_name: str, item_id=None, error_log=None):
    """Score what fraction of *subclaims_list* is supported by *reference_text*.

    Each subclaim is labelled via check_support(), with *item_id* and
    *error_log* passed through for error tracking. *metric_name* only names
    which metric this call computes (attribution / conciseness /
    completeness); it does not change the computation.

    Returns:
        dict with:
          - "score": float in [0, 1]; 0.0 for an empty subclaim list
          - "details": list of {"subclaim", "label"} dicts, one per subclaim
    """
    # Empty input: nothing to verify; score is defined as 0.0.
    if not subclaims_list:
        return {"score": 0.0, "details": []}

    details = [
        {
            "subclaim": sc,
            "label": check_support(reference_text, sc, item_id=item_id, error_log=error_log),
        }
        for sc in subclaims_list
    ]
    supported_count = sum(1 for d in details if d["label"] == "supported")

    # The early return above guarantees a non-zero denominator here
    # (the original's `if len(...) > 0 else 0.0` guard was dead code).
    return {
        "score": supported_count / len(subclaims_list),
        "details": details,
    }
|
| 138 |
+
|
| 139 |
+
# -----------------------------
|
| 140 |
+
# MAIN
|
| 141 |
+
# -----------------------------
|
| 142 |
+
if __name__ == "__main__":
|
| 143 |
+
parser = argparse.ArgumentParser()
|
| 144 |
+
parser.add_argument("--input_file", type=str,
|
| 145 |
+
default="/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_full_data.json",
|
| 146 |
+
help="Path to input JSON with subclaims")
|
| 147 |
+
|
| 148 |
+
parser.add_argument("--save_folder", type=str,
|
| 149 |
+
default="/home/mshahidul/readctrl/data/concise_complete_attr_cal_v3",
|
| 150 |
+
help="Folder to save results")
|
| 151 |
+
|
| 152 |
+
# Range arguments
|
| 153 |
+
parser.add_argument("--start_index", type=int, default=0, help="Start index")
|
| 154 |
+
parser.add_argument("--end_index", type=int, default=-1, help="End index (exclusive). -1 for all.")
|
| 155 |
+
|
| 156 |
+
args = parser.parse_args()
|
| 157 |
+
|
| 158 |
+
INPUT_FILE = args.input_file
|
| 159 |
+
SAVE_FOLDER = args.save_folder
|
| 160 |
+
os.makedirs(SAVE_FOLDER, exist_ok=True)
|
| 161 |
+
|
| 162 |
+
# -----------------------------
|
| 163 |
+
# Load Data
|
| 164 |
+
# -----------------------------
|
| 165 |
+
print(f"Loading data from {INPUT_FILE}...")
|
| 166 |
+
with open(INPUT_FILE, "r") as f:
|
| 167 |
+
all_data = json.load(f)
|
| 168 |
+
|
| 169 |
+
# -----------------------------
|
| 170 |
+
# Slice Data based on Range
|
| 171 |
+
# -----------------------------
|
| 172 |
+
total_len = len(all_data)
|
| 173 |
+
start = args.start_index
|
| 174 |
+
end = args.end_index if args.end_index != -1 else total_len
|
| 175 |
+
|
| 176 |
+
if end > total_len:
|
| 177 |
+
end = total_len
|
| 178 |
+
|
| 179 |
+
data_slice = all_data[start:end]
|
| 180 |
+
|
| 181 |
+
print(f"Total dataset size: {total_len}")
|
| 182 |
+
print(f"Processing range: {start} to {end}")
|
| 183 |
+
print(f"Items in this batch: {len(data_slice)}")
|
| 184 |
+
|
| 185 |
+
# -----------------------------
|
| 186 |
+
# Output Files
|
| 187 |
+
# -----------------------------
|
| 188 |
+
OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"evaluated_metrics_{start}_{end}.json")
|
| 189 |
+
ERROR_LOG_FILE = os.path.join(SAVE_FOLDER, f"error_log_{start}_{end}.json")
|
| 190 |
+
|
| 191 |
+
# -----------------------------
|
| 192 |
+
# Resume Logic
|
| 193 |
+
# -----------------------------
|
| 194 |
+
processed_results = []
|
| 195 |
+
if os.path.exists(OUTPUT_FILE):
|
| 196 |
+
print(f"Found existing output file: {OUTPUT_FILE}. Resuming...")
|
| 197 |
+
try:
|
| 198 |
+
with open(OUTPUT_FILE, "r") as f:
|
| 199 |
+
processed_results = json.load(f)
|
| 200 |
+
except:
|
| 201 |
+
processed_results = []
|
| 202 |
+
|
| 203 |
+
processed_ids = {item['id'] for item in processed_results}
|
| 204 |
+
to_process = [item for item in data_slice if item['id'] not in processed_ids]
|
| 205 |
+
|
| 206 |
+
print(f"Already processed in this file: {len(processed_ids)}")
|
| 207 |
+
print(f"Remaining to process: {len(to_process)}")
|
| 208 |
+
|
| 209 |
+
# -----------------------------
|
| 210 |
+
# Initialize Error Tracker
|
| 211 |
+
# -----------------------------
|
| 212 |
+
global_error_log = []
|
| 213 |
+
|
| 214 |
+
# -----------------------------
|
| 215 |
+
# Processing Loop
|
| 216 |
+
# -----------------------------
|
| 217 |
+
# Added tqdm postfix to show error count in real-time
|
| 218 |
+
pbar = tqdm.tqdm(to_process)
|
| 219 |
+
|
| 220 |
+
for item in pbar:
|
| 221 |
+
current_id = item.get('id', 'unknown')
|
| 222 |
+
|
| 223 |
+
# 1. Prepare Texts
|
| 224 |
+
easy_text = item.get("easy_text", "")
|
| 225 |
+
inter_text = item.get("intermediate_text", "")
|
| 226 |
+
hard_text = item.get("hard_text", "")
|
| 227 |
+
fulltext = item.get("fulltext", "")
|
| 228 |
+
summary = item.get("summary", "")
|
| 229 |
+
|
| 230 |
+
# 2. Prepare Subclaim Lists
|
| 231 |
+
def ensure_list(x): return x if isinstance(x, list) else []
|
| 232 |
+
|
| 233 |
+
easy_subs = ensure_list(item.get("easy_subclaims", []))
|
| 234 |
+
inter_subs = ensure_list(item.get("intermediate_subclaims", []))
|
| 235 |
+
hard_subs = ensure_list(item.get("hard_subclaims", []))
|
| 236 |
+
full_subs = ensure_list(item.get("fulltext_subclaims", []))
|
| 237 |
+
summary_subs = ensure_list(item.get("summary_subclaims", []))
|
| 238 |
+
|
| 239 |
+
# ---------------------------------------------------------
|
| 240 |
+
# METRICS CALCULATION (Now passing id and error_log)
|
| 241 |
+
# ---------------------------------------------------------
|
| 242 |
+
|
| 243 |
+
# Attribution: Generated Subclaims -> Full Text
|
| 244 |
+
attr_easy = calculate_metric(easy_subs, fulltext, "attribution", current_id, global_error_log)
|
| 245 |
+
attr_inter = calculate_metric(inter_subs, fulltext, "attribution", current_id, global_error_log)
|
| 246 |
+
attr_hard = calculate_metric(hard_subs, fulltext, "attribution", current_id, global_error_log)
|
| 247 |
+
|
| 248 |
+
# Conciseness: Generated Subclaims -> Summary Text
|
| 249 |
+
conc_easy = calculate_metric(easy_subs, summary, "conciseness", current_id, global_error_log)
|
| 250 |
+
conc_inter = calculate_metric(inter_subs, summary, "conciseness", current_id, global_error_log)
|
| 251 |
+
conc_hard = calculate_metric(hard_subs, summary, "conciseness", current_id, global_error_log)
|
| 252 |
+
|
| 253 |
+
# Completeness: summary Subclaims -> Generated Text
|
| 254 |
+
comp_easy = calculate_metric(summary_subs, easy_text, "completeness", current_id, global_error_log)
|
| 255 |
+
comp_inter = calculate_metric(summary_subs, inter_text, "completeness", current_id, global_error_log)
|
| 256 |
+
comp_hard = calculate_metric(summary_subs, hard_text, "completeness", current_id, global_error_log)
|
| 257 |
+
|
| 258 |
+
# Construct Output
|
| 259 |
+
result_item = item.copy()
|
| 260 |
+
result_item["metrics"] = {
|
| 261 |
+
"easy": {
|
| 262 |
+
"attribution": attr_easy,
|
| 263 |
+
"conciseness": conc_easy,
|
| 264 |
+
"completeness": comp_easy
|
| 265 |
+
},
|
| 266 |
+
"intermediate": {
|
| 267 |
+
"attribution": attr_inter,
|
| 268 |
+
"conciseness": conc_inter,
|
| 269 |
+
"completeness": comp_inter
|
| 270 |
+
},
|
| 271 |
+
"hard": {
|
| 272 |
+
"attribution": attr_hard,
|
| 273 |
+
"conciseness": conc_hard,
|
| 274 |
+
"completeness": comp_hard
|
| 275 |
+
}
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
processed_results.append(result_item)
|
| 279 |
+
|
| 280 |
+
# Update progress bar with error count
|
| 281 |
+
if len(global_error_log) > 0:
|
| 282 |
+
pbar.set_postfix({"Errors": len(global_error_log)})
|
| 283 |
+
|
| 284 |
+
# Save frequently
|
| 285 |
+
if len(processed_results) % 10 == 0:
|
| 286 |
+
with open(OUTPUT_FILE, "w") as f:
|
| 287 |
+
json.dump(processed_results, f, indent=4, ensure_ascii=False)
|
| 288 |
+
|
| 289 |
+
# Final Save
|
| 290 |
+
with open(OUTPUT_FILE, "w") as f:
|
| 291 |
+
json.dump(processed_results, f, indent=4, ensure_ascii=False)
|
| 292 |
+
|
| 293 |
+
print(f"Evaluation for range {start}:{end} complete. Saved to: {OUTPUT_FILE}")
|
| 294 |
+
|
| 295 |
+
# -----------------------------
|
| 296 |
+
# Error Reporting
|
| 297 |
+
# -----------------------------
|
| 298 |
+
if global_error_log:
|
| 299 |
+
print(f"\n⚠️ WARNING: {len(global_error_log)} API errors occurred during processing.")
|
| 300 |
+
with open(ERROR_LOG_FILE, "w") as f:
|
| 301 |
+
json.dump(global_error_log, f, indent=4)
|
| 302 |
+
print(f"Error details saved to: {ERROR_LOG_FILE}")
|
| 303 |
+
else:
|
| 304 |
+
print("\n✅ Success: No API errors detected.")
|
code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v3.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import argparse
|
| 4 |
+
import re
|
| 5 |
+
from vllm import LLM, SamplingParams
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
# HF model id of the reasoning model used as the subclaim verifier (loaded by vLLM below).
MODEL_PATH = "Qwen/Qwen3-30B-A3B-Thinking-2507"
|
| 11 |
+
|
| 12 |
+
# -----------------------------
|
| 13 |
+
# PROMPT & CLEANING
|
| 14 |
+
# -----------------------------
|
| 15 |
+
def inference_prompt(text, subclaim):
    """Build the verification prompt for one (text, subclaim) pair.

    The prompt instructs the model to emit exactly one of three lowercase
    labels: 'supported', 'refuted', or 'not_supported'.  The exact wording
    is part of the evaluation contract consumed by clean_response(); do not
    edit it casually.

    Args:
        text: Medical source text used as the evidence.
        subclaim: Single claim to verify against `text`.

    Returns:
        The fully formatted prompt string (note the leading newline).
    """
    return f"""
You are a precise, conservative medical evidence evaluator.

Your task:
Determine the relationship between the following MEDICAL TEXT and the SUBCLAIM.

Use ONLY these labels (lowercase):
- supported → the TEXT clearly supports the SUBCLAIM.
- refuted → the TEXT clearly contradicts the SUBCLAIM.
- not_supported → the TEXT is related to the SUBCLAIM but does NOT provide enough evidence.

Important instructions:
- Analyze the text carefully before deciding.
- Be STRICT and CONSERVATIVE.
- If exact numerical details differ or are missing, choose not_supported.
- Respond with exactly ONE label at the end.

Medical Text:
{text}

Subclaim:
{subclaim}

Respond with exactly ONE label:
supported
refuted
not_supported
"""
|
| 44 |
+
|
| 45 |
+
def clean_response(text):
    """Strip the model's <think>...</think> block and extract the final label.

    The surrounding comments in the original stated the intent was to take the
    *last* valid label, but the implementation returned the first label found
    in a fixed priority order — so a response that discussed one label before
    stating a different final answer was misclassified.  This version finds
    every label occurrence and keeps the last one.

    Args:
        text: Raw generation from the reasoning model (may be None/empty).

    Returns:
        One of 'supported', 'refuted', 'not_supported'.  Defaults to
        'not_supported' when no label is present (conservative fallback).
    """
    if not text:
        return "not_supported"

    # Remove the reasoning block emitted by the "Thinking" model.
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    text = text.strip().lower()

    # Keep the LAST label mentioned — that is the model's final answer.
    # 'not_supported' is listed first in the alternation so the regex engine
    # never matches the 'supported' substring embedded inside it.
    matches = re.findall(r'not_supported|supported|refuted', text)
    if matches:
        return matches[-1]

    return "not_supported"
|
| 65 |
+
|
| 66 |
+
# -----------------------------
|
| 67 |
+
# MAIN
|
| 68 |
+
# -----------------------------
|
| 69 |
+
if __name__ == "__main__":
    # Offline batch evaluation: flatten all (text, subclaim) verification
    # requests into one list, run them through vLLM in a single generate()
    # call, then reassemble per-item attribution/conciseness/completeness
    # scores and write them to JSON.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_full_data.json")
    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/concise_complete_attr_cal_v4")
    parser.add_argument("--start_index", type=int, default=0)
    parser.add_argument("--end_index", type=int, default=-1)  # -1 means "to the end of the dataset"

    # vLLM Performance Arguments
    parser.add_argument("--gpu_utilization", type=float, default=0.95)
    parser.add_argument("--max_model_len", type=int, default=16384)  # Adjusted for A100 80GB

    args = parser.parse_args()

    # 1. Setup Data
    INPUT_FILE = args.input_file
    SAVE_FOLDER = args.save_folder
    os.makedirs(SAVE_FOLDER, exist_ok=True)
    # NOTE(review): the output filename embeds the RAW CLI args (so "-1" can
    # appear), not the resolved `end` computed below; the API-based sibling
    # scripts name files after the resolved range — confirm which convention
    # downstream tooling expects.
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"evaluated_metrics_{args.start_index}_{args.end_index}.json")

    print(f"Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r") as f:
        all_data = json.load(f)

    # Slice Data
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len
    data_slice = all_data[start:end]
    print(f"Processing range: {start} to {end} ({len(data_slice)} items)")

    # -----------------------------
    # PHASE 1: PREPARE PROMPTS
    # -----------------------------
    print("Building prompt list...")

    # We need to flatten the hierarchy to feed vLLM a single list of strings.
    # We will store metadata to reconstruct the structure later.
    prompts_list = []
    request_metadata = []  # Syncs index-to-index with prompts_list

    def add_request(item_id, text, subclaims, metric_type, level):
        # Queue one verification prompt per subclaim; silently skips
        # missing/malformed subclaim fields.
        if not subclaims or not isinstance(subclaims, list):
            return
        for sub in subclaims:
            p = inference_prompt(text, sub)
            prompts_list.append(p)
            request_metadata.append({
                "id": item_id,
                "metric_type": metric_type,  # 'attribution', 'conciseness', 'completeness'
                "level": level,              # 'easy', 'intermediate', 'hard'
                "subclaim": sub
            })

    for item in data_slice:
        itm_id = item.get('id')
        fulltext = item.get("fulltext", "")
        summary = item.get("summary", "")

        easy_txt = item.get("easy_text", "")
        inter_txt = item.get("intermediate_text", "")
        hard_txt = item.get("hard_text", "")

        # A. ATTRIBUTION (Subclaims -> Fulltext)
        add_request(itm_id, fulltext, item.get("easy_subclaims", []), "attribution", "easy")
        add_request(itm_id, fulltext, item.get("intermediate_subclaims", []), "attribution", "intermediate")
        add_request(itm_id, fulltext, item.get("hard_subclaims", []), "attribution", "hard")

        # B. CONCISENESS (Subclaims -> Summary)
        add_request(itm_id, summary, item.get("easy_subclaims", []), "conciseness", "easy")
        add_request(itm_id, summary, item.get("intermediate_subclaims", []), "conciseness", "intermediate")
        add_request(itm_id, summary, item.get("hard_subclaims", []), "conciseness", "hard")

        # C. COMPLETENESS (Summary Subclaims -> Generated Text)
        sum_subs = item.get("summary_subclaims", [])
        add_request(itm_id, easy_txt, sum_subs, "completeness", "easy")
        add_request(itm_id, inter_txt, sum_subs, "completeness", "intermediate")
        add_request(itm_id, hard_txt, sum_subs, "completeness", "hard")

    print(f"Total inference requests generated: {len(prompts_list)}")

    if len(prompts_list) == 0:
        print("No subclaims found to process.")
        # NOTE(review): `exit()` relies on the `site` module; `sys.exit()` is
        # the portable form for scripts.
        exit()

    # -----------------------------
    # PHASE 2: BATCH INFERENCE
    # -----------------------------
    print("Initializing vLLM Engine...")
    llm = LLM(
        model=MODEL_PATH,
        trust_remote_code=True,
        dtype="bfloat16",
        gpu_memory_utilization=args.gpu_utilization,
        max_model_len=args.max_model_len,
        enforce_eager=True  # Helps with Qwen MoE stability
    )

    # Allow max_tokens for "Thinking", but we only keep the label later.
    # temperature=0 makes the run greedy/deterministic.
    sampling_params = SamplingParams(temperature=0, max_tokens=1024)

    print("Running Inference...")
    outputs = llm.generate(prompts_list, sampling_params)

    # -----------------------------
    # PHASE 3: AGGREGATE RESULTS
    # -----------------------------
    print("Aggregating results...")

    # Dictionary to reconstruct the data: results_map[id][metric][level] = list of results.
    # Relies on llm.generate() returning outputs in the same order as the
    # prompts, so index i pairs with request_metadata[i].
    results_map = {}

    for i, output in enumerate(outputs):
        meta = request_metadata[i]
        generated_text = output.outputs[0].text

        # Clean the Qwen "Thinking" output
        label = clean_response(generated_text)

        item_id = meta['id']
        metric = meta['metric_type']
        level = meta['level']

        if item_id not in results_map:
            results_map[item_id] = {
                "attribution": {"easy": [], "intermediate": [], "hard": []},
                "conciseness": {"easy": [], "intermediate": [], "hard": []},
                "completeness": {"easy": [], "intermediate": [], "hard": []},
            }

        results_map[item_id][metric][level].append({
            "subclaim": meta['subclaim'],
            "label": label
        })

    # -----------------------------
    # PHASE 4: CALCULATE SCORES & SAVE
    # -----------------------------
    final_output = []

    for original_item in data_slice:
        itm_id = original_item.get('id')

        # Create a clean copy of the item (shallow: nested values are shared)
        new_item = original_item.copy()

        # Structure for metrics
        metrics_struct = {
            "easy": {}, "intermediate": {}, "hard": {}
        }

        # If we processed this item (it had subclaims)
        if itm_id in results_map:
            raw_data = results_map[itm_id]

            # Iterate levels (easy, intermediate, hard)
            for level in ["easy", "intermediate", "hard"]:
                # Iterate metrics (attribution, conciseness, completeness)
                for metric in ["attribution", "conciseness", "completeness"]:

                    subclaim_results = raw_data[metric][level]
                    total = len(subclaim_results)
                    # Score = fraction of subclaims labelled 'supported'
                    supported = sum(1 for x in subclaim_results if x['label'] == 'supported')
                    score = (supported / total) if total > 0 else 0.0

                    metrics_struct[level][metric] = {
                        "score": score,
                        "details": subclaim_results
                    }
        else:
            # Handle empty items
            # NOTE: the same `empty_res` dict object is shared by all nine
            # slots below; harmless because it is only serialized, but
            # mutating one slot would mutate them all.
            empty_res = {"score": 0.0, "details": []}
            for level in ["easy", "intermediate", "hard"]:
                metrics_struct[level] = {
                    "attribution": empty_res,
                    "conciseness": empty_res,
                    "completeness": empty_res
                }

        new_item["metrics"] = metrics_struct
        final_output.append(new_item)

    print(f"Saving {len(final_output)} items to {OUTPUT_FILE}...")
    with open(OUTPUT_FILE, "w") as f:
        json.dump(final_output, f, indent=4, ensure_ascii=False)

    print("Done.")
|
code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v4.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
|
| 8 |
+
# CONFIGURATION
|
| 9 |
+
# -----------------------------
|
| 10 |
+
# Local path of the fine-tuned verifier model served behind the endpoint below.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16"
# OpenAI-compatible server (e.g. a vLLM deployment on the local network).
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"  # local server performs no authentication

# Module-level client reused by every check_support() call.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# VERIFICATION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def inference_prompt(text, subclaim):
    """Build the three-way verification prompt for one (text, subclaim) pair.

    Instructs the model to answer with exactly one of the lowercase labels
    'supported' / 'refuted' / 'not_supported'.  The wording is the evaluation
    contract parsed by check_support(); do not edit it casually.  Inputs may
    be in Spanish (the prompt tells the model to evaluate them as written).

    Args:
        text: Medical source text used as evidence.
        subclaim: Single claim to verify against `text`.

    Returns:
        The fully formatted prompt string (note the leading newline).
    """
    return f"""
You are a precise, conservative medical evidence evaluator.

Your task:
Determine the relationship between the following MEDICAL TEXT and the SUBCLAIM.

Use ONLY these labels (lowercase):
- supported → the TEXT clearly supports the SUBCLAIM. The information is
              explicitly stated or follows from a very direct and
              unambiguous medical inference (e.g., “fiebre de 39°C”
              supports “tenía fiebre”).
- refuted → the TEXT clearly contradicts the SUBCLAIM (e.g., the TEXT
            states the opposite, or provides mutually exclusive values:
            different drug, dose, duration, time point, diagnosis, etc.).
- not_supported → the TEXT is related to the SUBCLAIM but does NOT provide
                  enough evidence to mark it as supported or refuted
                  (e.g., missing or different dose, duration, timing,
                  route, frequency, or diagnosis; or the claim simply
                  is not mentioned).

Important instructions:
- Be STRICT and CONSERVATIVE:
  - If exact numerical details (dose, time, duration, frequency, age, etc.)
    in the SUBCLAIM are not explicitly stated or clearly implied in the TEXT,
    choose not_supported.
- Do NOT assume or infer information beyond what is clearly supported by
  the TEXT, even if it seems medically plausible.
- Use refuted ONLY when there is a clear contradiction between TEXT and
  SUBCLAIM.
- Ignore your external medical knowledge; base your decision ONLY on the TEXT.
- The TEXT and SUBCLAIM may be in Spanish; evaluate them as written.
- Do NOT add any explanation, justification, or extra text.

Medical Text:
{text}

Subclaim:
{subclaim}

Respond with exactly ONE label:
supported
refuted
not_supported
"""
|
| 64 |
+
|
| 65 |
+
# -----------------------------
|
| 66 |
+
# VERIFICATION LOGIC
|
| 67 |
+
# -----------------------------
|
| 68 |
+
def check_support(text: str, subclaim: str, item_id=None, error_log=None) -> str:
    """Ask the remote verifier model whether `text` supports `subclaim`.

    Args:
        text: Reference text used as evidence; empty/None short-circuits.
        subclaim: Claim to verify; empty/None short-circuits.
        item_id: Optional dataset id recorded with any error.
        error_log: Optional list; API failures are appended to it as dicts
            with keys 'id', 'subclaim', 'error_msg', 'type'.

    Returns:
        'supported', 'refuted', or 'not_supported'.  Any API failure is
        treated as 'not_supported' (deliberate best-effort behaviour so one
        bad request does not abort a long batch).
    """
    if not text or not subclaim:
        return "not_supported"

    prompt = inference_prompt(text, subclaim)

    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=512,
            temperature=0.1,
        )
        # Fix: content can be None (e.g. truncated/filtered response); the
        # old code raised TypeError here and logged a spurious API_ERROR.
        res = response.choices[0].message.content or ""
        # Keep only the text after the reasoning block.  Fix: split with
        # maxsplit=1 so a stray second "</think>" no longer discards the
        # final answer (old code took element [1] of a full split).
        if "</think>" in res:
            res = res.split("</think>", 1)[1]
        res = res.strip().lower()

        # Order matters: "supported" is a substring of "not_supported".
        if "not_supported" in res:
            return "not_supported"
        elif "supported" in res:
            return "supported"
        elif "refuted" in res:
            return "refuted"
        else:
            return "not_supported"

    except Exception as e:
        # --- ERROR TRACKING ---
        # Broad catch is intentional: any failure (network, server, parse)
        # degrades to the conservative label instead of killing the run.
        if error_log is not None:
            error_log.append({
                "id": item_id,
                "subclaim": subclaim,
                "error_msg": str(e),
                "type": "API_ERROR"
            })
        # Print to console so the failure is visible live.
        print(f"\n[!] Error on ID {item_id}: {e}")
        return "not_supported"
|
| 115 |
+
|
| 116 |
+
def calculate_metric(subclaims_list: list, reference_text: str, metric_name: str, item_id=None, error_log=None):
    """Verify each subclaim against `reference_text` and score the fraction supported.

    Args:
        subclaims_list: Subclaims to verify; empty/None yields score 0.0.
        reference_text: Text the subclaims are checked against.
        metric_name: Metric tag ('attribution', 'conciseness', 'completeness').
            Informational only — it does not change the computation.
        item_id: Optional dataset id forwarded to check_support for error tracking.
        error_log: Optional list collecting API error records.

    Returns:
        dict with 'score' (fraction of subclaims labelled 'supported') and
        'details' (one {'subclaim', 'label'} record per subclaim, in order).
    """
    if not subclaims_list:
        return {"score": 0.0, "details": []}

    results = []
    supported_count = 0

    for subclaim in subclaims_list:
        # Pass tracking info down to check_support.
        label = check_support(reference_text, subclaim, item_id=item_id, error_log=error_log)
        if label == "supported":
            supported_count += 1
        results.append({
            "subclaim": subclaim,
            "label": label
        })

    # The early return above guarantees a non-empty list, so the previous
    # `if len(...) > 0` guard on this division was dead code.
    score = supported_count / len(subclaims_list)

    return {
        "score": score,
        "details": results
    }
|
| 143 |
+
|
| 144 |
+
# -----------------------------
|
| 145 |
+
# MAIN
|
| 146 |
+
# -----------------------------
|
| 147 |
+
if __name__ == "__main__":
    # Sequential API-based evaluation with resume support: each item's
    # subclaims are verified one request at a time; partial results are
    # checkpointed so an interrupted run can be restarted on the same range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_full_data.json",
                        help="Path to input JSON with subclaims")

    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/concise_complete_attr_cal_v4",
                        help="Folder to save results")

    # Range arguments
    parser.add_argument("--start_index", type=int, default=0, help="Start index")
    parser.add_argument("--end_index", type=int, default=6, help="End index (exclusive). -1 for all.")

    args = parser.parse_args()

    INPUT_FILE = args.input_file
    SAVE_FOLDER = args.save_folder
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # -----------------------------
    # Load Data
    # -----------------------------
    print(f"Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r") as f:
        all_data = json.load(f)

    # -----------------------------
    # Slice Data based on Range
    # -----------------------------
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len

    # Clamp so the printed range and output filename stay accurate.
    if end > total_len:
        end = total_len

    data_slice = all_data[start:end]

    print(f"Total dataset size: {total_len}")
    print(f"Processing range: {start} to {end}")
    print(f"Items in this batch: {len(data_slice)}")

    # -----------------------------
    # Output Files (named after the resolved range so parallel shards don't collide)
    # -----------------------------
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"evaluated_metrics_{start}_{end}.json")
    ERROR_LOG_FILE = os.path.join(SAVE_FOLDER, f"error_log_{start}_{end}.json")

    # -----------------------------
    # Resume Logic: reload a previous partial output and skip finished items.
    # -----------------------------
    processed_results = []
    if os.path.exists(OUTPUT_FILE):
        print(f"Found existing output file: {OUTPUT_FILE}. Resuming...")
        try:
            with open(OUTPUT_FILE, "r") as f:
                processed_results = json.load(f)
        except (json.JSONDecodeError, OSError):
            # Fix: was a bare `except:`.  Only a corrupt or unreadable file
            # should trigger a fresh start — a bare except also swallowed
            # KeyboardInterrupt/SystemExit.
            processed_results = []

    processed_ids = {item['id'] for item in processed_results}
    to_process = [item for item in data_slice if item['id'] not in processed_ids]

    print(f"Already processed in this file: {len(processed_ids)}")
    print(f"Remaining to process: {len(to_process)}")

    # -----------------------------
    # Initialize Error Tracker
    # -----------------------------
    global_error_log = []

    # Fix: hoisted out of the loop (was redefined on every iteration).
    def ensure_list(x):
        # Coerce malformed subclaim fields (None, str, dict, ...) to [].
        return x if isinstance(x, list) else []

    # -----------------------------
    # Processing Loop
    # -----------------------------
    # tqdm postfix shows the running error count in real time.
    pbar = tqdm.tqdm(to_process)

    for item in pbar:
        current_id = item.get('id', 'unknown')

        # 1. Prepare Texts
        easy_text = item.get("easy_text", "")
        inter_text = item.get("intermediate_text", "")
        hard_text = item.get("hard_text", "")
        fulltext = item.get("fulltext", "")
        summary = item.get("summary", "")

        # 2. Prepare Subclaim Lists
        # (the unused read of "fulltext_subclaims" was dropped)
        easy_subs = ensure_list(item.get("easy_subclaims", []))
        inter_subs = ensure_list(item.get("intermediate_subclaims", []))
        hard_subs = ensure_list(item.get("hard_subclaims", []))
        summary_subs = ensure_list(item.get("summary_subclaims", []))

        # ---------------------------------------------------------
        # METRICS CALCULATION (passes id and error_log for tracking)
        # ---------------------------------------------------------

        # Attribution: Generated Subclaims -> Full Text
        attr_easy = calculate_metric(easy_subs, fulltext, "attribution", current_id, global_error_log)
        attr_inter = calculate_metric(inter_subs, fulltext, "attribution", current_id, global_error_log)
        attr_hard = calculate_metric(hard_subs, fulltext, "attribution", current_id, global_error_log)

        # Conciseness: Generated Subclaims -> Summary Text
        conc_easy = calculate_metric(easy_subs, summary, "conciseness", current_id, global_error_log)
        conc_inter = calculate_metric(inter_subs, summary, "conciseness", current_id, global_error_log)
        conc_hard = calculate_metric(hard_subs, summary, "conciseness", current_id, global_error_log)

        # Completeness: Summary Subclaims -> Generated Text
        comp_easy = calculate_metric(summary_subs, easy_text, "completeness", current_id, global_error_log)
        comp_inter = calculate_metric(summary_subs, inter_text, "completeness", current_id, global_error_log)
        comp_hard = calculate_metric(summary_subs, hard_text, "completeness", current_id, global_error_log)

        # Construct Output
        result_item = item.copy()
        result_item["metrics"] = {
            "easy": {
                "attribution": attr_easy,
                "conciseness": conc_easy,
                "completeness": comp_easy
            },
            "intermediate": {
                "attribution": attr_inter,
                "conciseness": conc_inter,
                "completeness": comp_inter
            },
            "hard": {
                "attribution": attr_hard,
                "conciseness": conc_hard,
                "completeness": comp_hard
            }
        }

        processed_results.append(result_item)

        # Update progress bar with error count
        if len(global_error_log) > 0:
            pbar.set_postfix({"Errors": len(global_error_log)})

        # Checkpoint every 10 items so a crash loses at most 10 results.
        if len(processed_results) % 10 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(processed_results, f, indent=4, ensure_ascii=False)

    # Final Save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_results, f, indent=4, ensure_ascii=False)

    print(f"Evaluation for range {start}:{end} complete. Saved to: {OUTPUT_FILE}")

    # -----------------------------
    # Error Reporting
    # -----------------------------
    if global_error_log:
        print(f"\n⚠️ WARNING: {len(global_error_log)} API errors occurred during processing.")
        with open(ERROR_LOG_FILE, "w") as f:
            json.dump(global_error_log, f, indent=4)
        print(f"Error details saved to: {ERROR_LOG_FILE}")
    else:
        print("\n✅ Success: No API errors detected.")
|
code/finetune-inference/subclaim_support_extraction/old/subclaim_support_cal_v5.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
# CONFIGURATION
# -----------------------------
# Fine-tuned verifier checkpoint, served behind an OpenAI-compatible endpoint.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/Mistral-Small-3.1-24B_subclaims-support-check-8b_ctx_v2-bf16"
# Local serving endpoint (presumably vLLM — TODO confirm); the API key is a
# placeholder ("EMPTY") because local serving performs no authentication.
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"

client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# VERIFICATION PROMPT
|
| 18 |
+
# -----------------------------
|
| 19 |
+
def inference_prompt(text, subclaim):
    """Build the strict-grounding verification prompt for one (text, subclaim) pair.

    The prompt instructs the model to answer with exactly one word,
    'supported' or 'not_supported', using only the provided medical text.
    """
    return f"""You are a clinical evidence auditor. Your evaluation must be based STRICTLY and ONLY on the provided medical text.

### MANDATORY GROUNDING RULES:
1. NO OUTSIDE KNOWLEDGE: Do not use your internal medical knowledge. Even if a subclaim is "common sense" in medicine, if it is not explicitly in the TEXT, it is 'not_supported'.
2. NO LOGICAL LEAPS: Do not bridge gaps in logic. (e.g., If the text mentions "high blood sugar" but not the word "diabetes", you cannot support a claim of "diabetes").
3. EXACT NUMERICAL MATCHING: Any doses (e.g., 500mg), frequencies (e.g., twice daily), or durations (e.g., 10 days) mentioned in the subclaim must match the text perfectly. If they are missing or different in the text, label as 'not_supported'.
4. DEFAULT TO NOT SUPPORTED: If the text is vague, ambiguous, or only suggests a possibility, you MUST choose 'not_supported'.
5. CLOSED-WORLD REALITY: Treat the TEXT as the only information that exists in the world.

### Medical Text:
{text}

### Subclaim:
{subclaim}

Output exactly one word ('supported' or 'not_supported') based on the strict rules above:"""
|
| 36 |
+
|
| 37 |
+
# -----------------------------
|
| 38 |
+
# VERIFICATION LOGIC
|
| 39 |
+
# -----------------------------
|
| 40 |
+
def check_support(text: str, subclaim: str, item_id=None, error_log=None) -> str:
    """Classify whether *subclaim* is supported by *text* via the served model.

    Returns one of 'supported', 'refuted', or 'not_supported'.
    API failures are appended to *error_log* (if provided) and reported as
    'not_supported' so a batch run keeps going.
    """
    if not text or not subclaim:
        return "not_supported"

    prompt = inference_prompt(text, subclaim)

    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=512,
            temperature=0.1,
        )
        # content may be None for an empty completion; normalize to "" so the
        # substring checks below cannot raise TypeError.
        res = response.choices[0].message.content or ""
        # Reasoning models may emit one or more <think>...</think> sections
        # before the verdict; keep only the text after the LAST closing tag
        # (the original [1] index broke when several tags appeared).
        if "</think>" in res:
            res = res.split("</think>")[-1]
        res = res.strip().lower()

        # Order matters: "supported" is a substring of "not_supported",
        # so the negative label must be tested first.
        if "not_supported" in res:
            return "not_supported"
        elif "supported" in res:
            return "supported"
        elif "refuted" in res:
            return "refuted"
        return "not_supported"

    except Exception as e:
        # --- ERROR TRACKING ---
        if error_log is not None:
            error_log.append({
                "id": item_id,
                "subclaim": subclaim,
                "error_msg": str(e),
                "type": "API_ERROR",
            })
        # Print to console so failures are visible live.
        print(f"\n[!] Error on ID {item_id}: {e}")
        return "not_supported"
|
| 87 |
+
|
| 88 |
+
def calculate_metric(subclaims_list: list, reference_text: str, metric_name: str, item_id=None, error_log=None):
    """Label every subclaim against *reference_text* and return the score.

    The score is the fraction of subclaims labeled 'supported'; the details
    list pairs each subclaim with its label. An empty input scores 0.0.
    """
    if not subclaims_list:
        return {"score": 0.0, "details": []}

    details = [
        {
            "subclaim": claim,
            "label": check_support(reference_text, claim, item_id=item_id, error_log=error_log),
        }
        for claim in subclaims_list
    ]
    supported_total = sum(1 for entry in details if entry["label"] == "supported")

    return {
        "score": supported_total / len(subclaims_list),
        "details": details,
    }
|
| 115 |
+
|
| 116 |
+
# -----------------------------
|
| 117 |
+
# MAIN
|
| 118 |
+
# -----------------------------
|
| 119 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_full_data.json",
                        help="Path to input JSON with subclaims")

    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/concise_complete_attr_testing",
                        help="Folder to save results")

    # Range arguments (allow sharding the dataset across parallel runs)
    parser.add_argument("--start_index", type=int, default=0, help="Start index")
    parser.add_argument("--end_index", type=int, default=6, help="End index (exclusive). -1 for all.")

    args = parser.parse_args()

    INPUT_FILE = args.input_file
    SAVE_FOLDER = args.save_folder
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # -----------------------------
    # Load Data
    # -----------------------------
    print(f"Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r") as f:
        all_data = json.load(f)

    # -----------------------------
    # Slice Data based on Range
    # -----------------------------
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len
    # Clamp so the range embedded in the output filenames matches reality.
    if end > total_len:
        end = total_len

    data_slice = all_data[start:end]

    print(f"Total dataset size: {total_len}")
    print(f"Processing range: {start} to {end}")
    print(f"Items in this batch: {len(data_slice)}")

    # -----------------------------
    # Output Files
    # -----------------------------
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"evaluated_metrics_{start}_{end}_mistral31_24B_v2.json")
    ERROR_LOG_FILE = os.path.join(SAVE_FOLDER, f"error_log_{start}_{end}_mistral31_24B_v2.json")

    # -----------------------------
    # Resume Logic: reload partial results so re-runs skip finished items.
    # -----------------------------
    processed_results = []
    if os.path.exists(OUTPUT_FILE):
        print(f"Found existing output file: {OUTPUT_FILE}. Resuming...")
        try:
            with open(OUTPUT_FILE, "r") as f:
                processed_results = json.load(f)
        except (OSError, json.JSONDecodeError):
            # A corrupt/unreadable partial file means we start over; a bare
            # `except:` here would also have swallowed KeyboardInterrupt.
            processed_results = []

    processed_ids = {item['id'] for item in processed_results}
    to_process = [item for item in data_slice if item['id'] not in processed_ids]

    print(f"Already processed in this file: {len(processed_ids)}")
    print(f"Remaining to process: {len(to_process)}")

    # -----------------------------
    # Initialize Error Tracker
    # -----------------------------
    global_error_log = []

    # Guard against missing / malformed subclaim fields.
    # (Hoisted out of the loop: the original redefined it every iteration.)
    def ensure_list(x):
        return x if isinstance(x, list) else []

    # -----------------------------
    # Processing Loop (tqdm postfix shows the error count in real time)
    # -----------------------------
    pbar = tqdm.tqdm(to_process)

    for item in pbar:
        current_id = item.get('id', 'unknown')

        # 1. Prepare Texts
        easy_text = item.get("easy_text", "")
        inter_text = item.get("intermediate_text", "")
        hard_text = item.get("hard_text", "")
        fulltext = item.get("fulltext", "")
        summary = item.get("summary", "")

        # 2. Prepare Subclaim Lists
        easy_subs = ensure_list(item.get("easy_subclaims", []))
        inter_subs = ensure_list(item.get("intermediate_subclaims", []))
        hard_subs = ensure_list(item.get("hard_subclaims", []))
        full_subs = ensure_list(item.get("fulltext_subclaims", []))
        summary_subs = ensure_list(item.get("summary_subclaims", []))

        # ---------------------------------------------------------
        # METRICS CALCULATION (id and error_log are threaded through)
        # ---------------------------------------------------------

        # Attribution: Generated Subclaims -> Full Text
        attr_easy = calculate_metric(easy_subs, fulltext, "attribution", current_id, global_error_log)
        attr_inter = calculate_metric(inter_subs, fulltext, "attribution", current_id, global_error_log)
        attr_hard = calculate_metric(hard_subs, fulltext, "attribution", current_id, global_error_log)

        # Conciseness: Generated Subclaims -> Summary Text
        conc_easy = calculate_metric(easy_subs, summary, "conciseness", current_id, global_error_log)
        conc_inter = calculate_metric(inter_subs, summary, "conciseness", current_id, global_error_log)
        conc_hard = calculate_metric(hard_subs, summary, "conciseness", current_id, global_error_log)

        # Completeness: summary Subclaims -> Generated Text
        comp_easy = calculate_metric(summary_subs, easy_text, "completeness", current_id, global_error_log)
        comp_inter = calculate_metric(summary_subs, inter_text, "completeness", current_id, global_error_log)
        comp_hard = calculate_metric(summary_subs, hard_text, "completeness", current_id, global_error_log)

        # Construct Output
        result_item = item.copy()
        result_item["metrics"] = {
            "easy": {
                "attribution": attr_easy,
                "conciseness": conc_easy,
                "completeness": comp_easy
            },
            "intermediate": {
                "attribution": attr_inter,
                "conciseness": conc_inter,
                "completeness": comp_inter
            },
            "hard": {
                "attribution": attr_hard,
                "conciseness": conc_hard,
                "completeness": comp_hard
            }
        }

        processed_results.append(result_item)

        # Update progress bar with error count
        if len(global_error_log) > 0:
            pbar.set_postfix({"Errors": len(global_error_log)})

        # Save frequently so a crash loses at most 10 items
        if len(processed_results) % 10 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(processed_results, f, indent=4, ensure_ascii=False)

    # Final Save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_results, f, indent=4, ensure_ascii=False)

    print(f"Evaluation for range {start}:{end} complete. Saved to: {OUTPUT_FILE}")

    # -----------------------------
    # Error Reporting
    # -----------------------------
    if global_error_log:
        print(f"\n⚠️ WARNING: {len(global_error_log)} API errors occurred during processing.")
        with open(ERROR_LOG_FILE, "w") as f:
            json.dump(global_error_log, f, indent=4)
        print(f"Error details saved to: {ERROR_LOG_FILE}")
    else:
        print("\n✅ Success: No API errors detected.")
|
code/finetune-inference/subclaim_support_extraction/readctrl_model.code-workspace
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"folders": [
|
| 3 |
+
{
|
| 4 |
+
"path": "../../../../readctrl_model"
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"path": "../../.."
|
| 8 |
+
}
|
| 9 |
+
],
|
| 10 |
+
"settings": {
|
| 11 |
+
"folder-color.pathColors": [
|
| 12 |
+
{
|
| 13 |
+
"folderPath": "/home/mshahidul/readctrl/data/thresold_finding/",
|
| 14 |
+
"badge": "🥶"
|
| 15 |
+
}
|
| 16 |
+
]
|
| 17 |
+
}
|
| 18 |
+
}
|
code/finetune-inference/subclaim_support_extraction/subclaim_support_cal_tesing.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
# CONFIGURATION
# -----------------------------
# Fine-tuned verifier checkpoint, served behind an OpenAI-compatible endpoint.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16"
# Short tag embedded in output filenames.
model_name="qwen3-32B"
# Local serving endpoint; the key is a placeholder ("EMPTY") for local serving.
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"
print(f"Using model: {MODEL_PATH}")
print(f"Model name: {model_name}")
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 17 |
+
|
| 18 |
+
# -----------------------------
|
| 19 |
+
# VERIFICATION PROMPT
|
| 20 |
+
# -----------------------------
|
| 21 |
+
def inference_prompt(text, subclaim):
    """Build the strict-grounding verification prompt for one (text, subclaim) pair.

    The prompt instructs the model to answer with exactly one word,
    'supported' or 'not_supported', using only the provided medical text.
    """
    return f"""You are a clinical evidence auditor. Your evaluation must be based STRICTLY and ONLY on the provided medical text.

### MANDATORY GROUNDING RULES:
1. NO OUTSIDE KNOWLEDGE: Do not use your internal medical knowledge. Even if a subclaim is "common sense" in medicine, if it is not explicitly in the TEXT, it is 'not_supported'.
2. NO LOGICAL LEAPS: Do not bridge gaps in logic. (e.g., If the text mentions "high blood sugar" but not the word "diabetes", you cannot support a claim of "diabetes").
3. EXACT NUMERICAL MATCHING: Any doses (e.g., 500mg), frequencies (e.g., twice daily), or durations (e.g., 10 days) mentioned in the subclaim must match the text perfectly. If they are missing or different in the text, label as 'not_supported'.
4. DEFAULT TO NOT SUPPORTED: If the text is vague, ambiguous, or only suggests a possibility, you MUST choose 'not_supported'.
5. CLOSED-WORLD REALITY: Treat the TEXT as the only information that exists in the world.

### Medical Text:
{text}

### Subclaim:
{subclaim}

Output exactly one word ('supported' or 'not_supported') based on the strict rules above:"""
|
| 38 |
+
|
| 39 |
+
# -----------------------------
|
| 40 |
+
# VERIFICATION LOGIC
|
| 41 |
+
# -----------------------------
|
| 42 |
+
def check_support(text: str, subclaim: str, error_log=None) -> str:
    """Classify whether *subclaim* is supported by *text* via the served model.

    Returns one of 'supported', 'refuted', or 'not_supported'.
    API failures are appended to *error_log* (if provided) and reported as
    'not_supported' so a batch run keeps going.
    """
    if not text or not subclaim:
        return "not_supported"

    prompt = inference_prompt(text, subclaim)

    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=100,
            temperature=0.1,
        )
        # content may be None for an empty completion; normalize to "" so the
        # substring checks below cannot raise TypeError.
        res = response.choices[0].message.content or ""
        # Reasoning models may emit one or more <think>...</think> sections
        # before the verdict; keep only the text after the LAST closing tag
        # (the original [1] index broke when several tags appeared).
        if "</think>" in res:
            res = res.split("</think>")[-1]
        res = res.strip().lower()

        # Order matters: "supported" is a substring of "not_supported",
        # so the negative label must be tested first.
        if "not_supported" in res:
            return "not_supported"
        elif "supported" in res:
            return "supported"
        elif "refuted" in res:
            return "refuted"
        return "not_supported"

    except Exception as e:
        # --- ERROR TRACKING ---
        if error_log is not None:
            error_log.append({
                "subclaim": subclaim,
                "error_msg": str(e),
                "type": "API_ERROR",
            })
        return "not_supported"
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# -----------------------------
|
| 92 |
+
# MAIN
|
| 93 |
+
# -----------------------------
|
| 94 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/finetuning_data/test_subclaim_support_v2.json",
                        help="Path to input JSON with subclaims")

    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/concise_complete_attr_testing",
                        help="Folder to save results")

    # Range arguments (allow sharding the dataset across parallel runs)
    parser.add_argument("--start_index", type=int, default=0, help="Start index")
    parser.add_argument("--end_index", type=int, default=-1, help="End index (exclusive). -1 for all.")

    args = parser.parse_args()

    INPUT_FILE = args.input_file
    SAVE_FOLDER = args.save_folder
    os.makedirs(SAVE_FOLDER, exist_ok=True)

    # -----------------------------
    # Load Data
    # -----------------------------
    print(f"Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r") as f:
        all_data = json.load(f)

    # -----------------------------
    # Slice Data based on Range
    # -----------------------------
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len
    # Clamp so the range embedded in the output filename matches reality.
    if end > total_len:
        end = total_len

    data_slice = all_data[start:end]

    print(f"Total dataset size: {total_len}")
    print(f"Processing range: {start} to {end}")
    print(f"Items in this batch: {len(data_slice)}")

    # -----------------------------
    # Output Files
    # -----------------------------
    OUTPUT_FILE = os.path.join(SAVE_FOLDER, f"evaluated_metrics_{start}_{end}_{model_name}_v2.json")

    # -----------------------------
    # Resume Logic (items are keyed by their medical_text, which is assumed
    # unique per item — TODO confirm against the dataset).
    # -----------------------------
    processed_results = []
    if os.path.exists(OUTPUT_FILE):
        print(f"Found existing output file: {OUTPUT_FILE}. Resuming...")
        try:
            with open(OUTPUT_FILE, "r") as f:
                processed_results = json.load(f)
        except (OSError, json.JSONDecodeError):
            # A corrupt/unreadable partial file means we start over; a bare
            # `except:` here would also have swallowed KeyboardInterrupt.
            processed_results = []

    processed_ids = {item['medical_text'] for item in processed_results}
    to_process = [item for item in data_slice if item['medical_text'] not in processed_ids]

    print(f"Already processed in this file: {len(processed_ids)}")
    print(f"Remaining to process: {len(to_process)}")

    # -----------------------------
    # Initialize Error Tracker
    # -----------------------------
    global_error_log = []

    # -----------------------------
    # Processing Loop
    # -----------------------------
    pbar = tqdm.tqdm(to_process)

    for item in pbar:
        text = item.get('medical_text', '')
        subclaim = item.get('subclaim', [])
        label_gt = item.get('label', 'not_supported')

        label_gen = check_support(text, subclaim, error_log=global_error_log)

        # Exact comparison. The previous substring test ("supported" in
        # label_gt) was wrong: "supported" is a substring of
        # "not_supported", so a supported/not_supported mismatch was
        # silently scored as correct.
        correctness = (label_gen.strip() == label_gt.strip())
        if not correctness:
            print(f"Mismatch:\nGT: {label_gt}\nGEN: {label_gen}\nSubclaim: {subclaim}\nText: {text}\n---")

        result_entry = {
            "medical_text": text,
            "subclaim": subclaim,
            "label_gt": label_gt,
            "label_gen": label_gen,
            "correctness": correctness
        }
        processed_results.append(result_entry)

        # Save intermediate results so a crash loses at most 10 items
        if len(processed_results) % 10 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(processed_results, f, indent=2, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_results, f, indent=2, ensure_ascii=False)

    # Surface collected API errors (previously gathered but never reported).
    if global_error_log:
        print(f"\n⚠️ WARNING: {len(global_error_log)} API errors occurred during processing.")
|
code/finetune-inference/subclaim_support_extraction/subclaim_support_cal_tesing_v2.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
# CONFIGURATION
# -----------------------------
# Updated to reflect your specific project paths
# Fine-tuned verifier checkpoint, served behind an OpenAI-compatible endpoint.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16"
# Short tag embedded in output filenames.
model_name = "qwen3-32B"
# Local serving endpoint; the key is a placeholder ("EMPTY") for local serving.
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"

client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 17 |
+
|
| 18 |
+
# -----------------------------
|
| 19 |
+
# VERIFICATION PROMPT
|
| 20 |
+
# -----------------------------
|
| 21 |
+
def inference_prompt(text, subclaim):
    """Build the (abbreviated) strict-grounding verification prompt.

    Same contract as the longer prompt in the sibling scripts: the model must
    answer with exactly one word, 'supported' or 'not_supported', using only
    the provided medical text.
    """
    return f"""You are a clinical evidence auditor. Your evaluation must be based STRICTLY and ONLY on the provided medical text.

### MANDATORY GROUNDING RULES:
1. NO OUTSIDE KNOWLEDGE: Do not use your internal medical knowledge.
2. NO LOGICAL LEAPS: Do not bridge gaps in logic.
3. EXACT NUMERICAL MATCHING: Any doses, frequencies, or durations must match the text perfectly.
4. DEFAULT TO NOT SUPPORTED: If the text is vague or ambiguous, you MUST choose 'not_supported'.
5. CLOSED-WORLD REALITY: Treat the TEXT as the only information that exists in the world.

### Medical Text:
{text}

### Subclaim:
{subclaim}

Output exactly one word ('supported' or 'not_supported') based on the strict rules above:"""
|
| 38 |
+
|
| 39 |
+
# -----------------------------
|
| 40 |
+
# VERIFICATION LOGIC
|
| 41 |
+
# -----------------------------
|
| 42 |
+
def check_support(text: str, subclaim: str) -> str:
    """Ask the served model whether *subclaim* is supported by *text*.

    Returns 'supported', 'not_supported', or 'error_api' when the API call
    fails (the caller can distinguish failures from genuine negatives).
    """
    if not text or not subclaim:
        return "not_supported"

    prompt = inference_prompt(text, subclaim)

    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,  # Shortened as we only need one word
            temperature=0.1,
        )
        # content may be None for an empty completion; normalize to "" so
        # .strip() cannot raise AttributeError.
        res = (response.choices[0].message.content or "").strip().lower()

        # Handle reasoning models that might include <think> tags
        if "</think>" in res:
            res = res.split("</think>")[-1].strip()

        # Order matters: "supported" is a substring of "not_supported".
        if "not_supported" in res:
            return "not_supported"
        elif "supported" in res:
            return "supported"
        return "not_supported"

    except Exception:
        # (The exception value was previously bound but never used.)
        return "error_api"
|
| 69 |
+
|
| 70 |
+
# -----------------------------
|
| 71 |
+
# MAIN
|
| 72 |
+
# -----------------------------
|
| 73 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_classified_multiclinsum_test_en_en.json")
    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/factual_testing")
    parser.add_argument("--start_index", type=int, default=0)
    parser.add_argument("--end_index", type=int, default=-1)

    args = parser.parse_args()
    os.makedirs(args.save_folder, exist_ok=True)

    print(f"Loading data from {args.input_file}...")
    with open(args.input_file, "r") as f:
        all_data = json.load(f)

    # Slice Data
    total_len = len(all_data)
    start = args.start_index
    end = args.end_index if args.end_index != -1 else total_len
    # Clamp so the range embedded in the output filename matches reality
    # (consistent with the sibling evaluation scripts; the slice itself
    # already clamps, but the filename did not).
    if end > total_len:
        end = total_len
    data_slice = all_data[start:end]

    OUTPUT_FILE = os.path.join(args.save_folder, f"evaluated_support_{start}_{end}_{model_name}.json")

    processed_results = []
    # Simple resume logic by checking length: items are assumed to be
    # processed strictly in order, so the count doubles as a cursor.
    if os.path.exists(OUTPUT_FILE):
        with open(OUTPUT_FILE, "r") as f:
            processed_results = json.load(f)
        print(f"Resuming from index {len(processed_results)}")
        data_slice = data_slice[len(processed_results):]

    for item in tqdm.tqdm(data_slice):
        doc_id = item.get('id', 'unknown')
        full_text = item.get('fulltext', '')
        # We usually want to verify if the summary's claims are supported by the full text
        summary_subclaims = item.get('summary_subclaims', [])

        results_for_this_doc = []

        # summary_subclaims is likely a list of strings
        for sc in summary_subclaims:
            label_gen = check_support(full_text, sc)
            results_for_this_doc.append({
                "subclaim": sc,
                "support_label": label_gen
            })

        output_entry = {
            "id": doc_id,
            "fulltext": full_text,
            "summary": item.get('summary', ''),
            "subclaim_evaluations": results_for_this_doc
        }

        processed_results.append(output_entry)

        # Periodic save so a crash loses at most 10 documents
        if len(processed_results) % 10 == 0:
            with open(OUTPUT_FILE, "w") as f:
                json.dump(processed_results, f, indent=2, ensure_ascii=False)

    # Final save
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_results, f, indent=2, ensure_ascii=False)
    print(f"Processing complete. Saved to {OUTPUT_FILE}")
|
code/finetune-inference/subclaim_support_extraction/subclaim_support_cal_tesing_v3.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tqdm
|
| 4 |
+
import argparse
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
# -----------------------------
# CONFIGURATION
# -----------------------------
# Fine-tuned verifier checkpoint, served behind an OpenAI-compatible endpoint.
MODEL_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16"
# Short tag embedded in output filenames.
model_name = "qwen3-32B"
# Local serving endpoint; the key is a placeholder ("EMPTY") for local serving.
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"

client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
| 16 |
+
|
| 17 |
+
# -----------------------------
|
| 18 |
+
# PROMPTS
|
| 19 |
+
# -----------------------------
|
| 20 |
+
|
| 21 |
+
def get_attribution_prompt(source_text, subclaim):
    """Build the attribution prompt: is a summary subclaim grounded in the source?

    The model is asked to answer with exactly one word, 'supported' or
    'not_supported'.
    """
    return f"""You are a clinical evidence auditor.
### Medical Text (Source):
{source_text}
### Subclaim (from Summary):
{subclaim}
Output exactly one word ('supported' or 'not_supported') if the Source text contains the info in the Subclaim:"""
|
| 29 |
+
|
| 30 |
+
def get_completeness_prompt(summary_text, source_subclaim):
    """Build the completeness prompt: is a key source fact present in the summary?

    The model is asked to answer with exactly one word, 'supported' or
    'not_supported'.
    """
    return f"""You are checking for information loss in a medical summary.
### Summary Text:
{summary_text}
### Key Fact (from Source):
{source_subclaim}
Output exactly one word ('supported' or 'not_supported') if the Summary successfully includes the info from the Key Fact:"""
|
| 38 |
+
|
| 39 |
+
# -----------------------------
|
| 40 |
+
# LOGIC
|
| 41 |
+
# -----------------------------
|
| 42 |
+
|
| 43 |
+
def check_support(context: str, subclaim: str, mode: str = "attribution") -> str:
    """Ask the LLM judge whether *subclaim* is supported by *context*.

    Args:
        context: The source document (attribution mode) or the summary
            (completeness mode).
        subclaim: The single fact being verified.
        mode: "attribution" checks summary->source grounding; any other value
            uses the completeness prompt (source fact present in summary).

    Returns:
        "supported", "not_supported", or "error_api" when the request failed.
    """
    # Empty inputs can never be supported; short-circuit to avoid an API call.
    if not context or not subclaim:
        return "not_supported"

    if mode == "attribution":
        prompt = get_attribution_prompt(context, subclaim)
    else:  # completeness
        prompt = get_completeness_prompt(context, subclaim)

    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,
            temperature=0.1,
        )
        # FIX: message.content may be None (refusal/empty completions); the old
        # code crashed on .strip() and silently surfaced it as "error_api".
        res = (response.choices[0].message.content or "").strip().lower()

        # Strip a leading reasoning trace if the model emits <think>...</think>.
        if "</think>" in res:
            res = res.split("</think>")[-1].strip()

        # "not_supported" contains "supported" as a substring — check both.
        return "supported" if "supported" in res and "not_supported" not in res else "not_supported"
    except Exception as exc:
        # FIX: log the failure instead of swallowing it silently; callers score
        # "error_api" the same as unsupported, so the return value is unchanged.
        print(f"[check_support] API error: {exc}")
        return "error_api"
|
| 67 |
+
|
| 68 |
+
# -----------------------------
|
| 69 |
+
# MAIN
|
| 70 |
+
# -----------------------------
|
| 71 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str,
                        default="/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_classified_multiclinsum_test_en_en.json")
    parser.add_argument("--save_folder", type=str,
                        default="/home/mshahidul/readctrl/data/factual_testing")
    parser.add_argument("--start_index", type=int, default=0)
    parser.add_argument("--end_index", type=int, default=-1)

    args = parser.parse_args()
    os.makedirs(args.save_folder, exist_ok=True)

    # FIX: pin the encoding — the platform default can break on non-UTF-8
    # locales when the dataset contains non-ASCII clinical text.
    with open(args.input_file, "r", encoding="utf-8") as f:
        all_data = json.load(f)

    # --end_index -1 means "through the end of the dataset".
    start, end = args.start_index, (args.end_index if args.end_index != -1 else len(all_data))
    data_slice = all_data[start:end]
    OUTPUT_FILE = os.path.join(args.save_folder, f"full_evaluation_{start}_{end}_{model_name}.json")

    processed_results = []

    for item in tqdm.tqdm(data_slice):
        full_text = item.get('fulltext', '')
        summary = item.get('summary', '')

        # 1. Factual Attribution (Summary -> Source): each summary subclaim
        #    must be grounded in the full text.
        summary_subclaims = item.get('summary_subclaims', [])
        attribution_results = []
        for sc in summary_subclaims:
            label = check_support(full_text, sc, mode="attribution")
            attribution_results.append({"subclaim": sc, "label": label})

        # 2. Completeness Check (Source -> Summary): each key source fact
        #    should be recoverable from the summary.
        source_subclaims = item.get('fulltext_subclaims', [])
        completeness_results = []
        for sc in source_subclaims:
            label = check_support(summary, sc, mode="completeness")
            completeness_results.append({"source_fact": sc, "present_in_summary": label})

        # Fraction of "supported" labels; "error_api" counts as unsupported.
        attr_score = sum(1 for x in attribution_results if x['label'] == 'supported') / len(attribution_results) if attribution_results else 0
        comp_score = sum(1 for x in completeness_results if x['present_in_summary'] == 'supported') / len(completeness_results) if completeness_results else 0

        processed_results.append({
            "id": item.get('id', 'unknown'),
            "scores": {
                "factual_attribution": attr_score,
                "completeness": comp_score
            },
            "attribution_details": attribution_results,
            "completeness_details": completeness_results
        })

        # Checkpoint every 5 items so progress survives crashes/interrupts.
        if len(processed_results) % 5 == 0:
            with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
                json.dump(processed_results, f, indent=2)

    # Final write covers the tail items since the last checkpoint.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(processed_results, f, indent=2)
    print(f"Done. Saved to {OUTPUT_FILE}")
|