| import json |
|
|
def read_original_file(file_path):
    """Load the user prompt of every request in a JSONL request file.

    Each non-blank line is parsed as one JSON object. The prompt of an entry
    is the ``content`` of the first message in ``body.messages`` whose
    ``role`` is ``'user'``. Entries are skipped when they have no truthy
    ``custom_id``, no user message, or a user message whose content is
    ``None``.

    Args:
        file_path: Path of the UTF-8 encoded JSONL request file.

    Returns:
        A ``(custom_id_order, original_data)`` tuple. ``custom_id_order``
        lists each kept ``custom_id`` once, in order of first appearance;
        ``original_data`` maps ``custom_id`` to its user prompt (the last
        occurrence wins when an id is repeated).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    original_data = {}
    custom_id_order = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (typically a trailing newline) are not records.
            if not line:
                continue

            entry = json.loads(line)
            custom_id = entry.get('custom_id')
            if not custom_id:
                continue

            # `or` also covers keys that are present but explicitly null,
            # which a plain .get(key, default) would pass through as None.
            body = entry.get('body') or {}
            messages = body.get('messages') or []
            user_content = next(
                (msg.get('content') for msg in messages
                 if msg.get('role') == 'user'),
                None,
            )
            if user_content is None:
                continue

            # Record the position only once so a repeated id does not yield
            # duplicate records downstream.
            if custom_id not in original_data:
                custom_id_order.append(custom_id)
            original_data[custom_id] = user_content

    return custom_id_order, original_data
|
|
def read_output_file(file_path):
    """Load the model reply for every entry in a JSONL output file.

    Each non-blank line is parsed as one JSON object. The reply of an entry
    is ``response.body.choices[0].message.content``. When any part of that
    path is missing, null or empty, the reply is recorded as ``''`` so the
    entry still appears in the result. Entries without a truthy
    ``custom_id`` are skipped.

    Args:
        file_path: Path of the UTF-8 encoded JSONL output file.

    Returns:
        A dict mapping ``custom_id`` to the reply content (the last
        occurrence wins when an id is repeated).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    output_data = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (typically a trailing newline) are not records.
            if not line:
                continue

            entry = json.loads(line)
            custom_id = entry.get('custom_id')
            if not custom_id:
                continue

            # `or` also covers keys that are present but explicitly null,
            # which a plain .get(key, default) would pass through as None.
            response = entry.get('response') or {}
            body = response.get('body') or {}
            choices = body.get('choices') or []
            model_output = ''
            if choices:
                message = choices[0].get('message') or {}
                content = message.get('content')
                # A null content would otherwise be rendered as "None".
                if content is not None:
                    model_output = content

            output_data[custom_id] = model_output

    return output_data
|
|
def main():
    """Interactively merge a request file and an output file into a report.

    Prompts for three paths (request file, model output file, destination),
    then writes one record per request, in request-file order, containing the
    custom_id, the original user prompt and the model reply. Records are
    separated by a single blank line; a request with no matching reply gets
    an empty reply field.
    """
    original_path = input("请输入原始请求文件路径:").strip()
    output_path = input("请输入大模型输出文件路径:").strip()
    save_path = input("请输入结果保存路径:").strip()

    ordered_ids, questions = read_original_file(original_path)
    answers = read_output_file(output_path)

    # Each record already ends with a newline, so joining on '\n' leaves
    # exactly one blank line between records and none after the last.
    records = [
        f"custom_id: {cid}\n"
        f"原问题: {questions.get(cid, '')}\n"
        f"大模型输出: {answers.get(cid, '')}\n"
        for cid in ordered_ids
    ]

    with open(save_path, 'w', encoding='utf-8') as report:
        report.write('\n'.join(records))
|
|
# Run the interactive merge only when executed as a script, not on import.
if __name__ == "__main__":
    main()