{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ce8de210", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'id': '997bb945-628d-4724-b370-b84de974a19f',\n", " 'image': 'part-000001/997bb945-628d-4724-b370-b84de974a19f.jpg',\n", " 'conversations': [{'from': 'human',\n", " 'value': '\\nWrite a prompt for Stable Diffusion to generate this image.'},\n", " {'from': 'gpt',\n", " 'value': 'a beautiful painting of chernobyl by nekro, pascal blanche, john harris, greg rutkowski, sin jong hun, moebius, simon stalenhag. in style of cg art. ray tracing. cel shading. hyper detailed. realistic. ue 5. maya. octane render. '}]},\n", " Ellipsis]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# This is the data sample for LLava\n", "\n", "[\n", " {\n", " \"id\": \"997bb945-628d-4724-b370-b84de974a19f\", \n", "# id would be Manual_id+instruction_id\n", " \"image\": \"part-000001/997bb945-628d-4724-b370-b84de974a19f.jpg\",\n", "# this_manual['VLM']['img_path']\n", " \"conversations\": [\n", " {\n", " \"from\": \"human\",\n", " \"value\": \"\\nWrite a prompt for Stable Diffusion to generate this image.\"\n", " \n", "# 1. [detection]Give me the position of object \n", "# 2. [Step] Did I do it correct for the step the instruction for this step\n", "# 3. [grounding] Tell me what to do for the current situation. \n", " },\n", " {\n", " \"from\": \"gpt\",\n", " \"value\": \"a beautiful painting of chernobyl by nekro, pascal blanche, john harris, greg rutkowski, sin jong hun, moebius, simon stalenhag. in style of cg art. ray tracing. cel shading. hyper detailed. realistic. ue 5. maya. octane render. \"\n", " \n", "# 1. [detection] object is at [position]\n", "# 2. [Step] This is the wrong. \n", "# 3. [grounding] the instruction for this step/next step \n", " \n", " \n", " \n", " \n", " },\n", " ]\n", " },\n", " ...\n", "]" ] }, { "cell_type": "code", "execution_count": null, "id": "a0412692", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "600a9392", "metadata": {}, "source": [ "# Grounding" ] }, { "cell_type": "code", "execution_count": null, "id": "88de07fb", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "id": "34a782c8", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 32, "id": "0bd3884c", "metadata": {}, "outputs": [], "source": [ "def create_grounding_data(manual_id,this_line,temp_json):\n", " if this_line['VLM']['step_class']=='step':\n", " data_entry = {\n", " \"id\": f\"{manual_id}-{this_line['instruction_id']}\",\n", " \"image\": this_line['VLM']['img_path'],\n", " \"conversations\": [\n", " {\n", " \"from\": \"human\",\n", " \"value\": \"[grounding] Tell me what to do for the current situation. \"\n", " },\n", " {\n", " \"from\": \"gpt\",\n", " \"value\": \" \".join(this_line['text'])}]\n", " }\n", " temp_json.append(data_entry)\n", "# print(temp_json[0])\n", " return temp_json\n", " \n", " else:\n", " return temp_json" ] }, { "cell_type": "code", "execution_count": 181, "id": "a994ddc0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7998" ] }, "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import json\n", "import os\n", "\n", "# 定义两个文件夹的路径\n", "json_folder_path = 'json_data/'\n", "temp_json=[]\n", "# 遍历文件夹A中的所有JSON文件\n", "for filename in os.listdir(json_folder_path):\n", " if filename.endswith('.json'):\n", " \n", " # 构建文件的完整路径\n", " file_path = os.path.join(json_folder_path, filename)\n", "# file_b_path = os.path.join(folder_b_path, filename[:-5]+'_color.json'\n", "# temp_json=[]\n", " # 确保文件夹B中存在对应的文件\n", " if os.path.exists(file_path):\n", "# print(file_path)\n", " \n", "# # 读取文件夹A中的文件内容\n", " with open(file_path, 'r') as file:\n", " this_json= json.load(file)\n", " manual_id=this_json['manual_id']\n", " data = this_json['instructions']\n", " for this_line in data:\n", " create_grouding_data(manual_id,this_line,temp_json)\n", " \n", "# print(temp_json[10])\n", "# with open('grouding_LLava.json', 'w') as json_file:\n", "# json.dump(temp_json, json_file, indent=4)\n", " \n", " \n", " \n", "\n", "\n", "\n", "len(temp_json)" ] }, { "cell_type": "code", "execution_count": null, "id": "469efa60", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "f4d1f47a", "metadata": {}, "source": [ "# State Detection" ] }, { "cell_type": "code", "execution_count": null, "id": "d28a2550", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "32251433", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 182, "id": "adf8089e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lego-71757-lloyds-ninja-mech-readscr\n", "lego-60263-ocean-mini-submarine-readscr\n", "lego-60326-picnic-in-the-park-readscr\n", "lego-60288-race-buggy-transporter-readscr\n", "lego-60249-street-sweeper-readscr\n", "lego-60255-city-stunt-team-screen-reader\n", "lego-60292-town-center-readscr\n", "lego-60280-fire-ladder-truck-readscr\n", "lego-42102-b-mini-claas-xerion-harvester-readscr\n", "lego-71706-coles-speeder-car-readscr\n", "lego-75264-kylo-rens-shuttle-microfighter-readscr\n", "lego-31130-a-sunken-treasure-mission-readscr\n", "lego-41719-mobile-fashion-boutique-readscr\n", "lego-60272-elite-police-boat-transport-readscr\n", "lego-10777-mickey-mouse-and-minnie-mouses-camping-trip-readscr\n", "lego-60253-ice-cream-truck-readscr\n", "lego-11013-creative-transparent-bricks-readscr\n", "lego-31130-c-sunken-treasure-mission-readscr\n", "lego-71700-jungle-raider-screen-reader\n", "lego-76400-hogwarts-carriage-and-thestrals-readscr\n", "lego-60285-sports-car-readscr\n", "lego-60289-airshow-jet-transporter-readscr\n", "lego-60274-elite-police-lighthouse-capture-mobile\n", "lego-43209-elsa-and-nokks-ice-stable-readscr\n", "lego-41251-poppys-pod-readscr\n", "lego-42102-a-mini-claas-xerion-tractor-readscr\n", "lego-41421-baby-elephant-jungle-rescue-readscr\n", "lego-60242-police-highway-arrest-readscr\n", "lego-60283-holiday-camper-van-readscr\n", "lego-43204-anna-and-olafs-castle-fun-readscr\n", "lego-75333-obi-wan-kenobis-jedi-starfighter-readscr\n", "lego-41253-tbd-cactus-screen-reader\n", "lego-76146-tbd-lsh-spider-mesh-screen-reader\n", "lego-43178-cinderellas-castle-celebration-readscr\n", "lego-41705-heartlake-city-pizzeria-readscr\n", "lego-60342-the-shark-attack-stunt-challenge-readscr\n", "lego-41439-cat-grooming-car-readscr\n", "lego-41389-ice-cream-cart-readscr\n", "lego-71360-adventures-with-mario-starter-course-readscr\n", "lego-60252-construction-bulldozer-readscr\n", "lego-31130-b-sunken-treasure-mission-readscr\n", "lego-60279-fire-hazard-truck-readscr\n", "lego-60273-elite-police-driller-chase-readscr\n", "lego-71701-kais-fire-dragon-readscr\n", "lego-41397-juice-truck-readscr\n", "lego-60241-police-dog-unit-screen-reader\n", "lego-76946-blue-beta-velociraptor-capture-readscr\n", "lego-60286-beach-rescue-atv-readscr\n", "lego-41365-emmas-art-studio-screen-reader\n", "lego-75265-t-16-skyhopper-readscr\n", "lego-10781-miles-morales-spider-mans-techno-trike-readscr\n", "lego-43186-bruni-the-salamander-buildable-character-readscr\n", "lego-60207-sky-police-drone-chase-readscr\n", "lego-60251-monster-truck-readscr\n", "lego-70821-build-and-fix-workshop-readscr\n", "lego-76149-the-menace-of-mysterio-readscr\n", "lego-60284-roadwork-truck-readscr\n", "lego-60276-police-prisoner-transport-readscr\n", "lego-76147-vultures-trucker-robbery-readscr\n", "lego-71398-dorries-beachfront-expansion-set-readscr\n", "lego-60322-race-car-readscr\n", "lego-41715-ice-cream-truck-readscr\n", "lego-41699-pet-adoption-cafe-readscr\n", "lego-11001-classic-screen-reader\n", "lego-71768-jays-golden-dragon-motorbike-readscr\n" ] }, { "data": { "text/plain": [ "7998" ] }, "execution_count": 182, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "import json\n", "\n", "# 定义文件夹路径\n", "json_data_folder = 'json_data'\n", "parts_lego_folder = 'parts/lego'\n", "temp_json=[]\n", "# 获取所有图片文件夹的名称\n", "img_folders = os.listdir(parts_lego_folder)\n", "# print(img_folders)\n", "# 遍历json_data文件夹中的所有JSON文件\n", "for json_file in os.listdir(json_data_folder):\n", " json_path = os.path.join(json_data_folder, json_file)\n", " \n", " # 确保是JSON文件\n", " if os.path.isfile(json_path) and json_file.endswith('.json'):\n", " with open(json_path, 'r') as file:\n", " data = json.load(file)\n", " \n", " # 检查每一条指令\n", " for instruction in data['instructions']:\n", " img_path = instruction['VLM']['img_path']\n", " img_name = os.path.basename(img_path)\n", "# print(img_path)\n", "# print(img_name)\n", " \n", " # 检查对应的图片文件夹是否存在且包含相应的图片文件\n", " folder_name = json_file.rsplit('.', 1)[0][:-10] # 假设文件夹名和JSON文件名匹配\n", "# print(folder_name)\n", " if folder_name in img_folders:\n", "# full_img_folder_path = os.path.join(parts_lego_folder, folder_name,\"images\")\n", "# print(os.listdir(full_img_folder_path)[0][:-11],img_name[:-4])\n", " \n", " full_img_folder_path = os.path.join(parts_lego_folder, folder_name,\"images\")\n", " # 检查是否存在去除 _border 后缀的文件\n", " if any(img_name == file or img_name in file.replace('_border', '') for file in os.listdir(full_img_folder_path)):\n", " # 如果找到匹配的图片,更改 task_label\n", " instruction['VLM']['task_label'] = 'state'\n", "# print( instruction['VLM']['step_class'])\n", " # 如果找到匹配的图片,更改task_label\n", "# print(\"??\")\n", " instruction['VLM']['task_label'] = 'state'\n", " print(data['manual_id'])\n", " # 保存修改后的JSON数据\n", "# with open(json_path, 'w') as file:\n", "# json.dump(data, file, indent=4)\n", "\n", "len(temp_json)" ] }, { "cell_type": "code", "execution_count": null, "id": "18c23e10", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "3fb51297", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "72ddcc17", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "5aee4300", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "380a302f", "metadata": {}, "outputs": [], "source": [ "# Change Json file again \"task_label\"" ] }, { "cell_type": "code", "execution_count": null, "id": "e7808694", "metadata": {}, "outputs": [], "source": [ "\n", "# 定义两个文件夹的路径\n", "json_folder_path = 'json_data/'\n", "\n", "# 遍历文件夹A中的所有JSON文件\n", "for filename in os.listdir(json_folder_path):\n", " if filename.endswith('.json'):\n", " \n", " # 构建文件的完整路径\n", " file_path = os.path.join(json_folder_path, filename)\n", " temp_json=[]\n", " # 确保文件夹B中存在对应的文件\n", " if os.path.exists(file_path):\n", " \n", "# # 读取文件夹A中的文件内容\n", " with open(file_path, 'r') as file:\n", " this_json= json.load(file)\n", " manual_id=this_json['manual_id']\n", " data = this_json['instructions']\n", " for this_line in data:\n", " create_state_data(manual_id,this_line,temp_json)" ] }, { "cell_type": "code", "execution_count": null, "id": "30104b5c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "0822704d", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 69, "id": "0a913a14", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'ss'" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k=\"sss.png\"\n", "k[:-5]" ] }, { "cell_type": "code", "execution_count": 72, "id": "15d72256", "metadata": {}, "outputs": [], "source": [ "def create_state_data(manual_id,this_line,temp_json):\n", " if this_line['VLM']['task_label']=='state':\n", " for i in range(3):\n", " data_entry = {\n", " \"id\": f\"{manual_id}-{this_line['instruction_id']}\",\n", " \"image\": this_line['VLM']['img_path'][:-4]+\"_fake\"+str(i)+\".png\",\n", " \"conversations\": [\n", " {\n", " \"from\": \"human\",\n", " \"value\": \"[State] Did I do it correctly for \" + \" \".join(this_line['text'])\n", " },\n", " {\n", " \"from\": \"gpt\",\n", " \"value\": \"No.\"}]\n", " }\n", " temp_json.append(data_entry)\n", " \n", " data_entry = {\n", " \"id\": f\"{manual_id}-{this_line['instruction_id']}\",\n", " \"image\": this_line['VLM']['img_path'],\n", " \"conversations\": [\n", " {\n", " \"from\": \"human\",\n", " \"value\": \"[State] Did I do it correctly for \" + \" \".join(this_line['text'])\n", " },\n", " {\n", " \"from\": \"gpt\",\n", " \"value\": \"Yes.\"}]\n", " }\n", " \n", " \n", " temp_json.append(data_entry)\n", "# print(temp_json[0])\n", " return temp_json\n", " \n", " else:\n", " return temp_json" ] }, { "cell_type": "code", "execution_count": 183, "id": "f6ed669b", "metadata": {}, "outputs": [], "source": [ "import json\n", "import os\n", "\n", "# 定义两个文件夹的路径\n", "json_folder_path = 'json_data/'\n", "temp_json=[]\n", "# 遍历文件夹A中的所有JSON文件\n", "for filename in os.listdir(json_folder_path):\n", " if filename.endswith('.json'):\n", " \n", " # 构建文件的完整路径\n", " file_path = os.path.join(json_folder_path, filename)\n", " \n", " # 确保文件夹B中存在对应的文件\n", " if os.path.exists(file_path):\n", " \n", "# # 读取文件夹A中的文件内容\n", " with open(file_path, 'r') as file:\n", " this_json= json.load(file)\n", " manual_id=this_json['manual_id']\n", " data = this_json['instructions']\n", " for this_line in data:\n", "# print(\"?\")\n", " create_state_data(manual_id,this_line,temp_json)\n", " \n", "# print(temp_json)\n", "# with open('state_LLava.json', 'w') as json_file:\n", "# json.dump(temp_json, json_file, indent=4)\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "80e780dc", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 184, "id": "8eff9ac5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12688" ] }, "execution_count": 184, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(temp_json)" ] }, { "cell_type": "markdown", "id": "a03c6c0e", "metadata": {}, "source": [ "# Object detection" ] }, { "cell_type": "code", "execution_count": 177, "id": "38bc7b3a", "metadata": {}, "outputs": [], "source": [ "# def process_numbers(this_answer,position):\n", "import re\n", "\n", "# 示例文本\n", "# text = \"

The following image

{<1><0><98><99>}

1 dark green plate

{<16><35><54><62>}

2x4

{<16><35><54><62>}

1 reddish brown plate

{<60><35><84><62>}

2x2

{<60><35><84><62>}\"\n", "text1 =\"

1 dark orange chicken drumstick

{<40><32><59><61>}

the drumstick

{<40><32><59><61>}

a little pin

{<49><72><58><78>}

the back

{<49><72><58><78>}\"\n", " \n", "# 正则表达式匹配

内容和后续的坐标\n", "def clean_miniv2(text):\n", " pattern = re.compile(r'

(.*?)

\\{(<\\d+><\\d+><\\d+><\\d+>)\\}')\n", "\n", " # 找到所有匹配项\n", " matches = pattern.findall(text)\n", "# print(\"matches\",matches)\n", " # 存储已经处理过的坐标,用于去重\n", " processed_coords = set()\n", "\n", " # 结果字符串\n", " result = \"\"\n", "\n", " for content, coords in matches:\n", "# print(coords)\n", " # 如果内容包含'image'或者匹配 '数字x数字',跳过\n", " if \"image\" in content or \"piece\" in content or re.match(r'\\d+x\\d+', content):\n", "# print(content,'image')\n", " continue\n", " # 去重\n", " if coords in processed_coords:\n", " continue\n", " processed_coords.add(coords)\n", " # 添加到结果字符串\n", " result += f\"

{content}

{{{coords}}}\"\n", " \n", " return result\n", "\n", "# print(result)\n", "\n", " \n", "text = \"

1 dark orange chicken drumstick

{<40><32><59><61>}\"\n", " " ] }, { "cell_type": "code", "execution_count": 178, "id": "8d83aef9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'

1 dark orange chicken drumstick

{<40><32><59><61>}'" ] }, "execution_count": 178, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clean_miniv2(text)" ] }, { "cell_type": "code", "execution_count": 162, "id": "523fb068", "metadata": {}, "outputs": [], "source": [ "# clean_miniv2(text)" ] }, { "cell_type": "code", "execution_count": 179, "id": "a6c03362", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "

1 dark orange chicken drumstick

{<70><66><79><80>}\n" ] } ], "source": [ "import re\n", "import math\n", "\n", "def modify_text_with_coords(text,case):\n", " # 正确匹配 {} 结构\n", " pattern = re.compile(r'\\{<(\\d+)><(\\d+)><(\\d+)><(\\d+)>}')\n", "\n", " def modify_coords1(match):\n", " # 从匹配对象中提取坐标值\n", " xleft, ytop, xright, ybottom = map(int, match.groups())\n", " # 按要求修改坐标值\n", " xleft_modified = math.floor(xleft / 2)\n", " ytop_modified = math.floor(ytop / 2)\n", " xright_modified = math.floor(xright / 2 )\n", " ybottom_modified = math.floor(ybottom / 2)\n", " # 构建新的坐标字符串\n", " new_coords = f\"<{xleft_modified}><{ytop_modified}><{xright_modified}><{ybottom_modified}>\"\n", " return f'{{{new_coords}}}'\n", " \n", " \n", " def modify_coords2(match):\n", " # 从匹配对象中提取坐标值\n", " xleft, ytop, xright, ybottom = map(int, match.groups())\n", " # 按要求修改坐标值\n", " xleft_modified = math.floor(xleft / 2 + 50)\n", " ytop_modified = math.floor(ytop / 2)\n", " xright_modified = math.floor(xright / 2 + 50)\n", " ybottom_modified = math.floor(ybottom / 2)\n", " # 构建新的坐标字符串\n", " new_coords = f\"<{xleft_modified}><{ytop_modified}><{xright_modified}><{ybottom_modified}>\"\n", " return f'{{{new_coords}}}'\n", " \n", " def modify_coords3(match):\n", " # 从匹配对象中提取坐标值\n", " xleft, ytop, xright, ybottom = map(int, match.groups())\n", " # 按要求修改坐标值\n", " xleft_modified = math.floor(xleft / 2 )\n", " ytop_modified = math.floor(ytop / 2 + 50)\n", " xright_modified = math.floor(xright / 2 )\n", " ybottom_modified = math.floor(ybottom / 2+ 50)\n", " # 构建新的坐标字符串\n", " new_coords = f\"<{xleft_modified}><{ytop_modified}><{xright_modified}><{ybottom_modified}>\"\n", " return f'{{{new_coords}}}'\n", " \n", " def modify_coords4(match):\n", " # 从匹配对象中提取坐标值\n", " xleft, ytop, xright, ybottom = map(int, match.groups())\n", " # 按要求修改坐标值\n", " xleft_modified = math.floor(xleft / 2 + 50)\n", " ytop_modified = math.floor(ytop / 2+ 50)\n", " xright_modified = math.floor(xright / 2 + 50)\n", " ybottom_modified = math.floor(ybottom / 2+ 50)\n", " # 构建新的坐标字符串\n", " new_coords = f\"<{xleft_modified}><{ytop_modified}><{xright_modified}><{ybottom_modified}>\"\n", " return f'{{{new_coords}}}'\n", " \n", " \n", " \n", "\n", " # 使用正则表达式的 sub 方法和 modify_coords 函数修改文本中的坐标\n", " modify_funcs = {\n", " 1: modify_coords1,\n", " 2: modify_coords2,\n", " 3: modify_coords3,\n", " 4: modify_coords4\n", " }\n", " \n", " # 根据 case 选择相应的函数\n", " modify_func = modify_funcs.get(case, lambda x: x) # 默认不修改\n", " \n", " # 使用选中的函数修改文本中的坐标\n", " modified_text = pattern.sub(modify_func, text)\n", " return modified_text\n", "case=4\n", "# 原始文本示例\n", "# text = \"

1 dark orange chicken drumstick

{<24><18><65><63>}\"\n", "# 调用函数处理文本\n", "modified_text = modify_text_with_coords(clean_miniv2(text),case)\n", "print(modified_text)\n" ] }, { "cell_type": "code", "execution_count": 172, "id": "1ef00696", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "''" ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clean_miniv2(text)" ] }, { "cell_type": "code", "execution_count": 167, "id": "8856e6eb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "text==\"{<24><18><65><63>}\"" ] }, { "cell_type": "code", "execution_count": 165, "id": "c1d3af18", "metadata": {}, "outputs": [], "source": [ "\n", "\n", "\n", "def create_object_data(manual_id,this_line,temp_json):\n", " if this_line['VLM']['task_label']=='[detection-collect]'or this_line['VLM']['task_label']=='[detection]':\n", " if modify_text_with_coords(clean_miniv2(this_line['VLM']['MiniGPTv2_output']),1)!='':\n", " \n", " for i in range(1,5):\n", " data_entry = {\n", " \"id\": f\"{manual_id}-{this_line['instruction_id']}\",\n", " \"image\": this_line['VLM']['img_path'][:-4]+\"_combined_image\"+str(i)+\".png\",\n", " \"conversations\": [\n", " {\n", " \"from\": \"human\",\n", " \"value\": \"[detection]\" + \" \".join(this_line['text'])\n", " },\n", " {\n", " \"from\": \"gpt\",\n", " \"value\": modify_text_with_coords(clean_miniv2(this_line['VLM']['MiniGPTv2_output']),i)}]\n", " }\n", " temp_json.append(data_entry)\n", " temp_json.append(data_entry)\n", "# print(temp_json[0])\n", " return temp_json\n", " else:\n", " return temp_json\n", " \n", " \n", " \n", " \n", " else:\n", " return temp_json" ] }, { "cell_type": "code", "execution_count": 166, "id": "b5218b4b", "metadata": {}, "outputs": [], "source": [ "import json\n", "import os\n", "\n", "# 定义两个文件夹的路径\n", "json_folder_path = 'json_data/'\n", "temp_json=[]\n", "# 遍历文件夹A中的所有JSON文件\n", "for filename in os.listdir(json_folder_path):\n", " if filename.endswith('.json'):\n", " \n", " # 构建文件的完整路径\n", " file_path = os.path.join(json_folder_path, filename)\n", " \n", " # 确保文件夹B中存在对应的文件\n", " if os.path.exists(file_path):\n", " \n", "# # 读取文件夹A中的文件内容\n", " with open(file_path, 'r') as file:\n", " this_json= json.load(file)\n", " manual_id=this_json['manual_id']\n", " data = this_json['instructions']\n", " for this_line in data:\n", "# print(\"?\")\n", " create_object_data(manual_id,this_line,temp_json)\n", " \n", "# print(temp_json)\n", "with open('object_LLava.json', 'w') as json_file:\n", " json.dump(temp_json, json_file, indent=4)\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "ee210901", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }