| import os |
| import json |
|
|
# Expose only GPU index 1 to CUDA-based libraries. This is set here, ahead of
# the `torch` import further down in the file.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})
|
|
|
|
def read_json(file_path):
    """Load the UTF-8 encoded JSON document stored at ``file_path``.

    Args:
        file_path: Location of the JSON file to read.

    Returns:
        The deserialized Python object (dict, list, str, number, bool or
        None, depending on the document's top-level value).
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.loads(handle.read())
|
|
def write_json(file_path, data):
    """Serialize ``data`` as pretty-printed JSON into ``file_path``.

    The target is opened in text mode with UTF-8 encoding and is truncated
    if it already exists. Non-ASCII characters are written verbatim rather
    than escaped, and nested structures are indented by four spaces.

    Args:
        file_path: Destination path of the JSON file.
        data: JSON-serializable object to store.
    """
    with open(file_path, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, indent=4, ensure_ascii=False)
|
|
|
|
| import os |
| from openai import OpenAI |
| import pprint |
| import json |
| from llamaapi import LlamaAPI |
|
|
| |
# SECURITY: API credentials must never be committed to source control.
# The keys that used to be hard-coded at this point should be treated as
# leaked and revoked/rotated with their providers.
#
# LLAMA_API_KEY is required for the LlamaAPI client created below.
# OPENAI_API_KEY is no longer assigned by this script; export it in the
# shell before running if any code relies on the OpenAI client.
_llama_api_key = os.environ.get("LLAMA_API_KEY")
if not _llama_api_key:
    raise RuntimeError(
        "LLAMA_API_KEY is not set; export it before running this script."
    )
llama = LlamaAPI(_llama_api_key)
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
|
|
|
|
| from chat import MiniCPMVChat, img2base64 |
| import torch |
| import json |
| from PIL import Image |
|
|
|
|
# Seed PyTorch's RNG first, then build the chat model wrapper.
# NOTE(review): MiniCPMVChat presumably loads the checkpoint stored in this
# directory - confirm against the `chat` module.
_MODEL_DIR = '/code/ICLR_2024/Model/MiniCPM-Llama3-V-2_5'
torch.manual_seed(0)
chat_model = MiniCPMVChat(_MODEL_DIR)
|
|
|
|
# One-off query against a single screenshot, issued before the dataset loop.
# NOTE(review): `answer` from this call is reassigned inside the processing
# loop further down without being read first - this looks like a smoke test;
# confirm whether it is still needed.
image_path = '/code/ICLR_2024/SeeClick/output_image_27.png'

qs = """
List all the application name and location in the image that can be interacted with, the result shoudl be like a list
"""

# The chat wrapper takes the base64 image plus the JSON-encoded message list.
im_64 = img2base64(image_path)
msgs = [dict(role="user", content=qs)]
inputs = dict(image=im_64, question=json.dumps(msgs))
answer = chat_model.chat(inputs)
|
|
# Load the dataset to annotate. Every record is read through its 'image'
# entry here; the processing loop further down adds more keys to the records.
data = read_json("/code/ICLR_2024/Auto-GUI/dataset/blip/single_blip_train_llava_10000_caption_elements_llama3_70b.json")

# Map each record's 'image' value to its position in `data`, so a record can
# be found again from a reconstructed relative file name (last one wins on
# duplicates).
retrival_dict = {record['image']: index for index, record in enumerate(data)}

path = '/code/ICLR_2024/Auto-GUI/dataset/'
path2 = 'blip/single_texts_splits/'

# File stems of the images: third '/'-separated component, text before the
# first '.'.
image_id = [x['image'].split('/')[2].split('.')[0] for x in data]

# Group the second '_'-separated field of every stem under its first field.
all_pair_id = {}
for stem in image_id:
    fields = stem.split('_')
    all_pair_id.setdefault(fields[0], []).append(fields[1])

# Unique keys in first-seen order. The former `list(set(...))` yielded an
# order that changes between interpreter runs (string hashing is randomised
# by default), so the `[770:]` resume offset applied by the processing loop
# selected a different subset on every run.
all_pair_key = list(all_pair_id)
|
|
|
|
import ast
import re
import time

from tqdm import tqdm

# Prompt sent to the vision model together with every screenshot.
Demo_prompt_step1 = """
List all the application name and location in the image that can be interacted with, the result shoudl be like a list
"""

# Instruction appended to the vision model's raw answer before it is sent to
# the text model for reformatting into a Python list.
prompt = """ ##### refine it to a list, list name must be elements , just like:
elements = [
"Newegg",
"Newegg CEO",
"Newegg customer service",
"Newegg founder",
"Newegg promo code",
"Newegg return policy",
"Newegg revenue",
"Newegg military discounts"]

Answer the python list only!
##### """

# The question payload is identical for every image, so serialise it once.
_STEP1_QUESTION = json.dumps([{"role": "user", "content": Demo_prompt_step1}])


def _parse_elements(text):
    """Extract the list literal from the text model's reply.

    Handles replies such as ``elements = [...]``, optionally surrounded by
    prose or Markdown code fences, by taking the span from the first ``[`` to
    the last ``]``. Only Python literals are evaluated, so nothing contained
    in the reply is executed.

    Args:
        text: Reply text returned by the text model.

    Returns:
        The parsed list.

    Raises:
        ValueError: If no list literal is present or it cannot be parsed.
        TypeError: If ``text`` is not a string.
    """
    found = re.search(r"\[.*\]", text, flags=re.DOTALL)
    if found is None:
        raise ValueError("no list literal found in model reply")
    try:
        parsed = ast.literal_eval(found.group(0))
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
        raise ValueError(f"unparsable list literal: {err}") from err
    if not isinstance(parsed, list):
        raise ValueError("model reply did not evaluate to a list")
    return parsed


try:
    for key in tqdm(all_pair_key[770:]):
        for value in all_pair_id[key]:
            # Rebuild the relative image path used as the lookup key.
            retival_path = path2 + key + '_' + value + '.png'
            ids = retrival_dict.get(retival_path)
            if ids is None:
                # Stems with extra '_' or '.' do not round-trip; skip them
                # instead of aborting the whole run with a KeyError.
                print(f"No dataset record for {retival_path}, skipping")
                continue

            record = data[ids]
            image_path = path + record['image']

            # Step 1: ask the vision model for the interactable elements.
            inputs = {"image": img2base64(image_path), "question": _STEP1_QUESTION}
            answer = chat_model.chat(inputs)

            record['icon_list_raw'] = answer
            pprint.pprint(answer)

            # Short pause before each remote API call (presumably to stay
            # under the service's rate limit).
            time.sleep(2)

            # Step 2: have the text model reformat the raw answer as a list.
            api_request_json = {
                "model": "llama3-70b",
                "messages": [
                    {"role": "system", "content": "You are a assistant that will handle the corresponding text formatting for me."},
                    {"role": "user", "content": answer + prompt},
                ],
                "max_tokens": 1024,
            }

            try:
                response = llama.run(api_request_json)
                new_answer = response.json()['choices'][0]['message']['content']
            except Exception as e:
                # Best effort: back off and move on to the next image.
                print(f"Error in LLAMA API Generation : {e}")
                time.sleep(30)
                continue

            print('======================================================')
            pprint.pprint(new_answer)
            print('======================================================')

            # SECURITY: the reply used to be passed to exec(), which runs
            # arbitrary remote text and could silently reuse the previous
            # iteration's `elements`. It is now parsed as a literal only.
            try:
                record['icon_list'] = _parse_elements(new_answer)
            except (TypeError, ValueError) as e:
                print(f"Error in setting data[ids]['icon_list']: {e}")
finally:
    # Persist whatever was annotated, even when the run is interrupted or
    # fails part-way through; the exception still propagates afterwards.
    write_json('/code/ICLR_2024/Auto-GUI/dataset/blip/single_blip_train_llava_10000_caption_elements_llama3_70b.json', data)
|
|
| |
|
|