diff --git "a/phi35_all_coco_lre4_e3_0926.jsonl" "b/phi35_all_coco_lre4_e3_0926.jsonl" --- "a/phi35_all_coco_lre4_e3_0926.jsonl" +++ "b/phi35_all_coco_lre4_e3_0926.jsonl" @@ -663,3 +663,338 @@ {"system": null, "query": "\nPrevious Actions: Goal: What is the capital of China?; The action is #DUAL_POINT#; touch_point: 0.8706, 0.5317, lift_point: 0.8706, 0.5317; The action is #TYPE#; And the content is : \"typed_text\": \"capital of China\"; The action is #DUAL_POINT#; touch_point: 0.1411, 0.397, lift_point: 0.1411, 0.397; The action is #DUAL_POINT#; touch_point: 0.6228, 0.485, lift_point: 0.6228, 0.485; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.7647, 0.6551, lift_point: 0.7647, 0.6551; The action is #DUAL_POINT#; touch_point: 0.4288, 0.9313, lift_point: 0.4288, 0.9313; The action is #DUAL_POINT#; touch_point: 0.4288, 0.9313, lift_point: 0.4288, 0.9313; The action is #DUAL_POINT#; touch_point: 0.4288, 0.9313, lift_point: 0.4288, 0.9313; The action is #DUAL_POINT#; touch_point: 0.4303, 0.9281, lift_point: 0.4303, 0.9281; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.429, 0.922, lift_point: 0.429, 0.922; The action is #DUAL_POINT#; touch_point: 0.4275, 0.9255, lift_point: 0.4275, 0.9255; The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286; The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286; The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286; The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286; The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286; The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286; The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286", "response": "The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286", "label": "The action is #DUAL_POINT#; touch_point: 0.4936, 0.9286, lift_point: 0.4936, 0.9286", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/42767_52.png"]} {"system": null, "query": "\nPrevious Actions: Goal: What's the latest tech news?; The action is #DUAL_POINT#; touch_point: 0.8006, 0.6613, lift_point: 0.8006, 0.6613; The action is #DUAL_POINT#; touch_point: 0.052, 0.4418, lift_point: 0.052, 0.4418; The action is #TYPE#; And the content is : \"typed_text\": \" latest tech news?\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1191, 0.2189, lift_point: 0.1191, 0.2189", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/52611_4.png"]} {"system": null, "query": "\nPrevious Actions: Goal: Open a new window in the chrome app; The action is #DUAL_POINT#; touch_point: 0.7642, 0.6938, lift_point: 0.7642, 0.6938; The action is #DUAL_POINT#; touch_point: 0.066, 0.9402, lift_point: 0.066, 0.9402", "response": "The action is #DUAL_POINT#; touch_point: 0.1189, 0.6581, lift_point: 0.1189, 0.6581", "label": "The action is #DUAL_POINT#; touch_point: 0.1186, 0.6529, lift_point: 0.1186, 0.6529", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/36686_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Malaysia?; The action is #DUAL_POINT#; touch_point: 0.7875, 0.6569, lift_point: 0.7875, 0.6569; The action is #DUAL_POINT#; touch_point: 0.0662, 0.3991, lift_point: 0.0662, 0.3991", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"news in malaysia\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"malaysia\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/80444_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Go to Reddit; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8903, 0.4114, lift_point: 0.8903, 0.4114; The action is #DUAL_POINT#; touch_point: 0.0852, 0.3729, lift_point: 0.0852, 0.3729; The action is #TYPE#; And the content is : \"typed_text\": \"Reddit\"; The action is #DUAL_POINT#; touch_point: 0.1092, 0.3313, lift_point: 0.1092, 0.3313", "response": "The action is #DUAL_POINT#; touch_point: 0.522, 0.1247, lift_point: 0.522, 0.1247", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76300_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Five Guys?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7974, 0.7043, lift_point: 0.7974, 0.7043; The action is #DUAL_POINT#; touch_point: 0.063, 0.3738, lift_point: 0.063, 0.3738; The action is #TYPE#; And the content is : \"typed_text\": \"menu at Five Guys\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #DUAL_POINT#; touch_point: 0.118, 0.2694, lift_point: 0.118, 0.2694", "label": "The action is #DUAL_POINT#; touch_point: 0.1219, 0.266, lift_point: 0.1219, 0.266", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/26633_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the calendar and show me this week's events?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5943, 0.3915, lift_point: 0.5943, 0.3915; The action is #DUAL_POINT#; touch_point: 0.2742, 0.3428, lift_point: 0.2742, 0.3428; The action is #DUAL_POINT#; touch_point: 0.0705, 0.9113, lift_point: 0.0705, 0.9113; The action is #DUAL_POINT#; touch_point: 0.0633, 0.4191, lift_point: 0.0633, 0.4191", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"Open the calendar and show me this week's events \"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Open the calendar and show me this weeks events? \"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/11946_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 3pm; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3974, 0.6744, lift_point: 0.3974, 0.6744; The action is #DUAL_POINT#; touch_point: 0.101, 0.1315, lift_point: 0.101, 0.1315; The action is #DUAL_POINT#; touch_point: 0.8899, 0.5328, lift_point: 0.8899, 0.5328; The action is #DUAL_POINT#; touch_point: 0.5264, 0.7463, lift_point: 0.5264, 0.7463; The action is #DUAL_POINT#; touch_point: 0.4094, 0.4969, lift_point: 0.4094, 0.4969; The action is #DUAL_POINT#; touch_point: 0.3131, 0.7597, lift_point: 0.3131, 0.7597; The action is #DUAL_POINT#; touch_point: 0.5751, 0.7483, lift_point: 0.5751, 0.7483; The action is #DUAL_POINT#; touch_point: 0.6312, 0.5665, lift_point: 0.6312, 0.5665; The action is #DUAL_POINT#; touch_point: 0.4304, 0.6323, lift_point: 0.4304, 0.6323; The action is #DUAL_POINT#; touch_point: 0.4683, 0.7203, lift_point: 0.4683, 0.7203; The action is #DUAL_POINT#; touch_point: 0.5322, 0.7355, lift_point: 0.5322, 0.7355; The action is #DUAL_POINT#; touch_point: 0.5737, 0.7326, lift_point: 0.5737, 0.7326; The action is #DUAL_POINT#; touch_point: 0.6219, 0.6098, lift_point: 0.6219, 0.6098; The action is #DUAL_POINT#; touch_point: 0.6599, 0.5324, lift_point: 0.6599, 0.5324; The action is #DUAL_POINT#; touch_point: 0.6382, 0.3902, lift_point: 0.6382, 0.3902; The action is #DUAL_POINT#; touch_point: 0.5897, 0.2898, lift_point: 0.5897, 0.2898; The action is #DUAL_POINT#; touch_point: 0.524, 0.262, lift_point: 0.524, 0.262; The action is #DUAL_POINT#; touch_point: 0.4545, 0.2854, lift_point: 0.4545, 0.2854; The action is #DUAL_POINT#; touch_point: 0.4112, 0.3784, lift_point: 0.4112, 0.3784; The action is #DUAL_POINT#; touch_point: 0.4019, 0.5089, lift_point: 0.4019, 0.5089; The action is #DUAL_POINT#; touch_point: 0.721, 0.762, lift_point: 0.721, 0.762; The action is #DUAL_POINT#; touch_point: 0.8425, 0.8956, lift_point: 0.8425, 0.8956", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.7761, 0.7667, lift_point: 0.7761, 0.7667", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/58547_25.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Sri Lanka?; The action is #DUAL_POINT#; touch_point: 0.892, 0.5796, lift_point: 0.892, 0.5796; The action is #PRESS_ENTER#; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Sri Lanka?\"; The action is #PRESS_ENTER#", "response": "The action is #DUAL_POINT#; touch_point: 0.2712, 0.5767, lift_point: 0.2712, 0.5767", "label": "The action is #DUAL_POINT#; touch_point: 0.7979, 0.5803, lift_point: 0.7979, 0.5803", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/70840_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Brazil?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8064, 0.5777, lift_point: 0.8064, 0.5777; The action is #DUAL_POINT#; touch_point: 0.8087, 0.6632, lift_point: 0.8087, 0.6632; The action is #DUAL_POINT#; touch_point: 0.0627, 0.9311, lift_point: 0.0627, 0.9311; The action is #Scrolling Down#; The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of Brazil?\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/14144_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 7pm; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5938, 0.718, lift_point: 0.5938, 0.718; The action is #DUAL_POINT#; touch_point: 0.1164, 0.1471, lift_point: 0.1164, 0.1471; The action is #DUAL_POINT#; touch_point: 0.8339, 0.5343, lift_point: 0.8339, 0.5343; The action is #Scrolling Up#; The action is #Scrolling Up#", "response": "The action is #Scrolling Up#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/57165_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of France?; The action is #TYPE#; And the content is : \"typed_text\": \"capital of France\"; The action is #DUAL_POINT#; touch_point: 0.1799, 0.3409, lift_point: 0.1799, 0.3409", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29182_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does the HisenseTV cost?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8252, 0.5962, lift_point: 0.8252, 0.5962; The action is #DUAL_POINT#; touch_point: 0.8001, 0.609, lift_point: 0.8001, 0.609; The action is #DUAL_POINT#; touch_point: 0.8001, 0.609, lift_point: 0.8001, 0.609; The action is #TYPE#; And the content is : \"typed_text\": \"How much does the HisenseTV cost?\"; The action is #TYPE#; And the content is : \"typed_text\": \"How much does the HisenseTV cost?\"; The action is #DUAL_POINT#; touch_point: 0.0428, 0.4877, lift_point: 0.0428, 0.4877; The action is #TYPE#; And the content is : \"typed_text\": \"How much does the HisenseTV cost?\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1216, 0.4788, lift_point: 0.1216, 0.4788", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/49441_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Turn on notifications for the Google Maps app; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8662, 0.1249, lift_point: 0.8662, 0.1249; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.3776, 0.5482, lift_point: 0.3776, 0.5482; The action is #DUAL_POINT#; touch_point: 0.6163, 0.5461, lift_point: 0.6163, 0.5461; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6944, 0.4068, lift_point: 0.6944, 0.4068; The action is #DUAL_POINT#; touch_point: 0.6993, 0.5955, lift_point: 0.6993, 0.5955; The action is #DUAL_POINT#; touch_point: 0.4782, 0.8456, lift_point: 0.4782, 0.8456", "response": "The action is #DUAL_POINT#; touch_point: 0.4782, 0.8456, lift_point: 0.4782, 0.8456", "label": "The action is #DUAL_POINT#; touch_point: 0.4782, 0.8456, lift_point: 0.4782, 0.8456", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/28603_21.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does the Samsung TV cost?; The action is #DUAL_POINT#; touch_point: 0.7749, 0.715, lift_point: 0.7749, 0.715; The action is #DUAL_POINT#; touch_point: 0.0653, 0.2838, lift_point: 0.0653, 0.2838", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"samsung tv cost\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"How much does the Samsung TV cost?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/47766_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new mascara; The action is #DUAL_POINT#; touch_point: 0.882, 0.6251, lift_point: 0.882, 0.6251; The action is #TYPE#; And the content is : \"typed_text\": \"mascara\"; The action is #DUAL_POINT#; touch_point: 0.2502, 0.316, lift_point: 0.2502, 0.316", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.2995, 0.2829, lift_point: 0.2995, 0.2829", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/51662_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Zurich to Buenos aires; The action is #DUAL_POINT#; touch_point: 0.7884, 0.7088, lift_point: 0.7884, 0.7088; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8173, 0.6623, lift_point: 0.8173, 0.6623; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6821, lift_point: 0.7794, 0.6821; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6933, lift_point: 0.7794, 0.6933; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8112, 0.675, lift_point: 0.8112, 0.675; The action is #DUAL_POINT#; touch_point: 0.81, 0.6775, lift_point: 0.81, 0.6775; The action is #DUAL_POINT#; touch_point: 0.0635, 0.5123, lift_point: 0.0635, 0.5123; The action is #TYPE#; And the content is : \"typed_text\": \"flight from zurich to buenos aires\"; The action is #DUAL_POINT#; touch_point: 0.2101, 0.4208, lift_point: 0.2101, 0.4208; The action is #DUAL_POINT#; touch_point: 0.8827, 0.205, lift_point: 0.8827, 0.205; The action is #DUAL_POINT#; touch_point: 0.6941, 0.5458, lift_point: 0.6941, 0.5458", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.3321, 0.1299, lift_point: 0.3321, 0.1299", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/68164_18.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Boston to Zurich; The action is #PRESS_HOME#", "response": "The action is #DUAL_POINT#; touch_point: 0.8316, 0.6021, lift_point: 0.8316, 0.6021", "label": "The action is #DUAL_POINT#; touch_point: 0.8188, 0.69, lift_point: 0.8188, 0.69", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/41586_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new lipgloss; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4735, 0.8521, lift_point: 0.4735, 0.8521; The action is #DUAL_POINT#; touch_point: 0.4735, 0.8572, lift_point: 0.4735, 0.8572; The action is #DUAL_POINT#; touch_point: 0.4731, 0.8504, lift_point: 0.4731, 0.8504; The action is #DUAL_POINT#; touch_point: 0.4819, 0.9037, lift_point: 0.4819, 0.9037; The action is #DUAL_POINT#; touch_point: 0.0831, 0.5901, lift_point: 0.0831, 0.5901; The action is #DUAL_POINT#; touch_point: 0.0705, 0.9081, lift_point: 0.0705, 0.9081; The action is #DUAL_POINT#; touch_point: 0.0651, 0.5329, lift_point: 0.0651, 0.5329; The action is #TYPE#; And the content is : \"typed_text\": \"Search for a new lipgloss \"; The action is #DUAL_POINT#; touch_point: 0.1519, 0.4733, lift_point: 0.1519, 0.4733; The action is #DUAL_POINT#; touch_point: 0.3788, 0.3476, lift_point: 0.3788, 0.3476; The action is #DUAL_POINT#; touch_point: 0.9086, 0.8965, lift_point: 0.9086, 0.8965; The action is #DUAL_POINT#; touch_point: 0.7091, 0.4199, lift_point: 0.7091, 0.4199; The action is #DUAL_POINT#; touch_point: 0.7042, 0.4983, lift_point: 0.7042, 0.4983; The action is #DUAL_POINT#; touch_point: 0.7214, 0.5225, lift_point: 0.7214, 0.5225; The action is #DUAL_POINT#; touch_point: 0.7956, 0.4719, lift_point: 0.7956, 0.4719; The action is #DUAL_POINT#; touch_point: 0.1883, 0.8366, lift_point: 0.1883, 0.8366; The action is #DUAL_POINT#; touch_point: 0.7233, 0.3531, lift_point: 0.7233, 0.3531; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/65395_22.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 2 bedroom apartment rent for in Washington DC?", "response": "The action is #DUAL_POINT#; touch_point: 0.8441, 0.6171, lift_point: 0.8441, 0.6171", "label": "The action is #DUAL_POINT#; touch_point: 0.8421, 0.6318, lift_point: 0.8421, 0.6318", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/25873_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the Spotify app", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.7264, 0.1487, lift_point: 0.7264, 0.1487", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/40161_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of France?; The action is #DUAL_POINT#; touch_point: 0.0525, 0.4109, lift_point: 0.0525, 0.4109; The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of France?\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.6834, 0.1475, lift_point: 0.6834, 0.1475; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6729, 0.321, lift_point: 0.6729, 0.321; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6606, 0.6509, lift_point: 0.6606, 0.6509; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1015, 0.551, lift_point: 0.1015, 0.551; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.9445, 0.8241, lift_point: 0.9445, 0.8241; The action is #DUAL_POINT#; touch_point: 0.2204, 0.6182, lift_point: 0.2204, 0.6182; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/59443_40.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Brazil?; The action is #DUAL_POINT#; touch_point: 0.8276, 0.5924, lift_point: 0.8276, 0.5924; The action is #DUAL_POINT#; touch_point: 0.1873, 0.3683, lift_point: 0.1873, 0.3683; The action is #DUAL_POINT#; touch_point: 0.121, 0.9421, lift_point: 0.121, 0.9421; The action is #TYPE#; And the content is : \"typed_text\": \" capital of Brazil\"; The action is #DUAL_POINT#; touch_point: 0.1693, 0.3372, lift_point: 0.1693, 0.3372; The action is #DUAL_POINT#; touch_point: 0.5779, 0.1509, lift_point: 0.5779, 0.1509", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/4324_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the 1000-Watt EGO Power+ Snow Blower?; The action is #DUAL_POINT#; touch_point: 0.8145, 0.6085, lift_point: 0.8145, 0.6085; The action is #DUAL_POINT#; touch_point: 0.8145, 0.6085, lift_point: 0.8145, 0.6085; The action is #DUAL_POINT#; touch_point: 0.1785, 0.8915, lift_point: 0.1785, 0.8915; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the price of the 1000-Watt EGO Power+ Snow Blower?\"; The action is #PRESS_ENTER#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.2495, 0.0452, lift_point: 0.2495, 0.0452", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/49383_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Boston?", "response": "The action is #DUAL_POINT#; touch_point: 0.7855, 0.6894, lift_point: 0.7855, 0.6894", "label": "The action is #DUAL_POINT#; touch_point: 0.7756, 0.6926, lift_point: 0.7756, 0.6926", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/46834_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new mascara on Sephora; The action is #DUAL_POINT#; touch_point: 0.7857, 0.6857, lift_point: 0.7857, 0.6857; The action is #DUAL_POINT#; touch_point: 0.9056, 0.5086, lift_point: 0.9056, 0.5086; The action is #DUAL_POINT#; touch_point: 0.8936, 0.8628, lift_point: 0.8936, 0.8628; The action is #DUAL_POINT#; touch_point: 0.2862, 0.4856, lift_point: 0.2862, 0.4856; The action is #DUAL_POINT#; touch_point: 0.2875, 0.4116, lift_point: 0.2875, 0.4116; The action is #TYPE#; And the content is : \"typed_text\": \"new mascara on Sephora\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3686, 0.184, lift_point: 0.3686, 0.184; The action is #DUAL_POINT#; touch_point: 0.6881, 0.58, lift_point: 0.6881, 0.58; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5503, 0.5891, lift_point: 0.5503, 0.5891; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.168, 0.3305, lift_point: 0.168, 0.3305; The action is #DUAL_POINT#; touch_point: 0.1798, 0.3222, lift_point: 0.1798, 0.3222; The action is #TYPE#; And the content is : \"typed_text\": \"new mascara\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44996_21.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant in San Diego?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8027, 0.672, lift_point: 0.8027, 0.672; The action is #DUAL_POINT#; touch_point: 0.2187, 0.6753, lift_point: 0.2187, 0.6753; The action is #DUAL_POINT#; touch_point: 0.1355, 0.9371, lift_point: 0.1355, 0.9371; The action is #TYPE#; And the content is : \"typed_text\": \"Whats a good restaurant in San Diego?\"; The action is #PRESS_ENTER#", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/4007_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Sydney to Buenos aires; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3745, 0.4935, lift_point: 0.3745, 0.4935; The action is #DUAL_POINT#; touch_point: 0.0542, 0.4256, lift_point: 0.0542, 0.4256; The action is #TYPE#; And the content is : \"typed_text\": \"flights from Sydney to Buenos aires\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3063, 0.6541, lift_point: 0.3063, 0.6541; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3338, 0.5289, lift_point: 0.3338, 0.5289; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1764, 0.6049, lift_point: 0.1764, 0.6049; The action is #DUAL_POINT#; touch_point: 0.4134, 0.6631, lift_point: 0.4134, 0.6631; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/20704_28.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Google the capital of Canada; The action is #DUAL_POINT#; touch_point: 0.7958, 0.6923, lift_point: 0.7958, 0.6923; The action is #DUAL_POINT#; touch_point: 0.7958, 0.6923, lift_point: 0.7958, 0.6923; The action is #DUAL_POINT#; touch_point: 0.7958, 0.6923, lift_point: 0.7958, 0.6923; The action is #DUAL_POINT#; touch_point: 0.0531, 0.6038, lift_point: 0.0531, 0.6038; The action is #DUAL_POINT#; touch_point: 0.0579, 0.921, lift_point: 0.0579, 0.921; The action is #TYPE#; And the content is : \"typed_text\": \" capital of Canada\"; The action is #PRESS_ENTER#", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/45292_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5016, 0.487, lift_point: 0.5016, 0.487; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.0826, 0.1155, lift_point: 0.0826, 0.1155; The action is #DUAL_POINT#; touch_point: 0.2721, 0.5962, lift_point: 0.2721, 0.5962; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.0833, 0.8883, lift_point: 0.0833, 0.8883; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0766, 0.873, lift_point: 0.0766, 0.873; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/42617_49.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the speed of light?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6506, 0.6833, lift_point: 0.6506, 0.6833; The action is #DUAL_POINT#; touch_point: 0.653, 0.6833, lift_point: 0.653, 0.6833; The action is #DUAL_POINT#; touch_point: 0.0568, 0.5975, lift_point: 0.0568, 0.5975; The action is #DUAL_POINT#; touch_point: 0.0549, 0.9429, lift_point: 0.0549, 0.9429; The action is #TYPE#; And the content is : \"typed_text\": \"What is the speed of light\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1192, 0.5923, lift_point: 0.1192, 0.5923", "label": "The action is #DUAL_POINT#; touch_point: 0.1167, 0.4031, lift_point: 0.1167, 0.4031", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29819_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Seoul?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/25004_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Buenos aires; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8012, 0.6957, lift_point: 0.8012, 0.6957; The action is #DUAL_POINT#; touch_point: 0.7813, 0.6517, lift_point: 0.7813, 0.6517; The action is #DUAL_POINT#; touch_point: 0.7813, 0.6517, lift_point: 0.7813, 0.6517; The action is #DUAL_POINT#; touch_point: 0.0642, 0.6339, lift_point: 0.0642, 0.6339; The action is #DUAL_POINT#; touch_point: 0.9935, 0.466, lift_point: 0.9935, 0.466; The action is #TYPE#; And the content is : \"typed_text\": \"flights from NYC to Buenos aires\"; The action is #DUAL_POINT#; touch_point: 0.4465, 0.9944, lift_point: 0.4465, 0.9944; The action is #DUAL_POINT#; touch_point: 0.0622, 0.3084, lift_point: 0.0622, 0.3084; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.0509, 0.1702, lift_point: 0.0509, 0.1702", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/2733_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the Spotify app; The action is #DUAL_POINT#; touch_point: 0.7785, 0.6747, lift_point: 0.7785, 0.6747; The action is #DUAL_POINT#; touch_point: 0.9707, 0.5144, lift_point: 0.9707, 0.5144; The action is #DUAL_POINT#; touch_point: 0.7778, 0.4917, lift_point: 0.7778, 0.4917; The action is #DUAL_POINT#; touch_point: 0.5702, 0.2163, lift_point: 0.5702, 0.2163", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.7842, 0.5074, lift_point: 0.7842, 0.5074", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/17765_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How big is a dinosaur?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/72227_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on Reddit this week; The action is #DUAL_POINT#; touch_point: 0.8539, 0.651, lift_point: 0.8539, 0.651; The action is #DUAL_POINT#; touch_point: 0.0449, 0.5533, lift_point: 0.0449, 0.5533; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on Reddit this week\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.3031, 0.3016, lift_point: 0.3031, 0.3016", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.9173, 0.8478, lift_point: 0.9173, 0.8478", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/25994_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Orlando", "response": "The action is #DUAL_POINT#; touch_point: 0.7743, 0.6858, lift_point: 0.7743, 0.6858", "label": "The action is #DUAL_POINT#; touch_point: 0.7698, 0.6702, lift_point: 0.7698, 0.6702", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/54204_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the Galaxy phone on eBay?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3757, 0.6124, lift_point: 0.3757, 0.6124; The action is #DUAL_POINT#; touch_point: 0.3547, 0.6205, lift_point: 0.3547, 0.6205; The action is #DUAL_POINT#; touch_point: 0.0715, 0.5857, lift_point: 0.0715, 0.5857; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the price of the Galaxy phone on eBay?\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.0426, 0.819, lift_point: 0.0426, 0.819", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/37713_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Bolivia?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7723, 0.6778, lift_point: 0.7723, 0.6778", "response": "The action is #DUAL_POINT#; touch_point: 0.0622, 0.5224, lift_point: 0.0622, 0.5224", "label": "The action is #DUAL_POINT#; touch_point: 0.0473, 0.3652, lift_point: 0.0473, 0.3652", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/11465_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Washington DC; The action is #DUAL_POINT#; touch_point: 0.8082, 0.6143, lift_point: 0.8082, 0.6143; The action is #DUAL_POINT#; touch_point: 0.0743, 0.5173, lift_point: 0.0743, 0.5173", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"hotels in Washington DC\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"hotels in Washington DC\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/12684_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest news in cosmology?; The action is #DUAL_POINT#; touch_point: 0.8867, 0.5388, lift_point: 0.8867, 0.5388; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the latest news in cosmology?\"; The action is #DUAL_POINT#; touch_point: 0.1717, 0.5435, lift_point: 0.1717, 0.5435; The action is #DUAL_POINT#; touch_point: 0.1137, 0.6987, lift_point: 0.1137, 0.6987; The action is #DUAL_POINT#; touch_point: 0.1137, 0.6987, lift_point: 0.1137, 0.6987; The action is #DUAL_POINT#; touch_point: 0.1159, 0.6132, lift_point: 0.1159, 0.6132", "response": "The action is #PRESS_ENTER#", "label": "The action is #DUAL_POINT#; touch_point: 0.0596, 0.9275, lift_point: 0.0596, 0.9275", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/82854_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Where can I buy a nice beach towel?; The action is #PRESS_HOME#; The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.4075, 0.2072, lift_point: 0.4075, 0.2072; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8376, 0.5474, lift_point: 0.8376, 0.5474; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.4472, 0.7619, lift_point: 0.4472, 0.7619; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4026, 0.2483, lift_point: 0.4026, 0.2483; The action is #DUAL_POINT#; touch_point: 0.3146, 0.2455, lift_point: 0.3146, 0.2455; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.2281, 0.1615, lift_point: 0.2281, 0.1615; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8576, 0.701, lift_point: 0.8576, 0.701; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79301_55.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Sydney to Buenos aires; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5378, 0.1475, lift_point: 0.5378, 0.1475; The action is #DUAL_POINT#; touch_point: 0.5619, 0.1167, lift_point: 0.5619, 0.1167; The action is #DUAL_POINT#; touch_point: 0.0562, 0.487, lift_point: 0.0562, 0.487; The action is #DUAL_POINT#; touch_point: 0.0534, 0.9112, lift_point: 0.0534, 0.9112; The action is #DUAL_POINT#; touch_point: 0.0619, 0.4799, lift_point: 0.0619, 0.4799; The action is #TYPE#; And the content is : \"typed_text\": \"flights from Sydney to Buenos aires \"; The action is #DUAL_POINT#; touch_point: 0.1134, 0.4101, lift_point: 0.1134, 0.4101; The action is #DUAL_POINT#; touch_point: 0.5912, 0.3496, lift_point: 0.5912, 0.3496; The action is #DUAL_POINT#; touch_point: 0.9431, 0.52, lift_point: 0.9431, 0.52; The action is #DUAL_POINT#; touch_point: 0.1021, 0.9745, lift_point: 0.1021, 0.9745; The action is #DUAL_POINT#; touch_point: 0.5861, 0.5358, lift_point: 0.5861, 0.5358; The action is #DUAL_POINT#; touch_point: 0.4939, 0.3748, lift_point: 0.4939, 0.3748; The action is #DUAL_POINT#; touch_point: 0.4843, 0.3748, lift_point: 0.4843, 0.3748; The action is #DUAL_POINT#; touch_point: 0.5964, 0.4444, lift_point: 0.5964, 0.4444", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.5567, 0.4671, lift_point: 0.5567, 0.4671", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/19017_18.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in New York?; The action is #DUAL_POINT#; touch_point: 0.0575, 0.0365, lift_point: 0.0575, 0.0365; The action is #DUAL_POINT#; touch_point: 0.8912, 0.667, lift_point: 0.8912, 0.667; The action is #DUAL_POINT#; touch_point: 0.8921, 0.3941, lift_point: 0.8921, 0.3941; The action is #DUAL_POINT#; touch_point: 0.8787, 0.3913, lift_point: 0.8787, 0.3913; The action is #DUAL_POINT#; touch_point: 0.7438, 0.7177, lift_point: 0.7438, 0.7177; The action is #DUAL_POINT#; touch_point: 0.7785, 0.6992, lift_point: 0.7785, 0.6992; The action is #DUAL_POINT#; touch_point: 0.8149, 0.6966, lift_point: 0.8149, 0.6966; The action is #DUAL_POINT#; touch_point: 0.0722, 0.4065, lift_point: 0.0722, 0.4065; The action is #TYPE#; And the content is : \"typed_text\": \"What time is it in New York?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1351, 0.6482, lift_point: 0.1351, 0.6482; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2715, 0.6476, lift_point: 0.2715, 0.6476; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0528, 0.2859, lift_point: 0.0528, 0.2859; The action is #DUAL_POINT#; touch_point: 0.8191, 0.4412, lift_point: 0.8191, 0.4412; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.4471, 0.1787, lift_point: 0.4471, 0.1787; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8042, 0.3148, lift_point: 0.8042, 0.3148; The action is #DUAL_POINT#; touch_point: 0.6879, 0.5777, lift_point: 0.6879, 0.5777; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8361, 0.2862, lift_point: 0.8361, 0.2862; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8366, 0.182, lift_point: 0.8366, 0.182; The action is #DUAL_POINT#; touch_point: 0.6461, 0.3885, lift_point: 0.6461, 0.3885", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/13941_51.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 7pm; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5938, 0.718, lift_point: 0.5938, 0.718", "response": "The action is #DUAL_POINT#; touch_point: 0.113, 0.1284, lift_point: 0.113, 0.1284", "label": "The action is #DUAL_POINT#; touch_point: 0.1164, 0.1471, lift_point: 0.1164, 0.1471", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/57165_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Sweden?; The action is #DUAL_POINT#; touch_point: 0.7349, 0.6118, lift_point: 0.7349, 0.6118; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7081, 0.6262, lift_point: 0.7081, 0.6262; The action is #DUAL_POINT#; touch_point: 0.0856, 0.4623, lift_point: 0.0856, 0.4623", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"capital of Sweden\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"capital of sweden ?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/22092_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Zurich to Buenos aires; The action is #DUAL_POINT#; touch_point: 0.7884, 0.7088, lift_point: 0.7884, 0.7088; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8173, 0.6623, lift_point: 0.8173, 0.6623; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6821, lift_point: 0.7794, 0.6821; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6933, lift_point: 0.7794, 0.6933; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8112, 0.675, lift_point: 0.8112, 0.675; The action is #DUAL_POINT#; touch_point: 0.81, 0.6775, lift_point: 0.81, 0.6775; The action is #DUAL_POINT#; touch_point: 0.0635, 0.5123, lift_point: 0.0635, 0.5123; The action is #TYPE#; And the content is : \"typed_text\": \"flight from zurich to buenos aires\"; The action is #DUAL_POINT#; touch_point: 0.2101, 0.4208, lift_point: 0.2101, 0.4208; The action is #DUAL_POINT#; touch_point: 0.8827, 0.205, lift_point: 0.8827, 0.205; The action is #DUAL_POINT#; touch_point: 0.6941, 0.5458, lift_point: 0.6941, 0.5458; The action is #DUAL_POINT#; touch_point: 0.3321, 0.1299, lift_point: 0.3321, 0.1299; The action is #DUAL_POINT#; touch_point: 0.1229, 0.933, lift_point: 0.1229, 0.933; The action is #DUAL_POINT#; touch_point: 0.4991, 0.8948, lift_point: 0.4991, 0.8948; The action is #DUAL_POINT#; touch_point: 0.402, 0.9084, lift_point: 0.402, 0.9084; The action is #DUAL_POINT#; touch_point: 0.8984, 0.5131, lift_point: 0.8984, 0.5131; The action is #DUAL_POINT#; touch_point: 0.6419, 0.9862, lift_point: 0.6419, 0.9862; The action is #DUAL_POINT#; touch_point: 0.5557, 0.4411, lift_point: 0.5557, 0.4411; The action is #DUAL_POINT#; touch_point: 0.4457, 0.1982, lift_point: 0.4457, 0.1982; The action is #DUAL_POINT#; touch_point: 0.3076, 0.6403, lift_point: 0.3076, 0.6403; The action is #DUAL_POINT#; touch_point: 0.1304, 0.3286, lift_point: 0.1304, 0.3286; The action is #DUAL_POINT#; touch_point: 0.9007, 0.396, lift_point: 0.9007, 0.396; The action is #DUAL_POINT#; touch_point: 0.5737, 0.5247, lift_point: 0.5737, 0.5247; The action is #DUAL_POINT#; touch_point: 0.6477, 0.4744, lift_point: 0.6477, 0.4744; The action is #DUAL_POINT#; touch_point: 0.5543, 0.7882, lift_point: 0.5543, 0.7882", "response": "The action is #Scrolling Down#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/68164_32.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the Samsung TV?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8923, 0.5241, lift_point: 0.8923, 0.5241; The action is #DUAL_POINT#; touch_point: 0.8908, 0.4164, lift_point: 0.8908, 0.4164; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the price of the Samsung TV\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.2978, 0.5272, lift_point: 0.2978, 0.5272", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.5445, 0.8611, lift_point: 0.5445, 0.8611", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/70305_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Puerto Rico?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8872, 0.2415, lift_point: 0.8872, 0.2415; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Puerto Rico\"; The action is #PRESS_ENTER#", "response": "The action is #DUAL_POINT#; touch_point: 0.3522, 0.5422, lift_point: 0.3522, 0.5422", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44772_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Seoul?; The action is #DUAL_POINT#; touch_point: 0.7952, 0.6984, lift_point: 0.7952, 0.6984; The action is #DUAL_POINT#; touch_point: 0.8001, 0.6984, lift_point: 0.8001, 0.6984", "response": "The action is #DUAL_POINT#; touch_point: 0.9095, 0.5205, lift_point: 0.9095, 0.5205", "label": "The action is #DUAL_POINT#; touch_point: 0.9174, 0.4332, lift_point: 0.9174, 0.4332", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29692_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Show me some nice wallpapers for my desktop", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29623_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the Galaxy phone on eBay?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7825, 0.6728, lift_point: 0.7825, 0.6728; The action is #DUAL_POINT#; touch_point: 0.7946, 0.7096, lift_point: 0.7946, 0.7096; The action is #DUAL_POINT#; touch_point: 0.0617, 0.3892, lift_point: 0.0617, 0.3892; The action is #DUAL_POINT#; touch_point: 0.0649, 0.9182, lift_point: 0.0649, 0.9182; The action is #TYPE#; And the content is : \"typed_text\": \" price of the Galaxy phone on eBay\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1427, 0.4513, lift_point: 0.1427, 0.4513", "label": "The action is #DUAL_POINT#; touch_point: 0.1406, 0.4874, lift_point: 0.1406, 0.4874", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/84111_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Pakistan?; The action is #DUAL_POINT#; touch_point: 0.7898, 0.6984, lift_point: 0.7898, 0.6984; The action is #DUAL_POINT#; touch_point: 0.0478, 0.2489, lift_point: 0.0478, 0.2489", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"news in Pakistan\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Pakistan\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/26653_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new hair product; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5316, 0.696, lift_point: 0.5316, 0.696; The action is #DUAL_POINT#; touch_point: 0.5139, 0.6907, lift_point: 0.5139, 0.6907; The action is #DUAL_POINT#; touch_point: 0.1398, 0.5773, lift_point: 0.1398, 0.5773; The action is #DUAL_POINT#; touch_point: 0.0667, 0.8632, lift_point: 0.0667, 0.8632; The action is #DUAL_POINT#; touch_point: 0.0272, 0.5224, lift_point: 0.0272, 0.5224; The action is #DUAL_POINT#; touch_point: 0.0729, 0.6006, lift_point: 0.0729, 0.6006; The action is #TYPE#; And the content is : \"typed_text\": \"Search for a new hair product \"", "response": "The action is #DUAL_POINT#; touch_point: 0.1495, 0.4648, lift_point: 0.1495, 0.4648", "label": "The action is #DUAL_POINT#; touch_point: 0.2757, 0.5502, lift_point: 0.2757, 0.5502", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/64415_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Burger King?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/54560_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Guyana?; The action is #DUAL_POINT#; touch_point: 0.8671, 0.5771, lift_point: 0.8671, 0.5771", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Guyana?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Guyana?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/26326_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Denver?; The action is #DUAL_POINT#; touch_point: 0.8869, 0.6155, lift_point: 0.8869, 0.6155; The action is #DUAL_POINT#; touch_point: 0.8879, 0.694, lift_point: 0.8879, 0.694; The action is #TYPE#; And the content is : \"typed_text\": \"How much does a 3 bedroom apartment rent for in Denver?\"; The action is #TYPE#; And the content is : \"typed_text\": \"r in denver\"; The action is #DUAL_POINT#; touch_point: 0.22, 0.2664, lift_point: 0.22, 0.2664", "response": "The action is #DUAL_POINT#; touch_point: 0.3281, 0.4048, lift_point: 0.3281, 0.4048", "label": "The action is #DUAL_POINT#; touch_point: 0.3187, 0.2868, lift_point: 0.3187, 0.2868", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/78434_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest news in cosmology?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7751, 0.6872, lift_point: 0.7751, 0.6872; The action is #DUAL_POINT#; touch_point: 0.0418, 0.4193, lift_point: 0.0418, 0.4193; The action is #TYPE#; And the content is : \"typed_text\": \"cosmology news\"; The action is #DUAL_POINT#; touch_point: 0.1122, 0.1705, lift_point: 0.1122, 0.1705", "response": "The action is #DUAL_POINT#; touch_point: 0.3254, 0.2487, lift_point: 0.3254, 0.2487", "label": "The action is #DUAL_POINT#; touch_point: 0.7796, 0.2768, lift_point: 0.7796, 0.2768", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/39868_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news about the US economy?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.33, 0.5079, lift_point: 0.33, 0.5079; The action is #DUAL_POINT#; touch_point: 0.0703, 0.4272, lift_point: 0.0703, 0.4272; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news about the US economy?\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4574, 0.2937, lift_point: 0.4574, 0.2937; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4371, 0.5148, lift_point: 0.4371, 0.5148", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/35463_12.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Nepal?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.709, 0.6491, lift_point: 0.709, 0.6491; The action is #DUAL_POINT#; touch_point: 0.675, 0.6023, lift_point: 0.675, 0.6023", "response": "The action is #DUAL_POINT#; touch_point: 0.1011, 0.4581, lift_point: 0.1011, 0.4581", "label": "The action is #DUAL_POINT#; touch_point: 0.1174, 0.4815, lift_point: 0.1174, 0.4815", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27326_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5016, 0.487, lift_point: 0.5016, 0.487; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.0826, 0.1155, lift_point: 0.0826, 0.1155; The action is #DUAL_POINT#; touch_point: 0.2721, 0.5962, lift_point: 0.2721, 0.5962; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/42617_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Chicago; The action is #DUAL_POINT#; touch_point: 0.8857, 0.4438, lift_point: 0.8857, 0.4438; The action is #DUAL_POINT#; touch_point: 0.067, 0.3127, lift_point: 0.067, 0.3127; The action is #TYPE#; And the content is : \"typed_text\": \"flights from NYC to Chicago\"; The action is #DUAL_POINT#; touch_point: 0.1437, 0.4795, lift_point: 0.1437, 0.4795", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/6471_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the Galaxy phone on eBay?", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/37713_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Chicago", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/19765_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Where can I buy a nice beach towel?; The action is #PRESS_HOME#; The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79301_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Barcelona to Mexico city; The action is #Scrolling Down#; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5051, 0.6922, lift_point: 0.5051, 0.6922; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.1412, 0.693, lift_point: 0.1412, 0.693", "response": "The action is #DUAL_POINT#; touch_point: 0.0728, 0.8986, lift_point: 0.0728, 0.8986", "label": "The action is #DUAL_POINT#; touch_point: 0.073, 0.8872, lift_point: 0.073, 0.8872", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/77185_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Sydney to Buenos aires; The action is #PRESS_HOME#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/19017_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for 5 star sushi restaurants on Maps; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6608, 0.3757, lift_point: 0.6608, 0.3757; The action is #DUAL_POINT#; touch_point: 0.0655, 0.2999, lift_point: 0.0655, 0.2999; The action is #DUAL_POINT#; touch_point: 0.0739, 0.3835, lift_point: 0.0739, 0.3835; The action is #TYPE#; And the content is : \"typed_text\": \"5 star sushi restaurants\"; The action is #DUAL_POINT#; touch_point: 0.1255, 0.4443, lift_point: 0.1255, 0.4443", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/74295_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of the United Kingdom?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8947, 0.4639, lift_point: 0.8947, 0.4639", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of the United Kingdom?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of the United Kingdom\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44758_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the speed of light?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3718, 0.5032, lift_point: 0.3718, 0.5032; The action is #DUAL_POINT#; touch_point: 0.3641, 0.5107, lift_point: 0.3641, 0.5107; The action is #DUAL_POINT#; touch_point: 0.0365, 0.5117, lift_point: 0.0365, 0.5117; The action is #DUAL_POINT#; touch_point: 0.0496, 0.5092, lift_point: 0.0496, 0.5092; The action is #TYPE#; And the content is : \"typed_text\": \"What is the speed of light?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #TYPE#; And the content is : \"typed_text\": \"What is the speed of light?\"; The action is #DUAL_POINT#; touch_point: 0.2097, 0.3362, lift_point: 0.2097, 0.3362; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.6377, 0.4206, lift_point: 0.6377, 0.4206; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/20581_32.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Google the capital of Paraguay; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5463, 0.4976, lift_point: 0.5463, 0.4976; The action is #DUAL_POINT#; touch_point: 0.8229, 0.2441, lift_point: 0.8229, 0.2441", "response": "The action is #DUAL_POINT#; touch_point: 0.5108, 0.4981, lift_point: 0.5108, 0.4981", "label": "The action is #DUAL_POINT#; touch_point: 0.1028, 0.4618, lift_point: 0.1028, 0.4618", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/24843_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the ESPN app; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.777, 0.4037, lift_point: 0.777, 0.4037; The action is #DUAL_POINT#; touch_point: 0.777, 0.4037, lift_point: 0.777, 0.4037; The action is #DUAL_POINT#; touch_point: 0.777, 0.4037, lift_point: 0.777, 0.4037; The action is #DUAL_POINT#; touch_point: 0.0461, 0.548, lift_point: 0.0461, 0.548; The action is #TYPE#; And the content is : \"typed_text\": \"espn app\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.1419, 0.8166, lift_point: 0.1419, 0.8166; The action is #DUAL_POINT#; touch_point: 0.9164, 0.5564, lift_point: 0.9164, 0.5564", "response": "The action is #DUAL_POINT#; touch_point: 0.9114, 0.262, lift_point: 0.9114, 0.262", "label": "The action is #DUAL_POINT#; touch_point: 0.9183, 0.3832, lift_point: 0.9183, 0.3832", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/49800_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Paris; The action is #DUAL_POINT#; touch_point: 0.2697, 0.5114, lift_point: 0.2697, 0.5114; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6548, 0.5648, lift_point: 0.6548, 0.5648; The action is #DUAL_POINT#; touch_point: 0.0985, 0.4286, lift_point: 0.0985, 0.4286", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"flights from NYC to Paris\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"flights from nyc to paris\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27074_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the ESPN app; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.777, 0.4037, lift_point: 0.777, 0.4037; The action is #DUAL_POINT#; touch_point: 0.777, 0.4037, lift_point: 0.777, 0.4037; The action is #DUAL_POINT#; touch_point: 0.777, 0.4037, lift_point: 0.777, 0.4037; The action is #DUAL_POINT#; touch_point: 0.0461, 0.548, lift_point: 0.0461, 0.548; The action is #TYPE#; And the content is : \"typed_text\": \"espn app\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1255, 0.4167, lift_point: 0.1255, 0.4167", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/49800_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant in Portland?; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4791, 0.8649, lift_point: 0.4791, 0.8649; The action is #DUAL_POINT#; touch_point: 0.3151, 0.5738, lift_point: 0.3151, 0.5738; The action is #DUAL_POINT#; touch_point: 0.064, 0.9079, lift_point: 0.064, 0.9079; The action is #DUAL_POINT#; touch_point: 0.0598, 0.4275, lift_point: 0.0598, 0.4275; The action is #TYPE#; And the content is : \"typed_text\": \"Whats a good restaurant in Portland? \"; The action is #DUAL_POINT#; touch_point: 0.1432, 0.3592, lift_point: 0.1432, 0.3592; The action is #DUAL_POINT#; touch_point: 0.5607, 0.7688, lift_point: 0.5607, 0.7688; The action is #DUAL_POINT#; touch_point: 0.3034, 0.5706, lift_point: 0.3034, 0.5706; The action is #DUAL_POINT#; touch_point: 0.4668, 0.3446, lift_point: 0.4668, 0.3446; The action is #DUAL_POINT#; touch_point: 0.0618, 0.0783, lift_point: 0.0618, 0.0783; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5101, 0.3663, lift_point: 0.5101, 0.3663; The action is #DUAL_POINT#; touch_point: 0.0498, 0.0524, lift_point: 0.0498, 0.0524", "response": "The action is #DUAL_POINT#; touch_point: 0.0648, 0.0764, lift_point: 0.0648, 0.0764", "label": "The action is #DUAL_POINT#; touch_point: 0.0598, 0.1024, lift_point: 0.0598, 0.1024", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/64899_16.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Seoul?; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.5385, 0.7858, lift_point: 0.5385, 0.7858; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8897, 0.2232, lift_point: 0.8897, 0.2232; The action is #TYPE#; And the content is : \"typed_text\": \"weather like in Seoul\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.79, 0.4597, lift_point: 0.79, 0.4597; The action is #DUAL_POINT#; touch_point: 0.6875, 0.5881, lift_point: 0.6875, 0.5881; The action is #DUAL_POINT#; touch_point: 0.792, 0.6095, lift_point: 0.792, 0.6095; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6118, 0.6305, lift_point: 0.6118, 0.6305; The action is #DUAL_POINT#; touch_point: 0.7374, 0.9374, lift_point: 0.7374, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7406, 0.9516, lift_point: 0.7406, 0.9516; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441", "response": "The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441", "label": "The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/53409_51.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the time in New York?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8184, 0.6118, lift_point: 0.8184, 0.6118; The action is #DUAL_POINT#; touch_point: 0.0714, 0.5541, lift_point: 0.0714, 0.5541", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the time in New York?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the time in New York?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/75271_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Paris; The action is #DUAL_POINT#; touch_point: 0.7899, 0.6865, lift_point: 0.7899, 0.6865; The action is #DUAL_POINT#; touch_point: 0.0555, 0.5763, lift_point: 0.0555, 0.5763; The action is #DUAL_POINT#; touch_point: 0.0608, 0.9302, lift_point: 0.0608, 0.9302; The action is #TYPE#; And the content is : \"typed_text\": \"flights from NYC to Paris\"; The action is #DUAL_POINT#; touch_point: 0.1225, 0.4475, lift_point: 0.1225, 0.4475", "response": "The action is #DUAL_POINT#; touch_point: 0.3332, 0.3332, lift_point: 0.3332, 0.3332", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/5133_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Toggle notifications for the Google Maps app; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.778, 0.833, lift_point: 0.778, 0.833; The action is #DUAL_POINT#; touch_point: 0.619, 0.4869, lift_point: 0.619, 0.4869; The action is #DUAL_POINT#; touch_point: 0.3443, 0.3518, lift_point: 0.3443, 0.3518", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/5713_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Barcelona; The action is #DUAL_POINT#; touch_point: 0.5552, 0.619, lift_point: 0.5552, 0.619; The action is #DUAL_POINT#; touch_point: 0.8935, 0.451, lift_point: 0.8935, 0.451; The action is #TYPE#; And the content is : \"typed_text\": \"flights from NYC to Barcelona\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1339, 0.5689, lift_point: 0.1339, 0.5689; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.3936, 0.4892, lift_point: 0.3936, 0.4892", "label": "The action is #DUAL_POINT#; touch_point: 0.5399, 0.5883, lift_point: 0.5399, 0.5883", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/23012_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in Beijing?; The action is #PRESS_HOME#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.5699, 0.164, lift_point: 0.5699, 0.164", "label": "The action is #DUAL_POINT#; touch_point: 0.5651, 0.1852, lift_point: 0.5651, 0.1852", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/18294_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Norway?; The action is #PRESS_HOME#", "response": "The action is #DUAL_POINT#; touch_point: 0.8848, 0.5946, lift_point: 0.8848, 0.5946", "label": "The action is #DUAL_POINT#; touch_point: 0.8814, 0.5825, lift_point: 0.8814, 0.5825", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/70480_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 3pm; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.375, 0.88, lift_point: 0.375, 0.88; The action is #DUAL_POINT#; touch_point: 0.085, 0.1138, lift_point: 0.085, 0.1138", "response": "The action is #DUAL_POINT#; touch_point: 0.2801, 0.2018, lift_point: 0.2801, 0.2018", "label": "The action is #DUAL_POINT#; touch_point: 0.6539, 0.8814, lift_point: 0.6539, 0.8814", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/48893_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Switzerland?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8279, 0.5825, lift_point: 0.8279, 0.5825", "response": "The action is #DUAL_POINT#; touch_point: 0.9285, 0.5487, lift_point: 0.9285, 0.5487", "label": "The action is #DUAL_POINT#; touch_point: 0.9338, 0.5386, lift_point: 0.9338, 0.5386", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29441_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in Paris?; The action is #DUAL_POINT#; touch_point: 0.9793, 0.4828, lift_point: 0.9793, 0.4828; The action is #DUAL_POINT#; touch_point: 0.9793, 0.4828, lift_point: 0.9793, 0.4828", "response": "The action is #DUAL_POINT#; touch_point: 0.8957, 0.4935, lift_point: 0.8957, 0.4935", "label": "The action is #DUAL_POINT#; touch_point: 0.777, 0.6024, lift_point: 0.777, 0.6024", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/65967_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does the HisenseTV cost?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0914, 0.497, lift_point: 0.0914, 0.497; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.6154, 0.1658, lift_point: 0.6154, 0.1658", "label": "The action is #DUAL_POINT#; touch_point: 0.607, 0.1523, lift_point: 0.607, 0.1523", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/10211_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Barcelona to Mexico city; The action is #Scrolling Down#; The action is #PRESS_HOME#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.5165, 0.6865, lift_point: 0.5165, 0.6865", "label": "The action is #DUAL_POINT#; touch_point: 0.5051, 0.6922, lift_point: 0.5051, 0.6922", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/77185_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the speed of light?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6506, 0.6833, lift_point: 0.6506, 0.6833; The action is #DUAL_POINT#; touch_point: 0.653, 0.6833, lift_point: 0.653, 0.6833; The action is #DUAL_POINT#; touch_point: 0.0568, 0.5975, lift_point: 0.0568, 0.5975; The action is #DUAL_POINT#; touch_point: 0.0549, 0.9429, lift_point: 0.0549, 0.9429; The action is #TYPE#; And the content is : \"typed_text\": \"What is the speed of light\"; The action is #DUAL_POINT#; touch_point: 0.1167, 0.4031, lift_point: 0.1167, 0.4031", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29819_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Cheesecake Factory?; The action is #DUAL_POINT#; touch_point: 0.0601, 0.7172, lift_point: 0.0601, 0.7172; The action is #TYPE#; And the content is : \"typed_text\": \"cheesecake factory\"; The action is #DUAL_POINT#; touch_point: 0.2005, 0.3387, lift_point: 0.2005, 0.3387; The action is #DUAL_POINT#; touch_point: 0.7599, 0.2511, lift_point: 0.7599, 0.2511", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.475, 0.4932, lift_point: 0.475, 0.4932", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/48659_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Walmart?; The action is #DUAL_POINT#; touch_point: 0.8077, 0.6506, lift_point: 0.8077, 0.6506; The action is #DUAL_POINT#; touch_point: 0.2986, 0.4424, lift_point: 0.2986, 0.4424; The action is #TYPE#; And the content is : \"typed_text\": \"how do i get to the nearest walmart\"; The action is #DUAL_POINT#; touch_point: 0.2954, 0.5354, lift_point: 0.2954, 0.5354; The action is #TYPE#; And the content is : \"typed_text\": \"nearest walmart\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1431, 0.4598, lift_point: 0.1431, 0.4598", "label": "The action is #DUAL_POINT#; touch_point: 0.128, 0.3125, lift_point: 0.128, 0.3125", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/60909_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for the best pizza restaurants on Maps; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3521, 0.4913, lift_point: 0.3521, 0.4913; The action is #DUAL_POINT#; touch_point: 0.1806, 0.742, lift_point: 0.1806, 0.742; The action is #DUAL_POINT#; touch_point: 0.1092, 0.9366, lift_point: 0.1092, 0.9366; The action is #TYPE#; And the content is : \"typed_text\": \"best pizza restaurants on Maps\"", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/30477_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest T-Mobile Store?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7744, 0.7101, lift_point: 0.7744, 0.7101; The action is #DUAL_POINT#; touch_point: 0.0649, 0.3938, lift_point: 0.0649, 0.3938", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"nearest T-Mobile Store\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"how do i get nearest t mobile store\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/36549_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check the settings for the Twitter app; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.15, 0.2057, lift_point: 0.15, 0.2057; The action is #DUAL_POINT#; touch_point: 0.0615, 0.7935, lift_point: 0.0615, 0.7935; The action is #TYPE#; And the content is : \"typed_text\": \"twitter\"; The action is #DUAL_POINT#; touch_point: 0.1057, 0.6469, lift_point: 0.1057, 0.6469; The action is #DUAL_POINT#; touch_point: 0.0564, 0.7715, lift_point: 0.0564, 0.7715; The action is #DUAL_POINT#; touch_point: 0.2059, 0.2981, lift_point: 0.2059, 0.2981; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.539, 0.4292, lift_point: 0.539, 0.4292; The action is #DUAL_POINT#; touch_point: 0.0515, 0.0421, lift_point: 0.0515, 0.0421; The action is #DUAL_POINT#; touch_point: 0.0515, 0.0421, lift_point: 0.0515, 0.0421; The action is #DUAL_POINT#; touch_point: 0.0639, 0.8126, lift_point: 0.0639, 0.8126; The action is #DUAL_POINT#; touch_point: 0.059, 0.9422, lift_point: 0.059, 0.9422", "response": "The action is #DUAL_POINT#; touch_point: 0.059, 0.9422, lift_point: 0.059, 0.9422", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"twittwer\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/52290_17.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news?", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/64383_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest video from Game Informer?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6627, 0.8258, lift_point: 0.6627, 0.8258; The action is #DUAL_POINT#; touch_point: 0.0549, 0.052, lift_point: 0.0549, 0.052; The action is #DUAL_POINT#; touch_point: 0.0515, 0.0547, lift_point: 0.0515, 0.0547; The action is #DUAL_POINT#; touch_point: 0.0554, 0.8045, lift_point: 0.0554, 0.8045; The action is #TYPE#; And the content is : \"typed_text\": \"Game Informer\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.123, 0.3606, lift_point: 0.123, 0.3606; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4453, 0.3949, lift_point: 0.4453, 0.3949; The action is #DUAL_POINT#; touch_point: 0.1563, 0.5084, lift_point: 0.1563, 0.5084; The action is #DUAL_POINT#; touch_point: 0.3141, 0.9451, lift_point: 0.3141, 0.9451", "response": "The action is #Scrolling Down#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/7705_14.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open a new window in Chrome", "response": "The action is #DUAL_POINT#; touch_point: 0.7952, 0.6912, lift_point: 0.7952, 0.6912", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/55938_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Mexico?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4671, 0.6866, lift_point: 0.4671, 0.6866; The action is #DUAL_POINT#; touch_point: 0.1977, 0.5229, lift_point: 0.1977, 0.5229; The action is #DUAL_POINT#; touch_point: 0.0591, 0.919, lift_point: 0.0591, 0.919", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of Mexico?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of Mexico?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/30916_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant in Chicago?", "response": "The action is #Scrolling Down#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/35093_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Sweden?; The action is #DUAL_POINT#; touch_point: 0.7349, 0.6118, lift_point: 0.7349, 0.6118; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7081, 0.6262, lift_point: 0.7081, 0.6262; The action is #DUAL_POINT#; touch_point: 0.0856, 0.4623, lift_point: 0.0856, 0.4623; The action is #TYPE#; And the content is : \"typed_text\": \"capital of sweden ?\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1967, 0.5266, lift_point: 0.1967, 0.5266", "label": "The action is #DUAL_POINT#; touch_point: 0.2007, 0.6839, lift_point: 0.2007, 0.6839", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/22092_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant near me?; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7887, 0.6584, lift_point: 0.7887, 0.6584; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8068, 0.681, lift_point: 0.8068, 0.681; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7864, 0.6789, lift_point: 0.7864, 0.6789; The action is #DUAL_POINT#; touch_point: 0.7822, 0.7046, lift_point: 0.7822, 0.7046; The action is #PRESS_HOME#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #PRESS_BACK#", "response": "The action is #PRESS_BACK#", "label": "The action is #PRESS_BACK#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/67924_18.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does the TCL TV cost?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5386, 0.1224, lift_point: 0.5386, 0.1224; The action is #DUAL_POINT#; touch_point: 0.1063, 0.5713, lift_point: 0.1063, 0.5713; The action is #DUAL_POINT#; touch_point: 0.0474, 0.9258, lift_point: 0.0474, 0.9258; The action is #DUAL_POINT#; touch_point: 0.0606, 0.444, lift_point: 0.0606, 0.444; The action is #TYPE#; And the content is : \"typed_text\": \"How much does the TCL TV cost? \"", "response": "The action is #DUAL_POINT#; touch_point: 0.1087, 0.407, lift_point: 0.1087, 0.407", "label": "The action is #DUAL_POINT#; touch_point: 0.1079, 0.3517, lift_point: 0.1079, 0.3517", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/18504_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Atlanta?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76280_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the downloads", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/23561_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Las Vegas; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4832, 0.6565, lift_point: 0.4832, 0.6565; The action is #DUAL_POINT#; touch_point: 0.2107, 0.2892, lift_point: 0.2107, 0.2892; The action is #TYPE#; And the content is : \"typed_text\": \"Search for hotels in Las Vegas\"; The action is #DUAL_POINT#; touch_point: 0.1297, 0.2535, lift_point: 0.1297, 0.2535", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.3246, 0.3199, lift_point: 0.3246, 0.3199", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/37414_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on my calendar for the rest of the month?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4058, 0.1114, lift_point: 0.4058, 0.1114; The action is #DUAL_POINT#; touch_point: 0.8784, 0.9095, lift_point: 0.8784, 0.9095; The action is #DUAL_POINT#; touch_point: 0.8784, 0.9095, lift_point: 0.8784, 0.9095", "response": "The action is #DUAL_POINT#; touch_point: 0.8357, 0.5333, lift_point: 0.8357, 0.5333", "label": "The action is #DUAL_POINT#; touch_point: 0.842, 0.554, lift_point: 0.842, 0.554", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/63918_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Chick-fil-A?; The action is #DUAL_POINT#; touch_point: 0.7752, 0.6834, lift_point: 0.7752, 0.6834; The action is #DUAL_POINT#; touch_point: 0.0525, 0.4934, lift_point: 0.0525, 0.4934; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on the menu at Chick-fil-A?\"", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79407_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Paris; The action is #DUAL_POINT#; touch_point: 0.2697, 0.5114, lift_point: 0.2697, 0.5114; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6548, 0.5648, lift_point: 0.6548, 0.5648; The action is #DUAL_POINT#; touch_point: 0.0985, 0.4286, lift_point: 0.0985, 0.4286; The action is #TYPE#; And the content is : \"typed_text\": \"flights from nyc to paris\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.8157, 0.5288, lift_point: 0.8157, 0.5288", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27074_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Toggle notifications for the Google Maps app; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.778, 0.833, lift_point: 0.778, 0.833; The action is #DUAL_POINT#; touch_point: 0.619, 0.4869, lift_point: 0.619, 0.4869; The action is #DUAL_POINT#; touch_point: 0.3443, 0.3518, lift_point: 0.3443, 0.3518; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2702, 0.36, lift_point: 0.2702, 0.36; The action is #DUAL_POINT#; touch_point: 0.39, 0.2406, lift_point: 0.39, 0.2406", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/5713_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open a new Chrome private tab; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.369, 0.4993, lift_point: 0.369, 0.4993", "response": "The action is #DUAL_POINT#; touch_point: 0.8976, 0.5114, lift_point: 0.8976, 0.5114", "label": "The action is #DUAL_POINT#; touch_point: 0.8796, 0.5176, lift_point: 0.8796, 0.5176", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/2057_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the price of a 12' ladder at Lowes?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4985, 0.6804, lift_point: 0.4985, 0.6804; The action is #DUAL_POINT#; touch_point: 0.0577, 0.7161, lift_point: 0.0577, 0.7161; The action is #DUAL_POINT#; touch_point: 0.0712, 0.8898, lift_point: 0.0712, 0.8898; The action is #DUAL_POINT#; touch_point: 0.0692, 0.4163, lift_point: 0.0692, 0.4163", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"What is the price of a 12 ladder at Lowes? \"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"What is the price of a 12 ladder at Lowes? \"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/59657_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does the HisenseTV cost?; The action is #DUAL_POINT#; touch_point: 0.2436, 0.6799, lift_point: 0.2436, 0.6799; The action is #DUAL_POINT#; touch_point: 0.0694, 0.373, lift_point: 0.0694, 0.373", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"How much does the HisenseTV cost?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"HisenseTV cost\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/48109_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open a new incognito window in the chrome app; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3617, 0.4902, lift_point: 0.3617, 0.4902; The action is #DUAL_POINT#; touch_point: 0.9117, 0.4429, lift_point: 0.9117, 0.4429; The action is #DUAL_POINT#; touch_point: 0.9159, 0.8003, lift_point: 0.9159, 0.8003; The action is #DUAL_POINT#; touch_point: 0.0588, 0.9363, lift_point: 0.0588, 0.9363; The action is #DUAL_POINT#; touch_point: 0.1719, 0.7679, lift_point: 0.1719, 0.7679", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/25490_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for good Korean restaurants", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/26593_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Find the nearest electronics store that's open now; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4934, 0.705, lift_point: 0.4934, 0.705; The action is #DUAL_POINT#; touch_point: 0.1292, 0.6287, lift_point: 0.1292, 0.6287", "response": "The action is #DUAL_POINT#; touch_point: 0.0643, 0.9197, lift_point: 0.0643, 0.9197", "label": "The action is #DUAL_POINT#; touch_point: 0.0555, 0.9183, lift_point: 0.0555, 0.9183", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/2115_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open a new Chrome private tab; The action is #PRESS_HOME#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.3543, 0.4948, lift_point: 0.3543, 0.4948", "label": "The action is #DUAL_POINT#; touch_point: 0.369, 0.4993, lift_point: 0.369, 0.4993", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/2057_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Best Buy?; The action is #DUAL_POINT#; touch_point: 0.7971, 0.6859, lift_point: 0.7971, 0.6859; The action is #DUAL_POINT#; touch_point: 0.7974, 0.7046, lift_point: 0.7974, 0.7046; The action is #DUAL_POINT#; touch_point: 0.7974, 0.7046, lift_point: 0.7974, 0.7046; The action is #DUAL_POINT#; touch_point: 0.2121, 0.6359, lift_point: 0.2121, 0.6359", "response": "The action is #DUAL_POINT#; touch_point: 0.1417, 0.9188, lift_point: 0.1417, 0.9188", "label": "The action is #DUAL_POINT#; touch_point: 0.148, 0.9353, lift_point: 0.148, 0.9353", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/3930_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Starbucks?; The action is #DUAL_POINT#; touch_point: 0.782, 0.6808, lift_point: 0.782, 0.6808; The action is #DUAL_POINT#; touch_point: 0.0505, 0.4595, lift_point: 0.0505, 0.4595; The action is #TYPE#; And the content is : \"typed_text\": \"starbucks menu\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1394, 0.4077, lift_point: 0.1394, 0.4077", "label": "The action is #DUAL_POINT#; touch_point: 0.1421, 0.4265, lift_point: 0.1421, 0.4265", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/42353_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from London to Paris; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4734, 0.5023, lift_point: 0.4734, 0.5023; The action is #DUAL_POINT#; touch_point: 0.1118, 0.5014, lift_point: 0.1118, 0.5014; The action is #TYPE#; And the content is : \"typed_text\": \"flights from London to Paris\"; The action is #PRESS_ENTER#", "response": "The action is #Scrolling Down#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/1712_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Buenos aires; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7884, 0.6877, lift_point: 0.7884, 0.6877; The action is #DUAL_POINT#; touch_point: 0.7929, 0.6916, lift_point: 0.7929, 0.6916; The action is #DUAL_POINT#; touch_point: 0.064, 0.648, lift_point: 0.064, 0.648; The action is #DUAL_POINT#; touch_point: 0.0588, 0.9276, lift_point: 0.0588, 0.9276", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"hotels in Buenos aires\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Hotel in Buenos aires\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/5067_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on Reddit; The action is #DUAL_POINT#; touch_point: 0.0752, 0.5699, lift_point: 0.0752, 0.5699; The action is #TYPE#; And the content is : \"typed_text\": \"reddit\"; The action is #DUAL_POINT#; touch_point: 0.2783, 0.4273, lift_point: 0.2783, 0.4273; The action is #DUAL_POINT#; touch_point: 0.4492, 0.1148, lift_point: 0.4492, 0.1148; The action is #DUAL_POINT#; touch_point: 0.4832, 0.6379, lift_point: 0.4832, 0.6379; The action is #DUAL_POINT#; touch_point: 0.7387, 0.8037, lift_point: 0.7387, 0.8037; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.4438, 0.3849, lift_point: 0.4438, 0.3849; The action is #DUAL_POINT#; touch_point: 0.8209, 0.8498, lift_point: 0.8209, 0.8498; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.1515, 0.9091, lift_point: 0.1515, 0.9091; The action is #DUAL_POINT#; touch_point: 0.6437, 0.4219, lift_point: 0.6437, 0.4219; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.1595, 0.8944, lift_point: 0.1595, 0.8944", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/24436_15.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news about the US economy?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8832, 0.4339, lift_point: 0.8832, 0.4339; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news about the US economy\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62521_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news this weekend?", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.8672, 0.3458, lift_point: 0.8672, 0.3458", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/47147_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Chile?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5337, 0.1261, lift_point: 0.5337, 0.1261; The action is #DUAL_POINT#; touch_point: 0.123, 0.2968, lift_point: 0.123, 0.2968; The action is #DUAL_POINT#; touch_point: 0.0604, 0.9069, lift_point: 0.0604, 0.9069; The action is #DUAL_POINT#; touch_point: 0.0541, 0.2805, lift_point: 0.0541, 0.2805; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Chile? \"; The action is #DUAL_POINT#; touch_point: 0.1075, 0.3487, lift_point: 0.1075, 0.3487; The action is #DUAL_POINT#; touch_point: 0.2558, 0.3923, lift_point: 0.2558, 0.3923; The action is #DUAL_POINT#; touch_point: 0.8747, 0.2714, lift_point: 0.8747, 0.2714", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/19310_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the 1000-Watt EGO Power+ Snow Blower?; The action is #DUAL_POINT#; touch_point: 0.8145, 0.6085, lift_point: 0.8145, 0.6085; The action is #DUAL_POINT#; touch_point: 0.8145, 0.6085, lift_point: 0.8145, 0.6085; The action is #DUAL_POINT#; touch_point: 0.1785, 0.8915, lift_point: 0.1785, 0.8915; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the price of the 1000-Watt EGO Power+ Snow Blower?\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.2495, 0.0452, lift_point: 0.2495, 0.0452; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3674, 0.3126, lift_point: 0.3674, 0.3126", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/49383_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant in San Diego?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8027, 0.672, lift_point: 0.8027, 0.672; The action is #DUAL_POINT#; touch_point: 0.2187, 0.6753, lift_point: 0.2187, 0.6753; The action is #DUAL_POINT#; touch_point: 0.1355, 0.9371, lift_point: 0.1355, 0.9371", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"good restaurant in San Diego\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats a good restaurant in San Diego?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/4007_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in New York?; The action is #DUAL_POINT#; touch_point: 0.0575, 0.0365, lift_point: 0.0575, 0.0365; The action is #DUAL_POINT#; touch_point: 0.8912, 0.667, lift_point: 0.8912, 0.667; The action is #DUAL_POINT#; touch_point: 0.8921, 0.3941, lift_point: 0.8921, 0.3941; The action is #DUAL_POINT#; touch_point: 0.8787, 0.3913, lift_point: 0.8787, 0.3913; The action is #DUAL_POINT#; touch_point: 0.7438, 0.7177, lift_point: 0.7438, 0.7177; The action is #DUAL_POINT#; touch_point: 0.7785, 0.6992, lift_point: 0.7785, 0.6992; The action is #DUAL_POINT#; touch_point: 0.8149, 0.6966, lift_point: 0.8149, 0.6966; The action is #DUAL_POINT#; touch_point: 0.0722, 0.4065, lift_point: 0.0722, 0.4065; The action is #TYPE#; And the content is : \"typed_text\": \"What time is it in New York?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1351, 0.6482, lift_point: 0.1351, 0.6482; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2715, 0.6476, lift_point: 0.2715, 0.6476; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0528, 0.2859, lift_point: 0.0528, 0.2859; The action is #DUAL_POINT#; touch_point: 0.8191, 0.4412, lift_point: 0.8191, 0.4412; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.4471, 0.1787, lift_point: 0.4471, 0.1787; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8042, 0.3148, lift_point: 0.8042, 0.3148; The action is #DUAL_POINT#; touch_point: 0.6879, 0.5777, lift_point: 0.6879, 0.5777; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8361, 0.2862, lift_point: 0.8361, 0.2862; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#", "response": "The action is #DUAL_POINT#; touch_point: 0.5969, 0.3063, lift_point: 0.5969, 0.3063", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/13941_48.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Seoul?; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.5385, 0.7858, lift_point: 0.5385, 0.7858; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8897, 0.2232, lift_point: 0.8897, 0.2232; The action is #TYPE#; And the content is : \"typed_text\": \"weather like in Seoul\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.79, 0.4597, lift_point: 0.79, 0.4597; The action is #DUAL_POINT#; touch_point: 0.6875, 0.5881, lift_point: 0.6875, 0.5881; The action is #DUAL_POINT#; touch_point: 0.792, 0.6095, lift_point: 0.792, 0.6095; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6118, 0.6305, lift_point: 0.6118, 0.6305; The action is #DUAL_POINT#; touch_point: 0.7374, 0.9374, lift_point: 0.7374, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7406, 0.9516, lift_point: 0.7406, 0.9516; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441", "response": "The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441", "label": "The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/53409_54.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the Calendar app", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/10691_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Brazil?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6098, 0.8296, lift_point: 0.6098, 0.8296", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/23603_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Pakistan?", "response": "The action is #DUAL_POINT#; touch_point: 0.7963, 0.6905, lift_point: 0.7963, 0.6905", "label": "The action is #DUAL_POINT#; touch_point: 0.7898, 0.6984, lift_point: 0.7898, 0.6984", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/26653_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Mexico City?; The action is #DUAL_POINT#; touch_point: 0.3217, 0.4698, lift_point: 0.3217, 0.4698; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the weather like in Mexico City? \"", "response": "The action is #DUAL_POINT#; touch_point: 0.5737, 0.5995, lift_point: 0.5737, 0.5995", "label": "The action is #DUAL_POINT#; touch_point: 0.4966, 0.621, lift_point: 0.4966, 0.621", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/64917_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Tokyo; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8814, 0.3866, lift_point: 0.8814, 0.3866; The action is #TYPE#; And the content is : \"typed_text\": \"flights from NYC to Tokyo\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62557_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Ed Sheeran video on YouTube; The action is #DUAL_POINT#; touch_point: 0.873, 0.3966, lift_point: 0.873, 0.3966; The action is #DUAL_POINT#; touch_point: 0.8782, 0.2261, lift_point: 0.8782, 0.2261; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.7463, 0.4818, lift_point: 0.7463, 0.4818; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.0464, 0.825, lift_point: 0.0464, 0.825; The action is #TYPE#; And the content is : \"typed_text\": \"new Ed Sheeran video \"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.7111, 0.5029, lift_point: 0.7111, 0.5029; The action is #DUAL_POINT#; touch_point: 0.2329, 0.8893, lift_point: 0.2329, 0.8893", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/70026_12.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Hong Kong?; The action is #DUAL_POINT#; touch_point: 0.858, 0.6033, lift_point: 0.858, 0.6033; The action is #DUAL_POINT#; touch_point: 0.0441, 0.3452, lift_point: 0.0441, 0.3452; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the weather like in Hong Kong?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1159, 0.3367, lift_point: 0.1159, 0.3367; The action is #DUAL_POINT#; touch_point: 0.9574, 0.4087, lift_point: 0.9574, 0.4087; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.48, 0.4451, lift_point: 0.48, 0.4451; The action is #DUAL_POINT#; touch_point: 0.2723, 0.6483, lift_point: 0.2723, 0.6483; The action is #DUAL_POINT#; touch_point: 0.4632, 0.4748, lift_point: 0.4632, 0.4748; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.9477, 0.388, lift_point: 0.9477, 0.388; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2628, 0.6309, lift_point: 0.2628, 0.6309; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2139, 0.6602, lift_point: 0.2139, 0.6602; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2883, 0.3108, lift_point: 0.2883, 0.3108; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/59353_71.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on Reddit; The action is #DUAL_POINT#; touch_point: 0.0752, 0.5699, lift_point: 0.0752, 0.5699; The action is #TYPE#; And the content is : \"typed_text\": \"reddit\"; The action is #DUAL_POINT#; touch_point: 0.2783, 0.4273, lift_point: 0.2783, 0.4273; The action is #DUAL_POINT#; touch_point: 0.4492, 0.1148, lift_point: 0.4492, 0.1148; The action is #DUAL_POINT#; touch_point: 0.4832, 0.6379, lift_point: 0.4832, 0.6379; The action is #DUAL_POINT#; touch_point: 0.7387, 0.8037, lift_point: 0.7387, 0.8037; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.4438, 0.3849, lift_point: 0.4438, 0.3849; The action is #DUAL_POINT#; touch_point: 0.8209, 0.8498, lift_point: 0.8209, 0.8498; The action is #Scrolling Up#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.1515, 0.9091, lift_point: 0.1515, 0.9091", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/24436_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news this week?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6221, 0.9191, lift_point: 0.6221, 0.9191; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4338, 0.3417, lift_point: 0.4338, 0.3417; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/32250_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of the United Kingdom?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8947, 0.4639, lift_point: 0.8947, 0.4639; The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of the United Kingdom\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44758_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Sydney to Buenos aires; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3745, 0.4935, lift_point: 0.3745, 0.4935; The action is #DUAL_POINT#; touch_point: 0.0542, 0.4256, lift_point: 0.0542, 0.4256; The action is #TYPE#; And the content is : \"typed_text\": \"flights from Sydney to Buenos aires\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3063, 0.6541, lift_point: 0.3063, 0.6541; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3338, 0.5289, lift_point: 0.3338, 0.5289; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1764, 0.6049, lift_point: 0.1764, 0.6049; The action is #DUAL_POINT#; touch_point: 0.4134, 0.6631, lift_point: 0.4134, 0.6631; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/20704_40.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Indonesia?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.886, 0.697, lift_point: 0.886, 0.697; The action is #PRESS_ENTER#; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Indonesia\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.2711, 0.4538, lift_point: 0.2711, 0.4538; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8273, 0.9494, lift_point: 0.8273, 0.9494; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.862, 0.9175, lift_point: 0.862, 0.9175", "label": "The action is #DUAL_POINT#; touch_point: 0.4201, 0.9891, lift_point: 0.4201, 0.9891", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/70715_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Ariana Grande video on YouTube; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.9374, 0.3882, lift_point: 0.9374, 0.3882; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.6396, 0.4336, lift_point: 0.6396, 0.4336; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.0712, 0.6123, lift_point: 0.0712, 0.6123; The action is #DUAL_POINT#; touch_point: 0.0396, 0.6168, lift_point: 0.0396, 0.6168; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.0662, 0.8267, lift_point: 0.0662, 0.8267; The action is #TYPE#; And the content is : \"typed_text\": \" Ariana Grande\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.6132, 0.2649, lift_point: 0.6132, 0.2649; The action is #DUAL_POINT#; touch_point: 0.0945, 0.366, lift_point: 0.0945, 0.366; The action is #DUAL_POINT#; touch_point: 0.2091, 0.3261, lift_point: 0.2091, 0.3261", "response": "The action is #DUAL_POINT#; touch_point: 0.2091, 0.3261, lift_point: 0.2091, 0.3261", "label": "The action is #DUAL_POINT#; touch_point: 0.2094, 0.8261, lift_point: 0.2094, 0.8261", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/75127_16.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Show me some nice wallpapers for my desktop; The action is #PRESS_HOME#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29623_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Peru?; The action is #DUAL_POINT#; touch_point: 0.7989, 0.7021, lift_point: 0.7989, 0.7021; The action is #DUAL_POINT#; touch_point: 0.0488, 0.5993, lift_point: 0.0488, 0.5993; The action is #TYPE#; And the content is : \"typed_text\": \"news in Peru\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/45114_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news this week?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6221, 0.9191, lift_point: 0.6221, 0.9191; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4338, 0.3417, lift_point: 0.4338, 0.3417; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/32250_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Paris; The action is #DUAL_POINT#; touch_point: 0.2697, 0.5114, lift_point: 0.2697, 0.5114; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6548, 0.5648, lift_point: 0.6548, 0.5648; The action is #DUAL_POINT#; touch_point: 0.0985, 0.4286, lift_point: 0.0985, 0.4286; The action is #TYPE#; And the content is : \"typed_text\": \"flights from nyc to paris\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3872, 0.608, lift_point: 0.3872, 0.608; The action is #DUAL_POINT#; touch_point: 0.6296, 0.6156, lift_point: 0.6296, 0.6156; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8198, 0.4497, lift_point: 0.8198, 0.4497; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.5797, 0.5502, lift_point: 0.5797, 0.5502", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27074_20.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest news in space technology?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6142, 0.3825, lift_point: 0.6142, 0.3825; The action is #DUAL_POINT#; touch_point: 0.2795, 0.5272, lift_point: 0.2795, 0.5272; The action is #DUAL_POINT#; touch_point: 0.0638, 0.8952, lift_point: 0.0638, 0.8952", "response": "The action is #DUAL_POINT#; touch_point: 0.0614, 0.4587, lift_point: 0.0614, 0.4587", "label": "The action is #DUAL_POINT#; touch_point: 0.0585, 0.442, lift_point: 0.0585, 0.442", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/12082_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How old is the earth?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8922, 0.5157, lift_point: 0.8922, 0.5157; The action is #TYPE#; And the content is : \"typed_text\": \"How old is the earth?\"; The action is #DUAL_POINT#; touch_point: 0.1773, 0.4353, lift_point: 0.1773, 0.4353", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62628_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the Yahoo app; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6568, 0.6713, lift_point: 0.6568, 0.6713; The action is #DUAL_POINT#; touch_point: 0.0633, 0.5378, lift_point: 0.0633, 0.5378; The action is #TYPE#; And the content is : \"typed_text\": \"yahoo app\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1534, 0.2986, lift_point: 0.1534, 0.2986", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.4861, 0.1534, lift_point: 0.4861, 0.1534", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/35934_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Chicago?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.2354, 0.6773, lift_point: 0.2354, 0.6773; The action is #DUAL_POINT#; touch_point: 0.0572, 0.4785, lift_point: 0.0572, 0.4785; The action is #TYPE#; And the content is : \"typed_text\": \"rent for 3 bedroom apartment in chicago\"; The action is #DUAL_POINT#; touch_point: 0.0627, 0.5717, lift_point: 0.0627, 0.5717; The action is #TYPE#; And the content is : \"typed_text\": \" in chicago\"; The action is #DUAL_POINT#; touch_point: 0.1198, 0.5591, lift_point: 0.1198, 0.5591; The action is #DUAL_POINT#; touch_point: 0.3566, 0.2985, lift_point: 0.3566, 0.2985; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2859, 0.2866, lift_point: 0.2859, 0.2866; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6261, 0.3049, lift_point: 0.6261, 0.3049; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/77654_20.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Seoul?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/82379_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email; The action is #DUAL_POINT#; touch_point: 0.7998, 0.6996, lift_point: 0.7998, 0.6996", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76490_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest video from GameSpot Reviews?; The action is #DUAL_POINT#; touch_point: 0.6348, 0.8499, lift_point: 0.6348, 0.8499; The action is #DUAL_POINT#; touch_point: 0.6348, 0.8499, lift_point: 0.6348, 0.8499; The action is #DUAL_POINT#; touch_point: 0.6392, 0.8663, lift_point: 0.6392, 0.8663; The action is #DUAL_POINT#; touch_point: 0.0508, 0.0719, lift_point: 0.0508, 0.0719; The action is #DUAL_POINT#; touch_point: 0.0437, 0.8153, lift_point: 0.0437, 0.8153; The action is #DUAL_POINT#; touch_point: 0.0634, 0.3804, lift_point: 0.0634, 0.3804; The action is #TYPE#; And the content is : \"typed_text\": \"GameSpot Reviews?\"; The action is #DUAL_POINT#; touch_point: 0.2834, 0.3351, lift_point: 0.2834, 0.3351; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/15764_12.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check the settings for the Twitter app; The action is #Scrolling Down#", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.6075, 0.6952, lift_point: 0.6075, 0.6952", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/31020_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the downloads; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4652, 0.4809, lift_point: 0.4652, 0.4809", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.0504, 0.2105, lift_point: 0.0504, 0.2105", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/54596_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Miami?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.592, 0.4213, lift_point: 0.592, 0.4213; The action is #DUAL_POINT#; touch_point: 0.2848, 0.4712, lift_point: 0.2848, 0.4712; The action is #DUAL_POINT#; touch_point: 0.0554, 0.9111, lift_point: 0.0554, 0.9111; The action is #DUAL_POINT#; touch_point: 0.0618, 0.3088, lift_point: 0.0618, 0.3088", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"How much does a 3 bedroom apartment rent for in Miami? \"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"How much does a 3 bedroom apartment rent for in Miami? \"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76164_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in San Francisco?; The action is #DUAL_POINT#; touch_point: 0.7714, 0.6865, lift_point: 0.7714, 0.6865; The action is #DUAL_POINT#; touch_point: 0.8819, 0.3955, lift_point: 0.8819, 0.3955; The action is #DUAL_POINT#; touch_point: 0.8824, 0.7775, lift_point: 0.8824, 0.7775; The action is #DUAL_POINT#; touch_point: 0.8924, 0.8149, lift_point: 0.8924, 0.8149; The action is #DUAL_POINT#; touch_point: 0.3153, 0.5462, lift_point: 0.3153, 0.5462; The action is #TYPE#; And the content is : \"typed_text\": \"weather like in San Francisco\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1389, 0.4836, lift_point: 0.1389, 0.4836", "label": "The action is #DUAL_POINT#; touch_point: 0.1419, 0.6766, lift_point: 0.1419, 0.6766", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/6908_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Barbados?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.749, 0.6121, lift_point: 0.749, 0.6121; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Barbados?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #Scrolling Up#", "response": "The action is #DUAL_POINT#; touch_point: 0.0568, 0.5975, lift_point: 0.0568, 0.5975", "label": "The action is #Scrolling Up#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/1837_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open a new Chrome incognito tab; The action is #DUAL_POINT#; touch_point: 0.791, 0.6885, lift_point: 0.791, 0.6885; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7909, 0.751, lift_point: 0.7909, 0.751; The action is #DUAL_POINT#; touch_point: 0.0664, 0.0473, lift_point: 0.0664, 0.0473; The action is #DUAL_POINT#; touch_point: 0.0664, 0.0473, lift_point: 0.0664, 0.0473; The action is #DUAL_POINT#; touch_point: 0.0675, 0.091, lift_point: 0.0675, 0.091; The action is #DUAL_POINT#; touch_point: 0.0673, 0.0607, lift_point: 0.0673, 0.0607; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.0509, 0.915, lift_point: 0.0509, 0.915; The action is #DUAL_POINT#; touch_point: 0.0585, 0.9079, lift_point: 0.0585, 0.9079; The action is #DUAL_POINT#; touch_point: 0.0585, 0.9079, lift_point: 0.0585, 0.9079", "response": "The action is #DUAL_POINT#; touch_point: 0.0585, 0.9079, lift_point: 0.0585, 0.9079", "label": "The action is #DUAL_POINT#; touch_point: 0.0775, 0.9231, lift_point: 0.0775, 0.9231", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/85110_12.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Houston?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3531, 0.4998, lift_point: 0.3531, 0.4998; The action is #DUAL_POINT#; touch_point: 0.0679, 0.373, lift_point: 0.0679, 0.373; The action is #TYPE#; And the content is : \"typed_text\": \"How much does a 3 bedroom apartment rent for in Houston?\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.43, 0.3015, lift_point: 0.43, 0.3015", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/34141_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Chick-fil-A?; The action is #DUAL_POINT#; touch_point: 0.7752, 0.6834, lift_point: 0.7752, 0.6834; The action is #DUAL_POINT#; touch_point: 0.0525, 0.4934, lift_point: 0.0525, 0.4934; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on the menu at Chick-fil-A?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6076, 0.4409, lift_point: 0.6076, 0.4409; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.7031, 0.5546, lift_point: 0.7031, 0.5546; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.2804, 0.1649, lift_point: 0.2804, 0.1649; The action is #Scrolling Up#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.7902, 0.7447, lift_point: 0.7902, 0.7447; The action is #PRESS_BACK#", "response": "The action is #Scrolling Up#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79407_43.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the contacts; The action is #PRESS_HOME#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/23231_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Chick-fil-A?; The action is #DUAL_POINT#; touch_point: 0.7306, 0.6166, lift_point: 0.7306, 0.6166; The action is #DUAL_POINT#; touch_point: 0.7162, 0.636, lift_point: 0.7162, 0.636", "response": "The action is #DUAL_POINT#; touch_point: 0.0942, 0.4964, lift_point: 0.0942, 0.4964", "label": "The action is #DUAL_POINT#; touch_point: 0.0862, 0.4742, lift_point: 0.0862, 0.4742", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/40219_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for good Italian restaurants; The action is #DUAL_POINT#; touch_point: 0.8089, 0.6237, lift_point: 0.8089, 0.6237; The action is #DUAL_POINT#; touch_point: 0.0529, 0.3469, lift_point: 0.0529, 0.3469; The action is #TYPE#; And the content is : \"typed_text\": \"good Italian restaurants\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1335, 0.423, lift_point: 0.1335, 0.423", "label": "The action is #DUAL_POINT#; touch_point: 0.1372, 0.4749, lift_point: 0.1372, 0.4749", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/13003_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the ESPN app; The action is #DUAL_POINT#; touch_point: 0.6751, 0.6307, lift_point: 0.6751, 0.6307; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6705, 0.6189, lift_point: 0.6705, 0.6189; The action is #DUAL_POINT#; touch_point: 0.6886, 0.6201, lift_point: 0.6886, 0.6201; The action is #DUAL_POINT#; touch_point: 0.1264, 0.5271, lift_point: 0.1264, 0.5271; The action is #TYPE#; And the content is : \"typed_text\": \"espn app\"; The action is #DUAL_POINT#; touch_point: 0.2716, 0.4401, lift_point: 0.2716, 0.4401; The action is #DUAL_POINT#; touch_point: 0.6323, 0.3296, lift_point: 0.6323, 0.3296; The action is #DUAL_POINT#; touch_point: 0.6357, 0.3296, lift_point: 0.6357, 0.3296; The action is #DUAL_POINT#; touch_point: 0.8648, 0.5908, lift_point: 0.8648, 0.5908", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27613_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Taylor Swift video on YouTube; The action is #DUAL_POINT#; touch_point: 0.7828, 0.8909, lift_point: 0.7828, 0.8909; The action is #DUAL_POINT#; touch_point: 0.0677, 0.0588, lift_point: 0.0677, 0.0588; The action is #DUAL_POINT#; touch_point: 0.0564, 0.8052, lift_point: 0.0564, 0.8052; The action is #DUAL_POINT#; touch_point: 0.0675, 0.5065, lift_point: 0.0675, 0.5065; The action is #TYPE#; And the content is : \"typed_text\": \"new Taylor Swift video\"; The action is #DUAL_POINT#; touch_point: 0.1215, 0.4143, lift_point: 0.1215, 0.4143; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/6741_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 2 bedroom apartment rent for in San Francisco?; The action is #DUAL_POINT#; touch_point: 0.6673, 0.5903, lift_point: 0.6673, 0.5903; The action is #DUAL_POINT#; touch_point: 0.1147, 0.4767, lift_point: 0.1147, 0.4767; The action is #TYPE#; And the content is : \"typed_text\": \"2 bedroom apartment rent in san francisco\"; The action is #PRESS_ENTER#; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.6108, 0.4765, lift_point: 0.6108, 0.4765", "label": "The action is #DUAL_POINT#; touch_point: 0.5689, 0.5117, lift_point: 0.5689, 0.5117", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/28075_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open a new incognito tab in Chrome; The action is #PRESS_HOME#", "response": "The action is #DUAL_POINT#; touch_point: 0.6717, 0.6238, lift_point: 0.6717, 0.6238", "label": "The action is #DUAL_POINT#; touch_point: 0.657, 0.6143, lift_point: 0.657, 0.6143", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27574_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Orlando; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.895, 0.3061, lift_point: 0.895, 0.3061; The action is #TYPE#; And the content is : \"typed_text\": \"hotels in Orlando\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.3382, 0.4826, lift_point: 0.3382, 0.4826; The action is #DUAL_POINT#; touch_point: 0.8047, 0.7443, lift_point: 0.8047, 0.7443; The action is #DUAL_POINT#; touch_point: 0.6461, 0.9417, lift_point: 0.6461, 0.9417", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62491_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of China?; The action is #DUAL_POINT#; touch_point: 0.8706, 0.5317, lift_point: 0.8706, 0.5317; The action is #TYPE#; And the content is : \"typed_text\": \"capital of China\"; The action is #DUAL_POINT#; touch_point: 0.1411, 0.397, lift_point: 0.1411, 0.397; The action is #DUAL_POINT#; touch_point: 0.6228, 0.485, lift_point: 0.6228, 0.485; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.7647, 0.6551, lift_point: 0.7647, 0.6551", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.4288, 0.9313, lift_point: 0.4288, 0.9313", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/42767_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Chick-fil-A?; The action is #DUAL_POINT#; touch_point: 0.7306, 0.6166, lift_point: 0.7306, 0.6166", "response": "The action is #DUAL_POINT#; touch_point: 0.7306, 0.6166, lift_point: 0.7306, 0.6166", "label": "The action is #DUAL_POINT#; touch_point: 0.7162, 0.636, lift_point: 0.7162, 0.636", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/40219_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 7am; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5083, 0.1503, lift_point: 0.5083, 0.1503; The action is #DUAL_POINT#; touch_point: 0.0703, 0.0641, lift_point: 0.0703, 0.0641; The action is #DUAL_POINT#; touch_point: 0.8975, 0.5141, lift_point: 0.8975, 0.5141", "response": "The action is #DUAL_POINT#; touch_point: 0.6143, 0.499, lift_point: 0.6143, 0.499", "label": "The action is #DUAL_POINT#; touch_point: 0.3199, 0.6084, lift_point: 0.3199, 0.6084", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/1984_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for top rated burger restaurants on Google Maps; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1757, 0.8474, lift_point: 0.1757, 0.8474; The action is #DUAL_POINT#; touch_point: 0.0754, 0.5816, lift_point: 0.0754, 0.5816", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"top rated burger restaurants\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \" top rated burger restaurants\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/55674_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Chick-fil-A?; The action is #DUAL_POINT#; touch_point: 0.7752, 0.6834, lift_point: 0.7752, 0.6834; The action is #DUAL_POINT#; touch_point: 0.0525, 0.4934, lift_point: 0.0525, 0.4934; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on the menu at Chick-fil-A?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6076, 0.4409, lift_point: 0.6076, 0.4409; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.7031, 0.5546, lift_point: 0.7031, 0.5546; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.2804, 0.1649, lift_point: 0.2804, 0.1649; The action is #Scrolling Up#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.7902, 0.7447, lift_point: 0.7902, 0.7447; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2558, 0.2172, lift_point: 0.2558, 0.2172; The action is #PRESS_BACK#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.8458, 0.5369, lift_point: 0.8458, 0.5369; The action is #DUAL_POINT#; touch_point: 0.5453, 0.7439, lift_point: 0.5453, 0.7439", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79407_51.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How big is a dinosaur?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.797, 0.6975, lift_point: 0.797, 0.6975", "response": "The action is #DUAL_POINT#; touch_point: 0.0663, 0.505, lift_point: 0.0663, 0.505", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/72227_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Who is the president of the United States?; The action is #PRESS_HOME#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.8899, 0.5686, lift_point: 0.8899, 0.5686", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62378_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Where can I buy a nice beach towel?; The action is #PRESS_HOME#; The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #Scrolling Up#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79301_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Zurich; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7938, 0.5856, lift_point: 0.7938, 0.5856; The action is #DUAL_POINT#; touch_point: 0.0542, 0.3825, lift_point: 0.0542, 0.3825; The action is #TYPE#; And the content is : \"typed_text\": \" hotels in Zurich\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1221, 0.2188, lift_point: 0.1221, 0.2188", "label": "The action is #DUAL_POINT#; touch_point: 0.1172, 0.2172, lift_point: 0.1172, 0.2172", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/52751_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Walmart?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7756, 0.6931, lift_point: 0.7756, 0.6931; The action is #DUAL_POINT#; touch_point: 0.0603, 0.2776, lift_point: 0.0603, 0.2776; The action is #TYPE#; And the content is : \"typed_text\": \"nearest Walmart\"; The action is #DUAL_POINT#; touch_point: 0.1268, 0.1721, lift_point: 0.1268, 0.1721", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/55160_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the contacts; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.345, 0.8724, lift_point: 0.345, 0.8724", "label": "The action is #DUAL_POINT#; touch_point: 0.3412, 0.8923, lift_point: 0.3412, 0.8923", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/53689_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Chile?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7768, 0.7205, lift_point: 0.7768, 0.7205; The action is #DUAL_POINT#; touch_point: 0.0574, 0.3891, lift_point: 0.0574, 0.3891; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Chile?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.4163, 0.4832, lift_point: 0.4163, 0.4832; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8182, 0.6398, lift_point: 0.8182, 0.6398; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.7942, 0.7309, lift_point: 0.7942, 0.7309; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5042, 0.5194, lift_point: 0.5042, 0.5194; The action is #DUAL_POINT#; touch_point: 0.3929, 0.6891, lift_point: 0.3929, 0.6891; The action is #DUAL_POINT#; touch_point: 0.869, 0.5517, lift_point: 0.869, 0.5517; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/224_29.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Venezuela?; The action is #PRESS_HOME#", "response": "The action is #DUAL_POINT#; touch_point: 0.7921, 0.6947, lift_point: 0.7921, 0.6947", "label": "The action is #DUAL_POINT#; touch_point: 0.7886, 0.6901, lift_point: 0.7886, 0.6901", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/56270_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant in Las Vegas?; The action is #DUAL_POINT#; touch_point: 0.5474, 0.6241, lift_point: 0.5474, 0.6241; The action is #DUAL_POINT#; touch_point: 0.8101, 0.681, lift_point: 0.8101, 0.681; The action is #DUAL_POINT#; touch_point: 0.8961, 0.6047, lift_point: 0.8961, 0.6047; The action is #DUAL_POINT#; touch_point: 0.887, 0.8502, lift_point: 0.887, 0.8502; The action is #DUAL_POINT#; touch_point: 0.3197, 0.4917, lift_point: 0.3197, 0.4917; The action is #TYPE#; And the content is : \"typed_text\": \"good restaurants in las vegas\"; The action is #PRESS_ENTER#", "response": "The action is #Scrolling Down#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/23104_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest T-Mobile Store?", "response": "The action is #PRESS_HOME#", "label": "The action is #DUAL_POINT#; touch_point: 0.7854, 0.6865, lift_point: 0.7854, 0.6865", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/37570_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Chick-fil-A?; The action is #DUAL_POINT#; touch_point: 0.7752, 0.6834, lift_point: 0.7752, 0.6834; The action is #DUAL_POINT#; touch_point: 0.0525, 0.4934, lift_point: 0.0525, 0.4934; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on the menu at Chick-fil-A?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6076, 0.4409, lift_point: 0.6076, 0.4409; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.7031, 0.5546, lift_point: 0.7031, 0.5546; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.2804, 0.1649, lift_point: 0.2804, 0.1649; The action is #Scrolling Up#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.7902, 0.7447, lift_point: 0.7902, 0.7447; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2558, 0.2172, lift_point: 0.2558, 0.2172; The action is #PRESS_BACK#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.8458, 0.5369, lift_point: 0.8458, 0.5369; The action is #DUAL_POINT#; touch_point: 0.5453, 0.7439, lift_point: 0.5453, 0.7439; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5107, 0.5786, lift_point: 0.5107, 0.5786; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79407_71.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Tokyo to NYC; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.768, 0.6793, lift_point: 0.768, 0.6793", "response": "The action is #DUAL_POINT#; touch_point: 0.7735, 0.6855, lift_point: 0.7735, 0.6855", "label": "The action is #DUAL_POINT#; touch_point: 0.7656, 0.6793, lift_point: 0.7656, 0.6793", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/54378_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new foundation on Sephora; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3718, 0.499, lift_point: 0.3718, 0.499; The action is #DUAL_POINT#; touch_point: 0.0464, 0.4043, lift_point: 0.0464, 0.4043; The action is #TYPE#; And the content is : \"typed_text\": \"sephora\"; The action is #DUAL_POINT#; touch_point: 0.1985, 0.2131, lift_point: 0.1985, 0.2131", "response": "The action is #DUAL_POINT#; touch_point: 0.4764, 0.247, lift_point: 0.4764, 0.247", "label": "The action is #DUAL_POINT#; touch_point: 0.4812, 0.2229, lift_point: 0.4812, 0.2229", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/11124_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Buenos aires to Helsinki; The action is #PRESS_HOME#", "response": "The action is #DUAL_POINT#; touch_point: 0.7914, 0.6924, lift_point: 0.7914, 0.6924", "label": "The action is #DUAL_POINT#; touch_point: 0.7912, 0.7041, lift_point: 0.7912, 0.7041", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/3192_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Dallas?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7474, 0.2992, lift_point: 0.7474, 0.2992; The action is #DUAL_POINT#; touch_point: 0.0719, 0.6064, lift_point: 0.0719, 0.6064; The action is #TYPE#; And the content is : \"typed_text\": \"3 bedroom apartment rent in Dallas\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/63634_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the ESPN app; The action is #DUAL_POINT#; touch_point: 0.6751, 0.6307, lift_point: 0.6751, 0.6307; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6705, 0.6189, lift_point: 0.6705, 0.6189; The action is #DUAL_POINT#; touch_point: 0.6886, 0.6201, lift_point: 0.6886, 0.6201; The action is #DUAL_POINT#; touch_point: 0.1264, 0.5271, lift_point: 0.1264, 0.5271; The action is #TYPE#; And the content is : \"typed_text\": \"espn app\"; The action is #DUAL_POINT#; touch_point: 0.2716, 0.4401, lift_point: 0.2716, 0.4401; The action is #DUAL_POINT#; touch_point: 0.6323, 0.3296, lift_point: 0.6323, 0.3296; The action is #DUAL_POINT#; touch_point: 0.6357, 0.3296, lift_point: 0.6357, 0.3296; The action is #DUAL_POINT#; touch_point: 0.8648, 0.5908, lift_point: 0.8648, 0.5908; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6208, 0.7456, lift_point: 0.6208, 0.7456; The action is #DUAL_POINT#; touch_point: 0.8546, 0.3449, lift_point: 0.8546, 0.3449; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1189, 0.9051, lift_point: 0.1189, 0.9051; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.3164, 0.0237, lift_point: 0.3164, 0.0237; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.4396, 0.1947, lift_point: 0.4396, 0.1947", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27613_26.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check the settings for the Twitter app; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.15, 0.2057, lift_point: 0.15, 0.2057; The action is #DUAL_POINT#; touch_point: 0.0615, 0.7935, lift_point: 0.0615, 0.7935; The action is #TYPE#; And the content is : \"typed_text\": \"twitter\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1196, 0.2652, lift_point: 0.1196, 0.2652", "label": "The action is #DUAL_POINT#; touch_point: 0.1057, 0.6469, lift_point: 0.1057, 0.6469", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/52290_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for good BBQ restaurants", "response": "The action is #DUAL_POINT#; touch_point: 0.8836, 0.4778, lift_point: 0.8836, 0.4778", "label": "The action is #DUAL_POINT#; touch_point: 0.8714, 0.3907, lift_point: 0.8714, 0.3907", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/72686_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new hair product; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8795, 0.3776, lift_point: 0.8795, 0.3776; The action is #TYPE#; And the content is : \"typed_text\": \"new hair product\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1267, 0.4263, lift_point: 0.1267, 0.4263", "label": "The action is #DUAL_POINT#; touch_point: 0.12, 0.3894, lift_point: 0.12, 0.3894", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/66995_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the US dollar exchange rate against the Australian Dollar?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5469, 0.1552, lift_point: 0.5469, 0.1552; The action is #DUAL_POINT#; touch_point: 0.0428, 0.5818, lift_point: 0.0428, 0.5818; The action is #DUAL_POINT#; touch_point: 0.0428, 0.9008, lift_point: 0.0428, 0.9008; The action is #DUAL_POINT#; touch_point: 0.0586, 0.3854, lift_point: 0.0586, 0.3854; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the US dollar exchange rate against the Australian Dollar? \"; The action is #DUAL_POINT#; touch_point: 0.0533, 0.8195, lift_point: 0.0533, 0.8195; The action is #DUAL_POINT#; touch_point: 0.0479, 0.8541, lift_point: 0.0479, 0.8541; The action is #DUAL_POINT#; touch_point: 0.0527, 0.8434, lift_point: 0.0527, 0.8434; The action is #DUAL_POINT#; touch_point: 0.0552, 0.8162, lift_point: 0.0552, 0.8162; The action is #DUAL_POINT#; touch_point: 0.0326, 0.9083, lift_point: 0.0326, 0.9083; The action is #DUAL_POINT#; touch_point: 0.0454, 0.9083, lift_point: 0.0454, 0.9083; The action is #DUAL_POINT#; touch_point: 0.0524, 0.3553, lift_point: 0.0524, 0.3553; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.0664, 0.2897, lift_point: 0.0664, 0.2897; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the US dollar exchange rate against the Australian Dollar \"; The action is #DUAL_POINT#; touch_point: 0.0612, 0.5317, lift_point: 0.0612, 0.5317; The action is #DUAL_POINT#; touch_point: 0.048, 0.8925, lift_point: 0.048, 0.8925; The action is #DUAL_POINT#; touch_point: 0.0512, 0.3511, lift_point: 0.0512, 0.3511; The action is #TYPE#; And the content is : \"typed_text\": \"US dollar exchange rate against the Australian Dollar \"; The action is #DUAL_POINT#; touch_point: 0.1125, 0.3218, lift_point: 0.1125, 0.3218; The action is #Scrolling Up#", "response": "The action is #Scrolling Up#", "label": "The action is #DUAL_POINT#; touch_point: 0.7876, 0.173, lift_point: 0.7876, 0.173", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/18530_24.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Find the nearest electronics store that's open now; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7912, 0.1291, lift_point: 0.7912, 0.1291; The action is #DUAL_POINT#; touch_point: 0.0656, 0.4659, lift_point: 0.0656, 0.4659; The action is #DUAL_POINT#; touch_point: 0.0649, 0.9065, lift_point: 0.0649, 0.9065; The action is #TYPE#; And the content is : \"typed_text\": \" nearest electronics store thats open now\"; The action is #DUAL_POINT#; touch_point: 0.2078, 0.3987, lift_point: 0.2078, 0.3987; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4387, 0.414, lift_point: 0.4387, 0.414; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/71966_17.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Chipotle?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8477, 0.6484, lift_point: 0.8477, 0.6484; The action is #DUAL_POINT#; touch_point: 0.0537, 0.5218, lift_point: 0.0537, 0.5218; The action is #DUAL_POINT#; touch_point: 0.0549, 0.6326, lift_point: 0.0549, 0.6326; The action is #TYPE#; And the content is : \"typed_text\": \"nearest Chipotle\"; The action is #DUAL_POINT#; touch_point: 0.1115, 0.2639, lift_point: 0.1115, 0.2639; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Up#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/25835_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Helsinki to Seoul; The action is #DUAL_POINT#; touch_point: 0.9834, 0.4958, lift_point: 0.9834, 0.4958; The action is #DUAL_POINT#; touch_point: 0.7979, 0.6124, lift_point: 0.7979, 0.6124; The action is #DUAL_POINT#; touch_point: 0.0596, 0.3437, lift_point: 0.0596, 0.3437; The action is #TYPE#; And the content is : \"typed_text\": \"flights from Helsinki to Seoul\"; The action is #DUAL_POINT#; touch_point: 0.1124, 0.5747, lift_point: 0.1124, 0.5747; The action is #DUAL_POINT#; touch_point: 0.2681, 0.7456, lift_point: 0.2681, 0.7456", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/66087_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Brazil?; The action is #DUAL_POINT#; touch_point: 0.7904, 0.4833, lift_point: 0.7904, 0.4833; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"capital of brazil\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"capital of Brazil\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/43001_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the Lyft app; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5894, 0.8664, lift_point: 0.5894, 0.8664; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.0481, 0.5055, lift_point: 0.0481, 0.5055", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"lyft app\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"lyft app\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/70744_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Chick-fil-A?; The action is #DUAL_POINT#; touch_point: 0.7752, 0.6834, lift_point: 0.7752, 0.6834; The action is #DUAL_POINT#; touch_point: 0.0525, 0.4934, lift_point: 0.0525, 0.4934; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on the menu at Chick-fil-A?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6076, 0.4409, lift_point: 0.6076, 0.4409; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Up#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79407_25.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Japan?", "response": "The action is #DUAL_POINT#; touch_point: 0.8038, 0.6245, lift_point: 0.8038, 0.6245", "label": "The action is #DUAL_POINT#; touch_point: 0.7783, 0.6389, lift_point: 0.7783, 0.6389", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/844_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Brazil?; The action is #DUAL_POINT#; touch_point: 0.6621, 0.6229, lift_point: 0.6621, 0.6229; The action is #DUAL_POINT#; touch_point: 0.0983, 0.4783, lift_point: 0.0983, 0.4783; The action is #TYPE#; And the content is : \"typed_text\": \"capital of brazil\"; The action is #PRESS_ENTER#", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27887_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Switzerland?; The action is #DUAL_POINT#; touch_point: 0.7689, 0.7016, lift_point: 0.7689, 0.7016", "response": "The action is #DUAL_POINT#; touch_point: 0.8864, 0.5116, lift_point: 0.8864, 0.5116", "label": "The action is #DUAL_POINT#; touch_point: 0.885, 0.4626, lift_point: 0.885, 0.4626", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/21162_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check the settings for the Google Play Books app; The action is #DUAL_POINT#; touch_point: 0.7864, 0.8754, lift_point: 0.7864, 0.8754; The action is #DUAL_POINT#; touch_point: 0.0615, 0.7847, lift_point: 0.0615, 0.7847; The action is #DUAL_POINT#; touch_point: 0.0596, 0.9275, lift_point: 0.0596, 0.9275; The action is #TYPE#; And the content is : \"typed_text\": \"google play books app\"; The action is #DUAL_POINT#; touch_point: 0.133, 0.2611, lift_point: 0.133, 0.2611", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.151, 0.8536, lift_point: 0.151, 0.8536", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/63015_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check the settings for the Amazon Music app; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6207, 0.8532, lift_point: 0.6207, 0.8532; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.0562, 0.7738, lift_point: 0.0562, 0.7738; The action is #DUAL_POINT#; touch_point: 0.0678, 0.9275, lift_point: 0.0678, 0.9275; The action is #TYPE#; And the content is : \"typed_text\": \" Amazon Music app\"; The action is #PRESS_ENTER#", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/25593_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the calendar; The action is #PRESS_HOME#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/45125_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Google the capital of Argentina; The action is #DUAL_POINT#; touch_point: 0.7736, 0.6632, lift_point: 0.7736, 0.6632; The action is #DUAL_POINT#; touch_point: 0.0424, 0.461, lift_point: 0.0424, 0.461; The action is #TYPE#; And the content is : \"typed_text\": \"capital of Argentina\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1365, 0.3342, lift_point: 0.1365, 0.3342", "label": "The action is #DUAL_POINT#; touch_point: 0.1417, 0.3636, lift_point: 0.1417, 0.3636", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/11152_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Vietnam?; The action is #DUAL_POINT#; touch_point: 0.877, 0.6724, lift_point: 0.877, 0.6724; The action is #DUAL_POINT#; touch_point: 0.8723, 0.6865, lift_point: 0.8723, 0.6865; The action is #DUAL_POINT#; touch_point: 0.7749, 0.6451, lift_point: 0.7749, 0.6451; The action is #DUAL_POINT#; touch_point: 0.1698, 0.5617, lift_point: 0.1698, 0.5617; The action is #DUAL_POINT#; touch_point: 0.113, 0.9241, lift_point: 0.113, 0.9241; The action is #TYPE#; And the content is : \"typed_text\": \"news in Vietnam?\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/804_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Cheesecake Factory?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8821, 0.4104, lift_point: 0.8821, 0.4104; The action is #PRESS_ENTER#; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on the menu at Cheesecake Factory\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/10901_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Paraguay?; The action is #PRESS_HOME#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/21075_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Chicago; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6314, 0.1508, lift_point: 0.6314, 0.1508; The action is #DUAL_POINT#; touch_point: 0.0587, 0.8981, lift_point: 0.0587, 0.8981; The action is #TYPE#; And the content is : \"typed_text\": \"Search for hotels in Chicago\"; The action is #DUAL_POINT#; touch_point: 0.1296, 0.3482, lift_point: 0.1296, 0.3482; The action is #DUAL_POINT#; touch_point: 0.3223, 0.42, lift_point: 0.3223, 0.42; The action is #DUAL_POINT#; touch_point: 0.8873, 0.5619, lift_point: 0.8873, 0.5619", "response": "The action is #DUAL_POINT#; touch_point: 0.903, 0.8391, lift_point: 0.903, 0.8391", "label": "The action is #DUAL_POINT#; touch_point: 0.9061, 0.8273, lift_point: 0.9061, 0.8273", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/23713_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Searchfor good French restaurants; The action is #DUAL_POINT#; touch_point: 0.8005, 0.6926, lift_point: 0.8005, 0.6926; The action is #DUAL_POINT#; touch_point: 0.3471, 0.6703, lift_point: 0.3471, 0.6703; The action is #DUAL_POINT#; touch_point: 0.8741, 0.2573, lift_point: 0.8741, 0.2573; The action is #TYPE#; And the content is : \"typed_text\": \"good french restaurants\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.5589, 0.0903, lift_point: 0.5589, 0.0903; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79328_16.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Pizza Hut?; The action is #DUAL_POINT#; touch_point: 0.7805, 0.6321, lift_point: 0.7805, 0.6321; The action is #DUAL_POINT#; touch_point: 0.065, 0.495, lift_point: 0.065, 0.495; The action is #TYPE#; And the content is : \"typed_text\": \"pizza Hut\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1174, 0.3098, lift_point: 0.1174, 0.3098; The action is #DUAL_POINT#; touch_point: 0.425, 0.1405, lift_point: 0.425, 0.1405; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.6194, 0.1229, lift_point: 0.6194, 0.1229; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.2708, 0.4597, lift_point: 0.2708, 0.4597", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/66341_16.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in New York?; The action is #DUAL_POINT#; touch_point: 0.0575, 0.0365, lift_point: 0.0575, 0.0365; The action is #DUAL_POINT#; touch_point: 0.8912, 0.667, lift_point: 0.8912, 0.667; The action is #DUAL_POINT#; touch_point: 0.8921, 0.3941, lift_point: 0.8921, 0.3941; The action is #DUAL_POINT#; touch_point: 0.8787, 0.3913, lift_point: 0.8787, 0.3913; The action is #DUAL_POINT#; touch_point: 0.7438, 0.7177, lift_point: 0.7438, 0.7177; The action is #DUAL_POINT#; touch_point: 0.7785, 0.6992, lift_point: 0.7785, 0.6992; The action is #DUAL_POINT#; touch_point: 0.8149, 0.6966, lift_point: 0.8149, 0.6966; The action is #DUAL_POINT#; touch_point: 0.0722, 0.4065, lift_point: 0.0722, 0.4065; The action is #TYPE#; And the content is : \"typed_text\": \"What time is it in New York?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1351, 0.6482, lift_point: 0.1351, 0.6482; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2715, 0.6476, lift_point: 0.2715, 0.6476; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0528, 0.2859, lift_point: 0.0528, 0.2859; The action is #DUAL_POINT#; touch_point: 0.8191, 0.4412, lift_point: 0.8191, 0.4412; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.4471, 0.1787, lift_point: 0.4471, 0.1787; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8042, 0.3148, lift_point: 0.8042, 0.3148; The action is #DUAL_POINT#; touch_point: 0.6879, 0.5777, lift_point: 0.6879, 0.5777; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8361, 0.2862, lift_point: 0.8361, 0.2862; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8366, 0.182, lift_point: 0.8366, 0.182; The action is #DUAL_POINT#; touch_point: 0.6461, 0.3885, lift_point: 0.6461, 0.3885; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/13941_56.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the clock; The action is #DUAL_POINT#; touch_point: 0.775, 0.4622, lift_point: 0.775, 0.4622; The action is #DUAL_POINT#; touch_point: 0.0613, 0.354, lift_point: 0.0613, 0.354", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/37545_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Las Vegas", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/84708_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new blush; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8868, 0.1718, lift_point: 0.8868, 0.1718; The action is #DUAL_POINT#; touch_point: 0.8847, 0.2336, lift_point: 0.8847, 0.2336; The action is #TYPE#; And the content is : \"typed_text\": \"Search for a new blush\"; The action is #PRESS_ENTER#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.3263, 0.4572, lift_point: 0.3263, 0.4572", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62224_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5016, 0.487, lift_point: 0.5016, 0.487; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.0826, 0.1155, lift_point: 0.0826, 0.1155; The action is #DUAL_POINT#; touch_point: 0.2721, 0.5962, lift_point: 0.2721, 0.5962; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.0833, 0.8883, lift_point: 0.0833, 0.8883; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/42617_34.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for top rated burger restaurants on Google Maps; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.4701, 0.1795, lift_point: 0.4701, 0.1795; The action is #DUAL_POINT#; touch_point: 0.1279, 0.8312, lift_point: 0.1279, 0.8312; The action is #DUAL_POINT#; touch_point: 0.6816, 0.8554, lift_point: 0.6816, 0.8554; The action is #DUAL_POINT#; touch_point: 0.1477, 0.4414, lift_point: 0.1477, 0.4414; The action is #TYPE#; And the content is : \"typed_text\": \"top rated burger restaurants \"", "response": "The action is #DUAL_POINT#; touch_point: 0.282, 0.4982, lift_point: 0.282, 0.4982", "label": "The action is #DUAL_POINT#; touch_point: 0.3094, 0.5558, lift_point: 0.3094, 0.5558", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/83074_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open the calendar app; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3635, 0.1318, lift_point: 0.3635, 0.1318; The action is #DUAL_POINT#; touch_point: 0.9054, 0.9204, lift_point: 0.9054, 0.9204; The action is #DUAL_POINT#; touch_point: 0.872, 0.4917, lift_point: 0.872, 0.4917", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/46747_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Los Angeles; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5887, 0.3817, lift_point: 0.5887, 0.3817; The action is #DUAL_POINT#; touch_point: 0.5887, 0.3817, lift_point: 0.5887, 0.3817; The action is #DUAL_POINT#; touch_point: 0.2844, 0.3641, lift_point: 0.2844, 0.3641; The action is #DUAL_POINT#; touch_point: 0.0633, 0.9037, lift_point: 0.0633, 0.9037; The action is #DUAL_POINT#; touch_point: 0.0459, 0.5181, lift_point: 0.0459, 0.5181; The action is #TYPE#; And the content is : \"typed_text\": \"Search for hotels in Los Angeles \"; The action is #DUAL_POINT#; touch_point: 0.1281, 0.3095, lift_point: 0.1281, 0.3095; The action is #DUAL_POINT#; touch_point: 0.3041, 0.3748, lift_point: 0.3041, 0.3748; The action is #DUAL_POINT#; touch_point: 0.2588, 0.8913, lift_point: 0.2588, 0.8913; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.38, 0.3022, lift_point: 0.38, 0.3022; The action is #DUAL_POINT#; touch_point: 0.5555, 0.9799, lift_point: 0.5555, 0.9799; The action is #DUAL_POINT#; touch_point: 0.1394, 0.9532, lift_point: 0.1394, 0.9532; The action is #DUAL_POINT#; touch_point: 0.7909, 0.5446, lift_point: 0.7909, 0.5446", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.1491, 0.9393, lift_point: 0.1491, 0.9393", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76074_17.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Venezuela?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7886, 0.6901, lift_point: 0.7886, 0.6901; The action is #DUAL_POINT#; touch_point: 0.7886, 0.6901, lift_point: 0.7886, 0.6901; The action is #DUAL_POINT#; touch_point: 0.7849, 0.6927, lift_point: 0.7849, 0.6927; The action is #DUAL_POINT#; touch_point: 0.0562, 0.4434, lift_point: 0.0562, 0.4434; The action is #DUAL_POINT#; touch_point: 0.0461, 0.9411, lift_point: 0.0461, 0.9411", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Venezuela?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"the news in Venezuela\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/56270_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Suriname?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7768, 0.6893, lift_point: 0.7768, 0.6893; The action is #DUAL_POINT#; touch_point: 0.046, 0.3666, lift_point: 0.046, 0.3666; The action is #TYPE#; And the content is : \"typed_text\": \"news in suriname?\"; The action is #DUAL_POINT#; touch_point: 0.1234, 0.2373, lift_point: 0.1234, 0.2373", "response": "The action is #DUAL_POINT#; touch_point: 0.3635, 0.206, lift_point: 0.3635, 0.206", "label": "The action is #DUAL_POINT#; touch_point: 0.3658, 0.1658, lift_point: 0.3658, 0.1658", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/63023_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Ecuador?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.672, 0.6159, lift_point: 0.672, 0.6159; The action is #TYPE#; And the content is : \"typed_text\": \"news in Ecuador\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/28468_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Google the capital of the United States; The action is #DUAL_POINT#; touch_point: 0.9165, 0.3032, lift_point: 0.9165, 0.3032; The action is #DUAL_POINT#; touch_point: 0.9418, 0.3691, lift_point: 0.9418, 0.3691; The action is #DUAL_POINT#; touch_point: 0.9481, 0.452, lift_point: 0.9481, 0.452; The action is #DUAL_POINT#; touch_point: 0.0578, 0.2436, lift_point: 0.0578, 0.2436; The action is #TYPE#; And the content is : \"typed_text\": \"Google the capital of the United States\"; The action is #DUAL_POINT#; touch_point: 0.7726, 0.7532, lift_point: 0.7726, 0.7532", "response": "The action is #DUAL_POINT#; touch_point: 0.1287, 0.5647, lift_point: 0.1287, 0.5647", "label": "The action is #DUAL_POINT#; touch_point: 0.1146, 0.4143, lift_point: 0.1146, 0.4143", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/74352_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Open a new incognito window in Chrome", "response": "The action is #DUAL_POINT#; touch_point: 0.7832, 0.6897, lift_point: 0.7832, 0.6897", "label": "The action is #DUAL_POINT#; touch_point: 0.7807, 0.642, lift_point: 0.7807, 0.642", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/47332_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 6pm; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5301, 0.908, lift_point: 0.5301, 0.908; The action is #DUAL_POINT#; touch_point: 0.8903, 0.4533, lift_point: 0.8903, 0.4533; The action is #DUAL_POINT#; touch_point: 0.8393, 0.4724, lift_point: 0.8393, 0.4724; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#", "response": "The action is #Scrolling Up#", "label": "The action is #Scrolling Up#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/57331_17.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the latest video from the Huffington Post; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7818, 0.5041, lift_point: 0.7818, 0.5041; The action is #DUAL_POINT#; touch_point: 0.0728, 0.0389, lift_point: 0.0728, 0.0389; The action is #DUAL_POINT#; touch_point: 0.0513, 0.8113, lift_point: 0.0513, 0.8113; The action is #DUAL_POINT#; touch_point: 0.0548, 0.3905, lift_point: 0.0548, 0.3905; The action is #TYPE#; And the content is : \"typed_text\": \" latest video from the Huffington Post\"; The action is #DUAL_POINT#; touch_point: 0.1108, 0.3338, lift_point: 0.1108, 0.3338; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3813, 0.459, lift_point: 0.3813, 0.459; The action is #DUAL_POINT#; touch_point: 0.3313, 0.9391, lift_point: 0.3313, 0.9391; The action is #DUAL_POINT#; touch_point: 0.1975, 0.5157, lift_point: 0.1975, 0.5157; The action is #DUAL_POINT#; touch_point: 0.1608, 0.4889, lift_point: 0.1608, 0.4889; The action is #DUAL_POINT#; touch_point: 0.1787, 0.4998, lift_point: 0.1787, 0.4998; The action is #DUAL_POINT#; touch_point: 0.1787, 0.4998, lift_point: 0.1787, 0.4998", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/54929_16.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Los Angeles; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5887, 0.3817, lift_point: 0.5887, 0.3817; The action is #DUAL_POINT#; touch_point: 0.5887, 0.3817, lift_point: 0.5887, 0.3817; The action is #DUAL_POINT#; touch_point: 0.2844, 0.3641, lift_point: 0.2844, 0.3641; The action is #DUAL_POINT#; touch_point: 0.0633, 0.9037, lift_point: 0.0633, 0.9037; The action is #DUAL_POINT#; touch_point: 0.0459, 0.5181, lift_point: 0.0459, 0.5181; The action is #TYPE#; And the content is : \"typed_text\": \"Search for hotels in Los Angeles \"; The action is #DUAL_POINT#; touch_point: 0.1281, 0.3095, lift_point: 0.1281, 0.3095", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.3041, 0.3748, lift_point: 0.3041, 0.3748", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76074_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Sydney to Buenos aires; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3745, 0.4935, lift_point: 0.3745, 0.4935; The action is #DUAL_POINT#; touch_point: 0.0542, 0.4256, lift_point: 0.0542, 0.4256; The action is #TYPE#; And the content is : \"typed_text\": \"flights from Sydney to Buenos aires\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3063, 0.6541, lift_point: 0.3063, 0.6541; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3338, 0.5289, lift_point: 0.3338, 0.5289; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1764, 0.6049, lift_point: 0.1764, 0.6049; The action is #DUAL_POINT#; touch_point: 0.4134, 0.6631, lift_point: 0.4134, 0.6631; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/20704_34.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Zurich to Buenos aires; The action is #DUAL_POINT#; touch_point: 0.7884, 0.7088, lift_point: 0.7884, 0.7088; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8173, 0.6623, lift_point: 0.8173, 0.6623; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6821, lift_point: 0.7794, 0.6821; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6933, lift_point: 0.7794, 0.6933; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8112, 0.675, lift_point: 0.8112, 0.675; The action is #DUAL_POINT#; touch_point: 0.81, 0.6775, lift_point: 0.81, 0.6775; The action is #DUAL_POINT#; touch_point: 0.0635, 0.5123, lift_point: 0.0635, 0.5123; The action is #TYPE#; And the content is : \"typed_text\": \"flight from zurich to buenos aires\"; The action is #DUAL_POINT#; touch_point: 0.2101, 0.4208, lift_point: 0.2101, 0.4208; The action is #DUAL_POINT#; touch_point: 0.8827, 0.205, lift_point: 0.8827, 0.205; The action is #DUAL_POINT#; touch_point: 0.6941, 0.5458, lift_point: 0.6941, 0.5458; The action is #DUAL_POINT#; touch_point: 0.3321, 0.1299, lift_point: 0.3321, 0.1299; The action is #DUAL_POINT#; touch_point: 0.1229, 0.933, lift_point: 0.1229, 0.933; The action is #DUAL_POINT#; touch_point: 0.4991, 0.8948, lift_point: 0.4991, 0.8948; The action is #DUAL_POINT#; touch_point: 0.402, 0.9084, lift_point: 0.402, 0.9084; The action is #DUAL_POINT#; touch_point: 0.8984, 0.5131, lift_point: 0.8984, 0.5131; The action is #DUAL_POINT#; touch_point: 0.6419, 0.9862, lift_point: 0.6419, 0.9862; The action is #DUAL_POINT#; touch_point: 0.5557, 0.4411, lift_point: 0.5557, 0.4411; The action is #DUAL_POINT#; touch_point: 0.4457, 0.1982, lift_point: 0.4457, 0.1982; The action is #DUAL_POINT#; touch_point: 0.3076, 0.6403, lift_point: 0.3076, 0.6403; The action is #DUAL_POINT#; touch_point: 0.1304, 0.3286, lift_point: 0.1304, 0.3286; The action is #DUAL_POINT#; touch_point: 0.9007, 0.396, lift_point: 0.9007, 0.396; The action is #DUAL_POINT#; touch_point: 0.5737, 0.5247, lift_point: 0.5737, 0.5247; The action is #DUAL_POINT#; touch_point: 0.6477, 0.4744, lift_point: 0.6477, 0.4744", "response": "The action is #DUAL_POINT#; touch_point: 0.5592, 0.8255, lift_point: 0.5592, 0.8255", "label": "The action is #DUAL_POINT#; touch_point: 0.5543, 0.7882, lift_point: 0.5543, 0.7882", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/68164_31.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Colombia?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3506, 0.4922, lift_point: 0.3506, 0.4922; The action is #DUAL_POINT#; touch_point: 0.0503, 0.5814, lift_point: 0.0503, 0.5814; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Colombia?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#", "response": "The action is #DUAL_POINT#; touch_point: 0.3264, 0.4692, lift_point: 0.3264, 0.4692", "label": "The action is #DUAL_POINT#; touch_point: 0.3179, 0.4504, lift_point: 0.3179, 0.4504", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/16946_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Bangladesh?", "response": "The action is #DUAL_POINT#; touch_point: 0.9098, 0.4948, lift_point: 0.9098, 0.4948", "label": "The action is #DUAL_POINT#; touch_point: 0.9121, 0.5982, lift_point: 0.9121, 0.5982", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/61924_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest JCPenney?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7914, 0.6852, lift_point: 0.7914, 0.6852; The action is #DUAL_POINT#; touch_point: 0.7914, 0.6852, lift_point: 0.7914, 0.6852; The action is #DUAL_POINT#; touch_point: 0.7988, 0.6827, lift_point: 0.7988, 0.6827; The action is #DUAL_POINT#; touch_point: 0.7988, 0.6827, lift_point: 0.7988, 0.6827; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2559, 0.3466, lift_point: 0.2559, 0.3466; The action is #TYPE#; And the content is : \"typed_text\": \"How do I get to the nearest JCPenney\"; The action is #DUAL_POINT#; touch_point: 0.1115, 0.3638, lift_point: 0.1115, 0.3638", "response": "The action is #PRESS_ENTER#", "label": "The action is #DUAL_POINT#; touch_point: 0.1095, 0.535, lift_point: 0.1095, 0.535", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/55811_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What are the best selling refrigerators at Home Depot?", "response": "The action is #DUAL_POINT#; touch_point: 0.7941, 0.6912, lift_point: 0.7941, 0.6912", "label": "The action is #DUAL_POINT#; touch_point: 0.7906, 0.6852, lift_point: 0.7906, 0.6852", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/5053_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news this weekend?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8785, 0.458, lift_point: 0.8785, 0.458; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news this weekend\"; The action is #PRESS_ENTER#", "response": "The action is #DUAL_POINT#; touch_point: 0.3533, 0.532, lift_point: 0.3533, 0.532", "label": "The action is #DUAL_POINT#; touch_point: 0.3583, 0.6785, lift_point: 0.3583, 0.6785", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44806_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Las Vegas", "response": "The action is #DUAL_POINT#; touch_point: 0.8021, 0.6157, lift_point: 0.8021, 0.6157", "label": "The action is #DUAL_POINT#; touch_point: 0.8259, 0.57, lift_point: 0.8259, 0.57", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/49446_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new perfume; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.774, 0.6812, lift_point: 0.774, 0.6812; The action is #DUAL_POINT#; touch_point: 0.7713, 0.657, lift_point: 0.7713, 0.657; The action is #DUAL_POINT#; touch_point: 0.8803, 0.4068, lift_point: 0.8803, 0.4068; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8923, 0.4162, lift_point: 0.8923, 0.4162; The action is #DUAL_POINT#; touch_point: 0.0677, 0.3269, lift_point: 0.0677, 0.3269", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"new perfume\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"new perfume\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/85180_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new nail polish", "response": "The action is #DUAL_POINT#; touch_point: 0.8814, 0.5022, lift_point: 0.8814, 0.5022", "label": "The action is #DUAL_POINT#; touch_point: 0.9036, 0.4394, lift_point: 0.9036, 0.4394", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/24593_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in New York?; The action is #DUAL_POINT#; touch_point: 0.0575, 0.0365, lift_point: 0.0575, 0.0365; The action is #DUAL_POINT#; touch_point: 0.8912, 0.667, lift_point: 0.8912, 0.667; The action is #DUAL_POINT#; touch_point: 0.8921, 0.3941, lift_point: 0.8921, 0.3941; The action is #DUAL_POINT#; touch_point: 0.8787, 0.3913, lift_point: 0.8787, 0.3913; The action is #DUAL_POINT#; touch_point: 0.7438, 0.7177, lift_point: 0.7438, 0.7177; The action is #DUAL_POINT#; touch_point: 0.7785, 0.6992, lift_point: 0.7785, 0.6992; The action is #DUAL_POINT#; touch_point: 0.8149, 0.6966, lift_point: 0.8149, 0.6966; The action is #DUAL_POINT#; touch_point: 0.0722, 0.4065, lift_point: 0.0722, 0.4065; The action is #TYPE#; And the content is : \"typed_text\": \"What time is it in New York?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1351, 0.6482, lift_point: 0.1351, 0.6482; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2715, 0.6476, lift_point: 0.2715, 0.6476; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0528, 0.2859, lift_point: 0.0528, 0.2859; The action is #DUAL_POINT#; touch_point: 0.8191, 0.4412, lift_point: 0.8191, 0.4412; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.4471, 0.1787, lift_point: 0.4471, 0.1787; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8042, 0.3148, lift_point: 0.8042, 0.3148; The action is #DUAL_POINT#; touch_point: 0.6879, 0.5777, lift_point: 0.6879, 0.5777; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/13941_41.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Find the nearest electronics store that's open; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7989, 0.7252, lift_point: 0.7989, 0.7252; The action is #DUAL_POINT#; touch_point: 0.7928, 0.682, lift_point: 0.7928, 0.682; The action is #DUAL_POINT#; touch_point: 0.7928, 0.682, lift_point: 0.7928, 0.682; The action is #DUAL_POINT#; touch_point: 0.7936, 0.6979, lift_point: 0.7936, 0.6979", "response": "The action is #DUAL_POINT#; touch_point: 0.7936, 0.6979, lift_point: 0.7936, 0.6979", "label": "The action is #DUAL_POINT#; touch_point: 0.7936, 0.6979, lift_point: 0.7936, 0.6979", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/14304_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Vietnam?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8874, 0.4462, lift_point: 0.8874, 0.4462; The action is #DUAL_POINT#; touch_point: 0.0649, 0.4272, lift_point: 0.0649, 0.4272", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Vietnam?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Vietnam?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/15928_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest news in space technology?; The action is #DUAL_POINT#; touch_point: 0.8252, 0.6178, lift_point: 0.8252, 0.6178", "response": "The action is #DUAL_POINT#; touch_point: 0.8252, 0.6178, lift_point: 0.8252, 0.6178", "label": "The action is #DUAL_POINT#; touch_point: 0.8132, 0.6673, lift_point: 0.8132, 0.6673", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/50059_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Delhi?; The action is #DUAL_POINT#; touch_point: 0.768, 0.7048, lift_point: 0.768, 0.7048; The action is #DUAL_POINT#; touch_point: 0.052, 0.5235, lift_point: 0.052, 0.5235; The action is #TYPE#; And the content is : \"typed_text\": \"weather like in Delhi\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1344, 0.4593, lift_point: 0.1344, 0.4593", "label": "The action is #DUAL_POINT#; touch_point: 0.1236, 0.4752, lift_point: 0.1236, 0.4752", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/17669_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest video from Game Informer?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6627, 0.8258, lift_point: 0.6627, 0.8258; The action is #DUAL_POINT#; touch_point: 0.0549, 0.052, lift_point: 0.0549, 0.052; The action is #DUAL_POINT#; touch_point: 0.0515, 0.0547, lift_point: 0.0515, 0.0547; The action is #DUAL_POINT#; touch_point: 0.0554, 0.8045, lift_point: 0.0554, 0.8045; The action is #TYPE#; And the content is : \"typed_text\": \"Game Informer\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.123, 0.3606, lift_point: 0.123, 0.3606; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4453, 0.3949, lift_point: 0.4453, 0.3949; The action is #DUAL_POINT#; touch_point: 0.1563, 0.5084, lift_point: 0.1563, 0.5084", "response": "The action is #DUAL_POINT#; touch_point: 0.3243, 0.9528, lift_point: 0.3243, 0.9528", "label": "The action is #DUAL_POINT#; touch_point: 0.3141, 0.9451, lift_point: 0.3141, 0.9451", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/7705_13.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Spain?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5111, 0.4875, lift_point: 0.5111, 0.4875; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.0975, 0.4783, lift_point: 0.0975, 0.4783; The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of Spain ?\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6981, 0.9436, lift_point: 0.6981, 0.9436", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/1427_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the new iPhone on eBay?; The action is #DUAL_POINT#; touch_point: 0.7737, 0.699, lift_point: 0.7737, 0.699; The action is #DUAL_POINT#; touch_point: 0.0621, 0.536, lift_point: 0.0621, 0.536; The action is #TYPE#; And the content is : \"typed_text\": \"price of the new iPhone on eBay\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #DUAL_POINT#; touch_point: 0.1411, 0.7654, lift_point: 0.1411, 0.7654", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/17269_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Seattle?; The action is #TYPE#; And the content is : \"typed_text\": \"How much does a 3 bedroom apartment rent for in Seattle?\"; The action is #DUAL_POINT#; touch_point: 0.1519, 0.2186, lift_point: 0.1519, 0.2186; The action is #DUAL_POINT#; touch_point: 0.2245, 0.6318, lift_point: 0.2245, 0.6318; The action is #DUAL_POINT#; touch_point: 0.1952, 0.0642, lift_point: 0.1952, 0.0642; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3727, 0.3787, lift_point: 0.3727, 0.3787; The action is #DUAL_POINT#; touch_point: 0.4, 0.914, lift_point: 0.4, 0.914; The action is #DUAL_POINT#; touch_point: 0.4, 0.914, lift_point: 0.4, 0.914", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.4, 0.914, lift_point: 0.4, 0.914", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/29223_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Nepal?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.709, 0.6491, lift_point: 0.709, 0.6491; The action is #DUAL_POINT#; touch_point: 0.675, 0.6023, lift_point: 0.675, 0.6023; The action is #DUAL_POINT#; touch_point: 0.1174, 0.4815, lift_point: 0.1174, 0.4815; The action is #TYPE#; And the content is : \"typed_text\": \"news in Nepal\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/27326_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the latest video from the Huffington Post; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.7836, 0.492, lift_point: 0.7836, 0.492; The action is #DUAL_POINT#; touch_point: 0.0604, 0.8519, lift_point: 0.0604, 0.8519; The action is #TYPE#; And the content is : \"typed_text\": \"latest video from the Huffington Post\"; The action is #DUAL_POINT#; touch_point: 0.1179, 0.2291, lift_point: 0.1179, 0.2291; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.2892, 0.4823, lift_point: 0.2892, 0.4823", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/21290_12.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in India?; The action is #DUAL_POINT#; touch_point: 0.0652, 0.2505, lift_point: 0.0652, 0.2505", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"news in india\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in India?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/31292_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new lipgloss; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4735, 0.8521, lift_point: 0.4735, 0.8521; The action is #DUAL_POINT#; touch_point: 0.4735, 0.8572, lift_point: 0.4735, 0.8572; The action is #DUAL_POINT#; touch_point: 0.4731, 0.8504, lift_point: 0.4731, 0.8504; The action is #DUAL_POINT#; touch_point: 0.4819, 0.9037, lift_point: 0.4819, 0.9037; The action is #DUAL_POINT#; touch_point: 0.0831, 0.5901, lift_point: 0.0831, 0.5901; The action is #DUAL_POINT#; touch_point: 0.0705, 0.9081, lift_point: 0.0705, 0.9081; The action is #DUAL_POINT#; touch_point: 0.0651, 0.5329, lift_point: 0.0651, 0.5329; The action is #TYPE#; And the content is : \"typed_text\": \"Search for a new lipgloss \"; The action is #DUAL_POINT#; touch_point: 0.1519, 0.4733, lift_point: 0.1519, 0.4733; The action is #DUAL_POINT#; touch_point: 0.3788, 0.3476, lift_point: 0.3788, 0.3476; The action is #DUAL_POINT#; touch_point: 0.9086, 0.8965, lift_point: 0.9086, 0.8965; The action is #DUAL_POINT#; touch_point: 0.7091, 0.4199, lift_point: 0.7091, 0.4199", "response": "The action is #DUAL_POINT#; touch_point: 0.7242, 0.4225, lift_point: 0.7242, 0.4225", "label": "The action is #DUAL_POINT#; touch_point: 0.7042, 0.4983, lift_point: 0.7042, 0.4983", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/65395_16.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant in Portland?; The action is #DUAL_POINT#; touch_point: 0.9428, 0.524, lift_point: 0.9428, 0.524; The action is #DUAL_POINT#; touch_point: 0.0667, 0.271, lift_point: 0.0667, 0.271", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"good restaurant in Portland\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats a good restaurant in Portland?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/10083_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in New York?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7879, 0.6835, lift_point: 0.7879, 0.6835; The action is #DUAL_POINT#; touch_point: 0.7879, 0.6835, lift_point: 0.7879, 0.6835; The action is #DUAL_POINT#; touch_point: 0.7891, 0.6835, lift_point: 0.7891, 0.6835; The action is #DUAL_POINT#; touch_point: 0.7891, 0.6835, lift_point: 0.7891, 0.6835; The action is #DUAL_POINT#; touch_point: 0.0597, 0.5494, lift_point: 0.0597, 0.5494; The action is #TYPE#; And the content is : \"typed_text\": \" weather like in New York\"", "response": "The action is #DUAL_POINT#; touch_point: 0.137, 0.4268, lift_point: 0.137, 0.4268", "label": "The action is #DUAL_POINT#; touch_point: 0.1428, 0.4671, lift_point: 0.1428, 0.4671", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/2742_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Go to Reddit; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.793, 0.1536, lift_point: 0.793, 0.1536", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/78840_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for top rated pizza restaurants on Maps; The action is #PRESS_HOME#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.4967, 0.8751, lift_point: 0.4967, 0.8751", "label": "The action is #DUAL_POINT#; touch_point: 0.5025, 0.8773, lift_point: 0.5025, 0.8773", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/22481_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 7pm; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5388, 0.6488, lift_point: 0.5388, 0.6488; The action is #DUAL_POINT#; touch_point: 0.8562, 0.4781, lift_point: 0.8562, 0.4781; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#", "response": "The action is #Scrolling Up#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/65790_26.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Pizza Hut?; The action is #DUAL_POINT#; touch_point: 0.7805, 0.6321, lift_point: 0.7805, 0.6321; The action is #DUAL_POINT#; touch_point: 0.065, 0.495, lift_point: 0.065, 0.495; The action is #TYPE#; And the content is : \"typed_text\": \"pizza Hut\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1174, 0.3098, lift_point: 0.1174, 0.3098; The action is #DUAL_POINT#; touch_point: 0.425, 0.1405, lift_point: 0.425, 0.1405; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.6194, 0.1229, lift_point: 0.6194, 0.1229; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2708, 0.4597, lift_point: 0.2708, 0.4597; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0703, 0.0858, lift_point: 0.0703, 0.0858; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.1167, 0.6356, lift_point: 0.1167, 0.6356", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.5038, 0.4206, lift_point: 0.5038, 0.4206", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/66341_24.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Colombia?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.782, 0.7322, lift_point: 0.782, 0.7322; The action is #DUAL_POINT#; touch_point: 0.1816, 0.6694, lift_point: 0.1816, 0.6694; The action is #DUAL_POINT#; touch_point: 0.1082, 0.9314, lift_point: 0.1082, 0.9314; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Colombia?\"; The action is #PRESS_ENTER#", "response": "The action is #DUAL_POINT#; touch_point: 0.2276, 0.1633, lift_point: 0.2276, 0.1633", "label": "The action is #DUAL_POINT#; touch_point: 0.2264, 0.1648, lift_point: 0.2264, 0.1648", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/50308_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Drake video on YouTube; The action is #DUAL_POINT#; touch_point: 0.8719, 0.3538, lift_point: 0.8719, 0.3538; The action is #DUAL_POINT#; touch_point: 0.4114, 0.5121, lift_point: 0.4114, 0.5121; The action is #DUAL_POINT#; touch_point: 0.0556, 0.8155, lift_point: 0.0556, 0.8155; The action is #DUAL_POINT#; touch_point: 0.0582, 0.7161, lift_point: 0.0582, 0.7161", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"new drake video\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Play the new Drake \"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/38148_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Canada?; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.9141, 0.4266, lift_point: 0.9141, 0.4266; The action is #DUAL_POINT#; touch_point: 0.8934, 0.4925, lift_point: 0.8934, 0.4925; The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of Canada?\"; The action is #PRESS_ENTER#", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44874_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Barcelona to Mexico city; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.6161, 0.3105, lift_point: 0.6161, 0.3105; The action is #DUAL_POINT#; touch_point: 0.6161, 0.3105, lift_point: 0.6161, 0.3105; The action is #DUAL_POINT#; touch_point: 0.2223, 0.7321, lift_point: 0.2223, 0.7321; The action is #DUAL_POINT#; touch_point: 0.0708, 0.9074, lift_point: 0.0708, 0.9074; The action is #DUAL_POINT#; touch_point: 0.074, 0.3732, lift_point: 0.074, 0.3732; The action is #TYPE#; And the content is : \"typed_text\": \"Search for flights from Barcelona to Mexico city \"; The action is #DUAL_POINT#; touch_point: 0.1612, 0.3762, lift_point: 0.1612, 0.3762", "response": "The action is #DUAL_POINT#; touch_point: 0.3209, 0.4613, lift_point: 0.3209, 0.4613", "label": "The action is #DUAL_POINT#; touch_point: 0.3079, 0.428, lift_point: 0.3079, 0.428", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/60314_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Zurich to Buenos aires; The action is #DUAL_POINT#; touch_point: 0.7884, 0.7088, lift_point: 0.7884, 0.7088; The action is #PRESS_HOME#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8173, 0.6623, lift_point: 0.8173, 0.6623; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6821, lift_point: 0.7794, 0.6821; The action is #DUAL_POINT#; touch_point: 0.7794, 0.6933, lift_point: 0.7794, 0.6933; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8112, 0.675, lift_point: 0.8112, 0.675; The action is #DUAL_POINT#; touch_point: 0.81, 0.6775, lift_point: 0.81, 0.6775; The action is #DUAL_POINT#; touch_point: 0.0635, 0.5123, lift_point: 0.0635, 0.5123; The action is #TYPE#; And the content is : \"typed_text\": \"flight from zurich to buenos aires\"; The action is #DUAL_POINT#; touch_point: 0.2101, 0.4208, lift_point: 0.2101, 0.4208; The action is #DUAL_POINT#; touch_point: 0.8827, 0.205, lift_point: 0.8827, 0.205; The action is #DUAL_POINT#; touch_point: 0.6941, 0.5458, lift_point: 0.6941, 0.5458; The action is #DUAL_POINT#; touch_point: 0.3321, 0.1299, lift_point: 0.3321, 0.1299; The action is #DUAL_POINT#; touch_point: 0.1229, 0.933, lift_point: 0.1229, 0.933; The action is #DUAL_POINT#; touch_point: 0.4991, 0.8948, lift_point: 0.4991, 0.8948; The action is #DUAL_POINT#; touch_point: 0.402, 0.9084, lift_point: 0.402, 0.9084; The action is #DUAL_POINT#; touch_point: 0.8984, 0.5131, lift_point: 0.8984, 0.5131; The action is #DUAL_POINT#; touch_point: 0.6419, 0.9862, lift_point: 0.6419, 0.9862; The action is #DUAL_POINT#; touch_point: 0.5557, 0.4411, lift_point: 0.5557, 0.4411", "response": "The action is #DUAL_POINT#; touch_point: 0.5557, 0.4411, lift_point: 0.5557, 0.4411", "label": "The action is #DUAL_POINT#; touch_point: 0.4457, 0.1982, lift_point: 0.4457, 0.1982", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/68164_25.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the time in San Francisco?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8732, 0.5281, lift_point: 0.8732, 0.5281", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the time in San Francisco?\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Whats the time in San Francisco\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/550_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the TCL TV?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3203, 0.4723, lift_point: 0.3203, 0.4723", "response": "The action is #DUAL_POINT#; touch_point: 0.061, 0.5007, lift_point: 0.061, 0.5007", "label": "The action is #DUAL_POINT#; touch_point: 0.0497, 0.5548, lift_point: 0.0497, 0.5548", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/35296_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for the best Mexican restaurants; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7804, 0.678, lift_point: 0.7804, 0.678; The action is #DUAL_POINT#; touch_point: 0.0421, 0.3922, lift_point: 0.0421, 0.3922; The action is #TYPE#; And the content is : \"typed_text\": \"best Mexican restaurants\"; The action is #DUAL_POINT#; touch_point: 0.1269, 0.3198, lift_point: 0.1269, 0.3198", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/11392_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of France?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/17114_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in New York?; The action is #DUAL_POINT#; touch_point: 0.0575, 0.0365, lift_point: 0.0575, 0.0365; The action is #DUAL_POINT#; touch_point: 0.8912, 0.667, lift_point: 0.8912, 0.667; The action is #DUAL_POINT#; touch_point: 0.8921, 0.3941, lift_point: 0.8921, 0.3941; The action is #DUAL_POINT#; touch_point: 0.8787, 0.3913, lift_point: 0.8787, 0.3913; The action is #DUAL_POINT#; touch_point: 0.7438, 0.7177, lift_point: 0.7438, 0.7177; The action is #DUAL_POINT#; touch_point: 0.7785, 0.6992, lift_point: 0.7785, 0.6992; The action is #DUAL_POINT#; touch_point: 0.8149, 0.6966, lift_point: 0.8149, 0.6966; The action is #DUAL_POINT#; touch_point: 0.0722, 0.4065, lift_point: 0.0722, 0.4065; The action is #TYPE#; And the content is : \"typed_text\": \"What time is it in New York?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1351, 0.6482, lift_point: 0.1351, 0.6482; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2715, 0.6476, lift_point: 0.2715, 0.6476; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0528, 0.2859, lift_point: 0.0528, 0.2859; The action is #DUAL_POINT#; touch_point: 0.8191, 0.4412, lift_point: 0.8191, 0.4412; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.4471, 0.1787, lift_point: 0.4471, 0.1787; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8042, 0.3148, lift_point: 0.8042, 0.3148; The action is #DUAL_POINT#; touch_point: 0.6879, 0.5777, lift_point: 0.6879, 0.5777; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8361, 0.2862, lift_point: 0.8361, 0.2862; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8366, 0.182, lift_point: 0.8366, 0.182", "response": "The action is #DUAL_POINT#; touch_point: 0.6536, 0.5057, lift_point: 0.6536, 0.5057", "label": "The action is #DUAL_POINT#; touch_point: 0.6461, 0.3885, lift_point: 0.6461, 0.3885", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/13941_50.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Switzerland?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.781, 0.6938, lift_point: 0.781, 0.6938; The action is #DUAL_POINT#; touch_point: 0.5761, 0.9414, lift_point: 0.5761, 0.9414; The action is #DUAL_POINT#; touch_point: 0.7757, 0.7155, lift_point: 0.7757, 0.7155", "response": "The action is #DUAL_POINT#; touch_point: 0.0648, 0.4097, lift_point: 0.0648, 0.4097", "label": "The action is #DUAL_POINT#; touch_point: 0.0565, 0.3216, lift_point: 0.0565, 0.3216", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/54851_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email; The action is #DUAL_POINT#; touch_point: 0.7998, 0.6996, lift_point: 0.7998, 0.6996; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4961, 0.5053, lift_point: 0.4961, 0.5053; The action is #DUAL_POINT#; touch_point: 0.4961, 0.5053, lift_point: 0.4961, 0.5053; The action is #DUAL_POINT#; touch_point: 0.1869, 0.3005, lift_point: 0.1869, 0.3005; The action is #DUAL_POINT#; touch_point: 0.061, 0.053, lift_point: 0.061, 0.053; The action is #DUAL_POINT#; touch_point: 0.061, 0.053, lift_point: 0.061, 0.053; The action is #DUAL_POINT#; touch_point: 0.5008, 0.4922, lift_point: 0.5008, 0.4922; The action is #DUAL_POINT#; touch_point: 0.924, 0.5731, lift_point: 0.924, 0.5731; The action is #DUAL_POINT#; touch_point: 0.3395, 0.4701, lift_point: 0.3395, 0.4701; The action is #DUAL_POINT#; touch_point: 0.9149, 0.5732, lift_point: 0.9149, 0.5732; The action is #DUAL_POINT#; touch_point: 0.527, 0.8599, lift_point: 0.527, 0.8599; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6778, 0.0722, lift_point: 0.6778, 0.0722; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8925, 0.0857, lift_point: 0.8925, 0.0857; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.0567, 0.0662, lift_point: 0.0567, 0.0662", "label": "The action is #DUAL_POINT#; touch_point: 0.7965, 0.0738, lift_point: 0.7965, 0.0738", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76490_19.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest news in planetary science?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1577, 0.43, lift_point: 0.1577, 0.43; The action is #DUAL_POINT#; touch_point: 0.0644, 0.7894, lift_point: 0.0644, 0.7894; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8837, 0.4217, lift_point: 0.8837, 0.4217; The action is #TYPE#; And the content is : \"typed_text\": \"planetary science?\"; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.5443, 0.8308, lift_point: 0.5443, 0.8308; The action is #DUAL_POINT#; touch_point: 0.0465, 0.9343, lift_point: 0.0465, 0.9343; The action is #TYPE#; And the content is : \"typed_text\": \"atest news in planetary science?\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1158, 0.5963, lift_point: 0.1158, 0.5963", "label": "The action is #DUAL_POINT#; touch_point: 0.1111, 0.206, lift_point: 0.1111, 0.206", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/8696_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the time in San Francisco?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8078, 0.6252, lift_point: 0.8078, 0.6252", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"time in San Francisco?\"", "label": "The action is #DUAL_POINT#; touch_point: 0.0561, 0.479, lift_point: 0.0561, 0.479", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/12986_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Show me some nice wallpapers for my phone; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4693, 0.8415, lift_point: 0.4693, 0.8415; The action is #DUAL_POINT#; touch_point: 0.0697, 0.4817, lift_point: 0.0697, 0.4817; The action is #DUAL_POINT#; touch_point: 0.0671, 0.905, lift_point: 0.0671, 0.905; The action is #DUAL_POINT#; touch_point: 0.0687, 0.3571, lift_point: 0.0687, 0.3571; The action is #TYPE#; And the content is : \"typed_text\": \"Show me some nice wallpapers for my phone \"; The action is #DUAL_POINT#; touch_point: 0.1593, 0.3999, lift_point: 0.1593, 0.3999; The action is #DUAL_POINT#; touch_point: 0.3359, 0.4812, lift_point: 0.3359, 0.4812", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/65143_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Pakistan?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8867, 0.519, lift_point: 0.8867, 0.519; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Pakistan?\"; The action is #DUAL_POINT#; touch_point: 0.1068, 0.4057, lift_point: 0.1068, 0.4057", "response": "The action is #DUAL_POINT#; touch_point: 0.3472, 0.1872, lift_point: 0.3472, 0.1872", "label": "The action is #DUAL_POINT#; touch_point: 0.3741, 0.3787, lift_point: 0.3741, 0.3787", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/26321_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Seattle; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7763, 0.6896, lift_point: 0.7763, 0.6896; The action is #DUAL_POINT#; touch_point: 0.0438, 0.4508, lift_point: 0.0438, 0.4508", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"hotels in seattle\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"hotels in seattle\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/39733_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the speed of a train?", "response": "The action is #DUAL_POINT#; touch_point: 0.8096, 0.6234, lift_point: 0.8096, 0.6234", "label": "The action is #DUAL_POINT#; touch_point: 0.8086, 0.6235, lift_point: 0.8086, 0.6235", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/12745_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new eyeshadow; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8925, 0.5339, lift_point: 0.8925, 0.5339; The action is #TYPE#; And the content is : \"typed_text\": \" new eyeshadow\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5124, 0.5403, lift_point: 0.5124, 0.5403; The action is #DUAL_POINT#; touch_point: 0.8828, 0.6863, lift_point: 0.8828, 0.6863", "response": "The action is #DUAL_POINT#; touch_point: 0.8583, 0.5253, lift_point: 0.8583, 0.5253", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62436_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Venezuela?; The action is #DUAL_POINT#; touch_point: 0.7989, 0.679, lift_point: 0.7989, 0.679; The action is #DUAL_POINT#; touch_point: 0.1811, 0.916, lift_point: 0.1811, 0.916; The action is #TYPE#; And the content is : \"typed_text\": \"news in venezuela\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.2405, 0.7142, lift_point: 0.2405, 0.7142", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/14351_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news this week?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7788, 0.694, lift_point: 0.7788, 0.694; The action is #DUAL_POINT#; touch_point: 0.7658, 0.6924, lift_point: 0.7658, 0.6924; The action is #DUAL_POINT#; touch_point: 0.0673, 0.4574, lift_point: 0.0673, 0.4574; The action is #TYPE#; And the content is : \"typed_text\": \"news this week\"; The action is #DUAL_POINT#; touch_point: 0.1219, 0.1713, lift_point: 0.1219, 0.1713; The action is #DUAL_POINT#; touch_point: 0.4387, 0.2272, lift_point: 0.4387, 0.2272", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/55137_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Seattle?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3576, 0.5059, lift_point: 0.3576, 0.5059; The action is #DUAL_POINT#; touch_point: 0.8887, 0.5227, lift_point: 0.8887, 0.5227; The action is #DUAL_POINT#; touch_point: 0.891, 0.8258, lift_point: 0.891, 0.8258; The action is #DUAL_POINT#; touch_point: 0.2741, 0.5065, lift_point: 0.2741, 0.5065; The action is #TYPE#; And the content is : \"typed_text\": \"How much does a 3 bedroom apartment rent for in Seattle?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1232, 0.3528, lift_point: 0.1232, 0.3528", "response": "The action is #DUAL_POINT#; touch_point: 0.3378, 0.4235, lift_point: 0.3378, 0.4235", "label": "The action is #DUAL_POINT#; touch_point: 0.8609, 0.9214, lift_point: 0.8609, 0.9214", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/30434_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Turn on notifications for the Google Maps app; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.8662, 0.1249, lift_point: 0.8662, 0.1249; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.3776, 0.5482, lift_point: 0.3776, 0.5482", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.6163, 0.5461, lift_point: 0.6163, 0.5461", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/28603_11.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Macy's?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8789, 0.4552, lift_point: 0.8789, 0.4552", "response": "The action is #DUAL_POINT#; touch_point: 0.8815, 0.4552, lift_point: 0.8815, 0.4552", "label": "The action is #DUAL_POINT#; touch_point: 0.8867, 0.412, lift_point: 0.8867, 0.412", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/5905_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Katy Perry video on YouTube; The action is #DUAL_POINT#; touch_point: 0.5618, 0.6315, lift_point: 0.5618, 0.6315; The action is #DUAL_POINT#; touch_point: 0.6427, 0.8874, lift_point: 0.6427, 0.8874; The action is #DUAL_POINT#; touch_point: 0.0461, 0.9547, lift_point: 0.0461, 0.9547; The action is #DUAL_POINT#; touch_point: 0.0596, 0.0544, lift_point: 0.0596, 0.0544; The action is #DUAL_POINT#; touch_point: 0.0442, 0.6459, lift_point: 0.0442, 0.6459; The action is #TYPE#; And the content is : \"typed_text\": \"Play the new Katy Perry video on YouTube\"; The action is #DUAL_POINT#; touch_point: 0.0945, 0.5782, lift_point: 0.0945, 0.5782; The action is #DUAL_POINT#; touch_point: 0.607, 0.6407, lift_point: 0.607, 0.6407", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/8533_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on my calendar for the rest of the month?; The action is #PRESS_HOME#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/63918_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Where can I buy a nice beach towel?; The action is #PRESS_HOME#; The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #TYPE#; And the content is : \"typed_text\": \"Where can I buy a nice beach towel?\"; The action is #Scrolling Up#; The action is #Scrolling Up#", "response": "The action is #Scrolling Up#", "label": "The action is #Scrolling Up#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/79301_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Ariana Grande video on YouTube; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.9374, 0.3882, lift_point: 0.9374, 0.3882; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.6396, 0.4336, lift_point: 0.6396, 0.4336; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.0712, 0.6123, lift_point: 0.0712, 0.6123; The action is #DUAL_POINT#; touch_point: 0.0396, 0.6168, lift_point: 0.0396, 0.6168; The action is #Scrolling Down#; The action is #PRESS_BACK#", "response": "The action is #DUAL_POINT#; touch_point: 0.0546, 0.8445, lift_point: 0.0546, 0.8445", "label": "The action is #DUAL_POINT#; touch_point: 0.0662, 0.8267, lift_point: 0.0662, 0.8267", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/75127_10.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Taylor Swift video on YouTube; The action is #DUAL_POINT#; touch_point: 0.7828, 0.8909, lift_point: 0.7828, 0.8909; The action is #DUAL_POINT#; touch_point: 0.0677, 0.0588, lift_point: 0.0677, 0.0588; The action is #DUAL_POINT#; touch_point: 0.0564, 0.8052, lift_point: 0.0564, 0.8052; The action is #DUAL_POINT#; touch_point: 0.0675, 0.5065, lift_point: 0.0675, 0.5065; The action is #TYPE#; And the content is : \"typed_text\": \"new Taylor Swift video\"; The action is #DUAL_POINT#; touch_point: 0.1215, 0.4143, lift_point: 0.1215, 0.4143; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.6178, 0.5036, lift_point: 0.6178, 0.5036", "label": "The action is #DUAL_POINT#; touch_point: 0.6668, 0.4068, lift_point: 0.6668, 0.4068", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/6741_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5016, 0.487, lift_point: 0.5016, 0.487; The action is #Scrolling Up#; The action is #DUAL_POINT#; touch_point: 0.0826, 0.1155, lift_point: 0.0826, 0.1155; The action is #DUAL_POINT#; touch_point: 0.2721, 0.5962, lift_point: 0.2721, 0.5962; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0809, 0.8737, lift_point: 0.0809, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.0795, 0.8737, lift_point: 0.0795, 0.8737; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.078, 0.8768, lift_point: 0.078, 0.8768; The action is #DUAL_POINT#; touch_point: 0.0833, 0.8883, lift_point: 0.0833, 0.8883; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.0818, 0.8851, lift_point: 0.0818, 0.8851; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.07, 0.9181, lift_point: 0.07, 0.9181; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0715, 0.9055, lift_point: 0.0715, 0.9055; The action is #DUAL_POINT#; touch_point: 0.0766, 0.873, lift_point: 0.0766, 0.873; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.798, 0.4661, lift_point: 0.798, 0.4661; The action is #DUAL_POINT#; touch_point: 0.076, 0.0763, lift_point: 0.076, 0.0763; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3219, 0.6493, lift_point: 0.3219, 0.6493; The action is #DUAL_POINT#; touch_point: 0.2395, 0.117, lift_point: 0.2395, 0.117; The action is #Scrolling Up#", "response": "The action is #Scrolling Up#", "label": "The action is #DUAL_POINT#; touch_point: 0.6477, 0.07, lift_point: 0.6477, 0.07", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/42617_60.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Google the capital of Paraguay; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8865, 0.3311, lift_point: 0.8865, 0.3311; The action is #DUAL_POINT#; touch_point: 0.0653, 0.3478, lift_point: 0.0653, 0.3478; The action is #TYPE#; And the content is : \"typed_text\": \"capital of Paraguay\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1441, 0.4244, lift_point: 0.1441, 0.4244", "label": "The action is #DUAL_POINT#; touch_point: 0.1408, 0.5501, lift_point: 0.1408, 0.5501", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/85309_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new mascara on Sephora; The action is #DUAL_POINT#; touch_point: 0.7857, 0.6857, lift_point: 0.7857, 0.6857; The action is #DUAL_POINT#; touch_point: 0.9056, 0.5086, lift_point: 0.9056, 0.5086; The action is #DUAL_POINT#; touch_point: 0.8936, 0.8628, lift_point: 0.8936, 0.8628; The action is #DUAL_POINT#; touch_point: 0.2862, 0.4856, lift_point: 0.2862, 0.4856; The action is #DUAL_POINT#; touch_point: 0.2875, 0.4116, lift_point: 0.2875, 0.4116; The action is #TYPE#; And the content is : \"typed_text\": \"new mascara on Sephora\"; The action is #PRESS_ENTER#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44996_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news about the US president?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7789, 0.6244, lift_point: 0.7789, 0.6244; The action is #DUAL_POINT#; touch_point: 0.0431, 0.5396, lift_point: 0.0431, 0.5396; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news about the US president?\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1144, 0.602, lift_point: 0.1144, 0.602", "label": "The action is #DUAL_POINT#; touch_point: 0.119, 0.822, lift_point: 0.119, 0.822", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/8270_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from NYC to Buenos aires; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8012, 0.6957, lift_point: 0.8012, 0.6957; The action is #DUAL_POINT#; touch_point: 0.7813, 0.6517, lift_point: 0.7813, 0.6517; The action is #DUAL_POINT#; touch_point: 0.7813, 0.6517, lift_point: 0.7813, 0.6517; The action is #DUAL_POINT#; touch_point: 0.0642, 0.6339, lift_point: 0.0642, 0.6339; The action is #DUAL_POINT#; touch_point: 0.9935, 0.466, lift_point: 0.9935, 0.466; The action is #TYPE#; And the content is : \"typed_text\": \"flights from NYC to Buenos aires\"; The action is #DUAL_POINT#; touch_point: 0.4465, 0.9944, lift_point: 0.4465, 0.9944; The action is #DUAL_POINT#; touch_point: 0.0622, 0.3084, lift_point: 0.0622, 0.3084; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.0509, 0.1702, lift_point: 0.0509, 0.1702; The action is #PRESS_BACK#; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7909, 0.6879, lift_point: 0.7909, 0.6879", "response": "The action is #DUAL_POINT#; touch_point: 0.7909, 0.6879, lift_point: 0.7909, 0.6879", "label": "The action is #DUAL_POINT#; touch_point: 0.7909, 0.6879, lift_point: 0.7909, 0.6879", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/2733_15.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in the Dominican Republic?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8785, 0.531, lift_point: 0.8785, 0.531; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in the Dominican Republic\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#", "response": "The action is #DUAL_POINT#; touch_point: 0.5838, 0.5234, lift_point: 0.5838, 0.5234", "label": "The action is #DUAL_POINT#; touch_point: 0.5953, 0.6958, lift_point: 0.5953, 0.6958", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/70273_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for good Chinese restaurants; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.893, 0.3801, lift_point: 0.893, 0.3801; The action is #TYPE#; And the content is : \"typed_text\": \"good chinese restaurant \"", "response": "The action is #DUAL_POINT#; touch_point: 0.1266, 0.4813, lift_point: 0.1266, 0.4813", "label": "The action is #DUAL_POINT#; touch_point: 0.1215, 0.6063, lift_point: 0.1215, 0.6063", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/51687_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from San Diego to Seattle", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"flights from San Diego to Seattle\"", "label": "The action is #DUAL_POINT#; touch_point: 0.0651, 0.3924, lift_point: 0.0651, 0.3924", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/43826_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Thailand?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/78906_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Seoul?; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.5385, 0.7858, lift_point: 0.5385, 0.7858; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8897, 0.2232, lift_point: 0.8897, 0.2232; The action is #TYPE#; And the content is : \"typed_text\": \"weather like in Seoul\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.79, 0.4597, lift_point: 0.79, 0.4597; The action is #DUAL_POINT#; touch_point: 0.6875, 0.5881, lift_point: 0.6875, 0.5881; The action is #DUAL_POINT#; touch_point: 0.792, 0.6095, lift_point: 0.792, 0.6095; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6118, 0.6305, lift_point: 0.6118, 0.6305; The action is #DUAL_POINT#; touch_point: 0.7374, 0.9374, lift_point: 0.7374, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7345, 0.9374, lift_point: 0.7345, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7406, 0.9516, lift_point: 0.7406, 0.9516; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.5144, 0.9441, lift_point: 0.5144, 0.9441; The action is #DUAL_POINT#; touch_point: 0.563, 0.9304, lift_point: 0.563, 0.9304; The action is #DUAL_POINT#; touch_point: 0.5624, 0.9437, lift_point: 0.5624, 0.9437; The action is #DUAL_POINT#; touch_point: 0.5609, 0.9437, lift_point: 0.5609, 0.9437", "response": "The action is #DUAL_POINT#; touch_point: 0.5609, 0.9437, lift_point: 0.5609, 0.9437", "label": "The action is #DUAL_POINT#; touch_point: 0.5624, 0.9437, lift_point: 0.5624, 0.9437", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/53409_60.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's a good restaurant near me?; The action is #DUAL_POINT#; touch_point: 0.1053, 0.515, lift_point: 0.1053, 0.515; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5031, 0.4783, lift_point: 0.5031, 0.4783; The action is #Scrolling Up#; The action is #Scrolling Up#", "response": "The action is #DUAL_POINT#; touch_point: 0.3477, 0.4968, lift_point: 0.3477, 0.4968", "label": "The action is #DUAL_POINT#; touch_point: 0.3325, 0.5901, lift_point: 0.3325, 0.5901", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/21940_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new lipgloss; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4735, 0.8521, lift_point: 0.4735, 0.8521; The action is #DUAL_POINT#; touch_point: 0.4735, 0.8572, lift_point: 0.4735, 0.8572; The action is #DUAL_POINT#; touch_point: 0.4731, 0.8504, lift_point: 0.4731, 0.8504; The action is #DUAL_POINT#; touch_point: 0.4819, 0.9037, lift_point: 0.4819, 0.9037; The action is #DUAL_POINT#; touch_point: 0.0831, 0.5901, lift_point: 0.0831, 0.5901; The action is #DUAL_POINT#; touch_point: 0.0705, 0.9081, lift_point: 0.0705, 0.9081; The action is #DUAL_POINT#; touch_point: 0.0651, 0.5329, lift_point: 0.0651, 0.5329; The action is #TYPE#; And the content is : \"typed_text\": \"Search for a new lipgloss \"; The action is #DUAL_POINT#; touch_point: 0.1519, 0.4733, lift_point: 0.1519, 0.4733; The action is #DUAL_POINT#; touch_point: 0.3788, 0.3476, lift_point: 0.3788, 0.3476; The action is #DUAL_POINT#; touch_point: 0.9086, 0.8965, lift_point: 0.9086, 0.8965; The action is #DUAL_POINT#; touch_point: 0.7091, 0.4199, lift_point: 0.7091, 0.4199; The action is #DUAL_POINT#; touch_point: 0.7042, 0.4983, lift_point: 0.7042, 0.4983; The action is #DUAL_POINT#; touch_point: 0.7214, 0.5225, lift_point: 0.7214, 0.5225; The action is #DUAL_POINT#; touch_point: 0.7956, 0.4719, lift_point: 0.7956, 0.4719", "response": "The action is #DUAL_POINT#; touch_point: 0.1845, 0.8997, lift_point: 0.1845, 0.8997", "label": "The action is #DUAL_POINT#; touch_point: 0.1883, 0.8366, lift_point: 0.1883, 0.8366", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/65395_19.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Malaysia?; The action is #DUAL_POINT#; touch_point: 0.74, 0.3022, lift_point: 0.74, 0.3022; The action is #DUAL_POINT#; touch_point: 0.0709, 0.5274, lift_point: 0.0709, 0.5274; The action is #TYPE#; And the content is : \"typed_text\": \" news in Malaysia?\"; The action is #DUAL_POINT#; touch_point: 0.173, 0.2474, lift_point: 0.173, 0.2474; The action is #DUAL_POINT#; touch_point: 0.4484, 0.1824, lift_point: 0.4484, 0.1824; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5666, 0.5369, lift_point: 0.5666, 0.5369", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.4535, 0.855, lift_point: 0.4535, 0.855", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/69485_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How much does a 3 bedroom apartment rent for in Chicago?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.2354, 0.6773, lift_point: 0.2354, 0.6773; The action is #DUAL_POINT#; touch_point: 0.0572, 0.4785, lift_point: 0.0572, 0.4785; The action is #TYPE#; And the content is : \"typed_text\": \"rent for 3 bedroom apartment in chicago\"; The action is #DUAL_POINT#; touch_point: 0.0627, 0.5717, lift_point: 0.0627, 0.5717; The action is #TYPE#; And the content is : \"typed_text\": \" in chicago\"; The action is #DUAL_POINT#; touch_point: 0.1198, 0.5591, lift_point: 0.1198, 0.5591; The action is #DUAL_POINT#; touch_point: 0.3566, 0.2985, lift_point: 0.3566, 0.2985; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2859, 0.2866, lift_point: 0.2859, 0.2866; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6261, 0.3049, lift_point: 0.6261, 0.3049; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/77654_17.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the 1000-Watt EGO Power+ Snow Blower?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3452, 0.4922, lift_point: 0.3452, 0.4922; The action is #DUAL_POINT#; touch_point: 0.0574, 0.4573, lift_point: 0.0574, 0.4573; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the price of the 1000-Watt EGO Power\"; The action is #TYPE#; And the content is : \"typed_text\": \"+ Snow Blower?\"; The action is #PRESS_ENTER#", "response": "The action is #Scrolling Down#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/34246_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email inbox; The action is #DUAL_POINT#; touch_point: 0.5677, 0.8324, lift_point: 0.5677, 0.8324; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1954, 0.309, lift_point: 0.1954, 0.309; The action is #DUAL_POINT#; touch_point: 0.8997, 0.5628, lift_point: 0.8997, 0.5628; The action is #DUAL_POINT#; touch_point: 0.9033, 0.642, lift_point: 0.9033, 0.642; The action is #DUAL_POINT#; touch_point: 0.0681, 0.1163, lift_point: 0.0681, 0.1163", "response": "The action is #DUAL_POINT#; touch_point: 0.7316, 0.2677, lift_point: 0.7316, 0.2677", "label": "The action is #DUAL_POINT#; touch_point: 0.1405, 0.2047, lift_point: 0.1405, 0.2047", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/19503_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Ed Sheeran video on YouTube; The action is #DUAL_POINT#; touch_point: 0.6395, 0.8607, lift_point: 0.6395, 0.8607; The action is #DUAL_POINT#; touch_point: 0.0551, 0.0814, lift_point: 0.0551, 0.0814", "response": "The action is #DUAL_POINT#; touch_point: 0.6386, 0.5289, lift_point: 0.6386, 0.5289", "label": "The action is #DUAL_POINT#; touch_point: 0.052, 0.5992, lift_point: 0.052, 0.5992", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/66452_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the US dollar exchange rate against the Australian Dollar?", "response": "The action is #PRESS_HOME#", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/18530_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What time is it in Berlin?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7945, 0.7061, lift_point: 0.7945, 0.7061; The action is #DUAL_POINT#; touch_point: 0.0722, 0.6814, lift_point: 0.0722, 0.6814", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"time in berlin\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"time in berlin\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/84210_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for the best Mexican restaurants; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7804, 0.678, lift_point: 0.7804, 0.678; The action is #DUAL_POINT#; touch_point: 0.0421, 0.3922, lift_point: 0.0421, 0.3922; The action is #TYPE#; And the content is : \"typed_text\": \"best Mexican restaurants\"; The action is #DUAL_POINT#; touch_point: 0.1269, 0.3198, lift_point: 0.1269, 0.3198; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.2736, 0.269, lift_point: 0.2736, 0.269", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/11392_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Play the new Bruno Mars video on YouTube; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7975, 0.8581, lift_point: 0.7975, 0.8581; The action is #DUAL_POINT#; touch_point: 0.0609, 0.7968, lift_point: 0.0609, 0.7968", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"new Bruno Mars video\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"Bruno Mars\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/7051_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at McDonalds?; The action is #DUAL_POINT#; touch_point: 0.7938, 0.6776, lift_point: 0.7938, 0.6776", "response": "The action is #DUAL_POINT#; touch_point: 0.7938, 0.6776, lift_point: 0.7938, 0.6776", "label": "The action is #DUAL_POINT#; touch_point: 0.7647, 0.6666, lift_point: 0.7647, 0.6666", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/85070_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of France?; The action is #DUAL_POINT#; touch_point: 0.0525, 0.4109, lift_point: 0.0525, 0.4109; The action is #TYPE#; And the content is : \"typed_text\": \"What is the capital of France?\"; The action is #PRESS_ENTER#; The action is #DUAL_POINT#; touch_point: 0.6834, 0.1475, lift_point: 0.6834, 0.1475; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #PRESS_BACK#; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6729, 0.321, lift_point: 0.6729, 0.321; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6606, 0.6509, lift_point: 0.6606, 0.6509; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/59443_27.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for flights from Buenos aires to Seoul; The action is #DUAL_POINT#; touch_point: 0.7739, 0.7018, lift_point: 0.7739, 0.7018; The action is #DUAL_POINT#; touch_point: 0.0576, 0.5985, lift_point: 0.0576, 0.5985; The action is #TYPE#; And the content is : \"typed_text\": \"buenos aires to seoul flights\"; The action is #DUAL_POINT#; touch_point: 0.1154, 0.4619, lift_point: 0.1154, 0.4619", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/47417_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Switzerland?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.781, 0.6938, lift_point: 0.781, 0.6938", "response": "The action is #DUAL_POINT#; touch_point: 0.781, 0.6938, lift_point: 0.781, 0.6938", "label": "The action is #DUAL_POINT#; touch_point: 0.5761, 0.9414, lift_point: 0.5761, 0.9414", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/54851_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at In-N-Out?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3775, 0.51, lift_point: 0.3775, 0.51; The action is #DUAL_POINT#; touch_point: 0.3556, 0.5193, lift_point: 0.3556, 0.5193; The action is #DUAL_POINT#; touch_point: 0.0733, 0.3744, lift_point: 0.0733, 0.3744; The action is #DUAL_POINT#; touch_point: 0.0747, 0.9215, lift_point: 0.0747, 0.9215; The action is #TYPE#; And the content is : \"typed_text\": \"Whats on the menu at In-N-Out?\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1447, 0.6171, lift_point: 0.1447, 0.6171", "label": "The action is #DUAL_POINT#; touch_point: 0.1491, 0.5146, lift_point: 0.1491, 0.5146", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/58847_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Who is the president of the United States?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8013, 0.1648, lift_point: 0.8013, 0.1648", "response": "The action is #DUAL_POINT#; touch_point: 0.8013, 0.1648, lift_point: 0.8013, 0.1648", "label": "The action is #DUAL_POINT#; touch_point: 0.8013, 0.1648, lift_point: 0.8013, 0.1648", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/62183_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Install the Starbucks app; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1612, 0.8445, lift_point: 0.1612, 0.8445; The action is #DUAL_POINT#; touch_point: 0.0647, 0.7869, lift_point: 0.0647, 0.7869; The action is #DUAL_POINT#; touch_point: 0.0647, 0.9081, lift_point: 0.0647, 0.9081", "response": "The action is #TYPE#; And the content is : \"typed_text\": \"starbucks\"", "label": "The action is #TYPE#; And the content is : \"typed_text\": \" Starbucks app\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/47308_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news this weekend?; The action is #PRESS_HOME#", "response": "The action is #DUAL_POINT#; touch_point: 0.885, 0.5788, lift_point: 0.885, 0.5788", "label": "The action is #DUAL_POINT#; touch_point: 0.8785, 0.458, lift_point: 0.8785, 0.458", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44806_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Set an alarm for 7pm; The action is #DUAL_POINT#; touch_point: 0.8973, 0.4526, lift_point: 0.8973, 0.4526; The action is #DUAL_POINT#; touch_point: 0.5975, 0.4096, lift_point: 0.5975, 0.4096; The action is #DUAL_POINT#; touch_point: 0.4163, 0.5058, lift_point: 0.4163, 0.5058; The action is #DUAL_POINT#; touch_point: 0.3482, 0.7223, lift_point: 0.3482, 0.7223; The action is #DUAL_POINT#; touch_point: 0.6747, 0.7048, lift_point: 0.6747, 0.7048", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #STATUS_TASK_COMPLETE#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/52726_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Brazil?; The action is #DUAL_POINT#; touch_point: 0.7904, 0.4833, lift_point: 0.7904, 0.4833; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #TYPE#; And the content is : \"typed_text\": \"capital of Brazil\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #DUAL_POINT#; touch_point: 0.284, 0.4831, lift_point: 0.284, 0.4831", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/43001_5.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the LG TV?; The action is #PRESS_HOME#", "response": "The action is #DUAL_POINT#; touch_point: 0.836, 0.6174, lift_point: 0.836, 0.6174", "label": "The action is #DUAL_POINT#; touch_point: 0.81, 0.502, lift_point: 0.81, 0.502", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/41517_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in theFalkland Islands?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8929, 0.491, lift_point: 0.8929, 0.491; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in theFalkland Islands?\"; The action is #DUAL_POINT#; touch_point: 0.118, 0.5821, lift_point: 0.118, 0.5821; The action is #DUAL_POINT#; touch_point: 0.4245, 0.3739, lift_point: 0.4245, 0.3739", "response": "The action is #DUAL_POINT#; touch_point: 0.8945, 0.5522, lift_point: 0.8945, 0.5522", "label": "The action is #DUAL_POINT#; touch_point: 0.8862, 0.5515, lift_point: 0.8862, 0.5515", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/67119_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my email; The action is #DUAL_POINT#; touch_point: 0.7998, 0.6996, lift_point: 0.7998, 0.6996; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4961, 0.5053, lift_point: 0.4961, 0.5053; The action is #DUAL_POINT#; touch_point: 0.4961, 0.5053, lift_point: 0.4961, 0.5053; The action is #DUAL_POINT#; touch_point: 0.1869, 0.3005, lift_point: 0.1869, 0.3005; The action is #DUAL_POINT#; touch_point: 0.061, 0.053, lift_point: 0.061, 0.053", "response": "The action is #DUAL_POINT#; touch_point: 0.061, 0.053, lift_point: 0.061, 0.053", "label": "The action is #DUAL_POINT#; touch_point: 0.061, 0.053, lift_point: 0.061, 0.053", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76490_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Google the capital of Panama", "response": "The action is #DUAL_POINT#; touch_point: 0.8096, 0.6214, lift_point: 0.8096, 0.6214", "label": "The action is #DUAL_POINT#; touch_point: 0.8079, 0.6236, lift_point: 0.8079, 0.6236", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/12758_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the capital of Brazil?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7954, 0.726, lift_point: 0.7954, 0.726; The action is #DUAL_POINT#; touch_point: 0.055, 0.5287, lift_point: 0.055, 0.5287; The action is #DUAL_POINT#; touch_point: 0.0638, 0.9427, lift_point: 0.0638, 0.9427; The action is #TYPE#; And the content is : \"typed_text\": \"capital of brazil\"", "response": "The action is #DUAL_POINT#; touch_point: 0.1287, 0.2683, lift_point: 0.1287, 0.2683", "label": "The action is #DUAL_POINT#; touch_point: 0.1306, 0.2331, lift_point: 0.1306, 0.2331", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/5410_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Philadelphia; The action is #DUAL_POINT#; touch_point: 0.785, 0.6899, lift_point: 0.785, 0.6899", "response": "The action is #DUAL_POINT#; touch_point: 0.0642, 0.4135, lift_point: 0.0642, 0.4135", "label": "The action is #DUAL_POINT#; touch_point: 0.0448, 0.4535, lift_point: 0.0448, 0.4535", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/63156_2.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Zurich; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8849, 0.5926, lift_point: 0.8849, 0.5926; The action is #TYPE#; And the content is : \"typed_text\": \"hotels in Zurich\"", "response": "The action is #PRESS_ENTER#", "label": "The action is #PRESS_ENTER#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44698_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for a new lipstick on Sephora; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7851, 0.6795, lift_point: 0.7851, 0.6795; The action is #DUAL_POINT#; touch_point: 0.0504, 0.3893, lift_point: 0.0504, 0.3893; The action is #DUAL_POINT#; touch_point: 0.0561, 0.9221, lift_point: 0.0561, 0.9221; The action is #TYPE#; And the content is : \"typed_text\": \"sephora\"; The action is #DUAL_POINT#; touch_point: 0.1934, 0.2322, lift_point: 0.1934, 0.2322", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/80102_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Seattle; The action is #DUAL_POINT#; touch_point: 0.8218, 0.615, lift_point: 0.8218, 0.615; The action is #DUAL_POINT#; touch_point: 0.7961, 0.5846, lift_point: 0.7961, 0.5846", "response": "The action is #DUAL_POINT#; touch_point: 0.1925, 0.7942, lift_point: 0.1925, 0.7942", "label": "The action is #DUAL_POINT#; touch_point: 0.18, 0.7055, lift_point: 0.18, 0.7055", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/49409_3.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What is the speed of a rocket?; The action is #Scrolling Down#; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.4965, 0.7021, lift_point: 0.4965, 0.7021; The action is #DUAL_POINT#; touch_point: 0.1406, 0.6026, lift_point: 0.1406, 0.6026; The action is #DUAL_POINT#; touch_point: 0.0632, 0.8851, lift_point: 0.0632, 0.8851", "response": "The action is #DUAL_POINT#; touch_point: 0.0656, 0.4242, lift_point: 0.0656, 0.4242", "label": "The action is #TYPE#; And the content is : \"typed_text\": \"What is the speed of a rocket?\"", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/20912_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Check my gmail; The action is #DUAL_POINT#; touch_point: 0.0293, 0.6292, lift_point: 0.0293, 0.6292; The action is #DUAL_POINT#; touch_point: 0.5576, 0.6392, lift_point: 0.5576, 0.6392; The action is #DUAL_POINT#; touch_point: 0.8956, 0.3634, lift_point: 0.8956, 0.3634; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.1509, 0.2833, lift_point: 0.1509, 0.2833; The action is #DUAL_POINT#; touch_point: 0.9171, 0.6295, lift_point: 0.9171, 0.6295; The action is #DUAL_POINT#; touch_point: 0.9324, 0.5184, lift_point: 0.9324, 0.5184", "response": "The action is #DUAL_POINT#; touch_point: 0.5412, 0.8418, lift_point: 0.5412, 0.8418", "label": "The action is #DUAL_POINT#; touch_point: 0.5279, 0.7919, lift_point: 0.5279, 0.7919", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/76215_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on my calendar for the rest of the week?; The action is #PRESS_HOME#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.3823, 0.189, lift_point: 0.3823, 0.189; The action is #DUAL_POINT#; touch_point: 0.9136, 0.9292, lift_point: 0.9136, 0.9292; The action is #DUAL_POINT#; touch_point: 0.8915, 0.5425, lift_point: 0.8915, 0.5425; The action is #Scrolling Down#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Up#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.066, 0.3526, lift_point: 0.066, 0.3526", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/84571_13.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Nordstrom?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7722, 0.6917, lift_point: 0.7722, 0.6917; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.7831, 0.6797, lift_point: 0.7831, 0.6797; The action is #DUAL_POINT#; touch_point: 0.7663, 0.7143, lift_point: 0.7663, 0.7143", "response": "The action is #DUAL_POINT#; touch_point: 0.7785, 0.6845, lift_point: 0.7785, 0.6845", "label": "The action is #DUAL_POINT#; touch_point: 0.7736, 0.7043, lift_point: 0.7736, 0.7043", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/53494_6.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news in Taiwan?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.9322, 0.5586, lift_point: 0.9322, 0.5586; The action is #TYPE#; And the content is : \"typed_text\": \"Whats the news in Taiwan?\"; The action is #TYPE#; And the content is : \"typed_text\": \"acgfn bvc\"; The action is #DUAL_POINT#; touch_point: 0.1162, 0.2948, lift_point: 0.1162, 0.2948; The action is #DUAL_POINT#; touch_point: 0.1056, 0.2084, lift_point: 0.1056, 0.2084", "response": "The action is #DUAL_POINT#; touch_point: 0.1056, 0.2084, lift_point: 0.1056, 0.2084", "label": "The action is #DUAL_POINT#; touch_point: 0.1056, 0.2133, lift_point: 0.1056, 0.2133", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/81919_7.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the weather like in Seoul?; The action is #PRESS_BACK#; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.5385, 0.7858, lift_point: 0.5385, 0.7858; The action is #PRESS_BACK#; The action is #DUAL_POINT#; touch_point: 0.8897, 0.2232, lift_point: 0.8897, 0.2232; The action is #TYPE#; And the content is : \"typed_text\": \"weather like in Seoul\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #PRESS_ENTER#; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.79, 0.4597, lift_point: 0.79, 0.4597; The action is #DUAL_POINT#; touch_point: 0.6875, 0.5881, lift_point: 0.6875, 0.5881; The action is #DUAL_POINT#; touch_point: 0.792, 0.6095, lift_point: 0.792, 0.6095; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6065, 0.3781, lift_point: 0.6065, 0.3781; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6102, 0.6719, lift_point: 0.6102, 0.6719; The action is #DUAL_POINT#; touch_point: 0.6118, 0.6305, lift_point: 0.6118, 0.6305; The action is #DUAL_POINT#; touch_point: 0.7374, 0.9374, lift_point: 0.7374, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374; The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374", "response": "The action is #STATUS_TASK_COMPLETE#", "label": "The action is #DUAL_POINT#; touch_point: 0.7359, 0.9374, lift_point: 0.7359, 0.9374", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/53409_28.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the latest video from GameSpot Trailers?; The action is #DUAL_POINT#; touch_point: 0.7896, 0.6964, lift_point: 0.7896, 0.6964; The action is #DUAL_POINT#; touch_point: 0.0655, 0.3596, lift_point: 0.0655, 0.3596; The action is #TYPE#; And the content is : \"typed_text\": \"latest video from gamespot trailers\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1416, 0.5087, lift_point: 0.1416, 0.5087; The action is #DUAL_POINT#; touch_point: 0.3782, 0.3653, lift_point: 0.3782, 0.3653; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #DUAL_POINT#; touch_point: 0.0613, 0.3336, lift_point: 0.0613, 0.3336", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/36149_8.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the news?; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.5116, 0.6707, lift_point: 0.5116, 0.6707; The action is #DUAL_POINT#; touch_point: 0.1434, 0.575, lift_point: 0.1434, 0.575", "response": "The action is #DUAL_POINT#; touch_point: 0.0678, 0.8858, lift_point: 0.0678, 0.8858", "label": "The action is #DUAL_POINT#; touch_point: 0.0735, 0.9084, lift_point: 0.0735, 0.9084", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/64383_4.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's on the menu at Five Guys?", "response": "The action is #DUAL_POINT#; touch_point: 0.7769, 0.6884, lift_point: 0.7769, 0.6884", "label": "The action is #DUAL_POINT#; touch_point: 0.7808, 0.6931, lift_point: 0.7808, 0.6931", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/17652_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: What's the price of the new iPhone on eBay?", "response": "The action is #DUAL_POINT#; touch_point: 0.8817, 0.5274, lift_point: 0.8817, 0.5274", "label": "The action is #PRESS_HOME#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/44959_1.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Washington DC; The action is #Scrolling Down#; The action is #DUAL_POINT#; touch_point: 0.658, 0.61, lift_point: 0.658, 0.61; The action is #DUAL_POINT#; touch_point: 0.658, 0.6093, lift_point: 0.658, 0.6093; The action is #DUAL_POINT#; touch_point: 0.1885, 0.3943, lift_point: 0.1885, 0.3943; The action is #TYPE#; And the content is : \"typed_text\": \"hotels in Washington DC\"; The action is #DUAL_POINT#; touch_point: 0.1171, 0.3396, lift_point: 0.1171, 0.3396; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/82054_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: Search for hotels in Miami; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8882, 0.4486, lift_point: 0.8882, 0.4486; The action is #TYPE#; And the content is : \"typed_text\": \"hotels in Miami\"; The action is #TYPE#; And the content is : \"typed_text\": \"\"; The action is #DUAL_POINT#; touch_point: 0.1329, 0.3545, lift_point: 0.1329, 0.3545; The action is #Scrolling Down#; The action is #Scrolling Down#; The action is #Scrolling Down#", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/55758_9.png"]} +{"system": null, "query": "\nPrevious Actions: Goal: How do I get to the nearest Chipotle?; The action is #PRESS_HOME#; The action is #DUAL_POINT#; touch_point: 0.8477, 0.6484, lift_point: 0.8477, 0.6484; The action is #DUAL_POINT#; touch_point: 0.0537, 0.5218, lift_point: 0.0537, 0.5218; The action is #DUAL_POINT#; touch_point: 0.0549, 0.6326, lift_point: 0.0549, 0.6326; The action is #TYPE#; And the content is : \"typed_text\": \"nearest Chipotle\"; The action is #DUAL_POINT#; touch_point: 0.1115, 0.2639, lift_point: 0.1115, 0.2639", "response": "The action is #Scrolling Down#", "label": "The action is #Scrolling Down#", "history": [], "images": ["/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip/general_texts_splits/25835_7.png"]}