[ { "question": "What is the book authors first name?", "answer": "person", "gt_answers": [ "Susan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000347021.jpg", "model_name": "BLIP2" }, { "question": "what is the book authors last name?", "answer": "person", "gt_answers": [ "Blackmore" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000347021.jpg", "model_name": "BLIP2" }, { "question": "Who's name is on the cake?", "answer": "a dog", "gt_answers": [ "Chris" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000334557.jpg", "model_name": "BLIP2" }, { "question": "What does the writing on the bottom of the picture read?", "answer": "big kites", "gt_answers": [ "Big Kites" ], "image_path": "./data/STVQA/VisualGenome/1/2341689.jpg", "model_name": "BLIP2" }, { "question": "What company is advertised on the red sign?", "answer": "nike", "gt_answers": [ "Helly Hansen" ], "image_path": "./data/STVQA/VisualGenome/1/2359981.jpg", "model_name": "BLIP2" }, { "question": "Where is the Pacific Hwy Exit going to?", "answer": "san francisco", "gt_answers": [ "North Sydney" ], "image_path": "./data/STVQA/VisualGenome/2/2413451.jpg", "model_name": "BLIP2" }, { "question": "Where is the Falcon Street exit going to?", "answer": "falcon street", "gt_answers": [ "Crows Nest" ], "image_path": "./data/STVQA/VisualGenome/2/2413451.jpg", "model_name": "BLIP2" }, { "question": "What is the Mosman Manly exit going to?", "answer": "manly", "gt_answers": [ "Chatswood Epping" ], "image_path": "./data/STVQA/VisualGenome/2/2413451.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the red sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/2/2402661.jpg", "model_name": "BLIP2" }, { "question": "What does the logo on the upper left say?", "answer": "skateboards", "gt_answers": [ "MyProfe" ], "image_path": "./data/STVQA/VisualGenome/1/2376967.jpg", "model_name": "BLIP2" }, { "question": "What airline is this?", "answer": "air france", "gt_answers": [ "Airfrance" ], "image_path": "./data/STVQA/VisualGenome/2/2401851.jpg", "model_name": "BLIP2" }, { "question": "What does Espressa sell?", "answer": "pizza", "gt_answers": [ "pizza" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000457121.jpg", "model_name": "BLIP2" }, { "question": "What is the cost for delivery of Espressa Pizza?", "answer": "\u00a36 for a large pizza", "gt_answers": [ "FREE DELIVERY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000457121.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the right say?", "answer": "colab", "gt_answers": [ "Collabora" ], "image_path": "./data/STVQA/VisualGenome/1/2341583.jpg", "model_name": "BLIP2" }, { "question": "Who is the sponsor of this tournament", "answer": "person", "gt_answers": [ "blackrock" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000400737.jpg", "model_name": "BLIP2" }, { "question": "What is written on the top left of the white board?", "answer": "customer service", "gt_answers": [ "Book and Stationary Event of the Year!", "BOOK AND STATIONERY EVENT OF THE YEAR" ], "image_path": "./data/STVQA/icdar/img_95.jpg", "model_name": "BLIP2" }, { "question": "What is the number of mini jet?", "answer": "2", "gt_answers": [ "N5226F" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000214792.jpg", "model_name": "BLIP2" }, { "question": "Who is holding the mini jet?", "answer": "person", "gt_answers": [ "U.S. AIR FORCE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000214792.jpg", "model_name": "BLIP2" }, { "question": "What piece of fruit is the man holding?", "answer": "banana", "gt_answers": [ "banana" ], "image_path": "./data/STVQA/VisualGenome/1/2356649.jpg", "model_name": "BLIP2" }, { "question": "How many men are in the picture?", "answer": "two", "gt_answers": [ "two" ], "image_path": "./data/STVQA/VisualGenome/1/2356649.jpg", "model_name": "BLIP2" }, { "question": "What year does it say on the bottom right?", "answer": "2008", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/IIIT_text/1914.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the tag?", "answer": "golden", "gt_answers": [ "Goldfiel" ], "image_path": "./data/STVQA/imageNet/n01685808_4314.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand name of this product?", "answer": "pro", "gt_answers": [ "Pride." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018215.jpg", "model_name": "BLIP2" }, { "question": "What is the size of the items according to the label?", "answer": "8 oz", "gt_answers": [ "Jumbo." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018215.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the cooking device?", "answer": "crock pot", "gt_answers": [ "CROCK-POT" ], "image_path": "./data/STVQA/imageNet/n03133878_12981.JPEG", "model_name": "BLIP2" }, { "question": "What is the yellow number?", "answer": "person", "gt_answers": [ "7" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000560754.jpg", "model_name": "BLIP2" }, { "question": "what event is this?", "answer": "birthday", "gt_answers": [ "birthday" ], "image_path": "./data/STVQA/VisualGenome/2/2407270.jpg", "model_name": "BLIP2" }, { "question": "what color is mentioned in the text", "answer": "white", "gt_answers": [ "black" ], "image_path": "./data/STVQA/VisualGenome/1/2342206.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number above the windshield?", "answer": "person", "gt_answers": [ "160" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000450050.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus heading?", "answer": "switzerland", "gt_answers": [ "MONCLOA ARAVACA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000450050.jpg", "model_name": "BLIP2" }, { "question": "What is the motorcycle's license plate?", "answer": "person", "gt_answers": [ "NXU4489", "NXU 4489" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000543407.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign read?", "answer": "stop railroad crossing", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000446618.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bar?", "answer": "the temple bar", "gt_answers": [ "The Temple Bar" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000168717.jpg", "model_name": "BLIP2" }, { "question": "Which month is in the caption?", "answer": "august", "gt_answers": [ "aug" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000099956.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the tail wing?", "answer": "virgin america", "gt_answers": [ "Virgin" ], "image_path": "./data/STVQA/VisualGenome/1/2366658.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom of the snowboard say?", "answer": "person", "gt_answers": [ "Custom" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000439676.jpg", "model_name": "BLIP2" }, { "question": "What brand name is listed on the sign to the right?", "answer": "person", "gt_answers": [ "Burton" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000439676.jpg", "model_name": "BLIP2" }, { "question": "What does the copyright on this image say?", "answer": "no copyright", "gt_answers": [ "Bridget Samuels 2011" ], "image_path": "./data/STVQA/VisualGenome/1/2329356.jpg", "model_name": "BLIP2" }, { "question": "What type of car is this?", "answer": "taxi", "gt_answers": [ "Taxi" ], "image_path": "./data/STVQA/imageNet/n02930766_29871.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the front window?", "answer": "taxi", "gt_answers": [ "Not in Service" ], "image_path": "./data/STVQA/imageNet/n02930766_29871.JPEG", "model_name": "BLIP2" }, { "question": "What year is the copyright?", "answer": "1994", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/imageNet/n03868863_12028.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the AVE in the picture?", "answer": "madison st", "gt_answers": [ "1st", "1st AVE" ], "image_path": "./data/STVQA/VisualGenome/1/2330193.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the ST in the picture?", "answer": "madison st", "gt_answers": [ "Madison", "Madison ST" ], "image_path": "./data/STVQA/VisualGenome/1/2330193.jpg", "model_name": "BLIP2" }, { "question": "Who took this photo?", "answer": "the photographer", "gt_answers": [ "matt dwen" ], "image_path": "./data/STVQA/VisualGenome/2/2414069.jpg", "model_name": "BLIP2" }, { "question": "What hair product brand is sponsoring this match?", "answer": "garnier", "gt_answers": [ "Garnier" ], "image_path": "./data/STVQA/VisualGenome/1/2377076.jpg", "model_name": "BLIP2" }, { "question": "What is written on the display sign?", "answer": "penguin", "gt_answers": [ "Office Equipment" ], "image_path": "./data/STVQA/icdar/img_272.jpg", "model_name": "BLIP2" }, { "question": "What is the text on the traffic sign?", "answer": "school bus stop", "gt_answers": [ "SCHOOL BUS STOP AHEAD" ], "image_path": "./data/STVQA/VisualGenome/1/2360826.jpg", "model_name": "BLIP2" }, { "question": "what is the name of bike?", "answer": "honda", "gt_answers": [ "repsol" ], "image_path": "./data/STVQA/VisualGenome/1/2340213.jpg", "model_name": "BLIP2" }, { "question": "What letter is presented on the black flag?", "answer": "b", "gt_answers": [ "M" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000577925.jpg", "model_name": "BLIP2" }, { "question": "There are two numbers presented on the flag. What are they?", "answer": "1 and 2", "gt_answers": [ "09" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000577925.jpg", "model_name": "BLIP2" }, { "question": "What is presented on the flag?", "answer": "the coat of arms", "gt_answers": [ "m '09" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000577925.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the street curb?", "answer": "4", "gt_answers": [ "311" ], "image_path": "./data/STVQA/VisualGenome/1/2368686.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the switch?", "answer": "smd", "gt_answers": [ "C & K", "C & K C.R" ], "image_path": "./data/STVQA/imageNet/n04372370_13773.JPEG", "model_name": "BLIP2" }, { "question": "What kind of of donut is this?", "answer": "pumpkin chocolate chip", "gt_answers": [ "Pumpkin chocolate chip donuts", "PUMPKIN CHOCOLATE CHIP" ], "image_path": "./data/STVQA/VisualGenome/2/2417081.jpg", "model_name": "BLIP2" }, { "question": "What does the top left button of this remote control read?", "answer": "mute", "gt_answers": [ "Code Search" ], "image_path": "./data/STVQA/imageNet/n04074963_15147.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "stop men at work", "gt_answers": [ "Stop Men At Work" ], "image_path": "./data/STVQA/VisualGenome/1/2329233.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name visible on the side wall?", "answer": "adidas", "gt_answers": [ "OLYMPUS" ], "image_path": "./data/STVQA/VisualGenome/2/2410453.jpg", "model_name": "BLIP2" }, { "question": "What is the number code written on the train?", "answer": "sc", "gt_answers": [ "528818" ], "image_path": "./data/STVQA/imageNet/n03393912_7802.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the company written on the train", "answer": "amtrak", "gt_answers": [ "Golden West Service" ], "image_path": "./data/STVQA/imageNet/n03393912_7802.JPEG", "model_name": "BLIP2" }, { "question": "What initials are on the top of the machine?", "answer": "lg", "gt_answers": [ "LG" ], "image_path": "./data/STVQA/imageNet/n04554684_2945.JPEG", "model_name": "BLIP2" }, { "question": "Who does the Wholesale Nursey sell to?", "answer": "the public", "gt_answers": [ "PUBLIC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000086763.jpg", "model_name": "BLIP2" }, { "question": "How many skateboards can you see?", "answer": "two", "gt_answers": [ "3", "4" ], "image_path": "./data/STVQA/VisualGenome/1/2322286.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign board show?", "answer": "stop sign", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2323827.jpg", "model_name": "BLIP2" }, { "question": "Which Avenue is displayed near the stop board?", "answer": "avenue", "gt_answers": [ "Avenue 55", "55" ], "image_path": "./data/STVQA/VisualGenome/1/2323827.jpg", "model_name": "BLIP2" }, { "question": "What avenue is shown?", "answer": "broadway", "gt_answers": [ "5 Av" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000327020.jpg", "model_name": "BLIP2" }, { "question": "What is the website on the top left of the image?", "answer": "person", "gt_answers": [ "EUtouring.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000334981.jpg", "model_name": "BLIP2" }, { "question": "What is the bus' route number?", "answer": "no route number", "gt_answers": [ "41" ], "image_path": "./data/STVQA/VisualGenome/1/713880.jpg", "model_name": "BLIP2" }, { "question": "what is the year in the bottom right of this photo?", "answer": "2010", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000142299.jpg", "model_name": "BLIP2" }, { "question": "What track number is the train at?", "answer": "4", "gt_answers": [ "2" ], "image_path": "./data/STVQA/VisualGenome/1/2360737.jpg", "model_name": "BLIP2" }, { "question": "what is written on the green street sign?", "answer": "no parking", "gt_answers": [ "GRANADA" ], "image_path": "./data/STVQA/VisualGenome/2/2414181.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom line of the sign say?", "answer": "new west", "gt_answers": [ "New West" ], "image_path": "./data/STVQA/VisualGenome/1/2354360.jpg", "model_name": "BLIP2" }, { "question": "What is the dish called?", "answer": "chicken biryani", "gt_answers": [ "Chicken Dijon" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004042.jpg", "model_name": "BLIP2" }, { "question": "What does the player's t-shirt read?", "answer": "i love tennis", "gt_answers": [ "ADI DOES" ], "image_path": "./data/STVQA/VisualGenome/1/2357812.jpg", "model_name": "BLIP2" }, { "question": "What number is printed on the jersey of the batter?", "answer": "2", "gt_answers": [ "17" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000454158.jpg", "model_name": "BLIP2" }, { "question": "What type of booth is the man standing in?", "answer": "pay phone", "gt_answers": [ "phone" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326320.jpg", "model_name": "BLIP2" }, { "question": "What is written on the cap?", "answer": "person", "gt_answers": [ "A4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000324000.jpg", "model_name": "BLIP2" }, { "question": "What is written on the latop?", "answer": "the name of the car", "gt_answers": [ "ETATUNING.COM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000324000.jpg", "model_name": "BLIP2" }, { "question": "What is written on the name tag?", "answer": "person", "gt_answers": [ "VENDO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000324000.jpg", "model_name": "BLIP2" }, { "question": "Where is the train going to?", "answer": "the station", "gt_answers": [ "Old Town" ], "image_path": "./data/STVQA/VisualGenome/1/2342512.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the train?", "answer": "331", "gt_answers": [ "1033" ], "image_path": "./data/STVQA/VisualGenome/1/2342512.jpg", "model_name": "BLIP2" }, { "question": "What is written in front the store?", "answer": "minx", "gt_answers": [ "Minx" ], "image_path": "./data/STVQA/icdar/img_689.jpg", "model_name": "BLIP2" }, { "question": "What is the street name mentioned in the board ?", "answer": "little gilbert street", "gt_answers": [ "Little Gilbert st" ], "image_path": "./data/STVQA/VisualGenome/1/2377108.jpg", "model_name": "BLIP2" }, { "question": "What does the large pink text say?", "answer": "i love you", "gt_answers": [ "Pardon Me Prime Minister" ], "image_path": "./data/STVQA/VisualGenome/1/2319257.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of bananas pictured", "answer": "person", "gt_answers": [ "Chiquita" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000037377.jpg", "model_name": "BLIP2" }, { "question": "What is the sku number on the bananas", "answer": "person", "gt_answers": [ "4011" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000037377.jpg", "model_name": "BLIP2" }, { "question": "what is the word on the pot?", "answer": "person", "gt_answers": [ "Boroplast" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000260827.jpg", "model_name": "BLIP2" }, { "question": "What is under the main road sign?", "answer": "a building", "gt_answers": [ "A 454" ], "image_path": "./data/STVQA/VisualGenome/2/2408391.jpg", "model_name": "BLIP2" }, { "question": "What brand is on the banner behind the player?", "answer": "bnp paribas", "gt_answers": [ "BNP Paribas" ], "image_path": "./data/STVQA/VisualGenome/1/2349360.jpg", "model_name": "BLIP2" }, { "question": "What airline does this plane belong to?", "answer": "delta airlines", "gt_answers": [ "DELTA" ], "image_path": "./data/STVQA/imageNet/n02690373_5581.JPEG", "model_name": "BLIP2" }, { "question": "What does it say to start the school year doing?", "answer": "writing a thank you note", "gt_answers": [ "Celebrating" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030998.jpg", "model_name": "BLIP2" }, { "question": "How much prize money is there?", "answer": "$ 500,000 in prizes", "gt_answers": [ "$500.00" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030998.jpg", "model_name": "BLIP2" }, { "question": "what is written on the train", "answer": "stl", "gt_answers": [ "PACER STACKTRAIN", "STACKTRAIN" ], "image_path": "./data/STVQA/VisualGenome/2/2409683.jpg", "model_name": "BLIP2" }, { "question": "What word is below Nottingham City?", "answer": "city", "gt_answers": [ "Transport" ], "image_path": "./data/STVQA/VisualGenome/1/1591990.jpg", "model_name": "BLIP2" }, { "question": "Where can I buy those?", "answer": "china mainland", "gt_answers": [ "www.zzwldz.com" ], "image_path": "./data/STVQA/imageNet/n04116512_6856.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the bus company?", "answer": "red bus", "gt_answers": [ "Arriva" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065705.jpg", "model_name": "BLIP2" }, { "question": "What number is on the white jersey?", "answer": "2", "gt_answers": [ "8" ], "image_path": "./data/STVQA/VisualGenome/1/2350038.jpg", "model_name": "BLIP2" }, { "question": "When was the image copyrighted?", "answer": "2000", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/imageNet/n02124075_7973.JPEG", "model_name": "BLIP2" }, { "question": "Who holds the copyright?", "answer": "the photographer", "gt_answers": [ "charlotte edwards" ], "image_path": "./data/STVQA/imageNet/n02124075_7973.JPEG", "model_name": "BLIP2" }, { "question": "What color are the digits on the train's sign?", "answer": "yellow", "gt_answers": [ "red", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2340432.jpg", "model_name": "BLIP2" }, { "question": "What does the girls sweatshirt say?", "answer": "george washington university", "gt_answers": [ "The George Washington University" ], "image_path": "./data/STVQA/VisualGenome/1/2342984.jpg", "model_name": "BLIP2" }, { "question": "Which tennis tournament was this picture taken at?", "answer": "indian wells open", "gt_answers": [ "ATP World Tour" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000043431.jpg", "model_name": "BLIP2" }, { "question": "What is the name at the bottom of the booth?", "answer": "sams club", "gt_answers": [ "Organi", "ORGANI SINGAPORE" ], "image_path": "./data/STVQA/icdar/test_img_81.jpg", "model_name": "BLIP2" }, { "question": "What jersey number is the boy in the front of the image wearing?", "answer": "person", "gt_answers": [ "56" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000348116.jpg", "model_name": "BLIP2" }, { "question": "What jersey number is the boy in the back of the image wearing?", "answer": "person", "gt_answers": [ "46" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000348116.jpg", "model_name": "BLIP2" }, { "question": "what is the label given", "answer": "elapid snake", "gt_answers": [ "kevin messenger 2010" ], "image_path": "./data/STVQA/imageNet/n01728920_18655.JPEG", "model_name": "BLIP2" }, { "question": "which animal placed on there", "answer": "snake", "gt_answers": [ "snake" ], "image_path": "./data/STVQA/imageNet/n01728920_18655.JPEG", "model_name": "BLIP2" }, { "question": "What is the photography name?", "answer": "pizza", "gt_answers": [ "WWGE Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2328682.jpg", "model_name": "BLIP2" }, { "question": "What does the green street sign say?", "answer": "no parking", "gt_answers": [ "42 st", "W 42" ], "image_path": "./data/STVQA/VisualGenome/2/2417.jpg", "model_name": "BLIP2" }, { "question": "What does the billboard say?", "answer": "free million dollar poker tournament", "gt_answers": [ "MILLION DOLLAR POKER TOURNAMENT", "MILLION DOLLAR" ], "image_path": "./data/STVQA/VisualGenome/2/2417.jpg", "model_name": "BLIP2" }, { "question": "Where is this train going?", "answer": "to new york", "gt_answers": [ "To New York", "New York" ], "image_path": "./data/STVQA/VisualGenome/2/2416461.jpg", "model_name": "BLIP2" }, { "question": "What does the blue sign at the top of the building say?", "answer": "", "gt_answers": [ "Point" ], "image_path": "./data/STVQA/VisualGenome/1/713741.jpg", "model_name": "BLIP2" }, { "question": "What word is on the octagonal sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/2/2406596.jpg", "model_name": "BLIP2" }, { "question": "what number given in his t-shirt?", "answer": "20", "gt_answers": [ "20" ], "image_path": "./data/STVQA/VisualGenome/1/2365461.jpg", "model_name": "BLIP2" }, { "question": "Does the establishment offer wifi?", "answer": "yes", "gt_answers": [ "Free Wifi" ], "image_path": "./data/STVQA/VisualGenome/2/4188.jpg", "model_name": "BLIP2" }, { "question": "What word is on the side of the white vehicle in the picture?", "answer": "police", "gt_answers": [ "POLICE" ], "image_path": "./data/STVQA/imageNet/n03977966_28170.JPEG", "model_name": "BLIP2" }, { "question": "what is printed on the tailgate of this pick up truck?", "answer": "ford", "gt_answers": [ "MoToRZTV" ], "image_path": "./data/STVQA/VisualGenome/2/2405965.jpg", "model_name": "BLIP2" }, { "question": "What center is this?", "answer": "it is a shopping center", "gt_answers": [ "Art" ], "image_path": "./data/STVQA/VisualGenome/2/3380.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the book?", "answer": "a book of secrets", "gt_answers": [ "PENDRAGON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000069339.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the author of the book?", "answer": "person", "gt_answers": [ "MACHALE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000069339.jpg", "model_name": "BLIP2" }, { "question": "When was Berlin's established?", "answer": "1895", "gt_answers": [ "1883" ], "image_path": "./data/STVQA/VisualGenome/1/2316812.jpg", "model_name": "BLIP2" }, { "question": "What season comes after winter?", "answer": "spring", "gt_answers": [ "spring" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000441943.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the on the building?", "answer": "person", "gt_answers": [ "Galaxy" ], "image_path": "./data/STVQA/IIIT_text/img_000478.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THIS BUILD", "answer": "shanghai city hall", "gt_answers": [ "CALCADA DOS QUARTEIS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000437138.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON THIS SIGN BOARD?", "answer": " ", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000437138.jpg", "model_name": "BLIP2" }, { "question": "What word is printed 3 times on the building?", "answer": "ice cream", "gt_answers": [ "DIESEL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000119882.jpg", "model_name": "BLIP2" }, { "question": "what is the identification number on the blue part of the boat", "answer": "hms swan", "gt_answers": [ "rnli16-23", "RNLI 16-23" ], "image_path": "./data/STVQA/VisualGenome/1/498267.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "do not enter", "gt_answers": [ "Do Not Enter" ], "image_path": "./data/STVQA/VisualGenome/2/2403084.jpg", "model_name": "BLIP2" }, { "question": "What is wrirrten on the dorr frame?", "answer": "adirondacks", "gt_answers": [ "Progress", "PROGRESS AUTOMATIC DOOR, INC." ], "image_path": "./data/STVQA/imageNet/n04239074_374.JPEG", "model_name": "BLIP2" }, { "question": "What is the Brand name?", "answer": "vitamin c", "gt_answers": [ "Jiffy" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030466.jpg", "model_name": "BLIP2" }, { "question": "What type of road is Park St?", "answer": "one way street", "gt_answers": [ "One Way", "PARK ST ONE WAY" ], "image_path": "./data/STVQA/VisualGenome/1/2366762.jpg", "model_name": "BLIP2" }, { "question": "What is the slogan in the white corner?", "answer": "masshouse changes", "gt_answers": [ "Making more of the city" ], "image_path": "./data/STVQA/VisualGenome/1/2366762.jpg", "model_name": "BLIP2" }, { "question": "What company is in the building?", "answer": "person", "gt_answers": [ "BURLINGTON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000482295.jpg", "model_name": "BLIP2" }, { "question": "What time does Almuerzo Criollo start?", "answer": "11 am", "gt_answers": [ "12:00 PM" ], "image_path": "./data/STVQA/VisualGenome/1/2325200.jpg", "model_name": "BLIP2" }, { "question": "What group?", "answer": "person", "gt_answers": [ "First" ], "image_path": "./data/STVQA/imageNet/n02086910_2399.JPEG", "model_name": "BLIP2" }, { "question": "what year was the picture taken?", "answer": "2009", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000448143.jpg", "model_name": "BLIP2" }, { "question": "Who copyrighted the photograph?", "answer": "person", "gt_answers": [ "David Quick" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000448143.jpg", "model_name": "BLIP2" }, { "question": "What are the first 4 letters of the words on the backboard?", "answer": "person", "gt_answers": [ "High" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000448143.jpg", "model_name": "BLIP2" }, { "question": "What climate is the suggested storage in this image?", "answer": "cold", "gt_answers": [ "Cold" ], "image_path": "./data/STVQA/icdar/img_956.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the walker on the phone?", "answer": "4", "gt_answers": [ "16957" ], "image_path": "./data/STVQA/VisualGenome/2/2408919.jpg", "model_name": "BLIP2" }, { "question": "What is written on the poster to the left of the picture?", "answer": "brewpub", "gt_answers": [ "Tommyknocker Imperial", "IMPERIAL" ], "image_path": "./data/STVQA/imageNet/n02823428_11421.JPEG", "model_name": "BLIP2" }, { "question": "What is the number on the white jersey?", "answer": "person", "gt_answers": [ "32" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000574857.jpg", "model_name": "BLIP2" }, { "question": "What is the slogan on the beverage bottles?", "answer": "i'm thirsty", "gt_answers": [ "BRING IT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056647.jpg", "model_name": "BLIP2" }, { "question": "What is written in the blue rectangle on the box inside the refrigerator?", "answer": "ice maker", "gt_answers": [ "3D", "30" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056647.jpg", "model_name": "BLIP2" }, { "question": "when was the picture taken?", "answer": "in the kitchen", "gt_answers": [ "02/10/2009" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000271495.jpg", "model_name": "BLIP2" }, { "question": "What hot dog company is being shown in this picture?", "answer": "person hot dogs", "gt_answers": [ "nathan's" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000034785.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the basket of fires?", "answer": "4", "gt_answers": [ "95" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000034785.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company on the brown shirt?", "answer": "microsoft", "gt_answers": [ "Longhorn's Breath Saloon" ], "image_path": "./data/STVQA/VisualGenome/1/2368327.jpg", "model_name": "BLIP2" }, { "question": "how many cheeses are on the lasagna?", "answer": "4", "gt_answers": [ "4" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011382.jpg", "model_name": "BLIP2" }, { "question": "What is NEW?", "answer": "qr code", "gt_answers": [ "ERA" ], "image_path": "./data/STVQA/icdar/test_img_396.jpg", "model_name": "BLIP2" }, { "question": "You should fly your own what?", "answer": "airplane", "gt_answers": [ "Flag" ], "image_path": "./data/STVQA/icdar/test_img_396.jpg", "model_name": "BLIP2" }, { "question": "What is the phrase under the New Era logo?", "answer": "new era", "gt_answers": [ "Fly your own flag" ], "image_path": "./data/STVQA/icdar/test_img_396.jpg", "model_name": "BLIP2" }, { "question": "Is it Irvine Center Dr or Ave?", "answer": "ave", "gt_answers": [ "Dr" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000158729.jpg", "model_name": "BLIP2" }, { "question": "What is written on the woman's shirt?", "answer": "banana", "gt_answers": [ "TJOOK" ], "image_path": "./data/STVQA/VisualGenome/1/2325179.jpg", "model_name": "BLIP2" }, { "question": "What is the written on the plane?", "answer": "british airways", "gt_answers": [ "British airways" ], "image_path": "./data/STVQA/IIIT_text/img_000240.jpg", "model_name": "BLIP2" }, { "question": "What is the year on the bottle?", "answer": "1900", "gt_answers": [ "1900" ], "image_path": "./data/STVQA/imageNet/n04579145_5777.JPEG", "model_name": "BLIP2" }, { "question": "How much does it cost for egg salad?", "answer": "it costs $4", "gt_answers": [ "$5.25" ], "image_path": "./data/STVQA/imageNet/n07565083_361.JPEG", "model_name": "BLIP2" }, { "question": "What is the company name on the advertisement?", "answer": "hyundai", "gt_answers": [ "Mike Hynan Hynan Coach Services" ], "image_path": "./data/STVQA/VisualGenome/1/2332839.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the team in blue?", "answer": "hyun", "gt_answers": [ "Eagles" ], "image_path": "./data/STVQA/VisualGenome/1/2332839.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the jersey?", "answer": "person", "gt_answers": [ "8" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000240490.jpg", "model_name": "BLIP2" }, { "question": "What are the letters on the hat?", "answer": "person", "gt_answers": [ "PC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000240490.jpg", "model_name": "BLIP2" }, { "question": "What sandwich is printed on the wall?", "answer": "philly cheese steak", "gt_answers": [ "Panini" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000394529.jpg", "model_name": "BLIP2" }, { "question": "What street is displayed in the upper left of the image?", "answer": "a city street", "gt_answers": [ "22nd Street" ], "image_path": "./data/STVQA/VisualGenome/1/1159757.jpg", "model_name": "BLIP2" }, { "question": "What word is at the bottom of the picture?", "answer": "printer", "gt_answers": [ "adoos" ], "image_path": "./data/STVQA/imageNet/n03924679_16021.JPEG", "model_name": "BLIP2" }, { "question": "whats the name on the wall?", "answer": "person", "gt_answers": [ "hertz" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000431012.jpg", "model_name": "BLIP2" }, { "question": "what is the single letter after hertz?", "answer": "person", "gt_answers": [ "r" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000431012.jpg", "model_name": "BLIP2" }, { "question": "What do you need to get?", "answer": "horse", "gt_answers": [ "Reading" ], "image_path": "./data/STVQA/VisualGenome/2/2416007.jpg", "model_name": "BLIP2" }, { "question": "What is the title for the movie depicted on the pictures on the bus?", "answer": "jack reacher", "gt_answers": [ "Jack Reacher" ], "image_path": "./data/STVQA/VisualGenome/1/2320314.jpg", "model_name": "BLIP2" }, { "question": "What flavor are the ALTOIDS?", "answer": "mint", "gt_answers": [ "APPLE SOURS" ], "image_path": "./data/STVQA/imageNet/n02092339_1691.JPEG", "model_name": "BLIP2" }, { "question": "What city operates the fire truck?", "answer": "new york city", "gt_answers": [ "MONTVALE" ], "image_path": "./data/STVQA/imageNet/n03345487_7060.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the business written on the side of the horse carriage?", "answer": "person", "gt_answers": [ "DOUBLE M FARMS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000251697.jpg", "model_name": "BLIP2" }, { "question": "What state is written on the side of the horse carriage?", "answer": "michigan", "gt_answers": [ "Iowa" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000251697.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the website?", "answer": "orange graphics", "gt_answers": [ "OrangeGraphics" ], "image_path": "./data/STVQA/imageNet/n06359193_29955.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the plane?", "answer": "spitfire", "gt_answers": [ "HTOE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000073892.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the airplane", "answer": "indigo", "gt_answers": [ "Indigo" ], "image_path": "./data/STVQA/IIIT_text/img_000683.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the sign?", "answer": "saint", "gt_answers": [ "SAINT" ], "image_path": "./data/STVQA/VisualGenome/1/2361725.jpg", "model_name": "BLIP2" }, { "question": "What symbol is on the players arm band?", "answer": "a tennis ball", "gt_answers": [ "nike" ], "image_path": "./data/STVQA/VisualGenome/1/2361633.jpg", "model_name": "BLIP2" }, { "question": "What is the participants number?", "answer": "200,000", "gt_answers": [ "119" ], "image_path": "./data/STVQA/imageNet/n04456115_5677.JPEG", "model_name": "BLIP2" }, { "question": "What symbol is on the sign in blue?", "answer": "person", "gt_answers": [ "?" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406205.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the sign in green?", "answer": "person", "gt_answers": [ "P" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406205.jpg", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the bus?", "answer": "volvo", "gt_answers": [ "Mercedes-Benz" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000303340.jpg", "model_name": "BLIP2" }, { "question": "What company operates the bus?", "answer": "nederlandse vervoer", "gt_answers": [ "Citaro" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000303340.jpg", "model_name": "BLIP2" }, { "question": "What type of business is Walthall?", "answer": "manufacturer", "gt_answers": [ "hotel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000211867.jpg", "model_name": "BLIP2" }, { "question": "What is the name of store in between the flag and streetlight?", "answer": "person", "gt_answers": [ "BOMARS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000211867.jpg", "model_name": "BLIP2" }, { "question": "What does the plane say?", "answer": "us navy", "gt_answers": [ "Blue Angels" ], "image_path": "./data/STVQA/VisualGenome/2/2408243.jpg", "model_name": "BLIP2" }, { "question": "How old do you have to be to use this computer?", "answer": "18", "gt_answers": [ "Over 18 Only", "OVER 18" ], "image_path": "./data/STVQA/VisualGenome/1/2376682.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the bears foot?", "answer": "bears foot", "gt_answers": [ "Me to You" ], "image_path": "./data/STVQA/VisualGenome/1/2334789.jpg", "model_name": "BLIP2" }, { "question": "What is the store's name?", "answer": "chanel", "gt_answers": [ "Charles & Keith" ], "image_path": "./data/STVQA/icdar/img_852.jpg", "model_name": "BLIP2" }, { "question": "What number is this jet?", "answer": "2", "gt_answers": [ "216" ], "image_path": "./data/STVQA/VisualGenome/1/2376993.jpg", "model_name": "BLIP2" }, { "question": "What is written on the yellow vest>", "answer": "frisbee", "gt_answers": [ "Illegal rave party" ], "image_path": "./data/STVQA/VisualGenome/1/2345067.jpg", "model_name": "BLIP2" }, { "question": "What artist's signature is on this painting?", "answer": "person", "gt_answers": [ "Fiava" ], "image_path": "./data/STVQA/IIIT_text/1450.jpg", "model_name": "BLIP2" }, { "question": "What is your favorite beer?", "answer": "bud light", "gt_answers": [ "Carlsberg Beer", "CARLSBERG" ], "image_path": "./data/STVQA/imageNet/n02823750_3226.JPEG", "model_name": "BLIP2" }, { "question": "What are you drinking?", "answer": "beer", "gt_answers": [ "Carlsberg Beer", "CARLSBERG" ], "image_path": "./data/STVQA/imageNet/n02823750_3226.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the drink in the picture?", "answer": "coors light", "gt_answers": [ "Carlsberg", "Carlsberg Beer" ], "image_path": "./data/STVQA/imageNet/n02823750_3226.JPEG", "model_name": "BLIP2" }, { "question": "What is the first word on the t-shirt?", "answer": "great", "gt_answers": [ "Cereal" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014333.jpg", "model_name": "BLIP2" }, { "question": "what name is written on the blue and white banner", "answer": "person", "gt_answers": [ "New Era Fits", "New Era" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000217925.jpg", "model_name": "BLIP2" }, { "question": "What is the biggest word shown on the sign on the right side of this picture?", "answer": "person", "gt_answers": [ "MILES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000155061.jpg", "model_name": "BLIP2" }, { "question": "What is the last word (bottom right) on the sign shown on the right side of this picture?", "answer": "person", "gt_answers": [ "DAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000155061.jpg", "model_name": "BLIP2" }, { "question": "What is the website address at the bottom?", "answer": "com", "gt_answers": [ "www.myprofe.com" ], "image_path": "./data/STVQA/VisualGenome/1/2323952.jpg", "model_name": "BLIP2" }, { "question": "What word is printed on the lime green sign?", "answer": "person", "gt_answers": [ "Sisak" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000155097.jpg", "model_name": "BLIP2" }, { "question": "What instruction is written inside the red and white hexagon?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2323049.jpg", "model_name": "BLIP2" }, { "question": "What is the color of the caps of players?", "answer": "white", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/imageNet/n02799071_13320.JPEG", "model_name": "BLIP2" }, { "question": "What is the color of the ball?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/imageNet/n02799071_13320.JPEG", "model_name": "BLIP2" }, { "question": "What is the color of the pant of the player wearing white jersey?", "answer": "blue", "gt_answers": [ "White" ], "image_path": "./data/STVQA/imageNet/n02799071_13320.JPEG", "model_name": "BLIP2" }, { "question": "Where is this?", "answer": "canary islands", "gt_answers": [ "La playa de Las Canteras.", "LA PLAYA DE LAS CANTERAS" ], "image_path": "./data/STVQA/VisualGenome/1/2347471.jpg", "model_name": "BLIP2" }, { "question": "What is written on the wall with blue?", "answer": "i love you", "gt_answers": [ "RACK ED" ], "image_path": "./data/STVQA/VisualGenome/1/2317040.jpg", "model_name": "BLIP2" }, { "question": "What are the three letters that come after the word SUSHI?", "answer": "nigiri", "gt_answers": [ "TEI" ], "image_path": "./data/STVQA/icdar/img_911.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of board reference?", "answer": "a sign of board reference is a sign", "gt_answers": [ "way cool way" ], "image_path": "./data/STVQA/VisualGenome/1/2330509.jpg", "model_name": "BLIP2" }, { "question": "What is the airline name on the planes pictured?", "answer": "ana", "gt_answers": [ "ANA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000573854.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the bicycles?", "answer": "haro", "gt_answers": [ "trek" ], "image_path": "./data/STVQA/IIIT_text/4450.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company on the back wall?", "answer": "adidas", "gt_answers": [ "Apia" ], "image_path": "./data/STVQA/VisualGenome/1/2322921.jpg", "model_name": "BLIP2" }, { "question": "What is the color of the tennis court?", "answer": "blue", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/VisualGenome/1/2322921.jpg", "model_name": "BLIP2" }, { "question": "What does the arched text say?", "answer": "coffee shop", "gt_answers": [ "Coffeeshop" ], "image_path": "./data/STVQA/IIIT_text/img_000346.jpg", "model_name": "BLIP2" }, { "question": "What clothing brand is above the woman's picture?", "answer": "epi centre", "gt_answers": [ "Guess" ], "image_path": "./data/STVQA/icdar/test_img_372.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "brandon street", "gt_answers": [ "Brandon", "Brandon St" ], "image_path": "./data/STVQA/VisualGenome/1/2371304.jpg", "model_name": "BLIP2" }, { "question": "What is the type of street sign?", "answer": "one way", "gt_answers": [ "One way" ], "image_path": "./data/STVQA/VisualGenome/1/2371304.jpg", "model_name": "BLIP2" }, { "question": "What store has a sale?", "answer": "palladium", "gt_answers": [ "Pull & Bear", "PULL&BEAR" ], "image_path": "./data/STVQA/icdar/img_589.jpg", "model_name": "BLIP2" }, { "question": "What does the store front read?", "answer": "starbucks", "gt_answers": [ "My Gym" ], "image_path": "./data/STVQA/icdar/img_281.jpg", "model_name": "BLIP2" }, { "question": "What are the dimensions for Regular?", "answer": "the dimensions for regular are as follows", "gt_answers": [ "18\" x 30\"" ], "image_path": "./data/STVQA/imageNet/n03223299_3055.JPEG", "model_name": "BLIP2" }, { "question": "What are the dimensions for Residence?", "answer": "the dimensions for residence are 1 2 x 1 2", "gt_answers": [ "28\" x 36\"" ], "image_path": "./data/STVQA/imageNet/n03223299_3055.JPEG", "model_name": "BLIP2" }, { "question": "who manufactures the Trinitron?", "answer": "sony", "gt_answers": [ "SONY" ], "image_path": "./data/STVQA/imageNet/n04152593_11730.JPEG", "model_name": "BLIP2" }, { "question": "What brand is in red?", "answer": "person", "gt_answers": [ "vans" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000444809.jpg", "model_name": "BLIP2" }, { "question": "What store is on the top of the post?", "answer": "a beauty salon", "gt_answers": [ "Stationary Store" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000016449.jpg", "model_name": "BLIP2" }, { "question": "What clinic is on the bottom of the post?", "answer": "st mary's hospital", "gt_answers": [ "Beauty Clinic" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000016449.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the red vehicle?", "answer": "mk ts", "gt_answers": [ "XVP962", "XVP 962" ], "image_path": "./data/STVQA/IIIT_text/4431.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the business on the tower?", "answer": "person", "gt_answers": [ "Attebury Grain" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000377619.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the slanted sign?", "answer": "3", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000377619.jpg", "model_name": "BLIP2" }, { "question": "What number is on the building?", "answer": "3", "gt_answers": [ "17" ], "image_path": "./data/STVQA/IIIT_text/2286.jpg", "model_name": "BLIP2" }, { "question": "What text do you see at the bottom left of the picture?", "answer": "bathroom", "gt_answers": [ "MIAMIDREAMREALTY.COM" ], "image_path": "./data/STVQA/VisualGenome/1/2347534.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOUR OF HIS JACKET?", "answer": "black", "gt_answers": [ "BLACK" ], "image_path": "./data/STVQA/VisualGenome/1/2316900.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOUR OF HIS SHOES?", "answer": "black", "gt_answers": [ "WHITE WITH BLACK", "white" ], "image_path": "./data/STVQA/VisualGenome/1/2316900.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOUR OF HIS HELMET?", "answer": "black", "gt_answers": [ "BLACK" ], "image_path": "./data/STVQA/VisualGenome/1/2316900.jpg", "model_name": "BLIP2" }, { "question": "What company makes the red truck?", "answer": "scania", "gt_answers": [ "SCANIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117514.jpg", "model_name": "BLIP2" }, { "question": "What's the URL on the side of the red truck?", "answer": "person", "gt_answers": [ "www.vmcltd.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117514.jpg", "model_name": "BLIP2" }, { "question": "What is the company abbreviation on the red truck's cab?", "answer": "person", "gt_answers": [ "VMC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117514.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the cereal?", "answer": "person", "gt_answers": [ "grape-nuts" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352724.jpg", "model_name": "BLIP2" }, { "question": "What brand is the cereal?", "answer": "person", "gt_answers": [ "Post" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352724.jpg", "model_name": "BLIP2" }, { "question": "What brand is the yogurt?", "answer": "person", "gt_answers": [ "Yoplait" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352724.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NUMBER OF LAST PERSON?", "answer": "person", "gt_answers": [ "60" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000477192.jpg", "model_name": "BLIP2" }, { "question": "What word is on the purple sign?", "answer": "bus", "gt_answers": [ "Skylight" ], "image_path": "./data/STVQA/icdar/img_238.jpg", "model_name": "BLIP2" }, { "question": "What does the sign with food say?", "answer": "mrt", "gt_answers": [ "Skylight" ], "image_path": "./data/STVQA/icdar/img_238.jpg", "model_name": "BLIP2" }, { "question": "What is the fourth word in black type on the white sign?", "answer": "person", "gt_answers": [ "YIELD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000164842.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company that owns the building", "answer": "nokia", "gt_answers": [ "vodafone" ], "image_path": "./data/STVQA/IIIT_text/img_001247.jpg", "model_name": "BLIP2" }, { "question": "what is the shape shown?", "answer": "round", "gt_answers": [ "Syringe" ], "image_path": "./data/STVQA/imageNet/n04376876_10759.JPEG", "model_name": "BLIP2" }, { "question": "what is written on top left corner", "answer": "a pantry", "gt_answers": [ "all those details" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000351218.jpg", "model_name": "BLIP2" }, { "question": "what three letters are written at the top center?", "answer": "qqq", "gt_answers": [ "OSS" ], "image_path": "./data/STVQA/VisualGenome/1/2374047.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the side of the bus?", "answer": "a la guichet", "gt_answers": [ "La Cure Gourmande" ], "image_path": "./data/STVQA/VisualGenome/1/2342812.jpg", "model_name": "BLIP2" }, { "question": "Which cell is closest to the camera?", "answer": "c", "gt_answers": [ "26" ], "image_path": "./data/STVQA/VisualGenome/2/1167.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the wine with the copper-colored top?", "answer": "pinot gris", "gt_answers": [ "BRIDLEWOOD" ], "image_path": "./data/STVQA/VisualGenome/1/2362653.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the DVD?", "answer": "the kings speech", "gt_answers": [ "The Situation Workout", "THE SITUATION" ], "image_path": "./data/STVQA/VisualGenome/1/2362653.jpg", "model_name": "BLIP2" }, { "question": "Who manufactures the camera?", "answer": "polaroid corporation", "gt_answers": [ "Polaroid" ], "image_path": "./data/STVQA/imageNet/n03976467_224.JPEG", "model_name": "BLIP2" }, { "question": "What number is the train?", "answer": "3", "gt_answers": [ "106" ], "image_path": "./data/STVQA/VisualGenome/2/2411617.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company for the advertizement?", "answer": "person", "gt_answers": [ "fontana" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000384559.jpg", "model_name": "BLIP2" }, { "question": "What number is on the baseball batter's shirt?", "answer": "person", "gt_answers": [ "15" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000413918.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the box?", "answer": "it is a cake", "gt_answers": [ "Create" ], "image_path": "./data/STVQA/VisualGenome/1/2335730.jpg", "model_name": "BLIP2" }, { "question": "Which numbers are written in front of the train", "answer": "the number of the train", "gt_answers": [ "183 004" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000084103.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the train?", "answer": "elx", "gt_answers": [ "alex" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000084103.jpg", "model_name": "BLIP2" }, { "question": "Who is the photographer?", "answer": "wingwatchers by jay", "gt_answers": [ "Jodie" ], "image_path": "./data/STVQA/imageNet/n02168699_4042.JPEG", "model_name": "BLIP2" }, { "question": "Where was this picture found?", "answer": "on a wooden deck", "gt_answers": [ "Wing Watchers.com" ], "image_path": "./data/STVQA/imageNet/n02168699_4042.JPEG", "model_name": "BLIP2" }, { "question": "Where can you get pictures of insects?", "answer": "wingwatchers com", "gt_answers": [ "Wing Watchers.com" ], "image_path": "./data/STVQA/imageNet/n02168699_4042.JPEG", "model_name": "BLIP2" }, { "question": "What brand is this?", "answer": "panasonic", "gt_answers": [ "Panasonic" ], "image_path": "./data/STVQA/imageNet/n04392985_4223.JPEG", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "i do not know", "gt_answers": [ "7125" ], "image_path": "./data/STVQA/VisualGenome/1/2350833.jpg", "model_name": "BLIP2" }, { "question": "What letters are on the left and right of the banner?", "answer": "a and b", "gt_answers": [ "k k", "K TTS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000196676.jpg", "model_name": "BLIP2" }, { "question": "What kind of lamp is it?", "answer": "antler table lamp", "gt_answers": [ "Antler Table Lamp" ], "image_path": "./data/STVQA/imageNet/n04380533_2248.JPEG", "model_name": "BLIP2" }, { "question": "What store is this?", "answer": "starbucks", "gt_answers": [ "7 eleven" ], "image_path": "./data/STVQA/icdar/test_img_197.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the top sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000389016.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the T-shirt", "answer": "skate", "gt_answers": [ "Relate" ], "image_path": "./data/STVQA/VisualGenome/2/2409572.jpg", "model_name": "BLIP2" }, { "question": "what is the first word on top left", "answer": "water", "gt_answers": [ "soap" ], "image_path": "./data/STVQA/imageNet/n04254120_3241.JPEG", "model_name": "BLIP2" }, { "question": "Who copyrighted this photo", "answer": "angela kartt", "gt_answers": [ "Angela Kraft" ], "image_path": "./data/STVQA/imageNet/n02442845_18749.JPEG", "model_name": "BLIP2" }, { "question": "What brand is the camera?", "answer": "panasonic", "gt_answers": [ "Pentax" ], "image_path": "./data/STVQA/imageNet/n04069434_11314.JPEG", "model_name": "BLIP2" }, { "question": "what word is written on the airplane?", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/VisualGenome/1/2348331.jpg", "model_name": "BLIP2" }, { "question": "What city is listed on the boat?", "answer": "cartagena", "gt_answers": [ "CORTEZ" ], "image_path": "./data/STVQA/imageNet/n04606251_4010.JPEG", "model_name": "BLIP2" }, { "question": "How many fluid ounces does the bottle hold?", "answer": "24", "gt_answers": [ "12" ], "image_path": "./data/STVQA/imageNet/n03983396_17918.JPEG", "model_name": "BLIP2" }, { "question": "What type of beverage does the bottle advertise?", "answer": "cola", "gt_answers": [ "COLA", "RC COLA" ], "image_path": "./data/STVQA/imageNet/n03983396_17918.JPEG", "model_name": "BLIP2" }, { "question": "Which state is this bus from?", "answer": "florida", "gt_answers": [ "North Carolina" ], "image_path": "./data/STVQA/VisualGenome/2/2416571.jpg", "model_name": "BLIP2" }, { "question": "What type of center is mentioned on the bus?", "answer": "medical", "gt_answers": [ "Medical-Dental" ], "image_path": "./data/STVQA/VisualGenome/2/2416571.jpg", "model_name": "BLIP2" }, { "question": "What number has the boy in white shirt?", "answer": "4", "gt_answers": [ "7" ], "image_path": "./data/STVQA/VisualGenome/1/2353240.jpg", "model_name": "BLIP2" }, { "question": "what is the two letters after sig?", "answer": "person", "gt_answers": [ "ht" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000437778.jpg", "model_name": "BLIP2" }, { "question": "What this bus for?", "answer": "person", "gt_answers": [ "City sightseeing", "SIGHTSEEING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000437778.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOR OF THE TANK?", "answer": "blue", "gt_answers": [ "BLUE" ], "image_path": "./data/STVQA/imageNet/n04562935_11100.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THE TANK?", "answer": "water tower", "gt_answers": [ "DEWES" ], "image_path": "./data/STVQA/imageNet/n04562935_11100.JPEG", "model_name": "BLIP2" }, { "question": "What number is the train?", "answer": "id number", "gt_answers": [ "1803" ], "image_path": "./data/STVQA/VisualGenome/1/2341586.jpg", "model_name": "BLIP2" }, { "question": "What street is shown?", "answer": "main street", "gt_answers": [ "SMITH" ], "image_path": "./data/STVQA/VisualGenome/1/2341586.jpg", "model_name": "BLIP2" }, { "question": "What is the red octagon instructing you to do?", "answer": "turn left", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2341586.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store in the fore front?", "answer": "chanel", "gt_answers": [ "PARAGON" ], "image_path": "./data/STVQA/icdar/test_img_335.jpg", "model_name": "BLIP2" }, { "question": "which stores is shown?", "answer": "chanel", "gt_answers": [ "Paragon" ], "image_path": "./data/STVQA/icdar/test_img_335.jpg", "model_name": "BLIP2" }, { "question": "What is this not?", "answer": "a parking meter", "gt_answers": [ "A PARKING METER" ], "image_path": "./data/STVQA/VisualGenome/1/2368061.jpg", "model_name": "BLIP2" }, { "question": "What word in black comes below 1/2 price?", "answer": "sale", "gt_answers": [ "sale" ], "image_path": "./data/STVQA/icdar/img_779.jpg", "model_name": "BLIP2" }, { "question": "What activity is happening in the picture?", "answer": "skiing", "gt_answers": [ "Skiing" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000082766.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the blue vehicle in the centre of the image?", "answer": "mcdo", "gt_answers": [ "KAV 215F", "KAV215F" ], "image_path": "./data/STVQA/VisualGenome/1/2346598.jpg", "model_name": "BLIP2" }, { "question": "What is the make of the blue vehicle in the centre of the image?", "answer": "ford", "gt_answers": [ "Dodge" ], "image_path": "./data/STVQA/VisualGenome/1/2346598.jpg", "model_name": "BLIP2" }, { "question": "What is the name written on the top of the front of the ble vehicle in the centre of the image?", "answer": "john deere", "gt_answers": [ "Bruce Mackie" ], "image_path": "./data/STVQA/VisualGenome/1/2346598.jpg", "model_name": "BLIP2" }, { "question": "What beer company is advertised on the blue wall?", "answer": "bud light", "gt_answers": [ "BUD LIGHT" ], "image_path": "./data/STVQA/VisualGenome/1/2329680.jpg", "model_name": "BLIP2" }, { "question": "What is the reference number for this picture?", "answer": "ltv a alligator", "gt_answers": [ "7033" ], "image_path": "./data/STVQA/imageNet/n02704792_5967.JPEG", "model_name": "BLIP2" }, { "question": "What is the first name on the bottom left corner of the image?", "answer": "person", "gt_answers": [ "Ron" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000209533.jpg", "model_name": "BLIP2" }, { "question": "what is the number on the ship", "answer": "the number on the ship", "gt_answers": [ "3 AT 6 856" ], "image_path": "./data/STVQA/VisualGenome/2/2400240.jpg", "model_name": "BLIP2" }, { "question": "what is written behind the man", "answer": "fishing boat", "gt_answers": [ "3 AT 6 856" ], "image_path": "./data/STVQA/VisualGenome/2/2400240.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the vessel", "answer": "m/v san francisco", "gt_answers": [ "3 AT 6 856" ], "image_path": "./data/STVQA/VisualGenome/2/2400240.jpg", "model_name": "BLIP2" }, { "question": "What dates are on this baseball?", "answer": "october 1st, 2017", "gt_answers": [ "1964-2004" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000155790.jpg", "model_name": "BLIP2" }, { "question": "what is present in the green board", "answer": "clock", "gt_answers": [ "For Lease" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000132193.jpg", "model_name": "BLIP2" }, { "question": "what is number showing in the board", "answer": "4", "gt_answers": [ "6046623000", "604 662 3000" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000132193.jpg", "model_name": "BLIP2" }, { "question": "What is written in the wooden sign?", "answer": "bear creek", "gt_answers": [ "Bear Cave" ], "image_path": "./data/STVQA/VisualGenome/1/2365173.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the game that is behind the cat?", "answer": "monopoly", "gt_answers": [ "The Moment of Truth" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000467762.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "chris alice", "gt_answers": [ "Chez Florence" ], "image_path": "./data/STVQA/imageNet/n04443257_14733.JPEG", "model_name": "BLIP2" }, { "question": "What brand of cigarettes is being advertised?", "answer": "marlboro", "gt_answers": [ "Marlboro" ], "image_path": "./data/STVQA/imageNet/n04443257_14733.JPEG", "model_name": "BLIP2" }, { "question": "What does the white car's license plate say?", "answer": "san diego", "gt_answers": [ "M 2759 UT" ], "image_path": "./data/STVQA/VisualGenome/2/2239.jpg", "model_name": "BLIP2" }, { "question": "What is the large letter on the parking meter?", "answer": "s", "gt_answers": [ "e" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000250268.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the parking meter?", "answer": "0", "gt_answers": [ "6328" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000250268.jpg", "model_name": "BLIP2" }, { "question": "What kind of things do they do here?", "answer": "they do engineering", "gt_answers": [ "Computer science and engineering" ], "image_path": "./data/STVQA/IIIT_text/img_000406.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the skateboard held by the man in yellow?", "answer": "person", "gt_answers": [ "loaded" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000410601.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the beverage?", "answer": "person", "gt_answers": [ "Inca Kola" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000290974.jpg", "model_name": "BLIP2" }, { "question": "What time is it on the object?", "answer": "the time is 12 noon", "gt_answers": [ "10:32" ], "image_path": "./data/STVQA/imageNet/n03196217_5469.JPEG", "model_name": "BLIP2" }, { "question": "What does the image advertise in red font?", "answer": "commitment to excellence", "gt_answers": [ "LOW PRICES" ], "image_path": "./data/STVQA/imageNet/n03196217_5469.JPEG", "model_name": "BLIP2" }, { "question": "What is the boy in brown doing?", "answer": "skateboarding", "gt_answers": [ "skateboarding" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000531894.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "london bridge", "gt_answers": [ "Degenham Dock" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000100506.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "person", "gt_answers": [ "EL2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000100506.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store with the purple sign?", "answer": "walmart", "gt_answers": [ "EPICURIA" ], "image_path": "./data/STVQA/VisualGenome/2/2417010.jpg", "model_name": "BLIP2" }, { "question": "What number is the Lecture Theatre?", "answer": "4", "gt_answers": [ "1" ], "image_path": "./data/STVQA/icdar/img_163.jpg", "model_name": "BLIP2" }, { "question": "Which airline is this plane?", "answer": "air canada", "gt_answers": [ "Air Canada" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420021.jpg", "model_name": "BLIP2" }, { "question": "What is airline company appears on the side of the plane?", "answer": "air canada", "gt_answers": [ "AIR CANADA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420021.jpg", "model_name": "BLIP2" }, { "question": "What color is the mans life jacket?", "answer": "yellow", "gt_answers": [ "Yellow" ], "image_path": "./data/STVQA/VisualGenome/2/2411497.jpg", "model_name": "BLIP2" }, { "question": "What color is the boat?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/2/2411497.jpg", "model_name": "BLIP2" }, { "question": "What word is displayed on the heart?", "answer": "love", "gt_answers": [ "LOVE" ], "image_path": "./data/STVQA/VisualGenome/2/2416153.jpg", "model_name": "BLIP2" }, { "question": "What numbers are on the crate on the right?", "answer": "crate number 1", "gt_answers": [ "17342", "724" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000093707.jpg", "model_name": "BLIP2" }, { "question": "What kind of airmen are referred to on the plane?", "answer": "pilot", "gt_answers": [ "Tuskegee Airmen", "Tuskegee" ], "image_path": "./data/STVQA/VisualGenome/1/2359015.jpg", "model_name": "BLIP2" }, { "question": "How many rounds are in the main event?", "answer": "eight", "gt_answers": [ "12 ROUNDS", "12" ], "image_path": "./data/STVQA/VisualGenome/1/2354197.jpg", "model_name": "BLIP2" }, { "question": "Who are the two fighters?", "answer": "johnson and johnson", "gt_answers": [ "Joe Frazier vs. Muhammad Ali", "Joe Frazier, Muhammad Ali" ], "image_path": "./data/STVQA/VisualGenome/1/2354197.jpg", "model_name": "BLIP2" }, { "question": "What is on the sign?", "answer": "bike lane", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000021528.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the bottom of the door?", "answer": "i'm the king", "gt_answers": [ "Show" ], "image_path": "./data/STVQA/IIIT_text/2508.jpg", "model_name": "BLIP2" }, { "question": "What does it say at the top of the door?", "answer": "i'm a king", "gt_answers": [ "circus" ], "image_path": "./data/STVQA/IIIT_text/2508.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the baseball hat?", "answer": "i'm a rapper", "gt_answers": [ "Compton" ], "image_path": "./data/STVQA/IIIT_text/2508.jpg", "model_name": "BLIP2" }, { "question": "What brand of TV is labeled on the box?", "answer": "person", "gt_answers": [ "samsung" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000387693.jpg", "model_name": "BLIP2" }, { "question": "What is the table number?", "answer": "person", "gt_answers": [ "14" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000180606.jpg", "model_name": "BLIP2" }, { "question": "What does the text in the top right of the image say?", "answer": "china mainland", "gt_answers": [ "Happy Duckling" ], "image_path": "./data/STVQA/imageNet/n03908618_34292.JPEG", "model_name": "BLIP2" }, { "question": "What is the number listed in the corner?", "answer": "4", "gt_answers": [ "063_856" ], "image_path": "./data/STVQA/VisualGenome/2/2408898.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the silver appliance?", "answer": "mr coffee", "gt_answers": [ "xast" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000264618.jpg", "model_name": "BLIP2" }, { "question": "What is New and Improved?", "answer": "soft & flexible dispenser", "gt_answers": [ "DISPENSER" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005752.jpg", "model_name": "BLIP2" }, { "question": "What store is the Paper Bagt from?", "answer": "walmart", "gt_answers": [ "Cracker Barrel" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005752.jpg", "model_name": "BLIP2" }, { "question": "Where does the sign on the right say this is?", "answer": "city centre", "gt_answers": [ "Piccadilly Gardens" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000106382.jpg", "model_name": "BLIP2" }, { "question": "What does the blue sign say?", "answer": "manchester city centre", "gt_answers": [ "cycle hub" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000106382.jpg", "model_name": "BLIP2" }, { "question": "What kind of street sign is this?", "answer": "london road sign", "gt_answers": [ "zone", "ZONE ENDS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000285810.jpg", "model_name": "BLIP2" }, { "question": "What number does the bus have", "answer": "3", "gt_answers": [ "4" ], "image_path": "./data/STVQA/VisualGenome/1/2340445.jpg", "model_name": "BLIP2" }, { "question": "What is the lagest word on the motel sign", "answer": "motel", "gt_answers": [ "Motel" ], "image_path": "./data/STVQA/IIIT_text/img_000816.jpg", "model_name": "BLIP2" }, { "question": "How many megabits are displayed on the bus advertisement?", "answer": "100 megabits", "gt_answers": [ "5", "THREE" ], "image_path": "./data/STVQA/VisualGenome/2/2417107.jpg", "model_name": "BLIP2" }, { "question": "What is the route number of the yellow bus?", "answer": "route number", "gt_answers": [ "66" ], "image_path": "./data/STVQA/VisualGenome/2/2417107.jpg", "model_name": "BLIP2" }, { "question": "What text is on the yellow bottle?", "answer": "dettol", "gt_answers": [ "Sunlight" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012521.jpg", "model_name": "BLIP2" }, { "question": "What is the time on the platform?", "answer": "the time on the platform", "gt_answers": [ "09:05:25" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000580979.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the sign?", "answer": "expo", "gt_answers": [ "CYCLISTS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326613.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the sign?", "answer": "car", "gt_answers": [ "CARRALL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326613.jpg", "model_name": "BLIP2" }, { "question": "What are the first two words on the sign?", "answer": "carrill closed", "gt_answers": [ "CARRALL CLOSED" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326613.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the circus", "answer": "the bear in the circus", "gt_answers": [ "banana" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355857.jpg", "model_name": "BLIP2" }, { "question": "What is the women's skirt made of?", "answer": "bananas", "gt_answers": [ "banana" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355857.jpg", "model_name": "BLIP2" }, { "question": "What is written on the red sign.", "answer": "teddy bears' picnic", "gt_answers": [ "CIRCUS banana", "BANANA CIRCUS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355857.jpg", "model_name": "BLIP2" }, { "question": "What is the copyright year?", "answer": "2012", "gt_answers": [ "2004" ], "image_path": "./data/STVQA/imageNet/n02226429_14872.JPEG", "model_name": "BLIP2" }, { "question": "What is the sign on the far left?", "answer": "movie theater", "gt_answers": [ "Real d 3d" ], "image_path": "./data/STVQA/icdar/test_img_452.jpg", "model_name": "BLIP2" }, { "question": "Who is sponsoring this tennis match?", "answer": "nike", "gt_answers": [ "Scottrade" ], "image_path": "./data/STVQA/VisualGenome/2/2411235.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2337117.jpg", "model_name": "BLIP2" }, { "question": "How much is the distance of Jackson?", "answer": "5 miles", "gt_answers": [ "500" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000557263.jpg", "model_name": "BLIP2" }, { "question": "What place is located at 800m away?", "answer": "jackson street", "gt_answers": [ "Montgomery" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000557263.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the milk?", "answer": "sam's club", "gt_answers": [ "Clover" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000054521.jpg", "model_name": "BLIP2" }, { "question": "What is in the Old El Paso jar?", "answer": "cat food", "gt_answers": [ "salsa" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000054521.jpg", "model_name": "BLIP2" }, { "question": "What brand is this bottle?", "answer": "apple", "gt_answers": [ "appletiser" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000364722.jpg", "model_name": "BLIP2" }, { "question": "What percent is apple juice?", "answer": "100 percent", "gt_answers": [ "100%" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000364722.jpg", "model_name": "BLIP2" }, { "question": "Which class is mentioned in this photograph?", "answer": "english", "gt_answers": [ "1986" ], "image_path": "./data/STVQA/IIIT_text/img_000529.jpg", "model_name": "BLIP2" }, { "question": "What does the sign in this photograph say?", "answer": "christ south high school", "gt_answers": [ "Chatsworth High School" ], "image_path": "./data/STVQA/IIIT_text/img_000529.jpg", "model_name": "BLIP2" }, { "question": "What chocolate brand is featured?", "answer": "godiva", "gt_answers": [ "Godiva" ], "image_path": "./data/STVQA/icdar/test_img_170.jpg", "model_name": "BLIP2" }, { "question": "What country is Godiva from?", "answer": "belgium", "gt_answers": [ "Belgium" ], "image_path": "./data/STVQA/icdar/test_img_170.jpg", "model_name": "BLIP2" }, { "question": "What gate number is the plane at?", "answer": "a gate", "gt_answers": [ "40" ], "image_path": "./data/STVQA/VisualGenome/1/2362978.jpg", "model_name": "BLIP2" }, { "question": "what is the players last name?", "answer": "person", "gt_answers": [ "BRAUN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000283678.jpg", "model_name": "BLIP2" }, { "question": "what is the players jersey number?", "answer": "person", "gt_answers": [ "8" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000283678.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline on the back of the cart?", "answer": "american airlines", "gt_answers": [ "Delta" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499372.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say at the top?", "answer": "tepa kabab", "gt_answers": [ "Tzita Kaaba" ], "image_path": "./data/STVQA/IIIT_text/4823.jpg", "model_name": "BLIP2" }, { "question": "What is a type of cheese very popular on pizzas?", "answer": "mozzarella", "gt_answers": [ "Parmesan" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019374.jpg", "model_name": "BLIP2" }, { "question": "According to the sign, how far is the desert?", "answer": "5 miles", "gt_answers": [ "300 miles" ], "image_path": "./data/STVQA/VisualGenome/1/2349973.jpg", "model_name": "BLIP2" }, { "question": "Who manufactured the green pickup truck?", "answer": "ford", "gt_answers": [ "Ford" ], "image_path": "./data/STVQA/VisualGenome/2/2406263.jpg", "model_name": "BLIP2" }, { "question": "What is the last two digit number in the first line of the sign?", "answer": "3", "gt_answers": [ "19" ], "image_path": "./data/STVQA/VisualGenome/1/2362625.jpg", "model_name": "BLIP2" }, { "question": "what is the number on the players shirt?", "answer": "17", "gt_answers": [ "8" ], "image_path": "./data/STVQA/imageNet/n04019541_63092.JPEG", "model_name": "BLIP2" }, { "question": "what is the name of the business on the banner?", "answer": "washington capitals", "gt_answers": [ "chesapeake beach resort & spa" ], "image_path": "./data/STVQA/imageNet/n04019541_63092.JPEG", "model_name": "BLIP2" }, { "question": "What number shirt is the player in red wearing?", "answer": "17", "gt_answers": [ "8" ], "image_path": "./data/STVQA/imageNet/n04019541_63092.JPEG", "model_name": "BLIP2" }, { "question": "What is the last letter of graffiti on the man's chin?", "answer": "e", "gt_answers": [ "B" ], "image_path": "./data/STVQA/IIIT_text/1642.jpg", "model_name": "BLIP2" }, { "question": "What company made the hydrant?", "answer": "johns manville", "gt_answers": [ "Smith" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420066.jpg", "model_name": "BLIP2" }, { "question": "What number is the player in the red jersey?", "answer": "person", "gt_answers": [ "30" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000376523.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the batter?", "answer": "person", "gt_answers": [ "ASTUDILLO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000376523.jpg", "model_name": "BLIP2" }, { "question": "What is written in white on the red octagon sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000079191.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the yellow front of the train?", "answer": "0", "gt_answers": [ "47790" ], "image_path": "./data/STVQA/VisualGenome/1/713687.jpg", "model_name": "BLIP2" }, { "question": "what's the word on the top right?", "answer": "spicy", "gt_answers": [ "mix" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006098.jpg", "model_name": "BLIP2" }, { "question": "What tag number is on the middle cow's ear?", "answer": "person", "gt_answers": [ "113" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000530460.jpg", "model_name": "BLIP2" }, { "question": "How many ounces are in this container?", "answer": "4", "gt_answers": [ "1.25", "1.25 FL OZ" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001272.jpg", "model_name": "BLIP2" }, { "question": "What kind of food is in the box?", "answer": "roast lamb", "gt_answers": [ "Roast Lamb" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016286.jpg", "model_name": "BLIP2" }, { "question": "How many elephants are there?", "answer": "three", "gt_answers": [ "3", "{}" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402041.jpg", "model_name": "BLIP2" }, { "question": "What is the main color the elephant riders are wearing?", "answer": "red", "gt_answers": [ "Red", "{}" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402041.jpg", "model_name": "BLIP2" }, { "question": "What color are the elephants?", "answer": "white", "gt_answers": [ "Gray", "{}" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402041.jpg", "model_name": "BLIP2" }, { "question": "What type of animal is the man kissing", "answer": "snake", "gt_answers": [ "Snake" ], "image_path": "./data/STVQA/imageNet/n01748264_17201.JPEG", "model_name": "BLIP2" }, { "question": "When was this photograph taken?", "answer": "november 2015", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000399097.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the sidewalk with the arrow say?", "answer": "st peter's street", "gt_answers": [ "TEA ROOM OPEN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000399097.jpg", "model_name": "BLIP2" }, { "question": "what is written beneath corona", "answer": "tequila", "gt_answers": [ "extra" ], "image_path": "./data/STVQA/imageNet/n02823428_8352.JPEG", "model_name": "BLIP2" }, { "question": "What phrase follows the name Obama?", "answer": "person", "gt_answers": [ "yes you can" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000515550.jpg", "model_name": "BLIP2" }, { "question": "What brand of typewriter is being used?", "answer": "a royal typewriter", "gt_answers": [ "Olympia" ], "image_path": "./data/STVQA/imageNet/n04505470_4902.JPEG", "model_name": "BLIP2" }, { "question": "What year was the photo taken?", "answer": "2015", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000560885.jpg", "model_name": "BLIP2" }, { "question": "There are 3 people in this ____?", "answer": "picture", "gt_answers": [ "photo" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000560885.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate?", "answer": "md", "gt_answers": [ "JGT 657" ], "image_path": "./data/STVQA/VisualGenome/1/2376934.jpg", "model_name": "BLIP2" }, { "question": "What kind of sign can you see?", "answer": "stop sign", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2376934.jpg", "model_name": "BLIP2" }, { "question": "What is written on the bag in the front?", "answer": "samsonite", "gt_answers": [ "Everest" ], "image_path": "./data/STVQA/VisualGenome/1/2330375.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the bag in the front?", "answer": "rucksack", "gt_answers": [ "Everest" ], "image_path": "./data/STVQA/VisualGenome/1/2330375.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the novel in the image?", "answer": "cat in the bag", "gt_answers": [ "HARRY POTTER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000415604.jpg", "model_name": "BLIP2" }, { "question": "What is the second word in the name of the pictured store?", "answer": "person", "gt_answers": [ "trend" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000186196.jpg", "model_name": "BLIP2" }, { "question": "What airlines does this plane fly for?", "answer": "person", "gt_answers": [ "swiss" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000503782.jpg", "model_name": "BLIP2" }, { "question": "Is this a bar or club in NYC?", "answer": "a bathroom in a bar", "gt_answers": [ "N.Y.C." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000482433.jpg", "model_name": "BLIP2" }, { "question": "What is the brand on the red advertisement?", "answer": "coca cola", "gt_answers": [ "Bank of America" ], "image_path": "./data/STVQA/VisualGenome/1/2373899.jpg", "model_name": "BLIP2" }, { "question": "what type of boat is the blue and white one, the big word written on the blue and white boat.", "answer": "police", "gt_answers": [ "police" ], "image_path": "./data/STVQA/VisualGenome/1/2321960.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "no bicycles in station", "gt_answers": [ "walk your bike" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000084981.jpg", "model_name": "BLIP2" }, { "question": "What does the main text say on the blue jersey?", "answer": "santa clara", "gt_answers": [ "Acronis" ], "image_path": "./data/STVQA/VisualGenome/1/2346031.jpg", "model_name": "BLIP2" }, { "question": "What type of car is this?", "answer": "honda civic", "gt_answers": [ "Honda" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000112726.jpg", "model_name": "BLIP2" }, { "question": "What movie is shown on the DVD cover?", "answer": "dinosaur", "gt_answers": [ "Beverly Hills Chihuahua" ], "image_path": "./data/STVQA/VisualGenome/1/2332921.jpg", "model_name": "BLIP2" }, { "question": "What soda company is seen on the cup?", "answer": "pepsi", "gt_answers": [ "Pepsi" ], "image_path": "./data/STVQA/VisualGenome/1/2362046.jpg", "model_name": "BLIP2" }, { "question": "What year was the image taken?", "answer": "2009", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000578306.jpg", "model_name": "BLIP2" }, { "question": "what type of shop is in the image", "answer": "bookshop", "gt_answers": [ "book shop", "BOOKSHOP" ], "image_path": "./data/STVQA/imageNet/n02871525_23832.JPEG", "model_name": "BLIP2" }, { "question": "what dose the yellow sign say on the building", "answer": "bookshop", "gt_answers": [ "open" ], "image_path": "./data/STVQA/imageNet/n02871525_23832.JPEG", "model_name": "BLIP2" }, { "question": "how many people are in the image", "answer": "two", "gt_answers": [ "one" ], "image_path": "./data/STVQA/imageNet/n02871525_23832.JPEG", "model_name": "BLIP2" }, { "question": "What words are above the image of the red fruit?", "answer": "fruit juice", "gt_answers": [ "Discover Flavour" ], "image_path": "./data/STVQA/VisualGenome/1/2316462.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the town on the fire engine?", "answer": "person", "gt_answers": [ "holliston" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000439890.jpg", "model_name": "BLIP2" }, { "question": "What number is next (left-side) of the word \"stroke\" in front of the green car?", "answer": "3", "gt_answers": [ "4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000477015.jpg", "model_name": "BLIP2" }, { "question": "What 4-digit number is on the yellow stick in front of the green car?", "answer": "999", "gt_answers": [ "4764" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000477015.jpg", "model_name": "BLIP2" }, { "question": "What 3 letters are shown on the bottom left of the green car?", "answer": "pk", "gt_answers": [ "cng" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000477015.jpg", "model_name": "BLIP2" }, { "question": "What is the character appearing on the microwave?", "answer": "hello kitty", "gt_answers": [ "Hello Kitty" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000322325.jpg", "model_name": "BLIP2" }, { "question": "What is the original price of the item?", "answer": "$99", "gt_answers": [ "91.99" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000322325.jpg", "model_name": "BLIP2" }, { "question": "What is the price of the item after the temporary price cut?", "answer": "$99", "gt_answers": [ "69.99" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000322325.jpg", "model_name": "BLIP2" }, { "question": "What number is printed on the blue roof?", "answer": "4", "gt_answers": [ "4" ], "image_path": "./data/STVQA/VisualGenome/1/2371685.jpg", "model_name": "BLIP2" }, { "question": "What is the colour of the car?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/1/2346090.jpg", "model_name": "BLIP2" }, { "question": "What is lying beside the car?", "answer": "purple umbrella", "gt_answers": [ "Umbrella" ], "image_path": "./data/STVQA/VisualGenome/1/2346090.jpg", "model_name": "BLIP2" }, { "question": "What is the colour of umbrella?", "answer": "purple", "gt_answers": [ "Violet" ], "image_path": "./data/STVQA/VisualGenome/1/2346090.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "no diving", "gt_answers": [ "No diving" ], "image_path": "./data/STVQA/VisualGenome/2/2417309.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blue sign?", "answer": "tees", "gt_answers": [ "Gas Showroom" ], "image_path": "./data/STVQA/IIIT_text/2524.jpg", "model_name": "BLIP2" }, { "question": "What kind of invitation is this?", "answer": "a wedding invitation", "gt_answers": [ "Wedding" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000460643.jpg", "model_name": "BLIP2" }, { "question": "what does it say on the building", "answer": "person", "gt_answers": [ "TRGOVACKI CENTAR DUBRAVA" ], "image_path": "./data/STVQA/IIIT_text/3352.jpg", "model_name": "BLIP2" }, { "question": "What type of business is OCBC?", "answer": "bank", "gt_answers": [ "Bank" ], "image_path": "./data/STVQA/icdar/img_412.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign tell people?", "answer": "road ahead closed", "gt_answers": [ "Road Ahead Closed" ], "image_path": "./data/STVQA/IIIT_text/3054.jpg", "model_name": "BLIP2" }, { "question": "What is stated in the beginning of the name of the airline?", "answer": "person", "gt_answers": [ "AIR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553150.jpg", "model_name": "BLIP2" }, { "question": "What is the airlines name following the word air?", "answer": "philippine airlines", "gt_answers": [ "PACIFIC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553150.jpg", "model_name": "BLIP2" }, { "question": "Below air pacific, what pacific island name does it say?", "answer": "philippines", "gt_answers": [ "fiji" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553150.jpg", "model_name": "BLIP2" }, { "question": "What fruit is displayed", "answer": "bananas", "gt_answers": [ "Bananas" ], "image_path": "./data/STVQA/VisualGenome/2/2404433.jpg", "model_name": "BLIP2" }, { "question": "What is command of the digital sign?", "answer": "bees clear", "gt_answers": [ "BIKES USE CLARK" ], "image_path": "./data/STVQA/VisualGenome/2/2403876.jpg", "model_name": "BLIP2" }, { "question": "What word is written above the letters 'pas'", "answer": "person", "gt_answers": [ "hot" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000125694.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the brownie mix?", "answer": "ghirardelli triple chocolate", "gt_answers": [ "GHIRARDELLI" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003210.jpg", "model_name": "BLIP2" }, { "question": "What is written in red?", "answer": "supra", "gt_answers": [ "Sopranos", "SOPRANOS CHIANTI SODA" ], "image_path": "./data/STVQA/imageNet/n03983396_11337.JPEG", "model_name": "BLIP2" }, { "question": "What three capital letters are shown on the yellow umbrella?", "answer": "person", "gt_answers": [ "RMF" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000158362.jpg", "model_name": "BLIP2" }, { "question": "What did this sign say before it was vandalized?", "answer": "do not enter", "gt_answers": [ "do not enter" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000012641.jpg", "model_name": "BLIP2" }, { "question": "What does the vandalized writing on this sign say?", "answer": "do not enter", "gt_answers": [ "IN MY ASSHOLE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000012641.jpg", "model_name": "BLIP2" }, { "question": "What is exactly said on this sign, including the vandalized text?", "answer": "do not enter", "gt_answers": [ "do not IN MY ASSHOLE enter", "do not enter in my asshole" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000012641.jpg", "model_name": "BLIP2" }, { "question": "Where is this bus going?", "answer": "indian city", "gt_answers": [ "Broadway" ], "image_path": "./data/STVQA/VisualGenome/1/150554.jpg", "model_name": "BLIP2" }, { "question": "What is being offered at the right hand?", "answer": "coffee shop", "gt_answers": [ "French Bread" ], "image_path": "./data/STVQA/icdar/img_551.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the place located in the center of the image?", "answer": "person", "gt_answers": [ "BENJI'S" ], "image_path": "./data/STVQA/IIIT_text/1643.jpg", "model_name": "BLIP2" }, { "question": "What's the name of the cafe on the computer screen?", "answer": "the cafe", "gt_answers": [ "PHILLIES" ], "image_path": "./data/STVQA/imageNet/n03782006_36741.JPEG", "model_name": "BLIP2" }, { "question": "How many people are in the cafe?", "answer": "two", "gt_answers": [ "4" ], "image_path": "./data/STVQA/imageNet/n03782006_36741.JPEG", "model_name": "BLIP2" }, { "question": "How many microphones are in the image?", "answer": "two", "gt_answers": [ "1" ], "image_path": "./data/STVQA/imageNet/n03782006_36741.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the business in this photo?", "answer": "saint burger", "gt_answers": [ "Giant Burger" ], "image_path": "./data/STVQA/VisualGenome/1/2333950.jpg", "model_name": "BLIP2" }, { "question": "What kind of stop sign is in the image?", "answer": "all way stop sign", "gt_answers": [ "ALL WAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000082745.jpg", "model_name": "BLIP2" }, { "question": "What is the maximum capacity of the glass", "answer": "600 ml", "gt_answers": [ "600 mL" ], "image_path": "./data/STVQA/imageNet/n02815834_4697.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand of this glass", "answer": "pyrex brand", "gt_answers": [ "KIMAX" ], "image_path": "./data/STVQA/imageNet/n02815834_4697.JPEG", "model_name": "BLIP2" }, { "question": "Who is the sponsor displayed on the side?", "answer": "fedex", "gt_answers": [ "FedEx" ], "image_path": "./data/STVQA/VisualGenome/1/2316804.jpg", "model_name": "BLIP2" }, { "question": "What is the present on left corner", "answer": "tennis racket", "gt_answers": [ "naDaL news.com", "NADALNEWS.COM" ], "image_path": "./data/STVQA/VisualGenome/1/2375004.jpg", "model_name": "BLIP2" }, { "question": "What is in the glasses?", "answer": "wine", "gt_answers": [ "Wine" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000576389.jpg", "model_name": "BLIP2" }, { "question": "How many are in front?", "answer": "two", "gt_answers": [ "4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000576389.jpg", "model_name": "BLIP2" }, { "question": "What name is located on the bottom left corner?", "answer": "person", "gt_answers": [ "Brian D. Luster" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000086552.jpg", "model_name": "BLIP2" }, { "question": "What is writing in the board?", "answer": "no dogs", "gt_answers": [ "NO DOGS", "NO DOGS TEACUP POODLES OK" ], "image_path": "./data/STVQA/VisualGenome/1/2339817.jpg", "model_name": "BLIP2" }, { "question": "What four letter word is in the middle of the cauldron?", "answer": "magic", "gt_answers": [ "Soup" ], "image_path": "./data/STVQA/imageNet/n02939185_719.JPEG", "model_name": "BLIP2" }, { "question": "What is the large red letter and number combo on the side of the plane?", "answer": "it is a lst", "gt_answers": [ "N6777B" ], "image_path": "./data/STVQA/VisualGenome/1/2355945.jpg", "model_name": "BLIP2" }, { "question": "What is the text of the watermark", "answer": "a classic car", "gt_answers": [ "(C) Simon Turner" ], "image_path": "./data/STVQA/IIIT_text/4351.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the computer?", "answer": "lenovo", "gt_answers": [ "IBM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000119849.jpg", "model_name": "BLIP2" }, { "question": "What name is wirtten on the cup", "answer": "samsung", "gt_answers": [ "Joanne" ], "image_path": "./data/STVQA/VisualGenome/1/2370364.jpg", "model_name": "BLIP2" }, { "question": "What is the main destination?", "answer": "city centre", "gt_answers": [ "Middlesbrough" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000281285.jpg", "model_name": "BLIP2" }, { "question": "What is the price of the lowest marked item in this image?", "answer": "$5", "gt_answers": [ "$2.50" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000346915.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign say?", "answer": "no parking", "gt_answers": [ "no turns" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160009.jpg", "model_name": "BLIP2" }, { "question": "What are the first three letters on the man's sweater?", "answer": "c", "gt_answers": [ "CUV" ], "image_path": "./data/STVQA/VisualGenome/1/2319535.jpg", "model_name": "BLIP2" }, { "question": "what bus number is this?", "answer": "a bus", "gt_answers": [ "154" ], "image_path": "./data/STVQA/icdar/img_201.jpg", "model_name": "BLIP2" }, { "question": "Which city is the red bus from?", "answer": "liverpool", "gt_answers": [ "CHESTER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098943.jpg", "model_name": "BLIP2" }, { "question": "Which state is the State Fair being held?", "answer": "missouri", "gt_answers": [ "Wisconsin" ], "image_path": "./data/STVQA/VisualGenome/2/2404847.jpg", "model_name": "BLIP2" }, { "question": "what brand is this chainsaw ?", "answer": "husqvarna", "gt_answers": [ "Farm Boss", "FARMBOSS" ], "image_path": "./data/STVQA/imageNet/n03000684_7138.JPEG", "model_name": "BLIP2" }, { "question": "What is the street block number?", "answer": "401", "gt_answers": [ "4300" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000374891.jpg", "model_name": "BLIP2" }, { "question": "What time of road is this?", "answer": "oasis", "gt_answers": [ "DR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000374891.jpg", "model_name": "BLIP2" }, { "question": "What is the cross street for Wonderland?", "answer": "oasis ave", "gt_answers": [ "oasis" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000374891.jpg", "model_name": "BLIP2" }, { "question": "What restaurant is pictured?", "answer": "mcdonalds", "gt_answers": [ "McDonald's" ], "image_path": "./data/STVQA/icdar/img_195.jpg", "model_name": "BLIP2" }, { "question": "Which restaurant is shown?", "answer": "mcdonalds", "gt_answers": [ "McDonald's" ], "image_path": "./data/STVQA/icdar/img_195.jpg", "model_name": "BLIP2" }, { "question": "What is written on the bright, yellow shirt?", "answer": "i love my city", "gt_answers": [ "Word", "WORD." ], "image_path": "./data/STVQA/VisualGenome/1/2362985.jpg", "model_name": "BLIP2" }, { "question": "What are the numbers (dashes included) on the \"Barber Shop\" sign?", "answer": "no dashes", "gt_answers": [ "212-866-4160" ], "image_path": "./data/STVQA/VisualGenome/1/2362985.jpg", "model_name": "BLIP2" }, { "question": "How many pedestrians are in the picture?", "answer": "three", "gt_answers": [ "Four" ], "image_path": "./data/STVQA/VisualGenome/1/2362985.jpg", "model_name": "BLIP2" }, { "question": "what is written on the largest sticker on the ramp", "answer": "vans", "gt_answers": [ "Vans" ], "image_path": "./data/STVQA/VisualGenome/2/2414219.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign above the two men in red shirts?", "answer": "santa fe", "gt_answers": [ "caixanova" ], "image_path": "./data/STVQA/VisualGenome/2/793.jpg", "model_name": "BLIP2" }, { "question": "What is the letter above the & sign?", "answer": "e", "gt_answers": [ "S" ], "image_path": "./data/STVQA/VisualGenome/2/2408421.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the sport on the tee shirt?", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/2/2412290.jpg", "model_name": "BLIP2" }, { "question": "What letter is in the blue circle on the tee shirt?", "answer": "z", "gt_answers": [ "F" ], "image_path": "./data/STVQA/VisualGenome/2/2412290.jpg", "model_name": "BLIP2" }, { "question": "What word are written on the tee shirt?", "answer": "zions", "gt_answers": [ "Titans Tennis" ], "image_path": "./data/STVQA/VisualGenome/2/2412290.jpg", "model_name": "BLIP2" }, { "question": "What is this used for?", "answer": "a sharpener", "gt_answers": [ "Pencil sharpener" ], "image_path": "./data/STVQA/imageNet/n03908714_2869.JPEG", "model_name": "BLIP2" }, { "question": "What type of attraction is this?", "answer": "museum", "gt_answers": [ "Museum" ], "image_path": "./data/STVQA/VisualGenome/1/2334061.jpg", "model_name": "BLIP2" }, { "question": "What year is on the sign?", "answer": "1800", "gt_answers": [ "1730" ], "image_path": "./data/STVQA/VisualGenome/1/2334061.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the top book?", "answer": "a book of hours", "gt_answers": [ "THE ALCHEMIST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341714.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the bottom book with red lines?", "answer": "a book of hours", "gt_answers": [ "A PEOPLE'S HISTORY OF THE UNITED STATES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341714.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the book on top of the book with red lines?", "answer": "i am a cat", "gt_answers": [ "RATS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341714.jpg", "model_name": "BLIP2" }, { "question": "What blue word is printed on the side of the plane?", "answer": "united", "gt_answers": [ "United" ], "image_path": "./data/STVQA/VisualGenome/2/2407567.jpg", "model_name": "BLIP2" }, { "question": "What district does the street sign say?", "answer": "district b", "gt_answers": [ "SOUTH STREET SEAPORT HISTORIC DISTRICT" ], "image_path": "./data/STVQA/VisualGenome/1/2321693.jpg", "model_name": "BLIP2" }, { "question": "What word stands out the most on the red poster to the left of the dog?", "answer": "dog", "gt_answers": [ "BEEP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000522667.jpg", "model_name": "BLIP2" }, { "question": "What are the last four letters for the word on the top left of the picture?", "answer": "st", "gt_answers": [ "Roes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000522667.jpg", "model_name": "BLIP2" }, { "question": "Why is the keyboard so dusty", "answer": "i didn't clean it in a while", "gt_answers": [ "shift" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000064896.jpg", "model_name": "BLIP2" }, { "question": "How many hours can you park?", "answer": "24 hours", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326442.jpg", "model_name": "BLIP2" }, { "question": "What word describes the children at play?", "answer": "playful", "gt_answers": [ "slow" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326442.jpg", "model_name": "BLIP2" }, { "question": "What time does 2 hour parking begin?", "answer": "at 8 am", "gt_answers": [ "9:00", "9:00 AM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326442.jpg", "model_name": "BLIP2" }, { "question": "What is the number in the blue box?", "answer": "3", "gt_answers": [ "402" ], "image_path": "./data/STVQA/VisualGenome/1/2336353.jpg", "model_name": "BLIP2" }, { "question": "What does the flyer read in big letters?", "answer": "before and after", "gt_answers": [ "Before" ], "image_path": "./data/STVQA/VisualGenome/2/2414050.jpg", "model_name": "BLIP2" }, { "question": "How long is full-time", "answer": "a full-time student is a student who is enrolled full-time", "gt_answers": [ "24 weeks" ], "image_path": "./data/STVQA/VisualGenome/2/2414050.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street on which the Stop sign appears?", "answer": "person", "gt_answers": [ "45TH PARALLEL DR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000314279.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign tell drivers to do?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000314279.jpg", "model_name": "BLIP2" }, { "question": "where can I buy shoes here?", "answer": "nike", "gt_answers": [ "footaction" ], "image_path": "./data/STVQA/VisualGenome/2/2411987.jpg", "model_name": "BLIP2" }, { "question": "What word is written inside the heart?", "answer": "hello kitty", "gt_answers": [ "Love" ], "image_path": "./data/STVQA/imageNet/n02971356_16590.JPEG", "model_name": "BLIP2" }, { "question": "What is the slogan for J-Donuts?", "answer": "j-donuts j-donuts", "gt_answers": [ "the unreal taste" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000234176.jpg", "model_name": "BLIP2" }, { "question": "What is written on it?", "answer": "no parking here to corner", "gt_answers": [ "No parking here to corner" ], "image_path": "./data/STVQA/VisualGenome/1/2319596.jpg", "model_name": "BLIP2" }, { "question": "What is handwritten on the paper?", "answer": "to go", "gt_answers": [ "Cheers" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015759.jpg", "model_name": "BLIP2" }, { "question": "what is the symbol on the glass", "answer": "apple", "gt_answers": [ "apple" ], "image_path": "./data/STVQA/VisualGenome/1/2376275.jpg", "model_name": "BLIP2" }, { "question": "Where do these escalators lead?", "answer": "to the underground station", "gt_answers": [ "SUBWAY" ], "image_path": "./data/STVQA/IIIT_text/325.jpg", "model_name": "BLIP2" }, { "question": "Where are these escalators located?", "answer": "buchanan street station", "gt_answers": [ "Buchanan Street" ], "image_path": "./data/STVQA/IIIT_text/325.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of the lawnmower?", "answer": "kobalt", "gt_answers": [ "TG" ], "image_path": "./data/STVQA/imageNet/n03649909_32925.JPEG", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the lawnmower?", "answer": "john deere", "gt_answers": [ "KUBOTA" ], "image_path": "./data/STVQA/imageNet/n03649909_32925.JPEG", "model_name": "BLIP2" }, { "question": "What is the model of the lawnmower?", "answer": "briggs and stratton", "gt_answers": [ "TG1660" ], "image_path": "./data/STVQA/imageNet/n03649909_32925.JPEG", "model_name": "BLIP2" }, { "question": "What company made this product?", "answer": "tamron", "gt_answers": [ "Tamron" ], "image_path": "./data/STVQA/imageNet/n03657121_1181.JPEG", "model_name": "BLIP2" }, { "question": "what is the flower stall called?", "answer": "market", "gt_answers": [ "GRANVILLE ISLAND FLORIST", "GRANVILLE ISLAND" ], "image_path": "./data/STVQA/VisualGenome/1/1159451.jpg", "model_name": "BLIP2" }, { "question": "what do sunlight farms sell?", "answer": "vegetables", "gt_answers": [ "Fresh Fruit and Vegetables" ], "image_path": "./data/STVQA/VisualGenome/1/1159451.jpg", "model_name": "BLIP2" }, { "question": "What's the first word on the street sign?", "answer": "no", "gt_answers": [ "Sorry" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000251335.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company shown in the top right?", "answer": "mrt", "gt_answers": [ "SWAROVSKI" ], "image_path": "./data/STVQA/icdar/img_802.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the food pictured?", "answer": "beef stroganoff", "gt_answers": [ "Beef Stroganoff" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010221.jpg", "model_name": "BLIP2" }, { "question": "Which block is to the left?", "answer": "person", "gt_answers": [ "800" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000188139.jpg", "model_name": "BLIP2" }, { "question": "What is the street name?", "answer": "person", "gt_answers": [ "kearny" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000188139.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the building?", "answer": "a restaurant", "gt_answers": [ "Bocoray" ], "image_path": "./data/STVQA/VisualGenome/1/2374501.jpg", "model_name": "BLIP2" }, { "question": "What is the street name on the blue sign?", "answer": "sherman wy", "gt_answers": [ "Sherman Wy" ], "image_path": "./data/STVQA/VisualGenome/1/2363097.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THIS SHOP?", "answer": "h m", "gt_answers": [ "OCBC BANK" ], "image_path": "./data/STVQA/icdar/test_img_475.jpg", "model_name": "BLIP2" }, { "question": "What is the city on the bike sign?", "answer": "toronto", "gt_answers": [ "ONTARIO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000234839.jpg", "model_name": "BLIP2" }, { "question": "Which airline do these planes belong to?", "answer": "india go", "gt_answers": [ "IndiGo" ], "image_path": "./data/STVQA/IIIT_text/img_000692.jpg", "model_name": "BLIP2" }, { "question": "What number train is this?", "answer": "656", "gt_answers": [ "8948" ], "image_path": "./data/STVQA/VisualGenome/2/2400049.jpg", "model_name": "BLIP2" }, { "question": "What type of restaurant does the red sign on the top left make reference to?", "answer": "sushi", "gt_answers": [ "Indian Vegetarian" ], "image_path": "./data/STVQA/VisualGenome/1/2370592.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store on the left of the image?", "answer": "nike", "gt_answers": [ "SWAROVSKI" ], "image_path": "./data/STVQA/icdar/test_img_266.jpg", "model_name": "BLIP2" }, { "question": "What is the batter's number?", "answer": "2", "gt_answers": [ "6" ], "image_path": "./data/STVQA/VisualGenome/2/2403749.jpg", "model_name": "BLIP2" }, { "question": "What kind of butter is on the table", "answer": "white butter", "gt_answers": [ "balance smart", "SMART BALANCE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000396029.jpg", "model_name": "BLIP2" }, { "question": "What word is written in yellow on the red and blue striped shirt?", "answer": "wii", "gt_answers": [ "unicef" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000229827.jpg", "model_name": "BLIP2" }, { "question": "What is the TV brand?", "answer": "sony", "gt_answers": [ "SONY" ], "image_path": "./data/STVQA/VisualGenome/1/2338660.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the website on the image?", "answer": "", "gt_answers": [ "tom.com" ], "image_path": "./data/STVQA/imageNet/n02790996_4546.JPEG", "model_name": "BLIP2" }, { "question": "What is the train number?", "answer": "no train number", "gt_answers": [ "N 471" ], "image_path": "./data/STVQA/VisualGenome/1/1159626.jpg", "model_name": "BLIP2" }, { "question": "What is the photographers name?", "answer": "michael scott", "gt_answers": [ "Tai Strietman Photography" ], "image_path": "./data/STVQA/imageNet/n02486261_18107.JPEG", "model_name": "BLIP2" }, { "question": "What does the owl on the man's shirt say?", "answer": "owl city", "gt_answers": [ "Damn fool music" ], "image_path": "./data/STVQA/VisualGenome/1/2351562.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the man's shirt?", "answer": "owl city", "gt_answers": [ "zatopeks" ], "image_path": "./data/STVQA/VisualGenome/1/2351562.jpg", "model_name": "BLIP2" }, { "question": "What is the word above the owl?", "answer": "donut", "gt_answers": [ "zatopeks" ], "image_path": "./data/STVQA/VisualGenome/1/2351562.jpg", "model_name": "BLIP2" }, { "question": "What brand is the bike in front?", "answer": "honda", "gt_answers": [ "Ducati" ], "image_path": "./data/STVQA/VisualGenome/1/2364345.jpg", "model_name": "BLIP2" }, { "question": "What is the big letter on the scoreboard?", "answer": "m", "gt_answers": [ "C" ], "image_path": "./data/STVQA/VisualGenome/1/2343899.jpg", "model_name": "BLIP2" }, { "question": "What US city is on the shirts at the bottom of the photo?", "answer": "chicago", "gt_answers": [ "Kansas City" ], "image_path": "./data/STVQA/VisualGenome/1/2343899.jpg", "model_name": "BLIP2" }, { "question": "What player name is on the shirt on the right?", "answer": "joe kelly", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/1/2343899.jpg", "model_name": "BLIP2" }, { "question": "What drink is being sold?", "answer": "coffee", "gt_answers": [ "Coca Cola", "Coca-Cola" ], "image_path": "./data/STVQA/VisualGenome/1/2349528.jpg", "model_name": "BLIP2" }, { "question": "What is the name written in the cake", "answer": "teddy bear", "gt_answers": [ "Julissa" ], "image_path": "./data/STVQA/VisualGenome/1/2321447.jpg", "model_name": "BLIP2" }, { "question": "what is the doll decorated the cake", "answer": "teddy bear", "gt_answers": [ "Teddy bear" ], "image_path": "./data/STVQA/VisualGenome/1/2321447.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the red and black bag on the left?", "answer": "person", "gt_answers": [ "OGI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000339670.jpg", "model_name": "BLIP2" }, { "question": "What is the last number shown on the display?", "answer": "4", "gt_answers": [ "5" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000059817.jpg", "model_name": "BLIP2" }, { "question": "What is the train number?", "answer": "sncf", "gt_answers": [ "808" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000005340.jpg", "model_name": "BLIP2" }, { "question": "Who is the train company?", "answer": "sncf", "gt_answers": [ "SNCF" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000005340.jpg", "model_name": "BLIP2" }, { "question": "What type of train is this?", "answer": "high speed train", "gt_answers": [ "SNCF 808", "SNCF" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000005340.jpg", "model_name": "BLIP2" }, { "question": "What name is on the cake?", "answer": "christina", "gt_answers": [ "Jenny" ], "image_path": "./data/STVQA/VisualGenome/1/2318543.jpg", "model_name": "BLIP2" }, { "question": "How old is jenny turning?", "answer": "20", "gt_answers": [ "21" ], "image_path": "./data/STVQA/VisualGenome/1/2318543.jpg", "model_name": "BLIP2" }, { "question": "Whats the brand?", "answer": "white cloud", "gt_answers": [ "White cloud" ], "image_path": "./data/STVQA/imageNet/n03887697_7332.JPEG", "model_name": "BLIP2" }, { "question": "What word is in the center of this picture?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000167354.jpg", "model_name": "BLIP2" }, { "question": "What word is beneeth the 'stop' on the sign?", "answer": "street", "gt_answers": [ "driving" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000167354.jpg", "model_name": "BLIP2" }, { "question": "what letters are visible on the storefront on the left?", "answer": "mcdonalds", "gt_answers": [ "kshop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000167354.jpg", "model_name": "BLIP2" }, { "question": "What number is on this sign?", "answer": "4", "gt_answers": [ "501" ], "image_path": "./data/STVQA/icdar/img_768.jpg", "model_name": "BLIP2" }, { "question": "What brand is the beer in the yellow box?", "answer": "magic hat", "gt_answers": [ "Magic Hat" ], "image_path": "./data/STVQA/VisualGenome/2/2412338.jpg", "model_name": "BLIP2" }, { "question": "What number is on the right corner of the bus?", "answer": "57", "gt_answers": [ "573" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000070104.jpg", "model_name": "BLIP2" }, { "question": "What kinds of products are being sold?", "answer": "cosmetics", "gt_answers": [ "Beauty Care" ], "image_path": "./data/STVQA/icdar/img_415.jpg", "model_name": "BLIP2" }, { "question": "What does the green sign say?", "answer": "bear crossing", "gt_answers": [ "RECONYX" ], "image_path": "./data/STVQA/VisualGenome/2/2403281.jpg", "model_name": "BLIP2" }, { "question": "what number is on the grey shirt?", "answer": "person", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065632.jpg", "model_name": "BLIP2" }, { "question": "what number is on the pitch?", "answer": "person", "gt_answers": [ "24" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065632.jpg", "model_name": "BLIP2" }, { "question": "What is the word at the top of the pink board?", "answer": "bathroom", "gt_answers": [ "OUIJA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000134835.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of this product?", "answer": "marcol", "gt_answers": [ "Marigold" ], "image_path": "./data/STVQA/imageNet/n03871628_11287.JPEG", "model_name": "BLIP2" }, { "question": "what is this bus number?", "answer": "i crosstown", "gt_answers": [ "919" ], "image_path": "./data/STVQA/VisualGenome/2/2414590.jpg", "model_name": "BLIP2" }, { "question": "where is this bus going?", "answer": "to cross town", "gt_answers": [ "crosstown" ], "image_path": "./data/STVQA/VisualGenome/2/2414590.jpg", "model_name": "BLIP2" }, { "question": "What street is this location?", "answer": "elm street", "gt_answers": [ "Lanaudiere" ], "image_path": "./data/STVQA/VisualGenome/1/2375730.jpg", "model_name": "BLIP2" }, { "question": "What is it made withb", "answer": "it is made with oats", "gt_answers": [ "Whole grain" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007362.jpg", "model_name": "BLIP2" }, { "question": "What animal is on the sigh?", "answer": "sheep", "gt_answers": [ "Sheep" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000458027.jpg", "model_name": "BLIP2" }, { "question": "What is the year of the copyright?", "answer": "2012", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/imageNet/n01614925_3110.JPEG", "model_name": "BLIP2" }, { "question": "Who holds the copyright?", "answer": "the photographer", "gt_answers": [ "gary phillips" ], "image_path": "./data/STVQA/imageNet/n01614925_3110.JPEG", "model_name": "BLIP2" }, { "question": "What is another word for farm animals?", "answer": "sheep", "gt_answers": [ "livestock" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159714.jpg", "model_name": "BLIP2" }, { "question": "what brand/company is this bus?", "answer": "mb", "gt_answers": [ "Centrebus" ], "image_path": "./data/STVQA/VisualGenome/1/2335872.jpg", "model_name": "BLIP2" }, { "question": "What is written between the headlights of the bus", "answer": "london city bus", "gt_answers": [ "Centrebus" ], "image_path": "./data/STVQA/VisualGenome/1/2335872.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the left of the image read?", "answer": "no parking sign", "gt_answers": [ "surf shop", "SURF SHOP LIQUIDATORS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116694.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the street sign?", "answer": "lake street", "gt_answers": [ "W LAKE ST" ], "image_path": "./data/STVQA/IIIT_text/1947.jpg", "model_name": "BLIP2" }, { "question": "What does this car's license plate read?", "answer": "london", "gt_answers": [ "LD12 UKK" ], "image_path": "./data/STVQA/IIIT_text/3279.jpg", "model_name": "BLIP2" }, { "question": "what is the number on the police hat", "answer": "4", "gt_answers": [ "11940" ], "image_path": "./data/STVQA/imageNet/n04456115_16952.JPEG", "model_name": "BLIP2" }, { "question": "What word is written inbetween the two blue lights ontop of the truck?", "answer": "person", "gt_answers": [ "GARDA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000094156.jpg", "model_name": "BLIP2" }, { "question": "What number is on the blue shirt?", "answer": "person", "gt_answers": [ "5" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000286406.jpg", "model_name": "BLIP2" }, { "question": "what is the total fat?", "answer": "20 g", "gt_answers": [ "3g" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019892.jpg", "model_name": "BLIP2" }, { "question": "how much sodium?", "answer": "220 mg", "gt_answers": [ "85mg" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019892.jpg", "model_name": "BLIP2" }, { "question": "what are the calories?", "answer": "0 calories", "gt_answers": [ "130" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019892.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this boat?", "answer": "no name", "gt_answers": [ "Lady Joan III" ], "image_path": "./data/STVQA/VisualGenome/1/2351171.jpg", "model_name": "BLIP2" }, { "question": "What is written in the blue panel on the volleyball?", "answer": "volley", "gt_answers": [ "MiKASA" ], "image_path": "./data/STVQA/VisualGenome/1/2321521.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline this airplane belongs to?", "answer": "air india", "gt_answers": [ "IndiGo" ], "image_path": "./data/STVQA/IIIT_text/img_000693.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the program on the screen?", "answer": "flickr", "gt_answers": [ "flickr" ], "image_path": "./data/STVQA/VisualGenome/1/2316591.jpg", "model_name": "BLIP2" }, { "question": "What does the bottle with the purple cap contain?", "answer": "olive oil", "gt_answers": [ "Grapeseed oil" ], "image_path": "./data/STVQA/VisualGenome/1/2329872.jpg", "model_name": "BLIP2" }, { "question": "What price is shown in the circle?", "answer": "0", "gt_answers": [ "$75" ], "image_path": "./data/STVQA/imageNet/n06359193_55129.JPEG", "model_name": "BLIP2" }, { "question": "What word has the largest font?", "answer": "news", "gt_answers": [ "NEWS" ], "image_path": "./data/STVQA/imageNet/n06359193_55129.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of one of the sponsors of the tennis tournament?", "answer": "person", "gt_answers": [ "Pacific Life" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000400409.jpg", "model_name": "BLIP2" }, { "question": "What auto make is sponsor of the tennis tournament?", "answer": "ford", "gt_answers": [ "Mercedes Benz" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000400409.jpg", "model_name": "BLIP2" }, { "question": "How many blocks away is lake monona?", "answer": "two", "gt_answers": [ "3" ], "image_path": "./data/STVQA/VisualGenome/2/2412895.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the sign?", "answer": "person", "gt_answers": [ "alle" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000177959.jpg", "model_name": "BLIP2" }, { "question": "Where is the red bus going to?", "answer": "a city", "gt_answers": [ "Georgetown" ], "image_path": "./data/STVQA/VisualGenome/1/2361176.jpg", "model_name": "BLIP2" }, { "question": "What is written in the side of the red bus?", "answer": "revolution", "gt_answers": [ "Circulator" ], "image_path": "./data/STVQA/VisualGenome/1/2361176.jpg", "model_name": "BLIP2" }, { "question": "What street is the horse on?", "answer": "street", "gt_answers": [ "Verdugo St" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000473531.jpg", "model_name": "BLIP2" }, { "question": "What kind of beverages are being advertised on the neon sign?", "answer": "soda", "gt_answers": [ "CAFE , cocktails" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000473531.jpg", "model_name": "BLIP2" }, { "question": "What does the text in english say?", "answer": "reserve bank of india", "gt_answers": [ "Reserve Bank", "Reserve Bank Of India" ], "image_path": "./data/STVQA/IIIT_text/img_000975.jpg", "model_name": "BLIP2" }, { "question": "what number is written on the train", "answer": "the train has a number on it", "gt_answers": [ "31" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116423.jpg", "model_name": "BLIP2" }, { "question": "what letter is written next to the 31", "answer": "d", "gt_answers": [ "x" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116423.jpg", "model_name": "BLIP2" }, { "question": "What is the stick?", "answer": "glue", "gt_answers": [ "PRitt" ], "image_path": "./data/STVQA/VisualGenome/2/2412914.jpg", "model_name": "BLIP2" }, { "question": "What is the name of a business in this image", "answer": "starbucks", "gt_answers": [ "Spa Symphony" ], "image_path": "./data/STVQA/icdar/test_img_410.jpg", "model_name": "BLIP2" }, { "question": "What is the name of a business in this image?", "answer": "starbucks", "gt_answers": [ "Chabuton" ], "image_path": "./data/STVQA/icdar/test_img_410.jpg", "model_name": "BLIP2" }, { "question": "What male name is listed on the sign in the back?", "answer": "mike", "gt_answers": [ "Sean" ], "image_path": "./data/STVQA/VisualGenome/1/2367866.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of wet wipes shown?", "answer": "wet n wild", "gt_answers": [ "HUGGIES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000405060.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the donut store?", "answer": "the doughnut shop", "gt_answers": [ "Voodoo Doughnut", "VOODOO" ], "image_path": "./data/STVQA/VisualGenome/1/2347303.jpg", "model_name": "BLIP2" }, { "question": "How many boats are at the shore?", "answer": "two", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/VisualGenome/1/2320493.jpg", "model_name": "BLIP2" }, { "question": "What year is on the wine bottle?", "answer": "2012", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000434894.jpg", "model_name": "BLIP2" }, { "question": "What is the left red button?", "answer": "mute", "gt_answers": [ "Guide" ], "image_path": "./data/STVQA/VisualGenome/1/2339051.jpg", "model_name": "BLIP2" }, { "question": "What is the white center circle button?", "answer": "mute", "gt_answers": [ "Info" ], "image_path": "./data/STVQA/VisualGenome/1/2339051.jpg", "model_name": "BLIP2" }, { "question": "What is the slogan of The Irish Times", "answer": "for the times in life", "gt_answers": [ "For the Times We Live In" ], "image_path": "./data/STVQA/IIIT_text/3463.jpg", "model_name": "BLIP2" }, { "question": "Which airline is this plane?", "answer": "american airlines", "gt_answers": [ "SAS" ], "image_path": "./data/STVQA/VisualGenome/2/2405095.jpg", "model_name": "BLIP2" }, { "question": "What is the speed of the ball?", "answer": "the speed of a tennis ball", "gt_answers": [ "113 mph" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000448560.jpg", "model_name": "BLIP2" }, { "question": "What is written on the bottom of the surfboard?", "answer": "i am", "gt_answers": [ "Katin" ], "image_path": "./data/STVQA/VisualGenome/1/2367392.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the doctor on the green sign?", "answer": "person", "gt_answers": [ "Sawaddipong" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000003493.jpg", "model_name": "BLIP2" }, { "question": "What number is this building?", "answer": "201", "gt_answers": [ "2565" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000488395.jpg", "model_name": "BLIP2" }, { "question": "What is this building's number?", "answer": "201", "gt_answers": [ "2565" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000488395.jpg", "model_name": "BLIP2" }, { "question": "What four numbers are seen in this photo?", "answer": "4", "gt_answers": [ "2565" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000488395.jpg", "model_name": "BLIP2" }, { "question": "What is the first word written at the top of the label on the bottle of soda?", "answer": "person", "gt_answers": [ "special" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406013.jpg", "model_name": "BLIP2" }, { "question": "What does the watermark say?", "answer": "graduate stock photos", "gt_answers": [ "shutterstock" ], "image_path": "./data/STVQA/imageNet/n02669723_13466.JPEG", "model_name": "BLIP2" }, { "question": "What company makes ROUGE PUR COUTURE", "answer": "mac cosmetics", "gt_answers": [ "Yves Saint Laurent" ], "image_path": "./data/STVQA/VisualGenome/1/2315893.jpg", "model_name": "BLIP2" }, { "question": "What website is watermarked on this image?", "answer": "sgfoodie", "gt_answers": [ "shootfirsteatlater.com" ], "image_path": "./data/STVQA/VisualGenome/1/2366387.jpg", "model_name": "BLIP2" }, { "question": "What are the 5 digits written vertically in a row on the wall above the bananas?", "answer": "a phone number", "gt_answers": [ "12551" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000475939.jpg", "model_name": "BLIP2" }, { "question": "What is the whole number beginning and ending in 1 written vertically on the wall above the bananas?", "answer": "1", "gt_answers": [ "12551" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000475939.jpg", "model_name": "BLIP2" }, { "question": "What is the first store on the left?", "answer": "sephora", "gt_answers": [ "KEITH", "& KEITH" ], "image_path": "./data/STVQA/icdar/img_404.jpg", "model_name": "BLIP2" }, { "question": "What is the Seconds store from the left>", "answer": "sephora", "gt_answers": [ "SEPHORA" ], "image_path": "./data/STVQA/icdar/img_404.jpg", "model_name": "BLIP2" }, { "question": "Who makes the instrument shown?", "answer": "yamaha", "gt_answers": [ "Yamaha" ], "image_path": "./data/STVQA/imageNet/n02804610_5152.JPEG", "model_name": "BLIP2" }, { "question": "What brand is in the object?", "answer": "royal blend whisky", "gt_answers": [ "The Royal Blend", "ROYAL THE BLEND" ], "image_path": "./data/STVQA/imageNet/n04579145_1461.JPEG", "model_name": "BLIP2" }, { "question": "What sport is that person playing", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/2/2410179.jpg", "model_name": "BLIP2" }, { "question": "What type of cheese is this?", "answer": "feta cheese", "gt_answers": [ "MOZZARELLA" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012143.jpg", "model_name": "BLIP2" }, { "question": "How many people are wearing glasses?", "answer": "3", "gt_answers": [ "One", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2352510.jpg", "model_name": "BLIP2" }, { "question": "What brand is the camera lens cap?", "answer": "nik", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/imageNet/n03657121_9641.JPEG", "model_name": "BLIP2" }, { "question": "What brand name is on the lens cap?", "answer": "nik", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/imageNet/n03657121_9641.JPEG", "model_name": "BLIP2" }, { "question": "What section of the city is this?", "answer": "san francisco", "gt_answers": [ "Greenwich" ], "image_path": "./data/STVQA/VisualGenome/2/2400654.jpg", "model_name": "BLIP2" }, { "question": "What are the letters on top of the building?", "answer": "person", "gt_answers": [ "tel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000179031.jpg", "model_name": "BLIP2" }, { "question": "What name is printed on the chairs at the bottom of the image?", "answer": "us open", "gt_answers": [ "US OPEN" ], "image_path": "./data/STVQA/VisualGenome/1/2373508.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the advertisement on the left?", "answer": "nike", "gt_answers": [ "Heineken" ], "image_path": "./data/STVQA/VisualGenome/1/2373508.jpg", "model_name": "BLIP2" }, { "question": "What does the arrow shaped sign show in the blue background?", "answer": "london underground station", "gt_answers": [ "P" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000408537.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street on the street sign?", "answer": "london road", "gt_answers": [ "CARRINGTON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000408537.jpg", "model_name": "BLIP2" }, { "question": "What is the city name shown at the bottom of the street sign?", "answer": "london", "gt_answers": [ "WESTMINSTER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000408537.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the bottle of water?", "answer": "no brand", "gt_answers": [ "ZEPHYRHILLS" ], "image_path": "./data/STVQA/VisualGenome/1/2321705.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the side of the taxi, just above the arrow?", "answer": "person", "gt_answers": [ "xicab" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000138217.jpg", "model_name": "BLIP2" }, { "question": "What instruction is on the red sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/2/2408539.jpg", "model_name": "BLIP2" }, { "question": "Are the two people men or women?", "answer": "women", "gt_answers": [ "Women" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000055651.jpg", "model_name": "BLIP2" }, { "question": "What is on the car?", "answer": "stickers", "gt_answers": [ "Revill" ], "image_path": "./data/STVQA/imageNet/n04417672_16141.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the bobsled?", "answer": "whelen", "gt_answers": [ "Whelen" ], "image_path": "./data/STVQA/imageNet/n02860847_147.JPEG", "model_name": "BLIP2" }, { "question": "What is the website under \"Whelen\"?", "answer": "com", "gt_answers": [ "USMilitary.com" ], "image_path": "./data/STVQA/imageNet/n02860847_147.JPEG", "model_name": "BLIP2" }, { "question": "What word is displayed in large lettering, on the side of the plane?", "answer": "cargolux", "gt_answers": [ "cargolux" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000272846.jpg", "model_name": "BLIP2" }, { "question": "What brand is written on the machinery?", "answer": "emerson", "gt_answers": [ "Siemens-Schuckert" ], "image_path": "./data/STVQA/IIIT_text/img_001161.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the button with the up and down arrows on it?", "answer": "d", "gt_answers": [ "P" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000467081.jpg", "model_name": "BLIP2" }, { "question": "What letters come after the letters ATV/ on the same button?", "answer": "tv", "gt_answers": [ "DTV" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000467081.jpg", "model_name": "BLIP2" }, { "question": "What restaurant did the coffee cup come from?", "answer": "donut king", "gt_answers": [ "Tim Hortons" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000022080.jpg", "model_name": "BLIP2" }, { "question": "What does the picture on the toilet say?", "answer": "a dog", "gt_answers": [ "wanted" ], "image_path": "./data/STVQA/VisualGenome/2/534.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the crackers?", "answer": "ritz", "gt_answers": [ "House Recipe" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000185108.jpg", "model_name": "BLIP2" }, { "question": "What company is listed on the red sign?", "answer": "angels baseball", "gt_answers": [ "State Farm" ], "image_path": "./data/STVQA/VisualGenome/1/2371365.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the sports company listed on the fence?", "answer": "adidas", "gt_answers": [ "Fox Sports" ], "image_path": "./data/STVQA/VisualGenome/1/2371365.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the red jersey?", "answer": "2", "gt_answers": [ "17" ], "image_path": "./data/STVQA/VisualGenome/1/2371365.jpg", "model_name": "BLIP2" }, { "question": "What does the sign not want to be picked up?", "answer": "garbage", "gt_answers": [ "Blossoms", "THE BLOSSOMS" ], "image_path": "./data/STVQA/VisualGenome/1/2328272.jpg", "model_name": "BLIP2" }, { "question": "What is the white text on the sign?", "answer": "stop beaver", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2328272.jpg", "model_name": "BLIP2" }, { "question": "what is the arrow mark refer", "answer": "detour", "gt_answers": [ "desviacion" ], "image_path": "./data/STVQA/VisualGenome/1/2362972.jpg", "model_name": "BLIP2" }, { "question": "which animal place on there", "answer": "dog", "gt_answers": [ "dog" ], "image_path": "./data/STVQA/VisualGenome/1/2362972.jpg", "model_name": "BLIP2" }, { "question": "what he doing", "answer": "riding his bike", "gt_answers": [ "cycling" ], "image_path": "./data/STVQA/VisualGenome/1/2362972.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the banner?", "answer": "alma", "gt_answers": [ "ALMA" ], "image_path": "./data/STVQA/imageNet/n02704792_19069.JPEG", "model_name": "BLIP2" }, { "question": "What is the last word on the banner?", "answer": "fie", "gt_answers": [ "Peru" ], "image_path": "./data/STVQA/imageNet/n02704792_19069.JPEG", "model_name": "BLIP2" }, { "question": "Where is this firetruck from?", "answer": "coolville", "gt_answers": [ "COOLVILLE" ], "image_path": "./data/STVQA/VisualGenome/1/2361012.jpg", "model_name": "BLIP2" }, { "question": "What brand name is visible in the white text inside the green square on the box visible in the photo?", "answer": "healthy choice", "gt_answers": [ "Healthy Choice" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009797.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "no parking sign", "gt_answers": [ "enone stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000252693.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign with the red circle say?", "answer": "no parking sign", "gt_answers": [ "DO NOT ENTER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000252693.jpg", "model_name": "BLIP2" }, { "question": "What does the octagonal red sign say?", "answer": "the white house", "gt_answers": [ "enone stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000252693.jpg", "model_name": "BLIP2" }, { "question": "What number train car is this?", "answer": "a train car", "gt_answers": [ "57307" ], "image_path": "./data/STVQA/VisualGenome/1/2355210.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company that owns the image?", "answer": "shutterstock", "gt_answers": [ "iStockphoto" ], "image_path": "./data/STVQA/imageNet/n03544143_7974.JPEG", "model_name": "BLIP2" }, { "question": "What may happen if this product gets in your eyes?", "answer": "irritate eyes", "gt_answers": [ "It may irritate your eyes.", "IRRITATE" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008342.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON IT", "answer": "delta airlines", "gt_answers": [ "DELTA" ], "image_path": "./data/STVQA/VisualGenome/1/2360715.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2317403.jpg", "model_name": "BLIP2" }, { "question": "What street is labeled in the upper left?", "answer": "57th street", "gt_answers": [ "5 AV" ], "image_path": "./data/STVQA/VisualGenome/2/607.jpg", "model_name": "BLIP2" }, { "question": "What store is on the far right?", "answer": "macy's", "gt_answers": [ "Van Cleef & Arpeh" ], "image_path": "./data/STVQA/VisualGenome/2/607.jpg", "model_name": "BLIP2" }, { "question": "What is this lady selling?", "answer": "cupcakes", "gt_answers": [ "cupcakes" ], "image_path": "./data/STVQA/VisualGenome/1/2328221.jpg", "model_name": "BLIP2" }, { "question": "What is the man holding?", "answer": "hershey syrup", "gt_answers": [ "HERSHEYS SYRUP" ], "image_path": "./data/STVQA/imageNet/n07836838_2185.JPEG", "model_name": "BLIP2" }, { "question": "What is the first letter on the side of the boat?", "answer": "p", "gt_answers": [ "P" ], "image_path": "./data/STVQA/IIIT_text/2888.jpg", "model_name": "BLIP2" }, { "question": "What color is the word \"police\" written in?", "answer": "blue", "gt_answers": [ "White" ], "image_path": "./data/STVQA/IIIT_text/2888.jpg", "model_name": "BLIP2" }, { "question": "How many letters are in the word on the side of the boat?", "answer": "seven", "gt_answers": [ "6" ], "image_path": "./data/STVQA/IIIT_text/2888.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the blue sign on the floor behind the player?", "answer": "no shoes allowed", "gt_answers": [ "POLO" ], "image_path": "./data/STVQA/VisualGenome/1/2323241.jpg", "model_name": "BLIP2" }, { "question": "What is written on the banana?", "answer": "romance and adventure", "gt_answers": [ "It was really very good" ], "image_path": "./data/STVQA/VisualGenome/1/2316977.jpg", "model_name": "BLIP2" }, { "question": "Who does the boat belong to?", "answer": "us coast guard", "gt_answers": [ "U.S. COAST GUARD" ], "image_path": "./data/STVQA/VisualGenome/2/2410850.jpg", "model_name": "BLIP2" }, { "question": "What is the boat number?", "answer": "u s coast guard", "gt_answers": [ "25678" ], "image_path": "./data/STVQA/VisualGenome/2/2410850.jpg", "model_name": "BLIP2" }, { "question": "Who makes the engine for the boat?", "answer": "john deere", "gt_answers": [ "HONDA" ], "image_path": "./data/STVQA/VisualGenome/2/2410850.jpg", "model_name": "BLIP2" }, { "question": "What is the website address?", "answer": "the website address is http://www", "gt_answers": [ "bbrtfilter.en.alibaba.com" ], "image_path": "./data/STVQA/imageNet/n03843555_2897.JPEG", "model_name": "BLIP2" }, { "question": "What resort are these men skiing at?", "answer": "squaw valley", "gt_answers": [ "timberline" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414499.jpg", "model_name": "BLIP2" }, { "question": "Which brand is the black snowboard?", "answer": "person", "gt_answers": [ "Burton" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414499.jpg", "model_name": "BLIP2" }, { "question": "The orange painted snowboard has what letter and which number?", "answer": "a and b", "gt_answers": [ "K2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414499.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the book", "answer": "joy in the morning", "gt_answers": [ "Joy of cooking" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000231091.jpg", "model_name": "BLIP2" }, { "question": "How much was spent on fast food in 1972?", "answer": "$2 billion", "gt_answers": [ "$3 billion", "$3billion" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000181909.jpg", "model_name": "BLIP2" }, { "question": "What gas station is the picture from?", "answer": "mcdonalds gas station", "gt_answers": [ "CITGO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000181909.jpg", "model_name": "BLIP2" }, { "question": "What does the box say?", "answer": "real boobs", "gt_answers": [ "Real Boobs U", "REAL BOOBS" ], "image_path": "./data/STVQA/VisualGenome/2/2400949.jpg", "model_name": "BLIP2" }, { "question": "What is the liscence number on the back of the car?", "answer": "the license number on the back of the car", "gt_answers": [ "8477 CJV" ], "image_path": "./data/STVQA/VisualGenome/2/59.jpg", "model_name": "BLIP2" }, { "question": "What three letters are on the top left item", "answer": "aon", "gt_answers": [ "AON" ], "image_path": "./data/STVQA/imageNet/n03692522_8517.JPEG", "model_name": "BLIP2" }, { "question": "Which is the number on the back?", "answer": "person", "gt_answers": [ "75" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000022482.jpg", "model_name": "BLIP2" }, { "question": "Which is player's name?", "answer": "person", "gt_answers": [ "Rodriguez" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000022482.jpg", "model_name": "BLIP2" }, { "question": "What is the caption in this photo say?", "answer": "diana whitney photography", "gt_answers": [ "Diana Whyte Photography" ], "image_path": "./data/STVQA/IIIT_text/7875.jpg", "model_name": "BLIP2" }, { "question": "what is written on the sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2370519.jpg", "model_name": "BLIP2" }, { "question": "What is the total sale amount?", "answer": "0", "gt_answers": [ "15.02" ], "image_path": "./data/STVQA/imageNet/n03425413_14244.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the first sign?", "answer": "brighton", "gt_answers": [ "Turquoise ave" ], "image_path": "./data/STVQA/VisualGenome/1/2361840.jpg", "model_name": "BLIP2" }, { "question": "What is written on the second plate?", "answer": "brighton", "gt_answers": [ "Brighton ave" ], "image_path": "./data/STVQA/VisualGenome/1/2361840.jpg", "model_name": "BLIP2" }, { "question": "What is written on the third plate?", "answer": "brighton", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/1/2361840.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS INDICATED IN THIS SIGN BOARD?", "answer": "eating animals", "gt_answers": [ "STOP EATING ANIMALS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000101753.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the armband?", "answer": "1", "gt_answers": [ "18" ], "image_path": "./data/STVQA/imageNet/n02088364_8619.JPEG", "model_name": "BLIP2" }, { "question": "What brand is this?", "answer": "kraft", "gt_answers": [ "Knorr" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013383.jpg", "model_name": "BLIP2" }, { "question": "What label is visible?", "answer": "knorr", "gt_answers": [ "NUTRITION" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013383.jpg", "model_name": "BLIP2" }, { "question": "What is written on the shirt of the person travelling on bike?", "answer": "person", "gt_answers": [ "POLICE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293647.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "no smoking", "gt_answers": [ "4FINGERS" ], "image_path": "./data/STVQA/icdar/img_325.jpg", "model_name": "BLIP2" }, { "question": "What is the big blue word on the screen?", "answer": "read life", "gt_answers": [ "READ" ], "image_path": "./data/STVQA/VisualGenome/1/498077.jpg", "model_name": "BLIP2" }, { "question": "What is the big yellow word on the screen?", "answer": "read life", "gt_answers": [ "LIFE" ], "image_path": "./data/STVQA/VisualGenome/1/498077.jpg", "model_name": "BLIP2" }, { "question": "What does the blue phrase say?", "answer": "read life", "gt_answers": [ "READ Every Day" ], "image_path": "./data/STVQA/VisualGenome/1/498077.jpg", "model_name": "BLIP2" }, { "question": "What is the third word in the book title starting with \"The Memory...\"", "answer": "memory", "gt_answers": [ "Cathedral" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000013770.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the author Crais?", "answer": "person", "gt_answers": [ "Robert" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000013770.jpg", "model_name": "BLIP2" }, { "question": "What is the number of motorbike rider?", "answer": "person", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000262299.jpg", "model_name": "BLIP2" }, { "question": "What is the store's phone number?", "answer": "770 - 777 - 8787", "gt_answers": [ "726-1184" ], "image_path": "./data/STVQA/VisualGenome/1/2359704.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the gold plaque?", "answer": "person", "gt_answers": [ "TELL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098116.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the brown circle?", "answer": "person", "gt_answers": [ "LION" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098116.jpg", "model_name": "BLIP2" }, { "question": "What does the the top street sign say?", "answer": "elm street", "gt_answers": [ "High St", "HIGH" ], "image_path": "./data/STVQA/VisualGenome/2/2402203.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/2/2402203.jpg", "model_name": "BLIP2" }, { "question": "what is the license plate of the black car?", "answer": "sc bkr", "gt_answers": [ "SF5I MPX", "SF51MPX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000209176.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the umpire's sleeve?", "answer": "person", "gt_answers": [ "5" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000369078.jpg", "model_name": "BLIP2" }, { "question": "What is the white uppercase word on the object?", "answer": "hockey", "gt_answers": [ "Cup" ], "image_path": "./data/STVQA/imageNet/n04019541_41949.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on the child's shirt?", "answer": "person", "gt_answers": [ "West" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000578766.jpg", "model_name": "BLIP2" }, { "question": "What does the cap say?", "answer": "strawberry soda", "gt_answers": [ "Strawberry Soda" ], "image_path": "./data/STVQA/imageNet/n02877765_15153.JPEG", "model_name": "BLIP2" }, { "question": "What company name is on the mudflap?", "answer": "person", "gt_answers": [ "YAMAHA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000452162.jpg", "model_name": "BLIP2" }, { "question": "what is the number on the digital sign at the top front of the bus?", "answer": "3", "gt_answers": [ "51" ], "image_path": "./data/STVQA/VisualGenome/1/2374374.jpg", "model_name": "BLIP2" }, { "question": "What event is advertised in large red letters?", "answer": "fashion show", "gt_answers": [ "SALE" ], "image_path": "./data/STVQA/icdar/img_874.jpg", "model_name": "BLIP2" }, { "question": "WHAT ROAD SIGN IS THERE", "answer": "stop sign", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2373994.jpg", "model_name": "BLIP2" }, { "question": "What is written on the Ferry?", "answer": "person", "gt_answers": [ "FERRY BRIGANTIA OF BRISTOL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000449508.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "westjet", "gt_answers": [ "Westjet" ], "image_path": "./data/STVQA/VisualGenome/2/2401448.jpg", "model_name": "BLIP2" }, { "question": "What is the plane identification number?", "answer": "msn 370", "gt_answers": [ "C-GMWJ" ], "image_path": "./data/STVQA/VisualGenome/2/2401448.jpg", "model_name": "BLIP2" }, { "question": "What brand is the copier?", "answer": "toshiba", "gt_answers": [ "TOSHIBA" ], "image_path": "./data/STVQA/imageNet/n03924679_11382.JPEG", "model_name": "BLIP2" }, { "question": "What is the model number of the copier?", "answer": "toshiba bizhub c200", "gt_answers": [ "1560" ], "image_path": "./data/STVQA/imageNet/n03924679_11382.JPEG", "model_name": "BLIP2" }, { "question": "What is the first letter of the artist's name at the bottom of the photo?", "answer": "p", "gt_answers": [ "f" ], "image_path": "./data/STVQA/imageNet/n02111129_4495.JPEG", "model_name": "BLIP2" }, { "question": "What web address is located at the bottom?", "answer": "earthshots org", "gt_answers": [ "EarthShots.org" ], "image_path": "./data/STVQA/imageNet/n02487347_3680.JPEG", "model_name": "BLIP2" }, { "question": "What airline does this aircraft belong to?", "answer": "air france", "gt_answers": [ "Airfrance" ], "image_path": "./data/STVQA/VisualGenome/1/2326777.jpg", "model_name": "BLIP2" }, { "question": "What number is the girl in the white jersey closest to the camera?", "answer": "10", "gt_answers": [ "7" ], "image_path": "./data/STVQA/VisualGenome/1/2364950.jpg", "model_name": "BLIP2" }, { "question": "What number is the girl in the blue jersey?", "answer": "10", "gt_answers": [ "3" ], "image_path": "./data/STVQA/VisualGenome/1/2364950.jpg", "model_name": "BLIP2" }, { "question": "Who does the picture belong to?", "answer": "daniel", "gt_answers": [ "Tom Boldt" ], "image_path": "./data/STVQA/imageNet/n02115913_2390.JPEG", "model_name": "BLIP2" }, { "question": "What is the train number?", "answer": "person", "gt_answers": [ "405" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000547675.jpg", "model_name": "BLIP2" }, { "question": "What does the sign indicate?", "answer": "railroad crossing", "gt_answers": [ "RAILROAD CROSSING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000547675.jpg", "model_name": "BLIP2" }, { "question": "What is the phone number on the truck", "answer": "the phone number is", "gt_answers": [ "1-866-668-7666" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048747.jpg", "model_name": "BLIP2" }, { "question": "What company is the airplane from?", "answer": "united airlines", "gt_answers": [ "US Airways" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000196916.jpg", "model_name": "BLIP2" }, { "question": "What is the white text on the black sign on the fender?", "answer": "harley davidson", "gt_answers": [ "F8364" ], "image_path": "./data/STVQA/VisualGenome/1/61584.jpg", "model_name": "BLIP2" }, { "question": "What are the black numbers on the white sign on the forks?", "answer": "the year of manufacture", "gt_answers": [ "343" ], "image_path": "./data/STVQA/VisualGenome/1/61584.jpg", "model_name": "BLIP2" }, { "question": "What are the chrome letters on the handlebars?", "answer": "harley davidson", "gt_answers": [ "AA" ], "image_path": "./data/STVQA/VisualGenome/1/61584.jpg", "model_name": "BLIP2" }, { "question": "What is the website address?", "answer": "person", "gt_answers": [ "WWW.WIMBLEDON.ORG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000359996.jpg", "model_name": "BLIP2" }, { "question": "What is the speed?", "answer": "100 mph", "gt_answers": [ "111", "111 MPH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000359996.jpg", "model_name": "BLIP2" }, { "question": "What is written at the bottom of the silver container?", "answer": "l o w", "gt_answers": [ "Lisa's Retro Style" ], "image_path": "./data/STVQA/imageNet/n03062245_6387.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "king street", "gt_answers": [ "Auburn", "AUBURN WAY" ], "image_path": "./data/STVQA/VisualGenome/1/2375881.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the building?", "answer": "city hall", "gt_answers": [ "Auburn Justice Center" ], "image_path": "./data/STVQA/VisualGenome/1/2375881.jpg", "model_name": "BLIP2" }, { "question": "What should be do red?", "answer": "stop sign", "gt_answers": [ "No Turn on Red", "NO TURN" ], "image_path": "./data/STVQA/VisualGenome/1/2375881.jpg", "model_name": "BLIP2" }, { "question": "What is the importance of?", "answer": "water", "gt_answers": [ "Details" ], "image_path": "./data/STVQA/VisualGenome/1/2369923.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the pills?", "answer": "4", "gt_answers": [ "300" ], "image_path": "./data/STVQA/imageNet/n03937543_9745.JPEG", "model_name": "BLIP2" }, { "question": "What is the street name on the sign?", "answer": "common wealth", "gt_answers": [ "Commonwealth Ave", "COMMONWEALTH" ], "image_path": "./data/STVQA/VisualGenome/1/2326135.jpg", "model_name": "BLIP2" }, { "question": "What is the best by date on the container on the left?", "answer": "the best by date on the container on the right", "gt_answers": [ "Dec-26-2012" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003492.jpg", "model_name": "BLIP2" }, { "question": "What is the first ingredient listed on the container on the left?", "answer": "sugar", "gt_answers": [ "Cranberries" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003492.jpg", "model_name": "BLIP2" }, { "question": "What is the third ingredient on the container on the left?", "answer": "soy sauce", "gt_answers": [ "Almonds" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003492.jpg", "model_name": "BLIP2" }, { "question": "What is written in the blue color portion of the bottle?", "answer": "sprite", "gt_answers": [ "flavorful spicy sea salt", "Flavorful Dry sea salt" ], "image_path": "./data/STVQA/VisualGenome/1/2355333.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the front of the stage say?", "answer": "no smoking", "gt_answers": [ "Worship" ], "image_path": "./data/STVQA/imageNet/n03445924_28059.JPEG", "model_name": "BLIP2" }, { "question": "What brand is the microwave on the left?", "answer": "ge", "gt_answers": [ "DAEWOO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000497698.jpg", "model_name": "BLIP2" }, { "question": "What is the toy banana's name?", "answer": "mr banana", "gt_answers": [ "twinkie the kid" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000497698.jpg", "model_name": "BLIP2" }, { "question": "What brand is the vehicle?", "answer": "bmw", "gt_answers": [ "BMW" ], "image_path": "./data/STVQA/imageNet/n03459775_16367.JPEG", "model_name": "BLIP2" }, { "question": "What letters are on the blue sign?", "answer": "unicef", "gt_answers": [ "UNHCR" ], "image_path": "./data/STVQA/VisualGenome/1/2366545.jpg", "model_name": "BLIP2" }, { "question": "In the picture on the right what is written on the red octagonal sign?", "answer": "auckland city centre", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000561861.jpg", "model_name": "BLIP2" }, { "question": "W90 must you give way to?", "answer": "a pedestrian", "gt_answers": [ "pedestrians" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000561861.jpg", "model_name": "BLIP2" }, { "question": "What must you give to pedestrians?", "answer": "the right of way", "gt_answers": [ "way" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000561861.jpg", "model_name": "BLIP2" }, { "question": "What five letter name is on the clock?.", "answer": "amsterdam", "gt_answers": [ "JOYCE" ], "image_path": "./data/STVQA/VisualGenome/1/2360828.jpg", "model_name": "BLIP2" }, { "question": "What room is labeled on the sign?", "answer": "waiting room", "gt_answers": [ "REFRESHMENT" ], "image_path": "./data/STVQA/VisualGenome/1/2360828.jpg", "model_name": "BLIP2" }, { "question": "What number are the platforms?", "answer": "two", "gt_answers": [ "1" ], "image_path": "./data/STVQA/VisualGenome/1/2360828.jpg", "model_name": "BLIP2" }, { "question": "What does the street sign say on the left?", "answer": "morgan street", "gt_answers": [ "SOLEY ST" ], "image_path": "./data/STVQA/VisualGenome/1/2343653.jpg", "model_name": "BLIP2" }, { "question": "What year is the planner on the wall for?", "answer": "2010", "gt_answers": [ "1984" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000213172.jpg", "model_name": "BLIP2" }, { "question": "How many people are wearing wetsuits?", "answer": "five", "gt_answers": [ "fIVE" ], "image_path": "./data/STVQA/VisualGenome/1/498389.jpg", "model_name": "BLIP2" }, { "question": "What are the people standing on?", "answer": "beach", "gt_answers": [ "Sand" ], "image_path": "./data/STVQA/VisualGenome/1/498389.jpg", "model_name": "BLIP2" }, { "question": "What word is listed after a set of numbers on the left side?", "answer": "vodka", "gt_answers": [ "Poggio" ], "image_path": "./data/STVQA/imageNet/n03871628_39494.JPEG", "model_name": "BLIP2" }, { "question": "What number comes after the 5 on the left side?", "answer": "6", "gt_answers": [ "4" ], "image_path": "./data/STVQA/imageNet/n03871628_39494.JPEG", "model_name": "BLIP2" }, { "question": "What is the destination of the bus?", "answer": "toronto", "gt_answers": [ "Cromdale" ], "image_path": "./data/STVQA/imageNet/n04487081_21488.JPEG", "model_name": "BLIP2" }, { "question": "Who makes this tool?", "answer": "microsoft", "gt_answers": [ "HP" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016912.jpg", "model_name": "BLIP2" }, { "question": "What is the tour company called?", "answer": "person", "gt_answers": [ "York Pullman City Tour" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000093070.jpg", "model_name": "BLIP2" }, { "question": "How much does the phone cost?", "answer": "25 cents", "gt_answers": [ "25 cents" ], "image_path": "./data/STVQA/VisualGenome/2/2408147.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the tennis racket?", "answer": "person", "gt_answers": [ "W" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000528465.jpg", "model_name": "BLIP2" }, { "question": "What number appears on his jersey?", "answer": "person", "gt_answers": [ "13" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000578023.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "person", "gt_answers": [ "61" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000297585.jpg", "model_name": "BLIP2" }, { "question": "What number is the Safety House?", "answer": "58", "gt_answers": [ "9" ], "image_path": "./data/STVQA/VisualGenome/1/2375581.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "rodeo st", "gt_answers": [ "RODEO Dr 200", "BH RODEO" ], "image_path": "./data/STVQA/imageNet/n06794110_5457.JPEG", "model_name": "BLIP2" }, { "question": "What can be read at the bottom of the picture?", "answer": "person and person", "gt_answers": [ "HEATHER ABOUNADER PHOTOGRAPHY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000538099.jpg", "model_name": "BLIP2" }, { "question": "What is letters are on the helmet?", "answer": "person", "gt_answers": [ "agv" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053370.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate on the middle motorcycle?", "answer": "person", "gt_answers": [ "VSU996" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053370.jpg", "model_name": "BLIP2" }, { "question": "what product is being sold in the vending machine?", "answer": "french fries", "gt_answers": [ "French fries", "REAL FRENCH FRIES" ], "image_path": "./data/STVQA/imageNet/n04525305_6764.JPEG", "model_name": "BLIP2" }, { "question": "What is wirtten on the airplane?", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/VisualGenome/1/2342238.jpg", "model_name": "BLIP2" }, { "question": "What year is written at the bottom of the picture?", "answer": "2012", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/VisualGenome/1/2342238.jpg", "model_name": "BLIP2" }, { "question": "What month abbreviation appears at the bottom of the picture?", "answer": "feb", "gt_answers": [ "Feb" ], "image_path": "./data/STVQA/VisualGenome/1/2342238.jpg", "model_name": "BLIP2" }, { "question": "What letters are on the side of the bus?", "answer": "it", "gt_answers": [ "SPTC" ], "image_path": "./data/STVQA/VisualGenome/1/2325975.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "all way", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2361115.jpg", "model_name": "BLIP2" }, { "question": "What color are the letters and border?", "answer": "red", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/1/2361115.jpg", "model_name": "BLIP2" }, { "question": "What year is written on the large case?", "answer": "person", "gt_answers": [ "1853" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000520569.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the art gallery?", "answer": "sisko gallery", "gt_answers": [ "Sisko Gallery" ], "image_path": "./data/STVQA/VisualGenome/1/2338988.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the teddy bear?", "answer": "person's name", "gt_answers": [ "A BEAR HUG WOULD DO YOU GOOD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000351397.jpg", "model_name": "BLIP2" }, { "question": "What is the bus route?", "answer": "zone 1", "gt_answers": [ "i04" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000376668.jpg", "model_name": "BLIP2" }, { "question": "Whose birthday is it?", "answer": "it is marge's birthday", "gt_answers": [ "MARCUS" ], "image_path": "./data/STVQA/VisualGenome/2/2405217.jpg", "model_name": "BLIP2" }, { "question": "What fruit is named on the top sign?", "answer": "apples", "gt_answers": [ "Apples" ], "image_path": "./data/STVQA/VisualGenome/1/2364446.jpg", "model_name": "BLIP2" }, { "question": "What fruit is named on the middle sign?", "answer": "apples", "gt_answers": [ "Pears" ], "image_path": "./data/STVQA/VisualGenome/1/2364446.jpg", "model_name": "BLIP2" }, { "question": "What fruit is named on the bottom sign?", "answer": "apples", "gt_answers": [ "Plums" ], "image_path": "./data/STVQA/VisualGenome/1/2364446.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the bus?", "answer": "san diego", "gt_answers": [ "Via San Antonio" ], "image_path": "./data/STVQA/VisualGenome/1/2372669.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the beer bottle?", "answer": "person", "gt_answers": [ "Miller Lite" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000134144.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the green airplane?", "answer": "ewa air cargo", "gt_answers": [ "EVA AIR Cargo" ], "image_path": "./data/STVQA/VisualGenome/2/2404908.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the truck?", "answer": "91", "gt_answers": [ "781" ], "image_path": "./data/STVQA/VisualGenome/1/2316693.jpg", "model_name": "BLIP2" }, { "question": "What is the English term for arret?", "answer": "tower", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2364218.jpg", "model_name": "BLIP2" }, { "question": "What type of work does the company who owns this photograph do?", "answer": "financial services", "gt_answers": [ "Photography" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406356.jpg", "model_name": "BLIP2" }, { "question": "What is the top destintion on this sign?", "answer": "singapore", "gt_answers": [ "Club House" ], "image_path": "./data/STVQA/icdar/img_256.jpg", "model_name": "BLIP2" }, { "question": "Which team does the red team represent?", "answer": "the united states", "gt_answers": [ "Canada" ], "image_path": "./data/STVQA/VisualGenome/1/1160011.jpg", "model_name": "BLIP2" }, { "question": "What does the lighted sign say?", "answer": "aoc", "gt_answers": [ "Hom Yoga" ], "image_path": "./data/STVQA/icdar/img_997.jpg", "model_name": "BLIP2" }, { "question": "what kind of court is it?", "answer": "basketball court", "gt_answers": [ "Food Court" ], "image_path": "./data/STVQA/icdar/img_640.jpg", "model_name": "BLIP2" }, { "question": "Who is currently batting?", "answer": "person", "gt_answers": [ "ethier" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000184877.jpg", "model_name": "BLIP2" }, { "question": "What is the name of player 16?", "answer": "person", "gt_answers": [ "ethier" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000184877.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the player whose team is currently up to bat?", "answer": "person", "gt_answers": [ "ethier" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000184877.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE TEXT ON THE LENS COVER?", "answer": "canon", "gt_answers": [ "CANON" ], "image_path": "./data/STVQA/imageNet/n03657121_20227.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "hamburg", "gt_answers": [ "Mundoora.", "MUNDOORA" ], "image_path": "./data/STVQA/VisualGenome/1/2335202.jpg", "model_name": "BLIP2" }, { "question": "What are the last four digits of the licence plate?", "answer": "the last four digits of the licence plate are 0", "gt_answers": [ "5UGN" ], "image_path": "./data/STVQA/imageNet/n02787622_6868.JPEG", "model_name": "BLIP2" }, { "question": "What is the first website printed on the green wall?", "answer": "person", "gt_answers": [ "oaklandathletics.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000153224.jpg", "model_name": "BLIP2" }, { "question": "what is the year on the cup?", "answer": "2010", "gt_answers": [ "1926" ], "image_path": "./data/STVQA/VisualGenome/1/2357267.jpg", "model_name": "BLIP2" }, { "question": "What are the blades described as?", "answer": "chalk line", "gt_answers": [ "Hooked" ], "image_path": "./data/STVQA/imageNet/n03481172_8879.JPEG", "model_name": "BLIP2" }, { "question": "What number is written on the back wall?", "answer": "number", "gt_answers": [ "330" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000360813.jpg", "model_name": "BLIP2" }, { "question": "What make is the truck?", "answer": "daf", "gt_answers": [ "Scania" ], "image_path": "./data/STVQA/VisualGenome/1/2325486.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of the silver car?", "answer": "tah", "gt_answers": [ "NG05BSZ", "NGO5 BSZ" ], "image_path": "./data/STVQA/VisualGenome/1/2325486.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the street sign?", "answer": "person", "gt_answers": [ "great southern overland stage ROUTE of 1849", "great southern overland" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000548690.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the car?", "answer": "hot rod", "gt_answers": [ "Red Voo Doo" ], "image_path": "./data/STVQA/VisualGenome/1/1159915.jpg", "model_name": "BLIP2" }, { "question": "Who owns this car?", "answer": "a man", "gt_answers": [ "Aaron & Misa Hagar", "Aaron & Misha Hagar" ], "image_path": "./data/STVQA/VisualGenome/1/1159915.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the garage that built this car?", "answer": "the garage", "gt_answers": [ "Rat Runners Garage" ], "image_path": "./data/STVQA/VisualGenome/1/1159915.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the head piece?", "answer": "person", "gt_answers": [ "earth" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053150.jpg", "model_name": "BLIP2" }, { "question": "What is the last 3 words on the headpiece?", "answer": "i am person", "gt_answers": [ "show on earth" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053150.jpg", "model_name": "BLIP2" }, { "question": "What word begins with 'S' in this picture?", "answer": "street", "gt_answers": [ "show" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053150.jpg", "model_name": "BLIP2" }, { "question": "What country are they in?", "answer": "united states", "gt_answers": [ "USA" ], "image_path": "./data/STVQA/VisualGenome/1/2318556.jpg", "model_name": "BLIP2" }, { "question": "What word is printed under the windshield of the yellow and orange van?", "answer": "person", "gt_answers": [ "reading" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000183123.jpg", "model_name": "BLIP2" }, { "question": "What is the brand advertised?", "answer": "rabinowitz", "gt_answers": [ "Railrunners" ], "image_path": "./data/STVQA/imageNet/n03692522_11899.JPEG", "model_name": "BLIP2" }, { "question": "Where is Railrunners based?", "answer": "san diego", "gt_answers": [ "Groningen" ], "image_path": "./data/STVQA/imageNet/n03692522_11899.JPEG", "model_name": "BLIP2" }, { "question": "What is Railrunners website?", "answer": "railrunners website", "gt_answers": [ "mscrailrunners.nl" ], "image_path": "./data/STVQA/imageNet/n03692522_11899.JPEG", "model_name": "BLIP2" }, { "question": "What is the K word at the top of the sign?", "answer": "king", "gt_answers": [ "Kenko" ], "image_path": "./data/STVQA/icdar/test_img_240.jpg", "model_name": "BLIP2" }, { "question": "What are the top two words on the white sign?", "answer": "fire hydrant", "gt_answers": [ "Keep Out" ], "image_path": "./data/STVQA/VisualGenome/1/2351191.jpg", "model_name": "BLIP2" }, { "question": "Which year was this picture taken according to the caption below?", "answer": "2009", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000305076.jpg", "model_name": "BLIP2" }, { "question": "What is the first line on the cake?", "answer": "happy birthday", "gt_answers": [ "WELCOME" ], "image_path": "./data/STVQA/VisualGenome/1/2335036.jpg", "model_name": "BLIP2" }, { "question": "What store is pictured on the right?", "answer": "dillards", "gt_answers": [ "L'Occitane" ], "image_path": "./data/STVQA/icdar/test_img_128.jpg", "model_name": "BLIP2" }, { "question": "What does the heading read inside the L'Occitane store?", "answer": "l'occitane", "gt_answers": [ "Skincare" ], "image_path": "./data/STVQA/icdar/test_img_128.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the back of the batter?", "answer": "2", "gt_answers": [ "25" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000008519.jpg", "model_name": "BLIP2" }, { "question": "What is the second word in white on the blue box on the shelf?", "answer": "cat", "gt_answers": [ "Four" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000223942.jpg", "model_name": "BLIP2" }, { "question": "what is the place of the pizza place?", "answer": "north beach", "gt_answers": [ "NORTH BEACH PIZZA", "NORTH BEACH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000564781.jpg", "model_name": "BLIP2" }, { "question": "What is the date of the flag behind the man", "answer": "1776", "gt_answers": [ "July 4th 2009" ], "image_path": "./data/STVQA/VisualGenome/1/2329469.jpg", "model_name": "BLIP2" }, { "question": "What does the pile consist of?", "answer": "shoes", "gt_answers": [ "Shoes" ], "image_path": "./data/STVQA/imageNet/n04120489_545.JPEG", "model_name": "BLIP2" }, { "question": "Is the man allowed to skate here?", "answer": "yes", "gt_answers": [ "NO", "NO SKATEBOARDING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000030779.jpg", "model_name": "BLIP2" }, { "question": "What kind of peace is on the left?", "answer": "hair peace", "gt_answers": [ "Hair", "HAIR PEACE" ], "image_path": "./data/STVQA/VisualGenome/1/2337477.jpg", "model_name": "BLIP2" }, { "question": "What kind of peace is on the right?", "answer": "hair peace", "gt_answers": [ "Bed", "BED PEACE" ], "image_path": "./data/STVQA/VisualGenome/1/2337477.jpg", "model_name": "BLIP2" }, { "question": "What must you do when a pedestrian is within the crosswalk?", "answer": "yield to the pedestrian", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000118730.jpg", "model_name": "BLIP2" }, { "question": "what does the blue writing on the can say?", "answer": "mrt", "gt_answers": [ "COCONUT" ], "image_path": "./data/STVQA/VisualGenome/1/2368979.jpg", "model_name": "BLIP2" }, { "question": "what does the white writing on the can say?", "answer": "mrt", "gt_answers": [ "Yeo's" ], "image_path": "./data/STVQA/VisualGenome/1/2368979.jpg", "model_name": "BLIP2" }, { "question": "What does the top sign say?", "answer": "cherry street", "gt_answers": [ "Cherokee St" ], "image_path": "./data/STVQA/VisualGenome/1/2347215.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom sign say?", "answer": "cherry street", "gt_answers": [ "Cherry St" ], "image_path": "./data/STVQA/VisualGenome/1/2347215.jpg", "model_name": "BLIP2" }, { "question": "What two street names are there?", "answer": "cherry st", "gt_answers": [ "Cherokee Cherry", "CHEROKEE ST CHERRY ST" ], "image_path": "./data/STVQA/VisualGenome/1/2347215.jpg", "model_name": "BLIP2" }, { "question": "What does the top button say?", "answer": "return", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/imageNet/n04074963_19870.JPEG", "model_name": "BLIP2" }, { "question": "What is the word on the black sign?", "answer": "beach", "gt_answers": [ "Llandudno" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000435124.jpg", "model_name": "BLIP2" }, { "question": "WHICH DEPARTMENT AIR CRAFT IS THIS?", "answer": "navy", "gt_answers": [ "U.S.NAVY" ], "image_path": "./data/STVQA/VisualGenome/1/2361915.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NUMBER SHOWN IN THE PICTURE ?", "answer": "2", "gt_answers": [ "4" ], "image_path": "./data/STVQA/VisualGenome/1/2361915.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street display on the picture?", "answer": "plymouth street", "gt_answers": [ "PLYMPTON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000408515.jpg", "model_name": "BLIP2" }, { "question": "What is the big white word on the red octagon?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000408515.jpg", "model_name": "BLIP2" }, { "question": "What are the last three letters before the exclamation mark in the stop sign?", "answer": "ton", "gt_answers": [ "ism" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000408515.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "aviva", "gt_answers": [ "Armavia" ], "image_path": "./data/STVQA/VisualGenome/1/498377.jpg", "model_name": "BLIP2" }, { "question": "What institute is being advertised?", "answer": "uk institute of education", "gt_answers": [ "UCE Birmingham Faculty of Education", "UCE Birmingham" ], "image_path": "./data/STVQA/imageNet/n06359193_1051.JPEG", "model_name": "BLIP2" }, { "question": "What is the product brand?", "answer": "person", "gt_answers": [ "jungle" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000317349.jpg", "model_name": "BLIP2" }, { "question": "What type of product is this?", "answer": "poster", "gt_answers": [ "cell- phone", "CELL-PHONE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000317349.jpg", "model_name": "BLIP2" }, { "question": "What word has blue inside one of the letters?", "answer": "horse", "gt_answers": [ "BIORANCH" ], "image_path": "./data/STVQA/VisualGenome/1/2375818.jpg", "model_name": "BLIP2" }, { "question": "What word is on the bottom right?", "answer": "horse", "gt_answers": [ "Horse" ], "image_path": "./data/STVQA/VisualGenome/1/2375818.jpg", "model_name": "BLIP2" }, { "question": "What are the first 4 letters of the top right word?", "answer": "equestrian", "gt_answers": [ "Prof" ], "image_path": "./data/STVQA/VisualGenome/1/2375818.jpg", "model_name": "BLIP2" }, { "question": "What place is written in the painting?", "answer": "person", "gt_answers": [ "FIJI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000383576.jpg", "model_name": "BLIP2" }, { "question": "Does this plane transport passengers?", "answer": "yes", "gt_answers": [ "Seaplanes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000036500.jpg", "model_name": "BLIP2" }, { "question": "Where is this plane located?", "answer": "on the beach", "gt_answers": [ "Catalina" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000036500.jpg", "model_name": "BLIP2" }, { "question": "What is the bus company printed on the front of the bus?", "answer": "intercity express", "gt_answers": [ "citylink" ], "image_path": "./data/STVQA/VisualGenome/2/2415352.jpg", "model_name": "BLIP2" }, { "question": "What is the company name of the pizza in the drawer", "answer": "pizza hut", "gt_answers": [ "Papa Johns" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000092331.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the television screen?", "answer": "pizza", "gt_answers": [ "INSPIRED" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000092331.jpg", "model_name": "BLIP2" }, { "question": "What type of vegetable is organically grown?", "answer": "broccoli", "gt_answers": [ "Broccoli" ], "image_path": "./data/STVQA/VisualGenome/1/2359749.jpg", "model_name": "BLIP2" }, { "question": "What is the sale price of the broccoli?", "answer": "ten dollars", "gt_answers": [ "$1.00" ], "image_path": "./data/STVQA/VisualGenome/1/2359749.jpg", "model_name": "BLIP2" }, { "question": "What is on the screen of the jukebox?", "answer": "cd player", "gt_answers": [ "My music" ], "image_path": "./data/STVQA/imageNet/n02988304_9338.JPEG", "model_name": "BLIP2" }, { "question": "What is the first word of the street name found on the side of the building?", "answer": "brook", "gt_answers": [ "BRICK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352564.jpg", "model_name": "BLIP2" }, { "question": "What is the second word of the street name found on the building?", "answer": "brook", "gt_answers": [ "LANE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352564.jpg", "model_name": "BLIP2" }, { "question": "What is the first word of the street name found in the background?", "answer": "st", "gt_answers": [ "BUXT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352564.jpg", "model_name": "BLIP2" }, { "question": "what website does this photo belong to?", "answer": "deutsche meister", "gt_answers": [ "www.DerRittmeister.com" ], "image_path": "./data/STVQA/imageNet/n03929855_334.JPEG", "model_name": "BLIP2" }, { "question": "What letter is on the tail of the plane?", "answer": "v", "gt_answers": [ "E" ], "image_path": "./data/STVQA/VisualGenome/1/2365664.jpg", "model_name": "BLIP2" }, { "question": "What brand is the typewriter?", "answer": "person", "gt_answers": [ "Corona" ], "image_path": "./data/STVQA/imageNet/n04264628_21334.JPEG", "model_name": "BLIP2" }, { "question": "What word is on the bottom?", "answer": "saturday", "gt_answers": [ "Saturday" ], "image_path": "./data/STVQA/imageNet/n02834397_1257.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant?", "answer": "tas", "gt_answers": [ "TAS Restaurant", "TAS" ], "image_path": "./data/STVQA/imageNet/n04081281_10634.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant?", "answer": "tas", "gt_answers": [ "TAS Restaurant", "TAS" ], "image_path": "./data/STVQA/imageNet/n04081281_10634.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant/", "answer": "tas", "gt_answers": [ "TAS" ], "image_path": "./data/STVQA/imageNet/n04081281_10634.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the neon jacket?", "answer": "i am person", "gt_answers": [ "Metropolitan Police" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000173430.jpg", "model_name": "BLIP2" }, { "question": "What insect is referenced in the passage?", "answer": "bee", "gt_answers": [ "dragonfly" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005214.jpg", "model_name": "BLIP2" }, { "question": "What is another name for 'dragonfly' in some parts of the US?", "answer": "mosquito", "gt_answers": [ "snake feeder" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005214.jpg", "model_name": "BLIP2" }, { "question": "Who took this picture?", "answer": "person", "gt_answers": [ "Ross Merritt Photography" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000319388.jpg", "model_name": "BLIP2" }, { "question": "What store brand is shown?", "answer": "zara", "gt_answers": [ "ZARA" ], "image_path": "./data/STVQA/icdar/img_749.jpg", "model_name": "BLIP2" }, { "question": "What car manufacturer is sponsoring this match?", "answer": "audi", "gt_answers": [ "PEUGEOT" ], "image_path": "./data/STVQA/VisualGenome/1/107946.jpg", "model_name": "BLIP2" }, { "question": "Which concourse has the train stopped at?", "answer": "concourse c", "gt_answers": [ "f" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000130749.jpg", "model_name": "BLIP2" }, { "question": "What is the left-most label on the side of the laptop in the image?", "answer": "person", "gt_answers": [ "CD-RW" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000025174.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THE FILE", "answer": "glass flask", "gt_answers": [ "LEVER FILE" ], "image_path": "./data/STVQA/imageNet/n02815834_3440.JPEG", "model_name": "BLIP2" }, { "question": "HOW MANY ML IS THE BIG FLASK", "answer": "250ml", "gt_answers": [ "500 ML", "500" ], "image_path": "./data/STVQA/imageNet/n02815834_3440.JPEG", "model_name": "BLIP2" }, { "question": "what is the date of this picture?", "answer": "the date is unknown", "gt_answers": [ "6 5 2005" ], "image_path": "./data/STVQA/VisualGenome/2/2217.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "person", "gt_answers": [ "alitalia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000078825.jpg", "model_name": "BLIP2" }, { "question": "What text is on the logo in the top left of the photo?", "answer": "topps", "gt_answers": [ "Upper deck" ], "image_path": "./data/STVQA/VisualGenome/1/2359501.jpg", "model_name": "BLIP2" }, { "question": "What name is at the bottom of the picture?", "answer": "dave rutledge", "gt_answers": [ "Dave Rutledge" ], "image_path": "./data/STVQA/VisualGenome/1/2359501.jpg", "model_name": "BLIP2" }, { "question": "What is the sign on the silver pole?", "answer": "one way", "gt_answers": [ "oNE WAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000327628.jpg", "model_name": "BLIP2" }, { "question": "What does the front of the bus say?", "answer": "person", "gt_answers": [ "cumfybus optare eh06uny" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000177060.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to a pub", "gt_answers": [ "VIA RUFFORD TARTLETON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000177060.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "x1", "gt_answers": [ "202" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000177060.jpg", "model_name": "BLIP2" }, { "question": "What number is on the ski lift?", "answer": "4", "gt_answers": [ "4" ], "image_path": "./data/STVQA/VisualGenome/1/1159996.jpg", "model_name": "BLIP2" }, { "question": "What it is title of this ski lift stop?", "answer": "mountain top", "gt_answers": [ "Mountain Top" ], "image_path": "./data/STVQA/VisualGenome/1/1159996.jpg", "model_name": "BLIP2" }, { "question": "What does the label read on the bottle behind the kitten?", "answer": "water bottle", "gt_answers": [ "Easy to pour Easy to store" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000297979.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "quick groceries", "gt_answers": [ "Quick Stop Groceries" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000064009.jpg", "model_name": "BLIP2" }, { "question": "What is the Brand name?", "answer": "rutland", "gt_answers": [ "Rutland" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029309.jpg", "model_name": "BLIP2" }, { "question": "What item number is this product?", "answer": "rutland", "gt_answers": [ "84" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029309.jpg", "model_name": "BLIP2" }, { "question": "What is the flavoring of the product?", "answer": "caramel peanut", "gt_answers": [ "Caramel Peanut" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000438.jpg", "model_name": "BLIP2" }, { "question": "What month is the Bird Picture for?", "answer": "january", "gt_answers": [ "January" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029620.jpg", "model_name": "BLIP2" }, { "question": "Who is the co-pilot?", "answer": "dog", "gt_answers": [ "Dog" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000282527.jpg", "model_name": "BLIP2" }, { "question": "What does the child's shirt say?", "answer": "happy birthday", "gt_answers": [ "viva" ], "image_path": "./data/STVQA/imageNet/n07615774_10799.JPEG", "model_name": "BLIP2" }, { "question": "What word is under the phone screen?", "answer": "nokia", "gt_answers": [ "Cingular" ], "image_path": "./data/STVQA/VisualGenome/2/2401225.jpg", "model_name": "BLIP2" }, { "question": "What word is listed on the wall?", "answer": "person", "gt_answers": [ "Friedas" ], "image_path": "./data/STVQA/IIIT_text/2294.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the bus?", "answer": "ts ts", "gt_answers": [ "BT-LR-17" ], "image_path": "./data/STVQA/VisualGenome/1/2341324.jpg", "model_name": "BLIP2" }, { "question": "WHO IS SPONSORED FOR THIS GAME?", "answer": "person", "gt_answers": [ "GE Money" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000113617.jpg", "model_name": "BLIP2" }, { "question": "What type of light is the head lamp?", "answer": "led", "gt_answers": [ "LED" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117034.jpg", "model_name": "BLIP2" }, { "question": "What can be done on JobStreet.com?", "answer": "apply for a job", "gt_answers": [ "View jobs by salary matching." ], "image_path": "./data/STVQA/icdar/img_236.jpg", "model_name": "BLIP2" }, { "question": "What is the photo tag's first name?", "answer": "sam", "gt_answers": [ "Caitlin" ], "image_path": "./data/STVQA/imageNet/n02389026_25614.JPEG", "model_name": "BLIP2" }, { "question": "it isn't fals", "answer": "person", "gt_answers": [ "tru" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000290724.jpg", "model_name": "BLIP2" }, { "question": "Does the 4-way stop sign look bright enough?", "answer": "yes, it does", "gt_answers": [ "4-way" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254911.jpg", "model_name": "BLIP2" }, { "question": "Can you stop here in icy conditions?", "answer": "no", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254911.jpg", "model_name": "BLIP2" }, { "question": "Does the war on the sign make you feel uncomfortable?", "answer": "no", "gt_answers": [ "WAR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254911.jpg", "model_name": "BLIP2" }, { "question": "What is the bus license plate number?", "answer": "nyc bus", "gt_answers": [ "L-30523" ], "image_path": "./data/STVQA/VisualGenome/1/150434.jpg", "model_name": "BLIP2" }, { "question": "What is written on the image", "answer": "the viaduct", "gt_answers": [ "Quatrain Photo", "\ufffd\ufffd QUATRAIN PHOTO" ], "image_path": "./data/STVQA/imageNet/n04532670_663.JPEG", "model_name": "BLIP2" }, { "question": "Who is this product perfect for?", "answer": "cooks", "gt_answers": [ "Seniors & Visually Impaired" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000017164.jpg", "model_name": "BLIP2" }, { "question": "What is the main brand of water?", "answer": "minaqua", "gt_answers": [ "minaqua" ], "image_path": "./data/STVQA/imageNet/n02747177_38232.JPEG", "model_name": "BLIP2" }, { "question": "What is the text on the store front?", "answer": "santander", "gt_answers": [ "Fruites Verdures" ], "image_path": "./data/STVQA/VisualGenome/2/1795.jpg", "model_name": "BLIP2" }, { "question": "What animal is the person feeding?", "answer": "bird", "gt_answers": [ "Bird" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000279476.jpg", "model_name": "BLIP2" }, { "question": "Whats the website this image is on?", "answer": "wordpress com", "gt_answers": [ "http://nonnaluna.wordpress.com" ], "image_path": "./data/STVQA/imageNet/n04033901_3076.JPEG", "model_name": "BLIP2" }, { "question": "What is the food in the picture?", "answer": "cottage pie", "gt_answers": [ "Cottage Pie" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004046.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street listed on the left?", "answer": "der linden", "gt_answers": [ "Unter den Linden" ], "image_path": "./data/STVQA/VisualGenome/1/2346288.jpg", "model_name": "BLIP2" }, { "question": "Which fruit is this?", "answer": "apples", "gt_answers": [ "Apple" ], "image_path": "./data/STVQA/VisualGenome/2/2404584.jpg", "model_name": "BLIP2" }, { "question": "What four numbers are on the bottom right of the picture", "answer": "1, 2, 3, 4", "gt_answers": [ "2007" ], "image_path": "./data/STVQA/imageNet/n02099601_2690.JPEG", "model_name": "BLIP2" }, { "question": "What is the engine number?", "answer": "person", "gt_answers": [ "3003" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000540769.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom of the sign pointing to the left say?", "answer": "between here and there", "gt_answers": [ "between here and there" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000168905.jpg", "model_name": "BLIP2" }, { "question": "What station is located to the left?", "answer": "london bridge", "gt_answers": [ "Parson Street Staton" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000168905.jpg", "model_name": "BLIP2" }, { "question": "What website can this be found on?", "answer": "nature", "gt_answers": [ "www.naturfoto.cz" ], "image_path": "./data/STVQA/imageNet/n02486261_7976.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand?", "answer": "wahl", "gt_answers": [ "Visiq" ], "image_path": "./data/STVQA/imageNet/n03483316_49740.JPEG", "model_name": "BLIP2" }, { "question": "Who makes these donoughts?", "answer": "buckeye donuts", "gt_answers": [ "Busken" ], "image_path": "./data/STVQA/VisualGenome/2/2411970.jpg", "model_name": "BLIP2" }, { "question": "What word is after have in the first line of the paper?", "answer": "have", "gt_answers": [ "arrived" ], "image_path": "./data/STVQA/VisualGenome/1/2352831.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the top sign?", "answer": "all", "gt_answers": [ "All" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000558475.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the bottom right?", "answer": "person", "gt_answers": [ "ROB BIXBY 2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000191350.jpg", "model_name": "BLIP2" }, { "question": "What word is displayed in the top left of the image?", "answer": "baseball", "gt_answers": [ "corbis" ], "image_path": "./data/STVQA/VisualGenome/2/897.jpg", "model_name": "BLIP2" }, { "question": "What is written in blue on the train?", "answer": "aachen", "gt_answers": [ "Kusttram" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000474012.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2009", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000535713.jpg", "model_name": "BLIP2" }, { "question": "What kind of dinosaur is mentioned on the screen?", "answer": "stegosaurus", "gt_answers": [ "RAPTORS?", "RAPTORS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000174774.jpg", "model_name": "BLIP2" }, { "question": "What Airline is this plane for", "answer": "air india", "gt_answers": [ "Indigo" ], "image_path": "./data/STVQA/IIIT_text/img_000687.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the top right say?", "answer": "trade options", "gt_answers": [ "Travel Options" ], "image_path": "./data/STVQA/IIIT_text/img_000892.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the marker?", "answer": "pentel", "gt_answers": [ "Sharpie" ], "image_path": "./data/STVQA/imageNet/n06785654_4728.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the photo listed in the middle of the image?", "answer": "taytay dam", "gt_answers": [ "HoangLong", "HoangLong photo" ], "image_path": "./data/STVQA/IIIT_text/5541.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the player on the right?", "answer": "sammy sosa", "gt_answers": [ "SUZUKI" ], "image_path": "./data/STVQA/VisualGenome/1/2320124.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the player on the left?", "answer": "nine", "gt_answers": [ "16" ], "image_path": "./data/STVQA/VisualGenome/1/2320124.jpg", "model_name": "BLIP2" }, { "question": "What numbers are written on the white street sign on the left?", "answer": "no numbers", "gt_answers": [ "3100" ], "image_path": "./data/STVQA/VisualGenome/2/2406478.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the white street sign on right?", "answer": "way", "gt_answers": [ "FILBERT" ], "image_path": "./data/STVQA/VisualGenome/2/2406478.jpg", "model_name": "BLIP2" }, { "question": "Where does the middle lane lead to?", "answer": "valparaiso", "gt_answers": [ "Chetumal" ], "image_path": "./data/STVQA/IIIT_text/4887.jpg", "model_name": "BLIP2" }, { "question": "What is the lowest number marked on the cup?", "answer": "1", "gt_answers": [ "50" ], "image_path": "./data/STVQA/imageNet/n02815834_13293.JPEG", "model_name": "BLIP2" }, { "question": "What word is written in the 1st picture?", "answer": "double free", "gt_answers": [ "DOUBLE" ], "image_path": "./data/STVQA/VisualGenome/1/2360656.jpg", "model_name": "BLIP2" }, { "question": "What word is written in the 2nd picture?", "answer": "double free", "gt_answers": [ "FAULT" ], "image_path": "./data/STVQA/VisualGenome/1/2360656.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the boat with green on it?", "answer": "person", "gt_answers": [ "vitoria" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000464075.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN GREEN CARPET?", "answer": "person", "gt_answers": [ "REGION AQUITAINE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000261788.jpg", "model_name": "BLIP2" }, { "question": "What's the number on the girl that is 3rd one from the left side of this picture?", "answer": "3", "gt_answers": [ "154" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292226.jpg", "model_name": "BLIP2" }, { "question": "Which number is the bike that's 2nd in place?", "answer": "2", "gt_answers": [ "160", "166" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292226.jpg", "model_name": "BLIP2" }, { "question": "Whats the name brand on the side of the bikers shirts?", "answer": "person", "gt_answers": [ "JAMIS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292226.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN THE SIIDE", "answer": "possi", "gt_answers": [ "FOSSIL" ], "image_path": "./data/STVQA/imageNet/n03814906_10628.JPEG", "model_name": "BLIP2" }, { "question": "What is written in the triangle on the sign?", "answer": "lane", "gt_answers": [ "PARE" ], "image_path": "./data/STVQA/VisualGenome/1/2370029.jpg", "model_name": "BLIP2" }, { "question": "What team name is on the jerseys pictured?", "answer": "orioles", "gt_answers": [ "Orioles" ], "image_path": "./data/STVQA/VisualGenome/1/2362858.jpg", "model_name": "BLIP2" }, { "question": "What is the name printed on the jersey facing away from the viewer?", "answer": "baltimore orioles", "gt_answers": [ "Wieters" ], "image_path": "./data/STVQA/VisualGenome/1/2362858.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the jersey for Wieters?", "answer": "34", "gt_answers": [ "32" ], "image_path": "./data/STVQA/VisualGenome/1/2362858.jpg", "model_name": "BLIP2" }, { "question": "What team does the standing player play for?", "answer": "san francisco giants", "gt_answers": [ "GIANTS" ], "image_path": "./data/STVQA/VisualGenome/1/2364612.jpg", "model_name": "BLIP2" }, { "question": "What credit card company logos are displayed?", "answer": "person", "gt_answers": [ "Visa, Mastercard", "VISA MASTERCARD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000241720.jpg", "model_name": "BLIP2" }, { "question": "What brand is the keyboard?", "answer": "alpine", "gt_answers": [ "ALIENWARE" ], "image_path": "./data/STVQA/VisualGenome/1/2336330.jpg", "model_name": "BLIP2" }, { "question": "What is the continent listed in the photo?", "answer": "europe", "gt_answers": [ "Central Europe" ], "image_path": "./data/STVQA/VisualGenome/1/2335880.jpg", "model_name": "BLIP2" }, { "question": "What price is displayed?", "answer": "$2", "gt_answers": [ "$2" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001891.jpg", "model_name": "BLIP2" }, { "question": "How many cats are there in this photo?", "answer": "two", "gt_answers": [ "Three", "ONE" ], "image_path": "./data/STVQA/VisualGenome/1/2373100.jpg", "model_name": "BLIP2" }, { "question": "What s the name of the restaurant?", "answer": "burger king", "gt_answers": [ "Hlollabatar", "HLOLLABALAR" ], "image_path": "./data/STVQA/VisualGenome/1/2363220.jpg", "model_name": "BLIP2" }, { "question": "What beverage name is displayed in the window of the restaurant?", "answer": "coke", "gt_answers": [ "Coca Cola", "CocaCola" ], "image_path": "./data/STVQA/VisualGenome/1/2363220.jpg", "model_name": "BLIP2" }, { "question": "What airline is this?", "answer": "emirates", "gt_answers": [ "Emirates" ], "image_path": "./data/STVQA/VisualGenome/1/2355476.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the right of the photo?", "answer": "person", "gt_answers": [ "panasonic" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000211807.jpg", "model_name": "BLIP2" }, { "question": "What does is the second word on the right of the photo?", "answer": "airport", "gt_answers": [ "ideas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000211807.jpg", "model_name": "BLIP2" }, { "question": "What is the fourth word on the right of the photo?", "answer": "airport", "gt_answers": [ "life" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000211807.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this event?", "answer": "frisbee dog competition", "gt_answers": [ "K9Frisbee" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000033444.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the road on the green sign?", "answer": "las vegas blvd", "gt_answers": [ "Bullhead Pkwy" ], "image_path": "./data/STVQA/VisualGenome/1/2357157.jpg", "model_name": "BLIP2" }, { "question": "What city is written on the rock?", "answer": "los angeles", "gt_answers": [ "BULLHEAD CITY, AZ." ], "image_path": "./data/STVQA/VisualGenome/1/2357157.jpg", "model_name": "BLIP2" }, { "question": "What does the text on the top of the image say?", "answer": "make the bed every morning", "gt_answers": [ "make the bed every morning" ], "image_path": "./data/STVQA/VisualGenome/1/2348515.jpg", "model_name": "BLIP2" }, { "question": "What letter of the alphabet appears directly above this luggae?", "answer": "a", "gt_answers": [ "B" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000013868.jpg", "model_name": "BLIP2" }, { "question": "What letter is in blue?", "answer": "m", "gt_answers": [ "B" ], "image_path": "./data/STVQA/imageNet/n02823750_15022.JPEG", "model_name": "BLIP2" }, { "question": "Who is the author of these books?", "answer": "john steinbeck", "gt_answers": [ "Kay Redfield Jamison" ], "image_path": "./data/STVQA/VisualGenome/2/2409845.jpg", "model_name": "BLIP2" }, { "question": "What is the letter at the top right of the image?", "answer": "b", "gt_answers": [ "c" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000404015.jpg", "model_name": "BLIP2" }, { "question": "What is the last name at the top right of the image?", "answer": "person", "gt_answers": [ "butt" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000404015.jpg", "model_name": "BLIP2" }, { "question": "What is the first name at the top right of the image?", "answer": "person", "gt_answers": [ "william" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000404015.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2353188.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the hat of the children?", "answer": "c", "gt_answers": [ "J" ], "image_path": "./data/STVQA/VisualGenome/1/713299.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the team the children play on?", "answer": "little league", "gt_answers": [ "Jets" ], "image_path": "./data/STVQA/VisualGenome/1/713299.jpg", "model_name": "BLIP2" }, { "question": "What year is displayed on the plaque?", "answer": "1984", "gt_answers": [ "1981" ], "image_path": "./data/STVQA/VisualGenome/1/713299.jpg", "model_name": "BLIP2" }, { "question": "What three letters come before the word online?", "answer": "c", "gt_answers": [ "CRI" ], "image_path": "./data/STVQA/imageNet/n02837789_7191.JPEG", "model_name": "BLIP2" }, { "question": "What is the city on the box?", "answer": "los angeles", "gt_answers": [ "Gilroy" ], "image_path": "./data/STVQA/imageNet/n03127925_4283.JPEG", "model_name": "BLIP2" }, { "question": "What year did the event take place?", "answer": "2013", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/VisualGenome/1/1160075.jpg", "model_name": "BLIP2" }, { "question": "What is the name at the top?", "answer": "person", "gt_answers": [ "tsonga" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000127098.jpg", "model_name": "BLIP2" }, { "question": "What does vodafone mean?", "answer": "voice over internet protocol", "gt_answers": [ "vodafone" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000014152.jpg", "model_name": "BLIP2" }, { "question": "Where can I buy a vodafone?", "answer": "in the market", "gt_answers": [ "vodafone" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000014152.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign board?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2317909.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say in the top center of the image?", "answer": "barber shop", "gt_answers": [ "EXIT" ], "image_path": "./data/STVQA/imageNet/n02791270_514.JPEG", "model_name": "BLIP2" }, { "question": "What kind of business is the building on the left?", "answer": "pharmacy", "gt_answers": [ "Hotel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000476812.jpg", "model_name": "BLIP2" }, { "question": "what brand is the black phone?", "answer": "nokia", "gt_answers": [ "nokia" ], "image_path": "./data/STVQA/VisualGenome/1/2373870.jpg", "model_name": "BLIP2" }, { "question": "What type of fuel does this car take?", "answer": "unleaded gas", "gt_answers": [ "Unleaded Fuel" ], "image_path": "./data/STVQA/imageNet/n03841143_13177.JPEG", "model_name": "BLIP2" }, { "question": "What is the way of mexico", "answer": "to mexico", "gt_answers": [ "Bridge 1" ], "image_path": "./data/STVQA/VisualGenome/2/2406290.jpg", "model_name": "BLIP2" }, { "question": "What is the first word written on the back of the person's shirt?", "answer": "person", "gt_answers": [ "Laskar" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000060774.jpg", "model_name": "BLIP2" }, { "question": "What is closed according to the sign?", "answer": "road", "gt_answers": [ "Road" ], "image_path": "./data/STVQA/VisualGenome/1/2366167.jpg", "model_name": "BLIP2" }, { "question": "What is printed above the plane windows?", "answer": "japan airlines", "gt_answers": [ "JAPAN AIR COMMUTER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000156895.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the jeans company in the background of the image?", "answer": "person", "gt_answers": [ "Muzu" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000392606.jpg", "model_name": "BLIP2" }, { "question": "This fire truck serves which area?", "answer": "miami beach", "gt_answers": [ "Coral Gables" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000501972.jpg", "model_name": "BLIP2" }, { "question": "What are most brands to avoid plagiarism?", "answer": "wind tunnel", "gt_answers": [ "Patented" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000017231.jpg", "model_name": "BLIP2" }, { "question": "What type of books are indicated by the red sign in the top left of the image?", "answer": "fiction", "gt_answers": [ "NON-FICTION" ], "image_path": "./data/STVQA/VisualGenome/1/2368062.jpg", "model_name": "BLIP2" }, { "question": "What is being cut?", "answer": "pizza", "gt_answers": [ "Pizza" ], "image_path": "./data/STVQA/VisualGenome/1/2355998.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS TIME?", "answer": "time is the passage of events", "gt_answers": [ "3:20" ], "image_path": "./data/STVQA/VisualGenome/1/2346449.jpg", "model_name": "BLIP2" }, { "question": "What business name is on the top of the building?", "answer": "life insurance", "gt_answers": [ "MetLife" ], "image_path": "./data/STVQA/IIIT_text/1650.jpg", "model_name": "BLIP2" }, { "question": "What is written on the bottom of the gas pump?", "answer": "ohio", "gt_answers": [ "GWAY" ], "image_path": "./data/STVQA/imageNet/n03425413_22107.JPEG", "model_name": "BLIP2" }, { "question": "what does this sign say?", "answer": "you've arrived", "gt_answers": [ "you've arrived" ], "image_path": "./data/STVQA/VisualGenome/2/2412562.jpg", "model_name": "BLIP2" }, { "question": "What does the bus say above the door?", "answer": "free nights", "gt_answers": [ "Metroline" ], "image_path": "./data/STVQA/VisualGenome/2/3078.jpg", "model_name": "BLIP2" }, { "question": "What does the destination on the bus say?", "answer": "free night bus", "gt_answers": [ "205 via Kings Cross", "Kings Cross" ], "image_path": "./data/STVQA/VisualGenome/2/3078.jpg", "model_name": "BLIP2" }, { "question": "What is the active ingredient?", "answer": "oxybenzone", "gt_answers": [ "aluminum zirconium trichlorohydrex Gly (20%) anhydrous", "aluminum zirconium trichlorohydrex" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030844.jpg", "model_name": "BLIP2" }, { "question": "What is the phone number?", "answer": "the phone number is 1-888-888-888", "gt_answers": [ "1-800-964-1947" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030844.jpg", "model_name": "BLIP2" }, { "question": "What is the train number?", "answer": "no train number", "gt_answers": [ "055 05995" ], "image_path": "./data/STVQA/VisualGenome/1/2373838.jpg", "model_name": "BLIP2" }, { "question": "What street is this on?", "answer": "santa clara street", "gt_answers": [ "Victoria St" ], "image_path": "./data/STVQA/VisualGenome/2/2408482.jpg", "model_name": "BLIP2" }, { "question": "What does the tail of the airplane say?", "answer": "jetblue", "gt_answers": [ "jetBlue" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000551214.jpg", "model_name": "BLIP2" }, { "question": "What fruit shape is the clock?", "answer": "apple", "gt_answers": [ "Apple" ], "image_path": "./data/STVQA/VisualGenome/1/2335640.jpg", "model_name": "BLIP2" }, { "question": "Whats the school days calendar for?", "answer": "teachers", "gt_answers": [ "LESSONS & CHALLENGES for Teachers" ], "image_path": "./data/STVQA/VisualGenome/1/2335640.jpg", "model_name": "BLIP2" }, { "question": "What times are taped on the machine?", "answer": "tokyo subway times", "gt_answers": [ "8:00am-11:00am" ], "image_path": "./data/STVQA/imageNet/n04525305_1740.JPEG", "model_name": "BLIP2" }, { "question": "What country is written on the book?", "answer": "japan", "gt_answers": [ "JAPAN" ], "image_path": "./data/STVQA/imageNet/n03179701_5399.JPEG", "model_name": "BLIP2" }, { "question": "What company name is on the Jumbo-Sized TV remote?", "answer": "person", "gt_answers": [ "INNOVAGE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259104.jpg", "model_name": "BLIP2" }, { "question": "THe LITEON TV remote is located between which two remotes?", "answer": "person and person", "gt_answers": [ "PIONEER TOSHIBA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259104.jpg", "model_name": "BLIP2" }, { "question": "Who is the photographer who took this photo?", "answer": "person", "gt_answers": [ "Mani Babbar" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000466288.jpg", "model_name": "BLIP2" }, { "question": "What is the world's third highest pass called?", "answer": "khardung la", "gt_answers": [ "Chang La" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000466288.jpg", "model_name": "BLIP2" }, { "question": "What letter comes after the apostrophe?", "answer": "z", "gt_answers": [ "S" ], "image_path": "./data/STVQA/imageNet/n03690938_2209.JPEG", "model_name": "BLIP2" }, { "question": "What business is this person eating at?", "answer": "donuts", "gt_answers": [ "DUNKIN DONUTS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000010579.jpg", "model_name": "BLIP2" }, { "question": "What type of sandwich does the bag advertise?", "answer": "donut", "gt_answers": [ "CHICKEN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000010579.jpg", "model_name": "BLIP2" }, { "question": "What animal is this?", "answer": "skunk", "gt_answers": [ "Skunk" ], "image_path": "./data/STVQA/imageNet/n02445715_144.JPEG", "model_name": "BLIP2" }, { "question": "What company is this photo for?", "answer": "subway", "gt_answers": [ "Nathan's" ], "image_path": "./data/STVQA/VisualGenome/1/2322772.jpg", "model_name": "BLIP2" }, { "question": "What are the big black words on the sign?", "answer": "bus stop", "gt_answers": [ "BUS STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000038435.jpg", "model_name": "BLIP2" }, { "question": "What brand is the ski helmet?", "answer": "person", "gt_answers": [ "Uvex" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406445.jpg", "model_name": "BLIP2" }, { "question": "What street name is pictured?", "answer": "king street", "gt_answers": [ "Garfield Avenue" ], "image_path": "./data/STVQA/VisualGenome/2/2403690.jpg", "model_name": "BLIP2" }, { "question": "What restaurant is advertised at the bottom of this picture?", "answer": "person", "gt_answers": [ "Taco Bell" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000532355.jpg", "model_name": "BLIP2" }, { "question": "What type of photography is Purple Martini?", "answer": "wedding photography", "gt_answers": [ "Wedding" ], "image_path": "./data/STVQA/IIIT_text/7684.jpg", "model_name": "BLIP2" }, { "question": "what date does the sign say?", "answer": "june 15", "gt_answers": [ "june 13-17 2006", "June 13 - 17, 2006" ], "image_path": "./data/STVQA/imageNet/n02100877_2338.JPEG", "model_name": "BLIP2" }, { "question": "Red sign found on a intersection", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2337559.jpg", "model_name": "BLIP2" }, { "question": "What does the man's shirt say?", "answer": "cash", "gt_answers": [ "Cash" ], "image_path": "./data/STVQA/VisualGenome/1/2326418.jpg", "model_name": "BLIP2" }, { "question": "What brand appears to to be sponsored in the background?", "answer": "volvo", "gt_answers": [ "Fella" ], "image_path": "./data/STVQA/imageNet/n03496892_10844.JPEG", "model_name": "BLIP2" }, { "question": "What brand is the tractor?", "answer": "fendt", "gt_answers": [ "MCCORMICK" ], "image_path": "./data/STVQA/imageNet/n03496892_10844.JPEG", "model_name": "BLIP2" }, { "question": "What is the speed limit for a left turn?", "answer": "30 mph", "gt_answers": [ "25 MPH", "25" ], "image_path": "./data/STVQA/VisualGenome/1/2353013.jpg", "model_name": "BLIP2" }, { "question": "What are the last four letters of produce?", "answer": "person", "gt_answers": [ "duce" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462472.jpg", "model_name": "BLIP2" }, { "question": "Which beer brand is being advertised above the two Coca-Cola advertisements?", "answer": "person", "gt_answers": [ "COORS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462472.jpg", "model_name": "BLIP2" }, { "question": "What is the first word at the top of the right most advertisement on the top row?", "answer": "person", "gt_answers": [ "lucky" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462472.jpg", "model_name": "BLIP2" }, { "question": "What does this shirt represent", "answer": "abvi goodwill", "gt_answers": [ "Goodwill" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000692.jpg", "model_name": "BLIP2" }, { "question": "What does he need donations for?", "answer": "he is homeless", "gt_answers": [ "nice dinner" ], "image_path": "./data/STVQA/VisualGenome/1/2375759.jpg", "model_name": "BLIP2" }, { "question": "Which fruit is depicted in this picture?", "answer": "banana", "gt_answers": [ "Banana" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462736.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate on the bus?", "answer": "dmc 005", "gt_answers": [ "LT02 ZDR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373653.jpg", "model_name": "BLIP2" }, { "question": "What is the destination of the bus?", "answer": "manchester", "gt_answers": [ "Manchester" ], "image_path": "./data/STVQA/VisualGenome/1/2363613.jpg", "model_name": "BLIP2" }, { "question": "what is the number of the bus?", "answer": "mta bus", "gt_answers": [ "10" ], "image_path": "./data/STVQA/VisualGenome/1/2363613.jpg", "model_name": "BLIP2" }, { "question": "What kind of gel is the Ice Cold product", "answer": "epsom salt", "gt_answers": [ "Analgesic" ], "image_path": "./data/STVQA/imageNet/n03742115_6488.JPEG", "model_name": "BLIP2" }, { "question": "Does this boat make you happy?", "answer": "yes", "gt_answers": [ "HAPPY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000400275.jpg", "model_name": "BLIP2" }, { "question": "What sport is being played?", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000201236.jpg", "model_name": "BLIP2" }, { "question": "What is on the bus?", "answer": "people", "gt_answers": [ "Tata" ], "image_path": "./data/STVQA/VisualGenome/2/2417372.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the name of the bridge?", "answer": "hudson river bridge", "gt_answers": [ "Lions Gate", "Lions Gate Bridge" ], "image_path": "./data/STVQA/VisualGenome/1/2336569.jpg", "model_name": "BLIP2" }, { "question": "What brand is the ping-pong table?", "answer": "table tennis table", "gt_answers": [ "sponeta" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000161028.jpg", "model_name": "BLIP2" }, { "question": "what kind of establishment is this?", "answer": "restaurant", "gt_answers": [ "restaurant" ], "image_path": "./data/STVQA/VisualGenome/1/2372333.jpg", "model_name": "BLIP2" }, { "question": "What is the car's license plate?", "answer": "mcfly", "gt_answers": [ "3XHE872" ], "image_path": "./data/STVQA/VisualGenome/1/2326312.jpg", "model_name": "BLIP2" }, { "question": "What is the middle school bus's number?", "answer": "a yellow school bus", "gt_answers": [ "566" ], "image_path": "./data/STVQA/VisualGenome/1/150399.jpg", "model_name": "BLIP2" }, { "question": "What kink of bus is shown?", "answer": "school bus", "gt_answers": [ "School bus" ], "image_path": "./data/STVQA/VisualGenome/1/150399.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this street?", "answer": "cedar street", "gt_answers": [ "Cedar Street", "CEDAR" ], "image_path": "./data/STVQA/VisualGenome/2/2406371.jpg", "model_name": "BLIP2" }, { "question": "What is written on this green sign board?", "answer": "ceara street", "gt_answers": [ "Cedar Street" ], "image_path": "./data/STVQA/VisualGenome/2/2406371.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this cereal?", "answer": "kashi", "gt_answers": [ "Special K" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016496.jpg", "model_name": "BLIP2" }, { "question": "What store name is on the bus?", "answer": "person", "gt_answers": [ "The Honesty Shop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000272218.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "no number", "gt_answers": [ "30" ], "image_path": "./data/STVQA/VisualGenome/1/2358556.jpg", "model_name": "BLIP2" }, { "question": "What number is found on the player wearing red", "answer": "person", "gt_answers": [ "17" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000129722.jpg", "model_name": "BLIP2" }, { "question": "What number is the player to the furthest left wearing?", "answer": "person", "gt_answers": [ "27" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000129722.jpg", "model_name": "BLIP2" }, { "question": "What number is on the top left?", "answer": "2", "gt_answers": [ "+30" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000379159.jpg", "model_name": "BLIP2" }, { "question": "What number is center bottom?", "answer": "two", "gt_answers": [ "0" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000379159.jpg", "model_name": "BLIP2" }, { "question": "What is printed at the bottom left of the image?", "answer": "ski lift", "gt_answers": [ "2012 Jerimiah Fulton", "\ufffd\ufffd 2012 Jerimiah Fulton" ], "image_path": "./data/STVQA/VisualGenome/2/2417658.jpg", "model_name": "BLIP2" }, { "question": "What tour bus is this?", "answer": "official tour bus", "gt_answers": [ "City Sightseeing" ], "image_path": "./data/STVQA/VisualGenome/2/2415679.jpg", "model_name": "BLIP2" }, { "question": "What does it say under the clock?", "answer": "faceable", "gt_answers": [ "Rolex" ], "image_path": "./data/STVQA/VisualGenome/1/2325104.jpg", "model_name": "BLIP2" }, { "question": "Which company is written on the watermark?", "answer": "adidas", "gt_answers": [ "PanArmenian Photo" ], "image_path": "./data/STVQA/VisualGenome/1/2360520.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF FUEL STATION?", "answer": "shell", "gt_answers": [ "INDIANOIL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000330572.jpg", "model_name": "BLIP2" }, { "question": "What kind of guitar is in this image?", "answer": "acoustic guitar", "gt_answers": [ "ACOUSTIC" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011978.jpg", "model_name": "BLIP2" }, { "question": "What is on the shirt of the man behind the fence?", "answer": "person", "gt_answers": [ "FLYING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000502517.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the red sign?", "answer": "4", "gt_answers": [ "257" ], "image_path": "./data/STVQA/VisualGenome/1/2353917.jpg", "model_name": "BLIP2" }, { "question": "What number is after the R on the poster", "answer": "4", "gt_answers": [ "6" ], "image_path": "./data/STVQA/icdar/img_269.jpg", "model_name": "BLIP2" }, { "question": "What color is the Slimming lettering", "answer": "black", "gt_answers": [ "white" ], "image_path": "./data/STVQA/icdar/img_269.jpg", "model_name": "BLIP2" }, { "question": "What word is spray painted on the red sign?", "answer": "hammer time", "gt_answers": [ "HAMMERTIME" ], "image_path": "./data/STVQA/VisualGenome/1/2325097.jpg", "model_name": "BLIP2" }, { "question": "What does it says in the banner?", "answer": "grand slam return", "gt_answers": [ "Grand Sla return" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000465090.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "delta airlines", "gt_answers": [ "Delta" ], "image_path": "./data/STVQA/VisualGenome/1/2373278.jpg", "model_name": "BLIP2" }, { "question": "what is the number of the plane?", "answer": "2", "gt_answers": [ "N9488P" ], "image_path": "./data/STVQA/VisualGenome/1/2325496.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the mountain on the red sticker?", "answer": "mont blanc", "gt_answers": [ "Mt. Fuji" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000211040.jpg", "model_name": "BLIP2" }, { "question": "What is the year written on the tall boy's sweater?", "answer": "2010", "gt_answers": [ "1975" ], "image_path": "./data/STVQA/VisualGenome/1/2346451.jpg", "model_name": "BLIP2" }, { "question": "What does the green sign say?", "answer": "exit", "gt_answers": [ "exit" ], "image_path": "./data/STVQA/icdar/img_210.jpg", "model_name": "BLIP2" }, { "question": "What company owns this jet?", "answer": "airline", "gt_answers": [ "air canada" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056065.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the truck say?", "answer": "eat plants", "gt_answers": [ "GERANIUMS" ], "image_path": "./data/STVQA/VisualGenome/1/2337133.jpg", "model_name": "BLIP2" }, { "question": "What is the license number?", "answer": "tb tb", "gt_answers": [ "ao-068-kh", "A0 068 KH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000403263.jpg", "model_name": "BLIP2" }, { "question": "What name is at the top of the sign?", "answer": "person", "gt_answers": [ "global ENGLISH SCHOOL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000011697.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bus company?", "answer": "andrews tours", "gt_answers": [ "Andesmar" ], "image_path": "./data/STVQA/VisualGenome/1/2321275.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "anderson", "gt_answers": [ "259" ], "image_path": "./data/STVQA/VisualGenome/1/2321275.jpg", "model_name": "BLIP2" }, { "question": "How can you contact this company?", "answer": "they have a website", "gt_answers": [ "www.andesmar.com" ], "image_path": "./data/STVQA/VisualGenome/1/2321275.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the rider?", "answer": "person", "gt_answers": [ "G.MADEHO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000153634.jpg", "model_name": "BLIP2" }, { "question": "what name is on the jockys pants?", "answer": "person", "gt_answers": [ "G.MADEHO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000153634.jpg", "model_name": "BLIP2" }, { "question": "What is the name written on the birthday cake?", "answer": "happy birthday", "gt_answers": [ "Lizzie" ], "image_path": "./data/STVQA/VisualGenome/1/2317178.jpg", "model_name": "BLIP2" }, { "question": "What number on the building?", "answer": "4", "gt_answers": [ "33" ], "image_path": "./data/STVQA/IIIT_text/3340.jpg", "model_name": "BLIP2" }, { "question": "What was the street name written?", "answer": "williams st", "gt_answers": [ "WILLIS ST" ], "image_path": "./data/STVQA/VisualGenome/1/2317750.jpg", "model_name": "BLIP2" }, { "question": "What is indicated in the red sign board?", "answer": "cable car", "gt_answers": [ "CABLE CAR" ], "image_path": "./data/STVQA/VisualGenome/1/2317750.jpg", "model_name": "BLIP2" }, { "question": "What is written in the wall besides sign board?", "answer": "cable car", "gt_answers": [ "WATCHES" ], "image_path": "./data/STVQA/VisualGenome/1/2317750.jpg", "model_name": "BLIP2" }, { "question": "What is the sign protesting?", "answer": "london congestion charge", "gt_answers": [ "bike PARKING TAX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254176.jpg", "model_name": "BLIP2" }, { "question": "What name is listed in the sidecar windshield?", "answer": "person", "gt_answers": [ "IVAN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254176.jpg", "model_name": "BLIP2" }, { "question": "What company name is shown ion the black background building?", "answer": "person", "gt_answers": [ "T.M.LEW" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254176.jpg", "model_name": "BLIP2" }, { "question": "What is straight ahead?", "answer": "durum stockund", "gt_answers": [ "DJURSHOLM STOCKSUND" ], "image_path": "./data/STVQA/VisualGenome/1/2346766.jpg", "model_name": "BLIP2" }, { "question": "How much is the tea advertised on the bus?", "answer": "50p", "gt_answers": [ "99" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000459082.jpg", "model_name": "BLIP2" }, { "question": "How many people are wearing hats?", "answer": "two", "gt_answers": [ "Two", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2324607.jpg", "model_name": "BLIP2" }, { "question": "What number can be seen on top?", "answer": "4", "gt_answers": [ "50" ], "image_path": "./data/STVQA/imageNet/n03075370_3057.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the street printed on the sign?", "answer": "wooster street", "gt_answers": [ "WOOSTER" ], "image_path": "./data/STVQA/VisualGenome/1/2319254.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the person listed at the bottom of the photo?", "answer": "john", "gt_answers": [ "Jim" ], "image_path": "./data/STVQA/imageNet/n01608432_5084.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the umpires sleeve?", "answer": "3", "gt_answers": [ "58", "52" ], "image_path": "./data/STVQA/VisualGenome/1/2368873.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the bottom?", "answer": "person", "gt_answers": [ "paribas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000177957.jpg", "model_name": "BLIP2" }, { "question": "What is the blue bus number?", "answer": "3", "gt_answers": [ "9508" ], "image_path": "./data/STVQA/VisualGenome/1/2368476.jpg", "model_name": "BLIP2" }, { "question": "Is it day or night?", "answer": "night", "gt_answers": [ "Night" ], "image_path": "./data/STVQA/IIIT_text/16.jpg", "model_name": "BLIP2" }, { "question": "What is the number below the windshield on the bus closest to you in the image.", "answer": "3", "gt_answers": [ "RB-4", "4" ], "image_path": "./data/STVQA/VisualGenome/1/2327747.jpg", "model_name": "BLIP2" }, { "question": "What is the name under books on the wall?", "answer": "a bookstore", "gt_answers": [ "Kinokuniya" ], "image_path": "./data/STVQA/icdar/test_img_44.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "kobo", "gt_answers": [ "Kinokuniya" ], "image_path": "./data/STVQA/icdar/test_img_44.jpg", "model_name": "BLIP2" }, { "question": "who is this a memorial too?", "answer": "victoria wash", "gt_answers": [ "hiram bingham" ], "image_path": "./data/STVQA/imageNet/n02892201_14020.JPEG", "model_name": "BLIP2" }, { "question": "What is the date of this event?", "answer": "4 4", "gt_answers": [ "JULY 4TH 2009" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000345417.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the blender?", "answer": "person", "gt_answers": [ "Blendtec home" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000498409.jpg", "model_name": "BLIP2" }, { "question": "what does the lable to the left say?", "answer": "ovens", "gt_answers": [ "The Wu" ], "image_path": "./data/STVQA/VisualGenome/1/2376878.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the cinema?", "answer": "wilshire", "gt_answers": [ "Wilshire" ], "image_path": "./data/STVQA/imageNet/n03032252_28442.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the top of the sign?", "answer": "wilshire", "gt_answers": [ "RIGHT AT YOUR DOOR" ], "image_path": "./data/STVQA/imageNet/n03032252_28442.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the bottom of the sign?", "answer": "death at the door", "gt_answers": [ "DEATH AT A FUNERAL" ], "image_path": "./data/STVQA/imageNet/n03032252_28442.JPEG", "model_name": "BLIP2" }, { "question": "What is the full word seen in the background on the photo to the left?", "answer": "person", "gt_answers": [ "owls" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000335660.jpg", "model_name": "BLIP2" }, { "question": "What is the full word seen in the background on the photo to the right?", "answer": "person", "gt_answers": [ "owls" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000335660.jpg", "model_name": "BLIP2" }, { "question": "What drink is in the mug?", "answer": "coffee", "gt_answers": [ "Coffee" ], "image_path": "./data/STVQA/imageNet/n03085013_19707.JPEG", "model_name": "BLIP2" }, { "question": "What is flying towards the man?", "answer": "a tennis ball", "gt_answers": [ "Tennis ball" ], "image_path": "./data/STVQA/VisualGenome/1/2327108.jpg", "model_name": "BLIP2" }, { "question": "What does the T-shirt say?", "answer": "lego party", "gt_answers": [ "Jr's Garbage" ], "image_path": "./data/STVQA/VisualGenome/1/2345876.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the shirt?", "answer": "happy", "gt_answers": [ "Jr's" ], "image_path": "./data/STVQA/VisualGenome/1/2345876.jpg", "model_name": "BLIP2" }, { "question": "Mention any one name written on the sticks", "answer": "the name of a baseball player", "gt_answers": [ "Wilson" ], "image_path": "./data/STVQA/VisualGenome/1/2362657.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the mobile phone?", "answer": "nokia", "gt_answers": [ "NOKIA" ], "image_path": "./data/STVQA/imageNet/n03777754_2068.JPEG", "model_name": "BLIP2" }, { "question": "What is the capital letter on the second word of the store's name?", "answer": "saloon", "gt_answers": [ "S" ], "image_path": "./data/STVQA/VisualGenome/1/2367520.jpg", "model_name": "BLIP2" }, { "question": "What part of the military does this plane belong to?", "answer": "navy", "gt_answers": [ "NAVY" ], "image_path": "./data/STVQA/imageNet/n04552348_1274.JPEG", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop sign", "gt_answers": [ "Stop testan" ], "image_path": "./data/STVQA/VisualGenome/1/2323335.jpg", "model_name": "BLIP2" }, { "question": "What bank is advertised in the dug out?", "answer": "bank of america", "gt_answers": [ "Bank of America" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000500962.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the t-shirt say?", "answer": "warning", "gt_answers": [ "Warning", "warning !" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016502.jpg", "model_name": "BLIP2" }, { "question": "what does the sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2329324.jpg", "model_name": "BLIP2" }, { "question": "How many players are there whose feet are completely off the ground?", "answer": "two", "gt_answers": [ "two" ], "image_path": "./data/STVQA/VisualGenome/1/2337457.jpg", "model_name": "BLIP2" }, { "question": "What does the green sign say?", "answer": "exit", "gt_answers": [ "Exit" ], "image_path": "./data/STVQA/icdar/img_131.jpg", "model_name": "BLIP2" }, { "question": "What number is on the train?", "answer": "person", "gt_answers": [ "31233" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000135086.jpg", "model_name": "BLIP2" }, { "question": "What is this bus number?", "answer": "x1", "gt_answers": [ "38" ], "image_path": "./data/STVQA/VisualGenome/1/2350689.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "london", "gt_answers": [ "Victoria" ], "image_path": "./data/STVQA/VisualGenome/1/2350689.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of the bus?", "answer": "london", "gt_answers": [ "LJ59GUA", "LJ59 GUA" ], "image_path": "./data/STVQA/VisualGenome/1/2350689.jpg", "model_name": "BLIP2" }, { "question": "What does the white vase say?", "answer": "love", "gt_answers": [ "Love" ], "image_path": "./data/STVQA/VisualGenome/1/2333944.jpg", "model_name": "BLIP2" }, { "question": "What signis posted on the freezer door?", "answer": "private property", "gt_answers": [ "Private Property No Trespassing" ], "image_path": "./data/STVQA/VisualGenome/1/2353516.jpg", "model_name": "BLIP2" }, { "question": "What brand is the lens caps?", "answer": "nikon", "gt_answers": [ "Canon" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000523684.jpg", "model_name": "BLIP2" }, { "question": "What is the word written on the top of the remote control?", "answer": "wii", "gt_answers": [ "Power" ], "image_path": "./data/STVQA/VisualGenome/1/2333488.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the dirtbike?", "answer": "person", "gt_answers": [ "1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000217094.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the oven?", "answer": "an oven", "gt_answers": [ "Vulcan" ], "image_path": "./data/STVQA/VisualGenome/1/2374883.jpg", "model_name": "BLIP2" }, { "question": "What is written in the upper left corner of this photo?", "answer": "skateboarding", "gt_answers": [ "MyProfe" ], "image_path": "./data/STVQA/VisualGenome/1/2323685.jpg", "model_name": "BLIP2" }, { "question": "What does the text on the plane say?", "answer": "indigo", "gt_answers": [ "indigo" ], "image_path": "./data/STVQA/IIIT_text/img_000680.jpg", "model_name": "BLIP2" }, { "question": "What number is on the green sign in the upper left?", "answer": "4", "gt_answers": [ "20" ], "image_path": "./data/STVQA/VisualGenome/2/2402386.jpg", "model_name": "BLIP2" }, { "question": "What is the jersey number?", "answer": "7", "gt_answers": [ "7" ], "image_path": "./data/STVQA/imageNet/n09835506_19202.JPEG", "model_name": "BLIP2" }, { "question": "What is written in the crosswalk sign?", "answer": "crosswalk", "gt_answers": [ "DONT WALK" ], "image_path": "./data/STVQA/VisualGenome/1/2319154.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the website that this photo is from according to the bottom corner?", "answer": "person", "gt_answers": [ "stpaulphotos.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000517510.jpg", "model_name": "BLIP2" }, { "question": "What is the website?", "answer": "the website", "gt_answers": [ "www.shutterstock.com" ], "image_path": "./data/STVQA/imageNet/n07714571_13102.JPEG", "model_name": "BLIP2" }, { "question": "What words are located on the umbrella?", "answer": "bar a ristorante", "gt_answers": [ "BAR & RISTORANTE" ], "image_path": "./data/STVQA/IIIT_text/1243.jpg", "model_name": "BLIP2" }, { "question": "What is the first character on the bus's licence plate?", "answer": "person", "gt_answers": [ "H" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000529470.jpg", "model_name": "BLIP2" }, { "question": "What words are printed on the poster?", "answer": "i love you", "gt_answers": [ "rom project" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000387517.jpg", "model_name": "BLIP2" }, { "question": "What is written on Green board", "answer": "welcome to motel", "gt_answers": [ "WESTWARD HO MOTEL" ], "image_path": "./data/STVQA/VisualGenome/1/2351754.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "no number", "gt_answers": [ "335" ], "image_path": "./data/STVQA/VisualGenome/1/2353849.jpg", "model_name": "BLIP2" }, { "question": "What hotel is being shown", "answer": "marriott marriott marriott", "gt_answers": [ "Marriott" ], "image_path": "./data/STVQA/IIIT_text/img_000726.jpg", "model_name": "BLIP2" }, { "question": "What does that blue sign say?", "answer": "a tennis court", "gt_answers": [ "POLO" ], "image_path": "./data/STVQA/VisualGenome/1/2373707.jpg", "model_name": "BLIP2" }, { "question": "What emergency service is the van used for?", "answer": "police", "gt_answers": [ "Police" ], "image_path": "./data/STVQA/imageNet/n03977966_5355.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the airline written on the plane?", "answer": "person", "gt_answers": [ "Jetblue." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000186740.jpg", "model_name": "BLIP2" }, { "question": "What game system is the box for?", "answer": "wii", "gt_answers": [ "Wii" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000459819.jpg", "model_name": "BLIP2" }, { "question": "What game is included in the box?", "answer": "wii sports", "gt_answers": [ "Wii Sports" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000459819.jpg", "model_name": "BLIP2" }, { "question": "What does Perry's Place serve?", "answer": "food and soup", "gt_answers": [ "FOOD & SOUP" ], "image_path": "./data/STVQA/VisualGenome/1/2363173.jpg", "model_name": "BLIP2" }, { "question": "What is the red street sign instructing you to do?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2363173.jpg", "model_name": "BLIP2" }, { "question": "Who is up at bat?", "answer": "a baseball player", "gt_answers": [ "Paulino" ], "image_path": "./data/STVQA/imageNet/n04149813_5234.JPEG", "model_name": "BLIP2" }, { "question": "What team is in the field?", "answer": "the texans", "gt_answers": [ "Mariners" ], "image_path": "./data/STVQA/imageNet/n04149813_5234.JPEG", "model_name": "BLIP2" }, { "question": "What team is batting?", "answer": "the phillies", "gt_answers": [ "Pirates" ], "image_path": "./data/STVQA/imageNet/n04149813_5234.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on the closest pizza box in green letters?", "answer": "pizza", "gt_answers": [ "HOME OF POKEY STIX!!" ], "image_path": "./data/STVQA/VisualGenome/1/150340.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company in the building?", "answer": "steelcase", "gt_answers": [ "Chicago Sun-Times" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000356615.jpg", "model_name": "BLIP2" }, { "question": "Where is Jennifer in 2011?", "answer": "in japan", "gt_answers": [ "Kusatsu" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000222913.jpg", "model_name": "BLIP2" }, { "question": "Where is the brown sign on the left pointing to?", "answer": "smith's market inc", "gt_answers": [ "Dr Eugene Clark Library" ], "image_path": "./data/STVQA/VisualGenome/1/713539.jpg", "model_name": "BLIP2" }, { "question": "Who's market is being advertised here?", "answer": "smith's market", "gt_answers": [ "Smitty's" ], "image_path": "./data/STVQA/VisualGenome/1/713539.jpg", "model_name": "BLIP2" }, { "question": "What type of food in Smitty's is Fresh choice?", "answer": "fast food", "gt_answers": [ "Meat" ], "image_path": "./data/STVQA/VisualGenome/1/713539.jpg", "model_name": "BLIP2" }, { "question": "What company is this airplane from?", "answer": "klm airlines", "gt_answers": [ "KLM Asia" ], "image_path": "./data/STVQA/VisualGenome/1/1592121.jpg", "model_name": "BLIP2" }, { "question": "What number is on the front of the bus below the windshield?", "answer": "77", "gt_answers": [ "2178" ], "image_path": "./data/STVQA/VisualGenome/1/2318718.jpg", "model_name": "BLIP2" }, { "question": "What number in orange is above the windshield?", "answer": "3", "gt_answers": [ "19" ], "image_path": "./data/STVQA/VisualGenome/1/2318718.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to work", "gt_answers": [ "METROTOWN STN" ], "image_path": "./data/STVQA/VisualGenome/1/2318718.jpg", "model_name": "BLIP2" }, { "question": "What time of day - AM or PM - is next to the barcode?", "answer": "am", "gt_answers": [ "AM" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013428.jpg", "model_name": "BLIP2" }, { "question": "What is the likely full text of the large text in the reddish orange area below?", "answer": "value great savings pack", "gt_answers": [ "Valu Great Savin Pac" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013428.jpg", "model_name": "BLIP2" }, { "question": "What brand of cell phone is on the table?", "answer": "blackberry", "gt_answers": [ "SAMSUNG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000081031.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "king street", "gt_answers": [ "SPRING ST" ], "image_path": "./data/STVQA/VisualGenome/1/285738.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the venue?", "answer": "reel big fish", "gt_answers": [ "Theater Living Arts" ], "image_path": "./data/STVQA/imageNet/n03032252_70433.JPEG", "model_name": "BLIP2" }, { "question": "What band is playing thursday?", "answer": "the big fish", "gt_answers": [ "REEL BIG FISH" ], "image_path": "./data/STVQA/imageNet/n03032252_70433.JPEG", "model_name": "BLIP2" }, { "question": "What are colours of device?", "answer": "black", "gt_answers": [ "black", "{}" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000264406.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the phone?", "answer": "person", "gt_answers": [ "Samsung" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000264406.jpg", "model_name": "BLIP2" }, { "question": "What company owns the bus?", "answer": "taipei metro", "gt_answers": [ "Turis Tour", "TurisTour" ], "image_path": "./data/STVQA/VisualGenome/1/2362511.jpg", "model_name": "BLIP2" }, { "question": "What fruit is being advertised?", "answer": "bananas", "gt_answers": [ "Banana", "bananas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000422705.jpg", "model_name": "BLIP2" }, { "question": "What watermark is displayed at the top?", "answer": "istockphoto", "gt_answers": [ "gettyimages" ], "image_path": "./data/STVQA/imageNet/n03787032_344.JPEG", "model_name": "BLIP2" }, { "question": "Who is on the cover of the magazine?", "answer": "superman", "gt_answers": [ "Superman" ], "image_path": "./data/STVQA/imageNet/n06596364_1137.JPEG", "model_name": "BLIP2" }, { "question": "Who is the magazine publisher?", "answer": "time inc", "gt_answers": [ "Time" ], "image_path": "./data/STVQA/imageNet/n06596364_1137.JPEG", "model_name": "BLIP2" }, { "question": "What is written in blue board", "answer": "it is a parade", "gt_answers": [ "PLAZA" ], "image_path": "./data/STVQA/VisualGenome/1/2330939.jpg", "model_name": "BLIP2" }, { "question": "What is the time limit?", "answer": "60 minutes", "gt_answers": [ "TWO HOUR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000057308.jpg", "model_name": "BLIP2" }, { "question": "What year was this picture taken?", "answer": "2015", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337780.jpg", "model_name": "BLIP2" }, { "question": "What words are displayed inside the green sign at the top right of the image?", "answer": "grizzly bear", "gt_answers": [ "Bear and Wolf Museum" ], "image_path": "./data/STVQA/VisualGenome/2/2406383.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the sculpture?", "answer": "person", "gt_answers": [ "Ecalite" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000396977.jpg", "model_name": "BLIP2" }, { "question": "what country is this police station in?", "answer": "india", "gt_answers": [ "MUMB" ], "image_path": "./data/STVQA/IIIT_text/img_000938.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the baseball cap?", "answer": "person", "gt_answers": [ "C" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000464183.jpg", "model_name": "BLIP2" }, { "question": "what is the net weight", "answer": "0 oz net weight", "gt_answers": [ "32 oz", "32 OZ (2LB) 907 G" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019388.jpg", "model_name": "BLIP2" }, { "question": "What is the copyrighted name displayed in the bottom left?", "answer": "elizabeth taylor", "gt_answers": [ "Dan Kaiser" ], "image_path": "./data/STVQA/imageNet/n02013706_991.JPEG", "model_name": "BLIP2" }, { "question": "What food is in the can?", "answer": "chick peas", "gt_answers": [ "Chick peas" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001870.jpg", "model_name": "BLIP2" }, { "question": "What does the writing on the side of the wagon say?", "answer": "horse drawn carriage", "gt_answers": [ "LMS Railway Co 22465", "L M S RAILWAY CO" ], "image_path": "./data/STVQA/imageNet/n03538406_9872.JPEG", "model_name": "BLIP2" }, { "question": "What company made the laptop in this photograph?", "answer": "dell", "gt_answers": [ "DELL" ], "image_path": "./data/STVQA/VisualGenome/1/2315396.jpg", "model_name": "BLIP2" }, { "question": "What company is presenting the dinner?", "answer": "hublot", "gt_answers": [ "Hublot" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292923.jpg", "model_name": "BLIP2" }, { "question": "What car brand is the dinner for?", "answer": "hublot", "gt_answers": [ "Ferrari" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292923.jpg", "model_name": "BLIP2" }, { "question": "Who is the dinner for?", "answer": "the hublot fc members", "gt_answers": [ "FERRARI OWNERS CLUB" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292923.jpg", "model_name": "BLIP2" }, { "question": "What year other than 2005 is listed on the boy's basketball shirt?", "answer": "2006", "gt_answers": [ "2006" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000478582.jpg", "model_name": "BLIP2" }, { "question": "What does the pedestrian sign say?", "answer": "crosswalk", "gt_answers": [ "Walk" ], "image_path": "./data/STVQA/VisualGenome/1/2361282.jpg", "model_name": "BLIP2" }, { "question": "What is the bus schedule number?", "answer": "the bus schedule number is", "gt_answers": [ "6" ], "image_path": "./data/STVQA/VisualGenome/1/2361282.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "a city", "gt_answers": [ "Jeffery Exp" ], "image_path": "./data/STVQA/VisualGenome/1/2361282.jpg", "model_name": "BLIP2" }, { "question": "What do the signs say is free?", "answer": "free food", "gt_answers": [ "Blood & Landshark" ], "image_path": "./data/STVQA/VisualGenome/1/2371549.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the building say?", "answer": "amusement park", "gt_answers": [ "Savoy" ], "image_path": "./data/STVQA/imageNet/n02966193_17538.JPEG", "model_name": "BLIP2" }, { "question": "Where did this photo come from?", "answer": "germany", "gt_answers": [ "www.pjjphotography.co.uk" ], "image_path": "./data/STVQA/imageNet/n02966193_17538.JPEG", "model_name": "BLIP2" }, { "question": "what is written on the bottom right?", "answer": "arrow", "gt_answers": [ "cnsphoto" ], "image_path": "./data/STVQA/imageNet/n02879718_5117.JPEG", "model_name": "BLIP2" }, { "question": "how much are the bananas being sold for?", "answer": "$1.50 each", "gt_answers": [ "1,50" ], "image_path": "./data/STVQA/VisualGenome/1/2358903.jpg", "model_name": "BLIP2" }, { "question": "how much are the ranges being sold for?", "answer": "they are being sold for a dollar", "gt_answers": [ "1,50" ], "image_path": "./data/STVQA/VisualGenome/1/2358903.jpg", "model_name": "BLIP2" }, { "question": "what country are the bananas from?", "answer": "south america", "gt_answers": [ "Equador" ], "image_path": "./data/STVQA/VisualGenome/1/2358903.jpg", "model_name": "BLIP2" }, { "question": "What is the address for the website on this image?", "answer": "com", "gt_answers": [ "2dehands.be" ], "image_path": "./data/STVQA/imageNet/n04026417_12231.JPEG", "model_name": "BLIP2" }, { "question": "What is the text found in the middle of the watch on the necktie?", "answer": "the time is now", "gt_answers": [ "The New Era U.S.A." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000088871.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the green skateboard?", "answer": "person", "gt_answers": [ "Atwater" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000433715.jpg", "model_name": "BLIP2" }, { "question": "Which of the companies on the wall are a camera manufacturer", "answer": "person", "gt_answers": [ "Canon" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000050470.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the vehicle?", "answer": "green", "gt_answers": [ "GREEN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175188.jpg", "model_name": "BLIP2" }, { "question": "What is the second word on the vehicle?", "answer": "green", "gt_answers": [ "MACHINES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175188.jpg", "model_name": "BLIP2" }, { "question": "What is the word below the name on the building?", "answer": "green", "gt_answers": [ "salons" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175188.jpg", "model_name": "BLIP2" }, { "question": "What is written on the male's grey, hooded jersey?", "answer": "wii", "gt_answers": [ "PHILADELPHIA 76ERS" ], "image_path": "./data/STVQA/VisualGenome/2/2417266.jpg", "model_name": "BLIP2" }, { "question": "What four words are written in white, on the black sign in the background to the left?", "answer": "expo center", "gt_answers": [ "MULTIPLY YOUR DIGITAL LIFE.", "MULTIPLY YOUR DIGITAL LIFE" ], "image_path": "./data/STVQA/VisualGenome/2/2417266.jpg", "model_name": "BLIP2" }, { "question": "What website is the photo from?", "answer": "china mainland", "gt_answers": [ "www.ehsy.com" ], "image_path": "./data/STVQA/imageNet/n02730930_36121.JPEG", "model_name": "BLIP2" }, { "question": "What website is shown in front of the woman?", "answer": "china mainland", "gt_answers": [ "www.ehsy.com" ], "image_path": "./data/STVQA/imageNet/n02730930_36121.JPEG", "model_name": "BLIP2" }, { "question": "What are the large letters on the black apron that is hanging near a cupboard?", "answer": "baker", "gt_answers": [ "UPE" ], "image_path": "./data/STVQA/VisualGenome/1/2333489.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company on the building?", "answer": "person", "gt_answers": [ "Evergreen Textile UK Limited" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000033329.jpg", "model_name": "BLIP2" }, { "question": "What number is on the pole of the stoplight?", "answer": "4", "gt_answers": [ "1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000033329.jpg", "model_name": "BLIP2" }, { "question": "Q: What is written on the cooks shirt? A: Pact", "answer": "a pact a pact is a pact made between a person and a person", "gt_answers": [ "PACT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000055135.jpg", "model_name": "BLIP2" }, { "question": "Q: What brand of charcoal is being used? A: Kingsford", "answer": "a kingsford charcoal grill", "gt_answers": [ "KINGSFORD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000055135.jpg", "model_name": "BLIP2" }, { "question": "What is a white dog holding in his mouth?", "answer": "a green frisbee", "gt_answers": [ "frisbee" ], "image_path": "./data/STVQA/VisualGenome/1/2327581.jpg", "model_name": "BLIP2" }, { "question": "What is written on the napkin?", "answer": " ", "gt_answers": [ "Passover Haggadah" ], "image_path": "./data/STVQA/VisualGenome/1/2340153.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the back of the truck?", "answer": "person", "gt_answers": [ "Frontier" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000162046.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say is being sold there?", "answer": "bananas", "gt_answers": [ "Bananas" ], "image_path": "./data/STVQA/VisualGenome/1/2319700.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "bananas", "gt_answers": [ "Bananas" ], "image_path": "./data/STVQA/VisualGenome/1/2319700.jpg", "model_name": "BLIP2" }, { "question": "What is written there in yellow letters?", "answer": "bananas", "gt_answers": [ "Bananas" ], "image_path": "./data/STVQA/VisualGenome/1/2319700.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of board reference?", "answer": "the sign of board reference", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/1/2317410.jpg", "model_name": "BLIP2" }, { "question": "What place is labeled on the packaged napkin?", "answer": "mr green", "gt_answers": [ "TULLY COFEE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000276037.jpg", "model_name": "BLIP2" }, { "question": "Which letters are shown on the side of the plastic cup?", "answer": "m", "gt_answers": [ "TULLY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000276037.jpg", "model_name": "BLIP2" }, { "question": "To which mountaintop does the sign point, in English?", "answer": "the sign points to a mountaintop", "gt_answers": [ "Mushroom Peak" ], "image_path": "./data/STVQA/IIIT_text/8549.jpg", "model_name": "BLIP2" }, { "question": "How many meters must be traveled to the mountaintop?", "answer": "900 meters", "gt_answers": [ "3200 m", "3200" ], "image_path": "./data/STVQA/IIIT_text/8549.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "mc pig", "gt_answers": [ "258-ZNF" ], "image_path": "./data/STVQA/IIIT_text/3965.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign in front of the vehicle?", "answer": "pig bus", "gt_answers": [ "Mr. Porkchop" ], "image_path": "./data/STVQA/IIIT_text/3965.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "flinders street", "gt_answers": [ "church" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000188165.jpg", "model_name": "BLIP2" }, { "question": "According to the signage, what is straight ahead?", "answer": "truth or consequences", "gt_answers": [ "CITY OF ELEPHANT BUTTE" ], "image_path": "./data/STVQA/VisualGenome/1/2367275.jpg", "model_name": "BLIP2" }, { "question": "What is the cycle name?", "answer": "tandem bicycle", "gt_answers": [ "CO-MOTION CYCLE", "CO-MOTION" ], "image_path": "./data/STVQA/imageNet/n02835271_976.JPEG", "model_name": "BLIP2" }, { "question": "What is the company on the cup?", "answer": "pepsi", "gt_answers": [ "Pepsi" ], "image_path": "./data/STVQA/VisualGenome/2/2404234.jpg", "model_name": "BLIP2" }, { "question": "Who owns the copyright ?", "answer": "the photographer", "gt_answers": [ "Vicor Goncalves" ], "image_path": "./data/STVQA/imageNet/n02002556_1234.JPEG", "model_name": "BLIP2" }, { "question": "What county is the bus transportation for?", "answer": "county of los angeles", "gt_answers": [ "ORANGE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000111604.jpg", "model_name": "BLIP2" }, { "question": "Who took this photo?", "answer": "person", "gt_answers": [ "Robert McConnell" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000111604.jpg", "model_name": "BLIP2" }, { "question": "What is the large number written on the paper?", "answer": "6", "gt_answers": [ "6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000217959.jpg", "model_name": "BLIP2" }, { "question": "What unit of measurement does this pump use?", "answer": "gallons", "gt_answers": [ "Gallons" ], "image_path": "./data/STVQA/imageNet/n03425413_19060.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the Side of the bus?", "answer": "person", "gt_answers": [ "Scottish" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000419815.jpg", "model_name": "BLIP2" }, { "question": "What type of lens cap is that?", "answer": "nikon lens cap", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/imageNet/n03657121_8268.JPEG", "model_name": "BLIP2" }, { "question": "Which box contains the word \"library\"?", "answer": "tv in library", "gt_answers": [ "Library", "MEDIA LIBRARY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000049713.jpg", "model_name": "BLIP2" }, { "question": "which box contains the phrase \"MythTv\"?", "answer": "mythtv box", "gt_answers": [ "mythTV" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000049713.jpg", "model_name": "BLIP2" }, { "question": "which box contains the word \"recordings\"?", "answer": "tv remote", "gt_answers": [ "Recordings", "MANAGE RECORDINGS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000049713.jpg", "model_name": "BLIP2" }, { "question": "What is the Oriana Bathroom?", "answer": "an interactive bathroom", "gt_answers": [ "Interactive" ], "image_path": "./data/STVQA/imageNet/n04493381_50366.JPEG", "model_name": "BLIP2" }, { "question": "What are the words in the left corner?", "answer": "the oriana interactive bathroom", "gt_answers": [ "THE LOFT" ], "image_path": "./data/STVQA/imageNet/n04493381_50366.JPEG", "model_name": "BLIP2" }, { "question": "What does the middle sign on the building say?", "answer": "west side bem", "gt_answers": [ "E Barato" ], "image_path": "./data/STVQA/IIIT_text/2426.jpg", "model_name": "BLIP2" }, { "question": "What does the highest sign on the building say?", "answer": "west side bem", "gt_answers": [ "Vestir Bem" ], "image_path": "./data/STVQA/IIIT_text/2426.jpg", "model_name": "BLIP2" }, { "question": "Which city is in the name of the hot pot shot?", "answer": "hong kong", "gt_answers": [ "SEOUL" ], "image_path": "./data/STVQA/icdar/test_img_469.jpg", "model_name": "BLIP2" }, { "question": "What letters are legible to the left of the tennis player?", "answer": "qatar", "gt_answers": [ "OSBA ADER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000578119.jpg", "model_name": "BLIP2" }, { "question": "What numbered street is shown on the street signs?", "answer": "2nd street", "gt_answers": [ "21st St" ], "image_path": "./data/STVQA/VisualGenome/1/2344925.jpg", "model_name": "BLIP2" }, { "question": "What are the last 4 numbers of the phone number on the sign?", "answer": "212", "gt_answers": [ "5900" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117796.jpg", "model_name": "BLIP2" }, { "question": "What year what the photo taken", "answer": "2010", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000058926.jpg", "model_name": "BLIP2" }, { "question": "Who is the photographer listed in the bottom right corner", "answer": "person", "gt_answers": [ "Nengah Januartha" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000058926.jpg", "model_name": "BLIP2" }, { "question": "What street is being passed?", "answer": "main street", "gt_answers": [ "9TH STREET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000218455.jpg", "model_name": "BLIP2" }, { "question": "In which city is the guy working?", "answer": "new york city", "gt_answers": [ "City of SF" ], "image_path": "./data/STVQA/VisualGenome/1/2355540.jpg", "model_name": "BLIP2" }, { "question": "What is the guys shop?", "answer": "he is a taxi driver", "gt_answers": [ "Parking Meter Repair" ], "image_path": "./data/STVQA/VisualGenome/1/2355540.jpg", "model_name": "BLIP2" }, { "question": "Name the first number on the license plate?", "answer": "1", "gt_answers": [ "5" ], "image_path": "./data/STVQA/VisualGenome/1/2355540.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the guy with the red shirt?", "answer": "person", "gt_answers": [ "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000333595.jpg", "model_name": "BLIP2" }, { "question": "Which number is written on the green cartel in the background?", "answer": "person", "gt_answers": [ "400" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000333595.jpg", "model_name": "BLIP2" }, { "question": "Who is funding the project?", "answer": "the city", "gt_answers": [ "AMERICAN RECOVERY AND REINVESTMENT ACT" ], "image_path": "./data/STVQA/VisualGenome/1/2335108.jpg", "model_name": "BLIP2" }, { "question": "What is the slogan of the project?", "answer": "a better way", "gt_answers": [ "Putting America to Work" ], "image_path": "./data/STVQA/VisualGenome/1/2335108.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the green building?", "answer": "street", "gt_answers": [ "EASTERN" ], "image_path": "./data/STVQA/VisualGenome/1/2316446.jpg", "model_name": "BLIP2" }, { "question": "What words are written on the sign hanging from the traffic light?", "answer": "no parking", "gt_answers": [ "PED XING" ], "image_path": "./data/STVQA/VisualGenome/1/2316446.jpg", "model_name": "BLIP2" }, { "question": "What is written is red writing on the above banner?", "answer": "person", "gt_answers": [ "Silent Zone" ], "image_path": "./data/STVQA/VisualGenome/2/1154.jpg", "model_name": "BLIP2" }, { "question": "Who is the hospital named after?", "answer": "person", "gt_answers": [ "San Ignacio" ], "image_path": "./data/STVQA/VisualGenome/2/1154.jpg", "model_name": "BLIP2" }, { "question": "Where is the street sign on the bottom pointing to?", "answer": "hospital", "gt_answers": [ "Hecopab" ], "image_path": "./data/STVQA/VisualGenome/2/1154.jpg", "model_name": "BLIP2" }, { "question": "what is written on front of truck", "answer": "person", "gt_answers": [ "over size" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000548183.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bus?", "answer": "the number on the bus is 40", "gt_answers": [ "1552" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000378906.jpg", "model_name": "BLIP2" }, { "question": "What brand is the remote?", "answer": "person", "gt_answers": [ "Humax" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000137096.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the plane?", "answer": "vw", "gt_answers": [ "D-Misy" ], "image_path": "./data/STVQA/VisualGenome/1/2353629.jpg", "model_name": "BLIP2" }, { "question": "What brand is the laptop in the picture?", "answer": "acer", "gt_answers": [ "acer" ], "image_path": "./data/STVQA/VisualGenome/1/2359496.jpg", "model_name": "BLIP2" }, { "question": "Which search engine is powering this phone?", "answer": "yahoo", "gt_answers": [ "Yahoo" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018379.jpg", "model_name": "BLIP2" }, { "question": "What brand is the brown and white bag?", "answer": "guess", "gt_answers": [ "GUESS" ], "image_path": "./data/STVQA/imageNet/n04026417_2867.JPEG", "model_name": "BLIP2" }, { "question": "who is the player", "answer": "person", "gt_answers": [ "braun" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056938.jpg", "model_name": "BLIP2" }, { "question": "what is the players number", "answer": "person", "gt_answers": [ "8" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056938.jpg", "model_name": "BLIP2" }, { "question": "what company logo is behind player", "answer": "person", "gt_answers": [ "us bank" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056938.jpg", "model_name": "BLIP2" }, { "question": "What is the uniform number of the catcher?", "answer": "2", "gt_answers": [ "19" ], "image_path": "./data/STVQA/VisualGenome/1/2346930.jpg", "model_name": "BLIP2" }, { "question": "What is the uniform number of the sliding player?", "answer": "2", "gt_answers": [ "15" ], "image_path": "./data/STVQA/VisualGenome/1/2346930.jpg", "model_name": "BLIP2" }, { "question": "What airline is this plane from?", "answer": "avance", "gt_answers": [ "Avianca" ], "image_path": "./data/STVQA/VisualGenome/2/2409501.jpg", "model_name": "BLIP2" }, { "question": "What does the plane have written on it?", "answer": "person", "gt_answers": [ "U.S. Air force" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293880.jpg", "model_name": "BLIP2" }, { "question": "What city are these signs in?", "answer": "london", "gt_answers": [ "London" ], "image_path": "./data/STVQA/VisualGenome/1/2339378.jpg", "model_name": "BLIP2" }, { "question": "What is the complete name of this product?", "answer": "coca cola classic", "gt_answers": [ "Coca-Cola Classic", "COCA-COLA" ], "image_path": "./data/STVQA/IIIT_text/img_000286.jpg", "model_name": "BLIP2" }, { "question": "What are the innitals on the helment?", "answer": "person", "gt_answers": [ "AC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000309461.jpg", "model_name": "BLIP2" }, { "question": "What is written on board", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2362278.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the ball?", "answer": "adidas", "gt_answers": [ "Gilbert" ], "image_path": "./data/STVQA/imageNet/n04118538_3212.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "united airlines", "gt_answers": [ "United" ], "image_path": "./data/STVQA/VisualGenome/1/2322205.jpg", "model_name": "BLIP2" }, { "question": "What does the license plate say?", "answer": "person", "gt_answers": [ "NDK-611", "NDK 611" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000226350.jpg", "model_name": "BLIP2" }, { "question": "what is the link shown in the picture?", "answer": "person", "gt_answers": [ "WWW.WESTHOEK.BE" ], "image_path": "./data/STVQA/imageNet/n04523525_10714.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the tag?", "answer": "person", "gt_answers": [ "620" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000391632.jpg", "model_name": "BLIP2" }, { "question": "What is written on the clock?", "answer": "person", "gt_answers": [ "Royal Arcade" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000202270.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "berlin germany", "gt_answers": [ "Solingen Hbf" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000233341.jpg", "model_name": "BLIP2" }, { "question": "What color shirt is the person wearing?", "answer": "red", "gt_answers": [ "Red", "{}" ], "image_path": "./data/STVQA/VisualGenome/2/2407473.jpg", "model_name": "BLIP2" }, { "question": "What number is on the boat?", "answer": "no number", "gt_answers": [ "683311" ], "image_path": "./data/STVQA/VisualGenome/2/2416761.jpg", "model_name": "BLIP2" }, { "question": "What does the license plate say?", "answer": "elf", "gt_answers": [ "BPE N2H", "BPE 112H" ], "image_path": "./data/STVQA/VisualGenome/2/2401315.jpg", "model_name": "BLIP2" }, { "question": "What does the top of the bus say?", "answer": "elf", "gt_answers": [ "Elf Team Tyrrell" ], "image_path": "./data/STVQA/VisualGenome/2/2401315.jpg", "model_name": "BLIP2" }, { "question": "WHAT BRANCH OF THE SERVICE ARE THESE MEN FROM?", "answer": "army", "gt_answers": [ "air national guard AIR NATIONAL GUARD", "AIR NATIONAL GUARD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000193654.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "jhb 005", "gt_answers": [ "KNL 665" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000536280.jpg", "model_name": "BLIP2" }, { "question": "What is the bus manufacture company?", "answer": "person", "gt_answers": [ "vanHool" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000536280.jpg", "model_name": "BLIP2" }, { "question": "What is the company that own the bus?", "answer": "person", "gt_answers": [ "scotline tours" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000536280.jpg", "model_name": "BLIP2" }, { "question": "What brand is this mouse?", "answer": "microsoft", "gt_answers": [ "DELL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000139914.jpg", "model_name": "BLIP2" }, { "question": "who is the player number 3?", "answer": "catcher", "gt_answers": [ "stoiffer" ], "image_path": "./data/STVQA/VisualGenome/2/2402776.jpg", "model_name": "BLIP2" }, { "question": "What is written on the wall?", "answer": "revolution", "gt_answers": [ "power of revoluti" ], "image_path": "./data/STVQA/VisualGenome/1/2371153.jpg", "model_name": "BLIP2" }, { "question": "What is the name on this image?", "answer": "fire hydrant", "gt_answers": [ "Dustin Diaz" ], "image_path": "./data/STVQA/VisualGenome/1/2341952.jpg", "model_name": "BLIP2" }, { "question": "What is the street name?", "answer": "person", "gt_answers": [ "RUELLE ELVIS LIVES LANE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000333841.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "a motorcycle store", "gt_answers": [ "Guess" ], "image_path": "./data/STVQA/VisualGenome/1/2355126.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the bathroom cleaner?", "answer": "ariel", "gt_answers": [ "Flash" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000417105.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the avenue?", "answer": "wilson avenue", "gt_answers": [ "McGill College" ], "image_path": "./data/STVQA/VisualGenome/1/2319828.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boulevard?", "answer": "wilshire boulevard", "gt_answers": [ "MAISONNEUVE" ], "image_path": "./data/STVQA/VisualGenome/1/2319828.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of this lock?", "answer": "master", "gt_answers": [ "Master" ], "image_path": "./data/STVQA/imageNet/n03075370_4982.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "pet rest area", "gt_answers": [ "PET REST AREA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000454068.jpg", "model_name": "BLIP2" }, { "question": "What is written in white?", "answer": "pet rest area", "gt_answers": [ "PET REST AREA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000454068.jpg", "model_name": "BLIP2" }, { "question": "Whose rest area is this?", "answer": "pet rest area", "gt_answers": [ "PET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000454068.jpg", "model_name": "BLIP2" }, { "question": "What is the red word on the circle?", "answer": "person", "gt_answers": [ "York" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000347055.jpg", "model_name": "BLIP2" }, { "question": "What is the price of a cheese calzone?", "answer": "$11 at person's pizza", "gt_answers": [ "28" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000091725.jpg", "model_name": "BLIP2" }, { "question": "What does a square pie come with?", "answer": "a side salad", "gt_answers": [ "Specialty Topping" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000091725.jpg", "model_name": "BLIP2" }, { "question": "What costs 32.00?", "answer": "a large pizza", "gt_answers": [ "Cheese calzone w/ 1 topping", "CHEESE W/ 1 TOPPING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000091725.jpg", "model_name": "BLIP2" }, { "question": "What is the price of a single room?", "answer": "the price of a single room ranges from $80 to $100", "gt_answers": [ "$19.95", "19.95" ], "image_path": "./data/STVQA/IIIT_text/img_000815.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the motel?", "answer": "all star", "gt_answers": [ "Blue Swallow Motel", "BLUE SWALLOW" ], "image_path": "./data/STVQA/IIIT_text/img_000815.jpg", "model_name": "BLIP2" }, { "question": "What are the letters and/or numbers on the license plate?", "answer": "a b c", "gt_answers": [ "7848WI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000218870.jpg", "model_name": "BLIP2" }, { "question": "What does the red traffic sign read?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2323999.jpg", "model_name": "BLIP2" }, { "question": "who took this picture?", "answer": "weather k", "gt_answers": [ "Wernher Krutein" ], "image_path": "./data/STVQA/imageNet/n02640242_45953.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "torrie way", "gt_answers": [ "TORRIE WAY" ], "image_path": "./data/STVQA/imageNet/n06794110_5582.JPEG", "model_name": "BLIP2" }, { "question": "What is the second word on the blue shirt?", "answer": "tennis", "gt_answers": [ "Does" ], "image_path": "./data/STVQA/VisualGenome/1/2329675.jpg", "model_name": "BLIP2" }, { "question": "What number is on the wall?", "answer": "3", "gt_answers": [ "13" ], "image_path": "./data/STVQA/VisualGenome/1/2329675.jpg", "model_name": "BLIP2" }, { "question": "What year did the bobsled run take place?", "answer": "1980", "gt_answers": [ "1980" ], "image_path": "./data/STVQA/imageNet/n02860847_20726.JPEG", "model_name": "BLIP2" }, { "question": "What is the license plate of the bus?", "answer": "tlh", "gt_answers": [ "DRD130" ], "image_path": "./data/STVQA/imageNet/n04487081_4491.JPEG", "model_name": "BLIP2" }, { "question": "what is the date specified on this image?", "answer": "1 july 2009", "gt_answers": [ "20.3.2007" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254615.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the hand shaped sign?", "answer": "no skateboarding", "gt_answers": [ "Go Ord" ], "image_path": "./data/STVQA/VisualGenome/1/2344899.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the Pitcher's jersey?", "answer": "person", "gt_answers": [ "39" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056859.jpg", "model_name": "BLIP2" }, { "question": "What three digit number is on the back fence?", "answer": "3", "gt_answers": [ "395" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056859.jpg", "model_name": "BLIP2" }, { "question": "What letters are visible on the top of the sign, to the right of the pole?", "answer": "person", "gt_answers": [ "ticians" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000562517.jpg", "model_name": "BLIP2" }, { "question": "What company made the policy car in the photograph?", "answer": "ford", "gt_answers": [ "Ford" ], "image_path": "./data/STVQA/VisualGenome/1/2326583.jpg", "model_name": "BLIP2" }, { "question": "What is the telephone number for the police?", "answer": "911", "gt_answers": [ "1-877-4FPS-411" ], "image_path": "./data/STVQA/VisualGenome/1/2326583.jpg", "model_name": "BLIP2" }, { "question": "What kind of beer are they drinking?", "answer": "bud light", "gt_answers": [ "RED", "red eye" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000474637.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the textbook on the top of the stack?", "answer": "physics for dummies", "gt_answers": [ "sign SOLUTIONS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000474637.jpg", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the camera?", "answer": "polaroid corporation", "gt_answers": [ "Polaroid" ], "image_path": "./data/STVQA/imageNet/n03976467_1694.JPEG", "model_name": "BLIP2" }, { "question": "What number is written on the side of the box?", "answer": "660", "gt_answers": [ "690" ], "image_path": "./data/STVQA/imageNet/n03976467_1694.JPEG", "model_name": "BLIP2" }, { "question": "What is written on this building?", "answer": "city hall", "gt_answers": [ "investments." ], "image_path": "./data/STVQA/VisualGenome/1/2319405.jpg", "model_name": "BLIP2" }, { "question": "What are the first three letters after the word SUPER?", "answer": "su", "gt_answers": [ "HER" ], "image_path": "./data/STVQA/icdar/test_img_438.jpg", "model_name": "BLIP2" }, { "question": "What is the store on the right called?", "answer": "zara", "gt_answers": [ "Calvin Klein" ], "image_path": "./data/STVQA/icdar/img_764.jpg", "model_name": "BLIP2" }, { "question": "What is the store on the right selling?", "answer": "watches", "gt_answers": [ "Underwear" ], "image_path": "./data/STVQA/icdar/img_764.jpg", "model_name": "BLIP2" }, { "question": "Who created this image?", "answer": "person", "gt_answers": [ "Jason Mayes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000238568.jpg", "model_name": "BLIP2" }, { "question": "Whats is the vehicle number plate", "answer": "the vehicle number plate is tbc", "gt_answers": [ "PT-15331", "PT - 15331" ], "image_path": "./data/STVQA/VisualGenome/2/2410177.jpg", "model_name": "BLIP2" }, { "question": "What company's brand logo is in the picture?", "answer": "american original instruments", "gt_answers": [ "Gibson" ], "image_path": "./data/STVQA/imageNet/n02787622_5669.JPEG", "model_name": "BLIP2" }, { "question": "How many calories per serving?", "answer": "260 calories per serving", "gt_answers": [ "180" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000894.jpg", "model_name": "BLIP2" }, { "question": "How much dietary fiber per serving?", "answer": "4 g", "gt_answers": [ "5g" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000894.jpg", "model_name": "BLIP2" }, { "question": "How many grams of trans fat per serving?", "answer": "2 grams", "gt_answers": [ "0" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000894.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of Jean?", "answer": "levi straits co", "gt_answers": [ "Levi Strauss & CO." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015297.jpg", "model_name": "BLIP2" }, { "question": "Where is the brand located?", "answer": "denver, colorado", "gt_answers": [ "San Francisco Cal." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015297.jpg", "model_name": "BLIP2" }, { "question": "What is the sign asking you to do?", "answer": "buy tickets", "gt_answers": [ "Purchase Tickets" ], "image_path": "./data/STVQA/VisualGenome/2/1258.jpg", "model_name": "BLIP2" }, { "question": "What cross street is in front of the camera person?", "answer": "person", "gt_answers": [ "SULPHUR AVE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000524676.jpg", "model_name": "BLIP2" }, { "question": "What toy brand made the green bear?", "answer": "beanie babies", "gt_answers": [ "ty" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000137999.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the fridge?", "answer": "love", "gt_answers": [ "Am" ], "image_path": "./data/STVQA/VisualGenome/2/2409281.jpg", "model_name": "BLIP2" }, { "question": "What kind of cake is on the table?", "answer": "red velvet cake", "gt_answers": [ "red velvet layer cake" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000387102.jpg", "model_name": "BLIP2" }, { "question": "What web site can I go for more info?", "answer": "person", "gt_answers": [ "kittyscuquis.wordpress.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000387102.jpg", "model_name": "BLIP2" }, { "question": "What kind of pride?", "answer": "usc pride", "gt_answers": [ "USC" ], "image_path": "./data/STVQA/VisualGenome/1/2364298.jpg", "model_name": "BLIP2" }, { "question": "What percent of Vitamin A is there?", "answer": "vitamin a", "gt_answers": [ "0%" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003550.jpg", "model_name": "BLIP2" }, { "question": "What percent of Vitamin C is there?", "answer": "5%", "gt_answers": [ "6%" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003550.jpg", "model_name": "BLIP2" }, { "question": "What percent of Calcium is there?", "answer": "5%", "gt_answers": [ "0%" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003550.jpg", "model_name": "BLIP2" }, { "question": "What sport is being played?", "answer": "baseball", "gt_answers": [ "Baseball" ], "image_path": "./data/STVQA/VisualGenome/1/2356681.jpg", "model_name": "BLIP2" }, { "question": "What is written on the logo in the background", "answer": "wii", "gt_answers": [ "Live hard drive fast" ], "image_path": "./data/STVQA/VisualGenome/1/2348203.jpg", "model_name": "BLIP2" }, { "question": "What is on the wall in white?", "answer": "baseball game", "gt_answers": [ "PNCBANK", "PNC BANK" ], "image_path": "./data/STVQA/VisualGenome/2/2407549.jpg", "model_name": "BLIP2" }, { "question": "What is the number written on the woman's thigh?", "answer": "person", "gt_answers": [ "12" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160294.jpg", "model_name": "BLIP2" }, { "question": "What is the 3-letter text written in yellow on the woman's sticks?", "answer": "person", "gt_answers": [ "LEK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160294.jpg", "model_name": "BLIP2" }, { "question": "What year was this picture taken?", "answer": "2013", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159070.jpg", "model_name": "BLIP2" }, { "question": "Who gave permission to reproduce this picture?", "answer": "person", "gt_answers": [ "becky moody" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159070.jpg", "model_name": "BLIP2" }, { "question": "What kind of trips are to the left?", "answer": "tours", "gt_answers": [ "Boat", "CANAL BOAT TRIPS" ], "image_path": "./data/STVQA/VisualGenome/1/2377279.jpg", "model_name": "BLIP2" }, { "question": "What is this product?", "answer": "coffee", "gt_answers": [ "Timothy's World Coffee" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016344.jpg", "model_name": "BLIP2" }, { "question": "How many K-cups?", "answer": "12", "gt_answers": [ "24" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016344.jpg", "model_name": "BLIP2" }, { "question": "What flavor is it?", "answer": "caramel apple", "gt_answers": [ "Cinnamon" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016344.jpg", "model_name": "BLIP2" }, { "question": "How many servings are in this container?", "answer": "4", "gt_answers": [ "About 5", "5" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016650.jpg", "model_name": "BLIP2" }, { "question": "How many calories are in one serving?", "answer": "240 calories", "gt_answers": [ "170" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016650.jpg", "model_name": "BLIP2" }, { "question": "How much sodium is in one serving?", "answer": "480 mg sodium", "gt_answers": [ "550mg" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016650.jpg", "model_name": "BLIP2" }, { "question": "What does the golf ball say?", "answer": "david 60", "gt_answers": [ "David 60" ], "image_path": "./data/STVQA/imageNet/n03445777_838.JPEG", "model_name": "BLIP2" }, { "question": "What does the grass say?", "answer": "david 60", "gt_answers": [ "Happy Birthday" ], "image_path": "./data/STVQA/imageNet/n03445777_838.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on the sign on the left?", "answer": "u s open tennis", "gt_answers": [ "Vittel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000370120.jpg", "model_name": "BLIP2" }, { "question": "What photographer took this photo?", "answer": "michael scott", "gt_answers": [ "DBG Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2346253.jpg", "model_name": "BLIP2" }, { "question": "What does the orange poster say?", "answer": "no smoking", "gt_answers": [ "because you will be inspired" ], "image_path": "./data/STVQA/icdar/img_88.jpg", "model_name": "BLIP2" }, { "question": "Which colors are painted on the fire hydrant?", "answer": "red and yellow", "gt_answers": [ "Yellow, red, green", "Yellow, green and red" ], "image_path": "./data/STVQA/VisualGenome/1/2334644.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOR OF THE AEROPLANE?", "answer": "white", "gt_answers": [ "WHITE" ], "image_path": "./data/STVQA/VisualGenome/1/2357500.jpg", "model_name": "BLIP2" }, { "question": "What is the advertiser behind the player?", "answer": "dew", "gt_answers": [ "Hilton" ], "image_path": "./data/STVQA/VisualGenome/2/2404875.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store behind the mini van?", "answer": "rose", "gt_answers": [ "Rose Metal" ], "image_path": "./data/STVQA/VisualGenome/1/2339698.jpg", "model_name": "BLIP2" }, { "question": "What is the first name on the tag?", "answer": "ferret", "gt_answers": [ "Angela" ], "image_path": "./data/STVQA/imageNet/n02441942_21631.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand name of the sign with the swan?", "answer": "swatch", "gt_answers": [ "swarovski" ], "image_path": "./data/STVQA/VisualGenome/1/2363419.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the train on the left?", "answer": "the train on the right", "gt_answers": [ "115012" ], "image_path": "./data/STVQA/VisualGenome/1/2373563.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of both trains?", "answer": "amtrak", "gt_answers": [ "SNCF" ], "image_path": "./data/STVQA/VisualGenome/1/2373563.jpg", "model_name": "BLIP2" }, { "question": "What time is in the clocks?", "answer": "the clocks are showing different times", "gt_answers": [ "12:38:56" ], "image_path": "./data/STVQA/imageNet/n03197337_3129.JPEG", "model_name": "BLIP2" }, { "question": "What is the product?", "answer": "oil filter", "gt_answers": [ "Oil Filter" ], "image_path": "./data/STVQA/imageNet/n03843555_7987.JPEG", "model_name": "BLIP2" }, { "question": "Which is the brand?", "answer": "toyota", "gt_answers": [ "Toyota" ], "image_path": "./data/STVQA/imageNet/n03843555_7987.JPEG", "model_name": "BLIP2" }, { "question": "How many doughnuts are being eaten by women?", "answer": "two", "gt_answers": [ "one" ], "image_path": "./data/STVQA/VisualGenome/1/2370664.jpg", "model_name": "BLIP2" }, { "question": "How many trees are in this picture?", "answer": "two", "gt_answers": [ "2", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2327949.jpg", "model_name": "BLIP2" }, { "question": "how many people are in mid-air?", "answer": "two", "gt_answers": [ "1", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2327949.jpg", "model_name": "BLIP2" }, { "question": "Wht is the number listed on the sign on the right?", "answer": "3", "gt_answers": [ "1432" ], "image_path": "./data/STVQA/VisualGenome/2/2410543.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the orange cube?", "answer": "b", "gt_answers": [ "B" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000140563.jpg", "model_name": "BLIP2" }, { "question": "What brand is this tool?", "answer": "dewalt", "gt_answers": [ "Oupu" ], "image_path": "./data/STVQA/imageNet/n03995372_9600.JPEG", "model_name": "BLIP2" }, { "question": "What is the fourth large letter written on the side of the truck?", "answer": "c", "gt_answers": [ "c" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414764.jpg", "model_name": "BLIP2" }, { "question": "What is above the screen?", "answer": "a clock", "gt_answers": [ "SEnli" ], "image_path": "./data/STVQA/VisualGenome/2/2403765.jpg", "model_name": "BLIP2" }, { "question": "What type of information is being logged on the computer?", "answer": "e mail", "gt_answers": [ "FIELD notes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000078394.jpg", "model_name": "BLIP2" }, { "question": "What type of documents are on top of the computer?", "answer": "a laptop, a phone, a fax machine, and a printer", "gt_answers": [ "FIELD notes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000078394.jpg", "model_name": "BLIP2" }, { "question": "What animal is in the picture?", "answer": "tiger", "gt_answers": [ "Tiger" ], "image_path": "./data/STVQA/VisualGenome/1/2371884.jpg", "model_name": "BLIP2" }, { "question": "What is the statue holding in his hand?", "answer": "umbrella", "gt_answers": [ "Umbrella" ], "image_path": "./data/STVQA/VisualGenome/1/2371884.jpg", "model_name": "BLIP2" }, { "question": "What color is the fence?", "answer": "black", "gt_answers": [ "Black" ], "image_path": "./data/STVQA/VisualGenome/1/2371884.jpg", "model_name": "BLIP2" }, { "question": "what is appeared in the red heart appeared in wall/", "answer": "graffiti", "gt_answers": [ "pain" ], "image_path": "./data/STVQA/VisualGenome/1/2351417.jpg", "model_name": "BLIP2" }, { "question": "Where are the oranges from?", "answer": "the store", "gt_answers": [ "California" ], "image_path": "./data/STVQA/VisualGenome/1/2328876.jpg", "model_name": "BLIP2" }, { "question": "How are the oranges described on this box?", "answer": "oranges", "gt_answers": [ "Fresh & Juicy" ], "image_path": "./data/STVQA/VisualGenome/1/2328876.jpg", "model_name": "BLIP2" }, { "question": "What number is on the side of the boat?", "answer": "person", "gt_answers": [ "1646" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000529956.jpg", "model_name": "BLIP2" }, { "question": "What does this place help you grow?", "answer": "a library is a great place to study", "gt_answers": [ "business", "YOUR OWN BUSINESS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000338030.jpg", "model_name": "BLIP2" }, { "question": "What phrase is written on the black and white signs?", "answer": "one way", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/1/2377166.jpg", "model_name": "BLIP2" }, { "question": "What is the main text on the sticker", "answer": "paris paris", "gt_answers": [ "Dessous" ], "image_path": "./data/STVQA/IIIT_text/2274.jpg", "model_name": "BLIP2" }, { "question": "what does the sign say?", "answer": "alto", "gt_answers": [ "alto" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000025797.jpg", "model_name": "BLIP2" }, { "question": "what does the green,white, and red sticker say?", "answer": "alto", "gt_answers": [ "MADRAZO ES el BUENO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000025797.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS PRINTED ON GREEN JERSEY?", "answer": "team name", "gt_answers": [ "FMI" ], "image_path": "./data/STVQA/VisualGenome/1/2355772.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS PRINTED ON BLUE JERSEY?", "answer": "team name", "gt_answers": [ "M LINE" ], "image_path": "./data/STVQA/VisualGenome/1/2355772.jpg", "model_name": "BLIP2" }, { "question": "WHO IS COPY RIGHT AUTHORITY?", "answer": "the united states copyright office", "gt_answers": [ "WALTHER.SIKSMA.NL" ], "image_path": "./data/STVQA/VisualGenome/1/2355772.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the white bus ?", "answer": "volvo", "gt_answers": [ "Newport" ], "image_path": "./data/STVQA/VisualGenome/2/2411995.jpg", "model_name": "BLIP2" }, { "question": "When does the store in the image open?", "answer": "7 am", "gt_answers": [ "7.30AM" ], "image_path": "./data/STVQA/icdar/test_img_144.jpg", "model_name": "BLIP2" }, { "question": "What's the plate number?", "answer": "person", "gt_answers": [ "749 248-1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000323252.jpg", "model_name": "BLIP2" }, { "question": "Which sale is being advertised?", "answer": "50% off", "gt_answers": [ "End of season sale", "END OF SEASON" ], "image_path": "./data/STVQA/icdar/img_369.jpg", "model_name": "BLIP2" }, { "question": "Products in this sale are up to how much off?", "answer": "50%", "gt_answers": [ "50 PERCENT" ], "image_path": "./data/STVQA/icdar/img_369.jpg", "model_name": "BLIP2" }, { "question": "What is the brand listed on the object?", "answer": "palladium", "gt_answers": [ "PALLADIO" ], "image_path": "./data/STVQA/imageNet/n03908714_3808.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the sandwich wrapper?", "answer": "italy", "gt_answers": [ "MILANO" ], "image_path": "./data/STVQA/VisualGenome/2/2411096.jpg", "model_name": "BLIP2" }, { "question": "What can't be stopped?", "answer": "dancing", "gt_answers": [ "Dancin'" ], "image_path": "./data/STVQA/VisualGenome/1/2346088.jpg", "model_name": "BLIP2" }, { "question": "what sign is written on the stone?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/1/2360280.jpg", "model_name": "BLIP2" }, { "question": "What company does the lady wearing a silver watch work for?", "answer": "person", "gt_answers": [ "ZB" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000515053.jpg", "model_name": "BLIP2" }, { "question": "How is the event being broadcast?", "answer": "live stream", "gt_answers": [ "LIVE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000515053.jpg", "model_name": "BLIP2" }, { "question": "What word is under the leaf icon?", "answer": "new", "gt_answers": [ "new" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000515053.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the desktop computer?", "answer": "dell", "gt_answers": [ "DELL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000134586.jpg", "model_name": "BLIP2" }, { "question": "What is written on the red neon sign?", "answer": "tavern", "gt_answers": [ "Restrooms" ], "image_path": "./data/STVQA/VisualGenome/1/2363711.jpg", "model_name": "BLIP2" }, { "question": "what is the word on the red signs?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/1/2359490.jpg", "model_name": "BLIP2" }, { "question": "What street sign is shown?", "answer": "stop sign", "gt_answers": [ "Pelican" ], "image_path": "./data/STVQA/VisualGenome/2/2404965.jpg", "model_name": "BLIP2" }, { "question": "what dose the orange sticker say", "answer": "\"do not touch\"", "gt_answers": [ "biohazard waste" ], "image_path": "./data/STVQA/VisualGenome/1/2340909.jpg", "model_name": "BLIP2" }, { "question": "What is the year on the bus?", "answer": "2009", "gt_answers": [ "2018" ], "image_path": "./data/STVQA/VisualGenome/1/2324886.jpg", "model_name": "BLIP2" }, { "question": "What number is written after PROP", "answer": "8", "gt_answers": [ "8" ], "image_path": "./data/STVQA/VisualGenome/2/2403218.jpg", "model_name": "BLIP2" }, { "question": "What brand is the store?", "answer": "honda", "gt_answers": [ "Honda" ], "image_path": "./data/STVQA/IIIT_text/img_000597.jpg", "model_name": "BLIP2" }, { "question": "What is the only letter on show?", "answer": "a", "gt_answers": [ "P" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000532779.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airplane?", "answer": "emirates airlines", "gt_answers": [ "Emirates" ], "image_path": "./data/STVQA/VisualGenome/1/2349375.jpg", "model_name": "BLIP2" }, { "question": "What is the word to the left of the lady in blue?", "answer": "card", "gt_answers": [ "Card" ], "image_path": "./data/STVQA/VisualGenome/2/2416048.jpg", "model_name": "BLIP2" }, { "question": "How many times does the avarage adult heart beats per minute?", "answer": "60 to 100", "gt_answers": [ "72 times", "72" ], "image_path": "./data/STVQA/icdar/img_39.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the commercial display?", "answer": "ipod", "gt_answers": [ "Human Body Experience" ], "image_path": "./data/STVQA/icdar/img_39.jpg", "model_name": "BLIP2" }, { "question": "What country name is written on the side of the aircraft?", "answer": "usa", "gt_answers": [ "United States" ], "image_path": "./data/STVQA/imageNet/n04266014_1118.JPEG", "model_name": "BLIP2" }, { "question": "What brand of beer is on the table?", "answer": "person", "gt_answers": [ "KIRIN ICHIBAN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000151103.jpg", "model_name": "BLIP2" }, { "question": "What is the big word on the red sign?", "answer": "pizza", "gt_answers": [ "Closed" ], "image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG", "model_name": "BLIP2" }, { "question": "What number is in the white light on the left at the top of the window?", "answer": "4", "gt_answers": [ "10708" ], "image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG", "model_name": "BLIP2" }, { "question": "what does the red sign say?", "answer": "burger king", "gt_answers": [ "closed" ], "image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG", "model_name": "BLIP2" }, { "question": "what is the address in the window?", "answer": "the address in the window", "gt_answers": [ "10708" ], "image_path": "./data/STVQA/imageNet/n04584207_6433.JPEG", "model_name": "BLIP2" }, { "question": "What type of wine is on the left?", "answer": "port", "gt_answers": [ "Shiraz" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000149221.jpg", "model_name": "BLIP2" }, { "question": "What type of wine is on the right?", "answer": "port", "gt_answers": [ "Chardonnay" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000149221.jpg", "model_name": "BLIP2" }, { "question": "What is the size of the container on the right?", "answer": "750ml", "gt_answers": [ "750ml", "750 ML" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000149221.jpg", "model_name": "BLIP2" }, { "question": "What number is on the mans jersey?", "answer": "person", "gt_answers": [ "21" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000570440.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the image tag?", "answer": "hdr", "gt_answers": [ "Richard" ], "image_path": "./data/STVQA/imageNet/n03065424_54356.JPEG", "model_name": "BLIP2" }, { "question": "What time was this photo taken?", "answer": "08:00", "gt_answers": [ "4:39 AM" ], "image_path": "./data/STVQA/VisualGenome/1/2363429.jpg", "model_name": "BLIP2" }, { "question": "What date was this photo taken?", "answer": "october 1", "gt_answers": [ "09/02/08" ], "image_path": "./data/STVQA/VisualGenome/1/2363429.jpg", "model_name": "BLIP2" }, { "question": "What is written on the tail of the plane?", "answer": "eagle", "gt_answers": [ "Skyline" ], "image_path": "./data/STVQA/VisualGenome/1/2374103.jpg", "model_name": "BLIP2" }, { "question": "What is highlighted in Green?", "answer": "speedometer", "gt_answers": [ "N" ], "image_path": "./data/STVQA/imageNet/n03841143_897.JPEG", "model_name": "BLIP2" }, { "question": "what color is the skateboard?", "answer": "black", "gt_answers": [ "black", "{}" ], "image_path": "./data/STVQA/VisualGenome/2/2414825.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the cross street?", "answer": "king street", "gt_answers": [ "MAIN ST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000526362.jpg", "model_name": "BLIP2" }, { "question": "What building do you see?", "answer": "a building", "gt_answers": [ "PERMANENT" ], "image_path": "./data/STVQA/VisualGenome/1/2361060.jpg", "model_name": "BLIP2" }, { "question": "What is the closest store shown?", "answer": "walmart", "gt_answers": [ "Boutique" ], "image_path": "./data/STVQA/icdar/img_659.jpg", "model_name": "BLIP2" }, { "question": "what is the word on the binocholars?", "answer": "triplet", "gt_answers": [ "Triplet" ], "image_path": "./data/STVQA/imageNet/n03692522_10281.JPEG", "model_name": "BLIP2" }, { "question": "What is the registration number of the bus?", "answer": "the bus's registration number is person", "gt_answers": [ "14-36", "1436" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000280339.jpg", "model_name": "BLIP2" }, { "question": "What is the number code written at the bottom of the image?", "answer": "mf-mf-mf", "gt_answers": [ "021-51697948" ], "image_path": "./data/STVQA/imageNet/n04505470_2816.JPEG", "model_name": "BLIP2" }, { "question": "what type of drink is on the floor?", "answer": "water", "gt_answers": [ "water" ], "image_path": "./data/STVQA/imageNet/n04557648_3789.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on this train's wagon?", "answer": "sb ffs", "gt_answers": [ "SBB CFF FFS" ], "image_path": "./data/STVQA/VisualGenome/1/2362688.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the restaurant?", "answer": "starbucks", "gt_answers": [ "Ramen Play" ], "image_path": "./data/STVQA/icdar/img_424.jpg", "model_name": "BLIP2" }, { "question": "what is displayed in red?", "answer": "people", "gt_answers": [ "Ramen Play" ], "image_path": "./data/STVQA/icdar/img_424.jpg", "model_name": "BLIP2" }, { "question": "What is written on the corner?", "answer": "person", "gt_answers": [ "Pro Kennex" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000513943.jpg", "model_name": "BLIP2" }, { "question": "What time does the clock say?", "answer": "it says 0", "gt_answers": [ "PM 7:35" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000874.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bottom oven screen?", "answer": "4", "gt_answers": [ "2306" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000203035.jpg", "model_name": "BLIP2" }, { "question": "What is the bus schedule number?", "answer": "the bus schedule number is", "gt_answers": [ "335" ], "image_path": "./data/STVQA/VisualGenome/1/2355773.jpg", "model_name": "BLIP2" }, { "question": "What letters are written in red?", "answer": "i love you", "gt_answers": [ "Bata" ], "image_path": "./data/STVQA/IIIT_text/img_000171.jpg", "model_name": "BLIP2" }, { "question": "What sport is being played?", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/1/2357762.jpg", "model_name": "BLIP2" }, { "question": "What color is the player's shirt?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/1/2357762.jpg", "model_name": "BLIP2" }, { "question": "Street number address of this building?", "answer": "xerox building - xerox corporation", "gt_answers": [ "100 S. Clinton Ave", "100 S CLINTON AVENUE" ], "image_path": "./data/STVQA/IIIT_text/img_001324.jpg", "model_name": "BLIP2" }, { "question": "What name is on the street sign?", "answer": "bedford street", "gt_answers": [ "Bedford St" ], "image_path": "./data/STVQA/VisualGenome/1/2344090.jpg", "model_name": "BLIP2" }, { "question": "What is the red sign with green writing advertising?", "answer": "a restaurant", "gt_answers": [ "Hoi Loon Hotel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000220992.jpg", "model_name": "BLIP2" }, { "question": "Which gas station sign is on the left of the image?", "answer": "person", "gt_answers": [ "7 Eleven" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000220992.jpg", "model_name": "BLIP2" }, { "question": "What does the player's shirt say?", "answer": "person", "gt_answers": [ "LONEY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278297.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the plane?", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117336.jpg", "model_name": "BLIP2" }, { "question": "What is this building used for according to the sign above it?", "answer": "post office", "gt_answers": [ "Post Office" ], "image_path": "./data/STVQA/IIIT_text/img_000946.jpg", "model_name": "BLIP2" }, { "question": "What brand is advertised on the purple sign?", "answer": "person", "gt_answers": [ "Fila" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000147051.jpg", "model_name": "BLIP2" }, { "question": "What kind of chips are featured?", "answer": "tortilla chips", "gt_answers": [ "Bar B Que" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000574635.jpg", "model_name": "BLIP2" }, { "question": "what is written on tee-shirt?", "answer": "person", "gt_answers": [ "FEAR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000042312.jpg", "model_name": "BLIP2" }, { "question": "Who is \"queer\"?", "answer": "queer", "gt_answers": [ "The 318 Roadswitcher" ], "image_path": "./data/STVQA/imageNet/n03393912_5876.JPEG", "model_name": "BLIP2" }, { "question": "What number is pictured?", "answer": "23", "gt_answers": [ "218300 67700" ], "image_path": "./data/STVQA/imageNet/n03393912_5876.JPEG", "model_name": "BLIP2" }, { "question": "What band is the poster advertising?", "answer": "the blues brothers", "gt_answers": [ "THE BLUES BROTHERS" ], "image_path": "./data/STVQA/VisualGenome/1/2352789.jpg", "model_name": "BLIP2" }, { "question": "What does the sign at the top of the bus read?", "answer": "person", "gt_answers": [ "School Bus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000039733.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store on the far right?", "answer": "mrt", "gt_answers": [ "Watsons" ], "image_path": "./data/STVQA/VisualGenome/1/2324830.jpg", "model_name": "BLIP2" }, { "question": "What is it called when you cease all movement?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117922.jpg", "model_name": "BLIP2" }, { "question": "What do you drive that has four wheels and a motor?", "answer": "a vehicle", "gt_answers": [ "vehicle" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000117922.jpg", "model_name": "BLIP2" }, { "question": "What is the first name displayed at the bottom?", "answer": "person", "gt_answers": [ "STACY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000012228.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bus?", "answer": "mta bus", "gt_answers": [ "889" ], "image_path": "./data/STVQA/VisualGenome/1/2326832.jpg", "model_name": "BLIP2" }, { "question": "Who made the scooter?", "answer": "vespa", "gt_answers": [ "Ferrari" ], "image_path": "./data/STVQA/VisualGenome/1/1592331.jpg", "model_name": "BLIP2" }, { "question": "What kid of shop is on the red background?", "answer": "a motorcycle shop", "gt_answers": [ "BOOKSHOP" ], "image_path": "./data/STVQA/VisualGenome/1/1592331.jpg", "model_name": "BLIP2" }, { "question": "What is the batter's number?", "answer": "person", "gt_answers": [ "55" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000004201.jpg", "model_name": "BLIP2" }, { "question": "Who is sponsoring the game?", "answer": "person", "gt_answers": [ "modell's sporting goods" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000004201.jpg", "model_name": "BLIP2" }, { "question": "What brand of tricycle is it?", "answer": "super trike", "gt_answers": [ "Super Stock" ], "image_path": "./data/STVQA/imageNet/n04482393_14369.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the red sign?", "answer": "coke", "gt_answers": [ "Colgate" ], "image_path": "./data/STVQA/VisualGenome/1/2327119.jpg", "model_name": "BLIP2" }, { "question": "what color are the M&M's in the first tube?", "answer": "green", "gt_answers": [ "Aqua Green", "{}" ], "image_path": "./data/STVQA/imageNet/n03089624_97784.JPEG", "model_name": "BLIP2" }, { "question": "What website is this picture from?", "answer": "123rf", "gt_answers": [ "www.shutterstock.com" ], "image_path": "./data/STVQA/imageNet/n07718472_29620.JPEG", "model_name": "BLIP2" }, { "question": "What is the text in the middle say?", "answer": "cucumbers on white background", "gt_answers": [ "SHUTTER STOCK" ], "image_path": "./data/STVQA/imageNet/n07718472_29620.JPEG", "model_name": "BLIP2" }, { "question": "What does the text at the bottom say?", "answer": "cucumbers on white background", "gt_answers": [ "www.shutterstock.com 30031780" ], "image_path": "./data/STVQA/imageNet/n07718472_29620.JPEG", "model_name": "BLIP2" }, { "question": "What kind of fruit is this?", "answer": "orange", "gt_answers": [ "Orange" ], "image_path": "./data/STVQA/VisualGenome/1/2351930.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2341267.jpg", "model_name": "BLIP2" }, { "question": "What is the slogan on the plane?", "answer": "the city better life", "gt_answers": [ "Better City, Better Life" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000277319.jpg", "model_name": "BLIP2" }, { "question": "What number is the bike in the background?", "answer": "person", "gt_answers": [ "88" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000458505.jpg", "model_name": "BLIP2" }, { "question": "What event is it?", "answer": "flood in paris", "gt_answers": [ "Crue de la Seine" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000150161.jpg", "model_name": "BLIP2" }, { "question": "Which city is it?", "answer": "paris", "gt_answers": [ "Paris" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000150161.jpg", "model_name": "BLIP2" }, { "question": "What time does the Rolex sign say it is?", "answer": "it reads 01:00", "gt_answers": [ "2:28" ], "image_path": "./data/STVQA/VisualGenome/1/2324881.jpg", "model_name": "BLIP2" }, { "question": "What year is written on the tire?", "answer": "1984", "gt_answers": [ "2001" ], "image_path": "./data/STVQA/imageNet/n02974003_1454.JPEG", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the toy?", "answer": "lego", "gt_answers": [ "LEGO Group" ], "image_path": "./data/STVQA/imageNet/n02974003_1454.JPEG", "model_name": "BLIP2" }, { "question": "what sore is shown on the leftt", "answer": "mall", "gt_answers": [ "Topman" ], "image_path": "./data/STVQA/icdar/test_img_158.jpg", "model_name": "BLIP2" }, { "question": "What is the destination of the bus?", "answer": "montreal", "gt_answers": [ "Vulcan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000083219.jpg", "model_name": "BLIP2" }, { "question": "What number is above the headlight on the bus?", "answer": "the bus number", "gt_answers": [ "2405" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000083219.jpg", "model_name": "BLIP2" }, { "question": "what is dangerous about the floor?", "answer": "slippery", "gt_answers": [ "Wet Floor" ], "image_path": "./data/STVQA/icdar/img_573.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the players helmet?", "answer": "person", "gt_answers": [ "w" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000094155.jpg", "model_name": "BLIP2" }, { "question": "What are the first 3 letters on the players jersey?", "answer": "person", "gt_answers": [ "BRO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000094155.jpg", "model_name": "BLIP2" }, { "question": "what is the last letter on the players jersey?", "answer": "person", "gt_answers": [ "N" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000094155.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "detour ahead", "gt_answers": [ "Detour ahead" ], "image_path": "./data/STVQA/VisualGenome/1/2326260.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the small bridge say?", "answer": "no wake zone", "gt_answers": [ "Open" ], "image_path": "./data/STVQA/imageNet/n03947888_6703.JPEG", "model_name": "BLIP2" }, { "question": "What is the first 4 letters of the licence plate?", "answer": "person", "gt_answers": [ "BJFG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000445006.jpg", "model_name": "BLIP2" }, { "question": "What is written on the orange sign in white letters?", "answer": "person", "gt_answers": [ "rexall" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499307.jpg", "model_name": "BLIP2" }, { "question": "What brand is being advertised on the yellow sign?", "answer": "person", "gt_answers": [ "Cowan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499307.jpg", "model_name": "BLIP2" }, { "question": "What is the top word in the bottom right corner?", "answer": "person", "gt_answers": [ "Fotografia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553896.jpg", "model_name": "BLIP2" }, { "question": "What is the middle word in the bottom right corner", "answer": "person", "gt_answers": [ "mpergon" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553896.jpg", "model_name": "BLIP2" }, { "question": "What two websites appear on the bottom right corner?", "answer": "person and person", "gt_answers": [ "Flickr & Facebook" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553896.jpg", "model_name": "BLIP2" }, { "question": "What number is written in red?", "answer": "4", "gt_answers": [ "10" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000049551.jpg", "model_name": "BLIP2" }, { "question": "Where is the tennis match being held?", "answer": "in a stadium", "gt_answers": [ "wimbledon" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000231748.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the skateboarder\ufffd\ufffd\ufffds shirt?", "answer": "skate", "gt_answers": [ "CIRCA" ], "image_path": "./data/STVQA/VisualGenome/1/1159579.jpg", "model_name": "BLIP2" }, { "question": "what hotel is on the magazine?", "answer": "holiday inn express hotel &amp suites", "gt_answers": [ "Holiday Inn" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000537862.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store in this photograph?", "answer": "mrt", "gt_answers": [ "7 Eleven" ], "image_path": "./data/STVQA/VisualGenome/1/2360301.jpg", "model_name": "BLIP2" }, { "question": "What is the title of this video?", "answer": "signal jam", "gt_answers": [ "Signal Jam", "SIGNAL_JAM" ], "image_path": "./data/STVQA/VisualGenome/1/2317421.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the advertisement on the right of the tennis player?", "answer": "nike", "gt_answers": [ "Spa" ], "image_path": "./data/STVQA/VisualGenome/1/2334339.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the item the mouse is in?", "answer": "wheel", "gt_answers": [ "Wodent wheel" ], "image_path": "./data/STVQA/imageNet/n02342885_3448.JPEG", "model_name": "BLIP2" }, { "question": "What flavor is this Mustard?", "answer": "spicy brown", "gt_answers": [ "Spicy Brown" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013125.jpg", "model_name": "BLIP2" }, { "question": "Who makes this Mustard?", "answer": "france", "gt_answers": [ "French's" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013125.jpg", "model_name": "BLIP2" }, { "question": "How natural is the Mustard?", "answer": "it is made with natural ingredients", "gt_answers": [ "100% natural", "100%" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013125.jpg", "model_name": "BLIP2" }, { "question": "what discount is on offer here?", "answer": "20% off", "gt_answers": [ "20% off" ], "image_path": "./data/STVQA/icdar/img_326.jpg", "model_name": "BLIP2" }, { "question": "What store is on the right?", "answer": "zara", "gt_answers": [ "H&M" ], "image_path": "./data/STVQA/icdar/test_img_113.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "aprovechar la violencia", "gt_answers": [ "Metro" ], "image_path": "./data/STVQA/VisualGenome/1/2326650.jpg", "model_name": "BLIP2" }, { "question": "What gaming system are these people using?", "answer": "wii", "gt_answers": [ "wii" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000222472.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "an antique store", "gt_answers": [ "Pucket's Grocery", "PUCKETS GROCERY" ], "image_path": "./data/STVQA/VisualGenome/2/2414407.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the magazine in the picture?", "answer": "domino", "gt_answers": [ "domino" ], "image_path": "./data/STVQA/VisualGenome/1/2317136.jpg", "model_name": "BLIP2" }, { "question": "What beer is advertised?", "answer": "bud light", "gt_answers": [ "COORS LIGHT" ], "image_path": "./data/STVQA/IIIT_text/510.jpg", "model_name": "BLIP2" }, { "question": "What is name of business?", "answer": "pizza hut", "gt_answers": [ "ZAFFIRO'S" ], "image_path": "./data/STVQA/IIIT_text/510.jpg", "model_name": "BLIP2" }, { "question": "When was business started?", "answer": "1939", "gt_answers": [ "1954" ], "image_path": "./data/STVQA/IIIT_text/510.jpg", "model_name": "BLIP2" }, { "question": "What company's headquarters are these?", "answer": "sony corporation", "gt_answers": [ "Sony" ], "image_path": "./data/STVQA/IIIT_text/img_001173.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of the van?", "answer": "police", "gt_answers": [ "police" ], "image_path": "./data/STVQA/IIIT_text/3080.jpg", "model_name": "BLIP2" }, { "question": "What are the last three digits on the car's license plate?", "answer": "3", "gt_answers": [ "007" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000077806.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the player?", "answer": "tow", "gt_answers": [ "TOVAR" ], "image_path": "./data/STVQA/VisualGenome/2/2411747.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the player?", "answer": "32", "gt_answers": [ "83" ], "image_path": "./data/STVQA/VisualGenome/2/2411747.jpg", "model_name": "BLIP2" }, { "question": "What is in the jar?", "answer": "chili sauce", "gt_answers": [ "Butterscotch" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006020.jpg", "model_name": "BLIP2" }, { "question": "Who took this photograph?", "answer": "daniel schmid", "gt_answers": [ "Georg Peter Landsiedel" ], "image_path": "./data/STVQA/VisualGenome/1/2325539.jpg", "model_name": "BLIP2" }, { "question": "What is the top line of the statement on the boy's shirt?", "answer": "i love baseball", "gt_answers": [ "WHAT'S ON MY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000364815.jpg", "model_name": "BLIP2" }, { "question": "What is the maximum fine?", "answer": "&ampamp &amp &amp &amp &amp", "gt_answers": [ "$10,000" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000173607.jpg", "model_name": "BLIP2" }, { "question": "What will happen to offenders?", "answer": "they will be fined", "gt_answers": [ "Prosecuted" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000173607.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "spadina avenue", "gt_answers": [ "Spadina Av" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000173607.jpg", "model_name": "BLIP2" }, { "question": "When was the copyright issued?", "answer": "1923", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/VisualGenome/1/2375396.jpg", "model_name": "BLIP2" }, { "question": "What is the statement of warning on the door?", "answer": "bear in the car", "gt_answers": [ "This could happen to your vehicle" ], "image_path": "./data/STVQA/VisualGenome/1/2361799.jpg", "model_name": "BLIP2" }, { "question": "What is the last letter on the train car?", "answer": "person", "gt_answers": [ "r" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365759.jpg", "model_name": "BLIP2" }, { "question": "What is the first letter on the train car?", "answer": "c", "gt_answers": [ "g" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365759.jpg", "model_name": "BLIP2" }, { "question": "What letter appears in the middle of the train car?", "answer": "c", "gt_answers": [ "w" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365759.jpg", "model_name": "BLIP2" }, { "question": "What is the lowest number written on a drawer?", "answer": "1", "gt_answers": [ "10" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000571431.jpg", "model_name": "BLIP2" }, { "question": "What is the word written below the fruit?", "answer": "vitamins", "gt_answers": [ "Vitamins!", "VITAMINS" ], "image_path": "./data/STVQA/VisualGenome/1/2320687.jpg", "model_name": "BLIP2" }, { "question": "what does white board attached to the pole displayed", "answer": "caution pedestrians crossing", "gt_answers": [ "CCTV" ], "image_path": "./data/STVQA/imageNet/n06874185_1436.JPEG", "model_name": "BLIP2" }, { "question": "what does the red board displayed", "answer": "caution pedestrians crossing", "gt_answers": [ "CAUTION PEDESTRIANS CROSSING" ], "image_path": "./data/STVQA/imageNet/n06874185_1436.JPEG", "model_name": "BLIP2" }, { "question": "What kind of station is this?", "answer": "police station", "gt_answers": [ "Police" ], "image_path": "./data/STVQA/IIIT_text/img_000926.jpg", "model_name": "BLIP2" }, { "question": "What are the last three letters on the truck license plate?", "answer": "person", "gt_answers": [ "MCO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000379143.jpg", "model_name": "BLIP2" }, { "question": "What is the first name shown in the image?", "answer": "person", "gt_answers": [ "Rachel" ], "image_path": "./data/STVQA/IIIT_text/626.jpg", "model_name": "BLIP2" }, { "question": "Main scent of this product?", "answer": "lime", "gt_answers": [ "Cucumber" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016907.jpg", "model_name": "BLIP2" }, { "question": "What company made this product?", "answer": "fuji apple", "gt_answers": [ "Fuji Apple" ], "image_path": "./data/STVQA/imageNet/n02971356_2288.JPEG", "model_name": "BLIP2" }, { "question": "What company is the pink umbrella from?", "answer": "samsonite", "gt_answers": [ "Avon" ], "image_path": "./data/STVQA/VisualGenome/1/2359894.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of the bus?", "answer": "taiwan", "gt_answers": [ "A4734" ], "image_path": "./data/STVQA/VisualGenome/1/2343338.jpg", "model_name": "BLIP2" }, { "question": "What does the sign next to the stop sign read?", "answer": "no parking sign", "gt_answers": [ "Court" ], "image_path": "./data/STVQA/VisualGenome/1/2369960.jpg", "model_name": "BLIP2" }, { "question": "What is written on the octogonal red sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2369960.jpg", "model_name": "BLIP2" }, { "question": "How much does the product weigh?", "answer": "750 ml", "gt_answers": [ "432 G" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014642.jpg", "model_name": "BLIP2" }, { "question": "What vegetable is in the can?", "answer": "cauliflower", "gt_answers": [ "Corn.", "COR" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014642.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the green sign to the right?", "answer": "person", "gt_answers": [ "Phone" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000398938.jpg", "model_name": "BLIP2" }, { "question": "what is the brand of the tennis racket?", "answer": "person", "gt_answers": [ "wilson" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000101218.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2010", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/imageNet/n04311174_7512.JPEG", "model_name": "BLIP2" }, { "question": "How many men are in this picture?", "answer": "three", "gt_answers": [ "three" ], "image_path": "./data/STVQA/VisualGenome/2/2405370.jpg", "model_name": "BLIP2" }, { "question": "What color shirt is the man in the middle wearing?", "answer": "green", "gt_answers": [ "green" ], "image_path": "./data/STVQA/VisualGenome/2/2405370.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street posted?", "answer": "washington dc", "gt_answers": [ "GEORGE WASHINGTON Blvd." ], "image_path": "./data/STVQA/VisualGenome/1/2336387.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the jersey of the player to the right?", "answer": "person", "gt_answers": [ "8" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373575.jpg", "model_name": "BLIP2" }, { "question": "what is the type of gecko in the image?", "answer": "leopard gecko", "gt_answers": [ "Golden", "golden gate" ], "image_path": "./data/STVQA/imageNet/n01675722_11054.JPEG", "model_name": "BLIP2" }, { "question": "What name is written on this building?", "answer": "marriott", "gt_answers": [ "Marriott" ], "image_path": "./data/STVQA/IIIT_text/img_000738.jpg", "model_name": "BLIP2" }, { "question": "What is the name for the player number 32?", "answer": "scott nogelsen", "gt_answers": [ "Vogelsong" ], "image_path": "./data/STVQA/VisualGenome/1/2324879.jpg", "model_name": "BLIP2" }, { "question": "What event is being celebrated (one word)?", "answer": "birthday", "gt_answers": [ "BIRTHDAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000250424.jpg", "model_name": "BLIP2" }, { "question": "What expression is on the dog's face?", "answer": "happy", "gt_answers": [ "happy" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000250424.jpg", "model_name": "BLIP2" }, { "question": "What is the certificate of?", "answer": "a certificate of authenticity", "gt_answers": [ "Authenticity" ], "image_path": "./data/STVQA/imageNet/n03272010_3475.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "road work ahead", "gt_answers": [ "Road work ahead" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000136652.jpg", "model_name": "BLIP2" }, { "question": "According to the sign what is reserved?", "answer": "parking lot", "gt_answers": [ "PARKING" ], "image_path": "./data/STVQA/VisualGenome/2/1867.jpg", "model_name": "BLIP2" }, { "question": "What is the red octagon instructing you to do?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/2/1867.jpg", "model_name": "BLIP2" }, { "question": "What Cardinal is one the street sign?", "answer": "red cardinal", "gt_answers": [ "CARDINAL MEDEIROS AVE" ], "image_path": "./data/STVQA/VisualGenome/2/1867.jpg", "model_name": "BLIP2" }, { "question": "What store is in front of the bus?", "answer": "walmart", "gt_answers": [ "Marshalls" ], "image_path": "./data/STVQA/VisualGenome/1/2367038.jpg", "model_name": "BLIP2" }, { "question": "What is written on the red sign in front of the bus?", "answer": "bus stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2367038.jpg", "model_name": "BLIP2" }, { "question": "What are the first two words of the black lettering?", "answer": "san francisco", "gt_answers": [ "All Rights" ], "image_path": "./data/STVQA/IIIT_text/809.jpg", "model_name": "BLIP2" }, { "question": "What color is the tin box?", "answer": "red", "gt_answers": [ "altoids" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000280926.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street on the top sign?", "answer": "pete rose", "gt_answers": [ "Pete Rose Way" ], "image_path": "./data/STVQA/VisualGenome/2/2400340.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street on the lower sign?", "answer": "pete rose", "gt_answers": [ "Broadway" ], "image_path": "./data/STVQA/VisualGenome/2/2400340.jpg", "model_name": "BLIP2" }, { "question": "What number is next to Broadway?", "answer": "4", "gt_answers": [ "199" ], "image_path": "./data/STVQA/VisualGenome/2/2400340.jpg", "model_name": "BLIP2" }, { "question": "What time is it?", "answer": "in the car", "gt_answers": [ "4:15" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000057264.jpg", "model_name": "BLIP2" }, { "question": "What is written on the arrow on the street sign?", "answer": "nassau street", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/1/2371883.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "nassau street", "gt_answers": [ "MOSCO ST" ], "image_path": "./data/STVQA/VisualGenome/1/2371883.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2371883.jpg", "model_name": "BLIP2" }, { "question": "What letters are on the green part of the sign?", "answer": "person", "gt_answers": [ "SCT" ], "image_path": "./data/STVQA/IIIT_text/4889.jpg", "model_name": "BLIP2" }, { "question": "What is written on the image", "answer": "butterfly on a leaf", "gt_answers": [ "lanjenfong" ], "image_path": "./data/STVQA/imageNet/n02281787_2337.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the Travel Information sign?", "answer": "travel information", "gt_answers": [ "Save money, shed pounds, bike to work", "SAVE MONEY SHED POUNDS BIKE TO WORK" ], "image_path": "./data/STVQA/VisualGenome/1/2362898.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2000", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/imageNet/n04118538_7470.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the road?", "answer": "rd de la rochelle", "gt_answers": [ "RUE PETIT CULOT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000016080.jpg", "model_name": "BLIP2" }, { "question": "Which word means little in French?", "answer": "petit", "gt_answers": [ "PETIT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000016080.jpg", "model_name": "BLIP2" }, { "question": "What brand is the camera lens cover in the picture?", "answer": "nikon", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000575486.jpg", "model_name": "BLIP2" }, { "question": "What is the first letter of the word listed on the side of the beam?", "answer": "a", "gt_answers": [ "A" ], "image_path": "./data/STVQA/imageNet/n02777292_2163.JPEG", "model_name": "BLIP2" }, { "question": "What is one word displayed on this picture?", "answer": "canon", "gt_answers": [ "Canon" ], "image_path": "./data/STVQA/imageNet/n03657121_15161.JPEG", "model_name": "BLIP2" }, { "question": "what is the destination of the bus", "answer": "trafalgar square", "gt_answers": [ "HAMPSTEAD HEATH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000345625.jpg", "model_name": "BLIP2" }, { "question": "What type of plane is this?", "answer": "a wwii fighter plane", "gt_answers": [ "NAVION L-17" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000000247.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the propeller say?", "answer": "person", "gt_answers": [ "NAVION L-17" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000000247.jpg", "model_name": "BLIP2" }, { "question": "What is written after NAVION?", "answer": "person", "gt_answers": [ "L-17" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000000247.jpg", "model_name": "BLIP2" }, { "question": "What product is on the shelves of the stand?", "answer": "books", "gt_answers": [ "books" ], "image_path": "./data/STVQA/imageNet/n02871525_29077.JPEG", "model_name": "BLIP2" }, { "question": "What is the make of the monitor?", "answer": "acer", "gt_answers": [ "Dell" ], "image_path": "./data/STVQA/VisualGenome/2/914.jpg", "model_name": "BLIP2" }, { "question": "When was the picture taken?", "answer": "june 2012", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000575252.jpg", "model_name": "BLIP2" }, { "question": "What does the yellow sign say?", "answer": "rough road", "gt_answers": [ "Rough Road" ], "image_path": "./data/STVQA/VisualGenome/2/4394.jpg", "model_name": "BLIP2" }, { "question": "What is the first letter on the white sign?", "answer": "r", "gt_answers": [ "S" ], "image_path": "./data/STVQA/VisualGenome/2/4394.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of her neck brace?", "answer": "the vertebral pillow", "gt_answers": [ "Aspen" ], "image_path": "./data/STVQA/imageNet/n03814639_4082.JPEG", "model_name": "BLIP2" }, { "question": "which number is on the PRICE jersey in the top left of the photo?", "answer": "4", "gt_answers": [ "31" ], "image_path": "./data/STVQA/VisualGenome/2/2404153.jpg", "model_name": "BLIP2" }, { "question": "Where is the photo taken?", "answer": "pakistan", "gt_answers": [ "Rakaposhi, Pakistan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000013497.jpg", "model_name": "BLIP2" }, { "question": "What country is in this photo?", "answer": "pakistan", "gt_answers": [ "Pakistan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000013497.jpg", "model_name": "BLIP2" }, { "question": "what is worded on the red sign?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/1/1592376.jpg", "model_name": "BLIP2" }, { "question": "What type of protection does the drywall offer?", "answer": "fire protection", "gt_answers": [ "Superior moisture and mold protection" ], "image_path": "./data/STVQA/VisualGenome/1/2342676.jpg", "model_name": "BLIP2" }, { "question": "What number is on the dog's vest?", "answer": "2", "gt_answers": [ "1" ], "image_path": "./data/STVQA/imageNet/n02091831_680.JPEG", "model_name": "BLIP2" }, { "question": "What color is the dog's vest?", "answer": "red", "gt_answers": [ "Red" ], "image_path": "./data/STVQA/imageNet/n02091831_680.JPEG", "model_name": "BLIP2" }, { "question": "What color is the dog?", "answer": "white and brown", "gt_answers": [ "Tan and white", "WHITE, GREY" ], "image_path": "./data/STVQA/imageNet/n02091831_680.JPEG", "model_name": "BLIP2" }, { "question": "WHAT WORD IS ON THE BLUE SIGN IN THE TOP LEFT CORNER?", "answer": "cow", "gt_answers": [ "CUSHIONS" ], "image_path": "./data/STVQA/VisualGenome/1/2346653.jpg", "model_name": "BLIP2" }, { "question": "WHAT WORD IS ABOVE READYMADES?", "answer": "cow", "gt_answers": [ "TEXTILES" ], "image_path": "./data/STVQA/VisualGenome/1/2346653.jpg", "model_name": "BLIP2" }, { "question": "What day was the photo taken?", "answer": "sunday", "gt_answers": [ "23" ], "image_path": "./data/STVQA/VisualGenome/1/2331408.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2010", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/VisualGenome/1/2331408.jpg", "model_name": "BLIP2" }, { "question": "What type of food is in the box?", "answer": "donuts", "gt_answers": [ "donuts" ], "image_path": "./data/STVQA/VisualGenome/2/2415437.jpg", "model_name": "BLIP2" }, { "question": "What is written in green letters at the top?", "answer": "mass transit", "gt_answers": [ "mass transit" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000291855.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "3", "gt_answers": [ "4080" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000291855.jpg", "model_name": "BLIP2" }, { "question": "What type of transit is this?", "answer": "mass transit", "gt_answers": [ "mass" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000291855.jpg", "model_name": "BLIP2" }, { "question": "What does the guy's shirt say?", "answer": "wii", "gt_answers": [ "Cat" ], "image_path": "./data/STVQA/VisualGenome/1/2342372.jpg", "model_name": "BLIP2" }, { "question": "What make is the eraser?", "answer": "sanford", "gt_answers": [ "Impega" ], "image_path": "./data/STVQA/imageNet/n03908618_16819.JPEG", "model_name": "BLIP2" }, { "question": "What make is the pencil?", "answer": "unbranded", "gt_answers": [ "Dixon" ], "image_path": "./data/STVQA/imageNet/n03908618_16819.JPEG", "model_name": "BLIP2" }, { "question": "Who took the image?", "answer": "the photographer", "gt_answers": [ "Angela Kraft" ], "image_path": "./data/STVQA/imageNet/n02441942_20126.JPEG", "model_name": "BLIP2" }, { "question": "What is the number of the skier?", "answer": "4", "gt_answers": [ "4" ], "image_path": "./data/STVQA/VisualGenome/1/2365924.jpg", "model_name": "BLIP2" }, { "question": "What is displayed on the front and side of the vending machine?", "answer": "mountain dew", "gt_answers": [ "Mountain Dew" ], "image_path": "./data/STVQA/imageNet/n04525305_11403.JPEG", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the truck?", "answer": "mountain dew", "gt_answers": [ "Dodge" ], "image_path": "./data/STVQA/imageNet/n04525305_11403.JPEG", "model_name": "BLIP2" }, { "question": "Who is the photographer?", "answer": "person", "gt_answers": [ "JASKIRAT SINGH BAWA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000276909.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number (4 digits)?", "answer": "royal enfield classic 350", "gt_answers": [ "9500" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000276909.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the green banner?", "answer": "paris", "gt_answers": [ "PARIBAS" ], "image_path": "./data/STVQA/VisualGenome/1/2372419.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the street sign?", "answer": "cherry ave", "gt_answers": [ "Cherry Av" ], "image_path": "./data/STVQA/VisualGenome/1/2368012.jpg", "model_name": "BLIP2" }, { "question": "What kid of place is the supermarket?", "answer": "grocery store", "gt_answers": [ "Food Plus" ], "image_path": "./data/STVQA/VisualGenome/1/2368012.jpg", "model_name": "BLIP2" }, { "question": "What is the word under the word food?", "answer": "cherry", "gt_answers": [ "GROCERY" ], "image_path": "./data/STVQA/VisualGenome/1/2368012.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "par", "gt_answers": [ "pare" ], "image_path": "./data/STVQA/VisualGenome/2/2409030.jpg", "model_name": "BLIP2" }, { "question": "What is the price of the Kitchen?", "answer": "it is a ikea kitchen", "gt_answers": [ "2490" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000153064.jpg", "model_name": "BLIP2" }, { "question": "What is the letter located on the skier's thigh?", "answer": "person", "gt_answers": [ "b" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000395040.jpg", "model_name": "BLIP2" }, { "question": "what make is the chair?", "answer": "schroth", "gt_answers": [ "sparco" ], "image_path": "./data/STVQA/imageNet/n04037443_9119.JPEG", "model_name": "BLIP2" }, { "question": "What are the letters underneath the snow board?", "answer": "person", "gt_answers": [ "FORUM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499396.jpg", "model_name": "BLIP2" }, { "question": "What word is shown next to the sun/bird/waves image?", "answer": "person", "gt_answers": [ "MET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402297.jpg", "model_name": "BLIP2" }, { "question": "What number bus are these passengers riding?", "answer": "person", "gt_answers": [ "2303" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402297.jpg", "model_name": "BLIP2" }, { "question": "A passenger just requested a:", "answer": "what is the name of person's dog", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402297.jpg", "model_name": "BLIP2" }, { "question": "When was the golf course found?", "answer": "the golf course was found in 1895", "gt_answers": [ "1982" ], "image_path": "./data/STVQA/VisualGenome/2/2417940.jpg", "model_name": "BLIP2" }, { "question": "What is the total telethon pledge so far?", "answer": "over a billion dollars", "gt_answers": [ "$1,337" ], "image_path": "./data/STVQA/VisualGenome/2/634.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the police department?", "answer": "new york city police department", "gt_answers": [ "NEW YORK POLICE DEPT" ], "image_path": "./data/STVQA/VisualGenome/2/634.jpg", "model_name": "BLIP2" }, { "question": "What company sign is lit up in pink?", "answer": "yahoo", "gt_answers": [ "Yahoo" ], "image_path": "./data/STVQA/VisualGenome/2/634.jpg", "model_name": "BLIP2" }, { "question": "what is the word in big letters on the red sign?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/1/2367039.jpg", "model_name": "BLIP2" }, { "question": "What does the yellow sign say?", "answer": "yellow brick rd", "gt_answers": [ "Yellow Brick Rd" ], "image_path": "./data/STVQA/VisualGenome/2/2417211.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom sign say?", "answer": "yellow brick rd", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/2/2417211.jpg", "model_name": "BLIP2" }, { "question": "What does the black sign say?", "answer": "yellow brick rd", "gt_answers": [ "Pancake Blvd", "PANCAKE BLVD." ], "image_path": "./data/STVQA/VisualGenome/2/2417211.jpg", "model_name": "BLIP2" }, { "question": "How much vitamin C is in this product?", "answer": "0 mg vitamin c", "gt_answers": [ "4%" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011029.jpg", "model_name": "BLIP2" }, { "question": "What is the website address?", "answer": "www.starbulldogs.com", "gt_answers": [ "www.stafbul.com" ], "image_path": "./data/STVQA/imageNet/n02093256_10439.JPEG", "model_name": "BLIP2" }, { "question": "What is straight ahead?", "answer": "roma", "gt_answers": [ "Roma" ], "image_path": "./data/STVQA/VisualGenome/1/2363606.jpg", "model_name": "BLIP2" }, { "question": "What is on the cup?", "answer": "onion rings", "gt_answers": [ "Wind mill" ], "image_path": "./data/STVQA/VisualGenome/2/2404539.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus headed?", "answer": "downtown", "gt_answers": [ "Mt Airy" ], "image_path": "./data/STVQA/VisualGenome/1/2372803.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the entrance on the left", "answer": "toilets", "gt_answers": [ "SHIELA'S" ], "image_path": "./data/STVQA/VisualGenome/1/2374606.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the entrance on the right", "answer": "toilets", "gt_answers": [ "BLOKE'S" ], "image_path": "./data/STVQA/VisualGenome/1/2374606.jpg", "model_name": "BLIP2" }, { "question": "What is the place name written at the top on the person's shirt?", "answer": "red wings", "gt_answers": [ "Detroit" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000370727.jpg", "model_name": "BLIP2" }, { "question": "What is the station?", "answer": "train station", "gt_answers": [ "jennings", "JENNINGS STATION" ], "image_path": "./data/STVQA/imageNet/n03220513_9458.JPEG", "model_name": "BLIP2" }, { "question": "What is being signed up today?", "answer": "a street sign", "gt_answers": [ "Northwest Seattle Little League" ], "image_path": "./data/STVQA/VisualGenome/1/2360554.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the player that has the Frisbee?", "answer": "person", "gt_answers": [ "4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000409628.jpg", "model_name": "BLIP2" }, { "question": "What does the bag say?", "answer": "construction", "gt_answers": [ "Thank You" ], "image_path": "./data/STVQA/VisualGenome/1/2325274.jpg", "model_name": "BLIP2" }, { "question": "What direction is on the back wall?", "answer": "west", "gt_answers": [ "WEST" ], "image_path": "./data/STVQA/VisualGenome/2/2407306.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THIS SHOP?", "answer": "telecommunications", "gt_answers": [ "POP TELECOM" ], "image_path": "./data/STVQA/icdar/img_232.jpg", "model_name": "BLIP2" }, { "question": "What is written in front of the store?", "answer": "", "gt_answers": [ "POP TELECOM" ], "image_path": "./data/STVQA/icdar/img_232.jpg", "model_name": "BLIP2" }, { "question": "What is written along the horizontal line of the cross sign?", "answer": "allen chapel", "gt_answers": [ "Allen Chapel" ], "image_path": "./data/STVQA/VisualGenome/1/2377350.jpg", "model_name": "BLIP2" }, { "question": "What is the name written in the box under the cross?", "answer": "allen chapel", "gt_answers": [ "Dr. Sherryl Matlock" ], "image_path": "./data/STVQA/VisualGenome/1/2377350.jpg", "model_name": "BLIP2" }, { "question": "How many bottles of alchol are there?", "answer": "2", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406295.jpg", "model_name": "BLIP2" }, { "question": "How many cards are there?", "answer": "two", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406295.jpg", "model_name": "BLIP2" }, { "question": "What beer is the red truck branded for?", "answer": "bud light", "gt_answers": [ "Budweiser" ], "image_path": "./data/STVQA/VisualGenome/1/2372063.jpg", "model_name": "BLIP2" }, { "question": "What is the company name written in green?", "answer": "person", "gt_answers": [ "Northside" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000530750.jpg", "model_name": "BLIP2" }, { "question": "What is the word in white bold font next to the apple slice?", "answer": "apple", "gt_answers": [ "Unser" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015300.jpg", "model_name": "BLIP2" }, { "question": "Which number train is this?", "answer": "m train", "gt_answers": [ "54" ], "image_path": "./data/STVQA/VisualGenome/2/2406256.jpg", "model_name": "BLIP2" }, { "question": "Where is this train going to?", "answer": "a city", "gt_answers": [ "Cermak" ], "image_path": "./data/STVQA/VisualGenome/2/2406256.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the copyright name at the bottom?", "answer": "john", "gt_answers": [ "Casey" ], "image_path": "./data/STVQA/imageNet/n01796340_13874.JPEG", "model_name": "BLIP2" }, { "question": "What is the question for #4", "answer": "what is the declaration of independence", "gt_answers": [ "What do you call a tyrants harsh reign?" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002474.jpg", "model_name": "BLIP2" }, { "question": "What is shown in the top right corner?", "answer": "the declaration of independence", "gt_answers": [ "Review" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002474.jpg", "model_name": "BLIP2" }, { "question": "What is this store free of?", "answer": "duty", "gt_answers": [ "Duty" ], "image_path": "./data/STVQA/IIIT_text/img_000427.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the side of the ship?", "answer": "hatsu", "gt_answers": [ "Hatsu" ], "image_path": "./data/STVQA/imageNet/n03095699_5088.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the paper on the wall?", "answer": "a list of items on the table", "gt_answers": [ "Laptop Charity Project" ], "image_path": "./data/STVQA/VisualGenome/1/2328625.jpg", "model_name": "BLIP2" }, { "question": "What number is written", "answer": "1", "gt_answers": [ "97" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402621.jpg", "model_name": "BLIP2" }, { "question": "What does the paper say?", "answer": "it is a coupon", "gt_answers": [ "Coffee Pot Restaurant" ], "image_path": "./data/STVQA/imageNet/n03063689_9251.JPEG", "model_name": "BLIP2" }, { "question": "What does the octagonal red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000266334.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bottom right of the train?", "answer": "person", "gt_answers": [ "158723" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000042156.jpg", "model_name": "BLIP2" }, { "question": "What word is on the bottom left of the train?", "answer": "person", "gt_answers": [ "first" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000042156.jpg", "model_name": "BLIP2" }, { "question": "What 6 digit number is on the train?", "answer": "057", "gt_answers": [ "158723" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000042156.jpg", "model_name": "BLIP2" }, { "question": "What is the operation listed?", "answer": "cup", "gt_answers": [ "Cuppa" ], "image_path": "./data/STVQA/VisualGenome/1/2363737.jpg", "model_name": "BLIP2" }, { "question": "What color has the car on the left?", "answer": "yellow", "gt_answers": [ "red" ], "image_path": "./data/STVQA/VisualGenome/2/4987.jpg", "model_name": "BLIP2" }, { "question": "What color has the car on the right?", "answer": "yellow", "gt_answers": [ "yellow" ], "image_path": "./data/STVQA/VisualGenome/2/4987.jpg", "model_name": "BLIP2" }, { "question": "What words are written on the brown pot?", "answer": "brew pot", "gt_answers": [ "Aerobell" ], "image_path": "./data/STVQA/imageNet/n04554684_3186.JPEG", "model_name": "BLIP2" }, { "question": "What does the logo on the tank say?", "answer": "mcdonalds", "gt_answers": [ "Aerobell" ], "image_path": "./data/STVQA/imageNet/n04554684_3186.JPEG", "model_name": "BLIP2" }, { "question": "What number is the player with the bat in his hands?", "answer": "2", "gt_answers": [ "13" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000302051.jpg", "model_name": "BLIP2" }, { "question": "What is the net weight of this product in ounces?", "answer": "16 ounces", "gt_answers": [ "15", "15 oz" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016443.jpg", "model_name": "BLIP2" }, { "question": "How many grams is this product?", "answer": "450 grams", "gt_answers": [ "425" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016443.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the red ruler?", "answer": "modern", "gt_answers": [ "Modern" ], "image_path": "./data/STVQA/imageNet/n04118776_15789.JPEG", "model_name": "BLIP2" }, { "question": "What is the amount of sale?", "answer": "the price of the gas", "gt_answers": [ "24.93" ], "image_path": "./data/STVQA/imageNet/n03425413_7985.JPEG", "model_name": "BLIP2" }, { "question": "What is written next to the white heart?", "answer": "person", "gt_answers": [ "Wedding Special" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000099162.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the red metal sheet sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2323006.jpg", "model_name": "BLIP2" }, { "question": "what handwritten word is written next to the sign?", "answer": "hate", "gt_answers": [ "HATE" ], "image_path": "./data/STVQA/VisualGenome/1/2323006.jpg", "model_name": "BLIP2" }, { "question": "letter on fridge?", "answer": "letter on fridge", "gt_answers": [ "A" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000470414.jpg", "model_name": "BLIP2" }, { "question": "What is written on the school bus?", "answer": "school bus", "gt_answers": [ "ATLANTIC EXPRESS" ], "image_path": "./data/STVQA/VisualGenome/1/2353548.jpg", "model_name": "BLIP2" }, { "question": "Who is the copyright owner?", "answer": "the photographer", "gt_answers": [ "Miguel Navaza" ], "image_path": "./data/STVQA/VisualGenome/1/2345298.jpg", "model_name": "BLIP2" }, { "question": "What is the year of the copyright?", "answer": "2010", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/VisualGenome/1/2345298.jpg", "model_name": "BLIP2" }, { "question": "What type of beer is in the photo?", "answer": "coors light", "gt_answers": [ "Corona Extra" ], "image_path": "./data/STVQA/VisualGenome/1/2363851.jpg", "model_name": "BLIP2" }, { "question": "What number is in the bottom right corner?", "answer": "4", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000347292.jpg", "model_name": "BLIP2" }, { "question": "What is the first word is word on the bottom right ?", "answer": "person", "gt_answers": [ "axel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000347292.jpg", "model_name": "BLIP2" }, { "question": "What does the grey shirt say", "answer": "person", "gt_answers": [ "Dynamic sports Management & Bethessa Sports and Health Club" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000524027.jpg", "model_name": "BLIP2" }, { "question": "What number is on the black jersey?", "answer": "2", "gt_answers": [ "5" ], "image_path": "./data/STVQA/VisualGenome/1/2316691.jpg", "model_name": "BLIP2" }, { "question": "What day is the SF Weekly free on", "answer": "saturday", "gt_answers": [ "Wednesday" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000382083.jpg", "model_name": "BLIP2" }, { "question": "What is the price displayed in the right of the picture?", "answer": "free", "gt_answers": [ "9.99", "999" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000382083.jpg", "model_name": "BLIP2" }, { "question": "What is the order number?", "answer": "40", "gt_answers": [ "49" ], "image_path": "./data/STVQA/VisualGenome/2/4826.jpg", "model_name": "BLIP2" }, { "question": "What is the airline called?", "answer": "person", "gt_answers": [ "Alitalia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000132773.jpg", "model_name": "BLIP2" }, { "question": "What do the signs want to do about foreclosures?", "answer": "stop foreclosures", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/2/2407418.jpg", "model_name": "BLIP2" }, { "question": "What three companies are sponsored in this event?", "answer": "mcdonald s coca cola", "gt_answers": [ "GATORADE EICO AT&T" ], "image_path": "./data/STVQA/VisualGenome/1/2351551.jpg", "model_name": "BLIP2" }, { "question": "What kind of wine was in the clear bottle?", "answer": "white wine", "gt_answers": [ "Elderflower Sparkling" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028945.jpg", "model_name": "BLIP2" }, { "question": "Where was the wine produced?", "answer": "australia", "gt_answers": [ "New Zealand" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028945.jpg", "model_name": "BLIP2" }, { "question": "What number comes directly before the 3 in this picture?", "answer": "2", "gt_answers": [ "0" ], "image_path": "./data/STVQA/icdar/img_932.jpg", "model_name": "BLIP2" }, { "question": "What number comes after 03-?", "answer": "04-", "gt_answers": [ "26" ], "image_path": "./data/STVQA/icdar/img_932.jpg", "model_name": "BLIP2" }, { "question": "What is the first name on the sign?", "answer": "mr", "gt_answers": [ "Buffet Town" ], "image_path": "./data/STVQA/icdar/test_img_178.jpg", "model_name": "BLIP2" }, { "question": "What name is written on the big white sign above the truck?", "answer": "mcdonalds", "gt_answers": [ "Jones" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000158451.jpg", "model_name": "BLIP2" }, { "question": "what is the BRAND name of the toaster?", "answer": "ruby", "gt_answers": [ "DU-PLEX locust", "DU-PLEX" ], "image_path": "./data/STVQA/imageNet/n04442312_7525.JPEG", "model_name": "BLIP2" }, { "question": "What year is in the corner of the image?", "answer": "2010", "gt_answers": [ "2007" ], "image_path": "./data/STVQA/imageNet/n04141076_42088.JPEG", "model_name": "BLIP2" }, { "question": "What is the number of the train car, painted in yellow?", "answer": "person", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000514519.jpg", "model_name": "BLIP2" }, { "question": "What is the first word written in white on the blue background of the monitor?", "answer": "computer", "gt_answers": [ "Unley" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000410436.jpg", "model_name": "BLIP2" }, { "question": "What is the object title?", "answer": "jesus face", "gt_answers": [ "Church Prayer Rug" ], "image_path": "./data/STVQA/imageNet/n03998194_9441.JPEG", "model_name": "BLIP2" }, { "question": "What are the two words on the front of the telephone?", "answer": "flash relay", "gt_answers": [ "Flash Redial" ], "image_path": "./data/STVQA/imageNet/n03187595_2495.JPEG", "model_name": "BLIP2" }, { "question": "What does the top part of the center button do?", "answer": "it is a flash dial", "gt_answers": [ "Flash" ], "image_path": "./data/STVQA/imageNet/n03187595_2495.JPEG", "model_name": "BLIP2" }, { "question": "What does the bottom part of the center button do?", "answer": "turn the phone off", "gt_answers": [ "Redial" ], "image_path": "./data/STVQA/imageNet/n03187595_2495.JPEG", "model_name": "BLIP2" }, { "question": "What number is the top blue box?", "answer": "1", "gt_answers": [ "1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175651.jpg", "model_name": "BLIP2" }, { "question": "What number is the bottom blue box?", "answer": "2", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175651.jpg", "model_name": "BLIP2" }, { "question": "What is written inside the circle cutout on the top blue box?", "answer": "samsung galaxy s3", "gt_answers": [ "Wii" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175651.jpg", "model_name": "BLIP2" }, { "question": "What car company sponsored this event?", "answer": "honda", "gt_answers": [ "Kia" ], "image_path": "./data/STVQA/VisualGenome/1/2377149.jpg", "model_name": "BLIP2" }, { "question": "Where is no parking allowed?", "answer": "in front of the city hall", "gt_answers": [ "IN AISLES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000454916.jpg", "model_name": "BLIP2" }, { "question": "What name is under the clock?", "answer": "pasha", "gt_answers": [ "Pasela" ], "image_path": "./data/STVQA/VisualGenome/1/2354695.jpg", "model_name": "BLIP2" }, { "question": "What is the red sign advertising?", "answer": "person", "gt_answers": [ "PLAZA SNACKS" ], "image_path": "./data/STVQA/IIIT_text/2926.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number on the red and blue car?", "answer": "person", "gt_answers": [ "LS06 BCX" ], "image_path": "./data/STVQA/IIIT_text/2926.jpg", "model_name": "BLIP2" }, { "question": "What does the top sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2361412.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom sign say?", "answer": "no parking", "gt_answers": [ "One way" ], "image_path": "./data/STVQA/VisualGenome/1/2361412.jpg", "model_name": "BLIP2" }, { "question": "When was this photo taken?", "answer": "a bathroom", "gt_answers": [ "11/14/2007" ], "image_path": "./data/STVQA/VisualGenome/1/1592672.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the yellow helmet?", "answer": "person", "gt_answers": [ "SCOTT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000450003.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the right side of the white bus?", "answer": "3", "gt_answers": [ "6035" ], "image_path": "./data/STVQA/VisualGenome/1/2366243.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to the next stop", "gt_answers": [ "North Bergen" ], "image_path": "./data/STVQA/VisualGenome/1/2366243.jpg", "model_name": "BLIP2" }, { "question": "What kind of automobile is the red SUV?", "answer": "suv", "gt_answers": [ "Mitsubishi" ], "image_path": "./data/STVQA/VisualGenome/1/2366243.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "sams club", "gt_answers": [ "Files" ], "image_path": "./data/STVQA/icdar/img_275.jpg", "model_name": "BLIP2" }, { "question": "What soft drink company name is on the red disk?", "answer": "coca cola", "gt_answers": [ "Coca-Cola" ], "image_path": "./data/STVQA/VisualGenome/1/2345165.jpg", "model_name": "BLIP2" }, { "question": "What store name appears in yellow in the background?", "answer": "walmart", "gt_answers": [ "Get Stuffed" ], "image_path": "./data/STVQA/VisualGenome/1/2345165.jpg", "model_name": "BLIP2" }, { "question": "What is the street name?", "answer": "johns market", "gt_answers": [ "10TH ST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000033891.jpg", "model_name": "BLIP2" }, { "question": "What sign in below the RC logo?", "answer": "johns market", "gt_answers": [ "JOHN'S MARKET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000033891.jpg", "model_name": "BLIP2" }, { "question": "What is the class year written on the face of the clock?", "answer": "the class year", "gt_answers": [ "1904" ], "image_path": "./data/STVQA/VisualGenome/1/286090.jpg", "model_name": "BLIP2" }, { "question": "What is registration number of the plane?", "answer": "the plane is registered as fs-fs", "gt_answers": [ "N40960" ], "image_path": "./data/STVQA/VisualGenome/1/2323517.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the convenience store?", "answer": "sainsburys", "gt_answers": [ "Ajans" ], "image_path": "./data/STVQA/VisualGenome/1/2337588.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bank in the photo?", "answer": "bank of america", "gt_answers": [ "citibank" ], "image_path": "./data/STVQA/VisualGenome/1/2354389.jpg", "model_name": "BLIP2" }, { "question": "What does it say in blue and white on the bike?", "answer": "mcdonalds", "gt_answers": [ "Bud" ], "image_path": "./data/STVQA/VisualGenome/2/2411201.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the bottom of the bike?", "answer": "moped", "gt_answers": [ "Fighter" ], "image_path": "./data/STVQA/VisualGenome/2/2411201.jpg", "model_name": "BLIP2" }, { "question": "What kind of bike is this?", "answer": "moped", "gt_answers": [ "WILLAMSF1" ], "image_path": "./data/STVQA/VisualGenome/2/2411201.jpg", "model_name": "BLIP2" }, { "question": "What is the website adress on the white banner?", "answer": "com", "gt_answers": [ "nikefootball.com" ], "image_path": "./data/STVQA/VisualGenome/1/285867.jpg", "model_name": "BLIP2" }, { "question": "What is the rightmost complete red word on the white banner?", "answer": "soccer", "gt_answers": [ "Arsenal" ], "image_path": "./data/STVQA/VisualGenome/1/285867.jpg", "model_name": "BLIP2" }, { "question": "What is the first (leftmost) word on the white banner?", "answer": "arsenal", "gt_answers": [ "Fly" ], "image_path": "./data/STVQA/VisualGenome/1/285867.jpg", "model_name": "BLIP2" }, { "question": "What category is in the front center?", "answer": "fiction", "gt_answers": [ "Finance & Investment" ], "image_path": "./data/STVQA/icdar/img_462.jpg", "model_name": "BLIP2" }, { "question": "What does the black and white sign say?", "answer": "one way", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065916.jpg", "model_name": "BLIP2" }, { "question": "What is written on the black clock?", "answer": "peoples dental surgery", "gt_answers": [ "Boodle & Dunthorne" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000374796.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of the dark car in the foreground?", "answer": "person", "gt_answers": [ "BG 7181" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000560123.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of the tan car in the background?", "answer": "person", "gt_answers": [ "HWK 246N" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000560123.jpg", "model_name": "BLIP2" }, { "question": "What word is in red lettering on the sign?", "answer": "supermercado", "gt_answers": [ "SUPERMERCAT" ], "image_path": "./data/STVQA/VisualGenome/2/2585.jpg", "model_name": "BLIP2" }, { "question": "What phrase is in black lettering on the sign?", "answer": "supermercado", "gt_answers": [ "VINYA DEL MAR" ], "image_path": "./data/STVQA/VisualGenome/2/2585.jpg", "model_name": "BLIP2" }, { "question": "Where was this product made?", "answer": "united states", "gt_answers": [ "UK" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006041.jpg", "model_name": "BLIP2" }, { "question": "How much do calls cost?", "answer": "50 cents", "gt_answers": [ "50" ], "image_path": "./data/STVQA/imageNet/n03902125_18981.JPEG", "model_name": "BLIP2" }, { "question": "What is the number of the referee", "answer": "three", "gt_answers": [ "27" ], "image_path": "./data/STVQA/VisualGenome/1/2330321.jpg", "model_name": "BLIP2" }, { "question": "How is this item labeled?", "answer": "sea glass", "gt_answers": [ "Clear" ], "image_path": "./data/STVQA/imageNet/n03633091_15833.JPEG", "model_name": "BLIP2" }, { "question": "Wht does the yellow sign say to look for?", "answer": "trains", "gt_answers": [ "Trains", "look for trains" ], "image_path": "./data/STVQA/VisualGenome/2/2407854.jpg", "model_name": "BLIP2" }, { "question": "What is the three digit number printed on the wall?", "answer": "3", "gt_answers": [ "330" ], "image_path": "./data/STVQA/VisualGenome/1/2327715.jpg", "model_name": "BLIP2" }, { "question": "How much is a single magazine in euros?", "answer": "one euro", "gt_answers": [ "15" ], "image_path": "./data/STVQA/IIIT_text/3073.jpg", "model_name": "BLIP2" }, { "question": "How much are two magazines in euros?", "answer": "two euros", "gt_answers": [ "27" ], "image_path": "./data/STVQA/IIIT_text/3073.jpg", "model_name": "BLIP2" }, { "question": "What is to the left?", "answer": "objazd", "gt_answers": [ "Objazd" ], "image_path": "./data/STVQA/VisualGenome/1/2343979.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boat?", "answer": "allaren", "gt_answers": [ "alameen" ], "image_path": "./data/STVQA/VisualGenome/1/2372426.jpg", "model_name": "BLIP2" }, { "question": "What road ends here?", "answer": "reno", "gt_answers": [ "Genoa" ], "image_path": "./data/STVQA/VisualGenome/1/2324067.jpg", "model_name": "BLIP2" }, { "question": "Which city name is shown here?", "answer": "singapore", "gt_answers": [ "TOKYO" ], "image_path": "./data/STVQA/icdar/img_709.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store on the left with the black background?", "answer": "dillards", "gt_answers": [ "Giordano" ], "image_path": "./data/STVQA/icdar/test_img_123.jpg", "model_name": "BLIP2" }, { "question": "What is the caption at the bottom of the photo?", "answer": "a book with film", "gt_answers": [ "A Bush Mail Box" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000219488.jpg", "model_name": "BLIP2" }, { "question": "What's the title of the red book?", "answer": "the art of computer programming", "gt_answers": [ "JavaScript" ], "image_path": "./data/STVQA/VisualGenome/1/713661.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "a", "gt_answers": [ "285" ], "image_path": "./data/STVQA/IIIT_text/img_001092.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the orange and green bus in front?", "answer": "ta ta", "gt_answers": [ "29-40-GB" ], "image_path": "./data/STVQA/imageNet/n03895866_65924.JPEG", "model_name": "BLIP2" }, { "question": "What is the number near the headlight of the orange and green bus?", "answer": "3", "gt_answers": [ "154" ], "image_path": "./data/STVQA/imageNet/n03895866_65924.JPEG", "model_name": "BLIP2" }, { "question": "First two tiny letters on shirt?", "answer": "a m", "gt_answers": [ "DE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048126.jpg", "model_name": "BLIP2" }, { "question": "Which skier is in the lead?", "answer": "person", "gt_answers": [ "29" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373683.jpg", "model_name": "BLIP2" }, { "question": "Which skier is on the same team as #29?", "answer": "person", "gt_answers": [ "12" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373683.jpg", "model_name": "BLIP2" }, { "question": "What is one of the sponsors listed on their uniforms?", "answer": "person", "gt_answers": [ "GMPA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373683.jpg", "model_name": "BLIP2" }, { "question": "What is on the right of the man?", "answer": "luggage", "gt_answers": [ "Toilets" ], "image_path": "./data/STVQA/VisualGenome/1/2351809.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "101", "gt_answers": [ "101" ], "image_path": "./data/STVQA/VisualGenome/1/2377148.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to a city", "gt_answers": [ "SANTA ROSA" ], "image_path": "./data/STVQA/VisualGenome/1/2377148.jpg", "model_name": "BLIP2" }, { "question": "What type of bus is it?", "answer": "city bus", "gt_answers": [ "LIMITED STOP EXPRESS" ], "image_path": "./data/STVQA/VisualGenome/1/2377148.jpg", "model_name": "BLIP2" }, { "question": "what is the last 3 numbers?", "answer": "0", "gt_answers": [ "466" ], "image_path": "./data/STVQA/imageNet/n03272562_15701.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of this player", "answer": "person", "gt_answers": [ "sch" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000217929.jpg", "model_name": "BLIP2" }, { "question": "Write the English term in red?", "answer": "tokyo", "gt_answers": [ "Take Five" ], "image_path": "./data/STVQA/IIIT_text/2670.jpg", "model_name": "BLIP2" }, { "question": "From which city is this map?", "answer": "san francisco", "gt_answers": [ "San Francisco" ], "image_path": "./data/STVQA/imageNet/n03938244_13982.JPEG", "model_name": "BLIP2" }, { "question": "What company is advertised on the airplane tail?", "answer": "southwest airlines", "gt_answers": [ "Southwest" ], "image_path": "./data/STVQA/VisualGenome/1/2373097.jpg", "model_name": "BLIP2" }, { "question": "Who owns the copyright to this image?", "answer": "southwest airlines", "gt_answers": [ "Brett Lane Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2373097.jpg", "model_name": "BLIP2" }, { "question": "What TV show is this game based on?", "answer": "star trek", "gt_answers": [ "STAR TREK" ], "image_path": "./data/STVQA/imageNet/n04243546_18328.JPEG", "model_name": "BLIP2" }, { "question": "Where is the front bus going?", "answer": "london city airport", "gt_answers": [ "Barnet Church" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000395101.jpg", "model_name": "BLIP2" }, { "question": "What is written on the street?", "answer": "london road", "gt_answers": [ "No Entry" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000395101.jpg", "model_name": "BLIP2" }, { "question": "What is being sold here?", "answer": "bananas", "gt_answers": [ "BANANAS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000009452.jpg", "model_name": "BLIP2" }, { "question": "What weight are the bananas being sold by?", "answer": "kilogram", "gt_answers": [ "KG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000009452.jpg", "model_name": "BLIP2" }, { "question": "What is the price of the bananas per kg?", "answer": "$1", "gt_answers": [ "$11.98" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000009452.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store on the right?", "answer": "macys", "gt_answers": [ "MaxMara" ], "image_path": "./data/STVQA/icdar/test_img_286.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store on the left?", "answer": "macys", "gt_answers": [ "Diane von Furstenberg" ], "image_path": "./data/STVQA/icdar/test_img_286.jpg", "model_name": "BLIP2" }, { "question": "What is the email listed?", "answer": "no email listed", "gt_answers": [ "www.tedsonline.com" ], "image_path": "./data/STVQA/VisualGenome/2/2409292.jpg", "model_name": "BLIP2" }, { "question": "What is the make of this bus?", "answer": "school bus", "gt_answers": [ "International" ], "image_path": "./data/STVQA/IIIT_text/img_001130.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign read on the side of the bus?", "answer": "school bus", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/IIIT_text/img_001130.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "sc sc", "gt_answers": [ "HX-04709", "HX 04709" ], "image_path": "./data/STVQA/IIIT_text/img_001130.jpg", "model_name": "BLIP2" }, { "question": "What color are the back of the skier's gloves?", "answer": "black", "gt_answers": [ "Orange", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2338110.jpg", "model_name": "BLIP2" }, { "question": "what is the route of the bus?", "answer": "crosstown", "gt_answers": [ "41 Crosstown", "CROSSTOWN" ], "image_path": "./data/STVQA/VisualGenome/1/2350256.jpg", "model_name": "BLIP2" }, { "question": "What is the serial number of the plane?", "answer": "sn mcp", "gt_answers": [ "n861ma" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175250.jpg", "model_name": "BLIP2" }, { "question": "What is written on planes bottom?", "answer": "person", "gt_answers": [ "www.mokuleleairline" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175250.jpg", "model_name": "BLIP2" }, { "question": "What is the first word of airlines name?", "answer": "person", "gt_answers": [ "Mokulele" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000175250.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOR OF THIS DRESS?", "answer": "white", "gt_answers": [ "SILKY WHITE", "White" ], "image_path": "./data/STVQA/imageNet/n03450230_30009.JPEG", "model_name": "BLIP2" }, { "question": "What color is her dress?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/imageNet/n03450230_30009.JPEG", "model_name": "BLIP2" }, { "question": "What type of food is being advertised?", "answer": "desserts", "gt_answers": [ "DESSERTS SWEETS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000075270.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the train on the left?", "answer": "a train", "gt_answers": [ "1839" ], "image_path": "./data/STVQA/VisualGenome/1/2316708.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say on the right?", "answer": "londonderry station", "gt_answers": [ "WARNING KEEP OFF ELECTRIC LIVE RAILS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000074997.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say on the left?", "answer": "londonderry station", "gt_answers": [ "SOUTHPORT CHAPEL STREET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000074997.jpg", "model_name": "BLIP2" }, { "question": "What 2 letters appear in the largest font on the front of the bus?", "answer": "person", "gt_answers": [ "GO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048571.jpg", "model_name": "BLIP2" }, { "question": "What is the type in the middle of the image?", "answer": "keyboard", "gt_answers": [ "Workstation" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000127418.jpg", "model_name": "BLIP2" }, { "question": "What is the bbok name?", "answer": "kung fu", "gt_answers": [ "KUNG FU and KARATE", "COMPLETE GUIDE TO THE DEADLY ARTS OF KUNG FU and KARATE" ], "image_path": "./data/STVQA/imageNet/n06596364_12307.JPEG", "model_name": "BLIP2" }, { "question": "What is the price of the book?", "answer": "$24.99", "gt_answers": [ "$1.00" ], "image_path": "./data/STVQA/imageNet/n06596364_12307.JPEG", "model_name": "BLIP2" }, { "question": "What is the number mentioned on the book?", "answer": "4", "gt_answers": [ "NO.1 1974" ], "image_path": "./data/STVQA/imageNet/n06596364_12307.JPEG", "model_name": "BLIP2" }, { "question": "How many males are in the picture?", "answer": "two", "gt_answers": [ "2", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2363245.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2344854.jpg", "model_name": "BLIP2" }, { "question": "What is the larger name displayed in the corner?", "answer": "paul duquet", "gt_answers": [ "Paul Bocquet" ], "image_path": "./data/STVQA/VisualGenome/1/2350637.jpg", "model_name": "BLIP2" }, { "question": "What are the blue words in the bottom corner?", "answer": "paul duquet", "gt_answers": [ "Nicolas Risch Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2350637.jpg", "model_name": "BLIP2" }, { "question": "What is the name written on the bus?", "answer": "person", "gt_answers": [ "ValleyMetro" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000068518.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "tahoe transit", "gt_answers": [ "8507" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000068518.jpg", "model_name": "BLIP2" }, { "question": "What words are visible on the white sign in the background?", "answer": "person's house", "gt_answers": [ "road crossing" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352739.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "3", "gt_answers": [ "90" ], "image_path": "./data/STVQA/VisualGenome/1/2338190.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the side of the plane?", "answer": "gantas", "gt_answers": [ "Qantas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000511438.jpg", "model_name": "BLIP2" }, { "question": "What store is in the building?", "answer": "walmart", "gt_answers": [ "macy's" ], "image_path": "./data/STVQA/VisualGenome/1/2335225.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blue pad?", "answer": "chemistry le school", "gt_answers": [ "chemistry is chool" ], "image_path": "./data/STVQA/imageNet/n04238763_6896.JPEG", "model_name": "BLIP2" }, { "question": "What does the sticker on the sign say?", "answer": "adolf hitler", "gt_answers": [ "Ial Bullying" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000013524.jpg", "model_name": "BLIP2" }, { "question": "What is the name of bakery prepared this food?", "answer": "person", "gt_answers": [ "sistema bakery" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000269738.jpg", "model_name": "BLIP2" }, { "question": "what line is this bus running?", "answer": "santa fe", "gt_answers": [ "Linea 102" ], "image_path": "./data/STVQA/VisualGenome/1/2361350.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the plane?", "answer": "phoenix", "gt_answers": [ "Dynamic" ], "image_path": "./data/STVQA/VisualGenome/1/2318862.jpg", "model_name": "BLIP2" }, { "question": "What is written on the green sign?", "answer": "person", "gt_answers": [ "Jesus is the way" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000556969.jpg", "model_name": "BLIP2" }, { "question": "What does the man's helmet say?", "answer": "harley davidson", "gt_answers": [ "HBO" ], "image_path": "./data/STVQA/VisualGenome/1/2341000.jpg", "model_name": "BLIP2" }, { "question": "What city is on the white sign?", "answer": "person", "gt_answers": [ "New Delhi" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000540082.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "vogue bar", "gt_answers": [ "Yogurt Bar" ], "image_path": "./data/STVQA/IIIT_text/4534.jpg", "model_name": "BLIP2" }, { "question": "who has a copywrite for this photo?", "answer": "the photographer", "gt_answers": [ "sassy mom's corner" ], "image_path": "./data/STVQA/VisualGenome/2/2416368.jpg", "model_name": "BLIP2" }, { "question": "What year is shown on the screen?", "answer": "2010", "gt_answers": [ "1939" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007105.jpg", "model_name": "BLIP2" }, { "question": "what kind of nuts are in the blue packet?", "answer": "peanuts", "gt_answers": [ "MACADAMIA" ], "image_path": "./data/STVQA/imageNet/n03482405_8976.JPEG", "model_name": "BLIP2" }, { "question": "What warning does is written on the back of this truck?", "answer": "do not attempt to drive this vehicle", "gt_answers": [ "Caution Frequently Stopping Vehicle" ], "image_path": "./data/STVQA/VisualGenome/1/2316663.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the first aid kit on the wall?", "answer": "first aid only", "gt_answers": [ "Xpect" ], "image_path": "./data/STVQA/VisualGenome/2/379.jpg", "model_name": "BLIP2" }, { "question": "Who sponsored the match?", "answer": "red bull", "gt_answers": [ "HORNIG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000233292.jpg", "model_name": "BLIP2" }, { "question": "What is the boat number?", "answer": "199", "gt_answers": [ "29911" ], "image_path": "./data/STVQA/imageNet/n02981792_13549.JPEG", "model_name": "BLIP2" }, { "question": "What website is written on the image?", "answer": "person", "gt_answers": [ "www.versaci.fr" ], "image_path": "./data/STVQA/IIIT_text/4803.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the appliance this woman is holding?", "answer": "crock pot", "gt_answers": [ "Crock Pot" ], "image_path": "./data/STVQA/imageNet/n03133878_2839.JPEG", "model_name": "BLIP2" }, { "question": "What is written here?", "answer": "person", "gt_answers": [ "BNP PARI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000573248.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the pier?", "answer": "central pier", "gt_answers": [ "Central Pier", "CENTRAL" ], "image_path": "./data/STVQA/VisualGenome/1/2349953.jpg", "model_name": "BLIP2" }, { "question": "What is written on the bottom of the sign in blue?", "answer": "saigon", "gt_answers": [ "BA TRIEU" ], "image_path": "./data/STVQA/VisualGenome/2/2415487.jpg", "model_name": "BLIP2" }, { "question": "What climate is the tap associated with the refrigerator described as?", "answer": "cold", "gt_answers": [ "COLD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000545200.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the train?", "answer": "0", "gt_answers": [ "45379" ], "image_path": "./data/STVQA/VisualGenome/2/2412681.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the coffee shop?", "answer": "starbucks", "gt_answers": [ "St. Marc Cafe" ], "image_path": "./data/STVQA/icdar/test_img_102.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the cafe?", "answer": "mr coffee", "gt_answers": [ "St. Marc Cafe" ], "image_path": "./data/STVQA/icdar/test_img_102.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "mulberry street", "gt_answers": [ "TAYLOR ST MULBERRY ST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000288131.jpg", "model_name": "BLIP2" }, { "question": "What is the headline of the poster (first line)?", "answer": "dead man talking", "gt_answers": [ "DEAD MAN TALKING" ], "image_path": "./data/STVQA/VisualGenome/1/2328739.jpg", "model_name": "BLIP2" }, { "question": "Who runs the campaign?", "answer": "dead man talking", "gt_answers": [ "Texas Department of Transportation" ], "image_path": "./data/STVQA/VisualGenome/1/2328739.jpg", "model_name": "BLIP2" }, { "question": "What is the team name on the bus?", "answer": "person", "gt_answers": [ "Melbourne Tigers" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000578070.jpg", "model_name": "BLIP2" }, { "question": "What is the city the team is from?", "answer": "melbourne", "gt_answers": [ "Melbourne" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000578070.jpg", "model_name": "BLIP2" }, { "question": "What city is this parking meter located in?", "answer": "chicago", "gt_answers": [ "Denver" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000415475.jpg", "model_name": "BLIP2" }, { "question": "What is written in the watermark?", "answer": "impala", "gt_answers": [ "iStockphoto" ], "image_path": "./data/STVQA/imageNet/n02423022_6531.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign on the top right hand side read?", "answer": "dior", "gt_answers": [ "Dior On Stage" ], "image_path": "./data/STVQA/icdar/test_img_288.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the shop?", "answer": "asian food", "gt_answers": [ "Ben Ngii", "BEN NGIL" ], "image_path": "./data/STVQA/VisualGenome/1/2370376.jpg", "model_name": "BLIP2" }, { "question": "What company is on the umbrella?", "answer": "starbucks", "gt_answers": [ "Coca Cola" ], "image_path": "./data/STVQA/VisualGenome/1/2370376.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "x1", "gt_answers": [ "68" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000394139.jpg", "model_name": "BLIP2" }, { "question": "How many necklaces does the man have on?", "answer": "two", "gt_answers": [ "two", "1" ], "image_path": "./data/STVQA/VisualGenome/1/2319312.jpg", "model_name": "BLIP2" }, { "question": "What is written on the red traffic sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2371973.jpg", "model_name": "BLIP2" }, { "question": "`What is written on the yellow traffic sign?", "answer": "stop", "gt_answers": [ "Two way traffic" ], "image_path": "./data/STVQA/VisualGenome/1/2371973.jpg", "model_name": "BLIP2" }, { "question": "How much are items?", "answer": "they are cheap", "gt_answers": [ ".99\ufffd\ufffd" ], "image_path": "./data/STVQA/VisualGenome/1/2326851.jpg", "model_name": "BLIP2" }, { "question": "What is the store called?", "answer": "del taco", "gt_answers": [ "DELRIO" ], "image_path": "./data/STVQA/VisualGenome/1/2326851.jpg", "model_name": "BLIP2" }, { "question": "In which color bus people boarding?", "answer": "red", "gt_answers": [ "Blue", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2370002.jpg", "model_name": "BLIP2" }, { "question": "Where is this located?", "answer": "in a parade", "gt_answers": [ "St. John's" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000200080.jpg", "model_name": "BLIP2" }, { "question": "What name is pictured?", "answer": "person", "gt_answers": [ "JOHN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000200080.jpg", "model_name": "BLIP2" }, { "question": "What four letters can you see after \"St. John's\"?", "answer": "person", "gt_answers": [ "Epis" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000200080.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus station?", "answer": "2", "gt_answers": [ "11" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000169640.jpg", "model_name": "BLIP2" }, { "question": "What is the technology company on the train?", "answer": "person", "gt_answers": [ "Samsung" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000283884.jpg", "model_name": "BLIP2" }, { "question": "What is the letters on the tail of the plane?", "answer": "japan airlines", "gt_answers": [ "ana" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000362130.jpg", "model_name": "BLIP2" }, { "question": "What country does the plane work for?", "answer": "japan", "gt_answers": [ "japan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000362130.jpg", "model_name": "BLIP2" }, { "question": "What three letters are on the plane in the background?", "answer": "aa", "gt_answers": [ "ana" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000362130.jpg", "model_name": "BLIP2" }, { "question": "What word in white starts with the letter C?", "answer": "chainmail", "gt_answers": [ "Crusade" ], "image_path": "./data/STVQA/imageNet/n03000247_9278.JPEG", "model_name": "BLIP2" }, { "question": "what are the 2 words on the book, that are hand written?", "answer": "sheep lamb", "gt_answers": [ "sheep barn" ], "image_path": "./data/STVQA/VisualGenome/1/2351440.jpg", "model_name": "BLIP2" }, { "question": "what is the 1 word written on the book that is printed by a computer?", "answer": "sheep", "gt_answers": [ "record" ], "image_path": "./data/STVQA/VisualGenome/1/2351440.jpg", "model_name": "BLIP2" }, { "question": "What number is on the train engine.", "answer": "0", "gt_answers": [ "52" ], "image_path": "./data/STVQA/VisualGenome/2/2402607.jpg", "model_name": "BLIP2" }, { "question": "what are the 2 words written on the face of the clock?", "answer": "roman numerals", "gt_answers": [ "Pub Restauarant" ], "image_path": "./data/STVQA/VisualGenome/1/2376030.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "bata", "gt_answers": [ "BATA" ], "image_path": "./data/STVQA/IIIT_text/img_000148.jpg", "model_name": "BLIP2" }, { "question": "What city name is written on the building?", "answer": "melbourne", "gt_answers": [ "London" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000339074.jpg", "model_name": "BLIP2" }, { "question": "what street is listed on the right?", "answer": "fifth avenue", "gt_answers": [ "fifth ave" ], "image_path": "./data/STVQA/VisualGenome/1/2372471.jpg", "model_name": "BLIP2" }, { "question": "What is the white text on the dog's collar?", "answer": "schnauzer", "gt_answers": [ "WOOF" ], "image_path": "./data/STVQA/imageNet/n02097209_6366.JPEG", "model_name": "BLIP2" }, { "question": "What is the word at the top of the can that starts with M?", "answer": "mountain", "gt_answers": [ "MENOS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000422828.jpg", "model_name": "BLIP2" }, { "question": "what is the first word on the sign?", "answer": "old", "gt_answers": [ "ragnar" ], "image_path": "./data/STVQA/VisualGenome/2/2411905.jpg", "model_name": "BLIP2" }, { "question": "What number range is located on the bottom left?", "answer": "m to l", "gt_answers": [ "6-11", "6-11 KG" ], "image_path": "./data/STVQA/imageNet/n03188531_15208.JPEG", "model_name": "BLIP2" }, { "question": "What is the number worn by the player in red?", "answer": "4", "gt_answers": [ "12" ], "image_path": "./data/STVQA/VisualGenome/2/2408044.jpg", "model_name": "BLIP2" }, { "question": "What is the number worn by the player in orange?", "answer": "4", "gt_answers": [ "34" ], "image_path": "./data/STVQA/VisualGenome/2/2408044.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the photo agency?", "answer": "stock photo", "gt_answers": [ "Foto S.A." ], "image_path": "./data/STVQA/imageNet/n02786058_6977.JPEG", "model_name": "BLIP2" }, { "question": "What does Foto S.A. do?", "answer": "stock photography", "gt_answers": [ "Photo and servicing agency" ], "image_path": "./data/STVQA/imageNet/n02786058_6977.JPEG", "model_name": "BLIP2" }, { "question": "What company owns this photo?", "answer": "getty images", "gt_answers": [ "Foto S.A." ], "image_path": "./data/STVQA/imageNet/n02786058_6977.JPEG", "model_name": "BLIP2" }, { "question": "what is the name of the street?", "answer": "third street", "gt_answers": [ "Third Street Promenade", "THIRD STREET" ], "image_path": "./data/STVQA/VisualGenome/1/2365141.jpg", "model_name": "BLIP2" }, { "question": "What type of vehicle is allowed on the right most lane?", "answer": "buses", "gt_answers": [ "Buses only" ], "image_path": "./data/STVQA/VisualGenome/1/2365141.jpg", "model_name": "BLIP2" }, { "question": "what is this a picture of?", "answer": "bridge", "gt_answers": [ "bridge", "CLIFDEN SUSPENSION BRIDGE" ], "image_path": "./data/STVQA/imageNet/n04366367_1851.JPEG", "model_name": "BLIP2" }, { "question": "What company makes the truck depicted in the photo?", "answer": "volvo", "gt_answers": [ "scania" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000545676.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the door?", "answer": "3", "gt_answers": [ "1105" ], "image_path": "./data/STVQA/VisualGenome/2/2401207.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say to do?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/1/2334235.jpg", "model_name": "BLIP2" }, { "question": "What counrry is mentioned in thebtext?", "answer": "india", "gt_answers": [ "India" ], "image_path": "./data/STVQA/IIIT_text/img_000006.jpg", "model_name": "BLIP2" }, { "question": "What word is used at least four times in this picture?", "answer": "person", "gt_answers": [ "up" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000426853.jpg", "model_name": "BLIP2" }, { "question": "What is the phone number for Arial Equip.?", "answer": "person", "gt_answers": [ "877.734.8400" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000426853.jpg", "model_name": "BLIP2" }, { "question": "What word is abbreviated in this picture?", "answer": "person", "gt_answers": [ "equip." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000426853.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken according to the dateline?", "answer": "2012", "gt_answers": [ "2006" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000520654.jpg", "model_name": "BLIP2" }, { "question": "What brand is the beer?", "answer": "stella artois", "gt_answers": [ "Stella Artois" ], "image_path": "./data/STVQA/VisualGenome/1/2341019.jpg", "model_name": "BLIP2" }, { "question": "What brand is the remote control?", "answer": "sony", "gt_answers": [ "LG" ], "image_path": "./data/STVQA/VisualGenome/1/2341019.jpg", "model_name": "BLIP2" }, { "question": "Which sport is he playing?", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/1/2320471.jpg", "model_name": "BLIP2" }, { "question": "What gender is the player?", "answer": "male", "gt_answers": [ "Male" ], "image_path": "./data/STVQA/VisualGenome/1/2320471.jpg", "model_name": "BLIP2" }, { "question": "What is he holding in his hand?", "answer": "towel", "gt_answers": [ "Towel" ], "image_path": "./data/STVQA/VisualGenome/1/2320471.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store with the red background?", "answer": "joy q", "gt_answers": [ "The Gift Shop" ], "image_path": "./data/STVQA/icdar/img_944.jpg", "model_name": "BLIP2" }, { "question": "What type of services are offered from the store with the yellow store sign?", "answer": "beauty salon", "gt_answers": [ "Hair salon" ], "image_path": "./data/STVQA/icdar/img_944.jpg", "model_name": "BLIP2" }, { "question": "What is the \"J\" word in the upper right corner?", "answer": "joy", "gt_answers": [ "Joy" ], "image_path": "./data/STVQA/icdar/img_944.jpg", "model_name": "BLIP2" }, { "question": "how much seter on bus", "answer": "seter on bus", "gt_answers": [ "47" ], "image_path": "./data/STVQA/VisualGenome/1/2332782.jpg", "model_name": "BLIP2" }, { "question": "what is the name of bus", "answer": "luxusbus", "gt_answers": [ "luksusbuss" ], "image_path": "./data/STVQA/VisualGenome/1/2332782.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the larger book under the computer?", "answer": "cat in the hat", "gt_answers": [ "great houses of britain" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053479.jpg", "model_name": "BLIP2" }, { "question": "How many women are featured in the black book", "answer": "two", "gt_answers": [ "100" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053479.jpg", "model_name": "BLIP2" }, { "question": "What kind of spider is this?", "answer": "wolf spider", "gt_answers": [ "wolf spider" ], "image_path": "./data/STVQA/imageNet/n01775062_7047.JPEG", "model_name": "BLIP2" }, { "question": "What team does the pitcher pitch for?", "answer": "new york mets", "gt_answers": [ "Camden" ], "image_path": "./data/STVQA/VisualGenome/1/2373332.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the building", "answer": "the buttery", "gt_answers": [ "THE BUTTERY" ], "image_path": "./data/STVQA/VisualGenome/1/2345215.jpg", "model_name": "BLIP2" }, { "question": "What is written on the white board", "answer": "the buttery", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/VisualGenome/1/2345215.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the suitcase?", "answer": "fragile", "gt_answers": [ "THIS SIDE UP FRAGILE" ], "image_path": "./data/STVQA/VisualGenome/1/2345924.jpg", "model_name": "BLIP2" }, { "question": "What number is on the corner of the box?", "answer": "5", "gt_answers": [ "5" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009369.jpg", "model_name": "BLIP2" }, { "question": "What business is listed in the bottom right corner?", "answer": "person", "gt_answers": [ "photography", "PETE LABROZZI PHOTOGRAPHY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000128706.jpg", "model_name": "BLIP2" }, { "question": "What does the red and white banner read?", "answer": "i'm in the office", "gt_answers": [ "FORDHAM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000541491.jpg", "model_name": "BLIP2" }, { "question": "What is written in white on the blue motorcycle?", "answer": "berlin", "gt_answers": [ "Milka" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000511752.jpg", "model_name": "BLIP2" }, { "question": "What is the sign showing you how to wash?", "answer": "how to brush your teeth", "gt_answers": [ "Hands" ], "image_path": "./data/STVQA/VisualGenome/1/2316925.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of the building?", "answer": "blue sky", "gt_answers": [ "parachute air club" ], "image_path": "./data/STVQA/VisualGenome/2/2403255.jpg", "model_name": "BLIP2" }, { "question": "whats the name of the building?", "answer": "person", "gt_answers": [ "radio city" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000193867.jpg", "model_name": "BLIP2" }, { "question": "what is the engine no.?", "answer": "no", "gt_answers": [ "L.150" ], "image_path": "./data/STVQA/VisualGenome/1/2368148.jpg", "model_name": "BLIP2" }, { "question": "What is the number of train?", "answer": "3", "gt_answers": [ "6" ], "image_path": "./data/STVQA/VisualGenome/1/2374709.jpg", "model_name": "BLIP2" }, { "question": "What is the white text on the red sign?", "answer": "apollo hospital", "gt_answers": [ "24 HOURS EMERGENCY" ], "image_path": "./data/STVQA/IIIT_text/img_000137.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "jen's take home", "gt_answers": [ "Jeni's Take Home" ], "image_path": "./data/STVQA/VisualGenome/1/2375180.jpg", "model_name": "BLIP2" }, { "question": "What numbers are displayed on the right of the pitcher?", "answer": "0", "gt_answers": [ "326" ], "image_path": "./data/STVQA/imageNet/n09835506_4745.JPEG", "model_name": "BLIP2" }, { "question": "What company is in the advertisement on the pitcher's left?", "answer": "nike", "gt_answers": [ "AIG" ], "image_path": "./data/STVQA/imageNet/n09835506_4745.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the building", "answer": "person", "gt_answers": [ "SIBLEY WAREHOUSE & STORAGE COS FIRE PROOF WAREHOUSE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000105468.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "all way", "gt_answers": [ "Stop All-Way" ], "image_path": "./data/STVQA/VisualGenome/1/2340998.jpg", "model_name": "BLIP2" }, { "question": "What is on the license plate on the motorcycle?", "answer": "san diego", "gt_answers": [ "HAIRDR" ], "image_path": "./data/STVQA/VisualGenome/2/2410172.jpg", "model_name": "BLIP2" }, { "question": "what is the box?", "answer": "a calculator", "gt_answers": [ "Phone" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000047904.jpg", "model_name": "BLIP2" }, { "question": "what is the phone for?", "answer": "it's a calculator", "gt_answers": [ "Calling for", "CALLING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000047904.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the TV show?", "answer": "tv show", "gt_answers": [ "M*A*S*H" ], "image_path": "./data/STVQA/VisualGenome/1/2344777.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the green vehicle?", "answer": "tahoe", "gt_answers": [ "PAF808" ], "image_path": "./data/STVQA/imageNet/n03594945_5234.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on the yellow sign?", "answer": "no parking", "gt_answers": [ "Danger Keep Away" ], "image_path": "./data/STVQA/imageNet/n03594945_5234.JPEG", "model_name": "BLIP2" }, { "question": "What kind of simple trip planner", "answer": "a cell phone", "gt_answers": [ "Metro" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000227568.jpg", "model_name": "BLIP2" }, { "question": "what is written in the start box", "answer": "lg g2", "gt_answers": [ "Laist hq" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000227568.jpg", "model_name": "BLIP2" }, { "question": "what is written in the end box", "answer": "lg cell phone", "gt_answers": [ "7th/grand" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000227568.jpg", "model_name": "BLIP2" }, { "question": "How many minutes do you get for $1.00?", "answer": "30 minutes", "gt_answers": [ "3 MINUTOS" ], "image_path": "./data/STVQA/imageNet/n03902125_5633.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the side of the plane?", "answer": "air transat", "gt_answers": [ "Air Transat" ], "image_path": "./data/STVQA/VisualGenome/1/2373794.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to the next stop", "gt_answers": [ "Downtown" ], "image_path": "./data/STVQA/VisualGenome/2/2403728.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS DISPLAYED ON THE WHITE JACKETS?", "answer": "ems", "gt_answers": [ "POLIZIA" ], "image_path": "./data/STVQA/imageNet/n04336792_15017.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS THE NUMBER DISPLAYED ON LEFT SIDE OF THE RED VEHICLE?", "answer": "3", "gt_answers": [ "115" ], "image_path": "./data/STVQA/imageNet/n04336792_15017.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON THE LEFT SIDE OF THE RED VEHICLE?", "answer": "ems", "gt_answers": [ "VIGILI DEL FUOCO 115", "VIGILI DEL FUOCO" ], "image_path": "./data/STVQA/imageNet/n04336792_15017.JPEG", "model_name": "BLIP2" }, { "question": "What is the first word on the bus on the left", "answer": "person", "gt_answers": [ "sebastian" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000526922.jpg", "model_name": "BLIP2" }, { "question": "What type is the first word of the road this building is on?", "answer": "road", "gt_answers": [ "LANE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000552791.jpg", "model_name": "BLIP2" }, { "question": "What is the second word of the road (Lane [ ] Rd)?", "answer": "lane", "gt_answers": [ "HEAD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000552791.jpg", "model_name": "BLIP2" }, { "question": "Is this building on a ST or RD?", "answer": "st", "gt_answers": [ "RD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000552791.jpg", "model_name": "BLIP2" }, { "question": "What date was this image taken?", "answer": "the date is unknown", "gt_answers": [ "2003 6 8" ], "image_path": "./data/STVQA/VisualGenome/1/2318872.jpg", "model_name": "BLIP2" }, { "question": "What is the first word written on the white background on the bus?", "answer": "person", "gt_answers": [ "classic" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000456416.jpg", "model_name": "BLIP2" }, { "question": "What country does the sign welcome the driver to?", "answer": "canada", "gt_answers": [ "Toronto" ], "image_path": "./data/STVQA/VisualGenome/1/2371622.jpg", "model_name": "BLIP2" }, { "question": "What brand of chocoate is sold in the image?", "answer": "cadbury dairy milk", "gt_answers": [ "Cadbury" ], "image_path": "./data/STVQA/icdar/test_img_183.jpg", "model_name": "BLIP2" }, { "question": "What is the babies name?", "answer": "no name", "gt_answers": [ "Schwaller" ], "image_path": "./data/STVQA/imageNet/n02804414_8548.JPEG", "model_name": "BLIP2" }, { "question": "What sale is advertised for the store in the image?", "answer": "50% off sale", "gt_answers": [ "40%" ], "image_path": "./data/STVQA/icdar/test_img_427.jpg", "model_name": "BLIP2" }, { "question": "What is the Norway country code?", "answer": "0047", "gt_answers": [ "nor" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000061293.jpg", "model_name": "BLIP2" }, { "question": "What is the set of letters before NOR?", "answer": "person", "gt_answers": [ "dnb" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000061293.jpg", "model_name": "BLIP2" }, { "question": "What set of letters has two capitals and one lowercase?", "answer": "person", "gt_answers": [ "dnb" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000061293.jpg", "model_name": "BLIP2" }, { "question": "What kid of train service is in the picture?", "answer": "freight train", "gt_answers": [ "railpool" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000494768.jpg", "model_name": "BLIP2" }, { "question": "Which train company is being used?", "answer": "person", "gt_answers": [ "txlogistik" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000494768.jpg", "model_name": "BLIP2" }, { "question": "What is the train's \"plate\" number?", "answer": "nr nr", "gt_answers": [ "185 693-9" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000494768.jpg", "model_name": "BLIP2" }, { "question": "What type of vehicle is this?", "answer": "taxi", "gt_answers": [ "Taxi" ], "image_path": "./data/STVQA/imageNet/n02930766_33429.JPEG", "model_name": "BLIP2" }, { "question": "What is this taxi's number?", "answer": "788", "gt_answers": [ "788" ], "image_path": "./data/STVQA/imageNet/n02930766_33429.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand of hot sauce?", "answer": "person", "gt_answers": [ "TABASCO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000157639.jpg", "model_name": "BLIP2" }, { "question": "What is an exciting experience?", "answer": "a shopping mall", "gt_answers": [ "Shopping" ], "image_path": "./data/STVQA/icdar/test_img_70.jpg", "model_name": "BLIP2" }, { "question": "Under the sign banning cars and motorcycles, what is the word printed at the very bottom?", "answer": "no", "gt_answers": [ "SALLITTU" ], "image_path": "./data/STVQA/VisualGenome/1/2362100.jpg", "model_name": "BLIP2" }, { "question": "What is the street on the street sign?", "answer": "k street", "gt_answers": [ "K ST NW" ], "image_path": "./data/STVQA/IIIT_text/440.jpg", "model_name": "BLIP2" }, { "question": "I they go left, what Altitude would they reach?", "answer": "3000 feet", "gt_answers": [ "Altitude 2000", "2000" ], "image_path": "./data/STVQA/VisualGenome/1/2371568.jpg", "model_name": "BLIP2" }, { "question": "What logo is present in the picture?", "answer": "at&t", "gt_answers": [ "Johnson Controls" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014762.jpg", "model_name": "BLIP2" }, { "question": "what is the name of this airline?", "answer": "air china", "gt_answers": [ "CHINA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259553.jpg", "model_name": "BLIP2" }, { "question": "What does the baseball players jersey say?", "answer": "texas rangers", "gt_answers": [ "Wagner 35" ], "image_path": "./data/STVQA/VisualGenome/1/2334922.jpg", "model_name": "BLIP2" }, { "question": "What is the brand in the packages?", "answer": "person", "gt_answers": [ "Starkist" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000248191.jpg", "model_name": "BLIP2" }, { "question": "What does the street sign say?", "answer": "yield", "gt_answers": [ "YIELD" ], "image_path": "./data/STVQA/VisualGenome/2/1229.jpg", "model_name": "BLIP2" }, { "question": "What costs 3.99?", "answer": "oranges", "gt_answers": [ "clementines" ], "image_path": "./data/STVQA/VisualGenome/2/2414544.jpg", "model_name": "BLIP2" }, { "question": "What number is on the jersey of the girl holding the ball?", "answer": "14", "gt_answers": [ "14" ], "image_path": "./data/STVQA/imageNet/n02802426_18069.JPEG", "model_name": "BLIP2" }, { "question": "What is the number on the jersey of the player holding the ball?", "answer": "14", "gt_answers": [ "14" ], "image_path": "./data/STVQA/imageNet/n02802426_18069.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the hotel?", "answer": "hilton garden inn", "gt_answers": [ "Marriott" ], "image_path": "./data/STVQA/IIIT_text/img_000741.jpg", "model_name": "BLIP2" }, { "question": "What city is this sign for?", "answer": "manhattan", "gt_answers": [ "Manhattan" ], "image_path": "./data/STVQA/VisualGenome/2/2401009.jpg", "model_name": "BLIP2" }, { "question": "Where does the road west lead?", "answer": "manhattan", "gt_answers": [ "Canal St" ], "image_path": "./data/STVQA/VisualGenome/2/2401009.jpg", "model_name": "BLIP2" }, { "question": "Where does the road north lead?", "answer": "manhattan", "gt_answers": [ "Bowery" ], "image_path": "./data/STVQA/VisualGenome/2/2401009.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blue sign behind the elephant?", "answer": "the elephant", "gt_answers": [ "San Ramosa" ], "image_path": "./data/STVQA/VisualGenome/2/2402498.jpg", "model_name": "BLIP2" }, { "question": "What number is on the paper?", "answer": "20", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000556162.jpg", "model_name": "BLIP2" }, { "question": "What number is on the fin of the lead plane?", "answer": "2", "gt_answers": [ "1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000246982.jpg", "model_name": "BLIP2" }, { "question": "What number is on the fin of the back plane?", "answer": "91", "gt_answers": [ "7" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000246982.jpg", "model_name": "BLIP2" }, { "question": "what number is given on flight", "answer": "flight number", "gt_answers": [ "N361PH" ], "image_path": "./data/STVQA/VisualGenome/1/2354005.jpg", "model_name": "BLIP2" }, { "question": "What company made the sewing machine?", "answer": "kenmore", "gt_answers": [ "brother" ], "image_path": "./data/STVQA/imageNet/n04179913_1798.JPEG", "model_name": "BLIP2" }, { "question": "What Number is on the Television?", "answer": "3", "gt_answers": [ "10" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000272741.jpg", "model_name": "BLIP2" }, { "question": "What is the name in the upper left corner of the image?", "answer": "johnny d", "gt_answers": [ "Angel Soler" ], "image_path": "./data/STVQA/imageNet/n02804610_19486.JPEG", "model_name": "BLIP2" }, { "question": "What is the first name of the tag?", "answer": "person", "gt_answers": [ "Phillip" ], "image_path": "./data/STVQA/imageNet/n01496331_11875.JPEG", "model_name": "BLIP2" }, { "question": "what does this sign say?", "answer": "stop", "gt_answers": [ "STOP ALL WAY", "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2315841.jpg", "model_name": "BLIP2" }, { "question": "what should a person do when seeing this sign?", "answer": "stop", "gt_answers": [ "STOP ALL WAY", "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2315841.jpg", "model_name": "BLIP2" }, { "question": "What does this sign represent?", "answer": "stop", "gt_answers": [ "STOP ALL WAY", "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2315841.jpg", "model_name": "BLIP2" }, { "question": "What is being celebrated?", "answer": "birthday", "gt_answers": [ "BIRTHDAY" ], "image_path": "./data/STVQA/VisualGenome/1/2374826.jpg", "model_name": "BLIP2" }, { "question": "What body organ is mentioned on the banner in the background?", "answer": "liver", "gt_answers": [ "Brains" ], "image_path": "./data/STVQA/VisualGenome/1/2374804.jpg", "model_name": "BLIP2" }, { "question": "WHAT MOVIE IS THE POSTER OF?", "answer": "the matrix", "gt_answers": [ "firefox" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000035094.jpg", "model_name": "BLIP2" }, { "question": "WHO IS THE STAR IN THE MOVIE POSTER?", "answer": "person", "gt_answers": [ "clint eastwood" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000035094.jpg", "model_name": "BLIP2" }, { "question": "WHO'S FIRST NAME IS ON WALL?", "answer": "person", "gt_answers": [ "noah" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000035094.jpg", "model_name": "BLIP2" }, { "question": "What colour is the woman's bag?", "answer": "brown", "gt_answers": [ "Blue", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/713352.jpg", "model_name": "BLIP2" }, { "question": "What popular body spray brand is printed on the sleeves of each player?", "answer": "person", "gt_answers": [ "AXE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000054375.jpg", "model_name": "BLIP2" }, { "question": "What letters are printed in the circle on each players hat?", "answer": "person", "gt_answers": [ "RC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000054375.jpg", "model_name": "BLIP2" }, { "question": "What is the emblem name on the shirt's white sleeve?", "answer": "person", "gt_answers": [ "XCEL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000340331.jpg", "model_name": "BLIP2" }, { "question": "what is the first word on the black shirt?", "answer": "man", "gt_answers": [ "rock" ], "image_path": "./data/STVQA/imageNet/n03141823_1627.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "esta", "gt_answers": [ "estafeta" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000284333.jpg", "model_name": "BLIP2" }, { "question": "WHAT ISN WRITTEN IN FRONT OF VEHICLE", "answer": "mustang", "gt_answers": [ "SUNSET CLASSICS", "SUNSETCLASSICS" ], "image_path": "./data/STVQA/imageNet/n03459775_12633.JPEG", "model_name": "BLIP2" }, { "question": "What is the label above the sweets on the left?", "answer": "mcdonalds", "gt_answers": [ "Mentos" ], "image_path": "./data/STVQA/icdar/img_669.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the motorcycle's shock absorber?", "answer": "person", "gt_answers": [ "YAMAHA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000323125.jpg", "model_name": "BLIP2" }, { "question": "What number is shown on the motorcycle?", "answer": "person", "gt_answers": [ "74" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000323125.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the purple bike?", "answer": "person", "gt_answers": [ "7466" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373700.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the batter?", "answer": "person", "gt_answers": [ "Gomez" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000551372.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the train engine?", "answer": "person", "gt_answers": [ "1040" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000321302.jpg", "model_name": "BLIP2" }, { "question": "What is the sign says?", "answer": "no parking", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/2/2413686.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the white sign?", "answer": "forklifts not allowed", "gt_answers": [ "Vehicles parked entirely at owners risk." ], "image_path": "./data/STVQA/VisualGenome/2/2412939.jpg", "model_name": "BLIP2" }, { "question": "What numbers are on the bus?", "answer": "123", "gt_answers": [ "123" ], "image_path": "./data/STVQA/VisualGenome/2/2413520.jpg", "model_name": "BLIP2" }, { "question": "What is the license plat number fo the white four door car?", "answer": "the license plate number of the white four door car", "gt_answers": [ "DL9C JO912", "DL9C J0912" ], "image_path": "./data/STVQA/VisualGenome/1/2354951.jpg", "model_name": "BLIP2" }, { "question": "what is the longest display text shown?", "answer": "samsung", "gt_answers": [ "SAMSUNG" ], "image_path": "./data/STVQA/IIIT_text/img_001057.jpg", "model_name": "BLIP2" }, { "question": "What is the shortest display text displayed?", "answer": "", "gt_answers": [ "ING" ], "image_path": "./data/STVQA/IIIT_text/img_001057.jpg", "model_name": "BLIP2" }, { "question": "What four letters appear before bus?", "answer": "nyc", "gt_answers": [ "NYCT" ], "image_path": "./data/STVQA/VisualGenome/1/1591919.jpg", "model_name": "BLIP2" }, { "question": "What company does the truck belong to?", "answer": "person", "gt_answers": [ "Retrograde Ltd." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000342132.jpg", "model_name": "BLIP2" }, { "question": "what percentage refer on top of fridge", "answer": "0 percent", "gt_answers": [ "0%" ], "image_path": "./data/STVQA/VisualGenome/1/2326710.jpg", "model_name": "BLIP2" }, { "question": "What does the shirt say?", "answer": "i'm with stupid", "gt_answers": [ "I'm with stupid" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352182.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "united airlines", "gt_answers": [ "UNITED" ], "image_path": "./data/STVQA/VisualGenome/1/2355482.jpg", "model_name": "BLIP2" }, { "question": "What name is written on the plane?", "answer": "united airlines", "gt_answers": [ "UNITED" ], "image_path": "./data/STVQA/VisualGenome/1/2355482.jpg", "model_name": "BLIP2" }, { "question": "Where is Capital Hill Neighborhood located?", "answer": "washington dc", "gt_answers": [ "Salt Lake City" ], "image_path": "./data/STVQA/VisualGenome/2/2404051.jpg", "model_name": "BLIP2" }, { "question": "What is another name for the street Bliss Ct.?", "answer": "bliss road", "gt_answers": [ "550 W." ], "image_path": "./data/STVQA/VisualGenome/2/2404051.jpg", "model_name": "BLIP2" }, { "question": "What is next to the insect to show their same size?", "answer": "toothpaste", "gt_answers": [ "Chapstick" ], "image_path": "./data/STVQA/imageNet/n02174001_2430.JPEG", "model_name": "BLIP2" }, { "question": "What word is in the red logo?", "answer": "1844", "gt_answers": [ "FULLER" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011482.jpg", "model_name": "BLIP2" }, { "question": "What number is black?", "answer": "18", "gt_answers": [ "184" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011482.jpg", "model_name": "BLIP2" }, { "question": "What number is red?", "answer": "18", "gt_answers": [ "100" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011482.jpg", "model_name": "BLIP2" }, { "question": "What is the college name on the sweatshirt?", "answer": "person", "gt_answers": [ "FRANKLIN MARSHALL", "FRANKLIN & MARSHALL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000086524.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the sporting company that is on the red sign?", "answer": "person", "gt_answers": [ "MODELL'S" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000411934.jpg", "model_name": "BLIP2" }, { "question": "Modell's is an advertisement for what type of goods?", "answer": "cigarettes", "gt_answers": [ "SPORTING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000411934.jpg", "model_name": "BLIP2" }, { "question": "What color is the exterior of the cardboard box?", "answer": "white", "gt_answers": [ "White", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2321857.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the top right ?", "answer": "lee kerker leather", "gt_answers": [ "IMA Webley Replica" ], "image_path": "./data/STVQA/imageNet/n03527444_23076.JPEG", "model_name": "BLIP2" }, { "question": "What is written on this car's licence plate?", "answer": "citroen", "gt_answers": [ "1415 DNZ" ], "image_path": "./data/STVQA/imageNet/n03670208_35397.JPEG", "model_name": "BLIP2" }, { "question": "When something is great you may call it this?", "answer": "grand", "gt_answers": [ "super" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000541900.jpg", "model_name": "BLIP2" }, { "question": "What number is on the black shirt?", "answer": "person", "gt_answers": [ "99" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000223299.jpg", "model_name": "BLIP2" }, { "question": "What brand is listed on the fencing?", "answer": "nike", "gt_answers": [ "SFR QUIKSILVER" ], "image_path": "./data/STVQA/VisualGenome/1/2366614.jpg", "model_name": "BLIP2" }, { "question": "Q: What does St. mean? A: Street", "answer": "talbot st shops & restaurants", "gt_answers": [ "st." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402847.jpg", "model_name": "BLIP2" }, { "question": "Q: What letter does the street start with? A: T", "answer": "talbot street", "gt_answers": [ "t" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000402847.jpg", "model_name": "BLIP2" }, { "question": "What is the name written on player's jersey?", "answer": "johnson", "gt_answers": [ "FISHER" ], "image_path": "./data/STVQA/VisualGenome/1/2350949.jpg", "model_name": "BLIP2" }, { "question": "What is the jersey number of player?", "answer": "23", "gt_answers": [ "46" ], "image_path": "./data/STVQA/VisualGenome/1/2350949.jpg", "model_name": "BLIP2" }, { "question": "What is the clothing brand?", "answer": "vans", "gt_answers": [ "LIFEGUARD" ], "image_path": "./data/STVQA/VisualGenome/1/2370412.jpg", "model_name": "BLIP2" }, { "question": "What is the year on the photo?", "answer": "2010", "gt_answers": [ "2005" ], "image_path": "./data/STVQA/imageNet/n02007558_6183.JPEG", "model_name": "BLIP2" }, { "question": "What is written in Wooden board", "answer": "elephant sanctuary", "gt_answers": [ "TRAINING CENTER" ], "image_path": "./data/STVQA/VisualGenome/1/2323337.jpg", "model_name": "BLIP2" }, { "question": "what is the number on the shirts?", "answer": "person", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000498706.jpg", "model_name": "BLIP2" }, { "question": "What is the number of skiers?", "answer": "three", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000498706.jpg", "model_name": "BLIP2" }, { "question": "What number is above TWIN PEAKS?", "answer": "2", "gt_answers": [ "200" ], "image_path": "./data/STVQA/VisualGenome/1/2320787.jpg", "model_name": "BLIP2" }, { "question": "What is the red street sign instructing you to do?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2320787.jpg", "model_name": "BLIP2" }, { "question": "what is the text in the blue sign?", "answer": "obama bid", "gt_answers": [ "OBAMA BIDEN" ], "image_path": "./data/STVQA/VisualGenome/2/2400245.jpg", "model_name": "BLIP2" }, { "question": "When does fonuts close?", "answer": "5 pm", "gt_answers": [ "6:00 PM" ], "image_path": "./data/STVQA/VisualGenome/1/2327263.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2325398.jpg", "model_name": "BLIP2" }, { "question": "What numerical version of LIGHTROOM is on the computer screen?", "answer": "6", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000405250.jpg", "model_name": "BLIP2" }, { "question": "What word comes after taste?", "answer": "texture", "gt_answers": [ "NATURALLY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420236.jpg", "model_name": "BLIP2" }, { "question": "what color is the woman's nail polish petting the dog?", "answer": "blue", "gt_answers": [ "Black" ], "image_path": "./data/STVQA/imageNet/n02110958_4249.JPEG", "model_name": "BLIP2" }, { "question": "What is the second word on the front of the player's jersey?", "answer": "person", "gt_answers": [ "VALLEY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000012434.jpg", "model_name": "BLIP2" }, { "question": "What type of crab is this?", "answer": "carabina maxima", "gt_answers": [ "Green Crab" ], "image_path": "./data/STVQA/imageNet/n01978455_8146.JPEG", "model_name": "BLIP2" }, { "question": "How many players are on the baseball field?", "answer": "two", "gt_answers": [ "Three", "two" ], "image_path": "./data/STVQA/VisualGenome/2/2401837.jpg", "model_name": "BLIP2" }, { "question": "What does the side of the truck say?", "answer": "shell", "gt_answers": [ "Shell" ], "image_path": "./data/STVQA/VisualGenome/2/2401021.jpg", "model_name": "BLIP2" }, { "question": "What is the price of the guillotine in Euro?", "answer": "it costs 99 euros", "gt_answers": [ "99" ], "image_path": "./data/STVQA/imageNet/n03467068_19017.JPEG", "model_name": "BLIP2" }, { "question": "How much does the tractor cost in the picture?", "answer": "it cost $600", "gt_answers": [ "653.2" ], "image_path": "./data/STVQA/imageNet/n04465501_4826.JPEG", "model_name": "BLIP2" }, { "question": "Where is Parkway motor co located?", "answer": "indianapolis, indiana", "gt_answers": [ "1065 Wisconsin ave" ], "image_path": "./data/STVQA/imageNet/n04465501_4826.JPEG", "model_name": "BLIP2" }, { "question": "What is the number on the side of the train?", "answer": "ms stl", "gt_answers": [ "654" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000200369.jpg", "model_name": "BLIP2" }, { "question": "What is the website watermarked in the bottom right corner of the image?", "answer": "rossbach", "gt_answers": [ "meine-reiseberichte.net" ], "image_path": "./data/STVQA/VisualGenome/1/2353514.jpg", "model_name": "BLIP2" }, { "question": "WHAT LETTERS ARE WRITTEN ON THE BIG WHITE BOX?", "answer": "person", "gt_answers": [ "ENEL ENEL", "Enel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000534513.jpg", "model_name": "BLIP2" }, { "question": "HOW MANY TIMES IS THE WORD FedEx WRITTEN ON THE CHAIR?", "answer": "two", "gt_answers": [ "FED EX 5 TIMES", "5" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000534513.jpg", "model_name": "BLIP2" }, { "question": "WHAT COLORS ARE THE LETTERS, COH?", "answer": "blue and white", "gt_answers": [ "COH RED", "red" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000534513.jpg", "model_name": "BLIP2" }, { "question": "What type of formula is this?", "answer": "infant formula", "gt_answers": [ "Original" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002160.jpg", "model_name": "BLIP2" }, { "question": "What is the hashtag on the computer?", "answer": "tfw", "gt_answers": [ "#apprentice" ], "image_path": "./data/STVQA/VisualGenome/1/2340768.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline that operate the plane?", "answer": "virgin airlines", "gt_answers": [ "Virgin America" ], "image_path": "./data/STVQA/VisualGenome/1/1593018.jpg", "model_name": "BLIP2" }, { "question": "What is the first brand on the sign?", "answer": "adidas", "gt_answers": [ "Sprint" ], "image_path": "./data/STVQA/VisualGenome/1/2370792.jpg", "model_name": "BLIP2" }, { "question": "What words are in yellow paint, on the sign?", "answer": "benidorm", "gt_answers": [ "Bienvenido A Mal Pais" ], "image_path": "./data/STVQA/VisualGenome/2/2414939.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the sign", "answer": "reserved", "gt_answers": [ "Rotunda" ], "image_path": "./data/STVQA/imageNet/n02892201_33320.JPEG", "model_name": "BLIP2" }, { "question": "What does the tennis bag have written on it?", "answer": "nike", "gt_answers": [ "Babolat" ], "image_path": "./data/STVQA/VisualGenome/1/2331695.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THIS SHOP?", "answer": "starbucks", "gt_answers": [ "OSAKA OHSHO" ], "image_path": "./data/STVQA/icdar/img_308.jpg", "model_name": "BLIP2" }, { "question": "What airline does this plane belong to?", "answer": "person", "gt_answers": [ "sas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000378586.jpg", "model_name": "BLIP2" }, { "question": "What airline name is on the plane?", "answer": "person", "gt_answers": [ "sas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000378586.jpg", "model_name": "BLIP2" }, { "question": "What three letters on the aircraft stand for Scandinavian Airline System?", "answer": "sas", "gt_answers": [ "sas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000378586.jpg", "model_name": "BLIP2" }, { "question": "What sponsor is advertised in the background wall?", "answer": "adidas", "gt_answers": [ "MERCEDES-BENZ" ], "image_path": "./data/STVQA/VisualGenome/1/2324911.jpg", "model_name": "BLIP2" }, { "question": "What is the name of bank shown ?", "answer": "citibank", "gt_answers": [ "Chase" ], "image_path": "./data/STVQA/VisualGenome/2/2418.jpg", "model_name": "BLIP2" }, { "question": "What kind of market is on the right?", "answer": "fish market", "gt_answers": [ "farmers ma" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000042371.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the toy on the box?", "answer": "space shuttle booster rockets", "gt_answers": [ "Space Shuttle & Booster Rockets" ], "image_path": "./data/STVQA/imageNet/n04266014_2940.JPEG", "model_name": "BLIP2" }, { "question": "What chocolate kept in the plate?", "answer": "milk chocolate", "gt_answers": [ "Kitkat", "Kit Kat" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000226075.jpg", "model_name": "BLIP2" }, { "question": "What word is on the child's shirt?", "answer": "baseball", "gt_answers": [ "Mets" ], "image_path": "./data/STVQA/VisualGenome/1/2336239.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the child's hat?", "answer": "m", "gt_answers": [ "M" ], "image_path": "./data/STVQA/VisualGenome/1/2336239.jpg", "model_name": "BLIP2" }, { "question": "what vitamins does it give you?", "answer": "a, b, c, d, e, f, g, h, i, j, k", "gt_answers": [ "A & C" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000011452.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the yellow license plate?", "answer": "audi", "gt_answers": [ "KEYES" ], "image_path": "./data/STVQA/imageNet/n02814533_7283.JPEG", "model_name": "BLIP2" }, { "question": "How many days are mentioned?", "answer": "two", "gt_answers": [ "7" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000425685.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bank?", "answer": "person", "gt_answers": [ "ASB" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000425685.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of the stand?", "answer": "newsagency", "gt_answers": [ "Newsagency" ], "image_path": "./data/STVQA/IIIT_text/img_000854.jpg", "model_name": "BLIP2" }, { "question": "What is written around the table in the front of the stand?", "answer": "newsagency", "gt_answers": [ "Book Sale" ], "image_path": "./data/STVQA/IIIT_text/img_000854.jpg", "model_name": "BLIP2" }, { "question": "Wich numbers are in the top of the bus?", "answer": "0 0", "gt_answers": [ "41 18" ], "image_path": "./data/STVQA/VisualGenome/1/2359833.jpg", "model_name": "BLIP2" }, { "question": "What words are written in pink letters on the window?", "answer": "prada", "gt_answers": [ "PINK PRESENTS" ], "image_path": "./data/STVQA/VisualGenome/2/2413006.jpg", "model_name": "BLIP2" }, { "question": "where is the red bus welcoming you?", "answer": "half moon bay", "gt_answers": [ "Half moon bay" ], "image_path": "./data/STVQA/VisualGenome/1/2349450.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the pub advertised on the red bus?", "answer": "half moon bay", "gt_answers": [ "Cameron's bus", "CAMERON'S PUB" ], "image_path": "./data/STVQA/VisualGenome/1/2349450.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the drink?", "answer": "coca cola", "gt_answers": [ "Coca Cola" ], "image_path": "./data/STVQA/IIIT_text/img_000283.jpg", "model_name": "BLIP2" }, { "question": "What is the website called?", "answer": "mycoke", "gt_answers": [ "myenjoyzone.com" ], "image_path": "./data/STVQA/IIIT_text/img_000283.jpg", "model_name": "BLIP2" }, { "question": "how many cell phones are on the table?", "answer": "three", "gt_answers": [ "5" ], "image_path": "./data/STVQA/VisualGenome/1/2368825.jpg", "model_name": "BLIP2" }, { "question": "What word is on the sign next to the red light?", "answer": "bike", "gt_answers": [ "Second" ], "image_path": "./data/STVQA/VisualGenome/2/2411638.jpg", "model_name": "BLIP2" }, { "question": "What number is underneath the number 65?", "answer": "65", "gt_answers": [ "81" ], "image_path": "./data/STVQA/VisualGenome/2/2411638.jpg", "model_name": "BLIP2" }, { "question": "What two words are underneath the picture of the bike?", "answer": "bike lane", "gt_answers": [ "Bike Lane" ], "image_path": "./data/STVQA/VisualGenome/2/2411638.jpg", "model_name": "BLIP2" }, { "question": "How many windows are on Benny's Shop?", "answer": "three", "gt_answers": [ "3" ], "image_path": "./data/STVQA/VisualGenome/1/2374906.jpg", "model_name": "BLIP2" }, { "question": "What is the letter on the flog above O?", "answer": "e", "gt_answers": [ "E" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000033505.jpg", "model_name": "BLIP2" }, { "question": "the time?", "answer": "the cat is watching the game", "gt_answers": [ "5:52", "5:52 PM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000257669.jpg", "model_name": "BLIP2" }, { "question": "What does the white and black right pointing sign say?", "answer": "tunis", "gt_answers": [ "Tunis" ], "image_path": "./data/STVQA/VisualGenome/1/2374735.jpg", "model_name": "BLIP2" }, { "question": "What does it say as a title in blue letters on the white sign?", "answer": "the dining hall", "gt_answers": [ "The Missing Man Table" ], "image_path": "./data/STVQA/VisualGenome/1/2367682.jpg", "model_name": "BLIP2" }, { "question": "What is the company in the photo?", "answer": "coco frio", "gt_answers": [ "Coco Frios" ], "image_path": "./data/STVQA/VisualGenome/1/2348197.jpg", "model_name": "BLIP2" }, { "question": "What is the web address of the company?", "answer": "com", "gt_answers": [ "www.myprofe.com" ], "image_path": "./data/STVQA/VisualGenome/1/2333295.jpg", "model_name": "BLIP2" }, { "question": "What airline is the airplane from?", "answer": "singapore airlines", "gt_answers": [ "Singapore Airlines" ], "image_path": "./data/STVQA/imageNet/n02690373_9218.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the green ensigns?", "answer": "prince", "gt_answers": [ "prince" ], "image_path": "./data/STVQA/VisualGenome/1/2324799.jpg", "model_name": "BLIP2" }, { "question": "What words are on the picture above the bed?", "answer": "i love you", "gt_answers": [ "A dream is a wish your heart makes." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000433236.jpg", "model_name": "BLIP2" }, { "question": "What does the word on the building say?", "answer": "samsung", "gt_answers": [ "ED AR KAROTITI Samsung" ], "image_path": "./data/STVQA/IIIT_text/img_001077.jpg", "model_name": "BLIP2" }, { "question": "what is the blue bus number?", "answer": "person", "gt_answers": [ "50" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000002639.jpg", "model_name": "BLIP2" }, { "question": "What is the team name on the baseball jersey?", "answer": "indians", "gt_answers": [ "Indians" ], "image_path": "./data/STVQA/VisualGenome/1/2362864.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name listed on the white controller?", "answer": "person", "gt_answers": [ "SONY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259790.jpg", "model_name": "BLIP2" }, { "question": "What is the brand listed on the black device?", "answer": "person", "gt_answers": [ "SONY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259790.jpg", "model_name": "BLIP2" }, { "question": "What time is it?", "answer": "it is noon", "gt_answers": [ "17:35" ], "image_path": "./data/STVQA/VisualGenome/1/1592645.jpg", "model_name": "BLIP2" }, { "question": "What do they sell next to roberto's cakes?", "answer": "sausages", "gt_answers": [ "Octopus" ], "image_path": "./data/STVQA/IIIT_text/3112.jpg", "model_name": "BLIP2" }, { "question": "Weather it has close or open", "answer": "open", "gt_answers": [ "OPEN" ], "image_path": "./data/STVQA/VisualGenome/1/2330150.jpg", "model_name": "BLIP2" }, { "question": "What is the top word displayed on the partition?", "answer": "person", "gt_answers": [ "BIRCH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000457149.jpg", "model_name": "BLIP2" }, { "question": "What is the bottom word displayed on the partition?", "answer": "person", "gt_answers": [ "HILL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000457149.jpg", "model_name": "BLIP2" }, { "question": "What is the number in the cap", "answer": "seven", "gt_answers": [ "17" ], "image_path": "./data/STVQA/imageNet/n02877765_15843.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the fruit?", "answer": "pomegranate", "gt_answers": [ "Pomegranate" ], "image_path": "./data/STVQA/imageNet/n07768694_9879.JPEG", "model_name": "BLIP2" }, { "question": "What is the first word written in white letters on a red background?", "answer": "person", "gt_answers": [ "sandwich" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000569591.jpg", "model_name": "BLIP2" }, { "question": "What are the five numbers after Odessa?", "answer": "odessa, ukraine", "gt_answers": [ "75014" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000569591.jpg", "model_name": "BLIP2" }, { "question": "What is the first word to the left of the wheelchair?", "answer": "person", "gt_answers": [ "interdit" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000569591.jpg", "model_name": "BLIP2" }, { "question": "What does the sign warn you not to forget?", "answer": "to lock your bike", "gt_answers": [ "your bicycle", "BICYCLE" ], "image_path": "./data/STVQA/IIIT_text/4460.jpg", "model_name": "BLIP2" }, { "question": "What is the title at the top of the white paper?", "answer": "bicycle passengers", "gt_answers": [ "Bicycle Passengers" ], "image_path": "./data/STVQA/IIIT_text/4460.jpg", "model_name": "BLIP2" }, { "question": "What is 1 serving size?", "answer": "1 cup", "gt_answers": [ "1 cup (245g)", "1 CUP" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008430.jpg", "model_name": "BLIP2" }, { "question": "How many servings per container?", "answer": "4", "gt_answers": [ "About 2.", "2" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008430.jpg", "model_name": "BLIP2" }, { "question": "How many calories in a serving?", "answer": "260 calories per serving", "gt_answers": [ "110" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008430.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company?", "answer": "uhaul", "gt_answers": [ "U-HAUL" ], "image_path": "./data/STVQA/VisualGenome/1/2368808.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this restaurant?", "answer": "person", "gt_answers": [ "THE WORKS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000248478.jpg", "model_name": "BLIP2" }, { "question": "What company is being advertised by the yellow entryway banners?", "answer": "lego", "gt_answers": [ "Lego" ], "image_path": "./data/STVQA/icdar/img_464.jpg", "model_name": "BLIP2" }, { "question": "What product is advertised on the two orange signs?", "answer": "lego", "gt_answers": [ "LEGO" ], "image_path": "./data/STVQA/icdar/img_464.jpg", "model_name": "BLIP2" }, { "question": "What is the first English word written on the blue sign?", "answer": "bank", "gt_answers": [ "reserve" ], "image_path": "./data/STVQA/IIIT_text/img_000978.jpg", "model_name": "BLIP2" }, { "question": "What name do you see on a boy's shirt?", "answer": "sam", "gt_answers": [ "EVERLAST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000306353.jpg", "model_name": "BLIP2" }, { "question": "What are the 6 characters that come after the dash at the top of the photo?", "answer": "a border collie", "gt_answers": [ "DAJ064" ], "image_path": "./data/STVQA/imageNet/n02106030_3624.JPEG", "model_name": "BLIP2" }, { "question": "What are the 9 characters that come before the dash at the top of the photo?", "answer": "border collie", "gt_answers": [ "81120016H" ], "image_path": "./data/STVQA/imageNet/n02106030_3624.JPEG", "model_name": "BLIP2" }, { "question": "What is the text in this image? Exclude any symbols", "answer": "a border collie in the grass", "gt_answers": [ "81120016H DAJ064" ], "image_path": "./data/STVQA/imageNet/n02106030_3624.JPEG", "model_name": "BLIP2" }, { "question": "What are the initials on the front of the train?", "answer": "sb", "gt_answers": [ "DB" ], "image_path": "./data/STVQA/VisualGenome/1/2350131.jpg", "model_name": "BLIP2" }, { "question": "What does the sign with black letter on the railing say?", "answer": "red bull", "gt_answers": [ "Red Bull" ], "image_path": "./data/STVQA/VisualGenome/1/2351307.jpg", "model_name": "BLIP2" }, { "question": "What advertiser is written on the bottom step?", "answer": "red bull", "gt_answers": [ "Converse" ], "image_path": "./data/STVQA/VisualGenome/1/2351307.jpg", "model_name": "BLIP2" }, { "question": "Who's name is on the top of this?", "answer": "person", "gt_answers": [ "Yngwie Johann Malmsteen" ], "image_path": "./data/STVQA/imageNet/n03272010_605.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the musician?", "answer": "person", "gt_answers": [ "Yngwie Johann Malmsteen" ], "image_path": "./data/STVQA/imageNet/n03272010_605.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the building that is painted white?", "answer": "victoria station", "gt_answers": [ "The Wellington.", "THE WELLINGTON" ], "image_path": "./data/STVQA/IIIT_text/4494.jpg", "model_name": "BLIP2" }, { "question": "What website is advertised on the window?", "answer": "youtube", "gt_answers": [ "mamasandpapas.com" ], "image_path": "./data/STVQA/VisualGenome/1/2370288.jpg", "model_name": "BLIP2" }, { "question": "What is the tagline for the store?", "answer": "if you can dream it we can make it", "gt_answers": [ "Quality & Value for Everyone" ], "image_path": "./data/STVQA/VisualGenome/1/2370288.jpg", "model_name": "BLIP2" }, { "question": "Which red letters are on the man's green shirt?", "answer": "person", "gt_answers": [ "MWT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000360147.jpg", "model_name": "BLIP2" }, { "question": "Where is the lane closed?", "answer": "ahead", "gt_answers": [ "Ahead" ], "image_path": "./data/STVQA/VisualGenome/1/2364545.jpg", "model_name": "BLIP2" }, { "question": "What company works in this building?", "answer": "deutsche bank", "gt_answers": [ "IBM" ], "image_path": "./data/STVQA/IIIT_text/img_000027.jpg", "model_name": "BLIP2" }, { "question": "What type of animal is it?", "answer": "snake", "gt_answers": [ "Snake" ], "image_path": "./data/STVQA/imageNet/n01753488_2778.JPEG", "model_name": "BLIP2" }, { "question": "What airline is displayed?", "answer": "delta", "gt_answers": [ "Delta" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000444210.jpg", "model_name": "BLIP2" }, { "question": "What is the identification number of the plane?", "answer": "msn 370", "gt_answers": [ "3754" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000444210.jpg", "model_name": "BLIP2" }, { "question": "What SPF level is the sun screen?", "answer": "spf 45", "gt_answers": [ "45" ], "image_path": "./data/STVQA/imageNet/n04357314_14941.JPEG", "model_name": "BLIP2" }, { "question": "What word is written above \"Sun Block\"?", "answer": "no alcohol", "gt_answers": [ "Babies" ], "image_path": "./data/STVQA/imageNet/n04357314_14941.JPEG", "model_name": "BLIP2" }, { "question": "what does MW stand for?", "answer": "my world", "gt_answers": [ "mark warner" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000461509.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name on the back of the device?", "answer": "sony", "gt_answers": [ "Cingular" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116735.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the equipment?", "answer": "sony", "gt_answers": [ "Beno" ], "image_path": "./data/STVQA/imageNet/n04009552_7776.JPEG", "model_name": "BLIP2" }, { "question": "What are the numbers listed below the innitials?", "answer": "0", "gt_answers": [ "305" ], "image_path": "./data/STVQA/VisualGenome/2/2405366.jpg", "model_name": "BLIP2" }, { "question": "What is the third word in the bottom right corner of the frame?", "answer": "person", "gt_answers": [ "PHOTO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000118186.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the photographer who took this photo?", "answer": "person", "gt_answers": [ "MICHAEL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000118186.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the photographer who took this photo?", "answer": "person", "gt_answers": [ "TOONE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000118186.jpg", "model_name": "BLIP2" }, { "question": "What is the earliest year in the watermark?", "answer": "1860", "gt_answers": [ "2005" ], "image_path": "./data/STVQA/imageNet/n02009912_14063.JPEG", "model_name": "BLIP2" }, { "question": "What is the latest year in the watermark?", "answer": "2010", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/imageNet/n02009912_14063.JPEG", "model_name": "BLIP2" }, { "question": "What is written in largest type on the package of toilet paper rolls?", "answer": "toilet paper", "gt_answers": [ "Edet" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000173751.jpg", "model_name": "BLIP2" }, { "question": "what is written in the back", "answer": "prince", "gt_answers": [ "prince" ], "image_path": "./data/STVQA/VisualGenome/1/2328924.jpg", "model_name": "BLIP2" }, { "question": "What word is advertised in neon lights?", "answer": "person", "gt_answers": [ "city" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000358100.jpg", "model_name": "BLIP2" }, { "question": "What is the six-letter name of the street listed in the image?", "answer": "person", "gt_answers": [ "NEWTON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000004481.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate on the red bus?", "answer": "london", "gt_answers": [ "Y849 TGH" ], "image_path": "./data/STVQA/VisualGenome/1/285954.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the snake?", "answer": "cobra", "gt_answers": [ "punchstock" ], "image_path": "./data/STVQA/imageNet/n01748264_15413.JPEG", "model_name": "BLIP2" }, { "question": "What does the clock say?", "answer": "san francisco", "gt_answers": [ "6:36:55" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000020524.jpg", "model_name": "BLIP2" }, { "question": "Where is the mug from?", "answer": "san francisco", "gt_answers": [ "San Francisco", "SAN FRANCISO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000020524.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the bar on the right side of the sign?", "answer": "grand opera", "gt_answers": [ "monk bar" ], "image_path": "./data/STVQA/VisualGenome/1/2367332.jpg", "model_name": "BLIP2" }, { "question": "What is the smallest number on the clock face?", "answer": "12", "gt_answers": [ "1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000354569.jpg", "model_name": "BLIP2" }, { "question": "What kind of food is in this picture?", "answer": "sandwiches", "gt_answers": [ "Sandwich" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000206.jpg", "model_name": "BLIP2" }, { "question": "What street is the woman walking on?", "answer": "6th street", "gt_answers": [ "Jose Sarria Ct" ], "image_path": "./data/STVQA/VisualGenome/2/2416133.jpg", "model_name": "BLIP2" }, { "question": "What is the name display in the picture?", "answer": "person", "gt_answers": [ "daniel", "DANIEL BLEUMINK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499360.jpg", "model_name": "BLIP2" }, { "question": "What number is display in the bus?", "answer": "person", "gt_answers": [ "102" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499360.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the image?", "answer": "click", "gt_answers": [ "Click" ], "image_path": "./data/STVQA/imageNet/n03146219_1120.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the company on the front shuttle?", "answer": "mta transit", "gt_answers": [ "Dartline" ], "image_path": "./data/STVQA/VisualGenome/1/2334568.jpg", "model_name": "BLIP2" }, { "question": "What does the road sign say?", "answer": "one way", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/2/2416359.jpg", "model_name": "BLIP2" }, { "question": "Which street is shown on the sign?", "answer": "main street", "gt_answers": [ "Moor St", "MOOR" ], "image_path": "./data/STVQA/VisualGenome/1/2361475.jpg", "model_name": "BLIP2" }, { "question": "Where can I get Pepsi?", "answer": "sun valley market", "gt_answers": [ "Sun Valley Market" ], "image_path": "./data/STVQA/VisualGenome/1/2343629.jpg", "model_name": "BLIP2" }, { "question": "What Avenue is the market on?", "answer": "sun valley avenue", "gt_answers": [ "10th Avenue" ], "image_path": "./data/STVQA/VisualGenome/1/2343629.jpg", "model_name": "BLIP2" }, { "question": "What type of food comes in this package?", "answer": "chicken", "gt_answers": [ "Chicken Burgers" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029288.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the bus?", "answer": "person", "gt_answers": [ "Biobus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000382320.jpg", "model_name": "BLIP2" }, { "question": "What brand is this watch?", "answer": "konus", "gt_answers": [ "Konus" ], "image_path": "./data/STVQA/imageNet/n03197337_4422.JPEG", "model_name": "BLIP2" }, { "question": "What time does the left watch say?", "answer": "12 o clock", "gt_answers": [ "12:00" ], "image_path": "./data/STVQA/imageNet/n03197337_4422.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS THE BUS NUMBER?", "answer": "person", "gt_answers": [ "153" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000265351.jpg", "model_name": "BLIP2" }, { "question": "WHERE DOES THE BUS GO?", "answer": "to the city centre", "gt_answers": [ "CASTLEFORD VIA ROTHWELL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000265351.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE BUSES LICENSE PLATE?", "answer": "bh bh", "gt_answers": [ "S481 ANW" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000265351.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the one brand sponsor visible in this image?", "answer": "adidas", "gt_answers": [ "POLO" ], "image_path": "./data/STVQA/VisualGenome/1/2377341.jpg", "model_name": "BLIP2" }, { "question": "What does it say on this building?", "answer": "vodafone", "gt_answers": [ "vodafone" ], "image_path": "./data/STVQA/IIIT_text/img_001239.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the plane?", "answer": "amerijet", "gt_answers": [ "BONANZA" ], "image_path": "./data/STVQA/VisualGenome/2/2406221.jpg", "model_name": "BLIP2" }, { "question": "What number is the park?", "answer": "essex park", "gt_answers": [ "n.3", "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000286144.jpg", "model_name": "BLIP2" }, { "question": "What kind of food is sold here?", "answer": "chicken", "gt_answers": [ "Sushi" ], "image_path": "./data/STVQA/icdar/img_710.jpg", "model_name": "BLIP2" }, { "question": "What brand is sponsored in this tennis game?", "answer": "adidas", "gt_answers": [ "Lacoste" ], "image_path": "./data/STVQA/VisualGenome/1/2346220.jpg", "model_name": "BLIP2" }, { "question": "What action is described from the first three letters of the bold, teal lettering?", "answer": "digital", "gt_answers": [ "DIG" ], "image_path": "./data/STVQA/VisualGenome/2/2400547.jpg", "model_name": "BLIP2" }, { "question": "who is the author of the book?", "answer": "charles dickens", "gt_answers": [ "judith kerr" ], "image_path": "./data/STVQA/VisualGenome/1/2316672.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say in the middle picture?", "answer": "no parking sign", "gt_answers": [ "BAKER MARKET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254714.jpg", "model_name": "BLIP2" }, { "question": "What does the sign above the door say?", "answer": "laboratory", "gt_answers": [ "Exit" ], "image_path": "./data/STVQA/imageNet/n03630383_6511.JPEG", "model_name": "BLIP2" }, { "question": "What is the logo on the man's labcoat?", "answer": "pfizer", "gt_answers": [ "TSG" ], "image_path": "./data/STVQA/imageNet/n03630383_6511.JPEG", "model_name": "BLIP2" }, { "question": "What road is on the sign?", "answer": "college rd", "gt_answers": [ "College rd" ], "image_path": "./data/STVQA/VisualGenome/1/2317108.jpg", "model_name": "BLIP2" }, { "question": "What brand is the forklift?", "answer": "hyster", "gt_answers": [ "Xilin" ], "image_path": "./data/STVQA/imageNet/n03384352_7342.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOUR OF DRESS", "answer": "black", "gt_answers": [ "BLACK" ], "image_path": "./data/STVQA/VisualGenome/1/2350954.jpg", "model_name": "BLIP2" }, { "question": "What direction is the sign pointing out?", "answer": "east", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/2/2416347.jpg", "model_name": "BLIP2" }, { "question": "What word is printed in white on a red background?", "answer": "one way", "gt_answers": [ "Pedestrians" ], "image_path": "./data/STVQA/VisualGenome/2/2416347.jpg", "model_name": "BLIP2" }, { "question": "What are the last two numbers on the license plate?", "answer": "0", "gt_answers": [ "39" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116196.jpg", "model_name": "BLIP2" }, { "question": "What is the word after the word POLICE on the back of the boat?", "answer": "patrol", "gt_answers": [ "ENFORCEMENT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116196.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the boat directly below the window?", "answer": "patrol", "gt_answers": [ "POLICE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116196.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the sign?", "answer": "this", "gt_answers": [ "This" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000120595.jpg", "model_name": "BLIP2" }, { "question": "What year is the photo?", "answer": "1940", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/imageNet/n03041632_34505.JPEG", "model_name": "BLIP2" }, { "question": "What is the cab number?", "answer": "57", "gt_answers": [ "Y809" ], "image_path": "./data/STVQA/imageNet/n03770679_22242.JPEG", "model_name": "BLIP2" }, { "question": "What telephone number is on the cab?", "answer": "555 555", "gt_answers": [ "426-6262" ], "image_path": "./data/STVQA/imageNet/n03770679_22242.JPEG", "model_name": "BLIP2" }, { "question": "What colour is the sky", "answer": "blue", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/imageNet/n04562935_21369.JPEG", "model_name": "BLIP2" }, { "question": "What word is listed on the blue boat?", "answer": "sail", "gt_answers": [ "WATERMARK" ], "image_path": "./data/STVQA/imageNet/n03216828_56117.JPEG", "model_name": "BLIP2" }, { "question": "What is the engine number?", "answer": "c40 8", "gt_answers": [ "6814" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000366058.jpg", "model_name": "BLIP2" }, { "question": "What is the first word in all caps on the central arch?", "answer": "person", "gt_answers": [ "LES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000391053.jpg", "model_name": "BLIP2" }, { "question": "What year in on the picture", "answer": "1970's", "gt_answers": [ "2007" ], "image_path": "./data/STVQA/imageNet/n03337140_399.JPEG", "model_name": "BLIP2" }, { "question": "What game is being played in this image?", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/1/2316076.jpg", "model_name": "BLIP2" }, { "question": "What is the 3 number combination displayed in this image?", "answer": "617", "gt_answers": [ "524" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000132262.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign say?", "answer": "hollywood", "gt_answers": [ "Hollywood" ], "image_path": "./data/STVQA/IIIT_text/img_000581.jpg", "model_name": "BLIP2" }, { "question": "What is the cost of the objects in the center of the image", "answer": "ice cream", "gt_answers": [ "$7.55" ], "image_path": "./data/STVQA/icdar/img_607.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the top line of the girl's shirt?", "answer": "star", "gt_answers": [ "Sing" ], "image_path": "./data/STVQA/VisualGenome/1/2361655.jpg", "model_name": "BLIP2" }, { "question": "What sport is being played?", "answer": "cross country skiing", "gt_answers": [ "Skiiing" ], "image_path": "./data/STVQA/VisualGenome/1/1159926.jpg", "model_name": "BLIP2" }, { "question": "How far is Santa Cruz according to the sign?", "answer": "20 miles", "gt_answers": [ "20" ], "image_path": "./data/STVQA/VisualGenome/1/2368248.jpg", "model_name": "BLIP2" }, { "question": "What location is 68 miles away?", "answer": "san francisco", "gt_answers": [ "Monterey" ], "image_path": "./data/STVQA/VisualGenome/1/2368248.jpg", "model_name": "BLIP2" }, { "question": "What number is on the jersey?", "answer": "person", "gt_answers": [ "29" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000513456.jpg", "model_name": "BLIP2" }, { "question": "What is the logo on the teddy bear's shirt?", "answer": "skull and crossbones", "gt_answers": [ "Streetbear" ], "image_path": "./data/STVQA/VisualGenome/1/2363834.jpg", "model_name": "BLIP2" }, { "question": "What does the sign by the tracks say?", "answer": "speed limit 25 mph", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2351327.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the shop with the black background?", "answer": "john smith", "gt_answers": [ "Jones" ], "image_path": "./data/STVQA/VisualGenome/2/734.jpg", "model_name": "BLIP2" }, { "question": "What word is advertised in the window at Jones?", "answer": "sale", "gt_answers": [ "sale" ], "image_path": "./data/STVQA/VisualGenome/2/734.jpg", "model_name": "BLIP2" }, { "question": "What city is on the Jersey?", "answer": "kansas city", "gt_answers": [ "Kansas City.", "KANSAS CITY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000206697.jpg", "model_name": "BLIP2" }, { "question": "What kind of juice is pictured here", "answer": "orange juice", "gt_answers": [ "Tomato Juice", "TOMATO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000481530.jpg", "model_name": "BLIP2" }, { "question": "What kind of alcohol is pictured here", "answer": "beer and coffee", "gt_answers": [ "Vodka" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000481530.jpg", "model_name": "BLIP2" }, { "question": "What is the Brand name?", "answer": "mountain dew", "gt_answers": [ "Mountain Dew." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000003336.jpg", "model_name": "BLIP2" }, { "question": "what does the sticker on the bananna say", "answer": "it says bananas", "gt_answers": [ "fyffes" ], "image_path": "./data/STVQA/VisualGenome/1/2324338.jpg", "model_name": "BLIP2" }, { "question": "What is the name listed in gold on the building?", "answer": "eat", "gt_answers": [ "Zizzi" ], "image_path": "./data/STVQA/IIIT_text/2947.jpg", "model_name": "BLIP2" }, { "question": "What airlines is this?", "answer": "klm airlines", "gt_answers": [ "KLM" ], "image_path": "./data/STVQA/VisualGenome/1/2335366.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of this cycle?", "answer": "mountain bike", "gt_answers": [ "TROPHY AUTHOR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000136415.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the business on the sign?", "answer": "person", "gt_answers": [ "GOODELL'S ERITAGE UPHOLSTERY CO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293931.jpg", "model_name": "BLIP2" }, { "question": "how much does a ticket cost for adults?", "answer": "\u00a330", "gt_answers": [ "18" ], "image_path": "./data/STVQA/VisualGenome/2/2404635.jpg", "model_name": "BLIP2" }, { "question": "how much does a ticket cost for students/seniors?", "answer": "it is free for students", "gt_answers": [ "16" ], "image_path": "./data/STVQA/VisualGenome/2/2404635.jpg", "model_name": "BLIP2" }, { "question": "What does it say after Fly?", "answer": "fly", "gt_answers": [ "Kingfisher" ], "image_path": "./data/STVQA/VisualGenome/1/2354663.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the pizza place?", "answer": "mad pizza", "gt_answers": [ "Maddio's" ], "image_path": "./data/STVQA/VisualGenome/1/2341898.jpg", "model_name": "BLIP2" }, { "question": "Parking prevent which place?", "answer": "runways", "gt_answers": [ "Runaways" ], "image_path": "./data/STVQA/VisualGenome/1/2346221.jpg", "model_name": "BLIP2" }, { "question": "What number is diplayed on the right fielder's uniform?", "answer": "person", "gt_answers": [ "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000544240.jpg", "model_name": "BLIP2" }, { "question": "What is the advertisment for in the background?", "answer": "person", "gt_answers": [ "X-RAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000544240.jpg", "model_name": "BLIP2" }, { "question": "What does the bus say?", "answer": "san diego", "gt_answers": [ "Guest Shuttle" ], "image_path": "./data/STVQA/VisualGenome/2/2917.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign say?", "answer": "no trespassing", "gt_answers": [ "CSX TRANSPORTATION NO TRESPASSING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355357.jpg", "model_name": "BLIP2" }, { "question": "What is the telephone number on the Hot Spot sign?", "answer": "+1 416 888 8868", "gt_answers": [ "416-971-6629" ], "image_path": "./data/STVQA/VisualGenome/1/2322040.jpg", "model_name": "BLIP2" }, { "question": "Which road is shownshown?", "answer": "newbury road", "gt_answers": [ "Newbury Road" ], "image_path": "./data/STVQA/VisualGenome/1/2318102.jpg", "model_name": "BLIP2" }, { "question": "planes number?", "answer": "person", "gt_answers": [ "423" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000465395.jpg", "model_name": "BLIP2" }, { "question": "What team is up to bat", "answer": "the mariners", "gt_answers": [ "Mariners" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048548.jpg", "model_name": "BLIP2" }, { "question": "What is the street address of the tall building in the image?", "answer": "london road, london, england london, england", "gt_answers": [ "140 London Wall.", "LONDON WALL" ], "image_path": "./data/STVQA/IIIT_text/3547.jpg", "model_name": "BLIP2" }, { "question": "What is the code in the bottom left corner?", "answer": "the code in the bottom left corner", "gt_answers": [ "ddc95" ], "image_path": "./data/STVQA/imageNet/n04204347_6372.JPEG", "model_name": "BLIP2" }, { "question": "What is the white tag on the bottom left?", "answer": "a white tag", "gt_answers": [ "ddc95" ], "image_path": "./data/STVQA/imageNet/n04204347_6372.JPEG", "model_name": "BLIP2" }, { "question": "What is the model of the laptop, as shown at the bottom of the screen?", "answer": "person", "gt_answers": [ "MacBook" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000361255.jpg", "model_name": "BLIP2" }, { "question": "What is the blue bus' license plate?", "answer": "dcb 003", "gt_answers": [ "X742 JCS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000087845.jpg", "model_name": "BLIP2" }, { "question": "What is the company of the blue bus?", "answer": "person", "gt_answers": [ "STAGECOACH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000087845.jpg", "model_name": "BLIP2" }, { "question": "What is the bus' destination?", "answer": "leeds", "gt_answers": [ "Kirkcaldy mid stree", "KIRKCALDY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000087845.jpg", "model_name": "BLIP2" }, { "question": "Which airlines does that plane work for?", "answer": "hong kong airlines", "gt_answers": [ "shanghai airlines" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000314681.jpg", "model_name": "BLIP2" }, { "question": "Who took the photo?", "answer": "person", "gt_answers": [ "christopher chau" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000314681.jpg", "model_name": "BLIP2" }, { "question": "When was the photo taken?", "answer": "2018", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000314681.jpg", "model_name": "BLIP2" }, { "question": "What number is on the train?", "answer": "person", "gt_answers": [ "66713" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000043506.jpg", "model_name": "BLIP2" }, { "question": "What number is on the post?", "answer": "1", "gt_answers": [ "1028" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000043506.jpg", "model_name": "BLIP2" }, { "question": "What dies the little boy's shirt says?", "answer": "person", "gt_answers": [ "ATHLETICS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000294850.jpg", "model_name": "BLIP2" }, { "question": "What number is on the truck door?", "answer": "person", "gt_answers": [ "30" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000504099.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "nyc bus number 7", "gt_answers": [ "3936" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000502197.jpg", "model_name": "BLIP2" }, { "question": "What is the first word to the right of the photo?", "answer": "shoppers", "gt_answers": [ "Arnold" ], "image_path": "./data/STVQA/icdar/img_224.jpg", "model_name": "BLIP2" }, { "question": "What is below the stop sign?", "answer": "a stop sign", "gt_answers": [ "3 way" ], "image_path": "./data/STVQA/VisualGenome/1/2366732.jpg", "model_name": "BLIP2" }, { "question": "What is written on the truck?", "answer": "city of san diego", "gt_answers": [ "SCRAPE-DON'T RINSE", "SCRAPE - DON'T RINSE" ], "image_path": "./data/STVQA/VisualGenome/1/2326816.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store on the far right?", "answer": "cdiscount", "gt_answers": [ "Cudule", "GUDULE" ], "image_path": "./data/STVQA/VisualGenome/1/2344521.jpg", "model_name": "BLIP2" }, { "question": "What name is on the bottom left of the picture?", "answer": "person", "gt_answers": [ "Deo Araujo" ], "image_path": "./data/STVQA/IIIT_text/6490.jpg", "model_name": "BLIP2" }, { "question": "What number is on the right side of the sign?", "answer": "1", "gt_answers": [ "2700" ], "image_path": "./data/STVQA/VisualGenome/2/2400260.jpg", "model_name": "BLIP2" }, { "question": "What is the street name on the sign?", "answer": "clay st", "gt_answers": [ "Clay St", "CLAY" ], "image_path": "./data/STVQA/VisualGenome/2/2400260.jpg", "model_name": "BLIP2" }, { "question": "What is the street name on the sign?", "answer": "clay st", "gt_answers": [ "Clay St", "CLAY" ], "image_path": "./data/STVQA/VisualGenome/2/2400260.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the building?", "answer": "log cabin", "gt_answers": [ "Visitor Information Center" ], "image_path": "./data/STVQA/VisualGenome/1/2354846.jpg", "model_name": "BLIP2" }, { "question": "What website can I find information about new era?", "answer": "new era caps", "gt_answers": [ "NEWERACAP.COM" ], "image_path": "./data/STVQA/VisualGenome/1/2341193.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the plane?", "answer": "person", "gt_answers": [ "IndiGo" ], "image_path": "./data/STVQA/IIIT_text/img_000710.jpg", "model_name": "BLIP2" }, { "question": "What was this theater called?", "answer": "plaza", "gt_answers": [ "Plaza" ], "image_path": "./data/STVQA/imageNet/n03032252_26941.JPEG", "model_name": "BLIP2" }, { "question": "What is the first movie showing?", "answer": "person", "gt_answers": [ "Beloved" ], "image_path": "./data/STVQA/imageNet/n03032252_26941.JPEG", "model_name": "BLIP2" }, { "question": "What is the second movie showing?", "answer": "person", "gt_answers": [ "Holy man Ronin" ], "image_path": "./data/STVQA/imageNet/n03032252_26941.JPEG", "model_name": "BLIP2" }, { "question": "Closest store name?", "answer": "walmart", "gt_answers": [ "Masa" ], "image_path": "./data/STVQA/icdar/test_img_61.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the green sign in the top right of the image?", "answer": "exit", "gt_answers": [ "EXIT" ], "image_path": "./data/STVQA/icdar/test_img_252.jpg", "model_name": "BLIP2" }, { "question": "What kind of company is Santa Fe Natural?", "answer": "tobacco company", "gt_answers": [ "Tobacco" ], "image_path": "./data/STVQA/imageNet/n03666591_5876.JPEG", "model_name": "BLIP2" }, { "question": "How far is it to the wall?", "answer": "it's about 30 feet", "gt_answers": [ "340" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000074090.jpg", "model_name": "BLIP2" }, { "question": "What is the catcher's number?", "answer": "person", "gt_answers": [ "19" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000074090.jpg", "model_name": "BLIP2" }, { "question": "What are the last two letters of the catcher's name?", "answer": "person", "gt_answers": [ "ry" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000074090.jpg", "model_name": "BLIP2" }, { "question": "What station is this game being shown on?", "answer": "nippon television", "gt_answers": [ "NESN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000133510.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the umpires sleeve?", "answer": "3", "gt_answers": [ "53" ], "image_path": "./data/STVQA/VisualGenome/1/2334046.jpg", "model_name": "BLIP2" }, { "question": "What team does the batter play for?", "answer": "new york mets", "gt_answers": [ "Mets" ], "image_path": "./data/STVQA/VisualGenome/1/2334046.jpg", "model_name": "BLIP2" }, { "question": "What website is being advertised in the stands?", "answer": "com", "gt_answers": [ "neweracap.com" ], "image_path": "./data/STVQA/VisualGenome/1/2334046.jpg", "model_name": "BLIP2" }, { "question": "What radio station is being advertised for New Jersey?", "answer": "wgl fm", "gt_answers": [ "Jersey 101.5" ], "image_path": "./data/STVQA/VisualGenome/1/2348077.jpg", "model_name": "BLIP2" }, { "question": "What college is being advertised?", "answer": "rutgers university", "gt_answers": [ "Thomas Edison State College" ], "image_path": "./data/STVQA/VisualGenome/1/2348077.jpg", "model_name": "BLIP2" }, { "question": "What amusement park is being advertised?", "answer": "amusement park", "gt_answers": [ "Busch Gardens" ], "image_path": "./data/STVQA/VisualGenome/1/2348077.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the Chinese restaurant?", "answer": "tom tom", "gt_answers": [ "Dried Beef King" ], "image_path": "./data/STVQA/VisualGenome/2/2404653.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "tom", "gt_answers": [ "Mott", "MOTT ST" ], "image_path": "./data/STVQA/VisualGenome/2/2404653.jpg", "model_name": "BLIP2" }, { "question": "What is written above the Mott Street sign?", "answer": "mott street", "gt_answers": [ "Way" ], "image_path": "./data/STVQA/VisualGenome/2/2404653.jpg", "model_name": "BLIP2" }, { "question": "What's the date?", "answer": "january 1st", "gt_answers": [ "08/01/13" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007389.jpg", "model_name": "BLIP2" }, { "question": "What is the number that was dialed?", "answer": "cisco ip phone", "gt_answers": [ "421250" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007389.jpg", "model_name": "BLIP2" }, { "question": "What does the board read at the very top (in English)?", "answer": " ", "gt_answers": [ "Additional Services" ], "image_path": "./data/STVQA/icdar/img_78.jpg", "model_name": "BLIP2" }, { "question": "what is the first location name on the shirt", "answer": "london", "gt_answers": [ "london" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000227337.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the second location on the shirt", "answer": "berlin", "gt_answers": [ "new york" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000227337.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the third location on the shirt", "answer": "berlin", "gt_answers": [ "Berlin" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000227337.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of the bike on the left?", "answer": "sm sm", "gt_answers": [ "ka.0.9 ef..9690" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000035132.jpg", "model_name": "BLIP2" }, { "question": "What letters are on the player's hat?", "answer": "stl", "gt_answers": [ "GBS" ], "image_path": "./data/STVQA/VisualGenome/1/2347290.jpg", "model_name": "BLIP2" }, { "question": "What ave is named on a sign?", "answer": "rakeen", "gt_answers": [ "Sergio Dukes ave", "SERGIO DUKES" ], "image_path": "./data/STVQA/VisualGenome/1/2374016.jpg", "model_name": "BLIP2" }, { "question": "What street is on a sign?", "answer": "rakeen street", "gt_answers": [ "Ordero Hilliard St" ], "image_path": "./data/STVQA/VisualGenome/1/2374016.jpg", "model_name": "BLIP2" }, { "question": "what is the street on top of gay st?", "answer": "gay street", "gt_answers": [ "christopher st", "Christopher" ], "image_path": "./data/STVQA/VisualGenome/1/2325704.jpg", "model_name": "BLIP2" }, { "question": "what kind of mushrooms?", "answer": "oyster mushrooms", "gt_answers": [ "Organic" ], "image_path": "./data/STVQA/VisualGenome/1/2349482.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the blue sex shop?", "answer": "super mags", "gt_answers": [ "Super Mags" ], "image_path": "./data/STVQA/VisualGenome/1/2375848.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "no number", "gt_answers": [ "197" ], "image_path": "./data/STVQA/icdar/test_img_26.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to the airport", "gt_answers": [ "JVR Town Hall RD", "JUR TOWN HALL RD" ], "image_path": "./data/STVQA/icdar/test_img_26.jpg", "model_name": "BLIP2" }, { "question": "Who manufactured the bus?", "answer": "lexus", "gt_answers": [ "Mitsubishi Electric" ], "image_path": "./data/STVQA/icdar/test_img_26.jpg", "model_name": "BLIP2" }, { "question": "What does the card on the bottom of the side of the fridge way?", "answer": "it's person's birthday", "gt_answers": [ "WE WANT YOU" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000062763.jpg", "model_name": "BLIP2" }, { "question": "Which person is from Australia?", "answer": "john buchanan", "gt_answers": [ "Bryce Undy" ], "image_path": "./data/STVQA/imageNet/n03584254_2289.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the field?", "answer": "clint thomas park", "gt_answers": [ "Blair Field" ], "image_path": "./data/STVQA/imageNet/n04149813_11043.JPEG", "model_name": "BLIP2" }, { "question": "What restaurant name is on the left side of the board?", "answer": "cline thompson's lodge", "gt_answers": [ "OUTBACK" ], "image_path": "./data/STVQA/imageNet/n04149813_11043.JPEG", "model_name": "BLIP2" }, { "question": "what measurement given on board", "answer": "length", "gt_answers": [ "1560m-2120m" ], "image_path": "./data/STVQA/VisualGenome/1/713652.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of board reference", "answer": "the sign of board reference is a sign", "gt_answers": [ "schonbodenb" ], "image_path": "./data/STVQA/VisualGenome/1/713652.jpg", "model_name": "BLIP2" }, { "question": "WHAT NUMBER BUS IS THIS?", "answer": "person", "gt_answers": [ "11" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000131225.jpg", "model_name": "BLIP2" }, { "question": "WHAT KIND OF BUS IS THIS?", "answer": "a person bus", "gt_answers": [ "SPECIAL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000131225.jpg", "model_name": "BLIP2" }, { "question": "Who goes to town?", "answer": "paddington", "gt_answers": [ "Paddington" ], "image_path": "./data/STVQA/VisualGenome/1/2324508.jpg", "model_name": "BLIP2" }, { "question": "What week of this women's pregnancy is it?", "answer": "it is week 11", "gt_answers": [ "Week 31" ], "image_path": "./data/STVQA/VisualGenome/1/2352235.jpg", "model_name": "BLIP2" }, { "question": "What sign is displayed on the window of the handbag store?", "answer": "sale sign", "gt_answers": [ "Sale" ], "image_path": "./data/STVQA/icdar/img_312.jpg", "model_name": "BLIP2" }, { "question": "What does the name say above the yellow van?", "answer": "sams club", "gt_answers": [ "Penrose eyecare" ], "image_path": "./data/STVQA/VisualGenome/2/1938.jpg", "model_name": "BLIP2" }, { "question": "What is the route number of the bus?", "answer": "no route number", "gt_answers": [ "3" ], "image_path": "./data/STVQA/imageNet/n04487081_8351.JPEG", "model_name": "BLIP2" }, { "question": "Where is the bus headed?", "answer": "downtown", "gt_answers": [ "Jasper Place" ], "image_path": "./data/STVQA/imageNet/n04487081_8351.JPEG", "model_name": "BLIP2" }, { "question": "What is the license plate of the black car?", "answer": "tahoe", "gt_answers": [ "X1936X" ], "image_path": "./data/STVQA/VisualGenome/1/1159768.jpg", "model_name": "BLIP2" }, { "question": "What company has been cruisin' since 1936?", "answer": "ford motor company", "gt_answers": [ "Robot & Co" ], "image_path": "./data/STVQA/VisualGenome/1/1159768.jpg", "model_name": "BLIP2" }, { "question": "What year did Robot & Co start cruisin'?", "answer": "1955", "gt_answers": [ "1936" ], "image_path": "./data/STVQA/VisualGenome/1/1159768.jpg", "model_name": "BLIP2" }, { "question": "What city is this bus advertising?", "answer": "new york city", "gt_answers": [ "Chicago" ], "image_path": "./data/STVQA/VisualGenome/2/2400201.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the object?", "answer": "apple", "gt_answers": [ "Body" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009862.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the skateboard?", "answer": "longboard", "gt_answers": [ "Gravity Skateboards", "GRAVITY SKATEBOARD" ], "image_path": "./data/STVQA/VisualGenome/2/2411508.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant?", "answer": "person", "gt_answers": [ "BUDDY'S" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000047767.jpg", "model_name": "BLIP2" }, { "question": "What time do the clocks say?", "answer": "the time of day", "gt_answers": [ "12:01" ], "image_path": "./data/STVQA/imageNet/n03196217_5486.JPEG", "model_name": "BLIP2" }, { "question": "whats the number on the red sign?", "answer": "212", "gt_answers": [ "80" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000559288.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "person", "gt_answers": [ "27" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553735.jpg", "model_name": "BLIP2" }, { "question": "What is the time shown on the clock?", "answer": "12:00", "gt_answers": [ "9:15" ], "image_path": "./data/STVQA/VisualGenome/1/2367505.jpg", "model_name": "BLIP2" }, { "question": "What is written on the flyer?", "answer": "all way", "gt_answers": [ "DANCE!" ], "image_path": "./data/STVQA/VisualGenome/1/2375490.jpg", "model_name": "BLIP2" }, { "question": "What year on the red t-shirt?", "answer": "2008", "gt_answers": [ "1981" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000471405.jpg", "model_name": "BLIP2" }, { "question": "what are the numbers on train", "answer": "the number on the front of the train", "gt_answers": [ "378 136" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000120276.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the pitcher?", "answer": "3", "gt_answers": [ "27" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000382664.jpg", "model_name": "BLIP2" }, { "question": "What sign is posted on the fence?", "answer": "no parking sign", "gt_answers": [ "Posted no trespassing" ], "image_path": "./data/STVQA/VisualGenome/1/2334635.jpg", "model_name": "BLIP2" }, { "question": "What is the flight name", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/VisualGenome/1/2360860.jpg", "model_name": "BLIP2" }, { "question": "What is the time and date", "answer": "the time is unknown", "gt_answers": [ "2012.12.15 08:40" ], "image_path": "./data/STVQA/VisualGenome/1/2360860.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the large cake?", "answer": "person's birthday", "gt_answers": [ "HAPPY BIRTHDAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000329018.jpg", "model_name": "BLIP2" }, { "question": "What sport are they playing?", "answer": "baseball", "gt_answers": [ "Baseball" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000368969.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the main entrance?", "answer": "qeb", "gt_answers": [ "QEHB" ], "image_path": "./data/STVQA/IIIT_text/4457.jpg", "model_name": "BLIP2" }, { "question": "who took this photo", "answer": "john gaston", "gt_answers": [ "john cassady" ], "image_path": "./data/STVQA/imageNet/n01855032_2880.JPEG", "model_name": "BLIP2" }, { "question": "what year was the photo taken in", "answer": "2009", "gt_answers": [ "2005" ], "image_path": "./data/STVQA/imageNet/n01855032_2880.JPEG", "model_name": "BLIP2" }, { "question": "What is the airway line?", "answer": "thai airways", "gt_answers": [ "Hellenic Imperial" ], "image_path": "./data/STVQA/VisualGenome/2/2403452.jpg", "model_name": "BLIP2" }, { "question": "What is written on the top of the bus?", "answer": "mrt", "gt_answers": [ "merry holidays" ], "image_path": "./data/STVQA/VisualGenome/1/1593077.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the bus?", "answer": "the license plate of the bus", "gt_answers": [ "JMC 8858" ], "image_path": "./data/STVQA/VisualGenome/1/1593077.jpg", "model_name": "BLIP2" }, { "question": "What is lost on the white paper?", "answer": "goldfish", "gt_answers": [ "Goldfish" ], "image_path": "./data/STVQA/VisualGenome/2/2402910.jpg", "model_name": "BLIP2" }, { "question": "What color is the dog's harness?", "answer": "black", "gt_answers": [ "Black" ], "image_path": "./data/STVQA/imageNet/n02108422_3410.JPEG", "model_name": "BLIP2" }, { "question": "What word in this picture begins with an S?", "answer": "sandwich", "gt_answers": [ "SABOR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000051618.jpg", "model_name": "BLIP2" }, { "question": "What word in this picture begins with a C?", "answer": "cheese", "gt_answers": [ "Crema" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000051618.jpg", "model_name": "BLIP2" }, { "question": "What type of sauce is in the red bottle?", "answer": "hot sauce", "gt_answers": [ "AJI CREMA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000051618.jpg", "model_name": "BLIP2" }, { "question": "What city is listed on the sign in the back?", "answer": "chicago", "gt_answers": [ "TORONTO" ], "image_path": "./data/STVQA/VisualGenome/1/2350397.jpg", "model_name": "BLIP2" }, { "question": "What word is on the white uniform shirt?", "answer": "person", "gt_answers": [ "GIANT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000249519.jpg", "model_name": "BLIP2" }, { "question": "What type of business is shown?", "answer": "bank", "gt_answers": [ "bank" ], "image_path": "./data/STVQA/IIIT_text/3723.jpg", "model_name": "BLIP2" }, { "question": "What does the text say on the bottom left?", "answer": "two women with umbrellas", "gt_answers": [ "Sandro Lacarbona" ], "image_path": "./data/STVQA/VisualGenome/1/2317364.jpg", "model_name": "BLIP2" }, { "question": "What is on the back of the white jersey?", "answer": "nationals", "gt_answers": [ "Bat Boy" ], "image_path": "./data/STVQA/VisualGenome/1/2351999.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the person playing the instrument?", "answer": "d", "gt_answers": [ "W" ], "image_path": "./data/STVQA/VisualGenome/1/2351999.jpg", "model_name": "BLIP2" }, { "question": "I promise you are going to what?", "answer": "times square", "gt_answers": [ "love it" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365206.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the male actor on the billboard?", "answer": "person", "gt_answers": [ "Sean Hayes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365206.jpg", "model_name": "BLIP2" }, { "question": "Whats is the store's name on the right?", "answer": "john lewis", "gt_answers": [ "PIZZA & PASTA" ], "image_path": "./data/STVQA/VisualGenome/2/767.jpg", "model_name": "BLIP2" }, { "question": "What is the maximum volumeof the bigger conical flask?", "answer": "200ml", "gt_answers": [ "200 ml" ], "image_path": "./data/STVQA/imageNet/n02815834_1950.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the pink store?", "answer": "the swallow bakery", "gt_answers": [ "THE SWALLOW BAKERY" ], "image_path": "./data/STVQA/imageNet/n02776631_23368.JPEG", "model_name": "BLIP2" }, { "question": "What is the title of the webpage currently on the computer screen?", "answer": "myspace", "gt_answers": [ "Gravity's Grace" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000412289.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the train?", "answer": "no number", "gt_answers": [ "171" ], "image_path": "./data/STVQA/VisualGenome/1/2354926.jpg", "model_name": "BLIP2" }, { "question": "What is the large white word on the train?", "answer": "metro", "gt_answers": [ "Metra" ], "image_path": "./data/STVQA/VisualGenome/1/2354926.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NUMBER WRITTEN", "answer": "two", "gt_answers": [ "867" ], "image_path": "./data/STVQA/imageNet/n02981792_3782.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON THE BOAT", "answer": "the name of the boat", "gt_answers": [ "TYBEE 500" ], "image_path": "./data/STVQA/imageNet/n02981792_3782.JPEG", "model_name": "BLIP2" }, { "question": "Who does the copyright belongs to?", "answer": "the photographer", "gt_answers": [ "Adrian Patino" ], "image_path": "./data/STVQA/VisualGenome/1/2359776.jpg", "model_name": "BLIP2" }, { "question": "what type of game is this", "answer": "crossword", "gt_answers": [ "crossword puzzle" ], "image_path": "./data/STVQA/imageNet/n06785654_7140.JPEG", "model_name": "BLIP2" }, { "question": "what is the answer to 13 down", "answer": "pop triple play", "gt_answers": [ "tee" ], "image_path": "./data/STVQA/imageNet/n06785654_7140.JPEG", "model_name": "BLIP2" }, { "question": "what color pen was used to pill in boxes", "answer": "black pen", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/imageNet/n06785654_7140.JPEG", "model_name": "BLIP2" }, { "question": "What year was the photo taken?", "answer": "2008", "gt_answers": [ "2007" ], "image_path": "./data/STVQA/imageNet/n02114855_6829.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the photographer?", "answer": "jim bova", "gt_answers": [ "Al Bolivar" ], "image_path": "./data/STVQA/imageNet/n02114855_6829.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the trailer of the truck?", "answer": "detroit steel", "gt_answers": [ "CIRCUIT RIDER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000297023.jpg", "model_name": "BLIP2" }, { "question": "What number is on the trailer?", "answer": "nbss", "gt_answers": [ "110" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000297023.jpg", "model_name": "BLIP2" }, { "question": "What word is on the powder?", "answer": "pressed", "gt_answers": [ "Stockphoto" ], "image_path": "./data/STVQA/imageNet/n03314780_6164.JPEG", "model_name": "BLIP2" }, { "question": "What number is the batter?", "answer": "person", "gt_answers": [ "14" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292257.jpg", "model_name": "BLIP2" }, { "question": "what name engraved on the metal?", "answer": "egc", "gt_answers": [ "EGG" ], "image_path": "./data/STVQA/imageNet/n04141327_4464.JPEG", "model_name": "BLIP2" }, { "question": "What brand is listed above the alligator logo?", "answer": "lacoste", "gt_answers": [ "Lacoste" ], "image_path": "./data/STVQA/VisualGenome/1/2329092.jpg", "model_name": "BLIP2" }, { "question": "What hotel chain is advertised in brown and yellow?", "answer": "starwood hotels", "gt_answers": [ "Clarion", "Clarion Hotel" ], "image_path": "./data/STVQA/VisualGenome/1/2348136.jpg", "model_name": "BLIP2" }, { "question": "What age do you have to be to buy alcohol and tobacco?", "answer": "18 years old", "gt_answers": [ "18" ], "image_path": "./data/STVQA/icdar/img_98.jpg", "model_name": "BLIP2" }, { "question": "WHAT COLOUR IS THEIR BEEK", "answer": "black", "gt_answers": [ "RED" ], "image_path": "./data/STVQA/imageNet/n01860187_2912.JPEG", "model_name": "BLIP2" }, { "question": "What is written in the sand", "answer": "accelerate", "gt_answers": [ "Accenture" ], "image_path": "./data/STVQA/IIIT_text/img_000071.jpg", "model_name": "BLIP2" }, { "question": "Where is a good place to get a gift?", "answer": "the candy store", "gt_answers": [ "the candy store" ], "image_path": "./data/STVQA/imageNet/n03089624_21313.JPEG", "model_name": "BLIP2" }, { "question": "what website can you visit to get more information about this photo?", "answer": "wikipedia", "gt_answers": [ "www.padelnews.com" ], "image_path": "./data/STVQA/imageNet/n04039381_30881.JPEG", "model_name": "BLIP2" }, { "question": "what is being advertised on the bid white board?", "answer": "a mobile phone", "gt_answers": [ "best-kept secret to looking good" ], "image_path": "./data/STVQA/icdar/img_159.jpg", "model_name": "BLIP2" }, { "question": "what is the cost of 3 premium face, body or spa treatments?", "answer": "sgd 100", "gt_answers": [ "$128" ], "image_path": "./data/STVQA/icdar/img_159.jpg", "model_name": "BLIP2" }, { "question": "what is the phone number for Mary Chia?", "answer": "the phone number for mary chia is", "gt_answers": [ "6659 1161" ], "image_path": "./data/STVQA/icdar/img_159.jpg", "model_name": "BLIP2" }, { "question": "What does the license plate say", "answer": "person", "gt_answers": [ "L3 UMO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355081.jpg", "model_name": "BLIP2" }, { "question": "what are the colors", "answer": "yellow and green", "gt_answers": [ "yellow and green", "green and yellow" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000080745.jpg", "model_name": "BLIP2" }, { "question": "Who owns the copyright to the image?", "answer": "person", "gt_answers": [ "Gerry Walden" ], "image_path": "./data/STVQA/IIIT_text/2198.jpg", "model_name": "BLIP2" }, { "question": "What phrase has been painted onto the wall?", "answer": "take it to the hoop", "gt_answers": [ "Take it to the hoop" ], "image_path": "./data/STVQA/IIIT_text/2198.jpg", "model_name": "BLIP2" }, { "question": "Where is this tournament being played?", "answer": "a tennis court", "gt_answers": [ "Sydney" ], "image_path": "./data/STVQA/VisualGenome/1/2318701.jpg", "model_name": "BLIP2" }, { "question": "What can you buy from the shop on the corner?", "answer": "cigarettes", "gt_answers": [ "Fruit, burgers, popcorn and sandwiches" ], "image_path": "./data/STVQA/VisualGenome/1/2321296.jpg", "model_name": "BLIP2" }, { "question": "What airline owns the airplane?", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/IIIT_text/img_000108.jpg", "model_name": "BLIP2" }, { "question": "What company files this airplane?", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/IIIT_text/img_000108.jpg", "model_name": "BLIP2" }, { "question": "When is it not okay to exit here?", "answer": "between night and day", "gt_answers": [ "BETWEEN MIDNIGHT AND 7 A.M." ], "image_path": "./data/STVQA/VisualGenome/1/2318206.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant?", "answer": "the person", "gt_answers": [ "Portillo's" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000537424.jpg", "model_name": "BLIP2" }, { "question": "Who has the copyright on the photo?", "answer": "pet insects", "gt_answers": [ "pet_insects" ], "image_path": "./data/STVQA/imageNet/n02174001_5497.JPEG", "model_name": "BLIP2" }, { "question": "What store is on the left?", "answer": "ub bank", "gt_answers": [ "UOB" ], "image_path": "./data/STVQA/icdar/test_img_208.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of this blender?", "answer": "person", "gt_answers": [ "oster" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000044704.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say is in motion?", "answer": "person", "gt_answers": [ "Art" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000350086.jpg", "model_name": "BLIP2" }, { "question": "How many special bikes?", "answer": "two", "gt_answers": [ "40" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000350086.jpg", "model_name": "BLIP2" }, { "question": "What is the title across the top of the image?", "answer": "the strange countess", "gt_answers": [ "The Strange Countess" ], "image_path": "./data/STVQA/imageNet/n07248320_23501.JPEG", "model_name": "BLIP2" }, { "question": "What is the name in red text?", "answer": "edgar wallace", "gt_answers": [ "Edgar Wallace" ], "image_path": "./data/STVQA/imageNet/n07248320_23501.JPEG", "model_name": "BLIP2" }, { "question": "Which photography firm has taken this photo?", "answer": "krista", "gt_answers": [ "KRISTA" ], "image_path": "./data/STVQA/VisualGenome/1/2365031.jpg", "model_name": "BLIP2" }, { "question": "Where is this theater?", "answer": "chicago", "gt_answers": [ "Effingham County" ], "image_path": "./data/STVQA/imageNet/n03032252_51633.JPEG", "model_name": "BLIP2" }, { "question": "What movie starts friday?", "answer": "lion king", "gt_answers": [ "Napoleon Dynamite" ], "image_path": "./data/STVQA/imageNet/n03032252_51633.JPEG", "model_name": "BLIP2" }, { "question": "What US state is printed on the green street sign?", "answer": "illinois", "gt_answers": [ "rhode ISLAND" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000052256.jpg", "model_name": "BLIP2" }, { "question": "What word is visible above the red vehicle?", "answer": "fire", "gt_answers": [ "Hydrogen" ], "image_path": "./data/STVQA/imageNet/n03345487_4091.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the building?", "answer": "mitsubishi", "gt_answers": [ "ritim" ], "image_path": "./data/STVQA/VisualGenome/2/2402889.jpg", "model_name": "BLIP2" }, { "question": "What is the function of the red button?", "answer": "it is a power button", "gt_answers": [ "POWER" ], "image_path": "./data/STVQA/imageNet/n04074963_20276.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on the wooden bench?", "answer": "st. johns river", "gt_answers": [ "Holbeck Essen" ], "image_path": "./data/STVQA/VisualGenome/1/107930.jpg", "model_name": "BLIP2" }, { "question": "What is the orange vegetable on the bag?", "answer": "carrots", "gt_answers": [ "Carrot" ], "image_path": "./data/STVQA/VisualGenome/1/2322857.jpg", "model_name": "BLIP2" }, { "question": "Where is the coffee mug from?", "answer": "a gift from a friend", "gt_answers": [ "Cafe Du Monde" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337422.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline", "answer": "delta airlines", "gt_answers": [ "Delta" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053111.jpg", "model_name": "BLIP2" }, { "question": "What is orange train's number on the front?", "answer": "0", "gt_answers": [ "S316" ], "image_path": "./data/STVQA/VisualGenome/1/2350124.jpg", "model_name": "BLIP2" }, { "question": "Which country is represented by the airline symbol that is so visible?", "answer": "singapore", "gt_answers": [ "SINGAPORE" ], "image_path": "./data/STVQA/icdar/img_836.jpg", "model_name": "BLIP2" }, { "question": "How many laps are left to go?", "answer": "two", "gt_answers": [ "1" ], "image_path": "./data/STVQA/VisualGenome/1/2355855.jpg", "model_name": "BLIP2" }, { "question": "What team is shown on the catcher's sleeve?", "answer": "person", "gt_answers": [ "LA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000375838.jpg", "model_name": "BLIP2" }, { "question": "What company made the catcher's wrist brace?", "answer": "person", "gt_answers": [ "TPX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000375838.jpg", "model_name": "BLIP2" }, { "question": "What percentage off is the clearance?", "answer": "50%", "gt_answers": [ "30%", "30" ], "image_path": "./data/STVQA/imageNet/n03877472_12747.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the train?", "answer": "amtrak", "gt_answers": [ "67027 EWS" ], "image_path": "./data/STVQA/VisualGenome/1/2324097.jpg", "model_name": "BLIP2" }, { "question": "What brand is the camera", "answer": "nokia", "gt_answers": [ "nokia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337779.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the street sign?", "answer": "130", "gt_answers": [ "1300" ], "image_path": "./data/STVQA/VisualGenome/2/2404834.jpg", "model_name": "BLIP2" }, { "question": "What is the name in the website tag?", "answer": "person", "gt_answers": [ "PHOTOSTOCKSOURCE.COM" ], "image_path": "./data/STVQA/imageNet/n09835506_4173.JPEG", "model_name": "BLIP2" }, { "question": "what time can you see on the cell phone ad?", "answer": "the time is 12 noon", "gt_answers": [ "1:10" ], "image_path": "./data/STVQA/icdar/img_295.jpg", "model_name": "BLIP2" }, { "question": "What does the ad claim about the phone?", "answer": "it's the fastest android phone ever", "gt_answers": [ "the worlds thinnest smartphone" ], "image_path": "./data/STVQA/icdar/img_295.jpg", "model_name": "BLIP2" }, { "question": "what is the brand for the ad?", "answer": "lg", "gt_answers": [ "oppo" ], "image_path": "./data/STVQA/icdar/img_295.jpg", "model_name": "BLIP2" }, { "question": "What does the text in the thought bubble on the wall say?", "answer": "hello", "gt_answers": [ "HOLD ME!", "HOLD ME" ], "image_path": "./data/STVQA/imageNet/n03891332_3676.JPEG", "model_name": "BLIP2" }, { "question": "What is the train number?", "answer": "person", "gt_answers": [ "42" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000226254.jpg", "model_name": "BLIP2" }, { "question": "What are the numbers/letters on the aircraft?", "answer": "f-16", "gt_answers": [ "J-624" ], "image_path": "./data/STVQA/VisualGenome/1/2324458.jpg", "model_name": "BLIP2" }, { "question": "Which side of the cassette is pictured?", "answer": "front", "gt_answers": [ "Side A", "A" ], "image_path": "./data/STVQA/imageNet/n02978881_8693.JPEG", "model_name": "BLIP2" }, { "question": "What does the logo on the tail of the plane say?", "answer": "t mobile", "gt_answers": [ "T" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278565.jpg", "model_name": "BLIP2" }, { "question": "What is written on the engine of the plane?", "answer": "person", "gt_answers": [ "T" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278565.jpg", "model_name": "BLIP2" }, { "question": "What company does the airplane belong to?", "answer": "person", "gt_answers": [ "TRANSAVIA.COM", "TRANSAVIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278565.jpg", "model_name": "BLIP2" }, { "question": "During what years was the artist of this painting alive?", "answer": "person was born in 1887 and died in 1966", "gt_answers": [ "1868-1940.", "1868-1940" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000320503.jpg", "model_name": "BLIP2" }, { "question": "What words are on the sign?", "answer": "hollywood", "gt_answers": [ "Hollywood" ], "image_path": "./data/STVQA/IIIT_text/img_000560.jpg", "model_name": "BLIP2" }, { "question": "Who is the person who the man in the centerwants to get voted into the big brother house?", "answer": "sarah", "gt_answers": [ "George" ], "image_path": "./data/STVQA/VisualGenome/2/638.jpg", "model_name": "BLIP2" }, { "question": "What is written in white on the red sign on the window?", "answer": "sale", "gt_answers": [ "FURTHER DISCOUNTS" ], "image_path": "./data/STVQA/icdar/test_img_318.jpg", "model_name": "BLIP2" }, { "question": "What word is on the yellow sign?", "answer": "bump", "gt_answers": [ "Bump" ], "image_path": "./data/STVQA/VisualGenome/1/2362202.jpg", "model_name": "BLIP2" }, { "question": "What is his name?", "answer": "john", "gt_answers": [ "Jose" ], "image_path": "./data/STVQA/VisualGenome/1/107977.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE ANIMAL?", "answer": "cat", "gt_answers": [ "CAT" ], "image_path": "./data/STVQA/VisualGenome/1/2338263.jpg", "model_name": "BLIP2" }, { "question": "Where is this bus going?", "answer": "to the next stop", "gt_answers": [ "Mt Airy" ], "image_path": "./data/STVQA/VisualGenome/2/2402586.jpg", "model_name": "BLIP2" }, { "question": "What is the name of one of the sponsors?", "answer": "nike", "gt_answers": [ "Apollinaris" ], "image_path": "./data/STVQA/VisualGenome/1/1592467.jpg", "model_name": "BLIP2" }, { "question": "What is printed inside the blue area?", "answer": "intel logo", "gt_answers": [ "Intel Inside" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000296.jpg", "model_name": "BLIP2" }, { "question": "What does the label say this is designed for?", "answer": "microsoft windows 98", "gt_answers": [ "Microsoft Windows XP" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000296.jpg", "model_name": "BLIP2" }, { "question": "What does the yellow card say to do?", "answer": "place your cell phone on the table", "gt_answers": [ "Hang up" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000540716.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this street?", "answer": "stockton street", "gt_answers": [ "Stockton St" ], "image_path": "./data/STVQA/VisualGenome/2/2401670.jpg", "model_name": "BLIP2" }, { "question": "What is the website listed on the photo?", "answer": "dunkinkuntan", "gt_answers": [ "duncaninkuantan.blogspot.com" ], "image_path": "./data/STVQA/imageNet/n03467068_28225.JPEG", "model_name": "BLIP2" }, { "question": "What is the serial number of this boat?", "answer": "no serial number", "gt_answers": [ "ME14XSR" ], "image_path": "./data/STVQA/VisualGenome/1/2316495.jpg", "model_name": "BLIP2" }, { "question": "What does the back of the bus say?", "answer": "i love london", "gt_answers": [ "Pull-ups" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000364006.jpg", "model_name": "BLIP2" }, { "question": "What is the word in the picture that starts with F", "answer": "front", "gt_answers": [ "FAG" ], "image_path": "./data/STVQA/VisualGenome/2/4109.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "nr", "gt_answers": [ "103" ], "image_path": "./data/STVQA/VisualGenome/1/2349646.jpg", "model_name": "BLIP2" }, { "question": "Where is the right sign pointing to?", "answer": "saklap dede", "gt_answers": [ "Galip Dede" ], "image_path": "./data/STVQA/VisualGenome/1/2363918.jpg", "model_name": "BLIP2" }, { "question": "Where is the left sign pointing to?", "answer": "saklap dede", "gt_answers": [ "Sahkapisi" ], "image_path": "./data/STVQA/VisualGenome/1/2363918.jpg", "model_name": "BLIP2" }, { "question": "What phrase is on the side of the toolkit?", "answer": "congregational tool kit", "gt_answers": [ "Congregational Tool Kit Resources For Your Church" ], "image_path": "./data/STVQA/imageNet/n02966687_10029.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand of the drink?", "answer": "coke", "gt_answers": [ "Cocio", "COCIO CLASSIC" ], "image_path": "./data/STVQA/VisualGenome/1/2367939.jpg", "model_name": "BLIP2" }, { "question": "What is the first word in yellow text?", "answer": "william", "gt_answers": [ "William" ], "image_path": "./data/STVQA/VisualGenome/1/2319930.jpg", "model_name": "BLIP2" }, { "question": "What is the second word in yellow text?", "answer": "ju", "gt_answers": [ "Jou" ], "image_path": "./data/STVQA/VisualGenome/1/2319930.jpg", "model_name": "BLIP2" }, { "question": "What store has the vertical banner?", "answer": "walmart", "gt_answers": [ "Old Navy" ], "image_path": "./data/STVQA/VisualGenome/1/2367092.jpg", "model_name": "BLIP2" }, { "question": "Which store is located in the foreground?", "answer": "department store", "gt_answers": [ "GAP" ], "image_path": "./data/STVQA/VisualGenome/1/2367092.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company printed on the plane?", "answer": "china airlines", "gt_answers": [ "CHINA AIRLINES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000017675.jpg", "model_name": "BLIP2" }, { "question": "What are the red letters on the back of the white truck?", "answer": "miami beach", "gt_answers": [ "CFI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000489967.jpg", "model_name": "BLIP2" }, { "question": "What year was this for?", "answer": "2010", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008451.jpg", "model_name": "BLIP2" }, { "question": "What is written on the road signs?", "answer": "one way", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/2/2404511.jpg", "model_name": "BLIP2" }, { "question": "What is the website address below the blue bucket?", "answer": "www", "gt_answers": [ "alibaba.com.cn" ], "image_path": "./data/STVQA/imageNet/n02909870_14840.JPEG", "model_name": "BLIP2" }, { "question": "What name is written on the stone?", "answer": "john", "gt_answers": [ "LORNA" ], "image_path": "./data/STVQA/VisualGenome/1/2321414.jpg", "model_name": "BLIP2" }, { "question": "What color is the ruler?", "answer": "blue", "gt_answers": [ "Light blue.", "{}" ], "image_path": "./data/STVQA/imageNet/n04118776_46440.JPEG", "model_name": "BLIP2" }, { "question": "What kind of bus is this?", "answer": "school bus", "gt_answers": [ "School bus" ], "image_path": "./data/STVQA/imageNet/n04146614_11104.JPEG", "model_name": "BLIP2" }, { "question": "what are the two words on the circle with mickey mouse>", "answer": "happy birthday", "gt_answers": [ "Happy Birthday!", "HAPPY BIRTHDAY" ], "image_path": "./data/STVQA/VisualGenome/1/2342601.jpg", "model_name": "BLIP2" }, { "question": "what is the first word on the white circle on top of the cake?", "answer": "happy", "gt_answers": [ "Happy" ], "image_path": "./data/STVQA/VisualGenome/1/2342601.jpg", "model_name": "BLIP2" }, { "question": "What number is on the baseball painted on the wall?", "answer": "23", "gt_answers": [ "42" ], "image_path": "./data/STVQA/VisualGenome/1/2337226.jpg", "model_name": "BLIP2" }, { "question": "What color is the bus?", "answer": "yellow", "gt_answers": [ "yellow" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000125213.jpg", "model_name": "BLIP2" }, { "question": "What does the licence plate say?", "answer": "person", "gt_answers": [ "sn55 dvl" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000125213.jpg", "model_name": "BLIP2" }, { "question": "What number is on the front of the bus?", "answer": "person", "gt_answers": [ "212" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000125213.jpg", "model_name": "BLIP2" }, { "question": "What type of vehicle is going through the arch?", "answer": "bus", "gt_answers": [ "Bus" ], "image_path": "./data/STVQA/imageNet/n04486054_1409.JPEG", "model_name": "BLIP2" }, { "question": "what are the letters in the top row on the sign in the upper right portion of this photo?", "answer": "mt", "gt_answers": [ "aud" ], "image_path": "./data/STVQA/VisualGenome/1/2377129.jpg", "model_name": "BLIP2" }, { "question": "How many children are getting off of the bus?", "answer": "three", "gt_answers": [ "3" ], "image_path": "./data/STVQA/IIIT_text/img_001091.jpg", "model_name": "BLIP2" }, { "question": "What is the four digit number of this bus?", "answer": "999", "gt_answers": [ "7429" ], "image_path": "./data/STVQA/IIIT_text/img_001091.jpg", "model_name": "BLIP2" }, { "question": "What is the number for the player that is named Casilla?", "answer": "2", "gt_answers": [ "46" ], "image_path": "./data/STVQA/VisualGenome/1/2330698.jpg", "model_name": "BLIP2" }, { "question": "What is the jet?", "answer": "fighter jet", "gt_answers": [ "J-062" ], "image_path": "./data/STVQA/VisualGenome/2/2412661.jpg", "model_name": "BLIP2" }, { "question": "What color stripes go down this train?", "answer": "orange", "gt_answers": [ "Orange", "{}" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000238051.jpg", "model_name": "BLIP2" }, { "question": "What company makes the food shown?", "answer": "general foods", "gt_answers": [ "General Foods" ], "image_path": "./data/STVQA/VisualGenome/1/2368500.jpg", "model_name": "BLIP2" }, { "question": "What kind of foods are shown?", "answer": "gourmet foods", "gt_answers": [ "Gourmet Foods" ], "image_path": "./data/STVQA/VisualGenome/1/2368500.jpg", "model_name": "BLIP2" }, { "question": "What word is above the green button on the meter?", "answer": "parking", "gt_answers": [ "OK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000262851.jpg", "model_name": "BLIP2" }, { "question": "What is the speed limit?", "answer": "60 mph", "gt_answers": [ "25" ], "image_path": "./data/STVQA/VisualGenome/1/2353022.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the phone next to he purple one?", "answer": "person", "gt_answers": [ "NOKIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000185200.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the blue car?", "answer": "person", "gt_answers": [ "YSK 365", "YSK365" ], "image_path": "./data/STVQA/IIIT_text/4418.jpg", "model_name": "BLIP2" }, { "question": "What is one of the sponsors on the blue wall?", "answer": "nike", "gt_answers": [ "Olympus" ], "image_path": "./data/STVQA/VisualGenome/2/2414940.jpg", "model_name": "BLIP2" }, { "question": "What is the number in the square?", "answer": "2", "gt_answers": [ "2" ], "image_path": "./data/STVQA/imageNet/n06794110_5477.JPEG", "model_name": "BLIP2" }, { "question": "What time is shown on the clock?", "answer": "it is 2:00 am", "gt_answers": [ "4:22" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005975.jpg", "model_name": "BLIP2" }, { "question": "Which sections contain letters?", "answer": "letter b", "gt_answers": [ "ISEST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000316571.jpg", "model_name": "BLIP2" }, { "question": "Which sections contain letters?", "answer": "letter b", "gt_answers": [ "SIGNS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000316571.jpg", "model_name": "BLIP2" }, { "question": "What does the license plate on the train say?", "answer": "i am a clown", "gt_answers": [ "POPOFF" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000500135.jpg", "model_name": "BLIP2" }, { "question": "What District is the School?", "answer": "the school is in the st johnsbury school district", "gt_answers": [ "District #2", "2" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007509.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the road?", "answer": "mill road", "gt_answers": [ "MILL ROAD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000252122.jpg", "model_name": "BLIP2" }, { "question": "What is the sponsor logo in the background?", "answer": "nike", "gt_answers": [ "KIA" ], "image_path": "./data/STVQA/VisualGenome/1/2317319.jpg", "model_name": "BLIP2" }, { "question": "What kind of wine is the front bottle", "answer": "red wine", "gt_answers": [ "Port" ], "image_path": "./data/STVQA/imageNet/n04591713_2857.JPEG", "model_name": "BLIP2" }, { "question": "What are the two streets?", "answer": "lakeridge and elm", "gt_answers": [ "Forder & Main St" ], "image_path": "./data/STVQA/VisualGenome/1/2361995.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "shell gas station", "gt_answers": [ "Czech stop" ], "image_path": "./data/STVQA/VisualGenome/1/2326901.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2000", "gt_answers": [ "2001" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341369.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number that is next to the wrecked car?", "answer": "a city bus", "gt_answers": [ "174" ], "image_path": "./data/STVQA/VisualGenome/1/2323755.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the door?", "answer": "3", "gt_answers": [ "48184" ], "image_path": "./data/STVQA/VisualGenome/2/2410945.jpg", "model_name": "BLIP2" }, { "question": "What is written on the top green sign?", "answer": "greenbush rd", "gt_answers": [ "Greenbush Rd" ], "image_path": "./data/STVQA/VisualGenome/1/2328549.jpg", "model_name": "BLIP2" }, { "question": "What type of traffic lane is in this photo?", "answer": "bus only", "gt_answers": [ "BUS ONLY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000346384.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/VisualGenome/2/2408545.jpg", "model_name": "BLIP2" }, { "question": "What does the type say at the bottom of the train?", "answer": "amtrak", "gt_answers": [ "First" ], "image_path": "./data/STVQA/VisualGenome/1/713906.jpg", "model_name": "BLIP2" }, { "question": "What is under XO?", "answer": "caus cognac", "gt_answers": [ "Superior" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005531.jpg", "model_name": "BLIP2" }, { "question": "What number train is this?", "answer": "st louis", "gt_answers": [ "2857" ], "image_path": "./data/STVQA/imageNet/n04310018_8914.JPEG", "model_name": "BLIP2" }, { "question": "What name is on the sign by the boy's knee?", "answer": "bnp paris", "gt_answers": [ "Mary Cohr" ], "image_path": "./data/STVQA/VisualGenome/2/2403134.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the Cafe?", "answer": "the cafe", "gt_answers": [ "Bagdad" ], "image_path": "./data/STVQA/VisualGenome/1/2363043.jpg", "model_name": "BLIP2" }, { "question": "What is the last letter on the yellow sign?", "answer": "e", "gt_answers": [ "d" ], "image_path": "./data/STVQA/VisualGenome/1/2363043.jpg", "model_name": "BLIP2" }, { "question": "What follows Bagdad on the store front?", "answer": "iraq", "gt_answers": [ "Cafe" ], "image_path": "./data/STVQA/VisualGenome/1/2363043.jpg", "model_name": "BLIP2" }, { "question": "what is the number of the red and gold train?", "answer": "458", "gt_answers": [ "4268" ], "image_path": "./data/STVQA/VisualGenome/2/2403900.jpg", "model_name": "BLIP2" }, { "question": "Which one of the 50 US states is in the name of of the train?", "answer": "new york", "gt_answers": [ "Maine" ], "image_path": "./data/STVQA/VisualGenome/2/2403900.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "e", "gt_answers": [ "66" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000275206.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number of the bus that is behind the bus that has a sign that says \"TRY ME\"?", "answer": "x1", "gt_answers": [ "88" ], "image_path": "./data/STVQA/VisualGenome/1/2330088.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boat?", "answer": "schooner", "gt_answers": [ "HMS SCRAPEO" ], "image_path": "./data/STVQA/VisualGenome/1/2376162.jpg", "model_name": "BLIP2" }, { "question": "What is written on the boat?", "answer": "schooner", "gt_answers": [ "HMS SCRAPEO" ], "image_path": "./data/STVQA/VisualGenome/1/2376162.jpg", "model_name": "BLIP2" }, { "question": "What is the name in yellow on the side of the train?", "answer": "eurostar", "gt_answers": [ "Eurostar" ], "image_path": "./data/STVQA/VisualGenome/1/2369130.jpg", "model_name": "BLIP2" }, { "question": "What airline flys this plane?", "answer": "southwest airlines", "gt_answers": [ "Delta" ], "image_path": "./data/STVQA/VisualGenome/1/2377142.jpg", "model_name": "BLIP2" }, { "question": "What meal is inside of the red box?", "answer": "shrimp marsala", "gt_answers": [ "SHRIMP MARINARA" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000005590.jpg", "model_name": "BLIP2" }, { "question": "What four lettters are visible on the train?", "answer": "london st pancras international", "gt_answers": [ "SOUT" ], "image_path": "./data/STVQA/VisualGenome/1/2347936.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign say?", "answer": "tokyo station", "gt_answers": [ "Tokyo Station" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098121.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS NAME OF THE BUS", "answer": "explore history", "gt_answers": [ "EXPLORE MEDWAY" ], "image_path": "./data/STVQA/VisualGenome/1/2341028.jpg", "model_name": "BLIP2" }, { "question": "WHAT TYPE OF BUS", "answer": "double decker", "gt_answers": [ "GUIDED TOUR BUS" ], "image_path": "./data/STVQA/VisualGenome/1/2341028.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WEBSITE ADDRESS", "answer": "com", "gt_answers": [ "WWW.VISITMEDWAY.ORG" ], "image_path": "./data/STVQA/VisualGenome/1/2341028.jpg", "model_name": "BLIP2" }, { "question": "What does the left banner say?", "answer": "skateboarding", "gt_answers": [ "Gravity Skateboards" ], "image_path": "./data/STVQA/VisualGenome/1/2326012.jpg", "model_name": "BLIP2" }, { "question": "What does the right banner say?", "answer": "skateboarding", "gt_answers": [ "Board Shop Skateboarding", "BOARD SHOP COMAU" ], "image_path": "./data/STVQA/VisualGenome/1/2326012.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the theater?", "answer": "person", "gt_answers": [ "ORPHEUM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000307554.jpg", "model_name": "BLIP2" }, { "question": "What does the front of the marquee say (the side that is facing the street)?", "answer": "person", "gt_answers": [ "THE 10TH WISCONSIN FILM FESTIVAL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000307554.jpg", "model_name": "BLIP2" }, { "question": "What year is the Wisconsin Film Festival currently celebrating?", "answer": "50th anniversary", "gt_answers": [ "10TH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000307554.jpg", "model_name": "BLIP2" }, { "question": "What is printed three times on the card above the flowers?", "answer": "i love you", "gt_answers": [ "Good luck" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000070387.jpg", "model_name": "BLIP2" }, { "question": "What shape is suggested by the name of this company?", "answer": "star", "gt_answers": [ "star" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000409614.jpg", "model_name": "BLIP2" }, { "question": "What is sold by The Star?", "answer": "beer and wine", "gt_answers": [ "GEORGE'S BEERS", "GEORGE'S BEERS & STOUT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000409614.jpg", "model_name": "BLIP2" }, { "question": "Other than beers, what is sold by The Star?", "answer": "scotch whisky", "gt_answers": [ "stout" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000409614.jpg", "model_name": "BLIP2" }, { "question": "What is the date on this photo?", "answer": "the date is unknown", "gt_answers": [ "2004/3/6", "2004" ], "image_path": "./data/STVQA/imageNet/n04347754_5279.JPEG", "model_name": "BLIP2" }, { "question": "What is the speed limit in this area?", "answer": "20 mph", "gt_answers": [ "30" ], "image_path": "./data/STVQA/VisualGenome/1/1159593.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the bottom of the red snowboard?", "answer": "person", "gt_answers": [ "HUGGING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000232420.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the motorcycle?", "answer": "person", "gt_answers": [ "HARLEY-DAVIDSON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000188575.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the green wall?", "answer": "krees", "gt_answers": [ "Kress" ], "image_path": "./data/STVQA/VisualGenome/2/3372.jpg", "model_name": "BLIP2" }, { "question": "What is the name written on the white sign", "answer": "krees", "gt_answers": [ "Patricks" ], "image_path": "./data/STVQA/VisualGenome/2/3372.jpg", "model_name": "BLIP2" }, { "question": "What name printed in the man;s T-shirt?", "answer": "apple", "gt_answers": [ "Yuh!" ], "image_path": "./data/STVQA/VisualGenome/1/2355949.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the cat food?", "answer": "person", "gt_answers": [ "ja!" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462589.jpg", "model_name": "BLIP2" }, { "question": "What are the numbers on the license plate?", "answer": "person, person, person", "gt_answers": [ "57247" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000427449.jpg", "model_name": "BLIP2" }, { "question": "What are the letters on the license plate?", "answer": "person", "gt_answers": [ "mg" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000427449.jpg", "model_name": "BLIP2" }, { "question": "What year is listed on the IRT sign?", "answer": "2015", "gt_answers": [ "1972" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000577975.jpg", "model_name": "BLIP2" }, { "question": "What year is listed in the copyright?", "answer": "2015", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000577975.jpg", "model_name": "BLIP2" }, { "question": "What number is visible on the saddle?", "answer": "person", "gt_answers": [ "7276" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000577975.jpg", "model_name": "BLIP2" }, { "question": "How much storage is on the CD-R?", "answer": "128mb", "gt_answers": [ "700MB" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159127.jpg", "model_name": "BLIP2" }, { "question": "What is hand written on the sign on top of STOP?", "answer": "don't be afraid", "gt_answers": [ "DONT" ], "image_path": "./data/STVQA/VisualGenome/1/2366485.jpg", "model_name": "BLIP2" }, { "question": "Where is this picture taken and located?", "answer": "person's workshop", "gt_answers": [ "LAGUNA BEACH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000487927.jpg", "model_name": "BLIP2" }, { "question": "what is the number of yellow bus", "answer": "0", "gt_answers": [ "36" ], "image_path": "./data/STVQA/imageNet/n04146614_10064.JPEG", "model_name": "BLIP2" }, { "question": "What text is found on the side of the bus?", "answer": "ecocar", "gt_answers": [ "ESCOLAR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000216142.jpg", "model_name": "BLIP2" }, { "question": "what number is on this box?", "answer": "28", "gt_answers": [ "88" ], "image_path": "./data/STVQA/imageNet/n04372370_27244.JPEG", "model_name": "BLIP2" }, { "question": "what letter is on this box?", "answer": "b", "gt_answers": [ "S" ], "image_path": "./data/STVQA/imageNet/n04372370_27244.JPEG", "model_name": "BLIP2" }, { "question": "What does the bottom of the snowboard say?", "answer": "k2", "gt_answers": [ "Step Child" ], "image_path": "./data/STVQA/VisualGenome/1/2374185.jpg", "model_name": "BLIP2" }, { "question": "What word is on the red sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2368863.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign in this picture?", "answer": "ushai", "gt_answers": [ "Ushuai" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000513980.jpg", "model_name": "BLIP2" }, { "question": "What two things are written on the red sign?", "answer": "stop and yield", "gt_answers": [ "Stop Seten" ], "image_path": "./data/STVQA/VisualGenome/1/2373787.jpg", "model_name": "BLIP2" }, { "question": "What number is on the train?", "answer": "u s train number", "gt_answers": [ "6068" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337875.jpg", "model_name": "BLIP2" }, { "question": "What emblem is shown in the top left corner?", "answer": "person", "gt_answers": [ "My prof e" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000574885.jpg", "model_name": "BLIP2" }, { "question": "What is the number found on the green fence?", "answer": "4", "gt_answers": [ "330" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000413713.jpg", "model_name": "BLIP2" }, { "question": "What brand is advertised on the green fence?", "answer": "person", "gt_answers": [ "Adidas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000413713.jpg", "model_name": "BLIP2" }, { "question": "Who took this picture?", "answer": "daniel", "gt_answers": [ "Paco Lozano" ], "image_path": "./data/STVQA/imageNet/n03788195_2655.JPEG", "model_name": "BLIP2" }, { "question": "What date is written on the fire hydrant?", "answer": "september 11, 2001", "gt_answers": [ "7/9/06" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000133908.jpg", "model_name": "BLIP2" }, { "question": "What is written above the 7/9/06?", "answer": "person", "gt_answers": [ "29716/03" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000133908.jpg", "model_name": "BLIP2" }, { "question": "What brand of camera is the blue box?", "answer": "nikon", "gt_answers": [ "Canon" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000432212.jpg", "model_name": "BLIP2" }, { "question": "Whats the name of the store?", "answer": "titan", "gt_answers": [ "Titan" ], "image_path": "./data/STVQA/IIIT_text/img_001225.jpg", "model_name": "BLIP2" }, { "question": "Whats the brand of watch in the advertisement?", "answer": "titan", "gt_answers": [ "Tycoon" ], "image_path": "./data/STVQA/IIIT_text/img_001225.jpg", "model_name": "BLIP2" }, { "question": "Is the security guard standing or sitting?", "answer": "standing", "gt_answers": [ "Sitting" ], "image_path": "./data/STVQA/IIIT_text/img_001225.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name on the bottom right side of the yellow cup?", "answer": "person", "gt_answers": [ "CocaCola", "COCA COLA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499204.jpg", "model_name": "BLIP2" }, { "question": "What number is printed on the top of the cup?", "answer": "4", "gt_answers": [ "1916" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000499204.jpg", "model_name": "BLIP2" }, { "question": "What is the active ingredient?", "answer": "benzyl alcohol", "gt_answers": [ "purified water" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001351.jpg", "model_name": "BLIP2" }, { "question": "What words are displayed on the lower area of the barrel?", "answer": "super stock", "gt_answers": [ "Super Stock" ], "image_path": "./data/STVQA/imageNet/n02795169_6193.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the back of the bus?", "answer": "school bus", "gt_answers": [ "School Bus" ], "image_path": "./data/STVQA/VisualGenome/1/2352668.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of the car?", "answer": "tah", "gt_answers": [ "177626" ], "image_path": "./data/STVQA/VisualGenome/1/2352668.jpg", "model_name": "BLIP2" }, { "question": "What type of milk is shown?", "answer": "whole milk", "gt_answers": [ "Semi-Skimmed Milk", "SEMI-SKIMMED" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030694.jpg", "model_name": "BLIP2" }, { "question": "what is the capacity of the milk bottle?", "answer": "1 litre", "gt_answers": [ "1 litre" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030694.jpg", "model_name": "BLIP2" }, { "question": "What color does the stoplight show?", "answer": "green", "gt_answers": [ "red" ], "image_path": "./data/STVQA/VisualGenome/1/150267.jpg", "model_name": "BLIP2" }, { "question": "What is on the red sign?", "answer": "a stop sign", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2344672.jpg", "model_name": "BLIP2" }, { "question": "What is the train number", "answer": "nr", "gt_answers": [ "186 284" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000411825.jpg", "model_name": "BLIP2" }, { "question": "Who owns the truck?", "answer": "s & w", "gt_answers": [ "M. Way & Son", "M.WAY & SON" ], "image_path": "./data/STVQA/VisualGenome/1/2377072.jpg", "model_name": "BLIP2" }, { "question": "What number is visible at the top of the fence?", "answer": "4", "gt_answers": [ "One" ], "image_path": "./data/STVQA/VisualGenome/1/2357743.jpg", "model_name": "BLIP2" }, { "question": "What number is the plane?", "answer": "2", "gt_answers": [ "N288SA" ], "image_path": "./data/STVQA/VisualGenome/1/713106.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the hood of the truck?", "answer": "mcdonalds", "gt_answers": [ "Hot Dogs" ], "image_path": "./data/STVQA/VisualGenome/1/2360153.jpg", "model_name": "BLIP2" }, { "question": "What does it say immediately over the truck in blue lettering on the yellow background?", "answer": "mcdonalds", "gt_answers": [ "SABRETT" ], "image_path": "./data/STVQA/VisualGenome/1/2360153.jpg", "model_name": "BLIP2" }, { "question": "What comes after \"The World of\"", "answer": "jughead", "gt_answers": [ "Jughead" ], "image_path": "./data/STVQA/imageNet/n06596364_4220.JPEG", "model_name": "BLIP2" }, { "question": "How much did this cost?", "answer": "0", "gt_answers": [ "25 c" ], "image_path": "./data/STVQA/imageNet/n06596364_4220.JPEG", "model_name": "BLIP2" }, { "question": "What issue was this?", "answer": "the world of jughead", "gt_answers": [ "NO.172", "172" ], "image_path": "./data/STVQA/imageNet/n06596364_4220.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the trailer?", "answer": "studs and best", "gt_answers": [ "Sturgis & Bust", "Sturgis or Bust" ], "image_path": "./data/STVQA/VisualGenome/1/2358077.jpg", "model_name": "BLIP2" }, { "question": "What type of business is this?", "answer": "barber shop", "gt_answers": [ "Barber shop" ], "image_path": "./data/STVQA/imageNet/n02791270_1798.JPEG", "model_name": "BLIP2" }, { "question": "What is the bottom word written on the blue cloud on the wall?", "answer": "person", "gt_answers": [ "BIRTHDAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000367259.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign say?", "answer": "tunis", "gt_answers": [ "Tunis" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000467126.jpg", "model_name": "BLIP2" }, { "question": "What state is listed in the text?", "answer": "washington", "gt_answers": [ "Colorado" ], "image_path": "./data/STVQA/imageNet/n03887697_2293.JPEG", "model_name": "BLIP2" }, { "question": "What city is listed in the text?", "answer": "seattle", "gt_answers": [ "Dallas" ], "image_path": "./data/STVQA/imageNet/n03887697_2293.JPEG", "model_name": "BLIP2" }, { "question": "What number is listed on the top of the page?", "answer": "2", "gt_answers": [ "498" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000285841.jpg", "model_name": "BLIP2" }, { "question": "What is the airline of the blue plane?", "answer": "klm", "gt_answers": [ "KLM" ], "image_path": "./data/STVQA/VisualGenome/2/2406090.jpg", "model_name": "BLIP2" }, { "question": "u.s.air force namer", "answer": "the u s air force", "gt_answers": [ "53366" ], "image_path": "./data/STVQA/VisualGenome/1/2328486.jpg", "model_name": "BLIP2" }, { "question": "what color is the fence", "answer": "white", "gt_answers": [ "blue" ], "image_path": "./data/STVQA/IIIT_text/img_000763.jpg", "model_name": "BLIP2" }, { "question": "how many people are in the photo", "answer": "three", "gt_answers": [ "12", "13" ], "image_path": "./data/STVQA/IIIT_text/img_000763.jpg", "model_name": "BLIP2" }, { "question": "What is the special buy price?", "answer": "$1.50 per pound", "gt_answers": [ "$1.99" ], "image_path": "./data/STVQA/VisualGenome/2/2413931.jpg", "model_name": "BLIP2" }, { "question": "What number is on the front of the train?", "answer": "1", "gt_answers": [ "67015" ], "image_path": "./data/STVQA/VisualGenome/1/150484.jpg", "model_name": "BLIP2" }, { "question": "What is written in blue?", "answer": "london", "gt_answers": [ "Cinema Casino" ], "image_path": "./data/STVQA/VisualGenome/1/1592683.jpg", "model_name": "BLIP2" }, { "question": "what company is advertised", "answer": "google", "gt_answers": [ "google" ], "image_path": "./data/STVQA/IIIT_text/img_000482.jpg", "model_name": "BLIP2" }, { "question": "What is the maximum height with units?", "answer": "the maximum height with units is 30 m", "gt_answers": [ "3.3M", "3,3m" ], "image_path": "./data/STVQA/icdar/img_192.jpg", "model_name": "BLIP2" }, { "question": "What is the number that shows where the bus is going?", "answer": "nr", "gt_answers": [ "73", "73E" ], "image_path": "./data/STVQA/VisualGenome/1/2327105.jpg", "model_name": "BLIP2" }, { "question": "True or false the bread bag says pain?", "answer": "false", "gt_answers": [ "PAIN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000221120.jpg", "model_name": "BLIP2" }, { "question": "Does Bon appeu appear in this photo?", "answer": "no", "gt_answers": [ "BON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000221120.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2015", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000172974.jpg", "model_name": "BLIP2" }, { "question": "What is the topic discussed in the cassette?", "answer": "what hifi", "gt_answers": [ "Your complete guide to buying Hi-Fi" ], "image_path": "./data/STVQA/imageNet/n02978881_3761.JPEG", "model_name": "BLIP2" }, { "question": "What side of the cassette is facing upwards?", "answer": "the white side", "gt_answers": [ "Side 1" ], "image_path": "./data/STVQA/imageNet/n02978881_3761.JPEG", "model_name": "BLIP2" }, { "question": "What is the city name for this police car?", "answer": "person", "gt_answers": [ "PORTLAND" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000335339.jpg", "model_name": "BLIP2" }, { "question": "What color is the wonan\u2019s Jacket?", "answer": "red", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/IIIT_text/4626.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the bus?", "answer": "tour bus", "gt_answers": [ "Gray Line", "GRAYLINE" ], "image_path": "./data/STVQA/VisualGenome/1/2357685.jpg", "model_name": "BLIP2" }, { "question": "What are the letters in the bottom right corner?", "answer": "person", "gt_answers": [ "K9" ], "image_path": "./data/STVQA/imageNet/n02113978_5127.JPEG", "model_name": "BLIP2" }, { "question": "What is the jersey number on the back", "answer": "86", "gt_answers": [ "28" ], "image_path": "./data/STVQA/VisualGenome/1/2346662.jpg", "model_name": "BLIP2" }, { "question": "What is the name of player?", "answer": "michael jackson", "gt_answers": [ "Posey" ], "image_path": "./data/STVQA/VisualGenome/1/2346662.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the white umbrella?", "answer": "nokia", "gt_answers": [ "Visa" ], "image_path": "./data/STVQA/IIIT_text/img_000872.jpg", "model_name": "BLIP2" }, { "question": "What single digit can be seen in the website tag?", "answer": "1", "gt_answers": [ "4" ], "image_path": "./data/STVQA/imageNet/n03814639_379.JPEG", "model_name": "BLIP2" }, { "question": "What store is across the way whose name appears in white?", "answer": "macy's", "gt_answers": [ "SIXTIES" ], "image_path": "./data/STVQA/icdar/img_317.jpg", "model_name": "BLIP2" }, { "question": "How far away is Unter den Linden?", "answer": "from here", "gt_answers": [ "750m" ], "image_path": "./data/STVQA/VisualGenome/1/2377021.jpg", "model_name": "BLIP2" }, { "question": "Which destination is closest?", "answer": "friedrichstrasse", "gt_answers": [ "Deutsches Currywurst Museum" ], "image_path": "./data/STVQA/VisualGenome/1/2377021.jpg", "model_name": "BLIP2" }, { "question": "What does the yellow text say", "answer": "welcome", "gt_answers": [ "Welcome!", "WELCOME" ], "image_path": "./data/STVQA/IIIT_text/img_001277.jpg", "model_name": "BLIP2" }, { "question": "What number is the batter wearing?", "answer": "person", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000476341.jpg", "model_name": "BLIP2" }, { "question": "What are the 3 letters on the batter's uniform?", "answer": "jp", "gt_answers": [ "UNI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000476341.jpg", "model_name": "BLIP2" }, { "question": "Which of these train names is also a sign of the zodiac?", "answer": "leo", "gt_answers": [ "Leo" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000389065.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the sign?", "answer": "15", "gt_answers": [ "15" ], "image_path": "./data/STVQA/VisualGenome/2/2411708.jpg", "model_name": "BLIP2" }, { "question": "What is for life according to the sign?", "answer": "bananas", "gt_answers": [ "Food" ], "image_path": "./data/STVQA/VisualGenome/2/2411708.jpg", "model_name": "BLIP2" }, { "question": "What year was the motorcycle donated?", "answer": "2007", "gt_answers": [ "2007" ], "image_path": "./data/STVQA/imageNet/n02892201_15381.JPEG", "model_name": "BLIP2" }, { "question": "What month was the motorcycle donated?", "answer": "november", "gt_answers": [ "November" ], "image_path": "./data/STVQA/imageNet/n02892201_15381.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on the blue panels on the left?", "answer": "tennis court", "gt_answers": [ "South African Airways" ], "image_path": "./data/STVQA/VisualGenome/1/2342402.jpg", "model_name": "BLIP2" }, { "question": "What letter in black on a yellow background, on a is visible in the background?", "answer": "a", "gt_answers": [ "a" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000429318.jpg", "model_name": "BLIP2" }, { "question": "What letter is visible in pink on a sticker on the front of the brown suitcase to the lefthand side?", "answer": "d", "gt_answers": [ "H" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000429318.jpg", "model_name": "BLIP2" }, { "question": "What color is the bus", "answer": "yellow", "gt_answers": [ "Yellow" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000042705.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number", "answer": "nr 1", "gt_answers": [ "66" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000042705.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store in the corner?", "answer": "subway", "gt_answers": [ "Swatch" ], "image_path": "./data/STVQA/VisualGenome/1/498010.jpg", "model_name": "BLIP2" }, { "question": "What is the last initial on the towel?", "answer": "m", "gt_answers": [ "M" ], "image_path": "./data/STVQA/VisualGenome/2/4053.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "mvb", "gt_answers": [ "931" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000009226.jpg", "model_name": "BLIP2" }, { "question": "What is the train track number?", "answer": "4", "gt_answers": [ "4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000009226.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the street sign?", "answer": "hammock", "gt_answers": [ "Hammock", "Hammock Ln" ], "image_path": "./data/STVQA/VisualGenome/1/2319733.jpg", "model_name": "BLIP2" }, { "question": "What kind of establishment is Betos Place?", "answer": "shop", "gt_answers": [ "RESTAURANT BAR" ], "image_path": "./data/STVQA/VisualGenome/2/2409126.jpg", "model_name": "BLIP2" }, { "question": "What baseball team is on the yellow banner?", "answer": "los angeles dodgers", "gt_answers": [ "BOSTON RED SOX" ], "image_path": "./data/STVQA/VisualGenome/2/2409126.jpg", "model_name": "BLIP2" }, { "question": "What brand of beer has a crown over the name logo?", "answer": "coors light", "gt_answers": [ "Corona Extra", "CORONA" ], "image_path": "./data/STVQA/VisualGenome/2/2409126.jpg", "model_name": "BLIP2" }, { "question": "Which never closes?", "answer": "the kitchen door", "gt_answers": [ "kitchen" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000448351.jpg", "model_name": "BLIP2" }, { "question": "What do you need to be in order for the Kitchen to be always open?", "answer": "a cook", "gt_answers": [ "cook" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000448351.jpg", "model_name": "BLIP2" }, { "question": "What does the sticker on the bottom right say?", "answer": "teddy bear", "gt_answers": [ "Spencer Jr." ], "image_path": "./data/STVQA/VisualGenome/1/285672.jpg", "model_name": "BLIP2" }, { "question": "What letter is featured on the tennis racquet?", "answer": "a", "gt_answers": [ "W" ], "image_path": "./data/STVQA/VisualGenome/1/2374095.jpg", "model_name": "BLIP2" }, { "question": "What watch maker is the sponsor of the time clock?", "answer": "omega", "gt_answers": [ "Rolex" ], "image_path": "./data/STVQA/VisualGenome/1/2374095.jpg", "model_name": "BLIP2" }, { "question": "What does the black text say?", "answer": "a pylon is a power transmission line", "gt_answers": [ "accenture" ], "image_path": "./data/STVQA/IIIT_text/img_000074.jpg", "model_name": "BLIP2" }, { "question": "What is written on the white arrows?", "answer": "no parking", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/VisualGenome/1/2339627.jpg", "model_name": "BLIP2" }, { "question": "What can you expect to find in the store to the left?", "answer": "men's and women's clothing", "gt_answers": [ "FITTING ROOM" ], "image_path": "./data/STVQA/icdar/test_img_384.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the photography company?", "answer": "diana whitte photography", "gt_answers": [ "Diana Whyte Photography" ], "image_path": "./data/STVQA/IIIT_text/3918.jpg", "model_name": "BLIP2" }, { "question": "What color is the child's outfit?", "answer": "purple", "gt_answers": [ "purple" ], "image_path": "./data/STVQA/IIIT_text/3918.jpg", "model_name": "BLIP2" }, { "question": "What color is the chair?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/IIIT_text/3918.jpg", "model_name": "BLIP2" }, { "question": "What is the pregnant woman holding?", "answer": "shoes", "gt_answers": [ "Baby shoes" ], "image_path": "./data/STVQA/IIIT_text/6464.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOR OF THE DOG?", "answer": "black", "gt_answers": [ "BLACK" ], "image_path": "./data/STVQA/imageNet/n02110627_25596.JPEG", "model_name": "BLIP2" }, { "question": "What website is listed in the image?", "answer": "china mainland", "gt_answers": [ "alibaba.com.cn" ], "image_path": "./data/STVQA/imageNet/n04153751_9565.JPEG", "model_name": "BLIP2" }, { "question": "What city is listed on the side of the bus?", "answer": "london", "gt_answers": [ "London" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000002644.jpg", "model_name": "BLIP2" }, { "question": "What kind of guys are these?", "answer": "i'm not sure", "gt_answers": [ "Coffee" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030160.jpg", "model_name": "BLIP2" }, { "question": "What can happen if if you are not careful around the moving arm?", "answer": "you can get hurt", "gt_answers": [ "The moving arm can cause bodily harm or vehicle damage.", "bodily harm or vehicle damage" ], "image_path": "./data/STVQA/VisualGenome/1/2342400.jpg", "model_name": "BLIP2" }, { "question": "What do the nutritious oils in this dressing do?", "answer": "promote healthy skin and hair", "gt_answers": [ "ABSORB THE ANTIOXIDANTS" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028156.jpg", "model_name": "BLIP2" }, { "question": "What does this dressing add to you salad?", "answer": "it adds calories and fat", "gt_answers": [ "Delicious flavor" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028156.jpg", "model_name": "BLIP2" }, { "question": "What year?", "answer": "2009", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/VisualGenome/1/2348589.jpg", "model_name": "BLIP2" }, { "question": "What percentage off is offered in the window?", "answer": "50", "gt_answers": [ "50" ], "image_path": "./data/STVQA/icdar/img_801.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN THE ADVERTISEMENT BOARD?", "answer": "person", "gt_answers": [ "MOET & CHANDON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116827.jpg", "model_name": "BLIP2" }, { "question": "slogan on shirt?", "answer": "person", "gt_answers": [ "JUST DO IT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000186544.jpg", "model_name": "BLIP2" }, { "question": "What airline is the airplane?", "answer": "ethiopian airlines", "gt_answers": [ "Ethiopian" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000304590.jpg", "model_name": "BLIP2" }, { "question": "What airline is the loading staircase?", "answer": "ethiopian airlines", "gt_answers": [ "Kenya Airways" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000304590.jpg", "model_name": "BLIP2" }, { "question": "What type of emergency vehicle is pictured below?", "answer": "police car", "gt_answers": [ "Police" ], "image_path": "./data/STVQA/VisualGenome/1/2345619.jpg", "model_name": "BLIP2" }, { "question": "What is the Brand name?", "answer": "smith", "gt_answers": [ "HIMS" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008237.jpg", "model_name": "BLIP2" }, { "question": "What ingredients are in the drink?", "answer": "lemonade", "gt_answers": [ "GRAPEFRUIT CHAMOMILE & CARDAMON" ], "image_path": "./data/STVQA/VisualGenome/1/2329684.jpg", "model_name": "BLIP2" }, { "question": "How much is the sale at the store?", "answer": "the sale at the store", "gt_answers": [ "70%" ], "image_path": "./data/STVQA/icdar/img_878.jpg", "model_name": "BLIP2" }, { "question": "What does the board read?", "answer": "0 outs", "gt_answers": [ "DODGERS:COM", "DODGERS.COM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000249362.jpg", "model_name": "BLIP2" }, { "question": "What is written on the wall?", "answer": "i love tennis", "gt_answers": [ "Vittel" ], "image_path": "./data/STVQA/VisualGenome/2/2405865.jpg", "model_name": "BLIP2" }, { "question": "what his t-shirt refer", "answer": "organic waves", "gt_answers": [ "QUIK SILVER" ], "image_path": "./data/STVQA/VisualGenome/1/2342309.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the ferries?", "answer": "ferries", "gt_answers": [ "Brittany" ], "image_path": "./data/STVQA/VisualGenome/1/2358265.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the post?", "answer": "0", "gt_answers": [ "Two" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000571199.jpg", "model_name": "BLIP2" }, { "question": "What is written on the post?", "answer": "lisbon", "gt_answers": [ "Correio" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000571199.jpg", "model_name": "BLIP2" }, { "question": "What does the yellow sign say?", "answer": "turtle king", "gt_answers": [ "SLOW TURTLE XING" ], "image_path": "./data/STVQA/VisualGenome/1/2359889.jpg", "model_name": "BLIP2" }, { "question": "What number is on the train?", "answer": "the train is a dmu", "gt_answers": [ "112 112", "112" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000242973.jpg", "model_name": "BLIP2" }, { "question": "What phrase is on the tram?", "answer": "''police''", "gt_answers": [ "WIR SORGEN FUR IHRE SICHERHEIT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000242973.jpg", "model_name": "BLIP2" }, { "question": "What word is on the front of the tram?", "answer": "person", "gt_answers": [ "POLIZEI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000242973.jpg", "model_name": "BLIP2" }, { "question": "What is the store name on the second floor?", "answer": "zara", "gt_answers": [ "GUESS" ], "image_path": "./data/STVQA/icdar/test_img_336.jpg", "model_name": "BLIP2" }, { "question": "What does the blue sign say?", "answer": "mcdonalds", "gt_answers": [ "GUESS" ], "image_path": "./data/STVQA/icdar/test_img_336.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the Oven?", "answer": "ge", "gt_answers": [ "HOLF" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000209923.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boulevard on the sign?", "answer": "logan boulevard", "gt_answers": [ "Logan", "Logan Blvd." ], "image_path": "./data/STVQA/imageNet/n06874185_113.JPEG", "model_name": "BLIP2" }, { "question": "What 4 digit number is written on the sign?", "answer": "0", "gt_answers": [ "2600" ], "image_path": "./data/STVQA/imageNet/n06874185_113.JPEG", "model_name": "BLIP2" }, { "question": "What are the letters on the side of the train in yellow?", "answer": "csx", "gt_answers": [ "CSX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000093000.jpg", "model_name": "BLIP2" }, { "question": "What are the letters on the front of the train?", "answer": "csx", "gt_answers": [ "CSX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000093000.jpg", "model_name": "BLIP2" }, { "question": "What character and numbers are on the top right front of the train?", "answer": "c s c s", "gt_answers": [ "B432", "8432" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000093000.jpg", "model_name": "BLIP2" }, { "question": "What gate is this?", "answer": "a pedestrian gate", "gt_answers": [ "The side gate", "SIDE-GATE" ], "image_path": "./data/STVQA/icdar/img_108.jpg", "model_name": "BLIP2" }, { "question": "Which state is this product from?", "answer": "new york", "gt_answers": [ "VERMONT" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006154.jpg", "model_name": "BLIP2" }, { "question": "Which organization is shown on the blue badge on the product?", "answer": "neighborhood cheese", "gt_answers": [ "American Cheese Society." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006154.jpg", "model_name": "BLIP2" }, { "question": "What brand is the sewing machine?", "answer": "singer", "gt_answers": [ "Singer" ], "image_path": "./data/STVQA/imageNet/n04179913_14547.JPEG", "model_name": "BLIP2" }, { "question": "What type of traffic signal is it?", "answer": "stop sign", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000248496.jpg", "model_name": "BLIP2" }, { "question": "What type of stop signal is it?", "answer": "four way", "gt_answers": [ "4 way" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000248496.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "alto", "gt_answers": [ "Alto" ], "image_path": "./data/STVQA/VisualGenome/1/1592898.jpg", "model_name": "BLIP2" }, { "question": "What website is advertised on the bottom right corner?", "answer": "youtube", "gt_answers": [ "www.myprofe.com" ], "image_path": "./data/STVQA/VisualGenome/1/2359507.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE BRAND OF THIS SURGE PROTECTOR?", "answer": "philips", "gt_answers": [ "BELKIN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000080566.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "person", "gt_answers": [ "31" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000364476.jpg", "model_name": "BLIP2" }, { "question": "What is written in front of the sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2359876.jpg", "model_name": "BLIP2" }, { "question": "WHAT NUMBER IS THE BATTER?", "answer": "person", "gt_answers": [ "74 74", "74" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048032.jpg", "model_name": "BLIP2" }, { "question": "What number radio station is listed as a sponsor on the wall?", "answer": "fm station", "gt_answers": [ "101.5" ], "image_path": "./data/STVQA/VisualGenome/2/2403804.jpg", "model_name": "BLIP2" }, { "question": "What is the word written on the chest of the man in the blue jacket?", "answer": "baseball", "gt_answers": [ "Thunder" ], "image_path": "./data/STVQA/VisualGenome/2/2403804.jpg", "model_name": "BLIP2" }, { "question": "What year was the photo taken?", "answer": "2015", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000194788.jpg", "model_name": "BLIP2" }, { "question": "What does the black tag say?", "answer": "it's a cake", "gt_answers": [ "2011 turku abo" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000031994.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the book?", "answer": "i am america", "gt_answers": [ "\"I am America (and so can you)\"", "I AM AMERICA (AND SO CAN YOU!)" ], "image_path": "./data/STVQA/VisualGenome/1/2361823.jpg", "model_name": "BLIP2" }, { "question": "Who is the author of the book?", "answer": "stephen colbert", "gt_answers": [ "Stephen Colbert" ], "image_path": "./data/STVQA/VisualGenome/1/2361823.jpg", "model_name": "BLIP2" }, { "question": "What is the text written on the banner?", "answer": "i am a human being", "gt_answers": [ "Klimaschutz jetzt", "KLIMASCHUTZ JETZT!" ], "image_path": "./data/STVQA/VisualGenome/1/2338872.jpg", "model_name": "BLIP2" }, { "question": "What store is on the bottom level of the mall?", "answer": "walmart", "gt_answers": [ "Avenue" ], "image_path": "./data/STVQA/icdar/img_476.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the apparel retail store?", "answer": "j crew", "gt_answers": [ "Avenue" ], "image_path": "./data/STVQA/icdar/img_476.jpg", "model_name": "BLIP2" }, { "question": "What is the only number visible on the player's jersey?", "answer": "person", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000050686.jpg", "model_name": "BLIP2" }, { "question": "What company manufactured this bus?", "answer": "person", "gt_answers": [ "Woodey", "WOODRY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000435327.jpg", "model_name": "BLIP2" }, { "question": "the shop MUNCHIES has what written in red text on the window?", "answer": "munchies", "gt_answers": [ "HOT & COLD FILLED ROLLS" ], "image_path": "./data/STVQA/VisualGenome/1/2315938.jpg", "model_name": "BLIP2" }, { "question": "tiny letters on sweater?", "answer": "i love you", "gt_answers": [ "OXY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000494341.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE DOLL?", "answer": "teddy bear", "gt_answers": [ "BEAR" ], "image_path": "./data/STVQA/VisualGenome/1/2325101.jpg", "model_name": "BLIP2" }, { "question": "What number is in the sign", "answer": "59", "gt_answers": [ "59" ], "image_path": "./data/STVQA/IIIT_text/4699.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the sign?", "answer": "isla", "gt_answers": [ "Isla" ], "image_path": "./data/STVQA/IIIT_text/4699.jpg", "model_name": "BLIP2" }, { "question": "what is the brand name of the key board?", "answer": "dell", "gt_answers": [ "DELL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000580960.jpg", "model_name": "BLIP2" }, { "question": "What character name is on the fork?", "answer": "person", "gt_answers": [ "HELLO KITTY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000466750.jpg", "model_name": "BLIP2" }, { "question": "What is the number written in bold red?", "answer": "4", "gt_answers": [ "90" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000012556.jpg", "model_name": "BLIP2" }, { "question": "What institute is this?", "answer": "indian institute of technology", "gt_answers": [ "INDIAN INSTITUTE OF TECHNOLOGY" ], "image_path": "./data/STVQA/IIIT_text/img_000068.jpg", "model_name": "BLIP2" }, { "question": "how many people are in this photo?", "answer": "seven", "gt_answers": [ "Seven", "8" ], "image_path": "./data/STVQA/IIIT_text/img_000075.jpg", "model_name": "BLIP2" }, { "question": "What store is just here?", "answer": "city center", "gt_answers": [ "Macy's", "MACYS" ], "image_path": "./data/STVQA/VisualGenome/2/2402713.jpg", "model_name": "BLIP2" }, { "question": "What company shop has a blue sign?", "answer": "mrt", "gt_answers": [ "Apex" ], "image_path": "./data/STVQA/icdar/img_481.jpg", "model_name": "BLIP2" }, { "question": "What is the website name at the bottom?", "answer": "person", "gt_answers": [ "www.axcium.com" ], "image_path": "./data/STVQA/imageNet/n02097209_3435.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the photographer?", "answer": "person", "gt_answers": [ "John Bald" ], "image_path": "./data/STVQA/IIIT_text/658.jpg", "model_name": "BLIP2" }, { "question": "Where is this doughnut featured?", "answer": "the doughnut shop", "gt_answers": [ "Boston Globe" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000443752.jpg", "model_name": "BLIP2" }, { "question": "What object do you see in the picture?", "answer": "broom", "gt_answers": [ "Broom" ], "image_path": "./data/STVQA/imageNet/n02906734_7411.JPEG", "model_name": "BLIP2" }, { "question": "What company's logo is on the coffee cup?", "answer": "starbucks", "gt_answers": [ "STARBUCKS COFFEE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000460694.jpg", "model_name": "BLIP2" }, { "question": "What is the first word in white at the top of the clock?", "answer": "person", "gt_answers": [ "james" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000579899.jpg", "model_name": "BLIP2" }, { "question": "What is the name written on the back of the blue jersey?", "answer": "person", "gt_answers": [ "SCOTT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000187103.jpg", "model_name": "BLIP2" }, { "question": "What is on the sign?", "answer": "west welcome", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2357129.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the air craft", "answer": "air canada express", "gt_answers": [ "AIR CANADA express" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000185888.jpg", "model_name": "BLIP2" }, { "question": "what number is on the oragnger players shorts", "answer": "0", "gt_answers": [ "25" ], "image_path": "./data/STVQA/VisualGenome/1/2316660.jpg", "model_name": "BLIP2" }, { "question": "how many people are on the soccer field", "answer": "eight", "gt_answers": [ "seven", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2316660.jpg", "model_name": "BLIP2" }, { "question": "what color is the mans shirt whos carring the ball", "answer": "blue", "gt_answers": [ "yellow", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2316660.jpg", "model_name": "BLIP2" }, { "question": "What number is in the top left of the picture above the gray haired lady's head?", "answer": "4", "gt_answers": [ "6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000481702.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bus?", "answer": "x1", "gt_answers": [ "11" ], "image_path": "./data/STVQA/VisualGenome/1/2343455.jpg", "model_name": "BLIP2" }, { "question": "What color is the bus?", "answer": "red", "gt_answers": [ "Red" ], "image_path": "./data/STVQA/VisualGenome/1/2343455.jpg", "model_name": "BLIP2" }, { "question": "What name is printed on the back of the catcher's jersey?", "answer": "person", "gt_answers": [ "Rodriguez" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000101234.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company in the building?", "answer": "bank of america", "gt_answers": [ "Cambridge trust company" ], "image_path": "./data/STVQA/VisualGenome/2/1414.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "person's drugs", "gt_answers": [ "Engstrom's Prescriptions" ], "image_path": "./data/STVQA/VisualGenome/1/2351293.jpg", "model_name": "BLIP2" }, { "question": "What is sold in this store?", "answer": "drugs", "gt_answers": [ "Drugs" ], "image_path": "./data/STVQA/VisualGenome/1/2351293.jpg", "model_name": "BLIP2" }, { "question": "What insurance company is referenced on the newspaper?", "answer": "person", "gt_answers": [ "Prudential" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000044763.jpg", "model_name": "BLIP2" }, { "question": "What name is on the back of the grey shirt?", "answer": "phillies", "gt_answers": [ "DEBRAND" ], "image_path": "./data/STVQA/VisualGenome/1/2321608.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the tennis racket?", "answer": "person", "gt_answers": [ "W" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000461389.jpg", "model_name": "BLIP2" }, { "question": "How many people are in the picture?", "answer": "two", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000461389.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the book being read?", "answer": "person", "gt_answers": [ "Mother Goose" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000436453.jpg", "model_name": "BLIP2" }, { "question": "What is the preferred charge card?", "answer": "visa", "gt_answers": [ "VISA" ], "image_path": "./data/STVQA/icdar/img_109.jpg", "model_name": "BLIP2" }, { "question": "What is on the red sign on the fence?", "answer": "motocross", "gt_answers": [ "102.5 clyde 1" ], "image_path": "./data/STVQA/VisualGenome/1/2345393.jpg", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the projectors?", "answer": "hp", "gt_answers": [ "Sharp" ], "image_path": "./data/STVQA/imageNet/n04009552_2996.JPEG", "model_name": "BLIP2" }, { "question": "What website is this?", "answer": "china mainland", "gt_answers": [ "www.china.cn" ], "image_path": "./data/STVQA/imageNet/n02769748_474.JPEG", "model_name": "BLIP2" }, { "question": "what website is featured on the picture?", "answer": "china mainland", "gt_answers": [ "www.china.cn" ], "image_path": "./data/STVQA/imageNet/n02769748_474.JPEG", "model_name": "BLIP2" }, { "question": "What kind of food are they going to be cooking?", "answer": "pizza", "gt_answers": [ "Grilled Pizza" ], "image_path": "./data/STVQA/VisualGenome/2/2409508.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the wine?", "answer": "grilled pizza", "gt_answers": [ "BARBERA D'ASTI" ], "image_path": "./data/STVQA/VisualGenome/2/2409508.jpg", "model_name": "BLIP2" }, { "question": "What is the top word written on the sign?", "answer": "music", "gt_answers": [ "music" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000521982.jpg", "model_name": "BLIP2" }, { "question": "What is written on the black part of the sign?", "answer": "music is great", "gt_answers": [ "music is GREAT", "MUSIC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000521982.jpg", "model_name": "BLIP2" }, { "question": "What is the shortest word written on the sign?", "answer": "music", "gt_answers": [ "is" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000521982.jpg", "model_name": "BLIP2" }, { "question": "What numbers are on the train?", "answer": "246", "gt_answers": [ "246" ], "image_path": "./data/STVQA/VisualGenome/1/2339245.jpg", "model_name": "BLIP2" }, { "question": "Who took this photograph?", "answer": "the photographer", "gt_answers": [ "Olivier Cabaret" ], "image_path": "./data/STVQA/VisualGenome/1/2368251.jpg", "model_name": "BLIP2" }, { "question": "What year is the image copyrighted in?", "answer": "2010", "gt_answers": [ "2004" ], "image_path": "./data/STVQA/imageNet/n02091134_5925.JPEG", "model_name": "BLIP2" }, { "question": "what name is on the clock", "answer": "saint peter", "gt_answers": [ "saint christopher's place" ], "image_path": "./data/STVQA/VisualGenome/1/2318881.jpg", "model_name": "BLIP2" }, { "question": "How much is water?", "answer": "$2", "gt_answers": [ "$2.00" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000064730.jpg", "model_name": "BLIP2" }, { "question": "What brand is the cell phone?", "answer": "samsung", "gt_answers": [ "LG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341226.jpg", "model_name": "BLIP2" }, { "question": "Who made the cell phone?", "answer": "nokia", "gt_answers": [ "LG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341226.jpg", "model_name": "BLIP2" }, { "question": "What does the watermark say?", "answer": "person", "gt_answers": [ "BYMAHS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341226.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2342091.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street on the green sign?", "answer": "lake street", "gt_answers": [ "S CANAL ST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000488990.jpg", "model_name": "BLIP2" }, { "question": "What appears below S CANAL ST?", "answer": "the river", "gt_answers": [ "500 W" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000488990.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bread?", "answer": "rye bread", "gt_answers": [ "Acme", "ACME BREAD" ], "image_path": "./data/STVQA/VisualGenome/2/2415917.jpg", "model_name": "BLIP2" }, { "question": "what do the white letters on the ground say", "answer": "no parking", "gt_answers": [ "do not enter" ], "image_path": "./data/STVQA/VisualGenome/1/2366001.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "newsagency", "gt_answers": [ "NEWSAGENCY" ], "image_path": "./data/STVQA/IIIT_text/img_000851.jpg", "model_name": "BLIP2" }, { "question": "What is the trolley number?", "answer": "3", "gt_answers": [ "1209" ], "image_path": "./data/STVQA/imageNet/n04335435_18633.JPEG", "model_name": "BLIP2" }, { "question": "What is the copyright date?", "answer": "2009", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/VisualGenome/1/2334946.jpg", "model_name": "BLIP2" }, { "question": "Who is the photographer for this image?", "answer": "enrique margarita", "gt_answers": [ "Gustavo Morejon J." ], "image_path": "./data/STVQA/VisualGenome/1/2334946.jpg", "model_name": "BLIP2" }, { "question": "What is the photographer's website address?", "answer": "http://www", "gt_answers": [ "www.gustavomorejon.com" ], "image_path": "./data/STVQA/VisualGenome/1/2334946.jpg", "model_name": "BLIP2" }, { "question": "which soda is advertised in the picture", "answer": "coke", "gt_answers": [ "pepsi" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098732.jpg", "model_name": "BLIP2" }, { "question": "what sauce is advertised in the picture", "answer": "mustard", "gt_answers": [ "ketchup" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098732.jpg", "model_name": "BLIP2" }, { "question": "What business name is on the tallest building?", "answer": "samsung", "gt_answers": [ "Vodafone" ], "image_path": "./data/STVQA/IIIT_text/img_001242.jpg", "model_name": "BLIP2" }, { "question": "What does the motorbike say?", "answer": "police", "gt_answers": [ "POLICE" ], "image_path": "./data/STVQA/VisualGenome/1/2339086.jpg", "model_name": "BLIP2" }, { "question": "Who is the memorial in memory of?", "answer": "mary jo green", "gt_answers": [ "Mary Ann Griffin" ], "image_path": "./data/STVQA/imageNet/n02892201_17047.JPEG", "model_name": "BLIP2" }, { "question": "When did Mary Ann Grifin die?", "answer": "she died on october 1st, 1908", "gt_answers": [ "8-8-1876" ], "image_path": "./data/STVQA/imageNet/n02892201_17047.JPEG", "model_name": "BLIP2" }, { "question": "how many numbers are on the wings?", "answer": "two", "gt_answers": [ "four" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000215502.jpg", "model_name": "BLIP2" }, { "question": "what are the numbers on the wings?", "answer": "a b", "gt_answers": [ "1944" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000215502.jpg", "model_name": "BLIP2" }, { "question": "What name is on the double dorrs?", "answer": "bank of asia", "gt_answers": [ "Swarovski" ], "image_path": "./data/STVQA/icdar/test_img_285.jpg", "model_name": "BLIP2" }, { "question": "What item code is listed in the photo?", "answer": "lb-breast-bikini-pink", "gt_answers": [ "4838" ], "image_path": "./data/STVQA/imageNet/n02892767_13819.JPEG", "model_name": "BLIP2" }, { "question": "What size is the item shown?", "answer": "medium", "gt_answers": [ "34" ], "image_path": "./data/STVQA/imageNet/n02892767_13819.JPEG", "model_name": "BLIP2" }, { "question": "What is the email for the company?", "answer": "labrasse@gmail com", "gt_answers": [ "LaBrassiere@ymail.com" ], "image_path": "./data/STVQA/imageNet/n02892767_13819.JPEG", "model_name": "BLIP2" }, { "question": "What 2 streets is this at?", "answer": "west street and king street", "gt_answers": [ "West 32nd St and Korea Way" ], "image_path": "./data/STVQA/VisualGenome/1/2332161.jpg", "model_name": "BLIP2" }, { "question": "What is the number above the door?", "answer": "4", "gt_answers": [ "1760" ], "image_path": "./data/STVQA/IIIT_text/3451.jpg", "model_name": "BLIP2" }, { "question": "What year is listed in the copyright?", "answer": "2015", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000487322.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the fire hydrant?", "answer": "3", "gt_answers": [ "12470" ], "image_path": "./data/STVQA/VisualGenome/1/2365720.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the website?", "answer": "lunchinbox net", "gt_answers": [ "lunchinabox.net" ], "image_path": "./data/STVQA/imageNet/n04557648_11339.JPEG", "model_name": "BLIP2" }, { "question": "What word is in the red octagon?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2321622.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the soda in the picture?", "answer": "diet coke", "gt_answers": [ "Coca Cola" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000054294.jpg", "model_name": "BLIP2" }, { "question": "What car is on the wall?", "answer": "audi", "gt_answers": [ "Kia" ], "image_path": "./data/STVQA/VisualGenome/2/2405990.jpg", "model_name": "BLIP2" }, { "question": "How frequently does the bus in this image appear at the same location according to its own propoganda?", "answer": "every 15 minutes", "gt_answers": [ "FROM EVERY 5 MINUTES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420488.jpg", "model_name": "BLIP2" }, { "question": "What kind of business is on the black advertisment?", "answer": "beach", "gt_answers": [ "Entertainment" ], "image_path": "./data/STVQA/VisualGenome/1/713356.jpg", "model_name": "BLIP2" }, { "question": "What is the street address for the entertainment business?", "answer": "santa clara street", "gt_answers": [ "125 Fifth Ave", "125 Fifth Ave. Brooklyn, NY 11217" ], "image_path": "./data/STVQA/VisualGenome/1/713356.jpg", "model_name": "BLIP2" }, { "question": "What is the total bet?", "answer": "5 coins", "gt_answers": [ "90" ], "image_path": "./data/STVQA/imageNet/n04243546_28435.JPEG", "model_name": "BLIP2" }, { "question": "What is the balance?", "answer": "balance is the amount of money in your casino account", "gt_answers": [ "1356.55" ], "image_path": "./data/STVQA/imageNet/n04243546_28435.JPEG", "model_name": "BLIP2" }, { "question": "What is the game called?", "answer": "slot machine", "gt_answers": [ "Mystery of the Tarot" ], "image_path": "./data/STVQA/imageNet/n04243546_28435.JPEG", "model_name": "BLIP2" }, { "question": "What year is listed on the tiles?", "answer": "1889", "gt_answers": [ "1991" ], "image_path": "./data/STVQA/IIIT_text/229.jpg", "model_name": "BLIP2" }, { "question": "What does the shirt say?", "answer": "iron cross", "gt_answers": [ "west coast choppers" ], "image_path": "./data/STVQA/imageNet/n03595614_22179.JPEG", "model_name": "BLIP2" }, { "question": "What is the date this photo was taken?", "answer": "september 2008", "gt_answers": [ "08.14.2007" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000051302.jpg", "model_name": "BLIP2" }, { "question": "What company is branded on these towels?", "answer": "samsung", "gt_answers": [ "Samsung" ], "image_path": "./data/STVQA/imageNet/n02808304_2359.JPEG", "model_name": "BLIP2" }, { "question": "What type of vehicle is on the pier?", "answer": "truck", "gt_answers": [ "truck" ], "image_path": "./data/STVQA/VisualGenome/1/2331968.jpg", "model_name": "BLIP2" }, { "question": "What word is on the bottom of the street sign?", "answer": "time", "gt_answers": [ "Everyday" ], "image_path": "./data/STVQA/VisualGenome/2/2409577.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the street sign?", "answer": "night", "gt_answers": [ "Night" ], "image_path": "./data/STVQA/VisualGenome/2/2409577.jpg", "model_name": "BLIP2" }, { "question": "What is the first number on the street sign?", "answer": "3", "gt_answers": [ "7" ], "image_path": "./data/STVQA/VisualGenome/2/2409577.jpg", "model_name": "BLIP2" }, { "question": "What is the brand on the top right of the front of the van?", "answer": "person", "gt_answers": [ "Westfalia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000136369.jpg", "model_name": "BLIP2" }, { "question": "What are the two letters on the van's license plate?", "answer": "person and person", "gt_answers": [ "DW" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000136369.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the person who owns this photo?", "answer": "nick", "gt_answers": [ "Nicki" ], "image_path": "./data/STVQA/imageNet/n02410509_873.JPEG", "model_name": "BLIP2" }, { "question": "What is the last name of the person who owns this photo?", "answer": "person", "gt_answers": [ "Toizer" ], "image_path": "./data/STVQA/imageNet/n02410509_873.JPEG", "model_name": "BLIP2" }, { "question": "What type of vitamin does this contain?", "answer": "vitamin c", "gt_answers": [ "Vitamin E Pearls" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007400.jpg", "model_name": "BLIP2" }, { "question": "what are the words on the sign under the no left turn arrow?", "answer": "no parking on the street", "gt_answers": [ "no parking any time" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000184709.jpg", "model_name": "BLIP2" }, { "question": "What compartment is the man at the front of the train in?", "answer": "the driver's compartment", "gt_answers": [ "Conductor" ], "image_path": "./data/STVQA/VisualGenome/2/2406261.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the train?", "answer": "no number", "gt_answers": [ "57" ], "image_path": "./data/STVQA/VisualGenome/2/2406261.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company that owns this plane?", "answer": "delta airlines", "gt_answers": [ "Delta" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000171637.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the plane?", "answer": "british airways", "gt_answers": [ "British Airways" ], "image_path": "./data/STVQA/IIIT_text/img_000210.jpg", "model_name": "BLIP2" }, { "question": "What letters are on the poster?", "answer": "person", "gt_answers": [ "M.K", "MK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000111525.jpg", "model_name": "BLIP2" }, { "question": "What is the yellow last word on the building?", "answer": "person", "gt_answers": [ "Reileck" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000573981.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the town?", "answer": "otello", "gt_answers": [ "Othello" ], "image_path": "./data/STVQA/VisualGenome/1/497961.jpg", "model_name": "BLIP2" }, { "question": "what is the phone number of the Cafe Huong Que", "answer": "510-644-4444", "gt_answers": [ "206-722-4318" ], "image_path": "./data/STVQA/VisualGenome/1/497961.jpg", "model_name": "BLIP2" }, { "question": "Is the Cafe Huong Que open or closed", "answer": "open", "gt_answers": [ "open" ], "image_path": "./data/STVQA/VisualGenome/1/497961.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the road sign?", "answer": "elm street", "gt_answers": [ "Main Street" ], "image_path": "./data/STVQA/VisualGenome/1/150551.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the book?", "answer": "the cat in the hat", "gt_answers": [ "Green Eggs and Ham" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000451018.jpg", "model_name": "BLIP2" }, { "question": "What food is shown on the plate?", "answer": "spaghetti", "gt_answers": [ "Green Eggs and Ham" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000451018.jpg", "model_name": "BLIP2" }, { "question": "What white text is found above the illustration?", "answer": "the cat in bed", "gt_answers": [ "Green Eggs and Ham" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000451018.jpg", "model_name": "BLIP2" }, { "question": "What does this shop sells?", "answer": "haircuts", "gt_answers": [ "HAIR CUTS" ], "image_path": "./data/STVQA/imageNet/n02791270_8433.JPEG", "model_name": "BLIP2" }, { "question": "What exercise is being shown?", "answer": "barbell curl", "gt_answers": [ "BARBELL BICEPS CURL" ], "image_path": "./data/STVQA/imageNet/n02790996_2967.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the gas station?", "answer": "sam's", "gt_answers": [ "Gulf" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000306535.jpg", "model_name": "BLIP2" }, { "question": "What sports team is on the guys shirt?", "answer": "person", "gt_answers": [ "Bears" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000090444.jpg", "model_name": "BLIP2" }, { "question": "What word is underlined?", "answer": "oad", "gt_answers": [ "DADDY" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028730.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the team the man plays for?", "answer": "texas", "gt_answers": [ "Texas" ], "image_path": "./data/STVQA/VisualGenome/1/2368064.jpg", "model_name": "BLIP2" }, { "question": "What is the player's number?", "answer": "2", "gt_answers": [ "44" ], "image_path": "./data/STVQA/VisualGenome/1/2368064.jpg", "model_name": "BLIP2" }, { "question": "what you seen text on top right corner?", "answer": "\"relaciones de mujeres 8\"", "gt_answers": [ "Torremozas" ], "image_path": "./data/STVQA/VisualGenome/1/2370068.jpg", "model_name": "BLIP2" }, { "question": "What words are written on the middle two tiers of cake?", "answer": "i believe in marriage equality", "gt_answers": [ "PRIVILEGED STRUGGLING", "STRUGGLING PRIVILEGED" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000360473.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the top tier of cake?", "answer": "person", "gt_answers": [ "SINKING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000360473.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the bottom tier of cake?", "answer": "person", "gt_answers": [ "ELITE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000360473.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the coffee shop seen?", "answer": "starbucks", "gt_answers": [ "Starbucks", "STARBUCKS COFFEE" ], "image_path": "./data/STVQA/VisualGenome/2/2430.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the brewery?", "answer": "brew pub", "gt_answers": [ "Morgan brewery", "MORGAN STREET" ], "image_path": "./data/STVQA/VisualGenome/1/713722.jpg", "model_name": "BLIP2" }, { "question": "What is written on the play ground", "answer": "tennis court", "gt_answers": [ "PEUGEOT" ], "image_path": "./data/STVQA/VisualGenome/1/2363635.jpg", "model_name": "BLIP2" }, { "question": "What are the first four letters of the word in the lower right hand corner?", "answer": "ta", "gt_answers": [ "MARK" ], "image_path": "./data/STVQA/imageNet/n04476259_2726.JPEG", "model_name": "BLIP2" }, { "question": "What two letters are listed after the dot?", "answer": "a and b", "gt_answers": [ "NL" ], "image_path": "./data/STVQA/imageNet/n04476259_2726.JPEG", "model_name": "BLIP2" }, { "question": "what's the brand name at the top of the ball?", "answer": "rawlings", "gt_answers": [ "rawlings" ], "image_path": "./data/STVQA/imageNet/n02799071_88.JPEG", "model_name": "BLIP2" }, { "question": "What is color of plant", "answer": "green", "gt_answers": [ "green" ], "image_path": "./data/STVQA/imageNet/n02259212_2045.JPEG", "model_name": "BLIP2" }, { "question": "What is the photography company featured in the photo?", "answer": "person", "gt_answers": [ "Olivia Allison Photography" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000511249.jpg", "model_name": "BLIP2" }, { "question": "What does the mug say?", "answer": "person", "gt_answers": [ "Starbucks coffee" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293385.jpg", "model_name": "BLIP2" }, { "question": "What follows 'shine' on the sign on the building?", "answer": "tokyo", "gt_answers": [ "City" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000305973.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the shop front?", "answer": "chanel", "gt_answers": [ "Omega" ], "image_path": "./data/STVQA/icdar/test_img_171.jpg", "model_name": "BLIP2" }, { "question": "What is are the tube shaped fruits on the bottom of the image called?", "answer": "bananas", "gt_answers": [ "BANANAS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000338670.jpg", "model_name": "BLIP2" }, { "question": "What number iis Barber shop?", "answer": "second genesis", "gt_answers": [ "5107" ], "image_path": "./data/STVQA/VisualGenome/1/2331185.jpg", "model_name": "BLIP2" }, { "question": "What is Susan\ufffd\ufffd\ufffds", "answer": "s short", "gt_answers": [ "Unisex hairstylist", "HAIRSTYLIST" ], "image_path": "./data/STVQA/VisualGenome/1/2331185.jpg", "model_name": "BLIP2" }, { "question": "What word is written in white on black on the skier's coat?", "answer": "person", "gt_answers": [ "GRENADE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000574218.jpg", "model_name": "BLIP2" }, { "question": "What motor company is she sponsored by?", "answer": "person", "gt_answers": [ "Ford" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000103722.jpg", "model_name": "BLIP2" }, { "question": "What is the bus' licence number?", "answer": "lt stc", "gt_answers": [ "byo yx59", "YX59 BYO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000187131.jpg", "model_name": "BLIP2" }, { "question": "What area does the bus travel on its route?", "answer": "south london", "gt_answers": [ "centre town sutton", "SUTTON TOWN CENTRE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000187131.jpg", "model_name": "BLIP2" }, { "question": "Who made the bus?", "answer": "leyland", "gt_answers": [ "abellio abellio", "ABELLIO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000187131.jpg", "model_name": "BLIP2" }, { "question": "What is the first word written on the mirror?", "answer": "car", "gt_answers": [ "OBJECTS" ], "image_path": "./data/STVQA/imageNet/n02965783_2417.JPEG", "model_name": "BLIP2" }, { "question": "What do they sell at the purple tent?", "answer": "samosas", "gt_answers": [ "tea" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000568640.jpg", "model_name": "BLIP2" }, { "question": "What do they do in the blue tent?", "answer": "they sell food", "gt_answers": [ "FLY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000568640.jpg", "model_name": "BLIP2" }, { "question": "What is written on the large building?", "answer": "person", "gt_answers": [ "MDCCCVI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000081135.jpg", "model_name": "BLIP2" }, { "question": "Who says you can't park here?", "answer": "person", "gt_answers": [ "police" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000081102.jpg", "model_name": "BLIP2" }, { "question": "What percentage is on the sign?", "answer": "100", "gt_answers": [ "60%" ], "image_path": "./data/STVQA/VisualGenome/2/2415447.jpg", "model_name": "BLIP2" }, { "question": "WHAT ISTHE NUMBER OF THE TRAIN?", "answer": "no number", "gt_answers": [ "46521" ], "image_path": "./data/STVQA/VisualGenome/1/2355508.jpg", "model_name": "BLIP2" }, { "question": "HOW MANY PERSON ARE THERE IN THE PITCURE?", "answer": "two", "gt_answers": [ "2" ], "image_path": "./data/STVQA/VisualGenome/1/2355508.jpg", "model_name": "BLIP2" }, { "question": "What color is the tracker?", "answer": "red", "gt_answers": [ "Red.", "Red" ], "image_path": "./data/STVQA/imageNet/n03496892_4388.JPEG", "model_name": "BLIP2" }, { "question": "How many tires do you see?", "answer": "four", "gt_answers": [ "4" ], "image_path": "./data/STVQA/imageNet/n03496892_4388.JPEG", "model_name": "BLIP2" }, { "question": "What school is listed on the picture?", "answer": "department of computer science", "gt_answers": [ "The Erik Jonsson School of Engineering & Computer Science At The University Of Texas At Dallas", "THE ERIK JONSSON SCHOOL" ], "image_path": "./data/STVQA/IIIT_text/img_000403.jpg", "model_name": "BLIP2" }, { "question": "What department is listed on the picture?", "answer": "department of computer science", "gt_answers": [ "Department of Computer Science" ], "image_path": "./data/STVQA/IIIT_text/img_000403.jpg", "model_name": "BLIP2" }, { "question": "What year was this picture taken?", "answer": "2012", "gt_answers": [ "2004" ], "image_path": "./data/STVQA/IIIT_text/img_000080.jpg", "model_name": "BLIP2" }, { "question": "What month was this picture taken?", "answer": "august", "gt_answers": [ "Nov" ], "image_path": "./data/STVQA/IIIT_text/img_000080.jpg", "model_name": "BLIP2" }, { "question": "What team name is displayed on the player's jersey?", "answer": "tampa bay rays", "gt_answers": [ "RAYS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000500740.jpg", "model_name": "BLIP2" }, { "question": "What company name is displayed on the package of gum?", "answer": "person", "gt_answers": [ "WRIGLEY'S" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000500740.jpg", "model_name": "BLIP2" }, { "question": "What flavor is the gum?", "answer": "mint", "gt_answers": [ "DOUBLEMINT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000500740.jpg", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the camera?", "answer": "canon", "gt_answers": [ "Canon" ], "image_path": "./data/STVQA/imageNet/n04069434_21423.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the camera strap?", "answer": "canon", "gt_answers": [ "CANON EOS DIGITAL" ], "image_path": "./data/STVQA/imageNet/n04069434_21423.JPEG", "model_name": "BLIP2" }, { "question": "What name is printed on the magazine?", "answer": "person", "gt_answers": [ "TINA FEY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000252069.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the magazine?", "answer": "person", "gt_answers": [ "VANITY FAIR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000252069.jpg", "model_name": "BLIP2" }, { "question": "What is a brand of coffee maker?", "answer": "nespresso", "gt_answers": [ "NESCAFE" ], "image_path": "./data/STVQA/icdar/img_96.jpg", "model_name": "BLIP2" }, { "question": "what is written within the speak bubble on the shirt?", "answer": "a fish", "gt_answers": [ "justo!", "JUSTO" ], "image_path": "./data/STVQA/imageNet/n03595614_19756.JPEG", "model_name": "BLIP2" }, { "question": "what is the name of the clothing brand?", "answer": "santos", "gt_answers": [ "Justo" ], "image_path": "./data/STVQA/imageNet/n03595614_19756.JPEG", "model_name": "BLIP2" }, { "question": "how much did the shirt cost?", "answer": "$18", "gt_answers": [ "14.99" ], "image_path": "./data/STVQA/imageNet/n03595614_19756.JPEG", "model_name": "BLIP2" }, { "question": "What words have been added to the stop sign?", "answer": "no thru traffic ahead", "gt_answers": [ "Don't Believin!", "DON'T BELIEVIN" ], "image_path": "./data/STVQA/VisualGenome/2/2417417.jpg", "model_name": "BLIP2" }, { "question": "Who took the photo?", "answer": "person", "gt_answers": [ "Mostaque A Chowdhury" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000183413.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blue board next to the man with the green hat?", "answer": "person", "gt_answers": [ "GNOL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065136.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the third board from the left?", "answer": "person", "gt_answers": [ "SIERRA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065136.jpg", "model_name": "BLIP2" }, { "question": "What word comes after 'THE' on the sign?", "answer": "restaurant", "gt_answers": [ "Best" ], "image_path": "./data/STVQA/VisualGenome/1/2319929.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the speed boat?", "answer": "victory", "gt_answers": [ "Victory" ], "image_path": "./data/STVQA/imageNet/n04273569_11231.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the speed boat?", "answer": "1", "gt_answers": [ "77" ], "image_path": "./data/STVQA/imageNet/n04273569_11231.JPEG", "model_name": "BLIP2" }, { "question": "What color is the speed boat?", "answer": "blue", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/imageNet/n04273569_11231.JPEG", "model_name": "BLIP2" }, { "question": "What brand is listed on the top of the speaker?", "answer": "sony", "gt_answers": [ "Logitech" ], "image_path": "./data/STVQA/imageNet/n04041544_3215.JPEG", "model_name": "BLIP2" }, { "question": "What is the date", "answer": "the date is february", "gt_answers": [ "3/26/05" ], "image_path": "./data/STVQA/imageNet/n02108089_9515.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the red sign?", "answer": "one way", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000267463.jpg", "model_name": "BLIP2" }, { "question": "What is written on the black sign?", "answer": "one way", "gt_answers": [ "One way" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000267463.jpg", "model_name": "BLIP2" }, { "question": "How many cents per pound are the bananas?", "answer": "100 cents", "gt_answers": [ "99" ], "image_path": "./data/STVQA/VisualGenome/1/2353806.jpg", "model_name": "BLIP2" }, { "question": "How many dollars per pound are the Mandarines?", "answer": "two dollars", "gt_answers": [ "1.99" ], "image_path": "./data/STVQA/VisualGenome/1/2353806.jpg", "model_name": "BLIP2" }, { "question": "What does the green text word above Organic say?", "answer": "certified organic", "gt_answers": [ "certified" ], "image_path": "./data/STVQA/VisualGenome/1/2353806.jpg", "model_name": "BLIP2" }, { "question": "What word is written above STOP?", "answer": "believing", "gt_answers": [ "DON'T" ], "image_path": "./data/STVQA/VisualGenome/1/2317264.jpg", "model_name": "BLIP2" }, { "question": "What word is written below STOP?", "answer": "believing", "gt_answers": [ "BELIEVING" ], "image_path": "./data/STVQA/VisualGenome/1/2317264.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the monitor?", "answer": "acer", "gt_answers": [ "Soyo" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000458286.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the game on the slot machine?", "answer": "seven league", "gt_answers": [ "SEVEN LEAGUE" ], "image_path": "./data/STVQA/imageNet/n04243546_2390.JPEG", "model_name": "BLIP2" }, { "question": "What channel is the movie on?", "answer": "disney channel", "gt_answers": [ "Disney Channel" ], "image_path": "./data/STVQA/IIIT_text/img_000517.jpg", "model_name": "BLIP2" }, { "question": "What is the first word at the top?", "answer": "high", "gt_answers": [ "High" ], "image_path": "./data/STVQA/IIIT_text/img_000517.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the movie?", "answer": "high school musical", "gt_answers": [ "High School Musical" ], "image_path": "./data/STVQA/IIIT_text/img_000517.jpg", "model_name": "BLIP2" }, { "question": "What does not stop?", "answer": "cars", "gt_answers": [ "CROSS TRAFFIC" ], "image_path": "./data/STVQA/VisualGenome/1/2367983.jpg", "model_name": "BLIP2" }, { "question": "What is the top word on the sign?", "answer": "state", "gt_answers": [ "STATE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000182029.jpg", "model_name": "BLIP2" }, { "question": "What collection is the sword from?", "answer": "person collection", "gt_answers": [ "Greg Domian Collection" ], "image_path": "./data/STVQA/imageNet/n04141327_910.JPEG", "model_name": "BLIP2" }, { "question": "what can you play?", "answer": "ocarina", "gt_answers": [ "ocarina" ], "image_path": "./data/STVQA/imageNet/n03840681_12998.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign say", "answer": "don't block the box", "gt_answers": [ "dont block the box FINE +2 points", "Don't Block the Box Fine +2 Points" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000093119.jpg", "model_name": "BLIP2" }, { "question": "What brand of cough drops are on the right side of the bottom shelf?", "answer": "zoe", "gt_answers": [ "Halls" ], "image_path": "./data/STVQA/imageNet/n04443257_8266.JPEG", "model_name": "BLIP2" }, { "question": "What year is on the glass?", "answer": "2012", "gt_answers": [ "1678" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000204203.jpg", "model_name": "BLIP2" }, { "question": "What word comes after AVE", "answer": "avenue", "gt_answers": [ "West" ], "image_path": "./data/STVQA/icdar/img_61.jpg", "model_name": "BLIP2" }, { "question": "What number comes after no.?", "answer": "two", "gt_answers": [ "1" ], "image_path": "./data/STVQA/icdar/img_61.jpg", "model_name": "BLIP2" }, { "question": "What food is the man eating?", "answer": "pizza", "gt_answers": [ "Pizza" ], "image_path": "./data/STVQA/VisualGenome/1/2326082.jpg", "model_name": "BLIP2" }, { "question": "What's the third alphabet", "answer": "e", "gt_answers": [ "C" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000329638.jpg", "model_name": "BLIP2" }, { "question": "What type of instructions are listed?", "answer": "cooking instructions", "gt_answers": [ "Cooking" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000009864.jpg", "model_name": "BLIP2" }, { "question": "which number does the bus have?", "answer": "number", "gt_answers": [ "53" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000181564.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the vehicle?", "answer": "it is japan", "gt_answers": [ "P-18368", "P.18368" ], "image_path": "./data/STVQA/VisualGenome/2/2417043.jpg", "model_name": "BLIP2" }, { "question": "Where is the red Bus headed?", "answer": "london", "gt_answers": [ "Euston" ], "image_path": "./data/STVQA/VisualGenome/1/2354007.jpg", "model_name": "BLIP2" }, { "question": "What does the advertisement say on the side of the bus?", "answer": "the world is your oyster", "gt_answers": [ "Wicked is Spellbinding!" ], "image_path": "./data/STVQA/VisualGenome/1/2354007.jpg", "model_name": "BLIP2" }, { "question": "What is written in the boys shirt?", "answer": "baseball", "gt_answers": [ "Sun Junior" ], "image_path": "./data/STVQA/VisualGenome/1/2368041.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "narita express", "gt_answers": [ "257" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259349.jpg", "model_name": "BLIP2" }, { "question": "which date is indicated in the picture", "answer": "january 1", "gt_answers": [ "Nov 26 2005" ], "image_path": "./data/STVQA/VisualGenome/1/2348688.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN THERE", "answer": "rtbr", "gt_answers": [ "RTBF" ], "image_path": "./data/STVQA/VisualGenome/1/2364722.jpg", "model_name": "BLIP2" }, { "question": "What is the route number for bus 4306?", "answer": "4305", "gt_answers": [ "1428" ], "image_path": "./data/STVQA/VisualGenome/1/2355481.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the front of the train?", "answer": "0", "gt_answers": [ "150208" ], "image_path": "./data/STVQA/VisualGenome/2/2412036.jpg", "model_name": "BLIP2" }, { "question": "What website is on the bottom of this image?", "answer": "com", "gt_answers": [ "WWW.CORSIBOSCAIOLI.CH" ], "image_path": "./data/STVQA/imageNet/n03000684_15071.JPEG", "model_name": "BLIP2" }, { "question": "What is the last word in the text?", "answer": "custode", "gt_answers": [ "custode" ], "image_path": "./data/STVQA/imageNet/n03000684_15071.JPEG", "model_name": "BLIP2" }, { "question": "What number is the player wearing?", "answer": "23", "gt_answers": [ "10" ], "image_path": "./data/STVQA/VisualGenome/1/2336240.jpg", "model_name": "BLIP2" }, { "question": "What is the stores name on the right?", "answer": "dm", "gt_answers": [ "GEOX" ], "image_path": "./data/STVQA/VisualGenome/1/2345273.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the image", "answer": "reflex", "gt_answers": [ "Brownie" ], "image_path": "./data/STVQA/imageNet/n04069434_10808.JPEG", "model_name": "BLIP2" }, { "question": "What street is to the right?", "answer": "hudson street", "gt_answers": [ "Greenwich St.", "GREENWICH STREET" ], "image_path": "./data/STVQA/VisualGenome/1/2351835.jpg", "model_name": "BLIP2" }, { "question": "What is on the white arrow?", "answer": "no parking", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/1/2351835.jpg", "model_name": "BLIP2" }, { "question": "What street is closed?", "answer": "west street", "gt_answers": [ "Albany St.", "ALBANY STREET" ], "image_path": "./data/STVQA/VisualGenome/1/2351835.jpg", "model_name": "BLIP2" }, { "question": "What word is located at the bottom of the bag?", "answer": "adidas", "gt_answers": [ "Sports" ], "image_path": "./data/STVQA/imageNet/n02769748_87439.JPEG", "model_name": "BLIP2" }, { "question": "What color is the word at the bottom of the bag?", "answer": "blue", "gt_answers": [ "red?", "RED" ], "image_path": "./data/STVQA/imageNet/n02769748_87439.JPEG", "model_name": "BLIP2" }, { "question": "What does the bus say?", "answer": "luzern bus", "gt_answers": [ "Luzern Bahnhof", "2 LUZERN BAHNHOF" ], "image_path": "./data/STVQA/VisualGenome/2/2412829.jpg", "model_name": "BLIP2" }, { "question": "What is the Bridge called?", "answer": "golden gate", "gt_answers": [ "Oakland Bay", "SAN FRANCISCO-OAKLAND BAY BRIDGE" ], "image_path": "./data/STVQA/VisualGenome/2/2410264.jpg", "model_name": "BLIP2" }, { "question": "Is this view northerly, westerly, easterly or southerly?", "answer": "southerly", "gt_answers": [ "Easterly" ], "image_path": "./data/STVQA/VisualGenome/2/2410264.jpg", "model_name": "BLIP2" }, { "question": "What is the players team number?", "answer": "20", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000173506.jpg", "model_name": "BLIP2" }, { "question": "What is the beer company sponsoring this team?", "answer": "corona", "gt_answers": [ "CORONA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000173506.jpg", "model_name": "BLIP2" }, { "question": "What color is this sign?", "answer": "blue", "gt_answers": [ "blue" ], "image_path": "./data/STVQA/IIIT_text/3617.jpg", "model_name": "BLIP2" }, { "question": "What do we know about the knife at the top of the image?", "answer": "it is a real knife", "gt_answers": [ "real knife" ], "image_path": "./data/STVQA/imageNet/n03041632_12487.JPEG", "model_name": "BLIP2" }, { "question": "What do we know about the knives at the bottom of the image?", "answer": "they are real knives", "gt_answers": [ "3D model" ], "image_path": "./data/STVQA/imageNet/n03041632_12487.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the back of one man's white shirt?", "answer": "4", "gt_answers": [ "9" ], "image_path": "./data/STVQA/VisualGenome/2/3102.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the summit the backpack is from?", "answer": "the summit", "gt_answers": [ "IPv6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000285041.jpg", "model_name": "BLIP2" }, { "question": "What size number is the muzzle?", "answer": "small breed", "gt_answers": [ "0" ], "image_path": "./data/STVQA/imageNet/n03803284_5129.JPEG", "model_name": "BLIP2" }, { "question": "What is the 5 digit item number listed on the package?", "answer": "quick fit muzzle", "gt_answers": [ "59000" ], "image_path": "./data/STVQA/imageNet/n03803284_5129.JPEG", "model_name": "BLIP2" }, { "question": "What is the code written on the end of the aircraft?", "answer": "f 16", "gt_answers": [ "J-011" ], "image_path": "./data/STVQA/VisualGenome/2/2405560.jpg", "model_name": "BLIP2" }, { "question": "What is the restaurant name?", "answer": "keslak", "gt_answers": [ "Crown Fried Chicken" ], "image_path": "./data/STVQA/VisualGenome/2/2408427.jpg", "model_name": "BLIP2" }, { "question": "What is the building name next to the restaurant?", "answer": "kslak", "gt_answers": [ "K.Slak", "K SLAK" ], "image_path": "./data/STVQA/VisualGenome/2/2408427.jpg", "model_name": "BLIP2" }, { "question": "What letters are visible on the shirt of the girl holding the drink?", "answer": "lms", "gt_answers": [ "SENIO" ], "image_path": "./data/STVQA/imageNet/n02437616_15433.JPEG", "model_name": "BLIP2" }, { "question": "What building number are the giraffes by?", "answer": "the giraffes are by building number 2", "gt_answers": [ "16" ], "image_path": "./data/STVQA/VisualGenome/1/2366378.jpg", "model_name": "BLIP2" }, { "question": "What kind of food is this?", "answer": "chips", "gt_answers": [ "ATO CHIPS" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029596.jpg", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the SmartPower Blender", "answer": "the smartpower blender is manufactured by person", "gt_answers": [ "Cuisinart" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000431492.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the Cuisinart Blender?", "answer": "the cuisinart cbb 600", "gt_answers": [ "SmartPower", "SMARTPOWER DUET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000431492.jpg", "model_name": "BLIP2" }, { "question": "What was the brand and model of the Blender?", "answer": "the blender was a vitamix", "gt_answers": [ "Cuisinart SmartPower", "CUISINART SMARTPOWER DUET BLENDER/FOOD PROCESSOR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000431492.jpg", "model_name": "BLIP2" }, { "question": "What is the blue book on the top self says", "answer": "the blue book on the top self says", "gt_answers": [ "Debunking Economics" ], "image_path": "./data/STVQA/icdar/img_500.jpg", "model_name": "BLIP2" }, { "question": "What kind of film was used?", "answer": "color film", "gt_answers": [ "KODAK" ], "image_path": "./data/STVQA/VisualGenome/1/2371943.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this hotel?", "answer": "the marriott marriott hotel", "gt_answers": [ "Marriott" ], "image_path": "./data/STVQA/IIIT_text/img_000755.jpg", "model_name": "BLIP2" }, { "question": "Who took this photo?", "answer": "the photographer", "gt_answers": [ "Philippe Roynet" ], "image_path": "./data/STVQA/VisualGenome/1/61517.jpg", "model_name": "BLIP2" }, { "question": "What platform is the train currently at?", "answer": "platform 1", "gt_answers": [ "2c" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326750.jpg", "model_name": "BLIP2" }, { "question": "What is the number printed on the train in the image?", "answer": "person", "gt_answers": [ "375913" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326750.jpg", "model_name": "BLIP2" }, { "question": "What brand is the blue plane?", "answer": "klm", "gt_answers": [ "KLM" ], "image_path": "./data/STVQA/imageNet/n02690373_7740.JPEG", "model_name": "BLIP2" }, { "question": "What is the airport terminal name?", "answer": "klm", "gt_answers": [ "Anthony Fokker" ], "image_path": "./data/STVQA/imageNet/n02690373_7740.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the white building?", "answer": "3", "gt_answers": [ "73" ], "image_path": "./data/STVQA/imageNet/n02690373_7740.JPEG", "model_name": "BLIP2" }, { "question": "What store has a blue sign?", "answer": "zara", "gt_answers": [ "Steve Madden" ], "image_path": "./data/STVQA/icdar/img_721.jpg", "model_name": "BLIP2" }, { "question": "Which store logo is blue?", "answer": "zara", "gt_answers": [ "Steve Madden" ], "image_path": "./data/STVQA/icdar/img_721.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store on the bottom floor with a white logo?", "answer": "zara", "gt_answers": [ "Fossil" ], "image_path": "./data/STVQA/icdar/img_721.jpg", "model_name": "BLIP2" }, { "question": "What color is the bus?", "answer": "blue", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000433232.jpg", "model_name": "BLIP2" }, { "question": "How many cheerleaders are there?", "answer": "eight", "gt_answers": [ "3", "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000433232.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of the bus?", "answer": "person", "gt_answers": [ "Samsung" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000433232.jpg", "model_name": "BLIP2" }, { "question": "Who is the copyrighted photographer of the image?", "answer": "person", "gt_answers": [ "LOTHAR LENZ" ], "image_path": "./data/STVQA/imageNet/n02395406_36871.JPEG", "model_name": "BLIP2" }, { "question": "What website is listed?", "answer": "person", "gt_answers": [ "www.pferdefotoarchiv.de" ], "image_path": "./data/STVQA/imageNet/n02395406_36871.JPEG", "model_name": "BLIP2" }, { "question": "Whose catering company owns this truck", "answer": "latina catering", "gt_answers": [ "Linda", "LINDA'S CATERING" ], "image_path": "./data/STVQA/VisualGenome/1/2348219.jpg", "model_name": "BLIP2" }, { "question": "What does Linda's company do?", "answer": "linda's company is a food truck", "gt_answers": [ "Catering" ], "image_path": "./data/STVQA/VisualGenome/1/2348219.jpg", "model_name": "BLIP2" }, { "question": "What type of Handbook is it?", "answer": "christianity", "gt_answers": [ "A Handbook of Psychological disorders", "PSYCHOLOGICAL DISORDERS" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028585.jpg", "model_name": "BLIP2" }, { "question": "What type of cereal is on top of the fridge?", "answer": "corn flakes", "gt_answers": [ "Honey Nut Cheerios" ], "image_path": "./data/STVQA/VisualGenome/1/713674.jpg", "model_name": "BLIP2" }, { "question": "What name is on the logo at the bottom on the train?", "answer": "amtrak", "gt_answers": [ "AMTRAK" ], "image_path": "./data/STVQA/VisualGenome/1/1592917.jpg", "model_name": "BLIP2" }, { "question": "What company besides AMTRAK is advertised?", "answer": "mta", "gt_answers": [ "Metra" ], "image_path": "./data/STVQA/VisualGenome/1/1592917.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2010", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/VisualGenome/1/2346851.jpg", "model_name": "BLIP2" }, { "question": "Who took this photo?", "answer": "the photographer", "gt_answers": [ "Bridget Samuels" ], "image_path": "./data/STVQA/VisualGenome/1/2346851.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the homogenized milk?", "answer": "kraft", "gt_answers": [ "Darigold" ], "image_path": "./data/STVQA/imageNet/n07831146_2895.JPEG", "model_name": "BLIP2" }, { "question": "Which spice is furthest to the right?", "answer": "garlic", "gt_answers": [ "Parsley" ], "image_path": "./data/STVQA/imageNet/n07831146_2895.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the plane?", "answer": "air canada", "gt_answers": [ "Air Canada" ], "image_path": "./data/STVQA/VisualGenome/1/2363291.jpg", "model_name": "BLIP2" }, { "question": "What is written on the green shirt?", "answer": "shamrock", "gt_answers": [ "King Monkey" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000267779.jpg", "model_name": "BLIP2" }, { "question": "What is written on the red canister?", "answer": "person", "gt_answers": [ "Le Creusei" ], "image_path": "./data/STVQA/imageNet/n04270147_12943.JPEG", "model_name": "BLIP2" }, { "question": "What place does the sign board showing?", "answer": "manhattan", "gt_answers": [ "MANHATTAN AV" ], "image_path": "./data/STVQA/VisualGenome/1/2376680.jpg", "model_name": "BLIP2" }, { "question": "What number is shown in the sign board?", "answer": "3", "gt_answers": [ "ONE" ], "image_path": "./data/STVQA/VisualGenome/1/2376680.jpg", "model_name": "BLIP2" }, { "question": "What number is the batter?", "answer": "person", "gt_answers": [ "4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000243509.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline?", "answer": "new zealand airways", "gt_answers": [ "AIR NEW ZEALAND" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000487413.jpg", "model_name": "BLIP2" }, { "question": "What is the website listed?", "answer": "tennis", "gt_answers": [ "NEWS.COM", "NADALNEWS.COM" ], "image_path": "./data/STVQA/VisualGenome/1/2322755.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the shop on the top floor?", "answer": "h&m", "gt_answers": [ "alan & guys" ], "image_path": "./data/STVQA/icdar/img_496.jpg", "model_name": "BLIP2" }, { "question": "What logo is on the coffee cup?", "answer": "illy", "gt_answers": [ "Illy" ], "image_path": "./data/STVQA/VisualGenome/1/2325754.jpg", "model_name": "BLIP2" }, { "question": "What water brand is advertised?", "answer": "evian", "gt_answers": [ "Poland Spring" ], "image_path": "./data/STVQA/VisualGenome/2/2407523.jpg", "model_name": "BLIP2" }, { "question": "Which famous street in America, according to these road signs, is close to Broadway?", "answer": "57th street", "gt_answers": [ "WALL ST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000008006.jpg", "model_name": "BLIP2" }, { "question": "What button is on the right?", "answer": "the radio button", "gt_answers": [ "EQ" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004184.jpg", "model_name": "BLIP2" }, { "question": "what label stick on bottle", "answer": "label stick on bottle", "gt_answers": [ "bena" ], "image_path": "./data/STVQA/VisualGenome/1/2344792.jpg", "model_name": "BLIP2" }, { "question": "what is number plate of train", "answer": "number plate of train", "gt_answers": [ "2DI2", "2D12" ], "image_path": "./data/STVQA/VisualGenome/1/2332995.jpg", "model_name": "BLIP2" }, { "question": "where the train is going", "answer": "to the station", "gt_answers": [ "BANBURY" ], "image_path": "./data/STVQA/VisualGenome/1/2332995.jpg", "model_name": "BLIP2" }, { "question": "What is written on the orange wall?", "answer": "santa clause", "gt_answers": [ "SALE" ], "image_path": "./data/STVQA/icdar/img_365.jpg", "model_name": "BLIP2" }, { "question": "What does the green sign say?", "answer": "no parking", "gt_answers": [ "Aurelia" ], "image_path": "./data/STVQA/VisualGenome/1/2354240.jpg", "model_name": "BLIP2" }, { "question": "What has been written beneath the stop sign?", "answer": "no parking", "gt_answers": [ "IT!", "It" ], "image_path": "./data/STVQA/VisualGenome/1/2354240.jpg", "model_name": "BLIP2" }, { "question": "What is the company sponsor featured on bottom center portion of the motorcycle?", "answer": "honda", "gt_answers": [ "REPSOL" ], "image_path": "./data/STVQA/VisualGenome/1/2323132.jpg", "model_name": "BLIP2" }, { "question": "What is the company sponsor featured in the blue and black logo on the main part of the morotrcycle?", "answer": "honda", "gt_answers": [ "Estrella Galicia" ], "image_path": "./data/STVQA/VisualGenome/1/2323132.jpg", "model_name": "BLIP2" }, { "question": "How many players on the field", "answer": "nine", "gt_answers": [ "2", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2364668.jpg", "model_name": "BLIP2" }, { "question": "Is the name Mark Clarkson visible in this photo?", "answer": "no", "gt_answers": [ "mark clarkson" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000382953.jpg", "model_name": "BLIP2" }, { "question": "What stadium?", "answer": "rodeo", "gt_answers": [ "EL CAPITAN" ], "image_path": "./data/STVQA/VisualGenome/1/285988.jpg", "model_name": "BLIP2" }, { "question": "What does the water tower say?", "answer": "university", "gt_answers": [ "Skymax" ], "image_path": "./data/STVQA/VisualGenome/1/2350572.jpg", "model_name": "BLIP2" }, { "question": "What street signs are seen?", "answer": "two street signs", "gt_answers": [ "Emerald and University" ], "image_path": "./data/STVQA/VisualGenome/1/2350572.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the tail of the plane?", "answer": "coulson", "gt_answers": [ "Coulson Flying Tankers" ], "image_path": "./data/STVQA/VisualGenome/1/2349748.jpg", "model_name": "BLIP2" }, { "question": "What number is displayed on the right player?", "answer": "34", "gt_answers": [ "34" ], "image_path": "./data/STVQA/imageNet/n03379051_3383.JPEG", "model_name": "BLIP2" }, { "question": "According to the packaging, what type of nut is inside this product?", "answer": "hazelnut", "gt_answers": [ "Hazelnut.", "HAZELNUT" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000031087.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the event shown?", "answer": "us open tennis tournament", "gt_answers": [ "us open" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000444953.jpg", "model_name": "BLIP2" }, { "question": "What is the name of one of the sponsors of the event?", "answer": "person", "gt_answers": [ "jpmorgan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000444953.jpg", "model_name": "BLIP2" }, { "question": "what is the website name", "answer": "com", "gt_answers": [ "cityjet.com" ], "image_path": "./data/STVQA/VisualGenome/1/2370124.jpg", "model_name": "BLIP2" }, { "question": "what is number of the plane", "answer": "no", "gt_answers": [ "00-VLF" ], "image_path": "./data/STVQA/VisualGenome/1/2370124.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the bus?", "answer": "person", "gt_answers": [ "Valley Central School" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000119449.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the school?", "answer": "person", "gt_answers": [ "Valley Central School" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000119449.jpg", "model_name": "BLIP2" }, { "question": "What is in the white and blue bag to the far right of the image?", "answer": "flowers", "gt_answers": [ "SUGAR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000331417.jpg", "model_name": "BLIP2" }, { "question": "How many people are in this picture?", "answer": "two", "gt_answers": [ "2" ], "image_path": "./data/STVQA/IIIT_text/3408.jpg", "model_name": "BLIP2" }, { "question": "What is the soda company in the right window?", "answer": "coca cola", "gt_answers": [ "Coca Cola" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000487693.jpg", "model_name": "BLIP2" }, { "question": "Who photographed this picture and is credited on the bottom right of the photo?", "answer": "person", "gt_answers": [ "daniele azzarelli" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000053672.jpg", "model_name": "BLIP2" }, { "question": "What city name is written on the umbrella?", "answer": "new york city", "gt_answers": [ "New York" ], "image_path": "./data/STVQA/VisualGenome/1/2350102.jpg", "model_name": "BLIP2" }, { "question": "What are the crab packing to?", "answer": "crab packing", "gt_answers": [ "TRAVEL" ], "image_path": "./data/STVQA/imageNet/n01978287_4564.JPEG", "model_name": "BLIP2" }, { "question": "How large are the crabs?", "answer": "small", "gt_answers": [ "GIGANTIC" ], "image_path": "./data/STVQA/imageNet/n01978287_4564.JPEG", "model_name": "BLIP2" }, { "question": "How much does the 48 hour pack cost?", "answer": "$45", "gt_answers": [ "$10.00" ], "image_path": "./data/STVQA/imageNet/n01978287_4564.JPEG", "model_name": "BLIP2" }, { "question": "What is the website address on the picture?", "answer": "person", "gt_answers": [ "www.versaci.fr" ], "image_path": "./data/STVQA/IIIT_text/4609.jpg", "model_name": "BLIP2" }, { "question": "What is written in yellow on the board?", "answer": "waterhole creek", "gt_answers": [ "Crocodile Dundee's" ], "image_path": "./data/STVQA/VisualGenome/1/2370790.jpg", "model_name": "BLIP2" }, { "question": "What is the identification number on the side of the ship?", "answer": "hms sydney", "gt_answers": [ "F476" ], "image_path": "./data/STVQA/imageNet/n02687172_19840.JPEG", "model_name": "BLIP2" }, { "question": "What car is this train?", "answer": "dining car", "gt_answers": [ "Dining car" ], "image_path": "./data/STVQA/VisualGenome/2/2405886.jpg", "model_name": "BLIP2" }, { "question": "What animals are there in the picture?", "answer": "tigers", "gt_answers": [ "Tigers" ], "image_path": "./data/STVQA/imageNet/n02129604_17329.JPEG", "model_name": "BLIP2" }, { "question": "What text is on the plate?", "answer": "truck parking", "gt_answers": [ "TRUCK PARKING" ], "image_path": "./data/STVQA/VisualGenome/1/2367178.jpg", "model_name": "BLIP2" }, { "question": "What are the last four numbers on the bike's license plate?", "answer": "93", "gt_answers": [ "8481" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000006846.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the jersey?", "answer": "21", "gt_answers": [ "121" ], "image_path": "./data/STVQA/VisualGenome/1/713236.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store where he is shopping ?", "answer": "a market", "gt_answers": [ "City Market" ], "image_path": "./data/STVQA/VisualGenome/1/2367964.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE COLOR OF THE SNAKE?", "answer": "green", "gt_answers": [ "GREEN" ], "image_path": "./data/STVQA/imageNet/n01739381_6519.JPEG", "model_name": "BLIP2" }, { "question": "WHAT DO YOU CALL THIS SNAKE?", "answer": "vine snake", "gt_answers": [ "VINE SNAKE" ], "image_path": "./data/STVQA/imageNet/n01739381_6519.JPEG", "model_name": "BLIP2" }, { "question": "Which organic food group is mentioned on the backdrop?", "answer": "whole foods", "gt_answers": [ "vegetables", "Riverford organic Vegetables" ], "image_path": "./data/STVQA/VisualGenome/1/2341378.jpg", "model_name": "BLIP2" }, { "question": "What is the first name on the bottom of this photograph?", "answer": "person", "gt_answers": [ "Tom" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000207013.jpg", "model_name": "BLIP2" }, { "question": "What type of workshop does the sign show?", "answer": "knitting", "gt_answers": [ "Knitting", "KNITTING WORKSHOP" ], "image_path": "./data/STVQA/VisualGenome/1/2367809.jpg", "model_name": "BLIP2" }, { "question": "What is the author's first name?", "answer": "elizabeth", "gt_answers": [ "Elizabeth" ], "image_path": "./data/STVQA/VisualGenome/1/2367809.jpg", "model_name": "BLIP2" }, { "question": "What is the author's last name?", "answer": "baum", "gt_answers": [ "Zimmermann" ], "image_path": "./data/STVQA/VisualGenome/1/2367809.jpg", "model_name": "BLIP2" }, { "question": "What brand name is on the plane?", "answer": "air china", "gt_answers": [ "Air china" ], "image_path": "./data/STVQA/VisualGenome/1/2331577.jpg", "model_name": "BLIP2" }, { "question": "What saints name is in this photo?", "answer": "saint martin", "gt_answers": [ "Georges" ], "image_path": "./data/STVQA/VisualGenome/1/2357165.jpg", "model_name": "BLIP2" }, { "question": "What are the 3 letters written above \"The French\"?", "answer": "t", "gt_answers": [ "Tri" ], "image_path": "./data/STVQA/VisualGenome/1/2339138.jpg", "model_name": "BLIP2" }, { "question": "What are the 3 digits visible behind the lady, written after \"Tel.\"?", "answer": "917", "gt_answers": [ "212" ], "image_path": "./data/STVQA/VisualGenome/1/2339138.jpg", "model_name": "BLIP2" }, { "question": "What is the color of the plane?", "answer": "blue and white", "gt_answers": [ "White", "WHITE AND BLUE STRIP" ], "image_path": "./data/STVQA/VisualGenome/1/2351315.jpg", "model_name": "BLIP2" }, { "question": "What is written on the plane?", "answer": "moscow airlines", "gt_answers": [ "SunExpress" ], "image_path": "./data/STVQA/VisualGenome/1/2351315.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the Airplane?", "answer": "lufthansa boeing 747", "gt_answers": [ "LUFTHANSA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000111223.jpg", "model_name": "BLIP2" }, { "question": "What is in the package on the upper right?", "answer": "a sandwich", "gt_answers": [ "sandwich" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259297.jpg", "model_name": "BLIP2" }, { "question": "How much does the salad cost?", "answer": "it's free", "gt_answers": [ "105" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000259297.jpg", "model_name": "BLIP2" }, { "question": "What type dresses are sold at this store?", "answer": "wedding dresses", "gt_answers": [ "bridal special occasion" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000333663.jpg", "model_name": "BLIP2" }, { "question": "Do they sell anything besides dresses?", "answer": "yes they sell a lot of shoes", "gt_answers": [ "ACCESSORIES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000333663.jpg", "model_name": "BLIP2" }, { "question": "What number is on the baseball players jersey?", "answer": "person", "gt_answers": [ "15" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000385837.jpg", "model_name": "BLIP2" }, { "question": "what is the address given in bus", "answer": "no address is given in bus", "gt_answers": [ "252 inglewood" ], "image_path": "./data/STVQA/VisualGenome/1/2365391.jpg", "model_name": "BLIP2" }, { "question": "what number given on bus", "answer": "number given on bus", "gt_answers": [ "1201", "252" ], "image_path": "./data/STVQA/VisualGenome/1/2365391.jpg", "model_name": "BLIP2" }, { "question": "which year is given", "answer": "1963", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/VisualGenome/1/2365391.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "mcf", "gt_answers": [ "SK12 MVC", "SK12MVC" ], "image_path": "./data/STVQA/VisualGenome/2/2415675.jpg", "model_name": "BLIP2" }, { "question": "Whayt brand is the truck?", "answer": "scania", "gt_answers": [ "Scania" ], "image_path": "./data/STVQA/VisualGenome/2/2415675.jpg", "model_name": "BLIP2" }, { "question": "What word is on the riders pants?", "answer": "person", "gt_answers": [ "ROCKET" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000452167.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number on the van?", "answer": "mcd", "gt_answers": [ "GN07 NWZ" ], "image_path": "./data/STVQA/VisualGenome/1/2364724.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the left border at the top of the border around the image?", "answer": "train", "gt_answers": [ "STOCK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000055121.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the right border at the top of the image?", "answer": "train", "gt_answers": [ "URBIAN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000055121.jpg", "model_name": "BLIP2" }, { "question": "What is the word that is upside down on the image's border in the top right corner?", "answer": "train", "gt_answers": [ "URBIAN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000055121.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the oil company?", "answer": "person", "gt_answers": [ "crockett" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000095267.jpg", "model_name": "BLIP2" }, { "question": "What does the banner in the upper corner say?", "answer": "imagine", "gt_answers": [ "INMAGINE" ], "image_path": "./data/STVQA/imageNet/n04317175_5328.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the black jersey?", "answer": "person", "gt_answers": [ "10" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000045659.jpg", "model_name": "BLIP2" }, { "question": "Who took this picture?", "answer": "person", "gt_answers": [ "Purple Martini" ], "image_path": "./data/STVQA/IIIT_text/7506.jpg", "model_name": "BLIP2" }, { "question": "What type of photography does Purple Martini do?", "answer": "wedding photography", "gt_answers": [ "Wedding Photography" ], "image_path": "./data/STVQA/IIIT_text/7506.jpg", "model_name": "BLIP2" }, { "question": "What is written in black letters?", "answer": "venezia", "gt_answers": [ "Uventina" ], "image_path": "./data/STVQA/imageNet/n07745940_8569.JPEG", "model_name": "BLIP2" }, { "question": "What is written on dinning table", "answer": "a bar", "gt_answers": [ "GUINNESS" ], "image_path": "./data/STVQA/VisualGenome/1/2327207.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "street des 13 january", "gt_answers": [ "StraBe des 13. Januar" ], "image_path": "./data/STVQA/VisualGenome/1/2362098.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "st george street", "gt_answers": [ "St George Ct.", "ST. GEORGE CT." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000127104.jpg", "model_name": "BLIP2" }, { "question": "What does the sign in green letters read?", "answer": "there is no place like home", "gt_answers": [ "YOUR FUTURE IS THE NATURAL WORLD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000104325.jpg", "model_name": "BLIP2" }, { "question": "What is the word under the red plus sign?", "answer": "android", "gt_answers": [ "ice" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000255713.jpg", "model_name": "BLIP2" }, { "question": "What are the words on the sign seen over the man's shoulder?", "answer": "no smoking", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000537608.jpg", "model_name": "BLIP2" }, { "question": "What does the octagonal red sign say?", "answer": "divided highway", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000009176.jpg", "model_name": "BLIP2" }, { "question": "What type of sale is this?", "answer": "cosmetics", "gt_answers": [ "Grand Anniversary SALE" ], "image_path": "./data/STVQA/icdar/img_866.jpg", "model_name": "BLIP2" }, { "question": "What is the second digit in the platform number for the red train?", "answer": "2", "gt_answers": [ "6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000070363.jpg", "model_name": "BLIP2" }, { "question": "What does the writing on the side of the bus say?", "answer": "urba", "gt_answers": [ "URBANO" ], "image_path": "./data/STVQA/VisualGenome/1/2337609.jpg", "model_name": "BLIP2" }, { "question": "What are the last two letters on the tail of the airplane?", "answer": "cj", "gt_answers": [ "TC" ], "image_path": "./data/STVQA/VisualGenome/1/2316420.jpg", "model_name": "BLIP2" }, { "question": "What does #21's shirt say?", "answer": "i love baseball", "gt_answers": [ "Tolleson" ], "image_path": "./data/STVQA/VisualGenome/2/2409878.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bar?", "answer": "oasis", "gt_answers": [ "Oasis" ], "image_path": "./data/STVQA/VisualGenome/1/2317965.jpg", "model_name": "BLIP2" }, { "question": "Who click this photo?", "answer": "person", "gt_answers": [ "SEBASTIAN SOIDEN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000421341.jpg", "model_name": "BLIP2" }, { "question": "What word is on the large vertical sign?", "answer": "park", "gt_answers": [ "PARK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000186315.jpg", "model_name": "BLIP2" }, { "question": "What is the address on the overhang?", "answer": "the address on the overhang is 201 w st louis street", "gt_answers": [ "129" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000186315.jpg", "model_name": "BLIP2" }, { "question": "What does the sign that is under the overhang say?", "answer": "the park", "gt_answers": [ "PARKING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000186315.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the player on the right?", "answer": "garcia", "gt_answers": [ "Castella" ], "image_path": "./data/STVQA/VisualGenome/1/2343855.jpg", "model_name": "BLIP2" }, { "question": "What does Tinder Box sell?", "answer": "cigars", "gt_answers": [ "Cigars" ], "image_path": "./data/STVQA/imageNet/n04443257_17262.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "i b bachschmidt", "gt_answers": [ "IB BALSCHMIDT" ], "image_path": "./data/STVQA/VisualGenome/1/1159995.jpg", "model_name": "BLIP2" }, { "question": "What brand of trucking is shown?", "answer": "land rover", "gt_answers": [ "Land Rover" ], "image_path": "./data/STVQA/imageNet/n03594945_14596.JPEG", "model_name": "BLIP2" }, { "question": "What is the word in the corner of the image?", "answer": "person", "gt_answers": [ "myprofe" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160229.jpg", "model_name": "BLIP2" }, { "question": "What kind of car part are these items?", "answer": "oil filter", "gt_answers": [ "Oil Filter" ], "image_path": "./data/STVQA/imageNet/n03843555_13542.JPEG", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the oil filters?", "answer": "subaru", "gt_answers": [ "Subaru", "SUBARU GENUINE PARTS" ], "image_path": "./data/STVQA/imageNet/n03843555_13542.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand of Soda?", "answer": "person", "gt_answers": [ "DR PEPPER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000451800.jpg", "model_name": "BLIP2" }, { "question": "What is being celebrated?", "answer": "a birthday", "gt_answers": [ "Birthday" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000381856.jpg", "model_name": "BLIP2" }, { "question": "How old is this person turning?", "answer": "60", "gt_answers": [ "90" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000381856.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the book?", "answer": "archie's joke book", "gt_answers": [ "archie's joke book" ], "image_path": "./data/STVQA/imageNet/n06596364_9948.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the building?", "answer": "sony building", "gt_answers": [ "Sony" ], "image_path": "./data/STVQA/IIIT_text/img_001189.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the photo?", "answer": "surfers at the beach", "gt_answers": [ "Surf Board Riders" ], "image_path": "./data/STVQA/VisualGenome/1/2345325.jpg", "model_name": "BLIP2" }, { "question": "Where is the location of this photo?", "answer": "laguna beach", "gt_answers": [ "Balboa - Calif" ], "image_path": "./data/STVQA/VisualGenome/1/2345325.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name on the buses?", "answer": "stran", "gt_answers": [ "Stran" ], "image_path": "./data/STVQA/IIIT_text/4227.jpg", "model_name": "BLIP2" }, { "question": "What does the largest red sign say?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000348874.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going to?", "answer": "downtown", "gt_answers": [ "Berkeley Bart", "BERKELEY" ], "image_path": "./data/STVQA/VisualGenome/1/2360016.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "3", "gt_answers": [ "14", "1R" ], "image_path": "./data/STVQA/VisualGenome/1/2360016.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the net's circled sign?", "answer": "it says net", "gt_answers": [ "Corona" ], "image_path": "./data/STVQA/VisualGenome/1/2375499.jpg", "model_name": "BLIP2" }, { "question": "What is this roads name?", "answer": "hardscrabble", "gt_answers": [ "HARDSCRABBLE RD", "Hardscrabble" ], "image_path": "./data/STVQA/VisualGenome/1/2346720.jpg", "model_name": "BLIP2" }, { "question": "what is the license plate number?", "answer": "mcdo", "gt_answers": [ "18 hd" ], "image_path": "./data/STVQA/VisualGenome/1/1159762.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the red bus?", "answer": "no", "gt_answers": [ "263" ], "image_path": "./data/STVQA/VisualGenome/1/2331529.jpg", "model_name": "BLIP2" }, { "question": "What company does the tow truck belong too?", "answer": "towing company", "gt_answers": [ "Sovereign" ], "image_path": "./data/STVQA/VisualGenome/1/2331529.jpg", "model_name": "BLIP2" }, { "question": "What do the top red letters read?", "answer": "ignitor", "gt_answers": [ "Handling Instructions" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000249.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the nose of the airplane?", "answer": "us", "gt_answers": [ "FORCE" ], "image_path": "./data/STVQA/imageNet/n04552348_8200.JPEG", "model_name": "BLIP2" }, { "question": "What arena is this hockey puck from?", "answer": "san jose coliseum", "gt_answers": [ "SAN JOSE ARENA", "SAN JOSE" ], "image_path": "./data/STVQA/imageNet/n04019541_11301.JPEG", "model_name": "BLIP2" }, { "question": "what is the cost of bananas", "answer": "the cost of bananas depends on where you buy them", "gt_answers": [ "19c" ], "image_path": "./data/STVQA/VisualGenome/1/2354893.jpg", "model_name": "BLIP2" }, { "question": "What are the 3 letters on the plate?", "answer": "person", "gt_answers": [ "AMK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000455340.jpg", "model_name": "BLIP2" }, { "question": "WhatWhat color is the crosswalk", "answer": "black and white", "gt_answers": [ "White", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2359366.jpg", "model_name": "BLIP2" }, { "question": "What color are the trees", "answer": "green", "gt_answers": [ "Green", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2359366.jpg", "model_name": "BLIP2" }, { "question": "Where is the man in the picture heading to?", "answer": "a store", "gt_answers": [ "World of Sports" ], "image_path": "./data/STVQA/icdar/img_284.jpg", "model_name": "BLIP2" }, { "question": "what is written in white on the orange board?", "answer": "the history of the toilet", "gt_answers": [ "A sip of con" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000323470.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the bottom of the flag?", "answer": "switzerland", "gt_answers": [ "Chamonix" ], "image_path": "./data/STVQA/VisualGenome/1/2351119.jpg", "model_name": "BLIP2" }, { "question": "What combination of letter and numbers are on the skiers shoulders?", "answer": "sls", "gt_answers": [ "A1" ], "image_path": "./data/STVQA/VisualGenome/1/2351119.jpg", "model_name": "BLIP2" }, { "question": "where is the candy bar located?", "answer": "in the living room", "gt_answers": [ "LERON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000385070.jpg", "model_name": "BLIP2" }, { "question": "What type of restaurant is behind the parked bikes?", "answer": "coffee shop", "gt_answers": [ "Sushi" ], "image_path": "./data/STVQA/VisualGenome/1/2347996.jpg", "model_name": "BLIP2" }, { "question": "When was this picture taken?", "answer": "summer", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/imageNet/n01601694_21788.JPEG", "model_name": "BLIP2" }, { "question": "Which company took this picture?", "answer": "nikon", "gt_answers": [ "David Flores Photography" ], "image_path": "./data/STVQA/imageNet/n01601694_21788.JPEG", "model_name": "BLIP2" }, { "question": "WHAT WORD IS WRITTEN ON THE METAL", "answer": "blue", "gt_answers": [ "BLUESBAND HOHNER INTERNATIONAL" ], "image_path": "./data/STVQA/imageNet/n03494278_40353.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON THE PAPER", "answer": "how to play the harmonica", "gt_answers": [ "HERE'S HOW TO PLAY YOUR HOHNER" ], "image_path": "./data/STVQA/imageNet/n03494278_40353.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign say below the clock?", "answer": "tavern", "gt_answers": [ "Manchester Road" ], "image_path": "./data/STVQA/VisualGenome/1/2369184.jpg", "model_name": "BLIP2" }, { "question": "What text is under the logo", "answer": "metro", "gt_answers": [ "Metro" ], "image_path": "./data/STVQA/VisualGenome/2/2412941.jpg", "model_name": "BLIP2" }, { "question": "What word is displayed on the surfboard stand?", "answer": "view", "gt_answers": [ "view" ], "image_path": "./data/STVQA/VisualGenome/2/2411332.jpg", "model_name": "BLIP2" }, { "question": "Who is the photographer that took this picture?", "answer": "michael mcdonald", "gt_answers": [ "Koen Brouwer" ], "image_path": "./data/STVQA/VisualGenome/2/2403755.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the bottom of the snowboard in the foreground?", "answer": "k2", "gt_answers": [ "BURTON" ], "image_path": "./data/STVQA/VisualGenome/1/1159441.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the bottom right of the picture?", "answer": "boeing", "gt_answers": [ "arnoldo riker", "arnoldo.riker" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000497202.jpg", "model_name": "BLIP2" }, { "question": "What figure is written on the tag?", "answer": "six", "gt_answers": [ "6" ], "image_path": "./data/STVQA/VisualGenome/1/2337046.jpg", "model_name": "BLIP2" }, { "question": "From which country is the pasport", "answer": "from canada", "gt_answers": [ "United States", "UNITED STATES OF AMRICA" ], "image_path": "./data/STVQA/VisualGenome/1/1159307.jpg", "model_name": "BLIP2" }, { "question": "What is written in green?", "answer": "always welcome", "gt_answers": [ "always welcome" ], "image_path": "./data/STVQA/IIIT_text/img_001281.jpg", "model_name": "BLIP2" }, { "question": "what is written in red?", "answer": "always welcome", "gt_answers": [ "orkut143.com" ], "image_path": "./data/STVQA/IIIT_text/img_001281.jpg", "model_name": "BLIP2" }, { "question": "what company is written on the plane", "answer": "airberlin", "gt_answers": [ "Lufthansa" ], "image_path": "./data/STVQA/imageNet/n02690373_5724.JPEG", "model_name": "BLIP2" }, { "question": "what text is on the plane?", "answer": "airberlin", "gt_answers": [ "Lufthansa" ], "image_path": "./data/STVQA/imageNet/n02690373_5724.JPEG", "model_name": "BLIP2" }, { "question": "How many forks are in the picture?", "answer": "two", "gt_answers": [ "Two" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000002886.jpg", "model_name": "BLIP2" }, { "question": "How many spoons are in the picture?", "answer": "2", "gt_answers": [ "One" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000002886.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of board reference?", "answer": "goyard", "gt_answers": [ "goyard" ], "image_path": "./data/STVQA/VisualGenome/1/2329292.jpg", "model_name": "BLIP2" }, { "question": "What is the name above the key?", "answer": "person", "gt_answers": [ "Caspari" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293605.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store selling head accessories?", "answer": "person", "gt_answers": [ "capking." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293605.jpg", "model_name": "BLIP2" }, { "question": "What is the building number?", "answer": "3", "gt_answers": [ "38" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293605.jpg", "model_name": "BLIP2" }, { "question": "Q: What company is the van from? A: TNT", "answer": "a person", "gt_answers": [ "TNT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000573807.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the police motorcycle?", "answer": "london", "gt_answers": [ "BU07 AZG", "BUO7 AZG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337087.jpg", "model_name": "BLIP2" }, { "question": "what is the name of shop", "answer": "hughes shoes", "gt_answers": [ "Huey's" ], "image_path": "./data/STVQA/imageNet/n04200800_9497.JPEG", "model_name": "BLIP2" }, { "question": "what they sale", "answer": "shoes", "gt_answers": [ "shoes" ], "image_path": "./data/STVQA/imageNet/n04200800_9497.JPEG", "model_name": "BLIP2" }, { "question": "what is the size of shoes given", "answer": "medium", "gt_answers": [ "Wide, Medium, Narrow" ], "image_path": "./data/STVQA/imageNet/n04200800_9497.JPEG", "model_name": "BLIP2" }, { "question": "What is the bus number of this bus?", "answer": "no bus number", "gt_answers": [ "117" ], "image_path": "./data/STVQA/IIIT_text/img_001125.jpg", "model_name": "BLIP2" }, { "question": "what is the red word on the sing on the right?", "answer": "person", "gt_answers": [ "citroen" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000091868.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the column?", "answer": "on the sea", "gt_answers": [ "Salt Grill" ], "image_path": "./data/STVQA/icdar/img_838.jpg", "model_name": "BLIP2" }, { "question": "what word is enclosed in the box?", "answer": "person", "gt_answers": [ "corn" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000423356.jpg", "model_name": "BLIP2" }, { "question": "What brand is the yellow shirt?", "answer": "nike", "gt_answers": [ "Oshkosh" ], "image_path": "./data/STVQA/VisualGenome/2/2417907.jpg", "model_name": "BLIP2" }, { "question": "What is written on the green sign?", "answer": "no parking", "gt_answers": [ "SURF" ], "image_path": "./data/STVQA/VisualGenome/1/2348108.jpg", "model_name": "BLIP2" }, { "question": "What text appears on the red arrow?", "answer": "soft flexible dispenser", "gt_answers": [ "New & Improved" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018606.jpg", "model_name": "BLIP2" }, { "question": "What exit number is shown?", "answer": "exit", "gt_answers": [ "22" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048742.jpg", "model_name": "BLIP2" }, { "question": "What highway continues to the left?", "answer": "i 95", "gt_answers": [ "95" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048742.jpg", "model_name": "BLIP2" }, { "question": "What street is at this exit?", "answer": "philadelphia avenue", "gt_answers": [ "CALLOWHILL ST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048742.jpg", "model_name": "BLIP2" }, { "question": "What is the tv brand name?", "answer": "lg", "gt_answers": [ "SOny" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000289545.jpg", "model_name": "BLIP2" }, { "question": "Whats the truck's color?", "answer": "blue", "gt_answers": [ "blue" ], "image_path": "./data/STVQA/imageNet/n04461696_5988.JPEG", "model_name": "BLIP2" }, { "question": "What company makes this can opener?", "answer": "chef innox", "gt_answers": [ "Chef Inox" ], "image_path": "./data/STVQA/imageNet/n02951585_6086.JPEG", "model_name": "BLIP2" }, { "question": "What color is this caboose?", "answer": "red", "gt_answers": [ "red" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373425.jpg", "model_name": "BLIP2" }, { "question": "What are the two large letters on the side of the caboose?", "answer": "person and person", "gt_answers": [ "NW" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373425.jpg", "model_name": "BLIP2" }, { "question": "What are the numbers below the letters?", "answer": "the car number", "gt_answers": [ "518590" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373425.jpg", "model_name": "BLIP2" }, { "question": "What is the name on on the truck", "answer": "person", "gt_answers": [ "stobart" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000216158.jpg", "model_name": "BLIP2" }, { "question": "What type of truck it this", "answer": "is a person", "gt_answers": [ "c a r r i e r" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000216158.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate #", "answer": "dk dk", "gt_answers": [ "08-0-74404" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000216158.jpg", "model_name": "BLIP2" }, { "question": "What is written on the image?", "answer": "chimpanzee testing for food", "gt_answers": [ "Chimpanzee fishing for food." ], "image_path": "./data/STVQA/imageNet/n02481823_2363.JPEG", "model_name": "BLIP2" }, { "question": "What designations are shown on the front of the train to the far left?", "answer": "person", "gt_answers": [ "Y 133" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000466621.jpg", "model_name": "BLIP2" }, { "question": "What designations are shown on the lower front of the train to the far right?", "answer": "person", "gt_answers": [ "J 515" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000466621.jpg", "model_name": "BLIP2" }, { "question": "What number is below Half St?", "answer": "2", "gt_answers": [ "1200" ], "image_path": "./data/STVQA/VisualGenome/1/2353139.jpg", "model_name": "BLIP2" }, { "question": "What is does the white text say?", "answer": "bibs", "gt_answers": [ "patiently" ], "image_path": "./data/STVQA/IIIT_text/6235.jpg", "model_name": "BLIP2" }, { "question": "What does the text in blue say?", "answer": "two babies laying in bed", "gt_answers": [ "waiting" ], "image_path": "./data/STVQA/IIIT_text/6235.jpg", "model_name": "BLIP2" }, { "question": "What business would I reach if I called (415) 970-1015?", "answer": "transportes quettal", "gt_answers": [ "Transportes Quetzal" ], "image_path": "./data/STVQA/VisualGenome/1/2332296.jpg", "model_name": "BLIP2" }, { "question": "What is the green bus number?", "answer": "a bus", "gt_answers": [ "336A", "336" ], "image_path": "./data/STVQA/VisualGenome/1/2346324.jpg", "model_name": "BLIP2" }, { "question": "What is the red bus number?", "answer": "x2", "gt_answers": [ "147" ], "image_path": "./data/STVQA/VisualGenome/1/2346324.jpg", "model_name": "BLIP2" }, { "question": "Where is the green bus going to?", "answer": "to the airport", "gt_answers": [ "Loudwater Village" ], "image_path": "./data/STVQA/VisualGenome/1/2346324.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of flight refer", "answer": "wing", "gt_answers": [ "ORBEST" ], "image_path": "./data/STVQA/VisualGenome/1/2360427.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "king street", "gt_answers": [ "BALTIMORE ST." ], "image_path": "./data/STVQA/VisualGenome/2/2405192.jpg", "model_name": "BLIP2" }, { "question": "What number is on the man's jersey?", "answer": "person", "gt_answers": [ "10" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000222506.jpg", "model_name": "BLIP2" }, { "question": "What are the words on the yellow monkey fruit?", "answer": "peel here", "gt_answers": [ "PEEL HERE" ], "image_path": "./data/STVQA/VisualGenome/1/2349485.jpg", "model_name": "BLIP2" }, { "question": "What does the banana say?", "answer": "peel here", "gt_answers": [ "PEEL HERE" ], "image_path": "./data/STVQA/VisualGenome/1/2349485.jpg", "model_name": "BLIP2" }, { "question": "What is the website on the advertisement?", "answer": "com", "gt_answers": [ "www.imob.sg" ], "image_path": "./data/STVQA/icdar/img_153.jpg", "model_name": "BLIP2" }, { "question": "What is the sign to the left advertising?", "answer": "fast food", "gt_answers": [ "HOLIDAY PLATTER" ], "image_path": "./data/STVQA/icdar/test_img_415.jpg", "model_name": "BLIP2" }, { "question": "What number is on the front of the motorbike?", "answer": "4", "gt_answers": [ "5" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000174705.jpg", "model_name": "BLIP2" }, { "question": "What clinic is one the left?", "answer": "mri scan", "gt_answers": [ "Healthway Medical" ], "image_path": "./data/STVQA/icdar/img_278.jpg", "model_name": "BLIP2" }, { "question": "What is the text on the bottom right of the image?", "answer": "i love onions", "gt_answers": [ "sheeba" ], "image_path": "./data/STVQA/imageNet/n03958227_4642.JPEG", "model_name": "BLIP2" }, { "question": "What company is referenced in the lower right-hand corner?", "answer": "person", "gt_answers": [ "eyecreation", "EYE CREATION" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000552346.jpg", "model_name": "BLIP2" }, { "question": "Who is in the photo?", "answer": "person", "gt_answers": [ "Matteo Catanese" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000563257.jpg", "model_name": "BLIP2" }, { "question": "what is the brand name of the blue shirt?", "answer": "nike", "gt_answers": [ "Hollister" ], "image_path": "./data/STVQA/icdar/img_336.jpg", "model_name": "BLIP2" }, { "question": "what clothing company is displayed on the blue shirt?", "answer": "nike", "gt_answers": [ "Hollister" ], "image_path": "./data/STVQA/icdar/img_336.jpg", "model_name": "BLIP2" }, { "question": "what is the train id number?", "answer": "person", "gt_answers": [ "31193" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000282567.jpg", "model_name": "BLIP2" }, { "question": "What is the red street sign asking you to do?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2355131.jpg", "model_name": "BLIP2" }, { "question": "What does the white flag with red letter say?", "answer": "person", "gt_answers": [ "GRA OPE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000155198.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the leading, red car?", "answer": "md 007", "gt_answers": [ "HV03 JXE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000353260.jpg", "model_name": "BLIP2" }, { "question": "What is written on the container with the silver top?", "answer": "salt", "gt_answers": [ "Salt" ], "image_path": "./data/STVQA/imageNet/n04131690_8780.JPEG", "model_name": "BLIP2" }, { "question": "How many cups can the clear container hold?", "answer": "2", "gt_answers": [ "2" ], "image_path": "./data/STVQA/imageNet/n04131690_8780.JPEG", "model_name": "BLIP2" }, { "question": "What is the train number?", "answer": "person", "gt_answers": [ "377 605" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000122207.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "mrt", "gt_answers": [ "TAGHeuer" ], "image_path": "./data/STVQA/icdar/test_img_95.jpg", "model_name": "BLIP2" }, { "question": "What is written on the tire?", "answer": "san francisco", "gt_answers": [ "LOCO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000346004.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the bike?", "answer": "it is a cargo bike", "gt_answers": [ "Scott" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000346004.jpg", "model_name": "BLIP2" }, { "question": "What brand is the bike tire?", "answer": "michelin", "gt_answers": [ "Loco" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000346004.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the location they are at?", "answer": "schoenboden", "gt_answers": [ "Schonbodenb" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000296374.jpg", "model_name": "BLIP2" }, { "question": "What river does this bridge cross?", "answer": "willamette river", "gt_answers": [ "Willamette River" ], "image_path": "./data/STVQA/VisualGenome/1/2365431.jpg", "model_name": "BLIP2" }, { "question": "What county is the Willamette River located?", "answer": "marion county", "gt_answers": [ "Marion County" ], "image_path": "./data/STVQA/VisualGenome/1/2365431.jpg", "model_name": "BLIP2" }, { "question": "What is the name of LaunchPad as written on the wall?", "answer": "launchpad", "gt_answers": [ "JTC" ], "image_path": "./data/STVQA/icdar/img_252.jpg", "model_name": "BLIP2" }, { "question": "What is written in the window of the bus?", "answer": "school bus", "gt_answers": [ "N 30" ], "image_path": "./data/STVQA/VisualGenome/1/2362829.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the bumper of the bus?", "answer": "57", "gt_answers": [ "215" ], "image_path": "./data/STVQA/VisualGenome/1/2362829.jpg", "model_name": "BLIP2" }, { "question": "What is written on the back door of the bus?", "answer": "school bus", "gt_answers": [ "STOP WHEN RED LIGHTS FLASH" ], "image_path": "./data/STVQA/VisualGenome/1/2362829.jpg", "model_name": "BLIP2" }, { "question": "What timezone is the clock tracking?", "answer": "utc", "gt_answers": [ "New York" ], "image_path": "./data/STVQA/VisualGenome/1/2315403.jpg", "model_name": "BLIP2" }, { "question": "What is the number bordered by orange?", "answer": "15", "gt_answers": [ "15" ], "image_path": "./data/STVQA/VisualGenome/2/2410648.jpg", "model_name": "BLIP2" }, { "question": "What does the text bordered by green say?", "answer": "go great 15", "gt_answers": [ "E45" ], "image_path": "./data/STVQA/VisualGenome/2/2410648.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the orgainziation?", "answer": "hong kong parrot society", "gt_answers": [ "BIRD SHOW" ], "image_path": "./data/STVQA/VisualGenome/1/498315.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the sharpener?", "answer": "sharpie", "gt_answers": [ "panasonic" ], "image_path": "./data/STVQA/imageNet/n04116512_19239.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand name on the truck?", "answer": "person", "gt_answers": [ "GMC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000032455.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the top of the train?", "answer": "hmr", "gt_answers": [ "MINIATURE RAILWAY" ], "image_path": "./data/STVQA/VisualGenome/2/2415147.jpg", "model_name": "BLIP2" }, { "question": "What three letters are on the side of the train?", "answer": "hmr", "gt_answers": [ "HMR" ], "image_path": "./data/STVQA/VisualGenome/2/2415147.jpg", "model_name": "BLIP2" }, { "question": "What does the largest sign say?", "answer": "sara", "gt_answers": [ "Sasa" ], "image_path": "./data/STVQA/IIIT_text/1898.jpg", "model_name": "BLIP2" }, { "question": "What website owns this image?", "answer": "flikr", "gt_answers": [ "Pierto.com" ], "image_path": "./data/STVQA/VisualGenome/1/2366609.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company truck?", "answer": "united", "gt_answers": [ "UNITED" ], "image_path": "./data/STVQA/imageNet/n03796401_2258.JPEG", "model_name": "BLIP2" }, { "question": "what word is on the white and black shoe in the mifddle", "answer": "nike", "gt_answers": [ "grace" ], "image_path": "./data/STVQA/imageNet/n04200800_16283.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the numbers?", "answer": "", "gt_answers": [ "275" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000224601.jpg", "model_name": "BLIP2" }, { "question": "Where is the license plate from?", "answer": "new jersey", "gt_answers": [ "California" ], "image_path": "./data/STVQA/VisualGenome/1/2345818.jpg", "model_name": "BLIP2" }, { "question": "What does the license plate say?", "answer": "a dog", "gt_answers": [ "6CTA983" ], "image_path": "./data/STVQA/VisualGenome/1/2345818.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the bird box?", "answer": "the rock city", "gt_answers": [ "See Rock City" ], "image_path": "./data/STVQA/imageNet/n02843684_7133.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the side of the plane?", "answer": "person", "gt_answers": [ "british airways" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000397986.jpg", "model_name": "BLIP2" }, { "question": "What logo is in black print?", "answer": "person", "gt_answers": [ "BRIDGESTONE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000429266.jpg", "model_name": "BLIP2" }, { "question": "What sponsor is in red letters?", "answer": "person", "gt_answers": [ "cmra" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000429266.jpg", "model_name": "BLIP2" }, { "question": "What year is depicted in this image?", "answer": "2015", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000409825.jpg", "model_name": "BLIP2" }, { "question": "What is the largest word on the red sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000140613.jpg", "model_name": "BLIP2" }, { "question": "For UTC how much is the private master suite per month?", "answer": "the private master suite per month is 0", "gt_answers": [ "$800" ], "image_path": "./data/STVQA/imageNet/n06785654_15323.JPEG", "model_name": "BLIP2" }, { "question": "Which number must be called in case of emergency?", "answer": "911", "gt_answers": [ "911" ], "image_path": "./data/STVQA/VisualGenome/1/2315483.jpg", "model_name": "BLIP2" }, { "question": "What brand of controller is being used?", "answer": "xbox 360", "gt_answers": [ "COX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000469405.jpg", "model_name": "BLIP2" }, { "question": "WHICH TYPE OF VEHICLE TAKING ANOTHER VEHICLE?", "answer": "tow truck", "gt_answers": [ "RECOVERY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000071714.jpg", "model_name": "BLIP2" }, { "question": "What is written in front of bus ?", "answer": "face manila", "gt_answers": [ "Free mallride" ], "image_path": "./data/STVQA/VisualGenome/1/713594.jpg", "model_name": "BLIP2" }, { "question": "What brand of vehicle is the red vehicle?", "answer": "toyota", "gt_answers": [ "Toyota" ], "image_path": "./data/STVQA/VisualGenome/2/2408016.jpg", "model_name": "BLIP2" }, { "question": "what shop is written on the top left?", "answer": "h&m", "gt_answers": [ "Marks & Spencer" ], "image_path": "./data/STVQA/icdar/img_391.jpg", "model_name": "BLIP2" }, { "question": "What is the name printed on the red sign?", "answer": "person", "gt_answers": [ "Merrell" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000264241.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the novel?", "answer": "the pirates and the scientists", "gt_answers": [ "The Pirates! In an adventure with Scientists" ], "image_path": "./data/STVQA/imageNet/n07248320_4007.JPEG", "model_name": "BLIP2" }, { "question": "Who is the Author?", "answer": "gillian slade", "gt_answers": [ "Gideon Defoe" ], "image_path": "./data/STVQA/imageNet/n07248320_4007.JPEG", "model_name": "BLIP2" }, { "question": "What cat brand is shown on the tin top?", "answer": "hello kitty", "gt_answers": [ "HELLO KITTY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000395030.jpg", "model_name": "BLIP2" }, { "question": "What beverage is showing a label with the food?", "answer": "soda", "gt_answers": [ "JUICE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000395030.jpg", "model_name": "BLIP2" }, { "question": "What does his shirt say?", "answer": "i love tennis", "gt_answers": [ "Nike" ], "image_path": "./data/STVQA/VisualGenome/1/2323130.jpg", "model_name": "BLIP2" }, { "question": "What type of bird is this?", "answer": "sandgrouse", "gt_answers": [ "australian bustard" ], "image_path": "./data/STVQA/imageNet/n02018795_599.JPEG", "model_name": "BLIP2" }, { "question": "Who took the photograph?", "answer": "the australian birdlife society", "gt_answers": [ "Phil Gregory" ], "image_path": "./data/STVQA/imageNet/n02018795_599.JPEG", "model_name": "BLIP2" }, { "question": "What year was the photo taken?", "answer": "2003", "gt_answers": [ "2005" ], "image_path": "./data/STVQA/imageNet/n02018795_599.JPEG", "model_name": "BLIP2" }, { "question": "What is listed Largely on the side of the Truck?", "answer": "statewide", "gt_answers": [ "Statewide" ], "image_path": "./data/STVQA/VisualGenome/1/1160033.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the shop in the image?", "answer": "chanel", "gt_answers": [ "SPRIT" ], "image_path": "./data/STVQA/icdar/img_396.jpg", "model_name": "BLIP2" }, { "question": "What is one thing displayed in this picture?", "answer": "toilets", "gt_answers": [ "Toilet" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000286359.jpg", "model_name": "BLIP2" }, { "question": "What is on the bus display?", "answer": "a map", "gt_answers": [ "NYCT Bus" ], "image_path": "./data/STVQA/VisualGenome/1/1159361.jpg", "model_name": "BLIP2" }, { "question": "What kind of car is in the picture?", "answer": "bus", "gt_answers": [ "Jovial Car" ], "image_path": "./data/STVQA/VisualGenome/1/2370548.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say to stop doing?", "answer": "killing gays", "gt_answers": [ "Killing gays" ], "image_path": "./data/STVQA/VisualGenome/1/2326660.jpg", "model_name": "BLIP2" }, { "question": "How much off is posted on the sale sign?", "answer": "20%", "gt_answers": [ "50%.", "50%" ], "image_path": "./data/STVQA/icdar/test_img_175.jpg", "model_name": "BLIP2" }, { "question": "What are the letters on the insignia?", "answer": "london", "gt_answers": [ "ER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000357696.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the paper in the bottom right corner?", "answer": "''the b 52''", "gt_answers": [ "DOUGLAS A-3D SKYWARRIOR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000425433.jpg", "model_name": "BLIP2" }, { "question": "What is the cloth used for?", "answer": "curtains", "gt_answers": [ "Shower curtain" ], "image_path": "./data/STVQA/imageNet/n04209239_10173.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand name?", "answer": "splash", "gt_answers": [ "Splash", "Splash bath" ], "image_path": "./data/STVQA/imageNet/n04209239_10173.JPEG", "model_name": "BLIP2" }, { "question": "What fruit is the wine?", "answer": "blackberry", "gt_answers": [ "Blackberry" ], "image_path": "./data/STVQA/VisualGenome/1/2330417.jpg", "model_name": "BLIP2" }, { "question": "What are the two letter written in white on the side of the ramp?", "answer": "sk8", "gt_answers": [ "DV" ], "image_path": "./data/STVQA/VisualGenome/2/2402345.jpg", "model_name": "BLIP2" }, { "question": "What word comes after BODY?", "answer": "shop", "gt_answers": [ "SHOP" ], "image_path": "./data/STVQA/icdar/img_782.jpg", "model_name": "BLIP2" }, { "question": "What percentage off is this sale?", "answer": "50%", "gt_answers": [ "40%", "40" ], "image_path": "./data/STVQA/icdar/img_782.jpg", "model_name": "BLIP2" }, { "question": "What word is listed twice?", "answer": "sea", "gt_answers": [ "SEA" ], "image_path": "./data/STVQA/VisualGenome/2/2408188.jpg", "model_name": "BLIP2" }, { "question": "What is the word that begins with \"M\"?", "answer": "mine", "gt_answers": [ "MIST" ], "image_path": "./data/STVQA/VisualGenome/2/2408188.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the black car?", "answer": "sheriff", "gt_answers": [ "Sheriff" ], "image_path": "./data/STVQA/imageNet/n03977966_34862.JPEG", "model_name": "BLIP2" }, { "question": "How much does the biscotti cost?", "answer": "$2 each", "gt_answers": [ "0.6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000138025.jpg", "model_name": "BLIP2" }, { "question": "How much does the mini krapfen cost?", "answer": "it's free", "gt_answers": [ "0.6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000138025.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store shown?", "answer": "h&m", "gt_answers": [ "swatch" ], "image_path": "./data/STVQA/icdar/test_img_220.jpg", "model_name": "BLIP2" }, { "question": "What is the country name in his shirt?", "answer": "australia", "gt_answers": [ "Columbia" ], "image_path": "./data/STVQA/VisualGenome/1/2341148.jpg", "model_name": "BLIP2" }, { "question": "how many year?", "answer": "two", "gt_answers": [ "10 year" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000494297.jpg", "model_name": "BLIP2" }, { "question": "What phone number is listed on the box?", "answer": "the number is listed on the box", "gt_answers": [ "650-368-5908" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420363.jpg", "model_name": "BLIP2" }, { "question": "What road is Wegman's Nursery on?", "answer": "wegman road", "gt_answers": [ "Woodside Road" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420363.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the box above the web address?", "answer": "daffodils in pots", "gt_answers": [ "Open 7 Days a Week" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000420363.jpg", "model_name": "BLIP2" }, { "question": "What is the second word on the sign to the left of the image?", "answer": "highway", "gt_answers": [ "CAI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000315348.jpg", "model_name": "BLIP2" }, { "question": "What street name does the signpost say?", "answer": "king street", "gt_answers": [ "HAIGHT" ], "image_path": "./data/STVQA/VisualGenome/1/2337658.jpg", "model_name": "BLIP2" }, { "question": "What is the country on the orange sign?", "answer": "south africa", "gt_answers": [ "Malaysia" ], "image_path": "./data/STVQA/VisualGenome/1/2329534.jpg", "model_name": "BLIP2" }, { "question": "What is written at the top of the vehicle?", "answer": "school bus", "gt_answers": [ "School Bus" ], "image_path": "./data/STVQA/IIIT_text/img_001122.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bus company?", "answer": "scottish citylink", "gt_answers": [ "Arriva" ], "image_path": "./data/STVQA/VisualGenome/2/2407415.jpg", "model_name": "BLIP2" }, { "question": "What does the big sign with white words and red background say?", "answer": "sale", "gt_answers": [ "Sale" ], "image_path": "./data/STVQA/icdar/img_390.jpg", "model_name": "BLIP2" }, { "question": "What is title associated with Gloucester?", "answer": "city of gloucester", "gt_answers": [ "Duke" ], "image_path": "./data/STVQA/VisualGenome/1/2362172.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "henry street", "gt_answers": [ "S. HENRY" ], "image_path": "./data/STVQA/VisualGenome/1/2362172.jpg", "model_name": "BLIP2" }, { "question": "What are the two letters on the red sign?", "answer": "bc", "gt_answers": [ "DQ" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000442326.jpg", "model_name": "BLIP2" }, { "question": "What country is printed in the top right corner?", "answer": "australia", "gt_answers": [ "WALES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278229.jpg", "model_name": "BLIP2" }, { "question": "Which words are place names?", "answer": "london london", "gt_answers": [ "CYMRU NY MUNCHEN BAYERN WALES", "BAYERN WALES NY MUNCHEN CYMRU" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278229.jpg", "model_name": "BLIP2" }, { "question": "What city is printed on the square white sticker?", "answer": "rome", "gt_answers": [ "NY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278229.jpg", "model_name": "BLIP2" }, { "question": "How many rolls come in a package?", "answer": "12", "gt_answers": [ "6" ], "image_path": "./data/STVQA/imageNet/n15075141_13342.JPEG", "model_name": "BLIP2" }, { "question": "What is the underlined white words?", "answer": "pizza", "gt_answers": [ "Brick Oven Pizza" ], "image_path": "./data/STVQA/VisualGenome/1/2357756.jpg", "model_name": "BLIP2" }, { "question": "what is written in bold capitals on the top 2 lines?", "answer": "toilet and sink", "gt_answers": [ "DIRECTIONS FOR OPERATING BEFORE USING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000250607.jpg", "model_name": "BLIP2" }, { "question": "What herb is on the Parmesan bagel?", "answer": "basil", "gt_answers": [ "Oregano" ], "image_path": "./data/STVQA/VisualGenome/1/2329482.jpg", "model_name": "BLIP2" }, { "question": "What is written left side corner", "answer": "a frisbee", "gt_answers": [ "gonzalo salas" ], "image_path": "./data/STVQA/VisualGenome/1/2342132.jpg", "model_name": "BLIP2" }, { "question": "What is being advertised on the far left?", "answer": "beer", "gt_answers": [ "coffee" ], "image_path": "./data/STVQA/VisualGenome/1/2340256.jpg", "model_name": "BLIP2" }, { "question": "What does the watermark in the bottom right corner of the picture say?", "answer": "wwii aircraft", "gt_answers": [ "Desrosiers Photo" ], "image_path": "./data/STVQA/VisualGenome/1/2346561.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom of the snowboard say?", "answer": "person", "gt_answers": [ "Ride", "RIDE DH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000407178.jpg", "model_name": "BLIP2" }, { "question": "Where is this traffic light located?", "answer": "wall street", "gt_answers": [ "Wall St" ], "image_path": "./data/STVQA/VisualGenome/1/2336393.jpg", "model_name": "BLIP2" }, { "question": "What is the word surrounded by red", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000563233.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the green motorcycle", "answer": "two", "gt_answers": [ "383" ], "image_path": "./data/STVQA/VisualGenome/1/2327718.jpg", "model_name": "BLIP2" }, { "question": "What word can you see three (3) times?", "answer": "bird", "gt_answers": [ "rim rim rim", "rim" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000260050.jpg", "model_name": "BLIP2" }, { "question": "What store is on the right?", "answer": "subway", "gt_answers": [ "Subway" ], "image_path": "./data/STVQA/icdar/test_img_69.jpg", "model_name": "BLIP2" }, { "question": "What restaurant is depicted in yellow and white text?", "answer": "subway", "gt_answers": [ "Subway" ], "image_path": "./data/STVQA/icdar/test_img_69.jpg", "model_name": "BLIP2" }, { "question": "What kind of key is needed?", "answer": "a product key is needed", "gt_answers": [ "Drive Encryption" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015970.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name printed on the sign and the two spice bottles in the background?", "answer": "person", "gt_answers": [ "PLANET DAILIES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414571.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the lower left corner of the placemat beneath the plate?", "answer": "the name of the restaurant", "gt_answers": [ "BEACH BESTS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414571.jpg", "model_name": "BLIP2" }, { "question": "What is printed within a red background on the lower right corner of the placemat?", "answer": "person's grill", "gt_answers": [ "TUR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414571.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "public market farmers", "gt_answers": [ "Public Market Farmers" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000538407.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the caera in the image?", "answer": "nikon", "gt_answers": [ "Pentax" ], "image_path": "./data/STVQA/imageNet/n04069434_12300.JPEG", "model_name": "BLIP2" }, { "question": "What sport is he playing", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/2/2409431.jpg", "model_name": "BLIP2" }, { "question": "What does the match timer say", "answer": "the match timer shows the time of the match", "gt_answers": [ "0:58" ], "image_path": "./data/STVQA/VisualGenome/2/2409431.jpg", "model_name": "BLIP2" }, { "question": "How many towels do you see?", "answer": "two", "gt_answers": [ "one" ], "image_path": "./data/STVQA/VisualGenome/2/2409431.jpg", "model_name": "BLIP2" }, { "question": "What word in red is on the bottom right of the picture?", "answer": "person", "gt_answers": [ "superseded" ], "image_path": "./data/STVQA/IIIT_text/1884.jpg", "model_name": "BLIP2" }, { "question": "What is written on the shirt of the man in red?", "answer": "san francisco", "gt_answers": [ "Dr Pepper" ], "image_path": "./data/STVQA/VisualGenome/1/285958.jpg", "model_name": "BLIP2" }, { "question": "What number is printed over the doorway?", "answer": "3", "gt_answers": [ "206" ], "image_path": "./data/STVQA/VisualGenome/1/285958.jpg", "model_name": "BLIP2" }, { "question": "Who is the writer of the book The Counterfeit Guest that is shown in the photo?", "answer": "person", "gt_answers": [ "Rose Melikan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000144812.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the book written by Nick McDonell that is shown>", "answer": "we are beginning our desert", "gt_answers": [ "Twelve" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000144812.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the book written by James Meek that is shown?", "answer": "we are beginning our desert", "gt_answers": [ "We are Now Beginning Our Descent" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000144812.jpg", "model_name": "BLIP2" }, { "question": "How many miles is liverpool?", "answer": "100 miles", "gt_answers": [ "302" ], "image_path": "./data/STVQA/IIIT_text/3047.jpg", "model_name": "BLIP2" }, { "question": "What word is above ONTARIO?", "answer": "street", "gt_answers": [ "PLACE" ], "image_path": "./data/STVQA/VisualGenome/1/2341602.jpg", "model_name": "BLIP2" }, { "question": "What word is below RUE?", "answer": "rue", "gt_answers": [ "DU MUSEE" ], "image_path": "./data/STVQA/VisualGenome/1/2341602.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THIS COUNTRY?", "answer": "india", "gt_answers": [ "SINGAPORE" ], "image_path": "./data/STVQA/icdar/img_191.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN THIS ADVERTISEMENT BOARD?", "answer": "a movie poster", "gt_answers": [ "ULTIMATE WILDLIFE EXPERIENCE" ], "image_path": "./data/STVQA/icdar/img_191.jpg", "model_name": "BLIP2" }, { "question": "What is the team name on the uniform?", "answer": "philadelphia phillies", "gt_answers": [ "Jays" ], "image_path": "./data/STVQA/VisualGenome/1/2360492.jpg", "model_name": "BLIP2" }, { "question": "What are the sponsors' letters in the light green box?", "answer": "mlb", "gt_answers": [ "TD" ], "image_path": "./data/STVQA/VisualGenome/1/2360492.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "m15", "gt_answers": [ "6658" ], "image_path": "./data/STVQA/VisualGenome/2/2402460.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the cans that the men are standing on?", "answer": "red bull", "gt_answers": [ "Red Bull" ], "image_path": "./data/STVQA/VisualGenome/1/2354271.jpg", "model_name": "BLIP2" }, { "question": "What is the speed limit?", "answer": "20", "gt_answers": [ "20" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000497608.jpg", "model_name": "BLIP2" }, { "question": "WHAT TYPE OF SPICE IS IT", "answer": "ground ginger", "gt_answers": [ "GROUND GINGER", "GINGER" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008222.jpg", "model_name": "BLIP2" }, { "question": "WHICH BRAND IS IT", "answer": "ground ginger", "gt_answers": [ "KROGER" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008222.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NET WT", "answer": "ground ginger 1 oz", "gt_answers": [ "1.5 OZ AND 42 G", "1.5 OZ" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008222.jpg", "model_name": "BLIP2" }, { "question": "How much of a discount is offered here?", "answer": "50%", "gt_answers": [ "Up to 50% off" ], "image_path": "./data/STVQA/icdar/test_img_221.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boat?", "answer": "tokyo express", "gt_answers": [ "Tokyo Express" ], "image_path": "./data/STVQA/imageNet/n03095699_10479.JPEG", "model_name": "BLIP2" }, { "question": "What is written in white capital letters on the front of the boat?", "answer": "express", "gt_answers": [ "Tokyo Express" ], "image_path": "./data/STVQA/imageNet/n03095699_10479.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the building?", "answer": "san francisco", "gt_answers": [ "fiori" ], "image_path": "./data/STVQA/VisualGenome/2/963.jpg", "model_name": "BLIP2" }, { "question": "What is the four-letter word on the blue signs?", "answer": "person", "gt_answers": [ "BOWL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000079152.jpg", "model_name": "BLIP2" }, { "question": "What does the first chart on the left say at the top?", "answer": "the number of houses built in a year", "gt_answers": [ "perkalian 1-200" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000055874.jpg", "model_name": "BLIP2" }, { "question": "What is written on the licence plate?", "answer": "fair cars", "gt_answers": [ "Volkswagen" ], "image_path": "./data/STVQA/VisualGenome/1/2342109.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blue part of the sign?", "answer": "aracha", "gt_answers": [ "A-1" ], "image_path": "./data/STVQA/VisualGenome/1/2341447.jpg", "model_name": "BLIP2" }, { "question": "What number is the train?", "answer": "0", "gt_answers": [ "41" ], "image_path": "./data/STVQA/VisualGenome/1/2332201.jpg", "model_name": "BLIP2" }, { "question": "What is the first letter of the Los Angeles catcher's name?", "answer": "person", "gt_answers": [ "J." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000163553.jpg", "model_name": "BLIP2" }, { "question": "Who is the sponsor on the back wall?", "answer": "person", "gt_answers": [ "TJ Maxx", "T.J. MAXX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000498082.jpg", "model_name": "BLIP2" }, { "question": "What is written in white on the bus?", "answer": "person", "gt_answers": [ "MAX MILLER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000158036.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of the blue bus?", "answer": "toyota", "gt_answers": [ "Higer" ], "image_path": "./data/STVQA/VisualGenome/2/2406967.jpg", "model_name": "BLIP2" }, { "question": "What store sells vries shirts?", "answer": "vries shop", "gt_answers": [ "vries" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000565360.jpg", "model_name": "BLIP2" }, { "question": "What is the magazine name on the table?", "answer": "health", "gt_answers": [ "Metro" ], "image_path": "./data/STVQA/VisualGenome/2/2006.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the manufacturing company that made this truck?", "answer": "isuzu", "gt_answers": [ "Isuzu" ], "image_path": "./data/STVQA/imageNet/n03796401_5310.JPEG", "model_name": "BLIP2" }, { "question": "what capital letters are written on the purple sign?", "answer": "person", "gt_answers": [ "nyu" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000224959.jpg", "model_name": "BLIP2" }, { "question": "What does the red and white sign on the right read?", "answer": "no parking", "gt_answers": [ "NO STANDING ANYTIME" ], "image_path": "./data/STVQA/VisualGenome/1/1160130.jpg", "model_name": "BLIP2" }, { "question": "What is written in front of the train?", "answer": "nyc", "gt_answers": [ "OBB" ], "image_path": "./data/STVQA/VisualGenome/1/2371066.jpg", "model_name": "BLIP2" }, { "question": "what is written on the display window?", "answer": "ice cream", "gt_answers": [ "THE NEW EXPLOSIVE" ], "image_path": "./data/STVQA/icdar/img_528.jpg", "model_name": "BLIP2" }, { "question": "What time is shown for the train leaving to Uitgeest?", "answer": "the time shown for the train leaving to uitgeest", "gt_answers": [ "12:47" ], "image_path": "./data/STVQA/VisualGenome/1/2328542.jpg", "model_name": "BLIP2" }, { "question": "What is the platform number?", "answer": "4", "gt_answers": [ "8a" ], "image_path": "./data/STVQA/VisualGenome/1/2328542.jpg", "model_name": "BLIP2" }, { "question": "where is the final destination of the train?", "answer": "new york city", "gt_answers": [ "Uitgeest" ], "image_path": "./data/STVQA/VisualGenome/1/2328542.jpg", "model_name": "BLIP2" }, { "question": "What does the left neon sign say?", "answer": "open", "gt_answers": [ "OPEN" ], "image_path": "./data/STVQA/VisualGenome/1/2349620.jpg", "model_name": "BLIP2" }, { "question": "What year is listed as the copyright?", "answer": "2010", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/imageNet/n01632458_11130.JPEG", "model_name": "BLIP2" }, { "question": "Who submitted this picture?", "answer": "daniel", "gt_answers": [ "Michael F. Benard" ], "image_path": "./data/STVQA/imageNet/n01632458_11130.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of this airline?", "answer": "klm", "gt_answers": [ "KLM ASIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000221532.jpg", "model_name": "BLIP2" }, { "question": "What is the number of this train?", "answer": "no number", "gt_answers": [ "2893" ], "image_path": "./data/STVQA/VisualGenome/1/2372076.jpg", "model_name": "BLIP2" }, { "question": "What model number is shown after the word \"Vocalist\"?", "answer": "yamaha dsp vocalist", "gt_answers": [ "VL3D" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018207.jpg", "model_name": "BLIP2" }, { "question": "What is written on the red sign?", "answer": "railway street", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000195860.jpg", "model_name": "BLIP2" }, { "question": "What word is on the dog's harness?", "answer": "ski", "gt_answers": [ "Mini" ], "image_path": "./data/STVQA/imageNet/n02109525_37925.JPEG", "model_name": "BLIP2" }, { "question": "What year is on the bottle?", "answer": "2012", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000204529.jpg", "model_name": "BLIP2" }, { "question": "What word comes after pinot on the bottle?", "answer": "person", "gt_answers": [ "BLANC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000204529.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store shown?", "answer": "sephora", "gt_answers": [ "Sephora" ], "image_path": "./data/STVQA/icdar/img_804.jpg", "model_name": "BLIP2" }, { "question": "Whats the name of the Kiosk?", "answer": "american express", "gt_answers": [ "American Express" ], "image_path": "./data/STVQA/icdar/img_783.jpg", "model_name": "BLIP2" }, { "question": "What is the number shown on the back of the batter's jersey?", "answer": "person", "gt_answers": [ "9" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000040995.jpg", "model_name": "BLIP2" }, { "question": "What is the name shown on the back of the batter's jersey?", "answer": "person", "gt_answers": [ "johnson" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000040995.jpg", "model_name": "BLIP2" }, { "question": "Where is this photo from?", "answer": "australia", "gt_answers": [ "dreamstime.com" ], "image_path": "./data/STVQA/imageNet/n03935335_13903.JPEG", "model_name": "BLIP2" }, { "question": "How many people are playing frisbee?", "answer": "two", "gt_answers": [ "Three", "THREE PEOPLE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000142229.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the organization on the sign?", "answer": "fortuna mountain rescue team", "gt_answers": [ "BORDER ROADS" ], "image_path": "./data/STVQA/IIIT_text/7777.jpg", "model_name": "BLIP2" }, { "question": "Who made this laptop?", "answer": "dell", "gt_answers": [ "Dell" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016040.jpg", "model_name": "BLIP2" }, { "question": "What is the first letter on the screen?", "answer": "t", "gt_answers": [ "P" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016040.jpg", "model_name": "BLIP2" }, { "question": "When can you stop here?", "answer": "when the light turns green", "gt_answers": [ "NO STOPPING ANYTIME" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000010566.jpg", "model_name": "BLIP2" }, { "question": "What is the queens name that is on this boat?", "answer": "victoria", "gt_answers": [ "QUEEN ELIZABETH" ], "image_path": "./data/STVQA/VisualGenome/1/2355707.jpg", "model_name": "BLIP2" }, { "question": "what popular internet search engine is advertised here", "answer": "google", "gt_answers": [ "YAHOO!" ], "image_path": "./data/STVQA/VisualGenome/2/2411346.jpg", "model_name": "BLIP2" }, { "question": "What is the black text on the license plate?", "answer": "les b\u00eates", "gt_answers": [ "CL 637 AP" ], "image_path": "./data/STVQA/IIIT_text/4146.jpg", "model_name": "BLIP2" }, { "question": "What kind of soda does this machine vend?", "answer": "coca cola", "gt_answers": [ "coke" ], "image_path": "./data/STVQA/imageNet/n04525305_10428.JPEG", "model_name": "BLIP2" }, { "question": "What is the jersey number of the player to the left?", "answer": "person", "gt_answers": [ "32" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000528117.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the front of the jersey on the player to the right?", "answer": "person", "gt_answers": [ "orioles" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000528117.jpg", "model_name": "BLIP2" }, { "question": "What is the name printed on the back of the player to the left's jersey?", "answer": "person", "gt_answers": [ "KIETERS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000528117.jpg", "model_name": "BLIP2" }, { "question": "what is the city name in image?", "answer": "miami", "gt_answers": [ "sydney" ], "image_path": "./data/STVQA/VisualGenome/1/2359419.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "x1", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000290168.jpg", "model_name": "BLIP2" }, { "question": "What two readable letters are being reflected in his glasses?", "answer": "donuts", "gt_answers": [ "LU" ], "image_path": "./data/STVQA/VisualGenome/2/2400931.jpg", "model_name": "BLIP2" }, { "question": "What website address is in the image?", "answer": "person", "gt_answers": [ "mhbaker.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000208907.jpg", "model_name": "BLIP2" }, { "question": "Who took the picture?", "answer": "person", "gt_answers": [ "Michael R. Hicks" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000343857.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bottom sign?", "answer": "3", "gt_answers": [ "2" ], "image_path": "./data/STVQA/VisualGenome/2/2408075.jpg", "model_name": "BLIP2" }, { "question": "What city is on the sign?", "answer": "hong kong", "gt_answers": [ "Hong Kong" ], "image_path": "./data/STVQA/VisualGenome/1/2369365.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of 1st board refer", "answer": "1st board refer is the sign of 1st board refer", "gt_answers": [ "firehol", "FIREHOU" ], "image_path": "./data/STVQA/VisualGenome/1/2352662.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of 2nd board refer", "answer": "2nd board refer is the sign of 2nd board refer", "gt_answers": [ "inspiration" ], "image_path": "./data/STVQA/VisualGenome/1/2352662.jpg", "model_name": "BLIP2" }, { "question": "What is the company of the train?", "answer": "amtrak", "gt_answers": [ "Javelin" ], "image_path": "./data/STVQA/VisualGenome/1/2376799.jpg", "model_name": "BLIP2" }, { "question": "What word is located below three triangles red, whit and blue?", "answer": "baseball", "gt_answers": [ "Majestic" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000336077.jpg", "model_name": "BLIP2" }, { "question": "What number is on the batter's jersey?", "answer": "2", "gt_answers": [ "5" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000336077.jpg", "model_name": "BLIP2" }, { "question": "What word can be found on the wall in the bull pit, above the photographer with the white camera?", "answer": "bull", "gt_answers": [ "new" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000336077.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "tah", "gt_answers": [ "W302 NTX" ], "image_path": "./data/STVQA/VisualGenome/2/2402601.jpg", "model_name": "BLIP2" }, { "question": "What airline is this plane for?", "answer": "ryanair", "gt_answers": [ "Ryanair" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000432218.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the school bus?", "answer": "the number of the bus", "gt_answers": [ "21" ], "image_path": "./data/STVQA/IIIT_text/img_001089.jpg", "model_name": "BLIP2" }, { "question": "What is the breed of dog that is shown in the photo?", "answer": "jack russell terrier", "gt_answers": [ "Jack Russell Terrier", "TERRIER" ], "image_path": "./data/STVQA/VisualGenome/1/2335241.jpg", "model_name": "BLIP2" }, { "question": "What is the average life expectancy for this breed of dog?", "answer": "8 to 10 years", "gt_answers": [ "13-24 years", "13-14" ], "image_path": "./data/STVQA/VisualGenome/1/2335241.jpg", "model_name": "BLIP2" }, { "question": "What sport is he practicing?", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/1/2368613.jpg", "model_name": "BLIP2" }, { "question": "What colour is he wearing?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/1/2368613.jpg", "model_name": "BLIP2" }, { "question": "What type of sauce is the jar?", "answer": "tomato sauce", "gt_answers": [ "Vegetarian" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029263.jpg", "model_name": "BLIP2" }, { "question": "What is with the sauce?", "answer": "tomato sauce", "gt_answers": [ "Soy" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029263.jpg", "model_name": "BLIP2" }, { "question": "What is the net weight in g?", "answer": "450 g", "gt_answers": [ "680", "680G" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029263.jpg", "model_name": "BLIP2" }, { "question": "Do the letters JSD appear on this image?", "answer": "no", "gt_answers": [ "JSD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000092197.jpg", "model_name": "BLIP2" }, { "question": "How much do blood orange doughnuts cost?", "answer": "$2 each", "gt_answers": [ "$2.00" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337555.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the place that made this wine?", "answer": "becker vineyard", "gt_answers": [ "BECKER VINEYARDS" ], "image_path": "./data/STVQA/VisualGenome/2/2409865.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the bus?", "answer": "dmc 005", "gt_answers": [ "X59IUSC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000060458.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the bus?", "answer": "dmc 005", "gt_answers": [ "X59IUSC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000060458.jpg", "model_name": "BLIP2" }, { "question": "What kind of dog is this?", "answer": "basset hound", "gt_answers": [ "Basset Hound" ], "image_path": "./data/STVQA/imageNet/n02088238_8110.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the fruit?", "answer": "banana", "gt_answers": [ "Banana" ], "image_path": "./data/STVQA/VisualGenome/1/2348731.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the spread in the container?", "answer": "nutella", "gt_answers": [ "Nutella" ], "image_path": "./data/STVQA/VisualGenome/1/2348731.jpg", "model_name": "BLIP2" }, { "question": "What does the sticker on the front of the truck say?", "answer": "i am a soldier", "gt_answers": [ "Lucky Lizard" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000295657.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company owning this plane", "answer": "united airlines", "gt_answers": [ "UNITED" ], "image_path": "./data/STVQA/VisualGenome/1/2344305.jpg", "model_name": "BLIP2" }, { "question": "What is the website advertised?", "answer": "tennis", "gt_answers": [ "usopen.org" ], "image_path": "./data/STVQA/VisualGenome/1/2361452.jpg", "model_name": "BLIP2" }, { "question": "What brand is advertised behind the player?", "answer": "adidas", "gt_answers": [ "OLYMPUS" ], "image_path": "./data/STVQA/VisualGenome/1/2361452.jpg", "model_name": "BLIP2" }, { "question": "What country is this plate from?", "answer": "mexico", "gt_answers": [ "Mexico" ], "image_path": "./data/STVQA/IIIT_text/4524.jpg", "model_name": "BLIP2" }, { "question": "What is the Licence plate number?", "answer": "rl 9", "gt_answers": [ "508 RL 9", "508 RL-9" ], "image_path": "./data/STVQA/IIIT_text/4524.jpg", "model_name": "BLIP2" }, { "question": "What is displayed in this picture?", "answer": "sandwich", "gt_answers": [ "sandwich" ], "image_path": "./data/STVQA/VisualGenome/1/2339438.jpg", "model_name": "BLIP2" }, { "question": "Which direction can cars not turn?", "answer": "right", "gt_answers": [ "Right", "right turn" ], "image_path": "./data/STVQA/VisualGenome/1/2325024.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON THE TAIL OF THE WHITE PLANE?", "answer": "qantas", "gt_answers": [ "KLM" ], "image_path": "./data/STVQA/VisualGenome/1/2369343.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON THE RED TAIL OF THE SECOND PLANE?", "answer": "qantas", "gt_answers": [ "VIRGIN" ], "image_path": "./data/STVQA/VisualGenome/1/2369343.jpg", "model_name": "BLIP2" }, { "question": "What is this brand?", "answer": "r e d", "gt_answers": [ "beb", "bcb" ], "image_path": "./data/STVQA/imageNet/n03690938_793.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the front of the train?", "answer": "1", "gt_answers": [ "73" ], "image_path": "./data/STVQA/VisualGenome/2/2410729.jpg", "model_name": "BLIP2" }, { "question": "What type of resturant is behind them?", "answer": "fast food", "gt_answers": [ "Steakhouse", "STEAKHOUSE CHICAGO" ], "image_path": "./data/STVQA/VisualGenome/1/1159291.jpg", "model_name": "BLIP2" }, { "question": "What word is printed on the green shirt?", "answer": "peace", "gt_answers": [ "FROM" ], "image_path": "./data/STVQA/VisualGenome/1/2369295.jpg", "model_name": "BLIP2" }, { "question": "What number is on the jersey?", "answer": "2", "gt_answers": [ "99" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065518.jpg", "model_name": "BLIP2" }, { "question": "What animal is this?", "answer": "eastern diamondback rattlesnake", "gt_answers": [ "Eastern diamondback rattle snake" ], "image_path": "./data/STVQA/imageNet/n01755581_12982.JPEG", "model_name": "BLIP2" }, { "question": "What company made the accordian?", "answer": "hohner", "gt_answers": [ "Hohner" ], "image_path": "./data/STVQA/imageNet/n02672831_17131.JPEG", "model_name": "BLIP2" }, { "question": "What year is the picture copyright?", "answer": "2010", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/VisualGenome/1/2338128.jpg", "model_name": "BLIP2" }, { "question": "What brand of camera is being used?", "answer": "nikon", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/imageNet/n03314780_12192.JPEG", "model_name": "BLIP2" }, { "question": "What is the players number?", "answer": "23", "gt_answers": [ "34" ], "image_path": "./data/STVQA/VisualGenome/1/2371802.jpg", "model_name": "BLIP2" }, { "question": "What team is the pitcher with?", "answer": "mets", "gt_answers": [ "Mets" ], "image_path": "./data/STVQA/VisualGenome/1/2371802.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of this product?", "answer": "crock pot", "gt_answers": [ "Crock-Pot", "Crock Pot" ], "image_path": "./data/STVQA/imageNet/n03133878_193.JPEG", "model_name": "BLIP2" }, { "question": "What is written below the logo?", "answer": "crock pot", "gt_answers": [ "Little Dipper" ], "image_path": "./data/STVQA/imageNet/n03133878_193.JPEG", "model_name": "BLIP2" }, { "question": "What is the player's jersey number?", "answer": "person", "gt_answers": [ "38" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000539167.jpg", "model_name": "BLIP2" }, { "question": "What number is on his shirt?", "answer": "person", "gt_answers": [ "38" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000539167.jpg", "model_name": "BLIP2" }, { "question": "What time was this photo take?", "answer": "at night", "gt_answers": [ "8:47 am", "8:47" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000014653.jpg", "model_name": "BLIP2" }, { "question": "Was this photo taken during am or pm?", "answer": "am", "gt_answers": [ "am" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000014653.jpg", "model_name": "BLIP2" }, { "question": "What does the guy's shirt say?", "answer": "he is a snowboarder", "gt_answers": [ "devil inside" ], "image_path": "./data/STVQA/VisualGenome/1/2340607.jpg", "model_name": "BLIP2" }, { "question": "What text is on the building sign?", "answer": "morgan records", "gt_answers": [ "Mother Redcaps" ], "image_path": "./data/STVQA/IIIT_text/3453.jpg", "model_name": "BLIP2" }, { "question": "What is the destination of bus route 450?", "answer": "san francisco international airport", "gt_answers": [ "ARTESIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000001720.jpg", "model_name": "BLIP2" }, { "question": "What is the measurement unit?", "answer": "cm", "gt_answers": [ "Inches" ], "image_path": "./data/STVQA/imageNet/n04118776_34781.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the vegetable that has its price listed on the lower shelf?", "answer": "carrots", "gt_answers": [ "CARROTS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000277788.jpg", "model_name": "BLIP2" }, { "question": "What should you watch?", "answer": "your back", "gt_answers": [ "your back" ], "image_path": "./data/STVQA/imageNet/n03595614_32397.JPEG", "model_name": "BLIP2" }, { "question": "What is sweet?", "answer": "revenge", "gt_answers": [ "revenge" ], "image_path": "./data/STVQA/imageNet/n03595614_32397.JPEG", "model_name": "BLIP2" }, { "question": "what is written on the side of the bus?", "answer": "person", "gt_answers": [ "Ricksha Bus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000455333.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "yankee way", "gt_answers": [ "Yawkey Way" ], "image_path": "./data/STVQA/VisualGenome/1/2359343.jpg", "model_name": "BLIP2" }, { "question": "What sport is being played in the image?", "answer": "baseball", "gt_answers": [ "Baseball" ], "image_path": "./data/STVQA/VisualGenome/2/2405007.jpg", "model_name": "BLIP2" }, { "question": "What team in the player with the bat playing for?", "answer": "texas rangers", "gt_answers": [ "Texas" ], "image_path": "./data/STVQA/VisualGenome/2/2405007.jpg", "model_name": "BLIP2" }, { "question": "What does the red portion of the sign say?", "answer": "nyc subway station", "gt_answers": [ "exit" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254081.jpg", "model_name": "BLIP2" }, { "question": "What street is this subway stop for?", "answer": "avenue of the Americas", "gt_answers": [ "25 street" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254081.jpg", "model_name": "BLIP2" }, { "question": "What avenue is this subway stop for?", "answer": "avenue of the Americas", "gt_answers": [ "8 avenue" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254081.jpg", "model_name": "BLIP2" }, { "question": "What is available in the office?", "answer": "there is a kitchen in the office", "gt_answers": [ "OVEN TRAYS DISHES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365927.jpg", "model_name": "BLIP2" }, { "question": "What word does the bus sign say below the icon of the bus?", "answer": "bus", "gt_answers": [ "Terminal" ], "image_path": "./data/STVQA/IIIT_text/4907.jpg", "model_name": "BLIP2" }, { "question": "What the name of the restaurant", "answer": "person", "gt_answers": [ "DOUGHNUT plant" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000474550.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say in the top of the middle image?", "answer": "nokia", "gt_answers": [ "The Amazing Everyday" ], "image_path": "./data/STVQA/IIIT_text/img_000876.jpg", "model_name": "BLIP2" }, { "question": "How many persons appear in the picture?", "answer": "two", "gt_answers": [ "2" ], "image_path": "./data/STVQA/imageNet/n02403003_5442.JPEG", "model_name": "BLIP2" }, { "question": "How many animals are pulling the cart?", "answer": "two", "gt_answers": [ "2" ], "image_path": "./data/STVQA/imageNet/n02403003_5442.JPEG", "model_name": "BLIP2" }, { "question": "What does the octagonal red sign say?", "answer": "st mary's church", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000395007.jpg", "model_name": "BLIP2" }, { "question": "what video game console is being played? A: Wii", "answer": "a wii", "gt_answers": [ "Wii" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000452806.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the top sign?", "answer": "thomas path", "gt_answers": [ "Museum" ], "image_path": "./data/STVQA/VisualGenome/1/2345669.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the sign?", "answer": "person", "gt_answers": [ "TOWING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000463859.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign", "answer": "person", "gt_answers": [ "Ski patrol" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000071122.jpg", "model_name": "BLIP2" }, { "question": "What airline does this plane belong to?", "answer": "person", "gt_answers": [ "KLM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000072895.jpg", "model_name": "BLIP2" }, { "question": "What letters are written on the tail of the plane?", "answer": "klm", "gt_answers": [ "KLM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000072895.jpg", "model_name": "BLIP2" }, { "question": "What letters represent on the aircraft represent Koninklijke Luchtvaart Maatschappij airline?", "answer": "klm", "gt_answers": [ "KLM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000072895.jpg", "model_name": "BLIP2" }, { "question": "What does the sign above the stove say?", "answer": "texas", "gt_answers": [ "Aggie Xing" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000357898.jpg", "model_name": "BLIP2" }, { "question": "What does this man's hat say?", "answer": "i am a man", "gt_answers": [ "Milwaukee" ], "image_path": "./data/STVQA/VisualGenome/1/286062.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the black boarder of the photo booth?", "answer": "photo booth", "gt_answers": [ "Photosmart Studio" ], "image_path": "./data/STVQA/VisualGenome/2/1245.jpg", "model_name": "BLIP2" }, { "question": "what does the black sticker say.", "answer": "bike and destroy", "gt_answers": [ "Bike and Destroy" ], "image_path": "./data/STVQA/VisualGenome/2/2401065.jpg", "model_name": "BLIP2" }, { "question": "What does the top sign say?", "answer": "winchester avenue", "gt_answers": [ "n main st" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000026266.jpg", "model_name": "BLIP2" }, { "question": "What does the bottom sign say?", "answer": "winchester avenue", "gt_answers": [ "winchester ave" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000026266.jpg", "model_name": "BLIP2" }, { "question": "Which sign says 'ave'?", "answer": "winchester ave", "gt_answers": [ "winchester" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000026266.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blue backbord", "answer": "jc penney", "gt_answers": [ "JPMorgan" ], "image_path": "./data/STVQA/VisualGenome/1/2369030.jpg", "model_name": "BLIP2" }, { "question": "What city is listed on the clock?", "answer": "person", "gt_answers": [ "London" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000172947.jpg", "model_name": "BLIP2" }, { "question": "What is the only readable word on the man's shirt?", "answer": "person", "gt_answers": [ "Rebellion" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000065588.jpg", "model_name": "BLIP2" }, { "question": "What is does the sign below the clock say in bold letters?", "answer": "sam's club", "gt_answers": [ "CLOCK TOWER" ], "image_path": "./data/STVQA/VisualGenome/1/2357647.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this subway station?", "answer": "camden town station", "gt_answers": [ "Camden Town Station" ], "image_path": "./data/STVQA/IIIT_text/2988.jpg", "model_name": "BLIP2" }, { "question": "What number is on the car?", "answer": "24", "gt_answers": [ "22" ], "image_path": "./data/STVQA/imageNet/n04037443_36548.JPEG", "model_name": "BLIP2" }, { "question": "What is the largest name located on the restaurant sign?", "answer": "person", "gt_answers": [ "Cane's" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000075493.jpg", "model_name": "BLIP2" }, { "question": "What is the street name listed on the top of the two street signs?", "answer": "person", "gt_answers": [ "trust", "Trust Dr" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000075493.jpg", "model_name": "BLIP2" }, { "question": "What is the street name listed at the bottom of the two street signs?", "answer": "person", "gt_answers": [ "Corporate", "Corporate Blvd" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000075493.jpg", "model_name": "BLIP2" }, { "question": "what sport is being performed in this picture?", "answer": "baseball", "gt_answers": [ "baseball", "Baseball." ], "image_path": "./data/STVQA/VisualGenome/1/2359280.jpg", "model_name": "BLIP2" }, { "question": "What city team is it?", "answer": "royals", "gt_answers": [ "Kansas City", "Kansas City." ], "image_path": "./data/STVQA/VisualGenome/1/2359280.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant at the top of the image?", "answer": "spicy sweet restaurant", "gt_answers": [ "SPICY & SWEET RESTAURANT" ], "image_path": "./data/STVQA/imageNet/n07565083_10724.JPEG", "model_name": "BLIP2" }, { "question": "What telephone number is listed at the bottom of the menu?", "answer": "the phone number for spicy and sweet restaurant", "gt_answers": [ "416-651-6363" ], "image_path": "./data/STVQA/imageNet/n07565083_10724.JPEG", "model_name": "BLIP2" }, { "question": "What is the number that is printed on the bus window?", "answer": "77", "gt_answers": [ "US141" ], "image_path": "./data/STVQA/VisualGenome/1/2356663.jpg", "model_name": "BLIP2" }, { "question": "What country is shown here?", "answer": "canada", "gt_answers": [ "Canada" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000260407.jpg", "model_name": "BLIP2" }, { "question": "What is the word to the right of the copyright logo?", "answer": "person", "gt_answers": [ "luca" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000103935.jpg", "model_name": "BLIP2" }, { "question": "What does is say on this buses blue sign in white capital letters?", "answer": "lincoln city centre", "gt_answers": [ "GERTCHA SELF DOWN TO LONDON BY TRAIN" ], "image_path": "./data/STVQA/VisualGenome/1/2365410.jpg", "model_name": "BLIP2" }, { "question": "What is written on the red traffic sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2357432.jpg", "model_name": "BLIP2" }, { "question": "What color is SALE! written in?", "answer": "blue", "gt_answers": [ "White" ], "image_path": "./data/STVQA/IIIT_text/img_001014.jpg", "model_name": "BLIP2" }, { "question": "What color is around SALE!?", "answer": "blue", "gt_answers": [ "Purple" ], "image_path": "./data/STVQA/IIIT_text/img_001014.jpg", "model_name": "BLIP2" }, { "question": "How many letters are in the picture?", "answer": "five", "gt_answers": [ "4" ], "image_path": "./data/STVQA/IIIT_text/img_001014.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the photographer that took this photo?", "answer": "person", "gt_answers": [ "Andreas", "ANDREAS SCHAUER-VILLANUEVA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000164944.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "person", "gt_answers": [ "463" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000123935.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "san francisco", "gt_answers": [ "278849" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000123935.jpg", "model_name": "BLIP2" }, { "question": "What amusement park is mentioned on the banner in the stands?", "answer": "amusement park", "gt_answers": [ "Disneyland" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000550655.jpg", "model_name": "BLIP2" }, { "question": "What isnt allowed in the upcoming area?", "answer": "vehicles bicycles skateboards glass containers animals reptiles", "gt_answers": [ "Vehicles,bicycles,skates,skateboards,glass containers,animals/reptiles" ], "image_path": "./data/STVQA/VisualGenome/1/2357153.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the train?", "answer": "polson logging co", "gt_answers": [ "Polson Logging co" ], "image_path": "./data/STVQA/VisualGenome/1/2341912.jpg", "model_name": "BLIP2" }, { "question": "What does this sign in front of this building read?", "answer": "delhi high court", "gt_answers": [ "DELHI HIGH COURT" ], "image_path": "./data/STVQA/IIIT_text/img_000502.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the taxi cab?", "answer": "person", "gt_answers": [ "2431" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000566395.jpg", "model_name": "BLIP2" }, { "question": "Which country is this used in?", "answer": "china", "gt_answers": [ "China" ], "image_path": "./data/STVQA/imageNet/n03417042_17865.JPEG", "model_name": "BLIP2" }, { "question": "How many windshield wipers are on the vehicle?", "answer": "two", "gt_answers": [ "3" ], "image_path": "./data/STVQA/imageNet/n03417042_17865.JPEG", "model_name": "BLIP2" }, { "question": "What type of business is on the corner of the street?", "answer": "bank", "gt_answers": [ "Bank" ], "image_path": "./data/STVQA/VisualGenome/1/2369773.jpg", "model_name": "BLIP2" }, { "question": "What year was the Monticello Convention held?", "answer": "1787", "gt_answers": [ "1852" ], "image_path": "./data/STVQA/imageNet/n04355338_2362.JPEG", "model_name": "BLIP2" }, { "question": "What word is on the train?", "answer": "transit", "gt_answers": [ "TRANSIT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000211381.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the yellow shop?", "answer": "person", "gt_answers": [ "Babos" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000483692.jpg", "model_name": "BLIP2" }, { "question": "What brand is the surf board?", "answer": "hawaii", "gt_answers": [ "Lifestyle" ], "image_path": "./data/STVQA/VisualGenome/1/2329658.jpg", "model_name": "BLIP2" }, { "question": "What letters are showing at the top of the image?", "answer": "fire iron way", "gt_answers": [ "FI" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414082.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2371483.jpg", "model_name": "BLIP2" }, { "question": "What does the largest word on the plane say?", "answer": "canada", "gt_answers": [ "express" ], "image_path": "./data/STVQA/VisualGenome/1/2328061.jpg", "model_name": "BLIP2" }, { "question": "What word is on the poster?", "answer": "free", "gt_answers": [ "Cats" ], "image_path": "./data/STVQA/icdar/img_692.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "id", "gt_answers": [ "178" ], "image_path": "./data/STVQA/VisualGenome/1/286028.jpg", "model_name": "BLIP2" }, { "question": "What is the shop called?", "answer": "james murphy", "gt_answers": [ "P.T. MURPHY", "P.T.MURPHY" ], "image_path": "./data/STVQA/IIIT_text/2347.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the Airline on the plane", "answer": "british airways", "gt_answers": [ "British Airways" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000231572.jpg", "model_name": "BLIP2" }, { "question": "What is the distance of the two roads merging ?", "answer": "100 meters", "gt_answers": [ "100m", "100 m" ], "image_path": "./data/STVQA/VisualGenome/1/2356846.jpg", "model_name": "BLIP2" }, { "question": "During what year was this image copyrighted?", "answer": "2010", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000061992.jpg", "model_name": "BLIP2" }, { "question": "What station is this train stopped at?", "answer": "london bridge", "gt_answers": [ "COCKFOSTERS" ], "image_path": "./data/STVQA/VisualGenome/1/2331217.jpg", "model_name": "BLIP2" }, { "question": "What is written on the wings of the jet fighter planes?", "answer": "the united states of america", "gt_answers": [ "USAF" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000170451.jpg", "model_name": "BLIP2" }, { "question": "Which airline is this?", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/VisualGenome/2/3908.jpg", "model_name": "BLIP2" }, { "question": "What color shirt is the man ordering tickets wearing?", "answer": "blue", "gt_answers": [ "Purple" ], "image_path": "./data/STVQA/VisualGenome/2/3908.jpg", "model_name": "BLIP2" }, { "question": "What color tie is the man on the left wearing?", "answer": "blue", "gt_answers": [ "Red" ], "image_path": "./data/STVQA/VisualGenome/2/3908.jpg", "model_name": "BLIP2" }, { "question": "What type of remote is in the packaging?", "answer": "universal remote", "gt_answers": [ "UNIVERSAL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000137547.jpg", "model_name": "BLIP2" }, { "question": "What is the name on this stocking?", "answer": "william", "gt_answers": [ "William" ], "image_path": "./data/STVQA/imageNet/n03026506_3500.JPEG", "model_name": "BLIP2" }, { "question": "How many shower caps are in this package?", "answer": "three", "gt_answers": [ "3" ], "image_path": "./data/STVQA/imageNet/n04209133_10027.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "reserve bank of india", "gt_answers": [ "Reserve Bank of India" ], "image_path": "./data/STVQA/IIIT_text/img_000993.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the skier out in front in the picture?", "answer": "one", "gt_answers": [ "33" ], "image_path": "./data/STVQA/VisualGenome/1/2368096.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the plane?", "answer": "korea", "gt_answers": [ "KOREAN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000493251.jpg", "model_name": "BLIP2" }, { "question": "What is the middle word on the plane?", "answer": "air", "gt_answers": [ "air" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000493251.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the side of the plane?", "answer": "korea", "gt_answers": [ "cargo" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000493251.jpg", "model_name": "BLIP2" }, { "question": "what name is in the logo on the man's shirt?", "answer": "person", "gt_answers": [ "opper" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000171321.jpg", "model_name": "BLIP2" }, { "question": "what company name appears on the man's hat?", "answer": "person", "gt_answers": [ "Topper" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000171321.jpg", "model_name": "BLIP2" }, { "question": "what letters appear on the lower right corner of the picture?", "answer": "person", "gt_answers": [ "KT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000171321.jpg", "model_name": "BLIP2" }, { "question": "What number is in the photo?", "answer": "3", "gt_answers": [ "42" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000224942.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the car's license plate?", "answer": "ct", "gt_answers": [ "DL9553" ], "image_path": "./data/STVQA/VisualGenome/1/2333273.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name on the umbrellas?", "answer": "person", "gt_answers": [ "Singha" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000167027.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the plaza on the sign?", "answer": "person plaza", "gt_answers": [ "Thailand Plaza", "THAILAND" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000167027.jpg", "model_name": "BLIP2" }, { "question": "What is the first word written in white at the bottom of the bus?", "answer": "bus", "gt_answers": [ "City" ], "image_path": "./data/STVQA/VisualGenome/1/2357817.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company building?", "answer": "nokia", "gt_answers": [ "Nokia" ], "image_path": "./data/STVQA/IIIT_text/img_000866.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the magazine?", "answer": "vampire", "gt_answers": [ "Vampirella" ], "image_path": "./data/STVQA/imageNet/n06596364_13599.JPEG", "model_name": "BLIP2" }, { "question": "What is the word on ths sign in this picture?", "answer": "electronics", "gt_answers": [ "SAMSUNG" ], "image_path": "./data/STVQA/icdar/img_512.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of gum?", "answer": "chewy", "gt_answers": [ "Doublemint" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000549409.jpg", "model_name": "BLIP2" }, { "question": "What number is on the front of the bus?", "answer": "300", "gt_answers": [ "300" ], "image_path": "./data/STVQA/VisualGenome/2/2406968.jpg", "model_name": "BLIP2" }, { "question": "What port is the bus parked in front of?", "answer": "st peter port", "gt_answers": [ "3" ], "image_path": "./data/STVQA/VisualGenome/2/2406968.jpg", "model_name": "BLIP2" }, { "question": "What is written on the rear section of the airplane\ufffd\ufffds wing?", "answer": "person", "gt_answers": [ "X-48B" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000403286.jpg", "model_name": "BLIP2" }, { "question": "What does the text on the shirt say?", "answer": "hot dog", "gt_answers": [ "Digard" ], "image_path": "./data/STVQA/VisualGenome/2/2403483.jpg", "model_name": "BLIP2" }, { "question": "What food is pictured here?", "answer": "hot dogs", "gt_answers": [ "Hot dog" ], "image_path": "./data/STVQA/VisualGenome/2/2403483.jpg", "model_name": "BLIP2" }, { "question": "where is this bus headed?", "answer": "birmingham city centre", "gt_answers": [ "Ecclesfield" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000223091.jpg", "model_name": "BLIP2" }, { "question": "what company runs this bus?", "answer": "person", "gt_answers": [ "stagecoach stagecoach", "STAGECOACH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000223091.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the back left of the bus?", "answer": "person", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000220390.jpg", "model_name": "BLIP2" }, { "question": "When is the morning rubbish collection?", "answer": "the morning rubbish collection is at 7am", "gt_answers": [ "10.30am to noon", "1030AM TO NOON" ], "image_path": "./data/STVQA/VisualGenome/1/2355545.jpg", "model_name": "BLIP2" }, { "question": "When is the evening rubbish collection?", "answer": "on tuesday and thursday", "gt_answers": [ "10.00pm to midnight", "1000PM TO MIDNIGHT" ], "image_path": "./data/STVQA/VisualGenome/1/2355545.jpg", "model_name": "BLIP2" }, { "question": "What does the green sticker on this sign say?", "answer": "use of car horn here", "gt_answers": [ "SOLVE" ], "image_path": "./data/STVQA/VisualGenome/1/2355388.jpg", "model_name": "BLIP2" }, { "question": "Who took this photo?", "answer": "daniel", "gt_answers": [ "Ivan Bustamante" ], "image_path": "./data/STVQA/VisualGenome/2/2416344.jpg", "model_name": "BLIP2" }, { "question": "What city is referenced on the man's shirt?", "answer": "chicago", "gt_answers": [ "New York" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000197001.jpg", "model_name": "BLIP2" }, { "question": "What is written in green on the side of the bus?", "answer": "dog daycare", "gt_answers": [ "Dog Daycare" ], "image_path": "./data/STVQA/VisualGenome/1/2340016.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the baby care system?", "answer": "pacifier dispenser", "gt_answers": [ "KOALA KARE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000027298.jpg", "model_name": "BLIP2" }, { "question": "How many bicycles are there?", "answer": "two", "gt_answers": [ "Two", "P" ], "image_path": "./data/STVQA/VisualGenome/1/2319317.jpg", "model_name": "BLIP2" }, { "question": "What is the hot air balloon advertising?", "answer": "lavans", "gt_answers": [ "Lavans" ], "image_path": "./data/STVQA/imageNet/n02782093_39116.JPEG", "model_name": "BLIP2" }, { "question": "What company is sponsoring the hot air balloon?", "answer": "lavans", "gt_answers": [ "Lavans" ], "image_path": "./data/STVQA/imageNet/n02782093_39116.JPEG", "model_name": "BLIP2" }, { "question": "What are the blue printed words on the hot air ballon?", "answer": "lavans", "gt_answers": [ "Lavans" ], "image_path": "./data/STVQA/imageNet/n02782093_39116.JPEG", "model_name": "BLIP2" }, { "question": "Who holds the copyright?", "answer": "person", "gt_answers": [ "Tony Northrup" ], "image_path": "./data/STVQA/imageNet/n02444819_308.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the train operator?", "answer": "person", "gt_answers": [ "Fret" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000027504.jpg", "model_name": "BLIP2" }, { "question": "what street is shown?", "answer": "a street", "gt_answers": [ "18th", "18TH STREET" ], "image_path": "./data/STVQA/VisualGenome/2/2406204.jpg", "model_name": "BLIP2" }, { "question": "what is the sign refer", "answer": "refer", "gt_answers": [ "amy581014" ], "image_path": "./data/STVQA/imageNet/n03131574_1780.JPEG", "model_name": "BLIP2" }, { "question": "What does CSA stand for?", "answer": "community supported agriculture", "gt_answers": [ "Community Supported Agriculture" ], "image_path": "./data/STVQA/VisualGenome/2/2405515.jpg", "model_name": "BLIP2" }, { "question": "What does the text in white say?", "answer": "mcdonald's ice cream", "gt_answers": [ "Jack Pot" ], "image_path": "./data/STVQA/imageNet/n04243546_4482.JPEG", "model_name": "BLIP2" }, { "question": "What sport is being played?", "answer": "baseball", "gt_answers": [ "Baseball" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337648.jpg", "model_name": "BLIP2" }, { "question": "Is this a child or adult game?", "answer": "child", "gt_answers": [ "Child" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337648.jpg", "model_name": "BLIP2" }, { "question": "What color is their uniform?", "answer": "green", "gt_answers": [ "Green and yellow", "Green & White" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000337648.jpg", "model_name": "BLIP2" }, { "question": "What text is on the bottom of the product?", "answer": "swiss made", "gt_answers": [ "Super" ], "image_path": "./data/STVQA/imageNet/n02910353_10857.JPEG", "model_name": "BLIP2" }, { "question": "What does the monument sign say?", "answer": "st mary's cathedral", "gt_answers": [ "Homo-monument" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000276311.jpg", "model_name": "BLIP2" }, { "question": "what team is on the tshirt?", "answer": "atlanta braves", "gt_answers": [ "Braves" ], "image_path": "./data/STVQA/VisualGenome/1/2358109.jpg", "model_name": "BLIP2" }, { "question": "What is the first word seen on the display?", "answer": "liquor", "gt_answers": [ "Liquor" ], "image_path": "./data/STVQA/imageNet/n04525305_11287.JPEG", "model_name": "BLIP2" }, { "question": "Who took this photo?", "answer": "person", "gt_answers": [ "Roger Price" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000028503.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2369925.jpg", "model_name": "BLIP2" }, { "question": "Where is this picture taken?", "answer": "antique and vintage art gallery", "gt_answers": [ "ANTIQUE & FINE ARTS AUCTIONS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000523571.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the sail?", "answer": "person", "gt_answers": [ "159" ], "image_path": "./data/STVQA/imageNet/n04612504_15248.JPEG", "model_name": "BLIP2" }, { "question": "What letter is near the top of the sail?", "answer": "person", "gt_answers": [ "Y" ], "image_path": "./data/STVQA/imageNet/n04612504_15248.JPEG", "model_name": "BLIP2" }, { "question": "What fuel does the truck run on?", "answer": "diesel", "gt_answers": [ "natural gas" ], "image_path": "./data/STVQA/VisualGenome/2/2414566.jpg", "model_name": "BLIP2" }, { "question": "What year is timestamped on the photograph?", "answer": "2010", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/VisualGenome/2/2414566.jpg", "model_name": "BLIP2" }, { "question": "What agency captured this picture?", "answer": "wyoming game and fish department", "gt_answers": [ "Wyoming Game and Fish Department" ], "image_path": "./data/STVQA/imageNet/n02443484_1894.JPEG", "model_name": "BLIP2" }, { "question": "What company owns this photo?", "answer": "person", "gt_answers": [ "Wyoming Game and Fish Department" ], "image_path": "./data/STVQA/imageNet/n02443484_1894.JPEG", "model_name": "BLIP2" }, { "question": "What is the website listed on the bottom of the photo?", "answer": "person", "gt_answers": [ "http://www.chuqui.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000496118.jpg", "model_name": "BLIP2" }, { "question": "What number is the player wearing?", "answer": "11", "gt_answers": [ "17" ], "image_path": "./data/STVQA/VisualGenome/2/2414770.jpg", "model_name": "BLIP2" }, { "question": "where is the tram travelling to?", "answer": "the seafront", "gt_answers": [ "STARR GATE" ], "image_path": "./data/STVQA/imageNet/n04335435_24175.JPEG", "model_name": "BLIP2" }, { "question": "Which country's Government is referred to in this image?", "answer": "canada", "gt_answers": [ "Canada" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000155981.jpg", "model_name": "BLIP2" }, { "question": "What is the name for the silver object in the image?", "answer": "plane iron", "gt_answers": [ "Blade Or Iron" ], "image_path": "./data/STVQA/imageNet/n03954731_40761.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant", "answer": "silk", "gt_answers": [ "Thai at Silk" ], "image_path": "./data/STVQA/VisualGenome/2/1359.jpg", "model_name": "BLIP2" }, { "question": "What letter is in red?", "answer": "person", "gt_answers": [ "A" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000029116.jpg", "model_name": "BLIP2" }, { "question": "What does the smallest text say?", "answer": "a skateboarder in the air", "gt_answers": [ "images" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000029116.jpg", "model_name": "BLIP2" }, { "question": "What letter is in pink?", "answer": "person", "gt_answers": [ "A" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000029116.jpg", "model_name": "BLIP2" }, { "question": "What is this plane's license number?", "answer": "it is a private plane", "gt_answers": [ "PH-DEZ" ], "image_path": "./data/STVQA/VisualGenome/1/2352809.jpg", "model_name": "BLIP2" }, { "question": "What is written on the top sign?", "answer": "danforth ave", "gt_answers": [ "DANFORTH AVE. 479" ], "image_path": "./data/STVQA/VisualGenome/1/2331979.jpg", "model_name": "BLIP2" }, { "question": "What do the signs say?", "answer": "no parking on the street", "gt_answers": [ "Regardez", "REGARDEZ A GAUCHE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000561735.jpg", "model_name": "BLIP2" }, { "question": "What is the truck's license plate number?", "answer": "person", "gt_answers": [ "G81083" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000068777.jpg", "model_name": "BLIP2" }, { "question": "What is the web URL shown?", "answer": "person", "gt_answers": [ "WWW.MAVERIX.ORG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000123711.jpg", "model_name": "BLIP2" }, { "question": "what does the red sign read?", "answer": "park road east", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2332282.jpg", "model_name": "BLIP2" }, { "question": "what number is the bus", "answer": "8", "gt_answers": [ "704" ], "image_path": "./data/STVQA/VisualGenome/2/2406821.jpg", "model_name": "BLIP2" }, { "question": "What word is in the top left corner?", "answer": "bath", "gt_answers": [ "corbis" ], "image_path": "./data/STVQA/imageNet/n04553703_20667.JPEG", "model_name": "BLIP2" }, { "question": "What booth are the guests visiting?", "answer": "vlaser", "gt_answers": [ "viLaser" ], "image_path": "./data/STVQA/VisualGenome/1/2349174.jpg", "model_name": "BLIP2" }, { "question": "What is the train number", "answer": "the train number is person", "gt_answers": [ "17" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000223288.jpg", "model_name": "BLIP2" }, { "question": "What are the words on the bus?", "answer": "london red bus", "gt_answers": [ "another red bus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000003988.jpg", "model_name": "BLIP2" }, { "question": "What is the red vehicle in the photo?", "answer": "bus", "gt_answers": [ "bus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000003988.jpg", "model_name": "BLIP2" }, { "question": "What is this buses number?", "answer": "no number", "gt_answers": [ "24" ], "image_path": "./data/STVQA/imageNet/n04146614_14916.JPEG", "model_name": "BLIP2" }, { "question": "Which number is a part of an add on the side of the bus?", "answer": "999", "gt_answers": [ "1.58" ], "image_path": "./data/STVQA/imageNet/n04146614_14916.JPEG", "model_name": "BLIP2" }, { "question": "What company is this?", "answer": "papa johns", "gt_answers": [ "Papa Johns" ], "image_path": "./data/STVQA/VisualGenome/2/1133.jpg", "model_name": "BLIP2" }, { "question": "What color is the sign in this picture?", "answer": "blue", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/VisualGenome/1/2375982.jpg", "model_name": "BLIP2" }, { "question": "What color is the text on the sign?", "answer": "blue", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/1/2375982.jpg", "model_name": "BLIP2" }, { "question": "What is the largest word on the back of the blue shirt?", "answer": "person", "gt_answers": [ "Super" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000246719.jpg", "model_name": "BLIP2" }, { "question": "What number is on the front of the train?", "answer": "73", "gt_answers": [ "159106" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000443533.jpg", "model_name": "BLIP2" }, { "question": "What are the 5 letters at the very top of the image?", "answer": "afes", "gt_answers": [ "AAFES" ], "image_path": "./data/STVQA/imageNet/n03425413_12685.JPEG", "model_name": "BLIP2" }, { "question": "What type of gas is found under the 4 stars?", "answer": "premium gas", "gt_answers": [ "UNLEADED" ], "image_path": "./data/STVQA/imageNet/n03425413_12685.JPEG", "model_name": "BLIP2" }, { "question": "What city is this?", "answer": "chicago", "gt_answers": [ "Memphis" ], "image_path": "./data/STVQA/VisualGenome/1/2342646.jpg", "model_name": "BLIP2" }, { "question": "what are the letters on the blue shirt?", "answer": "person", "gt_answers": [ "jcrew" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000145189.jpg", "model_name": "BLIP2" }, { "question": "What brand is this product?", "answer": "person", "gt_answers": [ "SUSE LINUX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000563270.jpg", "model_name": "BLIP2" }, { "question": "What pain should you stop?", "answer": "back pain", "gt_answers": [ "Foot, Knee & Back" ], "image_path": "./data/STVQA/icdar/img_493.jpg", "model_name": "BLIP2" }, { "question": "What is the building labelled by the sign?", "answer": "sydney railway station", "gt_answers": [ "Fire Station" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000239040.jpg", "model_name": "BLIP2" }, { "question": "WHAT COLOR IS THE BUS", "answer": "red", "gt_answers": [ "RED" ], "image_path": "./data/STVQA/IIIT_text/4430.jpg", "model_name": "BLIP2" }, { "question": "WHERE IS IT GOING TO", "answer": "london", "gt_answers": [ "PADDINGTON" ], "image_path": "./data/STVQA/IIIT_text/4430.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE BUS NUMBER", "answer": "person", "gt_answers": [ "36" ], "image_path": "./data/STVQA/IIIT_text/4430.jpg", "model_name": "BLIP2" }, { "question": "What is the photo tag?", "answer": "chimpanzee", "gt_answers": [ "Tony Northrup", "\ufffd\ufffd TONY NORTHRUP" ], "image_path": "./data/STVQA/imageNet/n02481823_4165.JPEG", "model_name": "BLIP2" }, { "question": "What products does this store sell?", "answer": "antiques", "gt_answers": [ "ANTIQUES" ], "image_path": "./data/STVQA/imageNet/n06874185_29995.JPEG", "model_name": "BLIP2" }, { "question": "what is written on the green boat?", "answer": "numbers", "gt_answers": [ "869SE5" ], "image_path": "./data/STVQA/VisualGenome/2/2407889.jpg", "model_name": "BLIP2" }, { "question": "what is written on the blue boat?", "answer": "numbers", "gt_answers": [ "4057SE5" ], "image_path": "./data/STVQA/VisualGenome/2/2407889.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo copyrighted?", "answer": "2010", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/VisualGenome/2/2403544.jpg", "model_name": "BLIP2" }, { "question": "What does the sign in the bus say?", "answer": "london bus map", "gt_answers": [ "Stopping" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000182104.jpg", "model_name": "BLIP2" }, { "question": "What company is on the van?", "answer": "samsung", "gt_answers": [ "FedEx" ], "image_path": "./data/STVQA/icdar/test_img_334.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of that truck?", "answer": "scania", "gt_answers": [ "Scania" ], "image_path": "./data/STVQA/VisualGenome/1/2345032.jpg", "model_name": "BLIP2" }, { "question": "What is the model of the truck?", "answer": "scania", "gt_answers": [ "R440" ], "image_path": "./data/STVQA/VisualGenome/1/2345032.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate of the truck?", "answer": "md md", "gt_answers": [ "FJ60LGK" ], "image_path": "./data/STVQA/VisualGenome/1/2345032.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this store?", "answer": "the fountain", "gt_answers": [ "The Fountain?", "The Fountain" ], "image_path": "./data/STVQA/VisualGenome/1/2350945.jpg", "model_name": "BLIP2" }, { "question": "What service is provided while you wait?", "answer": "an umbrella", "gt_answers": [ "Shoe Repair" ], "image_path": "./data/STVQA/VisualGenome/1/2350945.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boat in the picture?", "answer": "thunderchuck", "gt_answers": [ "Thunderchick" ], "image_path": "./data/STVQA/VisualGenome/1/2355027.jpg", "model_name": "BLIP2" }, { "question": "What number is displayed on the front of the building?", "answer": "4", "gt_answers": [ "1900" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000473503.jpg", "model_name": "BLIP2" }, { "question": "What is the place listed above the witch character?", "answer": "miami", "gt_answers": [ "Moomin Valley" ], "image_path": "./data/STVQA/imageNet/n04560804_16073.JPEG", "model_name": "BLIP2" }, { "question": "What is the copyrights to this picture?", "answer": "person", "gt_answers": [ "2013 FRANZ VENHAUS", "c 2013 franz venhaus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000236176.jpg", "model_name": "BLIP2" }, { "question": "What is the company name of the bus?", "answer": "city bus", "gt_answers": [ "RapidRide" ], "image_path": "./data/STVQA/IIIT_text/4298.jpg", "model_name": "BLIP2" }, { "question": "what kind on vehicle is this ?", "answer": "ambulance", "gt_answers": [ "Rode Kruis" ], "image_path": "./data/STVQA/imageNet/n03769881_2599.JPEG", "model_name": "BLIP2" }, { "question": "What do the four yellow signs say?", "answer": "no parking", "gt_answers": [ "JACO, OROTINA, ZARCERO, BAGACES", "jaco orotina zarcero bagaces" ], "image_path": "./data/STVQA/VisualGenome/1/2357671.jpg", "model_name": "BLIP2" }, { "question": "What is written on the bike?", "answer": "harley davidson", "gt_answers": [ "Indian" ], "image_path": "./data/STVQA/VisualGenome/1/2333272.jpg", "model_name": "BLIP2" }, { "question": "What brand is carried by the blue truck?", "answer": "corona extra", "gt_answers": [ "Corona" ], "image_path": "./data/STVQA/IIIT_text/4898.jpg", "model_name": "BLIP2" }, { "question": "What is the brand off the white car", "answer": "person", "gt_answers": [ "Nissan" ], "image_path": "./data/STVQA/IIIT_text/4898.jpg", "model_name": "BLIP2" }, { "question": "What do the biggest letters on this bus say?", "answer": "the party bus", "gt_answers": [ "The Honesty Shop" ], "image_path": "./data/STVQA/VisualGenome/1/150374.jpg", "model_name": "BLIP2" }, { "question": "What brand name is on the bag?", "answer": "nestle", "gt_answers": [ "Nestle" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000836.jpg", "model_name": "BLIP2" }, { "question": "How much protein does the food contain per 100 grams?", "answer": "20 g", "gt_answers": [ "9.2 grams" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000836.jpg", "model_name": "BLIP2" }, { "question": "What company plane is this?", "answer": "delta airlines", "gt_answers": [ "Delta" ], "image_path": "./data/STVQA/VisualGenome/1/2363863.jpg", "model_name": "BLIP2" }, { "question": "What is the gate number?", "answer": "a", "gt_answers": [ "A12" ], "image_path": "./data/STVQA/VisualGenome/1/2363863.jpg", "model_name": "BLIP2" }, { "question": "What do the street signs read?", "answer": "illinois and 2nd street", "gt_answers": [ "1100 ILLINOIS 600 22ND ST" ], "image_path": "./data/STVQA/VisualGenome/1/2322321.jpg", "model_name": "BLIP2" }, { "question": "What is the Brand of the Computer to the far right?", "answer": "acer", "gt_answers": [ "sony" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000305501.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the weekly newsmagazine", "answer": "outlook", "gt_answers": [ "Outlook" ], "image_path": "./data/STVQA/VisualGenome/1/2329122.jpg", "model_name": "BLIP2" }, { "question": "What cream cheese is there?", "answer": "person", "gt_answers": [ "PHILADELPHIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000064389.jpg", "model_name": "BLIP2" }, { "question": "What is the word that is displayed inside the red circle?", "answer": "pay", "gt_answers": [ "FUR" ], "image_path": "./data/STVQA/imageNet/n03902125_15097.JPEG", "model_name": "BLIP2" }, { "question": "What brand of phone is this?", "answer": "sony", "gt_answers": [ "Siemens" ], "image_path": "./data/STVQA/IIIT_text/img_001141.jpg", "model_name": "BLIP2" }, { "question": "What does the top street sign say?", "answer": "one way", "gt_answers": [ "JOEY RAMONE PLACE" ], "image_path": "./data/STVQA/VisualGenome/1/2370496.jpg", "model_name": "BLIP2" }, { "question": "Which street sign is higher than the other?", "answer": "stop sign", "gt_answers": [ "TERRACE" ], "image_path": "./data/STVQA/VisualGenome/2/2415988.jpg", "model_name": "BLIP2" }, { "question": "What does the logo in the image say?", "answer": "skateboards", "gt_answers": [ "MyProfe" ], "image_path": "./data/STVQA/VisualGenome/1/2362783.jpg", "model_name": "BLIP2" }, { "question": "What does the bench say?", "answer": "city of london", "gt_answers": [ "City of London" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000103583.jpg", "model_name": "BLIP2" }, { "question": "how is the food seasoned?", "answer": "lemon garlic", "gt_answers": [ "lemon garlic" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001124.jpg", "model_name": "BLIP2" }, { "question": "What words present in the banner", "answer": "hello world", "gt_answers": [ "Merry X-mas" ], "image_path": "./data/STVQA/VisualGenome/1/2350317.jpg", "model_name": "BLIP2" }, { "question": "Which Australian region offers food similar to what you might find at this restaurant?", "answer": "south australia", "gt_answers": [ "OUTBACK" ], "image_path": "./data/STVQA/icdar/test_img_362.jpg", "model_name": "BLIP2" }, { "question": "Where is the location on the map?", "answer": "zell am see", "gt_answers": [ "Zeilitzheim" ], "image_path": "./data/STVQA/VisualGenome/1/2317055.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the blue sign?", "answer": "map", "gt_answers": [ "Raiffeisenbank" ], "image_path": "./data/STVQA/VisualGenome/1/2317055.jpg", "model_name": "BLIP2" }, { "question": "This red sign displays which brand name?", "answer": "vodafone", "gt_answers": [ "Vodafone" ], "image_path": "./data/STVQA/IIIT_text/img_001255.jpg", "model_name": "BLIP2" }, { "question": "What two actions are not allowed in this specific location?", "answer": "riding bicycles or skateboards", "gt_answers": [ "TIPPING & DUMPING" ], "image_path": "./data/STVQA/icdar/img_15.jpg", "model_name": "BLIP2" }, { "question": "What is the text at the bottom of the back of the vehicle", "answer": "it says sweden", "gt_answers": [ "Faymonville" ], "image_path": "./data/STVQA/VisualGenome/1/2365194.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the sticker on the left-backside of the truck", "answer": "0", "gt_answers": [ "60" ], "image_path": "./data/STVQA/VisualGenome/1/2365194.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the punching bags?", "answer": "dubat", "gt_answers": [ "Tabo" ], "image_path": "./data/STVQA/imageNet/n04023962_16417.JPEG", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "person", "gt_answers": [ "55" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000324852.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE WORD WRITTEN ON THE CHAIR", "answer": "person", "gt_answers": [ "Coca-Cola" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000427555.jpg", "model_name": "BLIP2" }, { "question": "What number is on the top of the train?", "answer": "person", "gt_answers": [ "90251" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373267.jpg", "model_name": "BLIP2" }, { "question": "What does the front of the train say?", "answer": "person", "gt_answers": [ "Amtrak" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373267.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the bus", "answer": "liberty liner", "gt_answers": [ "VICTORY LINER" ], "image_path": "./data/STVQA/VisualGenome/1/2367237.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS NUMBER of THE BUS", "answer": "number of the bus", "gt_answers": [ "2119" ], "image_path": "./data/STVQA/VisualGenome/1/2367237.jpg", "model_name": "BLIP2" }, { "question": "what is letter written behind the bus", "answer": "l", "gt_answers": [ "p" ], "image_path": "./data/STVQA/VisualGenome/1/2367237.jpg", "model_name": "BLIP2" }, { "question": "What number is the train?", "answer": "mta train", "gt_answers": [ "7850" ], "image_path": "./data/STVQA/VisualGenome/2/2408899.jpg", "model_name": "BLIP2" }, { "question": "What team is on his shirt?", "answer": "person", "gt_answers": [ "Red Sox", "SOX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000450684.jpg", "model_name": "BLIP2" }, { "question": "What is written in green on the white food truck ?", "answer": "mx taco", "gt_answers": [ "Maki Taco" ], "image_path": "./data/STVQA/VisualGenome/1/2336452.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store in the background?", "answer": "walmart", "gt_answers": [ "Carphone Warehouse" ], "image_path": "./data/STVQA/VisualGenome/2/2410379.jpg", "model_name": "BLIP2" }, { "question": "What text is displayed on the bottom right?", "answer": "rainbow loom", "gt_answers": [ "estrelaaa Paulinha" ], "image_path": "./data/STVQA/imageNet/n03476684_13918.JPEG", "model_name": "BLIP2" }, { "question": "What text is seen as heading on the blue book?", "answer": "i am a computer", "gt_answers": [ "Mountain Biking" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000062053.jpg", "model_name": "BLIP2" }, { "question": "What is the photographers name?", "answer": "person", "gt_answers": [ "Jordan Light" ], "image_path": "./data/STVQA/imageNet/n03956157_7164.JPEG", "model_name": "BLIP2" }, { "question": "What is the first blue letter on the plane?", "answer": "u", "gt_answers": [ "n" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000166086.jpg", "model_name": "BLIP2" }, { "question": "What word is displayed on front of the bobsled?", "answer": "canada", "gt_answers": [ "Whelen" ], "image_path": "./data/STVQA/imageNet/n02860847_21360.JPEG", "model_name": "BLIP2" }, { "question": "A king is married to a what?", "answer": "queen", "gt_answers": [ "Queen" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015245.jpg", "model_name": "BLIP2" }, { "question": "What type of drink is there?", "answer": "soda", "gt_answers": [ "Arizona Fruit Punch." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000437801.jpg", "model_name": "BLIP2" }, { "question": "What is inside the brown case?", "answer": "a guitar", "gt_answers": [ "A guitar" ], "image_path": "./data/STVQA/VisualGenome/1/2324862.jpg", "model_name": "BLIP2" }, { "question": "What word is printed in red?", "answer": "darkness", "gt_answers": [ "Devil" ], "image_path": "./data/STVQA/imageNet/n06596364_6165.JPEG", "model_name": "BLIP2" }, { "question": "Who's name is in the title?", "answer": "miss jones", "gt_answers": [ "Miss Jones" ], "image_path": "./data/STVQA/imageNet/n06596364_6165.JPEG", "model_name": "BLIP2" }, { "question": "What is the first name on the tag??", "answer": "john", "gt_answers": [ "Nick" ], "image_path": "./data/STVQA/imageNet/n01735189_5072.JPEG", "model_name": "BLIP2" }, { "question": "What car manufacturer name is printed in the car?", "answer": "toyota", "gt_answers": [ "TOYOTA" ], "image_path": "./data/STVQA/VisualGenome/1/2341057.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the food truck?", "answer": "havana food truck", "gt_answers": [ "Caravan Crepes" ], "image_path": "./data/STVQA/VisualGenome/1/2358974.jpg", "model_name": "BLIP2" }, { "question": "What is the word shown in the blue neon sign box?", "answer": "car", "gt_answers": [ "open" ], "image_path": "./data/STVQA/IIIT_text/2042.jpg", "model_name": "BLIP2" }, { "question": "Where was the wine bottled?", "answer": "in argentina", "gt_answers": [ "Spain" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018960.jpg", "model_name": "BLIP2" }, { "question": "What is the red sign?", "answer": "a stop sign", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000288021.jpg", "model_name": "BLIP2" }, { "question": "What movie is advertised on the window ?", "answer": "exodus", "gt_answers": [ "Exodus: Gods and Kings", "EXODUS" ], "image_path": "./data/STVQA/icdar/img_82.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "london street", "gt_answers": [ "London Street W2", "LONDON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000103390.jpg", "model_name": "BLIP2" }, { "question": "Which city is it?", "answer": "london", "gt_answers": [ "Westminster" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000103390.jpg", "model_name": "BLIP2" }, { "question": "Who sponsers this event?", "answer": "nike", "gt_answers": [ "KIA" ], "image_path": "./data/STVQA/VisualGenome/1/2321697.jpg", "model_name": "BLIP2" }, { "question": "What word comes after Madison Square?", "answer": "new", "gt_answers": [ "Garden" ], "image_path": "./data/STVQA/VisualGenome/2/2402.jpg", "model_name": "BLIP2" }, { "question": "What text is on the black sign?", "answer": "food selection", "gt_answers": [ "Hotels" ], "image_path": "./data/STVQA/icdar/test_img_203.jpg", "model_name": "BLIP2" }, { "question": "What text is on the red sign?", "answer": "food selection", "gt_answers": [ "Food directory" ], "image_path": "./data/STVQA/icdar/test_img_203.jpg", "model_name": "BLIP2" }, { "question": "What is written in block lettes on the bike?", "answer": "police", "gt_answers": [ "FORT LAUDERDALE POLICE" ], "image_path": "./data/STVQA/VisualGenome/1/2356370.jpg", "model_name": "BLIP2" }, { "question": "What store is that?", "answer": "beauty store", "gt_answers": [ "Aveda" ], "image_path": "./data/STVQA/icdar/img_563.jpg", "model_name": "BLIP2" }, { "question": "What type of Aveda store is that?", "answer": "a retail store", "gt_answers": [ "Team Salon Aveda", "TEAM SALON" ], "image_path": "./data/STVQA/icdar/img_563.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE LICENSE PLATE OF THE BLUE BUS?", "answer": "md md", "gt_answers": [ "YNO3NCF", "YNO3 NCF" ], "image_path": "./data/STVQA/VisualGenome/1/2349683.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN FRONT OF THE BLUE BUS", "answer": "london bus", "gt_answers": [ "ARRIVA" ], "image_path": "./data/STVQA/VisualGenome/1/2349683.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the remote?", "answer": "channel", "gt_answers": [ "One" ], "image_path": "./data/STVQA/VisualGenome/2/2415871.jpg", "model_name": "BLIP2" }, { "question": "What number is on the tractor?", "answer": "no number", "gt_answers": [ "130" ], "image_path": "./data/STVQA/VisualGenome/1/2339412.jpg", "model_name": "BLIP2" }, { "question": "What is this animal?", "answer": "cat", "gt_answers": [ "cat" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000050794.jpg", "model_name": "BLIP2" }, { "question": "When was this picture taken?", "answer": "indian", "gt_answers": [ "2010/02/09" ], "image_path": "./data/STVQA/VisualGenome/1/2375052.jpg", "model_name": "BLIP2" }, { "question": "What is on the bus?", "answer": "a map of madrid", "gt_answers": [ "MADRID CITY TOUR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000487167.jpg", "model_name": "BLIP2" }, { "question": "Who is the house being sold by?", "answer": "owner", "gt_answers": [ "Owner" ], "image_path": "./data/STVQA/VisualGenome/1/2365018.jpg", "model_name": "BLIP2" }, { "question": "What number is on the sign?", "answer": "3", "gt_answers": [ "2590105", "259 0105" ], "image_path": "./data/STVQA/VisualGenome/1/2365018.jpg", "model_name": "BLIP2" }, { "question": "What text message is written on the pink phone?", "answer": "i love you", "gt_answers": [ "I love you" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000106438.jpg", "model_name": "BLIP2" }, { "question": "What color is the tennis ball?", "answer": "white", "gt_answers": [ "yellow", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2325333.jpg", "model_name": "BLIP2" }, { "question": "WHAT NAME IN THE FIGURE?", "answer": "cauliflower", "gt_answers": [ "OLEKSIY MAKSYMENKO" ], "image_path": "./data/STVQA/imageNet/n07715103_8561.JPEG", "model_name": "BLIP2" }, { "question": "What model car is sold at this location?", "answer": "honda civic", "gt_answers": [ "Honda" ], "image_path": "./data/STVQA/IIIT_text/img_000624.jpg", "model_name": "BLIP2" }, { "question": "Who is the photographer?", "answer": "rajesh kumar", "gt_answers": [ "Sanmoy Photography", "SANMOY" ], "image_path": "./data/STVQA/IIIT_text/6599.jpg", "model_name": "BLIP2" }, { "question": "What word is on the warning sign?", "answer": "zone", "gt_answers": [ "ZONE" ], "image_path": "./data/STVQA/VisualGenome/2/2407077.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the train in the foreground?", "answer": "050", "gt_answers": [ "444 007" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000312541.jpg", "model_name": "BLIP2" }, { "question": "What drink brand is advertised on the green umbrella?", "answer": "pepsi", "gt_answers": [ "Heineken" ], "image_path": "./data/STVQA/VisualGenome/2/2404959.jpg", "model_name": "BLIP2" }, { "question": "What is listed on the white street sign?", "answer": "person", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000143749.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the building?", "answer": "emporis", "gt_answers": [ "Emporis" ], "image_path": "./data/STVQA/IIIT_text/img_000857.jpg", "model_name": "BLIP2" }, { "question": "what does the sign say?", "answer": "stop", "gt_answers": [ "Please stop unless cave trained", "PLEASE STOP UNLESS CAVE TRAINED We Care!" ], "image_path": "./data/STVQA/VisualGenome/1/2317658.jpg", "model_name": "BLIP2" }, { "question": "how many childrens on there", "answer": "two", "gt_answers": [ "1", "3" ], "image_path": "./data/STVQA/VisualGenome/1/2372120.jpg", "model_name": "BLIP2" }, { "question": "what the children wear on head", "answer": "helmet", "gt_answers": [ "helmet" ], "image_path": "./data/STVQA/VisualGenome/1/2372120.jpg", "model_name": "BLIP2" }, { "question": "how many peoples on there", "answer": "three", "gt_answers": [ "4", "5" ], "image_path": "./data/STVQA/VisualGenome/1/2372120.jpg", "model_name": "BLIP2" }, { "question": "In what city is this fire station?", "answer": "person", "gt_answers": [ "Dallas" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000514454.jpg", "model_name": "BLIP2" }, { "question": "What is the station number on the building?", "answer": "10", "gt_answers": [ "10" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000514454.jpg", "model_name": "BLIP2" }, { "question": "Who is the photo copyrighted to?", "answer": "daniel mcdonald", "gt_answers": [ "Nick Scobel" ], "image_path": "./data/STVQA/imageNet/n01632458_702.JPEG", "model_name": "BLIP2" }, { "question": "What year was this photo copyrighted?", "answer": "2009", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/imageNet/n01632458_702.JPEG", "model_name": "BLIP2" }, { "question": "What word is on the front of the bus?", "answer": "bus", "gt_answers": [ "Garage" ], "image_path": "./data/STVQA/VisualGenome/1/2373266.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the tool set?", "answer": "torc", "gt_answers": [ "Royal Craft" ], "image_path": "./data/STVQA/imageNet/n04154565_26257.JPEG", "model_name": "BLIP2" }, { "question": "What does the neon sign say?", "answer": "schaffy", "gt_answers": [ "SCHLAFLY" ], "image_path": "./data/STVQA/VisualGenome/1/2340101.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the airline displayed on the tail fin of the airplane?", "answer": "southwest airlines", "gt_answers": [ "southwest" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000262492.jpg", "model_name": "BLIP2" }, { "question": "What is the clock brand being advertised in the background?", "answer": "seiko", "gt_answers": [ "Rolex" ], "image_path": "./data/STVQA/VisualGenome/1/2320304.jpg", "model_name": "BLIP2" }, { "question": "Which country is the castle located?", "answer": "scotland", "gt_answers": [ "Scotland" ], "image_path": "./data/STVQA/imageNet/n02980441_30054.JPEG", "model_name": "BLIP2" }, { "question": "What is written inside the octagon?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/2/2414585.jpg", "model_name": "BLIP2" }, { "question": "What food will she travel for?", "answer": "pizza", "gt_answers": [ "Vegan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000559836.jpg", "model_name": "BLIP2" }, { "question": "What are the first 3 numbers on the taxi's license plate?", "answer": "123", "gt_answers": [ "389" ], "image_path": "./data/STVQA/VisualGenome/1/2329280.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the sticker on the taxi front door?", "answer": "cab driver", "gt_answers": [ "Taxi Fare" ], "image_path": "./data/STVQA/VisualGenome/1/2329280.jpg", "model_name": "BLIP2" }, { "question": "What does word is on the red sign?", "answer": "alto", "gt_answers": [ "ALTO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000559755.jpg", "model_name": "BLIP2" }, { "question": "Which street is in the 1000 direction?", "answer": "schrader", "gt_answers": [ "Schrader" ], "image_path": "./data/STVQA/VisualGenome/2/2400399.jpg", "model_name": "BLIP2" }, { "question": "which street is in the 150 direction?", "answer": "schrader", "gt_answers": [ "Carl" ], "image_path": "./data/STVQA/VisualGenome/2/2400399.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the bus?", "answer": "3", "gt_answers": [ "573" ], "image_path": "./data/STVQA/VisualGenome/1/2317389.jpg", "model_name": "BLIP2" }, { "question": "What is the letter on the player's hats?", "answer": "person", "gt_answers": [ "D", "B" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000242074.jpg", "model_name": "BLIP2" }, { "question": "What product is shown in the image?", "answer": "shea cashmere body cream", "gt_answers": [ "Body Cream" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014404.jpg", "model_name": "BLIP2" }, { "question": "How much does the product weigh?", "answer": "1 oz", "gt_answers": [ "198 g" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000014404.jpg", "model_name": "BLIP2" }, { "question": "What is the big word on the white sign?", "answer": "insead", "gt_answers": [ "INSEAD" ], "image_path": "./data/STVQA/icdar/test_img_30.jpg", "model_name": "BLIP2" }, { "question": "What name is on the building?", "answer": "person", "gt_answers": [ "Yates Wine Lodge", "YATISS WINE LODGE" ], "image_path": "./data/STVQA/IIIT_text/168.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the business?", "answer": "frutta fresca", "gt_answers": [ "Frutta fresca" ], "image_path": "./data/STVQA/VisualGenome/1/2329380.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the building?\"", "answer": "train station", "gt_answers": [ "Garibaldi" ], "image_path": "./data/STVQA/VisualGenome/1/2359592.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the train?", "answer": "the train has no number", "gt_answers": [ "90" ], "image_path": "./data/STVQA/VisualGenome/1/2359592.jpg", "model_name": "BLIP2" }, { "question": "What brand of car is on the sign?", "answer": "saab", "gt_answers": [ "Saab" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000367335.jpg", "model_name": "BLIP2" }, { "question": "The building is for the reserve bank of which country?", "answer": "india", "gt_answers": [ "India" ], "image_path": "./data/STVQA/IIIT_text/img_000994.jpg", "model_name": "BLIP2" }, { "question": "What is the title given to this photo?", "answer": "infinite danger", "gt_answers": [ "imminent danger" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000151259.jpg", "model_name": "BLIP2" }, { "question": "Where was the photo taken?", "answer": "tokyo", "gt_answers": [ "zeros cape", "zeros - cape" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000151259.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the soccer club shown on the board?", "answer": "manchester united", "gt_answers": [ "Real Madrid CF", "RED MADRID CF" ], "image_path": "./data/STVQA/imageNet/n04149813_9247.JPEG", "model_name": "BLIP2" }, { "question": "what store in on the right?", "answer": "dillards", "gt_answers": [ "adidas" ], "image_path": "./data/STVQA/icdar/img_679.jpg", "model_name": "BLIP2" }, { "question": "What kind of fuel should only be used?", "answer": "unleaded gasoline", "gt_answers": [ "Unleaded" ], "image_path": "./data/STVQA/imageNet/n03841143_28510.JPEG", "model_name": "BLIP2" }, { "question": "What number is inside of the heart?", "answer": "one", "gt_answers": [ "350" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000103575.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the sign?", "answer": "no bicycles allowed", "gt_answers": [ "NO NEEN NEIN PAS DE VTT DANS LE HAMEAU" ], "image_path": "./data/STVQA/IIIT_text/5671.jpg", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the items?", "answer": "shanghai luoyang swimwear factory", "gt_answers": [ "Lipotes" ], "image_path": "./data/STVQA/imageNet/n02807133_8098.JPEG", "model_name": "BLIP2" }, { "question": "What website is displayed?", "answer": "com", "gt_answers": [ "alibaba.com.cn" ], "image_path": "./data/STVQA/imageNet/n02807133_8098.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the bookstore?", "answer": "oxam", "gt_answers": [ "Oxfam Bookshop" ], "image_path": "./data/STVQA/imageNet/n02871525_19356.JPEG", "model_name": "BLIP2" }, { "question": "Who are they looking to hire?", "answer": "a manager", "gt_answers": [ "Volunteers" ], "image_path": "./data/STVQA/imageNet/n02871525_19356.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the side of the bus?", "answer": "person", "gt_answers": [ "Bruckmann" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000557272.jpg", "model_name": "BLIP2" }, { "question": "What business is the man with the phone in front of?", "answer": "a restaurant", "gt_answers": [ "marine" ], "image_path": "./data/STVQA/VisualGenome/2/2409772.jpg", "model_name": "BLIP2" }, { "question": "Where is this bus going?", "answer": "to pick up passengers", "gt_answers": [ "Fullerton" ], "image_path": "./data/STVQA/VisualGenome/1/2344564.jpg", "model_name": "BLIP2" }, { "question": "What number is before the word Fullerton?", "answer": "3", "gt_answers": [ "43" ], "image_path": "./data/STVQA/VisualGenome/1/2344564.jpg", "model_name": "BLIP2" }, { "question": "What number is in the window of the bus?", "answer": "3", "gt_answers": [ "4311" ], "image_path": "./data/STVQA/VisualGenome/1/2344564.jpg", "model_name": "BLIP2" }, { "question": "Who took this photo?", "answer": "me", "gt_answers": [ "Michael Hanscom" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000578387.jpg", "model_name": "BLIP2" }, { "question": "What letters are written on the wall in red?", "answer": "tiw", "gt_answers": [ "TNW" ], "image_path": "./data/STVQA/VisualGenome/1/286032.jpg", "model_name": "BLIP2" }, { "question": "What is the car model name displayed across the top?", "answer": "ford model t road trip", "gt_answers": [ "Model T" ], "image_path": "./data/STVQA/imageNet/n03777568_17058.JPEG", "model_name": "BLIP2" }, { "question": "What does the blue text displayed across the top say?", "answer": "model t road trip", "gt_answers": [ "Road Trip" ], "image_path": "./data/STVQA/imageNet/n03777568_17058.JPEG", "model_name": "BLIP2" }, { "question": "What abbreviated letters are written on the license plate?", "answer": "rv", "gt_answers": [ "BS DUDES" ], "image_path": "./data/STVQA/imageNet/n04065272_19543.JPEG", "model_name": "BLIP2" }, { "question": "What word is printed on the red sign?", "answer": "no parking", "gt_answers": [ "Slow" ], "image_path": "./data/STVQA/icdar/test_img_5.jpg", "model_name": "BLIP2" }, { "question": "Who wrote The human fly", "answer": "robert harris", "gt_answers": [ "T.C. BOYLE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000286009.jpg", "model_name": "BLIP2" }, { "question": "What does the light blue banner read?", "answer": "person", "gt_answers": [ "Olympus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000023015.jpg", "model_name": "BLIP2" }, { "question": "What is the worker holding?", "answer": "stop sign", "gt_answers": [ "Stop sign" ], "image_path": "./data/STVQA/VisualGenome/1/2340007.jpg", "model_name": "BLIP2" }, { "question": "What country is on the signs?", "answer": "pakistan", "gt_answers": [ "Pakistan" ], "image_path": "./data/STVQA/VisualGenome/1/2354129.jpg", "model_name": "BLIP2" }, { "question": "What does the green sign say?", "answer": "road closed", "gt_answers": [ "EASY STREET" ], "image_path": "./data/STVQA/VisualGenome/1/2315943.jpg", "model_name": "BLIP2" }, { "question": "What does the blue sign say?", "answer": "no parking", "gt_answers": [ "HEADER" ], "image_path": "./data/STVQA/VisualGenome/1/2315943.jpg", "model_name": "BLIP2" }, { "question": "What medical company is advertised on the light blue sign?", "answer": "person", "gt_answers": [ "EMPIRE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000104328.jpg", "model_name": "BLIP2" }, { "question": "What tv company is advertised on the red sign?", "answer": "person", "gt_answers": [ "bravia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000104328.jpg", "model_name": "BLIP2" }, { "question": "What does W.B.Mason sell?", "answer": "baseball bats", "gt_answers": [ "OFFICE supplies" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000104328.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the yellow tarp?", "answer": "rain", "gt_answers": [ "Dispatch" ], "image_path": "./data/STVQA/VisualGenome/1/2376935.jpg", "model_name": "BLIP2" }, { "question": "What is the sign number for Marina Bay?", "answer": "4", "gt_answers": [ "10" ], "image_path": "./data/STVQA/icdar/img_54.jpg", "model_name": "BLIP2" }, { "question": "What is the sign number for Dhoby Ghaut?", "answer": "ghaut", "gt_answers": [ "8" ], "image_path": "./data/STVQA/icdar/img_54.jpg", "model_name": "BLIP2" }, { "question": "What is the month during which this competition is taking place?", "answer": "august", "gt_answers": [ "February" ], "image_path": "./data/STVQA/imageNet/n02107574_2717.JPEG", "model_name": "BLIP2" }, { "question": "What is the year during which this competition is taking place?", "answer": "2015", "gt_answers": [ "2004" ], "image_path": "./data/STVQA/imageNet/n02107574_2717.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the club that is hosting this competition?", "answer": "swiss mountain dog club", "gt_answers": [ "Colorado Kennel Club" ], "image_path": "./data/STVQA/imageNet/n02107574_2717.JPEG", "model_name": "BLIP2" }, { "question": "What is the brand name of the right-most remote?", "answer": "person", "gt_answers": [ "telefunken" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000152070.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bus company?", "answer": "person", "gt_answers": [ "CHI BOY", "CHI-BOY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000021253.jpg", "model_name": "BLIP2" }, { "question": "What is the building?", "answer": "a hospital", "gt_answers": [ "Graduate student center", "RICHARD A. AUHLL GRADUATE STUDENT CENTER" ], "image_path": "./data/STVQA/VisualGenome/2/136.jpg", "model_name": "BLIP2" }, { "question": "What kind of monster is it?", "answer": "gila monster", "gt_answers": [ "Gila", "GILA MONSTER" ], "image_path": "./data/STVQA/imageNet/n01692333_1235.JPEG", "model_name": "BLIP2" }, { "question": "How many calories per serving does this product have?", "answer": "100 calories per serving", "gt_answers": [ "90" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006885.jpg", "model_name": "BLIP2" }, { "question": "What is the listed serving size on the label?", "answer": "1 cup", "gt_answers": [ "1/2 cup" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000006885.jpg", "model_name": "BLIP2" }, { "question": "What number is displayed on the black sign?", "answer": "4", "gt_answers": [ "880" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000118150.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store behind all these dipshits?", "answer": "pam", "gt_answers": [ "Pam" ], "image_path": "./data/STVQA/VisualGenome/2/2413770.jpg", "model_name": "BLIP2" }, { "question": "What number is WERTH wearing?", "answer": "person", "gt_answers": [ "28" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000013362.jpg", "model_name": "BLIP2" }, { "question": "WHAT COLOUR IS THEWALL", "answer": "yellow", "gt_answers": [ "YELLOW" ], "image_path": "./data/STVQA/VisualGenome/1/2335226.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN THE PICTURE", "answer": "it is very dirty japanese bathroom", "gt_answers": [ "IT IS VERY DIRTY JAPANESE BATHROOM", "it is very dirty Japanese bathroom." ], "image_path": "./data/STVQA/VisualGenome/1/2335226.jpg", "model_name": "BLIP2" }, { "question": "What is the abbreviated day of the week?", "answer": "d", "gt_answers": [ "TUE" ], "image_path": "./data/STVQA/imageNet/n03706229_5078.JPEG", "model_name": "BLIP2" }, { "question": "Who is the maker of the watch?", "answer": "casio", "gt_answers": [ "CASIO" ], "image_path": "./data/STVQA/imageNet/n03706229_5078.JPEG", "model_name": "BLIP2" }, { "question": "Which word appears on the yellow sign on the left?", "answer": "person", "gt_answers": [ "Square" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000236224.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the avenue on the bottom street sign?", "answer": "purdue avenue", "gt_answers": [ "Purdue", "PURDUE AVE" ], "image_path": "./data/STVQA/VisualGenome/1/2366148.jpg", "model_name": "BLIP2" }, { "question": "What is the route displayed at the top of the bus?", "answer": "the route is displayed on the bus", "gt_answers": [ "L SHUTTLE" ], "image_path": "./data/STVQA/VisualGenome/1/2360164.jpg", "model_name": "BLIP2" }, { "question": "What is the first name written at the bottom?", "answer": "mario", "gt_answers": [ "Maria" ], "image_path": "./data/STVQA/VisualGenome/1/2372381.jpg", "model_name": "BLIP2" }, { "question": "What type of ovens are in the store?", "answer": "pizza ovens", "gt_answers": [ "pizza" ], "image_path": "./data/STVQA/VisualGenome/1/2365884.jpg", "model_name": "BLIP2" }, { "question": "what brand of appliances are available?", "answer": "kenmore", "gt_answers": [ "bari" ], "image_path": "./data/STVQA/VisualGenome/1/2365884.jpg", "model_name": "BLIP2" }, { "question": "What cartoon character is on the mug?", "answer": "peanuts", "gt_answers": [ "Snoopy" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000458748.jpg", "model_name": "BLIP2" }, { "question": "what you seen text in scat board stage?", "answer": "scat board stage", "gt_answers": [ "STANCE" ], "image_path": "./data/STVQA/VisualGenome/1/713863.jpg", "model_name": "BLIP2" }, { "question": "What movie is advertised on the bus?", "answer": "star wars", "gt_answers": [ "Final Destination" ], "image_path": "./data/STVQA/VisualGenome/1/2342146.jpg", "model_name": "BLIP2" }, { "question": "What is advertised below the clock?", "answer": "a sailboat", "gt_answers": [ "Colgate" ], "image_path": "./data/STVQA/VisualGenome/1/2318221.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the player in #22 Jersey?", "answer": "person", "gt_answers": [ "CHAMBLESS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000535669.jpg", "model_name": "BLIP2" }, { "question": "What does the warning sign ask you to do?", "answer": "slow down", "gt_answers": [ "slow down" ], "image_path": "./data/STVQA/VisualGenome/1/2328097.jpg", "model_name": "BLIP2" }, { "question": "How much euors are the Pendant watches?", "answer": "200 eu", "gt_answers": [ "six" ], "image_path": "./data/STVQA/IIIT_text/3107.jpg", "model_name": "BLIP2" }, { "question": "what does the bigger sign say?", "answer": "no photography", "gt_answers": [ "no photos" ], "image_path": "./data/STVQA/IIIT_text/3107.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the building", "answer": "super 8 motel", "gt_answers": [ "Super 8 Motel" ], "image_path": "./data/STVQA/IIIT_text/img_000823.jpg", "model_name": "BLIP2" }, { "question": "Who won?", "answer": "person", "gt_answers": [ "YOU" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000308180.jpg", "model_name": "BLIP2" }, { "question": "What is the yellow word on the red sign?", "answer": "person", "gt_answers": [ "War" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000559235.jpg", "model_name": "BLIP2" }, { "question": "What is the first word in the website tag?", "answer": "rv", "gt_answers": [ "Motorhomes" ], "image_path": "./data/STVQA/imageNet/n04065272_27360.JPEG", "model_name": "BLIP2" }, { "question": "What vintage poster is on the wall?", "answer": "person", "gt_answers": [ "Coca Cola" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000109145.jpg", "model_name": "BLIP2" }, { "question": "what website do i go to?", "answer": "youtube", "gt_answers": [ "creativecommons.org" ], "image_path": "./data/STVQA/imageNet/n02787622_5276.JPEG", "model_name": "BLIP2" }, { "question": "What does the banjo label say?", "answer": "some rights reserved", "gt_answers": [ "Some rights reserved" ], "image_path": "./data/STVQA/imageNet/n02787622_5276.JPEG", "model_name": "BLIP2" }, { "question": "What number is the needle pointing closest to?", "answer": "12", "gt_answers": [ "1000" ], "image_path": "./data/STVQA/imageNet/n02794156_872.JPEG", "model_name": "BLIP2" }, { "question": "What is the price listed on the \"Special Offer\" sign?", "answer": "\u00a3", "gt_answers": [ "$4.99" ], "image_path": "./data/STVQA/icdar/img_962.jpg", "model_name": "BLIP2" }, { "question": "what type of bra is advertised?", "answer": "strapless", "gt_answers": [ "strapless" ], "image_path": "./data/STVQA/imageNet/n02892767_18140.JPEG", "model_name": "BLIP2" }, { "question": "What kind of bra is shown?", "answer": "strapless", "gt_answers": [ "strapless" ], "image_path": "./data/STVQA/imageNet/n02892767_18140.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "person", "gt_answers": [ "Wonder View Dr" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000455010.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the street sign?", "answer": "2", "gt_answers": [ "3500" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000455010.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the bottom of the snowboard?", "answer": "person", "gt_answers": [ "FORUM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000005916.jpg", "model_name": "BLIP2" }, { "question": "How many vehicles are in this picture?", "answer": "three", "gt_answers": [ "5" ], "image_path": "./data/STVQA/IIIT_text/img_001182.jpg", "model_name": "BLIP2" }, { "question": "What amusement park is written above the large sphere?", "answer": "epcot", "gt_answers": [ "Epcot" ], "image_path": "./data/STVQA/imageNet/n03388043_788.JPEG", "model_name": "BLIP2" }, { "question": "What word is next to the arrow pointing left?", "answer": "ski", "gt_answers": [ "Whitetail" ], "image_path": "./data/STVQA/VisualGenome/2/2405121.jpg", "model_name": "BLIP2" }, { "question": "What word is next to the arrow pointing right?", "answer": "ski", "gt_answers": [ "Redhawk" ], "image_path": "./data/STVQA/VisualGenome/2/2405121.jpg", "model_name": "BLIP2" }, { "question": "What is the first word after the box that is marked?", "answer": "guest", "gt_answers": [ "Incredible" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000015002.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the backpack?", "answer": "person", "gt_answers": [ "JONAS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000298692.jpg", "model_name": "BLIP2" }, { "question": "What word is located on the bus tire?", "answer": "person", "gt_answers": [ "GOOD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000298692.jpg", "model_name": "BLIP2" }, { "question": "What is the phone number for the food truck?", "answer": "888-deli-america", "gt_answers": [ "571-458-9732" ], "image_path": "./data/STVQA/VisualGenome/1/2368432.jpg", "model_name": "BLIP2" }, { "question": "What type of food is served from the food truck?", "answer": "fast food", "gt_answers": [ "American" ], "image_path": "./data/STVQA/VisualGenome/1/2368432.jpg", "model_name": "BLIP2" }, { "question": "what is number on white shirt", "answer": "person", "gt_answers": [ "41" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000278710.jpg", "model_name": "BLIP2" }, { "question": "How many sides are on the STOP sign", "answer": "two", "gt_answers": [ "8" ], "image_path": "./data/STVQA/VisualGenome/1/2331442.jpg", "model_name": "BLIP2" }, { "question": "What brand is depicted here?", "answer": "coca cola", "gt_answers": [ "Coca Cola", "COCA-COLA" ], "image_path": "./data/STVQA/IIIT_text/img_000325.jpg", "model_name": "BLIP2" }, { "question": "What percentage of your daily fiber needs does this food supply?", "answer": "9 %", "gt_answers": [ "46%" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000560.jpg", "model_name": "BLIP2" }, { "question": "What country is the airline from?", "answer": "japan", "gt_answers": [ "JAPAN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000559438.jpg", "model_name": "BLIP2" }, { "question": "Which company is this plane working for?", "answer": "japan airlines", "gt_answers": [ "JAPAN airlines" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000559438.jpg", "model_name": "BLIP2" }, { "question": "What word is in the red bird on the planes tail?", "answer": "japan", "gt_answers": [ "jal" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000559438.jpg", "model_name": "BLIP2" }, { "question": "What do the words say on the front of the bus in green?", "answer": "sydney sightseeing", "gt_answers": [ "Not in service" ], "image_path": "./data/STVQA/VisualGenome/1/2359579.jpg", "model_name": "BLIP2" }, { "question": "How many wheels of the bus are visible?", "answer": "four", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/VisualGenome/1/2359579.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name on the egg carton?", "answer": "person", "gt_answers": [ "OUR FARM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000338988.jpg", "model_name": "BLIP2" }, { "question": "How many eggs are in the egg carton as shown on its lid?", "answer": "12", "gt_answers": [ "ONE DOZEN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000338988.jpg", "model_name": "BLIP2" }, { "question": "What is the black train's number?", "answer": "no number", "gt_answers": [ "73", "SEVENTY THREE" ], "image_path": "./data/STVQA/VisualGenome/1/2348294.jpg", "model_name": "BLIP2" }, { "question": "What is mini?", "answer": "dog", "gt_answers": [ "STORAGE" ], "image_path": "./data/STVQA/VisualGenome/1/2369089.jpg", "model_name": "BLIP2" }, { "question": "Which word is shown at the top of the watch?", "answer": "protection", "gt_answers": [ "Protection" ], "image_path": "./data/STVQA/imageNet/n03197337_4275.JPEG", "model_name": "BLIP2" }, { "question": "What time is shown on the watch?", "answer": "digital time", "gt_answers": [ "13:01:14", "13:01 14" ], "image_path": "./data/STVQA/imageNet/n03197337_4275.JPEG", "model_name": "BLIP2" }, { "question": "What is the date shown on the watch?", "answer": "the date is shown on the dial", "gt_answers": [ "8-24", "FR 8-24" ], "image_path": "./data/STVQA/imageNet/n03197337_4275.JPEG", "model_name": "BLIP2" }, { "question": "What company makes this product?", "answer": "the william hill company", "gt_answers": [ "MELBOURN BR" ], "image_path": "./data/STVQA/imageNet/n02823428_1695.JPEG", "model_name": "BLIP2" }, { "question": "What flavor is this product?", "answer": "apple", "gt_answers": [ "APRICOT" ], "image_path": "./data/STVQA/imageNet/n02823428_1695.JPEG", "model_name": "BLIP2" }, { "question": "What does the white banner say?", "answer": "dominion", "gt_answers": [ "Stage Sponsor" ], "image_path": "./data/STVQA/imageNet/n03721384_9895.JPEG", "model_name": "BLIP2" }, { "question": "What does the blue banner say?", "answer": "dominion", "gt_answers": [ "Dominion It all starts here." ], "image_path": "./data/STVQA/imageNet/n03721384_9895.JPEG", "model_name": "BLIP2" }, { "question": "What is the company?", "answer": "tipper's", "gt_answers": [ "Tippers" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000102004.jpg", "model_name": "BLIP2" }, { "question": "What locations are shown in the upper left portion of the picture?", "answer": "paris france", "gt_answers": [ "PARIS VINCENNES" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000566839.jpg", "model_name": "BLIP2" }, { "question": "What other URL is shown on the banner with the blue background in the upper left portion of the photo?", "answer": "person", "gt_answers": [ "CASAQLIGUE.FR" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000566839.jpg", "model_name": "BLIP2" }, { "question": "What is available for purchase in the building across the street?", "answer": "bikes", "gt_answers": [ "FOOD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000102506.jpg", "model_name": "BLIP2" }, { "question": "What is the model number of the street light?", "answer": "the model number of the street light is sml", "gt_answers": [ "K1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000102506.jpg", "model_name": "BLIP2" }, { "question": "What does the octagonal sign direct you to do?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2359907.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the business that operates out of this building?", "answer": "a restaurant", "gt_answers": [ "La Piramibe" ], "image_path": "./data/STVQA/IIIT_text/4912.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the photographer who shot this photograph?", "answer": "person", "gt_answers": [ "Versaci" ], "image_path": "./data/STVQA/IIIT_text/4912.jpg", "model_name": "BLIP2" }, { "question": "What company branded this toy?", "answer": "toyota", "gt_answers": [ "Mac Tools", "MAL TOOLS" ], "image_path": "./data/STVQA/imageNet/n02966687_4242.JPEG", "model_name": "BLIP2" }, { "question": "What dish is the sauce for?", "answer": "pizza", "gt_answers": [ "pizza" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000381945.jpg", "model_name": "BLIP2" }, { "question": "What is the first word of the brand name of the can?", "answer": "person", "gt_answers": [ "muir" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000381945.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the red sign?", "answer": "ferrari", "gt_answers": [ "Santander" ], "image_path": "./data/STVQA/VisualGenome/2/2417794.jpg", "model_name": "BLIP2" }, { "question": "How many cats are there?", "answer": "two", "gt_answers": [ "2" ], "image_path": "./data/STVQA/VisualGenome/1/2343208.jpg", "model_name": "BLIP2" }, { "question": "What does the text say?", "answer": "a view of a river and mountains", "gt_answers": [ "HoangLong photo" ], "image_path": "./data/STVQA/IIIT_text/5590.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bus' tour company?", "answer": "person", "gt_answers": [ "big bus tours" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000388935.jpg", "model_name": "BLIP2" }, { "question": "Where does the tour take place?", "answer": "in london", "gt_answers": [ "london" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000388935.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company in the building on the right", "answer": "adobe", "gt_answers": [ "Topshelf Clothing" ], "image_path": "./data/STVQA/IIIT_text/2711.jpg", "model_name": "BLIP2" }, { "question": "What type of business is North Moore?", "answer": "industrial", "gt_answers": [ "Catering" ], "image_path": "./data/STVQA/IIIT_text/2711.jpg", "model_name": "BLIP2" }, { "question": "What team does the boys with yellow shirts play for?", "answer": "person", "gt_answers": [ "ISB" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000447292.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the inn?", "answer": "the fox", "gt_answers": [ "The Pickerel", "THE PICKEREL INN" ], "image_path": "./data/STVQA/VisualGenome/2/2851.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the side of the bus?", "answer": "person", "gt_answers": [ "tindo" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000293074.jpg", "model_name": "BLIP2" }, { "question": "What is the watermark written on the lower left hand corner of the image?", "answer": "a city", "gt_answers": [ "AMZ Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2374854.jpg", "model_name": "BLIP2" }, { "question": "Q: What brand of jeans is the skater wearing? A: element", "answer": "long answer a element", "gt_answers": [ "element" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000162654.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of this product?", "answer": "therapy systems emergency treatment cream", "gt_answers": [ "Therapy Systems" ], "image_path": "./data/STVQA/imageNet/n04357314_1828.JPEG", "model_name": "BLIP2" }, { "question": "What is written in red in the black ring around the bottom?", "answer": "therapy systems emergency treatment cream", "gt_answers": [ "Emergency Treatment Cream" ], "image_path": "./data/STVQA/imageNet/n04357314_1828.JPEG", "model_name": "BLIP2" }, { "question": "what is the sign of board reference?", "answer": "stop look and listen", "gt_answers": [ "railroad crossing stop look and listen" ], "image_path": "./data/STVQA/VisualGenome/1/2352003.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the boat", "answer": "the name of the boat", "gt_answers": [ "Miss Grace" ], "image_path": "./data/STVQA/IIIT_text/965.jpg", "model_name": "BLIP2" }, { "question": "What six numbers are visible?", "answer": "the train number", "gt_answers": [ "222003" ], "image_path": "./data/STVQA/VisualGenome/1/2316423.jpg", "model_name": "BLIP2" }, { "question": "Where is the train headed to?", "answer": "a city", "gt_answers": [ "Sheffield" ], "image_path": "./data/STVQA/VisualGenome/1/2316423.jpg", "model_name": "BLIP2" }, { "question": "what word is printed on the from of the dirt bike?", "answer": "person", "gt_answers": [ "CARAS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000329469.jpg", "model_name": "BLIP2" }, { "question": "What sports team is the boy's t-shirt advertising?", "answer": "person", "gt_answers": [ "Miami Eagles" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000443082.jpg", "model_name": "BLIP2" }, { "question": "What does the tail of this airliner have on it?", "answer": "american airlines", "gt_answers": [ "AA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000281262.jpg", "model_name": "BLIP2" }, { "question": "What does it say in the tower?", "answer": "train station", "gt_answers": [ "MUNCHEN HBF", "MUNCHEN-HBF" ], "image_path": "./data/STVQA/VisualGenome/1/2375817.jpg", "model_name": "BLIP2" }, { "question": "What brand is the crockpot?", "answer": "crock pot", "gt_answers": [ "Rival" ], "image_path": "./data/STVQA/imageNet/n03133878_1242.JPEG", "model_name": "BLIP2" }, { "question": "What is the first word after the copyright symbol?", "answer": "coral", "gt_answers": [ "Jeff" ], "image_path": "./data/STVQA/imageNet/n02607072_5165.JPEG", "model_name": "BLIP2" }, { "question": "What is the second word after the copyright symbol?", "answer": "coral", "gt_answers": [ "Williams" ], "image_path": "./data/STVQA/imageNet/n02607072_5165.JPEG", "model_name": "BLIP2" }, { "question": "What is the full URL listed?", "answer": "http://www", "gt_answers": [ "JeffWilliamsNeptunesArt.com" ], "image_path": "./data/STVQA/imageNet/n02607072_5165.JPEG", "model_name": "BLIP2" }, { "question": "How many tracks are at the railway crossing?", "answer": "two", "gt_answers": [ "2 tracks", "2" ], "image_path": "./data/STVQA/VisualGenome/1/2352606.jpg", "model_name": "BLIP2" }, { "question": "What is the beand written in the front of the locomotive?", "answer": "amtrak", "gt_answers": [ "metronom" ], "image_path": "./data/STVQA/VisualGenome/1/2370475.jpg", "model_name": "BLIP2" }, { "question": "What is the number written on the locomotive?", "answer": "nr", "gt_answers": [ "146 531-9" ], "image_path": "./data/STVQA/VisualGenome/1/2370475.jpg", "model_name": "BLIP2" }, { "question": "Whats the name of the hotel?", "answer": "galaxy hotel", "gt_answers": [ "Galaxy", "Galaxy Hotel" ], "image_path": "./data/STVQA/IIIT_text/img_000465.jpg", "model_name": "BLIP2" }, { "question": "What does the shirt say?", "answer": "person", "gt_answers": [ "SURF", "SURF THE FALL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000369362.jpg", "model_name": "BLIP2" }, { "question": "What company took the picture?", "answer": "scott scott photography", "gt_answers": [ "Purple Martini", "PURPLE MARTINI WEDDING PHOTOGRAPHY" ], "image_path": "./data/STVQA/IIIT_text/7507.jpg", "model_name": "BLIP2" }, { "question": "What kind of photography does Purple Martini do?", "answer": "wedding photography", "gt_answers": [ "Wedding Photography", "WEDDING" ], "image_path": "./data/STVQA/IIIT_text/7507.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "carousel", "gt_answers": [ "Tickets" ], "image_path": "./data/STVQA/imageNet/n02966193_31844.JPEG", "model_name": "BLIP2" }, { "question": "What clothing company typed on the woman's shirt?", "answer": "american apparel", "gt_answers": [ "VANS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000418185.jpg", "model_name": "BLIP2" }, { "question": "What is written on the top sign?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2334638.jpg", "model_name": "BLIP2" }, { "question": "What is written on the bottom sign?", "answer": "no parking", "gt_answers": [ "OPPOSING TRAFFIC DOES NOT STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2334638.jpg", "model_name": "BLIP2" }, { "question": "What is being sold at the counter?", "answer": "meat", "gt_answers": [ "Meat" ], "image_path": "./data/STVQA/icdar/img_963.jpg", "model_name": "BLIP2" }, { "question": "What is written on the building on the right?", "answer": "surf in the city", "gt_answers": [ "UBS" ], "image_path": "./data/STVQA/VisualGenome/1/1592782.jpg", "model_name": "BLIP2" }, { "question": "What color is the stripe on the blue shirts/", "answer": "white", "gt_answers": [ "white", "{}" ], "image_path": "./data/STVQA/VisualGenome/2/2406498.jpg", "model_name": "BLIP2" }, { "question": "How many players are there?", "answer": "three", "gt_answers": [ "three", "{}" ], "image_path": "./data/STVQA/VisualGenome/2/2406498.jpg", "model_name": "BLIP2" }, { "question": "What is the top word on the sign?", "answer": "place", "gt_answers": [ "Place" ], "image_path": "./data/STVQA/VisualGenome/1/2362798.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the side of the train?", "answer": "c40-8", "gt_answers": [ "8446" ], "image_path": "./data/STVQA/VisualGenome/1/2325228.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign say on the motorcycle?", "answer": "it says vintage motorcycle", "gt_answers": [ "335" ], "image_path": "./data/STVQA/VisualGenome/1/2330992.jpg", "model_name": "BLIP2" }, { "question": "What does the black sign say on the motorcycle?", "answer": "i am a motorcycle", "gt_answers": [ "FH2949" ], "image_path": "./data/STVQA/VisualGenome/1/2330992.jpg", "model_name": "BLIP2" }, { "question": "What color is the stop light?", "answer": "red", "gt_answers": [ "Red" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000120077.jpg", "model_name": "BLIP2" }, { "question": "What does the green banner say?", "answer": "dream", "gt_answers": [ "ITALIAN ICE" ], "image_path": "./data/STVQA/VisualGenome/2/2412365.jpg", "model_name": "BLIP2" }, { "question": "What does the beige sign say?", "answer": "dream", "gt_answers": [ "Cheese Dream" ], "image_path": "./data/STVQA/VisualGenome/2/2412365.jpg", "model_name": "BLIP2" }, { "question": "What is the street name?", "answer": "elm street", "gt_answers": [ "22 3/4 ST" ], "image_path": "./data/STVQA/VisualGenome/1/2334344.jpg", "model_name": "BLIP2" }, { "question": "Which is the avenue?", "answer": "the avenue", "gt_answers": [ "It is 8th Ave/CTH D.", "8TH AVE" ], "image_path": "./data/STVQA/VisualGenome/1/2334344.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this Barleywine?", "answer": "hamilton spot", "gt_answers": [ "TORS HAMMER" ], "image_path": "./data/STVQA/VisualGenome/1/2336698.jpg", "model_name": "BLIP2" }, { "question": "What is listed on the tail of the bottom plane?", "answer": "space shuttle", "gt_answers": [ "NASA" ], "image_path": "./data/STVQA/VisualGenome/1/2333654.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2358198.jpg", "model_name": "BLIP2" }, { "question": "what is written on the lable", "answer": "a banana", "gt_answers": [ "ALVIN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000576517.jpg", "model_name": "BLIP2" }, { "question": "What is the ear tag number of this cow?", "answer": "person", "gt_answers": [ "9420" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000305431.jpg", "model_name": "BLIP2" }, { "question": "What is on the back of the plane?", "answer": "space shuttle", "gt_answers": [ "NASA" ], "image_path": "./data/STVQA/VisualGenome/2/2416592.jpg", "model_name": "BLIP2" }, { "question": "What is this function showcaseing?", "answer": "a decorated toilet", "gt_answers": [ "Decorated Thrones" ], "image_path": "./data/STVQA/VisualGenome/2/2417795.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name on the projector?", "answer": "sony", "gt_answers": [ "BENQ" ], "image_path": "./data/STVQA/imageNet/n04009552_13027.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the bus", "answer": "san francisco bay area transit", "gt_answers": [ "1955" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000262442.jpg", "model_name": "BLIP2" }, { "question": "What is the bus location route", "answer": "the bus is in downtown los angeles", "gt_answers": [ "Old Pasadena", "PASADENA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000262442.jpg", "model_name": "BLIP2" }, { "question": "What brand of shoes are shown on the sign?", "answer": "nike", "gt_answers": [ "converse" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000521652.jpg", "model_name": "BLIP2" }, { "question": "Who took this photograph?", "answer": "julian photography", "gt_answers": [ "Jason Milich" ], "image_path": "./data/STVQA/VisualGenome/1/2338756.jpg", "model_name": "BLIP2" }, { "question": "What is written at the bottom of the image?", "answer": "julian photography", "gt_answers": [ "Jason Milich Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2338756.jpg", "model_name": "BLIP2" }, { "question": "What number is visible on the digital display?", "answer": "fahrenheit or celsius", "gt_answers": [ "4" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001370.jpg", "model_name": "BLIP2" }, { "question": "What is the switch currently set on?", "answer": "on", "gt_answers": [ "auto" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001370.jpg", "model_name": "BLIP2" }, { "question": "What word appears below the word 'room'?", "answer": "temperature", "gt_answers": [ "set" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000001370.jpg", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the box of paper?", "answer": "person", "gt_answers": [ "xerox" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000146700.jpg", "model_name": "BLIP2" }, { "question": "What store is in the image?", "answer": "tiffany and co", "gt_answers": [ "Asier Spring", "ASTER SPRING" ], "image_path": "./data/STVQA/icdar/img_379.jpg", "model_name": "BLIP2" }, { "question": "Which is the number written on the t-shirt?", "answer": "2", "gt_answers": [ "5" ], "image_path": "./data/STVQA/VisualGenome/1/2355245.jpg", "model_name": "BLIP2" }, { "question": "What tow numbers come after 50?", "answer": "60 and 70", "gt_answers": [ "15" ], "image_path": "./data/STVQA/VisualGenome/1/2348045.jpg", "model_name": "BLIP2" }, { "question": "What number is on the man's jersey?", "answer": "person", "gt_answers": [ "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000236186.jpg", "model_name": "BLIP2" }, { "question": "What type of bus is shown?", "answer": "school bus", "gt_answers": [ "School Bus" ], "image_path": "./data/STVQA/VisualGenome/1/2337678.jpg", "model_name": "BLIP2" }, { "question": "What is the tooth brush brand?", "answer": "crest", "gt_answers": [ "Oral-B" ], "image_path": "./data/STVQA/VisualGenome/1/2369071.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the product in the green container?", "answer": "activi", "gt_answers": [ "activia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000105643.jpg", "model_name": "BLIP2" }, { "question": "What watermark is displayed?", "answer": "person", "gt_answers": [ "austin woods" ], "image_path": "./data/STVQA/imageNet/n02870880_117.JPEG", "model_name": "BLIP2" }, { "question": "What zone is in the green circle?", "answer": "the green circle is zone 1", "gt_answers": [ "1" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000491440.jpg", "model_name": "BLIP2" }, { "question": "What stop is depicted?", "answer": "wharf street", "gt_answers": [ "156" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000491440.jpg", "model_name": "BLIP2" }, { "question": "What kind of building is the right building?", "answer": "apartment building", "gt_answers": [ "Hotel" ], "image_path": "./data/STVQA/VisualGenome/2/2688.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the back of the bus?", "answer": "person", "gt_answers": [ "200" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000121535.jpg", "model_name": "BLIP2" }, { "question": "What is the first word written at the bottom of the image?", "answer": "person", "gt_answers": [ "Peter" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000520765.jpg", "model_name": "BLIP2" }, { "question": "What phrase is printed on the garbage container?", "answer": "archaeology is rubbish", "gt_answers": [ "ARCHAEOLOGY IS RUBBISH" ], "image_path": "./data/STVQA/imageNet/n02747177_2230.JPEG", "model_name": "BLIP2" }, { "question": "What is the quote on the trash bin?", "answer": "archaeology is rubbish", "gt_answers": [ "Archaeology is rubbish" ], "image_path": "./data/STVQA/imageNet/n02747177_2230.JPEG", "model_name": "BLIP2" }, { "question": "What time does the clock display?", "answer": "it displays the time", "gt_answers": [ "11:38" ], "image_path": "./data/STVQA/imageNet/n03196217_8034.JPEG", "model_name": "BLIP2" }, { "question": "what is the name of vehicle?", "answer": "tow truck", "gt_answers": [ "HARRIS'S AMUSE MENTS", "HARRIS'S AMUSEMENTS" ], "image_path": "./data/STVQA/VisualGenome/1/2332432.jpg", "model_name": "BLIP2" }, { "question": "What bold word is dark blue on the blue bag?", "answer": "advance", "gt_answers": [ "ADVANCE" ], "image_path": "./data/STVQA/imageNet/n02096294_1019.JPEG", "model_name": "BLIP2" }, { "question": "What letters are in the middle of the globe?", "answer": "advance", "gt_answers": [ "FCI" ], "image_path": "./data/STVQA/imageNet/n02096294_1019.JPEG", "model_name": "BLIP2" }, { "question": "What is the cost of VERMUT?", "answer": "it is sold by the litre", "gt_answers": [ "3,70" ], "image_path": "./data/STVQA/VisualGenome/2/4573.jpg", "model_name": "BLIP2" }, { "question": "What is the cost of VI MISSA?", "answer": "the price of a bottle of vi missa varies depending on the vintage", "gt_answers": [ "260" ], "image_path": "./data/STVQA/VisualGenome/2/4573.jpg", "model_name": "BLIP2" }, { "question": "What is the cost of BLANC?", "answer": "the cost of blanc varies depending on where you buy it", "gt_answers": [ "155" ], "image_path": "./data/STVQA/VisualGenome/2/4573.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the planes/", "answer": "biplane", "gt_answers": [ "Breitling" ], "image_path": "./data/STVQA/VisualGenome/1/2359542.jpg", "model_name": "BLIP2" }, { "question": "where is the stop sign?", "answer": "in front of the building", "gt_answers": [ "war" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000308579.jpg", "model_name": "BLIP2" }, { "question": "What the yellow words say?", "answer": "the beach", "gt_answers": [ "adi home stay" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000170464.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the left side of the cup?", "answer": "coke", "gt_answers": [ "Coke" ], "image_path": "./data/STVQA/VisualGenome/1/2352647.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the right side of the cup?", "answer": "coke", "gt_answers": [ "Coca-Cola" ], "image_path": "./data/STVQA/VisualGenome/1/2352647.jpg", "model_name": "BLIP2" }, { "question": "What is written on the runway marker?", "answer": "person", "gt_answers": [ "B 09-27" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000424481.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the library?", "answer": "sydney central library", "gt_answers": [ "Cockburn" ], "image_path": "./data/STVQA/VisualGenome/1/2343018.jpg", "model_name": "BLIP2" }, { "question": "What does the traffic sign say?", "answer": "dc law", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2322004.jpg", "model_name": "BLIP2" }, { "question": "What is written on the front of the shirt?", "answer": "person", "gt_answers": [ "Marine" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000581181.jpg", "model_name": "BLIP2" }, { "question": "What does the sign with the arrow read?", "answer": "one day", "gt_answers": [ "One Day" ], "image_path": "./data/STVQA/VisualGenome/1/2316646.jpg", "model_name": "BLIP2" }, { "question": "What number is the higher jumper wearing?", "answer": "two", "gt_answers": [ "4" ], "image_path": "./data/STVQA/imageNet/n04540053_7860.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the airplane?", "answer": "san diego", "gt_answers": [ "LY-LPG" ], "image_path": "./data/STVQA/VisualGenome/1/2331856.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the side of the black box?", "answer": "s", "gt_answers": [ "P" ], "image_path": "./data/STVQA/imageNet/n03891332_3736.JPEG", "model_name": "BLIP2" }, { "question": "What is the second word written at the bottom of the image?", "answer": "person", "gt_answers": [ "quyen", "ouyen" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000505963.jpg", "model_name": "BLIP2" }, { "question": "What does the billboard says that starts with letter V", "answer": "vietnam tourism", "gt_answers": [ "VIECHA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000044017.jpg", "model_name": "BLIP2" }, { "question": "What street was this image taken near?", "answer": "greenock cut", "gt_answers": [ "Patrick Street" ], "image_path": "./data/STVQA/VisualGenome/2/2416621.jpg", "model_name": "BLIP2" }, { "question": "What is the speed limit?", "answer": "35", "gt_answers": [ "35" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000425439.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this street?", "answer": "paris avenue", "gt_answers": [ "paris Ave" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000171082.jpg", "model_name": "BLIP2" }, { "question": "What is the traffic advice on the sign?", "answer": "no through road", "gt_answers": [ "no Through road" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000171082.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE WHITE BUS NUMNER/", "answer": "the white bus number/ short answer", "gt_answers": [ "H-192" ], "image_path": "./data/STVQA/VisualGenome/1/2334295.jpg", "model_name": "BLIP2" }, { "question": "HOW MANY PEOPLE ARE STANDING OUTSIDE THE BUS?", "answer": "three", "gt_answers": [ "3" ], "image_path": "./data/STVQA/VisualGenome/1/2334295.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN THE FRONT OF WHITE BUS?", "answer": "honda", "gt_answers": [ "HOTARD" ], "image_path": "./data/STVQA/VisualGenome/1/2334295.jpg", "model_name": "BLIP2" }, { "question": "How many mL can this container measure?", "answer": "100 ml", "gt_answers": [ "1000 mL", "1000" ], "image_path": "./data/STVQA/imageNet/n02815834_1187.JPEG", "model_name": "BLIP2" }, { "question": "What number is written on the bat?", "answer": "person", "gt_answers": [ "113" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000550478.jpg", "model_name": "BLIP2" }, { "question": "What board game is being played?", "answer": "person", "gt_answers": [ "Sequence" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000041252.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this shop?", "answer": "starbucks coffee", "gt_answers": [ "Starbucks Coffee" ], "image_path": "./data/STVQA/IIIT_text/img_000350.jpg", "model_name": "BLIP2" }, { "question": "What is this building called?", "answer": "new york stock exchange", "gt_answers": [ "NEW YORK STOCK EXCHANGE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000102899.jpg", "model_name": "BLIP2" }, { "question": "What store is on the corner?", "answer": "city sports", "gt_answers": [ "City Sports" ], "image_path": "./data/STVQA/VisualGenome/2/1863.jpg", "model_name": "BLIP2" }, { "question": "What school is nearby?", "answer": "city sports", "gt_answers": [ "Lesley University" ], "image_path": "./data/STVQA/VisualGenome/2/1863.jpg", "model_name": "BLIP2" }, { "question": "What is the text on the license plate?", "answer": "san diego", "gt_answers": [ "2DTW337" ], "image_path": "./data/STVQA/VisualGenome/1/2358487.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the hotel?", "answer": "wynn hotel", "gt_answers": [ "CONGRESS HOTEL" ], "image_path": "./data/STVQA/VisualGenome/1/2372405.jpg", "model_name": "BLIP2" }, { "question": "What does the octagonal red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2336104.jpg", "model_name": "BLIP2" }, { "question": "What type of car is passing by?", "answer": "taxi", "gt_answers": [ "NYC TAXI", "taxi" ], "image_path": "./data/STVQA/VisualGenome/2/2416422.jpg", "model_name": "BLIP2" }, { "question": "What time is it?", "answer": "the lock screen", "gt_answers": [ "21:12" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019505.jpg", "model_name": "BLIP2" }, { "question": "What day of the week is it?", "answer": "it's monday", "gt_answers": [ "Saturday" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019505.jpg", "model_name": "BLIP2" }, { "question": "What percentage is the battery at?", "answer": "it's at 0 percent", "gt_answers": [ "77%" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019505.jpg", "model_name": "BLIP2" }, { "question": "What part of the military does this plane belong to?", "answer": "the royal air force", "gt_answers": [ "ARMY" ], "image_path": "./data/STVQA/VisualGenome/2/892.jpg", "model_name": "BLIP2" }, { "question": "What letter is in the center of the plane?", "answer": "b", "gt_answers": [ "S" ], "image_path": "./data/STVQA/VisualGenome/2/892.jpg", "model_name": "BLIP2" }, { "question": "What is the website written on the bottom left corner?", "answer": "youtube", "gt_answers": [ "FreakingNews.com" ], "image_path": "./data/STVQA/imageNet/n02123159_9882.JPEG", "model_name": "BLIP2" }, { "question": "How much is the Tavuk Doner on the food cart?", "answer": "10 lira", "gt_answers": [ "2", "2.00 TL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000019236.jpg", "model_name": "BLIP2" }, { "question": "What word is below the eggplant in the image?", "answer": "person", "gt_answers": [ "Sev" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000019236.jpg", "model_name": "BLIP2" }, { "question": "What number is on the horse rider?", "answer": "4", "gt_answers": [ "853" ], "image_path": "./data/STVQA/VisualGenome/1/2343933.jpg", "model_name": "BLIP2" }, { "question": "What is the website on the wall?", "answer": "com", "gt_answers": [ "aps.com" ], "image_path": "./data/STVQA/VisualGenome/1/2343933.jpg", "model_name": "BLIP2" }, { "question": "What country is the manual from?", "answer": "australia", "gt_answers": [ "Japan" ], "image_path": "./data/STVQA/imageNet/n04238763_22502.JPEG", "model_name": "BLIP2" }, { "question": "What date is displayed in the picture at the bottom left hand corner?", "answer": "january 1", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/VisualGenome/1/2362948.jpg", "model_name": "BLIP2" }, { "question": "What is the Brand name?", "answer": "sony", "gt_answers": [ "iHome" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030427.jpg", "model_name": "BLIP2" }, { "question": "What time is displayed on the clock?", "answer": "current time", "gt_answers": [ "3:24" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000030427.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blender?", "answer": "vitamix", "gt_answers": [ "Osterizer" ], "image_path": "./data/STVQA/VisualGenome/2/2403635.jpg", "model_name": "BLIP2" }, { "question": "What number is shown on the jersey?", "answer": "2", "gt_answers": [ "92" ], "image_path": "./data/STVQA/VisualGenome/1/1159687.jpg", "model_name": "BLIP2" }, { "question": "What kind of spice chai is this tea?", "answer": "tiger spice chai", "gt_answers": [ "Tiger Spice", "Tiger" ], "image_path": "./data/STVQA/VisualGenome/2/2410324.jpg", "model_name": "BLIP2" }, { "question": "What is the net weight in oz?", "answer": "16 oz", "gt_answers": [ "14" ], "image_path": "./data/STVQA/VisualGenome/2/2410324.jpg", "model_name": "BLIP2" }, { "question": "What brand of skis is the man wearing his sunglasses carrying?", "answer": "k2", "gt_answers": [ "fischer fischer", "fischer" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000273951.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the guest house?", "answer": "person", "gt_answers": [ "SUNRISE GUEST HOUSE", "SUNRISE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000017472.jpg", "model_name": "BLIP2" }, { "question": "What letters appear on the wall behind the cow?", "answer": "person", "gt_answers": [ "STD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000017472.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the course?", "answer": "santa rita disc golf course", "gt_answers": [ "Buena Vista Disc Golf Course" ], "image_path": "./data/STVQA/VisualGenome/2/2415987.jpg", "model_name": "BLIP2" }, { "question": "What year is this Ambergate Carnival?", "answer": "2009", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/imageNet/n06359193_16599.JPEG", "model_name": "BLIP2" }, { "question": "Where can you find the Salty Dawg Saloon?", "answer": "atlanta, georgia", "gt_answers": [ "Homer Spit Alaska" ], "image_path": "./data/STVQA/VisualGenome/1/2360865.jpg", "model_name": "BLIP2" }, { "question": "What year was Rory's established?", "answer": "1939", "gt_answers": [ "1959" ], "image_path": "./data/STVQA/IIIT_text/3462.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of this product?", "answer": "organic sugar", "gt_answers": [ "Trader Joe's" ], "image_path": "./data/STVQA/imageNet/n03871628_21957.JPEG", "model_name": "BLIP2" }, { "question": "What does the label in the lower left hand corner indicate?", "answer": "organic sugar", "gt_answers": [ "USDA organic" ], "image_path": "./data/STVQA/imageNet/n03871628_21957.JPEG", "model_name": "BLIP2" }, { "question": "What is Trader Joe's organic sugar made of?", "answer": "a blend of cane and beet sugar", "gt_answers": [ "Evaporated Cane Juice" ], "image_path": "./data/STVQA/imageNet/n03871628_21957.JPEG", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "x1", "gt_answers": [ "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414492.jpg", "model_name": "BLIP2" }, { "question": "Where can the letters ALU be found in this photo?", "answer": "alu alu", "gt_answers": [ "alu" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000322445.jpg", "model_name": "BLIP2" }, { "question": "What does the sign at the top of the restaurant read?", "answer": "asian market", "gt_answers": [ "Sushi" ], "image_path": "./data/STVQA/icdar/img_965.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the magazine title?", "answer": "wood", "gt_answers": [ "Making" ], "image_path": "./data/STVQA/imageNet/n03954731_10167.JPEG", "model_name": "BLIP2" }, { "question": "What are the last two words of the magazine title?", "answer": "wood planes", "gt_answers": [ "Wood Planes" ], "image_path": "./data/STVQA/imageNet/n03954731_10167.JPEG", "model_name": "BLIP2" }, { "question": "What brand is the television commercial for?", "answer": "person", "gt_answers": [ "Liberty" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000215398.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bike?", "answer": "person", "gt_answers": [ "629" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000282015.jpg", "model_name": "BLIP2" }, { "question": "What county is this bus from?", "answer": "lee county", "gt_answers": [ "Loudoun County", "Loudoun" ], "image_path": "./data/STVQA/VisualGenome/2/2411247.jpg", "model_name": "BLIP2" }, { "question": "What is the letter written on the baseball caps?", "answer": "j", "gt_answers": [ "J" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000140940.jpg", "model_name": "BLIP2" }, { "question": "What movie are these donuts from?", "answer": "toy story", "gt_answers": [ "THE SIMPSONS" ], "image_path": "./data/STVQA/VisualGenome/1/2355097.jpg", "model_name": "BLIP2" }, { "question": "Who's name is on the picture?", "answer": "lauren f", "gt_answers": [ "James S. Ferguson" ], "image_path": "./data/STVQA/VisualGenome/1/2336947.jpg", "model_name": "BLIP2" }, { "question": "What year would you say this picture was taken?", "answer": "2007", "gt_answers": [ "2007" ], "image_path": "./data/STVQA/VisualGenome/1/2336947.jpg", "model_name": "BLIP2" }, { "question": "What's the complete text in the photo?", "answer": "bananas growing on a tree", "gt_answers": [ "James S. Ferguson 2007", "\ufffd\ufffd JAMES S. FERGUSON 2007" ], "image_path": "./data/STVQA/VisualGenome/1/2336947.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of board reference?", "answer": "a board of directors", "gt_answers": [ "jord" ], "image_path": "./data/STVQA/VisualGenome/1/2357215.jpg", "model_name": "BLIP2" }, { "question": "What is the text in the picture?", "answer": "hamster", "gt_answers": [ "hesedetang", "\ufffd\ufffd HESEDETANG *" ], "image_path": "./data/STVQA/imageNet/n02342885_11074.JPEG", "model_name": "BLIP2" }, { "question": "What is the animal eating", "answer": "broccoli", "gt_answers": [ "broccoli" ], "image_path": "./data/STVQA/imageNet/n02342885_11074.JPEG", "model_name": "BLIP2" }, { "question": "What is the speed limit?", "answer": "30 mph", "gt_answers": [ "40" ], "image_path": "./data/STVQA/icdar/img_228.jpg", "model_name": "BLIP2" }, { "question": "What does the yellow street sign say?", "answer": "attention college", "gt_answers": [ "ATENCION COLEGIO" ], "image_path": "./data/STVQA/VisualGenome/1/2321704.jpg", "model_name": "BLIP2" }, { "question": "what number is above the bald mans head?", "answer": "4", "gt_answers": [ "11" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000006253.jpg", "model_name": "BLIP2" }, { "question": "what is on the young mans shirt?", "answer": "person", "gt_answers": [ "nasca" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000006253.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the building?", "answer": "3", "gt_answers": [ "36:6", "36-6" ], "image_path": "./data/STVQA/VisualGenome/1/2341637.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the hair ionic system?", "answer": "conair", "gt_answers": [ "CONAIR" ], "image_path": "./data/STVQA/imageNet/n03483316_46423.JPEG", "model_name": "BLIP2" }, { "question": "How many watts ionic system?", "answer": "200 watts", "gt_answers": [ "1875 watt Double ionic port system", "1875" ], "image_path": "./data/STVQA/imageNet/n03483316_46423.JPEG", "model_name": "BLIP2" }, { "question": "What is written in white on the red bus?", "answer": "person", "gt_answers": [ "Stagecoach" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000032458.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the street?", "answer": "maple street", "gt_answers": [ "main street" ], "image_path": "./data/STVQA/VisualGenome/1/2333927.jpg", "model_name": "BLIP2" }, { "question": "what is written on the board displayed?", "answer": "street sign", "gt_answers": [ "old town temecula WiFi district", "OLD TOW TEMECULA WI-FI DISTRICT" ], "image_path": "./data/STVQA/VisualGenome/1/2333927.jpg", "model_name": "BLIP2" }, { "question": "What year was the photo taken?", "answer": "2010", "gt_answers": [ "2006" ], "image_path": "./data/STVQA/imageNet/n03854065_8369.JPEG", "model_name": "BLIP2" }, { "question": "What year is listed in yellow at the bottom?", "answer": "2010", "gt_answers": [ "2005" ], "image_path": "./data/STVQA/imageNet/n01664065_5049.JPEG", "model_name": "BLIP2" }, { "question": "what is the name of the corner store?", "answer": "starbucks", "gt_answers": [ "office depot" ], "image_path": "./data/STVQA/VisualGenome/2/636.jpg", "model_name": "BLIP2" }, { "question": "What name is written on the white jersey?", "answer": "indians", "gt_answers": [ "Davis" ], "image_path": "./data/STVQA/VisualGenome/1/2329838.jpg", "model_name": "BLIP2" }, { "question": "What is written on the structure?", "answer": "canada", "gt_answers": [ "Canada" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000161796.jpg", "model_name": "BLIP2" }, { "question": "Which year is this expo?", "answer": "2015", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000161796.jpg", "model_name": "BLIP2" }, { "question": "What country is being displayed?", "answer": "canada", "gt_answers": [ "Canada" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000161796.jpg", "model_name": "BLIP2" }, { "question": "What is the website address that is listed?", "answer": "person", "gt_answers": [ "www.versaci.fr" ], "image_path": "./data/STVQA/IIIT_text/4652.jpg", "model_name": "BLIP2" }, { "question": "What man's first name is shown on the image?", "answer": "person", "gt_answers": [ "Pascal" ], "image_path": "./data/STVQA/IIIT_text/4652.jpg", "model_name": "BLIP2" }, { "question": "What is written on sign board?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/713180.jpg", "model_name": "BLIP2" }, { "question": "What is on the right wing?", "answer": "antenna", "gt_answers": [ "1" ], "image_path": "./data/STVQA/VisualGenome/1/2364613.jpg", "model_name": "BLIP2" }, { "question": "What does the word say?", "answer": "doublet", "gt_answers": [ "Doublet 10X-20mm" ], "image_path": "./data/STVQA/imageNet/n03692522_7349.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the street on this sign?", "answer": "cecilia street", "gt_answers": [ "CECILIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000086831.jpg", "model_name": "BLIP2" }, { "question": "What number block is displayed on the sign?", "answer": "0", "gt_answers": [ "800" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000086831.jpg", "model_name": "BLIP2" }, { "question": "What is the left word on the page?", "answer": "horse", "gt_answers": [ "stefano" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000375841.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the image?", "answer": "two", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000375841.jpg", "model_name": "BLIP2" }, { "question": "What is the capitalized word on the page?", "answer": "horses", "gt_answers": [ "Stefano Pertusati" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000375841.jpg", "model_name": "BLIP2" }, { "question": "How many meters are shown on the sign?", "answer": "100", "gt_answers": [ "50" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000550783.jpg", "model_name": "BLIP2" }, { "question": "What is written on the sign?", "answer": "nothing can stop me now", "gt_answers": [ "Nothing can stop me now.", "NOTHING CAN STOP ME NOW" ], "image_path": "./data/STVQA/VisualGenome/1/2368955.jpg", "model_name": "BLIP2" }, { "question": "What type of bus is this?", "answer": "school bus", "gt_answers": [ "School bus" ], "image_path": "./data/STVQA/IIIT_text/img_001106.jpg", "model_name": "BLIP2" }, { "question": "What is written on the building?", "answer": "gen inc", "gt_answers": [ "GENERIS INC. CATERERS" ], "image_path": "./data/STVQA/VisualGenome/1/2375529.jpg", "model_name": "BLIP2" }, { "question": "What does the red text say?", "answer": "gen inc", "gt_answers": [ "GENERIS INC. CATERERS" ], "image_path": "./data/STVQA/VisualGenome/1/2375529.jpg", "model_name": "BLIP2" }, { "question": "What is the top word on the big tire?", "answer": "ford", "gt_answers": [ "BEVEROL" ], "image_path": "./data/STVQA/imageNet/n04465501_11897.JPEG", "model_name": "BLIP2" }, { "question": "What is the bottom word on the big tire?", "answer": "ford", "gt_answers": [ "SCHROOT" ], "image_path": "./data/STVQA/imageNet/n04465501_11897.JPEG", "model_name": "BLIP2" }, { "question": "Who is the manufacturer of the motorcycle?", "answer": "ducati", "gt_answers": [ "Ducati" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000021151.jpg", "model_name": "BLIP2" }, { "question": "What is written on the top left side of the front of the train?", "answer": "mta", "gt_answers": [ "VIRYF", "UIRYF" ], "image_path": "./data/STVQA/VisualGenome/1/2373431.jpg", "model_name": "BLIP2" }, { "question": "What is written in orange under the windsheild?", "answer": "nyc subway", "gt_answers": [ "GANA", "GANZ" ], "image_path": "./data/STVQA/VisualGenome/1/2373431.jpg", "model_name": "BLIP2" }, { "question": "What is written above the right windsheild?", "answer": "amtrak", "gt_answers": [ "JBGR", "JBER" ], "image_path": "./data/STVQA/VisualGenome/1/2373431.jpg", "model_name": "BLIP2" }, { "question": "What is the on the timeline?", "answer": "19th century", "gt_answers": [ "14:56" ], "image_path": "./data/STVQA/VisualGenome/1/1592263.jpg", "model_name": "BLIP2" }, { "question": "What is writing on the blue board?", "answer": "", "gt_answers": [ "Only1 Technology", "Only 1 Technology" ], "image_path": "./data/STVQA/VisualGenome/1/1592263.jpg", "model_name": "BLIP2" }, { "question": "What is the manufacturer of the espresso machine?", "answer": "breville", "gt_answers": [ "taurus" ], "image_path": "./data/STVQA/imageNet/n03297495_1272.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the company?", "answer": "person", "gt_answers": [ "Bawarchi" ], "image_path": "./data/STVQA/IIIT_text/img_000200.jpg", "model_name": "BLIP2" }, { "question": "What is written on the train?", "answer": "aquarque", "gt_answers": [ "Aguarague" ], "image_path": "./data/STVQA/VisualGenome/1/2349702.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NUMBER OF THIS BUS?", "answer": "no number on bus", "gt_answers": [ "D2" ], "image_path": "./data/STVQA/icdar/img_24.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS THE NAME OF THIS BUS?", "answer": "a bus", "gt_answers": [ "NUS" ], "image_path": "./data/STVQA/icdar/img_24.jpg", "model_name": "BLIP2" }, { "question": "What model of Yamaha is this", "answer": "yzf r6", "gt_answers": [ "GTS" ], "image_path": "./data/STVQA/VisualGenome/1/2358649.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the skier on the right?", "answer": "20", "gt_answers": [ "200" ], "image_path": "./data/STVQA/VisualGenome/1/2340799.jpg", "model_name": "BLIP2" }, { "question": "what is the big sign refer in bus?", "answer": "it is a double decker bus", "gt_answers": [ "partybus zakopane", "PARTYBUS" ], "image_path": "./data/STVQA/VisualGenome/1/2354064.jpg", "model_name": "BLIP2" }, { "question": "What does the big black letters on mouth wash read?", "answer": "advil", "gt_answers": [ "Antiseptic Oral Rinse" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000520072.jpg", "model_name": "BLIP2" }, { "question": "What number is on the top row left sign?", "answer": "3", "gt_answers": [ "1,000" ], "image_path": "./data/STVQA/VisualGenome/1/2326723.jpg", "model_name": "BLIP2" }, { "question": "What number is on the top row right sign?", "answer": "4", "gt_answers": [ "1,200" ], "image_path": "./data/STVQA/VisualGenome/1/2326723.jpg", "model_name": "BLIP2" }, { "question": "What number is on the bottom row left sign?", "answer": "3", "gt_answers": [ "1,300" ], "image_path": "./data/STVQA/VisualGenome/1/2326723.jpg", "model_name": "BLIP2" }, { "question": "What type of tour is the bus taking you on?", "answer": "sightseeing", "gt_answers": [ "OPEN TOP SIGHTSEEING", "WASHINGTON DC OPEN TOP SIGHTSEEING" ], "image_path": "./data/STVQA/VisualGenome/1/2324588.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop sign", "gt_answers": [ "STOP HUMP" ], "image_path": "./data/STVQA/VisualGenome/1/2324065.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on this cup?", "answer": "for", "gt_answers": [ "Fido" ], "image_path": "./data/STVQA/imageNet/n03063599_3904.JPEG", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "ny park", "gt_answers": [ "51" ], "image_path": "./data/STVQA/VisualGenome/1/2362524.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to the park", "gt_answers": [ "Hyde Park" ], "image_path": "./data/STVQA/VisualGenome/1/2362524.jpg", "model_name": "BLIP2" }, { "question": "What is written on the arrow pointing left?", "answer": "turn left", "gt_answers": [ "Einbahn" ], "image_path": "./data/STVQA/VisualGenome/1/2346000.jpg", "model_name": "BLIP2" }, { "question": "What does the octagonal red sign say?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/icdar/img_150.jpg", "model_name": "BLIP2" }, { "question": "What time does the sign start?", "answer": "8am", "gt_answers": [ "8 am" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000436048.jpg", "model_name": "BLIP2" }, { "question": "What color is the truck?", "answer": "white", "gt_answers": [ "White" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000216579.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the glove?", "answer": "person", "gt_answers": [ "R" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000367823.jpg", "model_name": "BLIP2" }, { "question": "What is the watermark on the image?", "answer": "it is a copyright watermark", "gt_answers": [ "iStockphoto" ], "image_path": "./data/STVQA/imageNet/n04467665_41038.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the product?", "answer": "special lasagne", "gt_answers": [ "Special Lasagne" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000016902.jpg", "model_name": "BLIP2" }, { "question": "what letters are in the circle on the front of the plave", "answer": "w h j", "gt_answers": [ "TWA" ], "image_path": "./data/STVQA/VisualGenome/1/2339787.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the church?", "answer": "person", "gt_answers": [ "HANA", "HANA CHURCH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000396528.jpg", "model_name": "BLIP2" }, { "question": "What is affiliated with the name \"Hana?\"", "answer": "hana is a surname", "gt_answers": [ "CHURCH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000396528.jpg", "model_name": "BLIP2" }, { "question": "what does the title left to inbox say?", "answer": "inbox", "gt_answers": [ "next" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000466001.jpg", "model_name": "BLIP2" }, { "question": "what is the title of the green book?", "answer": "a book of hours", "gt_answers": [ "teams" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000466001.jpg", "model_name": "BLIP2" }, { "question": "What is written on the white sign?", "answer": "florida", "gt_answers": [ "Florida" ], "image_path": "./data/STVQA/VisualGenome/1/2363832.jpg", "model_name": "BLIP2" }, { "question": "how many calories is half a carton of this?", "answer": "tomato pasta", "gt_answers": [ "80" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029015.jpg", "model_name": "BLIP2" }, { "question": "What are the amount of grams of sugar in half a carton?", "answer": "0 g 0 g", "gt_answers": [ "15" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029015.jpg", "model_name": "BLIP2" }, { "question": "What does it say about containing saturates?", "answer": "0 g saturates", "gt_answers": [ "trace" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000029015.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name of the gas?", "answer": "shell", "gt_answers": [ "Shell" ], "image_path": "./data/STVQA/imageNet/n03425413_17111.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the train?", "answer": "0 4 6", "gt_answers": [ "45110" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000031666.jpg", "model_name": "BLIP2" }, { "question": "On what day can you get 1/2 price burgers?", "answer": "friday", "gt_answers": [ "Tues Night", "TUES" ], "image_path": "./data/STVQA/VisualGenome/1/2341475.jpg", "model_name": "BLIP2" }, { "question": "What time can you get 1/2 price burgers?", "answer": "on tuesdays and wednesdays", "gt_answers": [ "5PM-Close" ], "image_path": "./data/STVQA/VisualGenome/1/2341475.jpg", "model_name": "BLIP2" }, { "question": "What can you get for 1/2 Price on Tues Night?", "answer": "a movie ticket", "gt_answers": [ "Burgers" ], "image_path": "./data/STVQA/VisualGenome/1/2341475.jpg", "model_name": "BLIP2" }, { "question": "What does the top right graffiti tag say?", "answer": "nyc", "gt_answers": [ "CHARM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000239593.jpg", "model_name": "BLIP2" }, { "question": "What airline owns the plane", "answer": "ethiopian airlines", "gt_answers": [ "Ethiopian" ], "image_path": "./data/STVQA/imageNet/n02690373_17262.JPEG", "model_name": "BLIP2" }, { "question": "What is the team name printed on the jersey?", "answer": "miami heat", "gt_answers": [ "Heat" ], "image_path": "./data/STVQA/VisualGenome/1/2363604.jpg", "model_name": "BLIP2" }, { "question": "What number is on the jersey?", "answer": "23", "gt_answers": [ "6" ], "image_path": "./data/STVQA/VisualGenome/1/2363604.jpg", "model_name": "BLIP2" }, { "question": "What kind of candy is this?", "answer": "gummy bears", "gt_answers": [ "HERSHEY'S" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000026297.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the truck in the lower left corner?", "answer": "u s airways", "gt_answers": [ "860" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000475763.jpg", "model_name": "BLIP2" }, { "question": "What is on sale?`", "answer": "cattle", "gt_answers": [ "Computer" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000107353.jpg", "model_name": "BLIP2" }, { "question": "Whose stocking is this?", "answer": "mano", "gt_answers": [ "Manoj" ], "image_path": "./data/STVQA/imageNet/n03026506_1128.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of this store?", "answer": "denim store", "gt_answers": [ "Bread & Butter" ], "image_path": "./data/STVQA/icdar/img_799.jpg", "model_name": "BLIP2" }, { "question": "What does the stop sign say?", "answer": "crosswalk", "gt_answers": [ "STOP FOR PEDESTRIAN IN CROSSWALK" ], "image_path": "./data/STVQA/VisualGenome/1/2367176.jpg", "model_name": "BLIP2" }, { "question": "Where is the license plate from?", "answer": "new jersey", "gt_answers": [ "ONTARIO" ], "image_path": "./data/STVQA/VisualGenome/1/2339542.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "tah", "gt_answers": [ "J16 69L" ], "image_path": "./data/STVQA/VisualGenome/1/2339542.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the blimb?", "answer": "holden", "gt_answers": [ "Holden" ], "image_path": "./data/STVQA/imageNet/n02692877_9965.JPEG", "model_name": "BLIP2" }, { "question": "Who manufactured the can of fruit shown on the left side of this photo?", "answer": "person", "gt_answers": [ "Del Monte" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000506045.jpg", "model_name": "BLIP2" }, { "question": "What white word appears at the top right of the image?", "answer": "person", "gt_answers": [ "coffee" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000347405.jpg", "model_name": "BLIP2" }, { "question": "What black word appears below the jar?", "answer": "person", "gt_answers": [ "ESPRESSO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000347405.jpg", "model_name": "BLIP2" }, { "question": "whats the only full word written here?", "answer": "person", "gt_answers": [ "and" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000212070.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the guy's hoodie?", "answer": "i'm a nerd", "gt_answers": [ "DRUG FREE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000446834.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the arrow?", "answer": "no parking", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/VisualGenome/2/786.jpg", "model_name": "BLIP2" }, { "question": "What is the first letter in white at the top left of the photo?", "answer": "r", "gt_answers": [ "r" ], "image_path": "./data/STVQA/imageNet/n03843555_11276.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "be smart don't walk in the street", "gt_answers": [ "BE SMART Don't Walk in the Streey", "BE SMART" ], "image_path": "./data/STVQA/VisualGenome/2/2409222.jpg", "model_name": "BLIP2" }, { "question": "What colour is the main motorcycle?", "answer": "black and green", "gt_answers": [ "Black and green", "black" ], "image_path": "./data/STVQA/VisualGenome/1/2370900.jpg", "model_name": "BLIP2" }, { "question": "What colour bike is the man riding?", "answer": "green", "gt_answers": [ "Red" ], "image_path": "./data/STVQA/VisualGenome/1/2370900.jpg", "model_name": "BLIP2" }, { "question": "Where is the white sign pointing to?", "answer": "a building", "gt_answers": [ "BELLE ISLE" ], "image_path": "./data/STVQA/VisualGenome/1/2331861.jpg", "model_name": "BLIP2" }, { "question": "What is written on the truck door?", "answer": "person", "gt_answers": [ "GO USA", "USA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000281652.jpg", "model_name": "BLIP2" }, { "question": "What shape are the headlights on the truck?", "answer": "square", "gt_answers": [ "round" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000281652.jpg", "model_name": "BLIP2" }, { "question": "What brand is this computer?", "answer": "dell", "gt_answers": [ "DELL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000311395.jpg", "model_name": "BLIP2" }, { "question": "Who made this computer?", "answer": "dell", "gt_answers": [ "DELL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000311395.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name written on the label of the bottle?", "answer": "person", "gt_answers": [ "amstel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000458604.jpg", "model_name": "BLIP2" }, { "question": "What is the mayonnaise brand?", "answer": "person", "gt_answers": [ "CAINS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000165684.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the racquet frame?", "answer": "person", "gt_answers": [ "90" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048944.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the wall say in big letters?", "answer": "no smoking", "gt_answers": [ "BUSINESS LOUNGE" ], "image_path": "./data/STVQA/VisualGenome/1/2322469.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "person", "gt_answers": [ "Logan St.", "Logan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000132123.jpg", "model_name": "BLIP2" }, { "question": "What does the arrow demonstrate?", "answer": "the direction of traffic", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000132123.jpg", "model_name": "BLIP2" }, { "question": "What is on the red sign?", "answer": "no parking sign", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000132123.jpg", "model_name": "BLIP2" }, { "question": "what color is this bench?", "answer": "red", "gt_answers": [ "red bench" ], "image_path": "./data/STVQA/VisualGenome/1/2341781.jpg", "model_name": "BLIP2" }, { "question": "What is the first letter listed on the end of the beam?", "answer": "b", "gt_answers": [ "C" ], "image_path": "./data/STVQA/imageNet/n02777292_14309.JPEG", "model_name": "BLIP2" }, { "question": "What is written in gold letters on the sign attached to wooden posts?", "answer": "person", "gt_answers": [ "MIURA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000491837.jpg", "model_name": "BLIP2" }, { "question": "What digits are written on the blue sign attached to the wooden post?", "answer": "0", "gt_answers": [ "30-19" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000491837.jpg", "model_name": "BLIP2" }, { "question": "What number is after Ginza", "answer": "4", "gt_answers": [ "6" ], "image_path": "./data/STVQA/VisualGenome/1/2352827.jpg", "model_name": "BLIP2" }, { "question": "What word is after the -?", "answer": "street", "gt_answers": [ "chome" ], "image_path": "./data/STVQA/VisualGenome/1/2352827.jpg", "model_name": "BLIP2" }, { "question": "What word is before 6?", "answer": "5", "gt_answers": [ "Ginza" ], "image_path": "./data/STVQA/VisualGenome/1/2352827.jpg", "model_name": "BLIP2" }, { "question": "What is the soda brand in the silver box?", "answer": "person", "gt_answers": [ "Pepsi" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000112459.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the product?", "answer": "darigold chocolate milk", "gt_answers": [ "DARIGOLD" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019180.jpg", "model_name": "BLIP2" }, { "question": "What vitamin does it contain?", "answer": "vitamin d", "gt_answers": [ "D" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000019180.jpg", "model_name": "BLIP2" }, { "question": "What company's brand logo is on the block?", "answer": "monstrous", "gt_answers": [ "movistar" ], "image_path": "./data/STVQA/VisualGenome/2/2403290.jpg", "model_name": "BLIP2" }, { "question": "What is the bottom word printed on the blue box?", "answer": "solar", "gt_answers": [ "SCHOOL" ], "image_path": "./data/STVQA/VisualGenome/1/2318668.jpg", "model_name": "BLIP2" }, { "question": "What is the tag number on the car?", "answer": "sc sc", "gt_answers": [ "514 MFY" ], "image_path": "./data/STVQA/VisualGenome/1/2316376.jpg", "model_name": "BLIP2" }, { "question": "|What is the number on the boys shirt inverted?|", "answer": "inverted number on the boys shirt", "gt_answers": [ "6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000155519.jpg", "model_name": "BLIP2" }, { "question": "that number?", "answer": "ten", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000486471.jpg", "model_name": "BLIP2" }, { "question": "What does the text say on the side of the plain", "answer": "air france", "gt_answers": [ "Air France" ], "image_path": "./data/STVQA/VisualGenome/2/2404724.jpg", "model_name": "BLIP2" }, { "question": "What company makes the syringe?", "answer": "kodak", "gt_answers": [ "KOJAK" ], "image_path": "./data/STVQA/imageNet/n04376876_30832.JPEG", "model_name": "BLIP2" }, { "question": "What do you insert into the slot?", "answer": "coins", "gt_answers": [ "COINS" ], "image_path": "./data/STVQA/VisualGenome/1/1592082.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the first floor business, on the left side of this picture?", "answer": "person", "gt_answers": [ "City Chain", "CITY CHAIN PRIMO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000336063.jpg", "model_name": "BLIP2" }, { "question": "Which is the next coming airport?", "answer": "schoenfeld", "gt_answers": [ "Schonefeld.", "Schonefeld" ], "image_path": "./data/STVQA/VisualGenome/1/2334528.jpg", "model_name": "BLIP2" }, { "question": "Which brand is being advertised?", "answer": "k2", "gt_answers": [ "Burton" ], "image_path": "./data/STVQA/VisualGenome/1/2375355.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the clothing brand seen in this picture?", "answer": "versace", "gt_answers": [ "Versace" ], "image_path": "./data/STVQA/icdar/img_697.jpg", "model_name": "BLIP2" }, { "question": "which airlines is the plane from?", "answer": "canadian airlines", "gt_answers": [ "air canada" ], "image_path": "./data/STVQA/VisualGenome/1/2326939.jpg", "model_name": "BLIP2" }, { "question": "From where is this boat?", "answer": "new york city", "gt_answers": [ "Queen City Yacht Club" ], "image_path": "./data/STVQA/imageNet/n03874293_14577.JPEG", "model_name": "BLIP2" }, { "question": "Which word has the largest font in the picture?", "answer": "tennis", "gt_answers": [ "TENNIS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000205648.jpg", "model_name": "BLIP2" }, { "question": "What clock is underneath Germany?", "answer": "green good house", "gt_answers": [ "U.S.A", "U.S.A." ], "image_path": "./data/STVQA/VisualGenome/1/2332545.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the boat?", "answer": "lighthouse", "gt_answers": [ "Pilot" ], "image_path": "./data/STVQA/VisualGenome/2/2401637.jpg", "model_name": "BLIP2" }, { "question": "What is the date of the picture?", "answer": "september 2008", "gt_answers": [ "20 Oct 2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000401083.jpg", "model_name": "BLIP2" }, { "question": "What fruit is in the plastic bags?", "answer": "oranges", "gt_answers": [ "grapes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000401083.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the boat?", "answer": "plats", "gt_answers": [ "PILOTS" ], "image_path": "./data/STVQA/VisualGenome/1/2354714.jpg", "model_name": "BLIP2" }, { "question": "What is the monument named on this bus?", "answer": "the monument", "gt_answers": [ "Stonehenge" ], "image_path": "./data/STVQA/VisualGenome/1/2326548.jpg", "model_name": "BLIP2" }, { "question": "What is world famous ?", "answer": "statue of liberty", "gt_answers": [ "Pizza" ], "image_path": "./data/STVQA/VisualGenome/1/2331884.jpg", "model_name": "BLIP2" }, { "question": "Where are they located ?", "answer": "on the sidewalk", "gt_answers": [ "San Diego" ], "image_path": "./data/STVQA/VisualGenome/1/2331884.jpg", "model_name": "BLIP2" }, { "question": "How much is the jackpot?", "answer": "$2 million", "gt_answers": [ "15 million" ], "image_path": "./data/STVQA/IIIT_text/img_000848.jpg", "model_name": "BLIP2" }, { "question": "What type of dog do you see in the picture?", "answer": "bloodhound", "gt_answers": [ "bloodhound" ], "image_path": "./data/STVQA/imageNet/n02088466_4715.JPEG", "model_name": "BLIP2" }, { "question": "What does the printed text at the top say?", "answer": "players' cigarettes", "gt_answers": [ "PLAYER'S CIGARETTES" ], "image_path": "./data/STVQA/imageNet/n02088466_4715.JPEG", "model_name": "BLIP2" }, { "question": "What does it say in the man's blue shirt?", "answer": "hot dog", "gt_answers": [ "New York" ], "image_path": "./data/STVQA/VisualGenome/1/2350153.jpg", "model_name": "BLIP2" }, { "question": "What sign is above the door?", "answer": "exit sign", "gt_answers": [ "EXIT" ], "image_path": "./data/STVQA/VisualGenome/2/254.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the left side say?", "answer": "mrt", "gt_answers": [ "Blush!" ], "image_path": "./data/STVQA/icdar/img_881.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the man's chest?", "answer": "one", "gt_answers": [ "820" ], "image_path": "./data/STVQA/VisualGenome/1/2354995.jpg", "model_name": "BLIP2" }, { "question": "What is written in the red circle?", "answer": "claro", "gt_answers": [ "Claro" ], "image_path": "./data/STVQA/imageNet/n03777754_7428.JPEG", "model_name": "BLIP2" }, { "question": "What meal is this?", "answer": "lasagna", "gt_answers": [ "Chile relleno" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012696.jpg", "model_name": "BLIP2" }, { "question": "What letters are written by the number 21?", "answer": "a, b, c, d, e, f, g, h, i, j, k", "gt_answers": [ "MM" ], "image_path": "./data/STVQA/imageNet/n03692522_1132.JPEG", "model_name": "BLIP2" }, { "question": "What's written in bold letters in the center of the item on the right?", "answer": "triplet", "gt_answers": [ "TRIPLET" ], "image_path": "./data/STVQA/imageNet/n03692522_1132.JPEG", "model_name": "BLIP2" }, { "question": "How many people are in the alley?", "answer": "two", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000329726.jpg", "model_name": "BLIP2" }, { "question": "What does the sign say?", "answer": "hollywood", "gt_answers": [ "Hollywood" ], "image_path": "./data/STVQA/IIIT_text/img_000582.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the yellow shirt?", "answer": "person", "gt_answers": [ "Lobster" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355593.jpg", "model_name": "BLIP2" }, { "question": "What is the noun form of the word choose?", "answer": "election", "gt_answers": [ "Choice" ], "image_path": "./data/STVQA/imageNet/n04023962_28195.JPEG", "model_name": "BLIP2" }, { "question": "What website is listed on the image?", "answer": "mapleimage com", "gt_answers": [ "naplesimage.com" ], "image_path": "./data/STVQA/imageNet/n04273569_25629.JPEG", "model_name": "BLIP2" }, { "question": "What is the maximum percentage you can save at this Store?", "answer": "70", "gt_answers": [ "70%" ], "image_path": "./data/STVQA/icdar/img_392.jpg", "model_name": "BLIP2" }, { "question": "What word is in pink/red?", "answer": "glide", "gt_answers": [ "glide" ], "image_path": "./data/STVQA/icdar/img_680.jpg", "model_name": "BLIP2" }, { "question": "What complete word is seen directly behind the running woman?", "answer": "games", "gt_answers": [ "Games" ], "image_path": "./data/STVQA/VisualGenome/1/2334284.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the boat?", "answer": "venice", "gt_answers": [ "Accademia" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000056193.jpg", "model_name": "BLIP2" }, { "question": "What year was this photo taken?", "answer": "2009", "gt_answers": [ "2009" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000536059.jpg", "model_name": "BLIP2" }, { "question": "Which car manufacture is shown on the advertising boards?", "answer": "honda", "gt_answers": [ "Kia" ], "image_path": "./data/STVQA/VisualGenome/1/2361363.jpg", "model_name": "BLIP2" }, { "question": "What year was this picture taken?", "answer": "1984", "gt_answers": [ "2013" ], "image_path": "./data/STVQA/VisualGenome/1/2364355.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number on the vehicle shown?", "answer": "san francisco", "gt_answers": [ "0257b" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000256576.jpg", "model_name": "BLIP2" }, { "question": "What does the word in blue say on the back of the vehicle?", "answer": "police", "gt_answers": [ "police" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000256576.jpg", "model_name": "BLIP2" }, { "question": "What is the number located on the back right of the vehicle?", "answer": "san francisco", "gt_answers": [ "1877-4fps-411", "1-877-4fps-411" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000256576.jpg", "model_name": "BLIP2" }, { "question": "What is toward the left?", "answer": "detour", "gt_answers": [ "Detour" ], "image_path": "./data/STVQA/VisualGenome/1/2374603.jpg", "model_name": "BLIP2" }, { "question": "What is written next to the arrow on the black sign?", "answer": "stop sign", "gt_answers": [ "Entrance" ], "image_path": "./data/STVQA/VisualGenome/1/2342596.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the license plate?", "answer": "london", "gt_answers": [ "BGJ 115" ], "image_path": "./data/STVQA/IIIT_text/4400.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the top street sign?", "answer": "2", "gt_answers": [ "2000" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000143995.jpg", "model_name": "BLIP2" }, { "question": "What is the weather currently like?", "answer": "it is raining", "gt_answers": [ "Stormy" ], "image_path": "./data/STVQA/imageNet/n02794156_7077.JPEG", "model_name": "BLIP2" }, { "question": "What day is it?", "answer": "monday", "gt_answers": [ "Day 14" ], "image_path": "./data/STVQA/imageNet/n07697537_3932.JPEG", "model_name": "BLIP2" }, { "question": "WHICH PHOTOGRAPHY IS IT", "answer": "it is a photograph of a bridge in a city", "gt_answers": [ "MLECHUGA PHOTOGRAPHY" ], "image_path": "./data/STVQA/IIIT_text/5251.jpg", "model_name": "BLIP2" }, { "question": "What is the name in the bottom left corner?", "answer": "person", "gt_answers": [ "Tracey Sanders" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000533308.jpg", "model_name": "BLIP2" }, { "question": "What year is given in the bottom left corner?", "answer": "2010", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000533308.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street on the yellow sign?", "answer": "elm street", "gt_answers": [ "Main Street" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000417129.jpg", "model_name": "BLIP2" }, { "question": "What company uses the truck?", "answer": "towing company", "gt_answers": [ "Abbey Transport LTD", "ABBEY" ], "image_path": "./data/STVQA/VisualGenome/1/1592233.jpg", "model_name": "BLIP2" }, { "question": "How high does a jackpot have to be to be paid for by the attendant?", "answer": "the jackpot has to be at least $100", "gt_answers": [ "1000" ], "image_path": "./data/STVQA/imageNet/n04243546_16210.JPEG", "model_name": "BLIP2" }, { "question": "what is the web address given", "answer": "www claas com", "gt_answers": [ "freefoto.com" ], "image_path": "./data/STVQA/imageNet/n03496892_39042.JPEG", "model_name": "BLIP2" }, { "question": "Which street sign is higher than the other?", "answer": "no parking sign", "gt_answers": [ "HIGH" ], "image_path": "./data/STVQA/VisualGenome/2/2411897.jpg", "model_name": "BLIP2" }, { "question": "What word appears twice on the remotes?", "answer": "control", "gt_answers": [ "Fear" ], "image_path": "./data/STVQA/imageNet/n04074963_1277.JPEG", "model_name": "BLIP2" }, { "question": "What word is on the third remote?", "answer": "remote", "gt_answers": [ "dumb" ], "image_path": "./data/STVQA/imageNet/n04074963_1277.JPEG", "model_name": "BLIP2" }, { "question": "What word is on the first remote?", "answer": "remote", "gt_answers": [ "Hate" ], "image_path": "./data/STVQA/imageNet/n04074963_1277.JPEG", "model_name": "BLIP2" }, { "question": "What is written in black text on the sign?", "answer": "humped zebra crossing", "gt_answers": [ "Humped Zebra Crossing" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414416.jpg", "model_name": "BLIP2" }, { "question": "What is written on the skaters t-shirt?", "answer": "uncon", "gt_answers": [ "bacon" ], "image_path": "./data/STVQA/VisualGenome/1/2376700.jpg", "model_name": "BLIP2" }, { "question": "What does the caption say on the photo?", "answer": "splash of milk", "gt_answers": [ "GOOD TO THE VERY LAST DROP" ], "image_path": "./data/STVQA/imageNet/n07932039_9087.JPEG", "model_name": "BLIP2" }, { "question": "what is written at front side of the train?", "answer": "person", "gt_answers": [ "First" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000407814.jpg", "model_name": "BLIP2" }, { "question": "What does the white sign say?", "answer": "road closed", "gt_answers": [ "Road closed" ], "image_path": "./data/STVQA/VisualGenome/1/713303.jpg", "model_name": "BLIP2" }, { "question": "What does the orange sign say?", "answer": "road closed", "gt_answers": [ "Slow" ], "image_path": "./data/STVQA/VisualGenome/1/713303.jpg", "model_name": "BLIP2" }, { "question": "How many traffic cones are in the picture?", "answer": "two", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/VisualGenome/1/713303.jpg", "model_name": "BLIP2" }, { "question": "What are the words on the green bar at the top of the box?", "answer": "classic waffle maker", "gt_answers": [ "WAFFLE MAKER" ], "image_path": "./data/STVQA/imageNet/n04542943_6049.JPEG", "model_name": "BLIP2" }, { "question": "What series is this waffle maker?", "answer": "classic series", "gt_answers": [ "Brushed Stainless Series", "BRUSHED STAINLESS" ], "image_path": "./data/STVQA/imageNet/n04542943_6049.JPEG", "model_name": "BLIP2" }, { "question": "How many settings is on this waffle maker?", "answer": "4", "gt_answers": [ "5" ], "image_path": "./data/STVQA/imageNet/n04542943_6049.JPEG", "model_name": "BLIP2" }, { "question": "What does the watermark read?", "answer": "harry potter", "gt_answers": [ "Ilya Spirin Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2330269.jpg", "model_name": "BLIP2" }, { "question": "What does the large word on the front of the truck say?", "answer": "co", "gt_answers": [ "COPP" ], "image_path": "./data/STVQA/VisualGenome/2/2409185.jpg", "model_name": "BLIP2" }, { "question": "What is the flavor of this Healthy Choice meal?", "answer": "tomato basil", "gt_answers": [ "Fresh Mixers" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013596.jpg", "model_name": "BLIP2" }, { "question": "What flag is up?", "answer": "american flag", "gt_answers": [ "American" ], "image_path": "./data/STVQA/VisualGenome/1/2373784.jpg", "model_name": "BLIP2" }, { "question": "WHAT TYPE OF BUS IS IT", "answer": "school bus", "gt_answers": [ "SCHOOL BUS" ], "image_path": "./data/STVQA/IIIT_text/img_001129.jpg", "model_name": "BLIP2" }, { "question": "WHAT COLOR IS THE BUS", "answer": "yellow", "gt_answers": [ "YELLOW" ], "image_path": "./data/STVQA/IIIT_text/img_001129.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the storefront in red?", "answer": "person", "gt_answers": [ "Snackbar" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000535113.jpg", "model_name": "BLIP2" }, { "question": "What is the name written on top of the phone number on the trailer?", "answer": "person", "gt_answers": [ "WILLIAM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000525580.jpg", "model_name": "BLIP2" }, { "question": "what is he playing?", "answer": "baseball", "gt_answers": [ "baseball" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000326628.jpg", "model_name": "BLIP2" }, { "question": "What is the biggest word printed in red on the blue sign?", "answer": "travel", "gt_answers": [ "Unlimited" ], "image_path": "./data/STVQA/icdar/img_525.jpg", "model_name": "BLIP2" }, { "question": "What color is written on the bus sign?", "answer": "blue", "gt_answers": [ "Orange" ], "image_path": "./data/STVQA/VisualGenome/1/2322599.jpg", "model_name": "BLIP2" }, { "question": "what is written on the blue shirt?", "answer": "person", "gt_answers": [ "Legends" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462944.jpg", "model_name": "BLIP2" }, { "question": "What is shown in the picture?", "answer": "a tube of chapstick", "gt_answers": [ "chapstick", "Chap Stick" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012544.jpg", "model_name": "BLIP2" }, { "question": "What brand is the camera?", "answer": "nikon", "gt_answers": [ "Kinaflex" ], "image_path": "./data/STVQA/imageNet/n04069434_296.JPEG", "model_name": "BLIP2" }, { "question": "What street?", "answer": "50 street", "gt_answers": [ "50" ], "image_path": "./data/STVQA/VisualGenome/1/2324824.jpg", "model_name": "BLIP2" }, { "question": "what is written in black?", "answer": "horizon", "gt_answers": [ "Horizon" ], "image_path": "./data/STVQA/VisualGenome/1/2347325.jpg", "model_name": "BLIP2" }, { "question": "What website is address does the red text show?", "answer": "http://www", "gt_answers": [ "CVS.com" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002753.jpg", "model_name": "BLIP2" }, { "question": "What is written on the blue snowboard?", "answer": "a name", "gt_answers": [ "GMR AR 158-01" ], "image_path": "./data/STVQA/VisualGenome/1/2317572.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boat?", "answer": "dorset", "gt_answers": [ "Doretta L" ], "image_path": "./data/STVQA/VisualGenome/1/2331393.jpg", "model_name": "BLIP2" }, { "question": "Who has copyright over this image?", "answer": "the photographer", "gt_answers": [ "Bridget Samuels" ], "image_path": "./data/STVQA/VisualGenome/2/2415485.jpg", "model_name": "BLIP2" }, { "question": "What year was it copyrighted?", "answer": "1976", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/VisualGenome/2/2415485.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the business?", "answer": "beers on call", "gt_answers": [ "Geeks on call" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000433397.jpg", "model_name": "BLIP2" }, { "question": "What is this advertisement for?", "answer": "waste management", "gt_answers": [ "wastetech", "PHS WASTETECH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000045494.jpg", "model_name": "BLIP2" }, { "question": "What is phone number for Wasteetch?", "answer": "030 333 0000", "gt_answers": [ "0870 241 7491" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000045494.jpg", "model_name": "BLIP2" }, { "question": "Who posted this picture?", "answer": "person", "gt_answers": [ "FANCYSHOTS.NL" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000127924.jpg", "model_name": "BLIP2" }, { "question": "What establishment name is written at the very top of the taller sign?", "answer": "rafael street", "gt_answers": [ "CENTRAL PARK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000545310.jpg", "model_name": "BLIP2" }, { "question": "What opportunity is indicated lower on the tall green sign?", "answer": "free parking", "gt_answers": [ "PARKING" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000545310.jpg", "model_name": "BLIP2" }, { "question": "What is the name indicated on the lower sign with the white background, on the foreground face of that sign?", "answer": " ", "gt_answers": [ "BOGRASHOV" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000545310.jpg", "model_name": "BLIP2" }, { "question": "What is written on the white sign?", "answer": "sams club", "gt_answers": [ "Sale" ], "image_path": "./data/STVQA/icdar/test_img_154.jpg", "model_name": "BLIP2" }, { "question": "What is the text inside the square?", "answer": "up the pole", "gt_answers": [ "Up the pole" ], "image_path": "./data/STVQA/imageNet/n03976657_26826.JPEG", "model_name": "BLIP2" }, { "question": "which year given", "answer": "2008", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/imageNet/n02119789_16556.JPEG", "model_name": "BLIP2" }, { "question": "What option on the dial is spelled out?", "answer": "shuffle", "gt_answers": [ "MENU" ], "image_path": "./data/STVQA/imageNet/n03584254_244.JPEG", "model_name": "BLIP2" }, { "question": "What is the number of the house?;", "answer": "2", "gt_answers": [ "5041" ], "image_path": "./data/STVQA/VisualGenome/1/2350857.jpg", "model_name": "BLIP2" }, { "question": "what's street name ?", "answer": "broadway", "gt_answers": [ "Broadway", "BROADWAY W 120 ST" ], "image_path": "./data/STVQA/VisualGenome/1/2357281.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the screen of the bus window?", "answer": "person", "gt_answers": [ "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000332684.jpg", "model_name": "BLIP2" }, { "question": "what two words are at the top of the black sign?", "answer": "road closed", "gt_answers": [ "area closed" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000110217.jpg", "model_name": "BLIP2" }, { "question": "what word is on the red sign?", "answer": "stop", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000110217.jpg", "model_name": "BLIP2" }, { "question": "when is the area closed?", "answer": "during hunting season", "gt_answers": [ "SUNSET-8AM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000110217.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the bottom of the case?", "answer": "happy holidays", "gt_answers": [ "She can not swim, nor can I" ], "image_path": "./data/STVQA/imageNet/n03908618_40842.JPEG", "model_name": "BLIP2" }, { "question": "What are the numbers on the bus?", "answer": "the numbers on the bus are person", "gt_answers": [ "792" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000551679.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "person", "gt_answers": [ "702", "792" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000551679.jpg", "model_name": "BLIP2" }, { "question": "What is written on the plane?", "answer": "person", "gt_answers": [ "MABLE CODY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000360352.jpg", "model_name": "BLIP2" }, { "question": "What does Bell holding on her hand?", "answer": "tennis racket", "gt_answers": [ "bell" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000271282.jpg", "model_name": "BLIP2" }, { "question": "What is written at the bottom of the photo?", "answer": "i love you", "gt_answers": [ "J.KHONGHUNPHOTO" ], "image_path": "./data/STVQA/imageNet/n03131574_5704.JPEG", "model_name": "BLIP2" }, { "question": "What is the title of the book shown?", "answer": "eight cousins", "gt_answers": [ "EIGHT COUSINS" ], "image_path": "./data/STVQA/imageNet/n07248320_98323.JPEG", "model_name": "BLIP2" }, { "question": "What author is credited on the book cover?", "answer": "louisa may alcott", "gt_answers": [ "LOUISA MAY ALCOTT" ], "image_path": "./data/STVQA/imageNet/n07248320_98323.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the dug out?", "answer": "person", "gt_answers": [ "340" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000234234.jpg", "model_name": "BLIP2" }, { "question": "What is the number written the most upper left on the sign behind the person?", "answer": "4", "gt_answers": [ "3071" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000353754.jpg", "model_name": "BLIP2" }, { "question": "What number is written in the bottom right side?", "answer": "1", "gt_answers": [ "23/4-07" ], "image_path": "./data/STVQA/imageNet/n02094114_1658.JPEG", "model_name": "BLIP2" }, { "question": "What is the name showing between the dog's paws?", "answer": "alexander", "gt_answers": [ "Asbjorn" ], "image_path": "./data/STVQA/imageNet/n02094114_1658.JPEG", "model_name": "BLIP2" }, { "question": "What district is the Police car from?", "answer": "4th district", "gt_answers": [ "3rd" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000107011.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the right side of the bumper of the Police car?", "answer": "57", "gt_answers": [ "3071" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000107011.jpg", "model_name": "BLIP2" }, { "question": "What two name brands are printed on the wall?", "answer": "person and person", "gt_answers": [ "adidas FedEx" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000256057.jpg", "model_name": "BLIP2" }, { "question": "Which airline is shown on the tail?", "answer": "all nippon airlines", "gt_answers": [ "ANA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000481049.jpg", "model_name": "BLIP2" }, { "question": "What is the plane's identifying number?", "answer": "person", "gt_answers": [ "N7B7EX", "N787EX" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000481049.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the TV station shown on the sign at the top?", "answer": "fox sports", "gt_answers": [ "FSN" ], "image_path": "./data/STVQA/VisualGenome/1/2317417.jpg", "model_name": "BLIP2" }, { "question": "What is the number of this bus?", "answer": "3", "gt_answers": [ "6" ], "image_path": "./data/STVQA/VisualGenome/1/2319281.jpg", "model_name": "BLIP2" }, { "question": "What is the color of the line that the train on the left is on?", "answer": "red", "gt_answers": [ "Orange line", "ORANGE" ], "image_path": "./data/STVQA/VisualGenome/1/2345629.jpg", "model_name": "BLIP2" }, { "question": "What is written on the pink shield", "answer": "samsung", "gt_answers": [ "LIVE THE LIFE YOU LOVE" ], "image_path": "./data/STVQA/icdar/img_467.jpg", "model_name": "BLIP2" }, { "question": "How many chicks are there?", "answer": "three", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/IIIT_text/7026.jpg", "model_name": "BLIP2" }, { "question": "What color is the middle chick?", "answer": "black", "gt_answers": [ "Black" ], "image_path": "./data/STVQA/IIIT_text/7026.jpg", "model_name": "BLIP2" }, { "question": "What kind of candle is it?", "answer": "a wedding candle", "gt_answers": [ "Marry Candle" ], "image_path": "./data/STVQA/imageNet/n02948072_11043.JPEG", "model_name": "BLIP2" }, { "question": "What is the word at the top?", "answer": "crystal", "gt_answers": [ "Cristal" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028574.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of board reference?", "answer": "", "gt_answers": [ "500 atp world" ], "image_path": "./data/STVQA/VisualGenome/1/2320398.jpg", "model_name": "BLIP2" }, { "question": "What is the team name on the batter's jersey?", "answer": "orioles", "gt_answers": [ "orioles" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000382695.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this airline?", "answer": "air canada", "gt_answers": [ "Air Transat" ], "image_path": "./data/STVQA/VisualGenome/1/2323834.jpg", "model_name": "BLIP2" }, { "question": "what is the number above the bench?", "answer": "3", "gt_answers": [ "340" ], "image_path": "./data/STVQA/VisualGenome/1/2373871.jpg", "model_name": "BLIP2" }, { "question": "this is a toyota what?", "answer": "4runner", "gt_answers": [ "TUNDRA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000125377.jpg", "model_name": "BLIP2" }, { "question": "What does it say under the blue house on the white remote?", "answer": "dvr", "gt_answers": [ "HOME" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000422343.jpg", "model_name": "BLIP2" }, { "question": "what is the number in the red t-shirt player who is standing behind yellow t-shirt in the field?", "answer": "4", "gt_answers": [ "8" ], "image_path": "./data/STVQA/VisualGenome/1/2317119.jpg", "model_name": "BLIP2" }, { "question": "What avenue is shown?", "answer": "person", "gt_answers": [ "IMIQTAVIK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000039926.jpg", "model_name": "BLIP2" }, { "question": "What does the orange text say?", "answer": "a man standing in a flooded street", "gt_answers": [ "Wells Fargo" ], "image_path": "./data/STVQA/VisualGenome/1/2342273.jpg", "model_name": "BLIP2" }, { "question": "what is the contact number on the to let board?", "answer": "person", "gt_answers": [ "01244 408200" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000058597.jpg", "model_name": "BLIP2" }, { "question": "what are the numerals written on either side of the clock?", "answer": "roman numerals", "gt_answers": [ "18 97" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000058597.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the top left oft he ad?", "answer": "remove", "gt_answers": [ "Remove" ], "image_path": "./data/STVQA/imageNet/n03109150_8960.JPEG", "model_name": "BLIP2" }, { "question": "What is number listed on the front of the bike?", "answer": "1", "gt_answers": [ "67" ], "image_path": "./data/STVQA/VisualGenome/1/2346985.jpg", "model_name": "BLIP2" }, { "question": "what's flight no.?", "answer": "flight no", "gt_answers": [ "N650KB" ], "image_path": "./data/STVQA/VisualGenome/1/2337307.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the website on this photo?", "answer": "huangshan photography", "gt_answers": [ "www.bbker.com" ], "image_path": "./data/STVQA/imageNet/n09468604_16958.JPEG", "model_name": "BLIP2" }, { "question": "What is the title of the book with the house on the cover?", "answer": "person's house", "gt_answers": [ "Little Pink House" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000555339.jpg", "model_name": "BLIP2" }, { "question": "What does the red sign say?", "answer": "stop", "gt_answers": [ "Stop.", "STOP" ], "image_path": "./data/STVQA/VisualGenome/2/2409400.jpg", "model_name": "BLIP2" }, { "question": "What is the speed limit?", "answer": "50 mph", "gt_answers": [ "15" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160818.jpg", "model_name": "BLIP2" }, { "question": "What is the first phrase on the screen/", "answer": "psp emulator", "gt_answers": [ "Please wait..." ], "image_path": "./data/STVQA/imageNet/n03485407_6288.JPEG", "model_name": "BLIP2" }, { "question": "What drink is on the sign?", "answer": "coca cola", "gt_answers": [ "Coca-cola", "COCA COLA" ], "image_path": "./data/STVQA/VisualGenome/2/1808.jpg", "model_name": "BLIP2" }, { "question": "How much was the bet?", "answer": "the bet was 100 credits", "gt_answers": [ "$0.25" ], "image_path": "./data/STVQA/imageNet/n04243546_29962.JPEG", "model_name": "BLIP2" }, { "question": "How much did the person win?", "answer": "$600", "gt_answers": [ "$2.07" ], "image_path": "./data/STVQA/imageNet/n04243546_29962.JPEG", "model_name": "BLIP2" }, { "question": "What is the license plate number of the black car?", "answer": "person", "gt_answers": [ "HGX 6946" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000511301.jpg", "model_name": "BLIP2" }, { "question": "Where is the Zipcar Street Team located?", "answer": "the zipcar street team is located in new york city", "gt_answers": [ "ON THE CORNER OF WILLIAM PENN PL AND 5TH AVE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000511301.jpg", "model_name": "BLIP2" }, { "question": "What is the car number?", "answer": "no number", "gt_answers": [ "540425" ], "image_path": "./data/STVQA/VisualGenome/1/2331542.jpg", "model_name": "BLIP2" }, { "question": "What kind of soda is shown?", "answer": "coke", "gt_answers": [ "Diet cream soda" ], "image_path": "./data/STVQA/VisualGenome/2/2405907.jpg", "model_name": "BLIP2" }, { "question": "Who makes the soda?", "answer": "coca cola", "gt_answers": [ "HyVee" ], "image_path": "./data/STVQA/VisualGenome/2/2405907.jpg", "model_name": "BLIP2" }, { "question": "What is doing the women?", "answer": "dancing", "gt_answers": [ "Singing" ], "image_path": "./data/STVQA/VisualGenome/1/2339173.jpg", "model_name": "BLIP2" }, { "question": "Where type of blackthorns are sold?", "answer": "in shops", "gt_answers": [ "Irish" ], "image_path": "./data/STVQA/VisualGenome/1/2355129.jpg", "model_name": "BLIP2" }, { "question": "What type of crops are sold?", "answer": "vegetables and fruits", "gt_answers": [ "Riding" ], "image_path": "./data/STVQA/VisualGenome/1/2355129.jpg", "model_name": "BLIP2" }, { "question": "What is the last item listed on the right sign?", "answer": "sticks", "gt_answers": [ "Swordsticks" ], "image_path": "./data/STVQA/VisualGenome/1/2355129.jpg", "model_name": "BLIP2" }, { "question": "What is written on the fender of the motorcycle?", "answer": "person", "gt_answers": [ "Dallas police" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000184397.jpg", "model_name": "BLIP2" }, { "question": "What might you earn instead of real money in this image if you followed the guidance?", "answer": "points", "gt_answers": [ "STARS" ], "image_path": "./data/STVQA/icdar/test_img_155.jpg", "model_name": "BLIP2" }, { "question": "what is the street sign in the middle say?", "answer": "dan whitney street", "gt_answers": [ "14th street" ], "image_path": "./data/STVQA/VisualGenome/1/2343070.jpg", "model_name": "BLIP2" }, { "question": "Which Canadian province is named on the awning?", "answer": "quebec", "gt_answers": [ "QUEBEC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000553700.jpg", "model_name": "BLIP2" }, { "question": "What is the name of company on the red sign?", "answer": "person", "gt_answers": [ "Big Horn Brewing Co." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000136832.jpg", "model_name": "BLIP2" }, { "question": "what is on the sign in blue?", "answer": "'do not enter'", "gt_answers": [ "T" ], "image_path": "./data/STVQA/VisualGenome/2/3886.jpg", "model_name": "BLIP2" }, { "question": "What is the name of one sponsor for this event?", "answer": "person", "gt_answers": [ "eastern daily press" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000494721.jpg", "model_name": "BLIP2" }, { "question": "What does it say at the very top of the blue road sign?", "answer": "bangkok international airport", "gt_answers": [ "Rabindra Sadan" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000378155.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the road heading left on the road sign?", "answer": "bangalore road", "gt_answers": [ "Lee Road" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000378155.jpg", "model_name": "BLIP2" }, { "question": "what is the address refer by bus?", "answer": "the address refer by bus", "gt_answers": [ "73 ARMITAGE" ], "image_path": "./data/STVQA/VisualGenome/1/2367541.jpg", "model_name": "BLIP2" }, { "question": "What is written on the pillow?", "answer": "peace", "gt_answers": [ "Peace" ], "image_path": "./data/STVQA/VisualGenome/1/2362220.jpg", "model_name": "BLIP2" }, { "question": "What is written on the yellow board", "answer": "time for a pint", "gt_answers": [ "Time NO TIES PLEASE For a Pint" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000246787.jpg", "model_name": "BLIP2" }, { "question": "What is the text on the red train?", "answer": "sc sc", "gt_answers": [ "DB", "DB 185 334-7" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000310108.jpg", "model_name": "BLIP2" }, { "question": "What European country is this in?", "answer": "germany", "gt_answers": [ "UK" ], "image_path": "./data/STVQA/imageNet/n02088094_4462.JPEG", "model_name": "BLIP2" }, { "question": "What are the letters on the right car?", "answer": "em", "gt_answers": [ "SNCF" ], "image_path": "./data/STVQA/VisualGenome/1/2336513.jpg", "model_name": "BLIP2" }, { "question": "name on the banner with white background", "answer": "on the banner with white background", "gt_answers": [ "thermen lamer" ], "image_path": "./data/STVQA/VisualGenome/1/2355724.jpg", "model_name": "BLIP2" }, { "question": "What district does this school belong to?", "answer": "williamsport school district", "gt_answers": [ "Sterling School District 2", "STERLING SCHOOL" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008177.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the card?", "answer": "john smith", "gt_answers": [ "Oma" ], "image_path": "./data/STVQA/VisualGenome/2/4796.jpg", "model_name": "BLIP2" }, { "question": "What does the sign on the right say?", "answer": "no parking", "gt_answers": [ "Duong pasteur" ], "image_path": "./data/STVQA/VisualGenome/2/2405748.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON BUS SHELTER ADVERTISEMENT BOARD?", "answer": "nyc subway", "gt_answers": [ "FREE MOVE? FOR REAL." ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000141200.jpg", "model_name": "BLIP2" }, { "question": "what is the name of match box?", "answer": "safety matches", "gt_answers": [ "Safety Matches" ], "image_path": "./data/STVQA/imageNet/n03729826_10893.JPEG", "model_name": "BLIP2" }, { "question": "How many matches does this box contains?", "answer": "50 matches", "gt_answers": [ "50 MATCHES" ], "image_path": "./data/STVQA/imageNet/n03729826_10893.JPEG", "model_name": "BLIP2" }, { "question": "what is the logo on match box?", "answer": "safety matches", "gt_answers": [ "HOME BRAND" ], "image_path": "./data/STVQA/imageNet/n03729826_10893.JPEG", "model_name": "BLIP2" }, { "question": "What is in the middle of the street arch?", "answer": "an arch", "gt_answers": [ "ENCINITAS" ], "image_path": "./data/STVQA/VisualGenome/1/2361083.jpg", "model_name": "BLIP2" }, { "question": "Who runs this bus?", "answer": "philippine national airlines", "gt_answers": [ "DAGUPAN BUS" ], "image_path": "./data/STVQA/VisualGenome/1/2362841.jpg", "model_name": "BLIP2" }, { "question": "What is the bus's number?", "answer": "no number", "gt_answers": [ "71719" ], "image_path": "./data/STVQA/VisualGenome/1/2362841.jpg", "model_name": "BLIP2" }, { "question": "what is the companies name?", "answer": "pink penny", "gt_answers": [ "pink penny" ], "image_path": "./data/STVQA/imageNet/n06359193_58785.JPEG", "model_name": "BLIP2" }, { "question": "what is this a promotion for?", "answer": "pink penny", "gt_answers": [ "A WEDDING ALBUM DESIGN BOUTIQUE" ], "image_path": "./data/STVQA/imageNet/n06359193_58785.JPEG", "model_name": "BLIP2" }, { "question": "what this in bus colour?", "answer": "purple and white", "gt_answers": [ "red", "red & white" ], "image_path": "./data/STVQA/VisualGenome/1/2347902.jpg", "model_name": "BLIP2" }, { "question": "how many number of bus?", "answer": "two", "gt_answers": [ "1" ], "image_path": "./data/STVQA/VisualGenome/1/2347902.jpg", "model_name": "BLIP2" }, { "question": "what is the bus number?", "answer": "t2", "gt_answers": [ "193" ], "image_path": "./data/STVQA/VisualGenome/1/2347902.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the Ave?", "answer": "westlake ave", "gt_answers": [ "Westlake Ave", "WESTLAKE" ], "image_path": "./data/STVQA/VisualGenome/1/2372895.jpg", "model_name": "BLIP2" }, { "question": "where did they buy the coffee?", "answer": "the coffee shop", "gt_answers": [ "Lebkov & Sons Good Food", "LEBKOV" ], "image_path": "./data/STVQA/imageNet/n07920052_15897.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant?", "answer": "the stock restaurant", "gt_answers": [ "THE SHACK RESTAURANT" ], "image_path": "./data/STVQA/VisualGenome/2/2406980.jpg", "model_name": "BLIP2" }, { "question": "what are the blue letters?", "answer": "the name of the lake", "gt_answers": [ "CMS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000431991.jpg", "model_name": "BLIP2" }, { "question": "what sign is shown below J F Kennedy Bl?", "answer": "no parking sign", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/1/2359671.jpg", "model_name": "BLIP2" }, { "question": "what street is pictured?", "answer": "king street", "gt_answers": [ "J F Kennedy Bl" ], "image_path": "./data/STVQA/VisualGenome/1/2359671.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the shop in the photo?", "answer": "no name", "gt_answers": [ "the Piper", "PIPER" ], "image_path": "./data/STVQA/VisualGenome/1/2359671.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the top front of the white bus?", "answer": "school bus", "gt_answers": [ "Mounties" ], "image_path": "./data/STVQA/VisualGenome/1/2353651.jpg", "model_name": "BLIP2" }, { "question": "What color is the boy's shirt?", "answer": "red", "gt_answers": [ "Red and white", "RED" ], "image_path": "./data/STVQA/VisualGenome/1/2322401.jpg", "model_name": "BLIP2" }, { "question": "what is the sign on bus", "answer": "mobile pizza", "gt_answers": [ "lostbrothers" ], "image_path": "./data/STVQA/VisualGenome/1/2370329.jpg", "model_name": "BLIP2" }, { "question": "what is the number of pizzeria", "answer": "two", "gt_answers": [ "908-333-8415" ], "image_path": "./data/STVQA/VisualGenome/1/2370329.jpg", "model_name": "BLIP2" }, { "question": "what is the web address given", "answer": "www.mobilepizza.com", "gt_answers": [ "www.lostbrotherspizza.com" ], "image_path": "./data/STVQA/VisualGenome/1/2370329.jpg", "model_name": "BLIP2" }, { "question": "What type of crossing is this?", "answer": "railroad crossing", "gt_answers": [ "Railroad crossing", "RAILROAD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000116732.jpg", "model_name": "BLIP2" }, { "question": "What is on the hotdog?", "answer": "ketchup and mustard", "gt_answers": [ "Cheese" ], "image_path": "./data/STVQA/VisualGenome/1/2354946.jpg", "model_name": "BLIP2" }, { "question": "How much would a FETCH DOG cost?", "answer": "$5", "gt_answers": [ "$4.25" ], "image_path": "./data/STVQA/VisualGenome/1/2354946.jpg", "model_name": "BLIP2" }, { "question": "What does the baby's shirt say?", "answer": "i love music", "gt_answers": [ "baby girl" ], "image_path": "./data/STVQA/imageNet/n02834397_24370.JPEG", "model_name": "BLIP2" }, { "question": "Is it going to be a baby boy or baby girl?", "answer": "girl", "gt_answers": [ "baby girl" ], "image_path": "./data/STVQA/imageNet/n02834397_24370.JPEG", "model_name": "BLIP2" }, { "question": "What percentage is the milk?", "answer": "2%", "gt_answers": [ "2%" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000317986.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of milk?", "answer": "person", "gt_answers": [ "Darigold" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000317986.jpg", "model_name": "BLIP2" }, { "question": "What vitamins are in the milk?", "answer": "vitamin d", "gt_answers": [ "Vitamin A & D" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000317986.jpg", "model_name": "BLIP2" }, { "question": "What name is watermarked in the bottom left?", "answer": "person", "gt_answers": [ "Koen Brauwer", "KOEN BROUWER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000226451.jpg", "model_name": "BLIP2" }, { "question": "What brand is shown in the image?", "answer": "nikon", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/imageNet/n03657121_10301.JPEG", "model_name": "BLIP2" }, { "question": "What brand is represented in this picture?", "answer": "nikon", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/imageNet/n03657121_10301.JPEG", "model_name": "BLIP2" }, { "question": "What is INFANTE's jersey number?", "answer": "person", "gt_answers": [ "4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000537543.jpg", "model_name": "BLIP2" }, { "question": "who are the photographers?", "answer": "arkive", "gt_answers": [ "S. and A. Sailer", "S. AND A.SAILER" ], "image_path": "./data/STVQA/imageNet/n02487347_19051.JPEG", "model_name": "BLIP2" }, { "question": "Who is the person on the television?", "answer": "obama", "gt_answers": [ "Barack Obama" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000435584.jpg", "model_name": "BLIP2" }, { "question": "Which animal is this designed for?", "answer": "mouse", "gt_answers": [ "Mouse" ], "image_path": "./data/STVQA/imageNet/n03794056_13377.JPEG", "model_name": "BLIP2" }, { "question": "What company owns this plane?", "answer": "fedex", "gt_answers": [ "FedEx" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000118920.jpg", "model_name": "BLIP2" }, { "question": "What is this?", "answer": "university of technology", "gt_answers": [ "INDIAN INSTITUTE OF TECHNOLOGY" ], "image_path": "./data/STVQA/IIIT_text/img_000042.jpg", "model_name": "BLIP2" }, { "question": "what number is on the building?", "answer": "201", "gt_answers": [ "400" ], "image_path": "./data/STVQA/VisualGenome/2/1490.jpg", "model_name": "BLIP2" }, { "question": "what color is the building?", "answer": "gray", "gt_answers": [ "grey" ], "image_path": "./data/STVQA/VisualGenome/2/1490.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company on the window to the right?", "answer": "jollibee", "gt_answers": [ "Jollibean" ], "image_path": "./data/STVQA/icdar/test_img_89.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store in the picture?", "answer": "jolly bean", "gt_answers": [ "Jollibean" ], "image_path": "./data/STVQA/icdar/test_img_89.jpg", "model_name": "BLIP2" }, { "question": "What is the first word of the description at the bottom of the picture?", "answer": "kitchen", "gt_answers": [ "Back" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160214.jpg", "model_name": "BLIP2" }, { "question": "What is the last word of the description at the bottom of the picture?", "answer": "person", "gt_answers": [ "section" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160214.jpg", "model_name": "BLIP2" }, { "question": "what is the title of the photo?", "answer": "the kitchen", "gt_answers": [ "back portion of new section" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000160214.jpg", "model_name": "BLIP2" }, { "question": "Who provided the image?", "answer": "nottingham vet school", "gt_answers": [ "Nottingham Vet School" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000574471.jpg", "model_name": "BLIP2" }, { "question": "what mattress company is on the bears pajamas?", "answer": "person", "gt_answers": [ "Dreams" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000011029.jpg", "model_name": "BLIP2" }, { "question": "What is written on the orange sign with the arrow pointing right?", "answer": "detour", "gt_answers": [ "DETOUR" ], "image_path": "./data/STVQA/VisualGenome/1/2342993.jpg", "model_name": "BLIP2" }, { "question": "what large electrical brand can be seen on a white banner?", "answer": "person", "gt_answers": [ "TOSHIBA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000191850.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number of this car?", "answer": "bc bc", "gt_answers": [ "CC-738-VG" ], "image_path": "./data/STVQA/IIIT_text/4253.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate?", "answer": "a cow", "gt_answers": [ "RJ-27" ], "image_path": "./data/STVQA/VisualGenome/1/2331927.jpg", "model_name": "BLIP2" }, { "question": "What word is in the bottom corner?", "answer": "ostrich", "gt_answers": [ "Selga" ], "image_path": "./data/STVQA/imageNet/n01518878_194.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the blue dustbin", "answer": "swedish for skateboard", "gt_answers": [ "esport" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000477428.jpg", "model_name": "BLIP2" }, { "question": "What does the third line of text on the shirt say?", "answer": "i love frisbee", "gt_answers": [ "I was there!" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000226294.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate on the white car?", "answer": "md md", "gt_answers": [ "1B-4967 OH", "IB-4967-OH" ], "image_path": "./data/STVQA/VisualGenome/2/839.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "person", "gt_answers": [ "255", "218" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000289231.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number on the bus?", "answer": "london ltd", "gt_answers": [ "YX60 DWY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000289231.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number on the bus?", "answer": "cbd 005", "gt_answers": [ "yn03 ncf" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000251922.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to london bridge", "gt_answers": [ "Bellfields" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000251922.jpg", "model_name": "BLIP2" }, { "question": "What brand is the byus?", "answer": "byus", "gt_answers": [ "Arriva" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000251922.jpg", "model_name": "BLIP2" }, { "question": "What is the first word of the yellow lettering?", "answer": "al", "gt_answers": [ "Copyrights" ], "image_path": "./data/STVQA/imageNet/n03729826_4215.JPEG", "model_name": "BLIP2" }, { "question": "where is the departure point of the yellow bus?", "answer": "sydney harbour bridge", "gt_answers": [ "Roosevelt Square" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000443764.jpg", "model_name": "BLIP2" }, { "question": "what is the phone number of River Ride?", "answer": "the phone number of river ride is 030 222 0000", "gt_answers": [ "33-22-555" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000443764.jpg", "model_name": "BLIP2" }, { "question": "What does the pink writing say on the black shirt?", "answer": "drop", "gt_answers": [ "Drop the bomb" ], "image_path": "./data/STVQA/imageNet/n03595614_6106.JPEG", "model_name": "BLIP2" }, { "question": "What is the number written near the front wheel of the vehicle?", "answer": "1", "gt_answers": [ "767" ], "image_path": "./data/STVQA/VisualGenome/1/2347614.jpg", "model_name": "BLIP2" }, { "question": "What is the big word on the hoodie?", "answer": "unification", "gt_answers": [ "Unification" ], "image_path": "./data/STVQA/imageNet/n04370456_6779.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the business written with yellow?", "answer": "pizzeria", "gt_answers": [ "FERRAMENTA COLORI" ], "image_path": "./data/STVQA/VisualGenome/2/3921.jpg", "model_name": "BLIP2" }, { "question": "Who took this picture?", "answer": "person", "gt_answers": [ "RAMESH RASAIYAN" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000341985.jpg", "model_name": "BLIP2" }, { "question": "What does it say on this building?", "answer": "hilton garden inn", "gt_answers": [ "Marriott" ], "image_path": "./data/STVQA/IIIT_text/img_000758.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this location?", "answer": "birmingham square", "gt_answers": [ "Marina Square" ], "image_path": "./data/STVQA/icdar/test_img_107.jpg", "model_name": "BLIP2" }, { "question": "What name is displayed at the top of the truck in this picture?", "answer": "drayage company", "gt_answers": [ "Bender" ], "image_path": "./data/STVQA/imageNet/n04467665_59160.JPEG", "model_name": "BLIP2" }, { "question": "What does it say on the top front of the truck?", "answer": "big tex", "gt_answers": [ "Bender" ], "image_path": "./data/STVQA/imageNet/n04467665_59160.JPEG", "model_name": "BLIP2" }, { "question": "what is the year printed on this photo?", "answer": "1976", "gt_answers": [ "2007" ], "image_path": "./data/STVQA/VisualGenome/1/2352861.jpg", "model_name": "BLIP2" }, { "question": "What number is the second hand pointing to?", "answer": "10", "gt_answers": [ "9" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414439.jpg", "model_name": "BLIP2" }, { "question": "Which number will the minute hand indicate next?", "answer": "12", "gt_answers": [ "7" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414439.jpg", "model_name": "BLIP2" }, { "question": "What is the upcoming hour?", "answer": "a teddy bear and a clock", "gt_answers": [ "6" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000414439.jpg", "model_name": "BLIP2" }, { "question": "How far away is Takeshiba Sanbashi?", "answer": "it is a short distance away", "gt_answers": [ "400m" ], "image_path": "./data/STVQA/imageNet/n06794110_1142.JPEG", "model_name": "BLIP2" }, { "question": "How far away is the Subway?", "answer": "5 minutes", "gt_answers": [ "300m" ], "image_path": "./data/STVQA/imageNet/n06794110_1142.JPEG", "model_name": "BLIP2" }, { "question": "How far away is Kyu Shiba-rikyu Gardens?", "answer": "from kyoto station", "gt_answers": [ "190m" ], "image_path": "./data/STVQA/imageNet/n06794110_1142.JPEG", "model_name": "BLIP2" }, { "question": "What is on the bus?", "answer": "passengers", "gt_answers": [ "NYPD" ], "image_path": "./data/STVQA/VisualGenome/2/630.jpg", "model_name": "BLIP2" }, { "question": "What letters are above the red door?", "answer": "t o p", "gt_answers": [ "HSW" ], "image_path": "./data/STVQA/VisualGenome/1/2367835.jpg", "model_name": "BLIP2" }, { "question": "What number is above the red door?", "answer": "3", "gt_answers": [ "17" ], "image_path": "./data/STVQA/VisualGenome/1/2367835.jpg", "model_name": "BLIP2" }, { "question": "What kind of FOOD is it?", "answer": "fish and chips", "gt_answers": [ "FAST", "fast food" ], "image_path": "./data/STVQA/VisualGenome/1/2367835.jpg", "model_name": "BLIP2" }, { "question": "what does the shirt say?", "answer": "i'm a character", "gt_answers": [ "I'M A CHARACTER @ #140CONF" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000136963.jpg", "model_name": "BLIP2" }, { "question": "What is written on the banana?", "answer": "welcome home", "gt_answers": [ "Welcome Home Derek", "WELCOME HOME OERGY" ], "image_path": "./data/STVQA/VisualGenome/1/2366474.jpg", "model_name": "BLIP2" }, { "question": "when was the photo taken?", "answer": "the photo was taken", "gt_answers": [ "08/03/2008" ], "image_path": "./data/STVQA/imageNet/n04493381_10628.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the back of the black tshirt?", "answer": "i love you", "gt_answers": [ "Tuska" ], "image_path": "./data/STVQA/VisualGenome/1/2336626.jpg", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "mk ii", "gt_answers": [ "OTL 267" ], "image_path": "./data/STVQA/IIIT_text/4346.jpg", "model_name": "BLIP2" }, { "question": "What is their team name", "answer": "person", "gt_answers": [ "GV" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000296614.jpg", "model_name": "BLIP2" }, { "question": "What does the bold type in the ticket in the advertisement on the left say?", "answer": "ice hockey", "gt_answers": [ "Backyard Fun" ], "image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the back of the grey jersey of the hockey player on the right?", "answer": "eddie giroux", "gt_answers": [ "Wilson" ], "image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the grey jersey?", "answer": "nhl", "gt_answers": [ "Wilson" ], "image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG", "model_name": "BLIP2" }, { "question": "What is the name on the Black jersey?", "answer": "nhl", "gt_answers": [ "Alzner" ], "image_path": "./data/STVQA/imageNet/n04019541_51874.JPEG", "model_name": "BLIP2" }, { "question": "Where is this bus going?", "answer": "to the mountains", "gt_answers": [ "Ottawa" ], "image_path": "./data/STVQA/VisualGenome/1/2340248.jpg", "model_name": "BLIP2" }, { "question": "What number fence is the horse jumping over?", "answer": "4", "gt_answers": [ "4" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000246345.jpg", "model_name": "BLIP2" }, { "question": "What is the first name of the company hosting this event?", "answer": "person", "gt_answers": [ "DODSON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000246345.jpg", "model_name": "BLIP2" }, { "question": "What word is in bold letters on the paper in the picture?", "answer": "star", "gt_answers": [ "Carving" ], "image_path": "./data/STVQA/imageNet/n02966687_6334.JPEG", "model_name": "BLIP2" }, { "question": "What is the warning sign instruction?", "answer": "do not cross tracks", "gt_answers": [ "Use Extreme Caution" ], "image_path": "./data/STVQA/VisualGenome/1/2316172.jpg", "model_name": "BLIP2" }, { "question": "What does the red and white sign say?", "answer": "bicycle tracks", "gt_answers": [ "No Parking Any Time" ], "image_path": "./data/STVQA/VisualGenome/1/2316172.jpg", "model_name": "BLIP2" }, { "question": "What type of roads are the signs depicting?", "answer": "tracks", "gt_answers": [ "One Way" ], "image_path": "./data/STVQA/VisualGenome/1/2316172.jpg", "model_name": "BLIP2" }, { "question": "What is the text at the bottom?", "answer": "luxor temple", "gt_answers": [ "Phistoria.net" ], "image_path": "./data/STVQA/imageNet/n03788195_7835.JPEG", "model_name": "BLIP2" }, { "question": "What does the sign say on the green pole?", "answer": "stockholm metro", "gt_answers": [ "0-32a" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000050366.jpg", "model_name": "BLIP2" }, { "question": "who owns the bus?", "answer": "havit", "gt_answers": [ "hyatt regency", "HYATT" ], "image_path": "./data/STVQA/VisualGenome/1/2326553.jpg", "model_name": "BLIP2" }, { "question": "What is written on the green wall?", "answer": "baseball", "gt_answers": [ "WLMI 92.9" ], "image_path": "./data/STVQA/VisualGenome/2/2411130.jpg", "model_name": "BLIP2" }, { "question": "What does the largest word on the computer screen say?", "answer": "open", "gt_answers": [ "Think" ], "image_path": "./data/STVQA/VisualGenome/1/2347730.jpg", "model_name": "BLIP2" }, { "question": "What is the common phrase on all three pieces of clothing?", "answer": "go team go", "gt_answers": [ "Go Team Go" ], "image_path": "./data/STVQA/imageNet/n04019541_25.JPEG", "model_name": "BLIP2" }, { "question": "What is written on objects ?", "answer": "go team go", "gt_answers": [ "Go team go" ], "image_path": "./data/STVQA/imageNet/n04019541_25.JPEG", "model_name": "BLIP2" }, { "question": "What type of animal is shown?", "answer": "dog", "gt_answers": [ "Dog" ], "image_path": "./data/STVQA/imageNet/n02099601_8698.JPEG", "model_name": "BLIP2" }, { "question": "What are the first two words in the box?", "answer": "black and white", "gt_answers": [ "Loading Zoom" ], "image_path": "./data/STVQA/imageNet/n04026417_9739.JPEG", "model_name": "BLIP2" }, { "question": "What two words are listed after the comma in the box?", "answer": "black and white", "gt_answers": [ "please wait" ], "image_path": "./data/STVQA/imageNet/n04026417_9739.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS THE WEBSITE SHOWN IN THE IMAGE", "answer": "taylormadejewelry", "gt_answers": [ "www.islandgems.biz" ], "image_path": "./data/STVQA/imageNet/n03814906_32536.JPEG", "model_name": "BLIP2" }, { "question": "what is the photographer name", "answer": "silvia tyler", "gt_answers": [ "gladstone taylor" ], "image_path": "./data/STVQA/imageNet/n03814906_32536.JPEG", "model_name": "BLIP2" }, { "question": "What is the license plate of the bus?", "answer": "el santo", "gt_answers": [ "BSL523", "BSL 523" ], "image_path": "./data/STVQA/VisualGenome/1/2358850.jpg", "model_name": "BLIP2" }, { "question": "Where is the bus going?", "answer": "to the next stop", "gt_answers": [ "Flete-Far" ], "image_path": "./data/STVQA/VisualGenome/1/2358850.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant?", "answer": "chinese restaurant", "gt_answers": [ "La Roca" ], "image_path": "./data/STVQA/VisualGenome/1/2358850.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the book store?", "answer": "the university bookstore", "gt_answers": [ "Sussex University Bookshop" ], "image_path": "./data/STVQA/VisualGenome/1/2368229.jpg", "model_name": "BLIP2" }, { "question": "What is the train number?", "answer": "person", "gt_answers": [ "238" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000014925.jpg", "model_name": "BLIP2" }, { "question": "What brand is labeled on the awning sign?", "answer": "samsung", "gt_answers": [ "XEROX" ], "image_path": "./data/STVQA/VisualGenome/1/2328044.jpg", "model_name": "BLIP2" }, { "question": "What brand is advertised with two x's in the name?", "answer": "xtra", "gt_answers": [ "XEROX" ], "image_path": "./data/STVQA/VisualGenome/1/2328044.jpg", "model_name": "BLIP2" }, { "question": "what is written on the plane", "answer": "glider", "gt_answers": [ "RN 0-1842" ], "image_path": "./data/STVQA/VisualGenome/1/2367950.jpg", "model_name": "BLIP2" }, { "question": "What can be found above the keyboard?", "answer": "mouse", "gt_answers": [ "user guide", "IMAC USER GUIDE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000128074.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the brand of the vending machine on the left?", "answer": "coca cola", "gt_answers": [ "Coca Cola" ], "image_path": "./data/STVQA/imageNet/n04525305_10434.JPEG", "model_name": "BLIP2" }, { "question": "Who is one of the sponsors of this baseball field?", "answer": "person", "gt_answers": [ "mastercard" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000557944.jpg", "model_name": "BLIP2" }, { "question": "What is the number on Ichird's jersey?", "answer": "person", "gt_answers": [ "51" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000557944.jpg", "model_name": "BLIP2" }, { "question": "What is the name on #51's jersey?", "answer": "person", "gt_answers": [ "ichird" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000557944.jpg", "model_name": "BLIP2" }, { "question": "What is the name associated with the picture?", "answer": "cobra snake", "gt_answers": [ "Kees de Vries" ], "image_path": "./data/STVQA/imageNet/n01748264_7516.JPEG", "model_name": "BLIP2" }, { "question": "What is written with white on the building", "answer": "garbage", "gt_answers": [ "Open Pantry" ], "image_path": "./data/STVQA/VisualGenome/1/2334707.jpg", "model_name": "BLIP2" }, { "question": "What are they selling?", "answer": "shoes", "gt_answers": [ "Shoes", "VEGETARIAN SHOES" ], "image_path": "./data/STVQA/imageNet/n04200800_20245.JPEG", "model_name": "BLIP2" }, { "question": "What is the word on the red sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/2/3574.jpg", "model_name": "BLIP2" }, { "question": "What should you do when you come upon the Red Sign?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/2/3574.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the locomotive?", "answer": "gsf", "gt_answers": [ "1566" ], "image_path": "./data/STVQA/VisualGenome/2/2416332.jpg", "model_name": "BLIP2" }, { "question": "What does the octagonal red sign say?", "answer": "all way stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2365012.jpg", "model_name": "BLIP2" }, { "question": "Which brand of car is this?", "answer": "land rover", "gt_answers": [ "Land Rover" ], "image_path": "./data/STVQA/imageNet/n02814533_77309.JPEG", "model_name": "BLIP2" }, { "question": "What's the automaker of this car?", "answer": "land rover", "gt_answers": [ "Land Rover" ], "image_path": "./data/STVQA/imageNet/n02814533_77309.JPEG", "model_name": "BLIP2" }, { "question": "What's the car plate of this car?", "answer": "ldx 0", "gt_answers": [ "Y591MJX" ], "image_path": "./data/STVQA/imageNet/n02814533_77309.JPEG", "model_name": "BLIP2" }, { "question": "What is the number on the left side of the bus?", "answer": "the number of the bus", "gt_answers": [ "440" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000099844.jpg", "model_name": "BLIP2" }, { "question": "What are the last two digits of the white truck's license plate?", "answer": "28", "gt_answers": [ "22" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000266914.jpg", "model_name": "BLIP2" }, { "question": "What color is the tennis players shorts?", "answer": "blue", "gt_answers": [ "Green", "Green Black" ], "image_path": "./data/STVQA/VisualGenome/1/2347113.jpg", "model_name": "BLIP2" }, { "question": "What color is the ball boys shirt?", "answer": "blue", "gt_answers": [ "Red", "Red White" ], "image_path": "./data/STVQA/VisualGenome/1/2347113.jpg", "model_name": "BLIP2" }, { "question": "Whats the main color is the tennis players shirt?", "answer": "blue", "gt_answers": [ "White" ], "image_path": "./data/STVQA/VisualGenome/1/2347113.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the black disc?", "answer": "nikon", "gt_answers": [ "Nikon" ], "image_path": "./data/STVQA/imageNet/n03657121_1520.JPEG", "model_name": "BLIP2" }, { "question": "What word is written in yellow?", "answer": "stop", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2355665.jpg", "model_name": "BLIP2" }, { "question": "What is written above the red rectangle?", "answer": "a man giving a presentation", "gt_answers": [ "Parent A" ], "image_path": "./data/STVQA/VisualGenome/1/2360285.jpg", "model_name": "BLIP2" }, { "question": "What is written above the green rectangle?", "answer": "a man giving a presentation", "gt_answers": [ "Parent B" ], "image_path": "./data/STVQA/VisualGenome/1/2360285.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the fitbess club?", "answer": "fitbess club", "gt_answers": [ "Elite", "Elite Fitness Club" ], "image_path": "./data/STVQA/VisualGenome/2/3272.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant?", "answer": "tasty", "gt_answers": [ "ELITE restaurant" ], "image_path": "./data/STVQA/VisualGenome/2/3272.jpg", "model_name": "BLIP2" }, { "question": "What is the word shown on the left-hand side?", "answer": "person", "gt_answers": [ "Liberte" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159887.jpg", "model_name": "BLIP2" }, { "question": "What does the text in the bottom right corner of the picture say?", "answer": "brushing your teeth", "gt_answers": [ "exposed Ismar Badzic by", "EXPOSED BY ISMAR BADZIC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000284911.jpg", "model_name": "BLIP2" }, { "question": "What numbers can be seen on the score board behind the player?", "answer": "0", "gt_answers": [ "5 3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000396534.jpg", "model_name": "BLIP2" }, { "question": "What logo can be scene on the right side of the man's shits", "answer": "person", "gt_answers": [ "UNI QLO", "UNIQLO" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000396534.jpg", "model_name": "BLIP2" }, { "question": "What can you get 6 of for $5?", "answer": "donuts", "gt_answers": [ "Donuts" ], "image_path": "./data/STVQA/VisualGenome/2/2404764.jpg", "model_name": "BLIP2" }, { "question": "Which traffic sign is this?", "answer": "stop sign", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2339855.jpg", "model_name": "BLIP2" }, { "question": "What number is this train?", "answer": "2", "gt_answers": [ "187 019-5" ], "image_path": "./data/STVQA/VisualGenome/2/2412509.jpg", "model_name": "BLIP2" }, { "question": "What country is this plane for?", "answer": "ukraine", "gt_answers": [ "Ukraine" ], "image_path": "./data/STVQA/VisualGenome/1/2350985.jpg", "model_name": "BLIP2" }, { "question": "What is the brand name on top of the speedometer?", "answer": "adc", "gt_answers": [ "Adecco" ], "image_path": "./data/STVQA/VisualGenome/1/2324464.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTENON THE WALL", "answer": "first st metro", "gt_answers": [ "FIRST ST METRO" ], "image_path": "./data/STVQA/VisualGenome/1/2348016.jpg", "model_name": "BLIP2" }, { "question": "Who owns the copyright for this picture?", "answer": "the photographer", "gt_answers": [ "G. Frank Peterson" ], "image_path": "./data/STVQA/VisualGenome/2/2406209.jpg", "model_name": "BLIP2" }, { "question": "What words are written in yellow on the pavement?", "answer": "bus stop", "gt_answers": [ "BUS STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000247346.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the game?", "answer": "pompeii", "gt_answers": [ "Pompeii" ], "image_path": "./data/STVQA/imageNet/n04243546_24990.JPEG", "model_name": "BLIP2" }, { "question": "What year was this photo copyrighted?", "answer": "2010", "gt_answers": [ "2008" ], "image_path": "./data/STVQA/imageNet/n02011460_1841.JPEG", "model_name": "BLIP2" }, { "question": "What are the last two numbers?", "answer": "2", "gt_answers": [ "94" ], "image_path": "./data/STVQA/imageNet/n03947888_38031.JPEG", "model_name": "BLIP2" }, { "question": "What is the 3 digit number written at the bottom of the memorial?", "answer": "2", "gt_answers": [ "346" ], "image_path": "./data/STVQA/imageNet/n02892201_5769.JPEG", "model_name": "BLIP2" }, { "question": "What is the third letter in the sign?", "answer": "o", "gt_answers": [ "O" ], "image_path": "./data/STVQA/VisualGenome/2/2409078.jpg", "model_name": "BLIP2" }, { "question": "What is the address of the website listed?", "answer": "com", "gt_answers": [ "WWW.DX01.COM" ], "image_path": "./data/STVQA/imageNet/n04204238_6992.JPEG", "model_name": "BLIP2" }, { "question": "What are the first three numbers on the bottom row?", "answer": "0", "gt_answers": [ "577" ], "image_path": "./data/STVQA/imageNet/n04204238_6992.JPEG", "model_name": "BLIP2" }, { "question": "What are the last three numbers on the bottom row?", "answer": "0", "gt_answers": [ "582" ], "image_path": "./data/STVQA/imageNet/n04204238_6992.JPEG", "model_name": "BLIP2" }, { "question": "What is the license plate number on the red car?", "answer": "st john", "gt_answers": [ "GSV 820" ], "image_path": "./data/STVQA/IIIT_text/4348.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the airplane?", "answer": "boeing", "gt_answers": [ "continental" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000409020.jpg", "model_name": "BLIP2" }, { "question": "what is the word before noble?", "answer": "noble", "gt_answers": [ "barnes" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000241003.jpg", "model_name": "BLIP2" }, { "question": "What is the word after Barnes?", "answer": "books", "gt_answers": [ "noble" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000241003.jpg", "model_name": "BLIP2" }, { "question": "what is full name on boxes?", "answer": "person", "gt_answers": [ "barnes noble", "BARNES & NOBLE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000241003.jpg", "model_name": "BLIP2" }, { "question": "What number is the bus?", "answer": "person", "gt_answers": [ "43" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000352019.jpg", "model_name": "BLIP2" }, { "question": "What drink is included as an ingredient for this cake?", "answer": "coffee liqueur", "gt_answers": [ "Coffee" ], "image_path": "./data/STVQA/VisualGenome/1/2344721.jpg", "model_name": "BLIP2" }, { "question": "What word can you see in the picture?", "answer": "mon", "gt_answers": [ "MON" ], "image_path": "./data/STVQA/imageNet/n03710193_279.JPEG", "model_name": "BLIP2" }, { "question": "Is this girl's hair long or short?", "answer": "long", "gt_answers": [ "short" ], "image_path": "./data/STVQA/imageNet/n03595614_12604.JPEG", "model_name": "BLIP2" }, { "question": "What is the top line written on the blue sticker?", "answer": "a parking meter", "gt_answers": [ "Buy Parking Cards at" ], "image_path": "./data/STVQA/VisualGenome/1/2358376.jpg", "model_name": "BLIP2" }, { "question": "What is the 3 digit number on the first line on the white sticker?", "answer": "0", "gt_answers": [ "363" ], "image_path": "./data/STVQA/VisualGenome/1/2358376.jpg", "model_name": "BLIP2" }, { "question": "What is the 5 digit number on the second line on the white sticker?", "answer": "0", "gt_answers": [ "3370" ], "image_path": "./data/STVQA/VisualGenome/1/2358376.jpg", "model_name": "BLIP2" }, { "question": "What does the street sign say?", "answer": "king street", "gt_answers": [ "Peterborough" ], "image_path": "./data/STVQA/IIIT_text/164.jpg", "model_name": "BLIP2" }, { "question": "What shop is to the left of the picture?", "answer": "nike", "gt_answers": [ "Skechers" ], "image_path": "./data/STVQA/icdar/img_368.jpg", "model_name": "BLIP2" }, { "question": "What level is shown in the picture?", "answer": "second floor", "gt_answers": [ "L3" ], "image_path": "./data/STVQA/icdar/img_368.jpg", "model_name": "BLIP2" }, { "question": "What is written in the white oval on the left side?", "answer": "person", "gt_answers": [ "MyProfe" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048145.jpg", "model_name": "BLIP2" }, { "question": "What is the last letter written in white in the center of an oval?", "answer": "person", "gt_answers": [ "e" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048145.jpg", "model_name": "BLIP2" }, { "question": "What is the name present in the left back of the car", "answer": "suzuki", "gt_answers": [ "NISSAN" ], "image_path": "./data/STVQA/VisualGenome/1/2349249.jpg", "model_name": "BLIP2" }, { "question": "What is the number in the plate", "answer": "a c", "gt_answers": [ "BETTXA" ], "image_path": "./data/STVQA/VisualGenome/1/2349249.jpg", "model_name": "BLIP2" }, { "question": "What word is written on the green box?", "answer": "computer", "gt_answers": [ "Router" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000254787.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the drivers window?", "answer": "0", "gt_answers": [ "158793" ], "image_path": "./data/STVQA/VisualGenome/1/2329696.jpg", "model_name": "BLIP2" }, { "question": "What is the company name of the left plane?", "answer": "batik air", "gt_answers": [ "Kenya Airways" ], "image_path": "./data/STVQA/VisualGenome/1/2323848.jpg", "model_name": "BLIP2" }, { "question": "What name is displayed on the right plane?", "answer": "batik air", "gt_answers": [ "Martinair Cargo" ], "image_path": "./data/STVQA/VisualGenome/1/2323848.jpg", "model_name": "BLIP2" }, { "question": "What is the first blue letter?", "answer": "a", "gt_answers": [ "s" ], "image_path": "./data/STVQA/VisualGenome/2/2415709.jpg", "model_name": "BLIP2" }, { "question": "What is the name if the sign with a yellow background and red and white logo?", "answer": "canadian tire", "gt_answers": [ "HOME", "HOME HARDWARE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355786.jpg", "model_name": "BLIP2" }, { "question": "What brand of cars is sold at this dealership?", "answer": "honda", "gt_answers": [ "Honda" ], "image_path": "./data/STVQA/IIIT_text/img_000599.jpg", "model_name": "BLIP2" }, { "question": "Which state appears on the hoodie?", "answer": "person", "gt_answers": [ "alaska" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000275556.jpg", "model_name": "BLIP2" }, { "question": "What word comes next after salty?", "answer": "sweet", "gt_answers": [ "dawg" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000275556.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the neon orange sign?", "answer": "k", "gt_answers": [ "p" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000271862.jpg", "model_name": "BLIP2" }, { "question": "what is the number of this person who is showing his back side?", "answer": "person", "gt_answers": [ "15" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000579179.jpg", "model_name": "BLIP2" }, { "question": "what is written on his T-shirt at back side <", "answer": "person", "gt_answers": [ "WDMSC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000579179.jpg", "model_name": "BLIP2" }, { "question": "What should you do when you see these signs?", "answer": "look at them", "gt_answers": [ "stop" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000292058.jpg", "model_name": "BLIP2" }, { "question": "What is the bus number?", "answer": "nbsp", "gt_answers": [ "2407", "1407" ], "image_path": "./data/STVQA/VisualGenome/1/1592506.jpg", "model_name": "BLIP2" }, { "question": "What's the namen of the street sign?", "answer": "garden street", "gt_answers": [ "garden st" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000542695.jpg", "model_name": "BLIP2" }, { "question": "What hours on a Saturday allows for 2 hr parking?", "answer": "8 am to 6 pm", "gt_answers": [ "9AM-10PM" ], "image_path": "./data/STVQA/VisualGenome/1/2350138.jpg", "model_name": "BLIP2" }, { "question": "What country is mentioned in the image?", "answer": "usa", "gt_answers": [ "USA" ], "image_path": "./data/STVQA/imageNet/n03127925_4045.JPEG", "model_name": "BLIP2" }, { "question": "What brand is this electronic?", "answer": "sony", "gt_answers": [ "Edirol" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000017342.jpg", "model_name": "BLIP2" }, { "question": "What year is in blue on the bank?", "answer": "1989/90", "gt_answers": [ "1878" ], "image_path": "./data/STVQA/imageNet/n03935335_11776.JPEG", "model_name": "BLIP2" }, { "question": "What does the girl's sweater say?", "answer": "person", "gt_answers": [ "Universita Roma" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000074515.jpg", "model_name": "BLIP2" }, { "question": "What number is the horse?", "answer": "person", "gt_answers": [ "69" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355830.jpg", "model_name": "BLIP2" }, { "question": "Who copyrighted this image?", "answer": "person", "gt_answers": [ "katy wrathall" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355830.jpg", "model_name": "BLIP2" }, { "question": "With what format can they edit uncompressed?", "answer": "flv", "gt_answers": [ "ProRes" ], "image_path": "./data/STVQA/imageNet/n04404412_30664.JPEG", "model_name": "BLIP2" }, { "question": "What word is pictured on the large television?", "answer": "maker", "gt_answers": [ "Make" ], "image_path": "./data/STVQA/imageNet/n04404412_30664.JPEG", "model_name": "BLIP2" }, { "question": "The wines of which state are you asked to discover?", "answer": "california", "gt_answers": [ "California", "CALIFORNIA WINES" ], "image_path": "./data/STVQA/VisualGenome/1/2331294.jpg", "model_name": "BLIP2" }, { "question": "What do the paper signs on the motorcycle say?", "answer": "the teddy bears picnic", "gt_answers": [ "No To the Bike Parking Tax" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000234720.jpg", "model_name": "BLIP2" }, { "question": "How many men are standing by the train?", "answer": "two", "gt_answers": [ "2" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000206155.jpg", "model_name": "BLIP2" }, { "question": "Where is the mountain at that the presidential train ascending?", "answer": "the summit of mount person", "gt_answers": [ "Alarcon" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000206155.jpg", "model_name": "BLIP2" }, { "question": "What is the three digit number that forms part of the registration plate of the vehicle in this image?", "answer": "person", "gt_answers": [ "254" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000516189.jpg", "model_name": "BLIP2" }, { "question": "What follows Save NASA on the sign?", "answer": "person", "gt_answers": [ "IMPEACH OBAMA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000192539.jpg", "model_name": "BLIP2" }, { "question": "What court number are they playing on?", "answer": "two", "gt_answers": [ "9" ], "image_path": "./data/STVQA/VisualGenome/1/2345447.jpg", "model_name": "BLIP2" }, { "question": "What is the number of the batter?", "answer": "2", "gt_answers": [ "22" ], "image_path": "./data/STVQA/VisualGenome/2/2413839.jpg", "model_name": "BLIP2" }, { "question": "What is written below the stop sign?", "answer": "no parking", "gt_answers": [ "All Way" ], "image_path": "./data/STVQA/VisualGenome/1/2325110.jpg", "model_name": "BLIP2" }, { "question": "What is the route number displayed next to the bus door?", "answer": "the bus number", "gt_answers": [ "45" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000024489.jpg", "model_name": "BLIP2" }, { "question": "What 5 words are written in red script?", "answer": "flying in an emergency", "gt_answers": [ "Safe flying is no accident", "Safe flying is no accident!" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000373596.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the photgrapher", "answer": "michael mccoy", "gt_answers": [ "scott carpenter" ], "image_path": "./data/STVQA/imageNet/n02018207_24340.JPEG", "model_name": "BLIP2" }, { "question": "To what airline does this plane belong?", "answer": "austrian airlines", "gt_answers": [ "Austrian Airlines", "AUSTRIAN" ], "image_path": "./data/STVQA/VisualGenome/1/2359137.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the white banner with red lettering?", "answer": "person's farm", "gt_answers": [ "KIDS MEALS SPECIALS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000446677.jpg", "model_name": "BLIP2" }, { "question": "What is written on the Blue baseball hat with the yellow bill?", "answer": "person", "gt_answers": [ "CAT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000446677.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the bottle?", "answer": "300", "gt_answers": [ "300" ], "image_path": "./data/STVQA/imageNet/n02823428_8929.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the brewery?", "answer": "person", "gt_answers": [ "The Chiltern Brewery" ], "image_path": "./data/STVQA/imageNet/n02823428_8929.JPEG", "model_name": "BLIP2" }, { "question": "What is the word written in cursive?", "answer": "beer", "gt_answers": [ "Old Ale" ], "image_path": "./data/STVQA/imageNet/n02823428_8929.JPEG", "model_name": "BLIP2" }, { "question": "What is the last name of the photographer, as written on the bottom right?", "answer": "person", "gt_answers": [ "Hayward" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000294199.jpg", "model_name": "BLIP2" }, { "question": "What kind of foods selling here?", "answer": "fried foods", "gt_answers": [ "Turkey legs, Baked potatoes, fresh pizza Spaghetti" ], "image_path": "./data/STVQA/VisualGenome/1/2368696.jpg", "model_name": "BLIP2" }, { "question": "What word in white is visible on the back left ski?", "answer": "fs", "gt_answers": [ "HEAD" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000458475.jpg", "model_name": "BLIP2" }, { "question": "where do you find this chair?", "answer": "barber shop", "gt_answers": [ "barber" ], "image_path": "./data/STVQA/imageNet/n02791124_7207.JPEG", "model_name": "BLIP2" }, { "question": "What kind of Chair is this", "answer": "barber chair", "gt_answers": [ "Barber Chair", "BARBER" ], "image_path": "./data/STVQA/imageNet/n02791124_7207.JPEG", "model_name": "BLIP2" }, { "question": "Where is this bus going?", "answer": "to a store", "gt_answers": [ "CROSSTOWN" ], "image_path": "./data/STVQA/VisualGenome/1/2315803.jpg", "model_name": "BLIP2" }, { "question": "What is the last word on the t-shirt?", "answer": "person", "gt_answers": [ "IT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000226747.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the top plane?", "answer": "number", "gt_answers": [ "718" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000060953.jpg", "model_name": "BLIP2" }, { "question": "What brand is written on the blue sign?", "answer": "person", "gt_answers": [ "Reliance Mobile" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000406770.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the glass door?", "answer": "3", "gt_answers": [ "22" ], "image_path": "./data/STVQA/VisualGenome/2/2411661.jpg", "model_name": "BLIP2" }, { "question": "What word is upside down in the circle on the box to the left?", "answer": "lego", "gt_answers": [ "longwave" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000334568.jpg", "model_name": "BLIP2" }, { "question": "What does the green sign say?", "answer": "winter ville", "gt_answers": [ "WINTER HOLLY LN" ], "image_path": "./data/STVQA/VisualGenome/2/2414324.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the juice that the children are drinking in the photo?", "answer": "apple juice", "gt_answers": [ "HiC" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000119560.jpg", "model_name": "BLIP2" }, { "question": "What organization does this jet belong to?", "answer": "us military", "gt_answers": [ "NAVY" ], "image_path": "./data/STVQA/VisualGenome/2/3890.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this grocery store?", "answer": "market urban market", "gt_answers": [ "Harvest urban market" ], "image_path": "./data/STVQA/VisualGenome/2/2403920.jpg", "model_name": "BLIP2" }, { "question": "What does the third poster on the right with the person say?", "answer": "thank you", "gt_answers": [ "Hyperion" ], "image_path": "./data/STVQA/icdar/test_img_468.jpg", "model_name": "BLIP2" }, { "question": "What is on the soccer jersey?", "answer": "the team name", "gt_answers": [ "Malaysia" ], "image_path": "./data/STVQA/VisualGenome/1/2356514.jpg", "model_name": "BLIP2" }, { "question": "What number is the train?", "answer": "3", "gt_answers": [ "375826" ], "image_path": "./data/STVQA/VisualGenome/1/2349394.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the top shelf in green letters?", "answer": "donuts", "gt_answers": [ "Lee's Donuts" ], "image_path": "./data/STVQA/VisualGenome/1/2339169.jpg", "model_name": "BLIP2" }, { "question": "What Company is on present on the bus?", "answer": "diamond bus", "gt_answers": [ "The detroit bus co.", "Detroit Bus Co." ], "image_path": "./data/STVQA/VisualGenome/1/2369000.jpg", "model_name": "BLIP2" }, { "question": "What brand name is printed on the side of the green can?", "answer": "person", "gt_answers": [ "Mountain Dew" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098355.jpg", "model_name": "BLIP2" }, { "question": "What location is printed at the top end of the green can?", "answer": "new york city", "gt_answers": [ "NEW YORK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000098355.jpg", "model_name": "BLIP2" }, { "question": "What six letter word is visible in this picture?", "answer": "baseball", "gt_answers": [ "LAWSON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000450362.jpg", "model_name": "BLIP2" }, { "question": "What six letter word is visible in this picture?", "answer": "baseball", "gt_answers": [ "Lawson" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000450362.jpg", "model_name": "BLIP2" }, { "question": "What six letter word is visible in this picture?", "answer": "baseball", "gt_answers": [ "lawson" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000450362.jpg", "model_name": "BLIP2" }, { "question": "What is the word on white board", "answer": "xerox", "gt_answers": [ "xerox" ], "image_path": "./data/STVQA/IIIT_text/img_001325.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this building?", "answer": "the yellow house", "gt_answers": [ "Bar Ristorante Tana Della Volpe" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000277321.jpg", "model_name": "BLIP2" }, { "question": "Is the building in the background a school or a hotel?", "answer": "school", "gt_answers": [ "Hotel" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000277321.jpg", "model_name": "BLIP2" }, { "question": "what is written inside the board", "answer": "a street sign", "gt_answers": [ "CARRER DE LA LLIBERTAT" ], "image_path": "./data/STVQA/VisualGenome/1/2355954.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the restaurant", "answer": "the truck is parked in front of a restaurant", "gt_answers": [ "cafenineteen" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000336021.jpg", "model_name": "BLIP2" }, { "question": "what does the black and white sign say?", "answer": "one way", "gt_answers": [ "ONE WAY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000276853.jpg", "model_name": "BLIP2" }, { "question": "what does the big red sign say?", "answer": "one way", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000276853.jpg", "model_name": "BLIP2" }, { "question": "Where is this team located?", "answer": "st louis, missouri", "gt_answers": [ "Springfield" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000484355.jpg", "model_name": "BLIP2" }, { "question": "What country is this beach located in?", "answer": "costa rica", "gt_answers": [ "Costa Rica" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000192795.jpg", "model_name": "BLIP2" }, { "question": "What city is this beach near?", "answer": "santa teresa", "gt_answers": [ "Santa Teresa" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000192795.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the black book on the top shelf with red letters, to the left of the word Comics?", "answer": "superman", "gt_answers": [ "FROM HELL" ], "image_path": "./data/STVQA/imageNet/n06596364_19401.JPEG", "model_name": "BLIP2" }, { "question": "What is the word in yellow beside the book titled \"From Hell\"?", "answer": "hell", "gt_answers": [ "COMICS" ], "image_path": "./data/STVQA/imageNet/n06596364_19401.JPEG", "model_name": "BLIP2" }, { "question": "What does the text in blue say?", "answer": "family portrait", "gt_answers": [ "FAMILY PORTRAIT", "FAMILY PORTAIT" ], "image_path": "./data/STVQA/imageNet/n02510455_14.JPEG", "model_name": "BLIP2" }, { "question": "What is the price for one sign piece?", "answer": "one pound", "gt_answers": [ "15" ], "image_path": "./data/STVQA/IIIT_text/3167.jpg", "model_name": "BLIP2" }, { "question": "What is the price for two sign pieces?", "answer": "the price for two sign pieces", "gt_answers": [ "27" ], "image_path": "./data/STVQA/IIIT_text/3167.jpg", "model_name": "BLIP2" }, { "question": "What letters are to the left of the clock?", "answer": "abc", "gt_answers": [ "ABC" ], "image_path": "./data/STVQA/VisualGenome/1/2377181.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the right side of the clock?", "answer": "serving central oregon", "gt_answers": [ "1240 KC" ], "image_path": "./data/STVQA/VisualGenome/1/2377181.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of watches sold by the store on the left", "answer": "swatch", "gt_answers": [ "Swatch" ], "image_path": "./data/STVQA/IIIT_text/1900.jpg", "model_name": "BLIP2" }, { "question": "What is written on the board?", "answer": "your dad is god", "gt_answers": [ "Fathers Day", "Your Dad Called And he told us what he wants For Fathers Day!" ], "image_path": "./data/STVQA/VisualGenome/1/2326897.jpg", "model_name": "BLIP2" }, { "question": "What is written on the wall?", "answer": "cotton on", "gt_answers": [ "Cotton On" ], "image_path": "./data/STVQA/icdar/img_857.jpg", "model_name": "BLIP2" }, { "question": "What airline is printed on the plane?", "answer": "go air", "gt_answers": [ "Condor" ], "image_path": "./data/STVQA/VisualGenome/1/2362856.jpg", "model_name": "BLIP2" }, { "question": "What is the date shown on the picture?", "answer": "january 1", "gt_answers": [ "Jun 13 2010" ], "image_path": "./data/STVQA/VisualGenome/1/2340368.jpg", "model_name": "BLIP2" }, { "question": "What is the first word in english on the sign?", "answer": "person", "gt_answers": [ "Advance" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000248263.jpg", "model_name": "BLIP2" }, { "question": "What is one street in this intersection?", "answer": "riverdale", "gt_answers": [ "Riverdale Av.", "RIVERDALE AV" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000437597.jpg", "model_name": "BLIP2" }, { "question": "What is one street in this intersection?", "answer": "riverdale", "gt_answers": [ "Riverdale Av.", "RIVERDALE AV" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000437597.jpg", "model_name": "BLIP2" }, { "question": "What instrument is this man playing?", "answer": "harp", "gt_answers": [ "Harp" ], "image_path": "./data/STVQA/imageNet/n03495258_15256.JPEG", "model_name": "BLIP2" }, { "question": "Where is the driver taking the kids?", "answer": "to school", "gt_answers": [ "School" ], "image_path": "./data/STVQA/IIIT_text/img_001080.jpg", "model_name": "BLIP2" }, { "question": "what is the sign of train refer", "answer": "a train refers to a passenger train", "gt_answers": [ "drs" ], "image_path": "./data/STVQA/VisualGenome/1/2321177.jpg", "model_name": "BLIP2" }, { "question": "what is the number given on train", "answer": "number given on train", "gt_answers": [ "37087" ], "image_path": "./data/STVQA/VisualGenome/1/2321177.jpg", "model_name": "BLIP2" }, { "question": "What is written on the side of the tents?", "answer": "tents for sale", "gt_answers": [ "Telefonica" ], "image_path": "./data/STVQA/imageNet/n03792972_6150.JPEG", "model_name": "BLIP2" }, { "question": "Who is the sponsor for the tents?", "answer": "samsung", "gt_answers": [ "Telefonica" ], "image_path": "./data/STVQA/imageNet/n03792972_6150.JPEG", "model_name": "BLIP2" }, { "question": "Where is the white bus going?", "answer": "to the city", "gt_answers": [ "Stephanshorn", "STEPHONSHORN" ], "image_path": "./data/STVQA/imageNet/n04487081_17696.JPEG", "model_name": "BLIP2" }, { "question": "What photography company is listed?", "answer": "adobe stock", "gt_answers": [ "NORTHLINE PHOTOGRAPHY" ], "image_path": "./data/STVQA/VisualGenome/2/2416729.jpg", "model_name": "BLIP2" }, { "question": "What does the word say inside the arrow pointing right?", "answer": "chocolate", "gt_answers": [ "Chocolate" ], "image_path": "./data/STVQA/VisualGenome/1/2371284.jpg", "model_name": "BLIP2" }, { "question": "What does the white top sign say?", "answer": "chocolate", "gt_answers": [ "Herb Caen Way" ], "image_path": "./data/STVQA/VisualGenome/1/2371284.jpg", "model_name": "BLIP2" }, { "question": "What number is the bus?", "answer": "4", "gt_answers": [ "23" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000403534.jpg", "model_name": "BLIP2" }, { "question": "What type of bus is this?", "answer": "school bus", "gt_answers": [ "PRIVATE bus", "PRIVATE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000403534.jpg", "model_name": "BLIP2" }, { "question": "What number is this bus known as?", "answer": "person", "gt_answers": [ "23" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000403534.jpg", "model_name": "BLIP2" }, { "question": "What flavor are the captain's wafers pictured in the photo?", "answer": "vanilla", "gt_answers": [ "Sour cream and chives." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012918.jpg", "model_name": "BLIP2" }, { "question": "What brand are the crackers?", "answer": "ritz", "gt_answers": [ "Lance" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000012918.jpg", "model_name": "BLIP2" }, { "question": "how man players are there", "answer": "two", "gt_answers": [ "one" ], "image_path": "./data/STVQA/VisualGenome/1/2364854.jpg", "model_name": "BLIP2" }, { "question": "what it the name of the sponsor of this event?", "answer": "person", "gt_answers": [ "INTERSPORT" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000270123.jpg", "model_name": "BLIP2" }, { "question": "who is the sponsor in red on the skiers uniform?", "answer": "person", "gt_answers": [ "CC ATH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000270123.jpg", "model_name": "BLIP2" }, { "question": "how many miles does the display view", "answer": "how many miles does the display view", "gt_answers": [ "10001" ], "image_path": "./data/STVQA/imageNet/n03841143_24382.JPEG", "model_name": "BLIP2" }, { "question": "What time was this photo taken?", "answer": "10 am", "gt_answers": [ "05:38" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000029687.jpg", "model_name": "BLIP2" }, { "question": "What month and day was this photo taken?", "answer": "september 1st", "gt_answers": [ "04/18" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000029687.jpg", "model_name": "BLIP2" }, { "question": "In which year was this photo taken?", "answer": "2012", "gt_answers": [ "2012" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000029687.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the blue boat?", "answer": "the blue boat", "gt_answers": [ "El Pirate" ], "image_path": "./data/STVQA/VisualGenome/1/2351012.jpg", "model_name": "BLIP2" }, { "question": "What kind of potion is this?", "answer": "love potion", "gt_answers": [ "LOVE" ], "image_path": "./data/STVQA/imageNet/n03062245_3384.JPEG", "model_name": "BLIP2" }, { "question": "What team does the red jersey belong to?", "answer": "miami heat", "gt_answers": [ "Heat" ], "image_path": "./data/STVQA/imageNet/n03595614_23406.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the street crossing Wall St?", "answer": "wall street", "gt_answers": [ "Broadway" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000428991.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street crossing Broadway St?", "answer": "wall street", "gt_answers": [ "Wall" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000428991.jpg", "model_name": "BLIP2" }, { "question": "What is the title of the watermark written on the lower right hand corner of the image?", "answer": "concorde", "gt_answers": [ "BLPhotography" ], "image_path": "./data/STVQA/VisualGenome/1/2335334.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the business?", "answer": "mike's market", "gt_answers": [ "Mike's Market" ], "image_path": "./data/STVQA/VisualGenome/1/2351663.jpg", "model_name": "BLIP2" }, { "question": "What is written on the key left of the missing ones?", "answer": "microsoft", "gt_answers": [ "Alt" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462224.jpg", "model_name": "BLIP2" }, { "question": "what is written next to the Windows symbol?", "answer": "microsoft", "gt_answers": [ "start" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462224.jpg", "model_name": "BLIP2" }, { "question": "What number has the key above the star?", "answer": "1", "gt_answers": [ "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000462224.jpg", "model_name": "BLIP2" }, { "question": "What city is on the shirt?", "answer": "new york city", "gt_answers": [ "London" ], "image_path": "./data/STVQA/IIIT_text/3175.jpg", "model_name": "BLIP2" }, { "question": "What number is on the ball player's shirt?", "answer": "2", "gt_answers": [ "51" ], "image_path": "./data/STVQA/VisualGenome/1/2367336.jpg", "model_name": "BLIP2" }, { "question": "what is written in the stadium", "answer": "person", "gt_answers": [ "KIA" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000313541.jpg", "model_name": "BLIP2" }, { "question": "What is the word written under the puzzle?", "answer": "bridge", "gt_answers": [ "HEIDELBERG" ], "image_path": "./data/STVQA/imageNet/n03598930_2773.JPEG", "model_name": "BLIP2" }, { "question": "What is yellow ship no.?", "answer": "boat", "gt_answers": [ "RX60" ], "image_path": "./data/STVQA/VisualGenome/1/1159823.jpg", "model_name": "BLIP2" }, { "question": "What is brown ship no.?", "answer": "boat", "gt_answers": [ "RX55" ], "image_path": "./data/STVQA/VisualGenome/1/1159823.jpg", "model_name": "BLIP2" }, { "question": "What number is on the red plate on the front of the train?", "answer": "santa fe", "gt_answers": [ "2472" ], "image_path": "./data/STVQA/VisualGenome/1/2327621.jpg", "model_name": "BLIP2" }, { "question": "What brand is on the can?", "answer": "crush", "gt_answers": [ "Crush Orange" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000483530.jpg", "model_name": "BLIP2" }, { "question": "What color is the woman's shirt?", "answer": "blue", "gt_answers": [ "Pink" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159069.jpg", "model_name": "BLIP2" }, { "question": "What color is the woman's skirt?", "answer": "red", "gt_answers": [ "Brown" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159069.jpg", "model_name": "BLIP2" }, { "question": "What color is the moped?", "answer": "red", "gt_answers": [ "Red" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000159069.jpg", "model_name": "BLIP2" }, { "question": "What are the three letters still visible on the street sign?", "answer": "stop", "gt_answers": [ "TOP" ], "image_path": "./data/STVQA/VisualGenome/1/2366705.jpg", "model_name": "BLIP2" }, { "question": "What music studio is this?", "answer": "universal music group", "gt_answers": [ "Universal Music" ], "image_path": "./data/STVQA/IIIT_text/1302.jpg", "model_name": "BLIP2" }, { "question": "universal Music is a subsidiary of what company?", "answer": "universal entertainment group", "gt_answers": [ "Universal" ], "image_path": "./data/STVQA/IIIT_text/1302.jpg", "model_name": "BLIP2" }, { "question": "Who works in this build?", "answer": "police officers", "gt_answers": [ "Police" ], "image_path": "./data/STVQA/IIIT_text/img_000923.jpg", "model_name": "BLIP2" }, { "question": "What does it say on the purple shirt?", "answer": "citizen volunteers", "gt_answers": [ "Citizen Volunteer" ], "image_path": "./data/STVQA/VisualGenome/1/2336642.jpg", "model_name": "BLIP2" }, { "question": "What does the visible text in the upper right corner say?", "answer": "mercado de libros", "gt_answers": [ "G3 BAZA" ], "image_path": "./data/STVQA/VisualGenome/2/3174.jpg", "model_name": "BLIP2" }, { "question": "What does the right side of the pizza box say?", "answer": "pizza hut", "gt_answers": [ "Enjoy your pizza" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000196480.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the top-left of the cake?", "answer": "person", "gt_answers": [ "Last" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000213344.jpg", "model_name": "BLIP2" }, { "question": "What is the watermock text say?", "answer": "the watermock text say", "gt_answers": [ "shutterstock" ], "image_path": "./data/STVQA/imageNet/n02669723_9686.JPEG", "model_name": "BLIP2" }, { "question": "What is the number at the bottom?", "answer": "1", "gt_answers": [ "36875461" ], "image_path": "./data/STVQA/imageNet/n02669723_9686.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN ON THERE", "answer": "built in 1946", "gt_answers": [ "BUILT 1945 CREWE" ], "image_path": "./data/STVQA/VisualGenome/1/2343054.jpg", "model_name": "BLIP2" }, { "question": "WHICH VEHICLE IS THERE", "answer": "train", "gt_answers": [ "TRAIN" ], "image_path": "./data/STVQA/VisualGenome/1/2343054.jpg", "model_name": "BLIP2" }, { "question": "What is the date shown in this picture?", "answer": "christmas eve", "gt_answers": [ "23 Dec" ], "image_path": "./data/STVQA/VisualGenome/1/2338472.jpg", "model_name": "BLIP2" }, { "question": "What is the function of the vehicle?", "answer": "ambulance", "gt_answers": [ "AMBULANCE" ], "image_path": "./data/STVQA/imageNet/n03594945_11676.JPEG", "model_name": "BLIP2" }, { "question": "What is the model name of the vehicle?", "answer": "defender", "gt_answers": [ "DEFENDER" ], "image_path": "./data/STVQA/imageNet/n03594945_11676.JPEG", "model_name": "BLIP2" }, { "question": "What is written on the top front of the bus?", "answer": "puerto rico", "gt_answers": [ "POLPERRO TRAM CO." ], "image_path": "./data/STVQA/VisualGenome/1/2332331.jpg", "model_name": "BLIP2" }, { "question": "What number is written on the man's shirt?", "answer": "3", "gt_answers": [ "Three" ], "image_path": "./data/STVQA/VisualGenome/1/2362748.jpg", "model_name": "BLIP2" }, { "question": "What is on the man's head?", "answer": "hat", "gt_answers": [ "glasses" ], "image_path": "./data/STVQA/VisualGenome/1/2362748.jpg", "model_name": "BLIP2" }, { "question": "What is the Brand name?", "answer": "fiesta", "gt_answers": [ "Culinary Delights" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010995.jpg", "model_name": "BLIP2" }, { "question": "What type of Gumbo is in the package?", "answer": "cajun", "gt_answers": [ "SHRIMP & SAUSAGE" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010995.jpg", "model_name": "BLIP2" }, { "question": "What style selection is this product?", "answer": "asian", "gt_answers": [ "Cajun" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000010995.jpg", "model_name": "BLIP2" }, { "question": "what is the name on top of label", "answer": "big house red", "gt_answers": [ "big house red" ], "image_path": "./data/STVQA/imageNet/n04591713_374.JPEG", "model_name": "BLIP2" }, { "question": "What does it say directly under the wind shield?", "answer": "lincoln city centre", "gt_answers": [ "Training bus" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000378821.jpg", "model_name": "BLIP2" }, { "question": "What city is the pitcher playing for?", "answer": "new york city", "gt_answers": [ "Toledo" ], "image_path": "./data/STVQA/VisualGenome/1/2348105.jpg", "model_name": "BLIP2" }, { "question": "What numbers are visible on the red sign on the right?", "answer": "0 0", "gt_answers": [ "180", "1-800" ], "image_path": "./data/STVQA/VisualGenome/1/2348105.jpg", "model_name": "BLIP2" }, { "question": "What two letters are visible on the sign on the left?", "answer": "mlb", "gt_answers": [ "as" ], "image_path": "./data/STVQA/VisualGenome/1/2348105.jpg", "model_name": "BLIP2" }, { "question": "What type of injury is it used for?", "answer": "traumatic brain injury", "gt_answers": [ "Traumatic bleeding" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008590.jpg", "model_name": "BLIP2" }, { "question": "What are the dimensions?", "answer": "x x inches", "gt_answers": [ "3 IN X 4 YDS" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000008590.jpg", "model_name": "BLIP2" }, { "question": "What brands are advertised on the train?", "answer": "swiss railways", "gt_answers": [ "SAMSUNG" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000283309.jpg", "model_name": "BLIP2" }, { "question": "What type of electronic is being advertised?", "answer": "smart phone", "gt_answers": [ "SMART TV" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000283309.jpg", "model_name": "BLIP2" }, { "question": "what is the number of the vehicle?", "answer": "8808", "gt_answers": [ "82038" ], "image_path": "./data/STVQA/VisualGenome/1/2317837.jpg", "model_name": "BLIP2" }, { "question": "what is the name of the vehicle?", "answer": "paras", "gt_answers": [ "PARTAS" ], "image_path": "./data/STVQA/VisualGenome/1/2317837.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the website?", "answer": "person", "gt_answers": [ "WingHouse" ], "image_path": "./data/STVQA/imageNet/n06359193_4947.JPEG", "model_name": "BLIP2" }, { "question": "What is on the plane?", "answer": "passengers", "gt_answers": [ "Malaysia" ], "image_path": "./data/STVQA/VisualGenome/2/2412684.jpg", "model_name": "BLIP2" }, { "question": "What is located at this next exit?", "answer": "hidden worlds gentes park 2", "gt_answers": [ "Hidden Worlds Cenotes Park." ], "image_path": "./data/STVQA/IIIT_text/4671.jpg", "model_name": "BLIP2" }, { "question": "Who owns the Irish restaurant and bar?", "answer": "the irish restaurant and bar is owned by the irish pub group", "gt_answers": [ "Malones", "MALONE'S" ], "image_path": "./data/STVQA/icdar/test_img_444.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the restaurant shown?", "answer": "mcdonalds", "gt_answers": [ "Malone's Irish Restaurant & Bar", "MALONE'S" ], "image_path": "./data/STVQA/icdar/test_img_444.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the product?", "answer": "a speaker", "gt_answers": [ "infinty" ], "image_path": "./data/STVQA/imageNet/n03691459_48693.JPEG", "model_name": "BLIP2" }, { "question": "What is the number on the front of this train?", "answer": "46", "gt_answers": [ "485" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000335804.jpg", "model_name": "BLIP2" }, { "question": "What is the name labeled behind the number 485?", "answer": "person", "gt_answers": [ "north EAST express" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000335804.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company on this train?", "answer": "japan railways", "gt_answers": [ "EAST JAPAN RAILWAY COMPANY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000335804.jpg", "model_name": "BLIP2" }, { "question": "What are the 6 letters visible on the front of this store?", "answer": "sander", "gt_answers": [ "Tander" ], "image_path": "./data/STVQA/VisualGenome/1/713035.jpg", "model_name": "BLIP2" }, { "question": "What company is displayed on the truck", "answer": "a crane", "gt_answers": [ "roxu" ], "image_path": "./data/STVQA/imageNet/n03126707_25027.JPEG", "model_name": "BLIP2" }, { "question": "What numbers are on the front window?", "answer": "the bus number", "gt_answers": [ "1019" ], "image_path": "./data/STVQA/VisualGenome/1/2348477.jpg", "model_name": "BLIP2" }, { "question": "What word can be read on the fire hydrant?", "answer": "person", "gt_answers": [ "Fire" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000035972.jpg", "model_name": "BLIP2" }, { "question": "What follows the word no?", "answer": "no smoking", "gt_answers": [ "smoking" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000272040.jpg", "model_name": "BLIP2" }, { "question": "What is written on the green fence?", "answer": "person", "gt_answers": [ "Poland Spring" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000566175.jpg", "model_name": "BLIP2" }, { "question": "What ad is displayed under numbers?", "answer": "mcdonalds", "gt_answers": [ "Samsung" ], "image_path": "./data/STVQA/VisualGenome/2/2421.jpg", "model_name": "BLIP2" }, { "question": "Who is the lender above the tourist og sign?", "answer": "citibank", "gt_answers": [ "Prudential Financial" ], "image_path": "./data/STVQA/VisualGenome/2/2421.jpg", "model_name": "BLIP2" }, { "question": "What is written on the truck?", "answer": "garbage", "gt_answers": [ "todos somos orizaba", "ORIZABA" ], "image_path": "./data/STVQA/imageNet/n03417042_8367.JPEG", "model_name": "BLIP2" }, { "question": "What is the airline of the plane?", "answer": "air canada", "gt_answers": [ "Air Canada" ], "image_path": "./data/STVQA/VisualGenome/2/2404919.jpg", "model_name": "BLIP2" }, { "question": "what is the bran name on display?", "answer": "minkin", "gt_answers": [ "Minikin" ], "image_path": "./data/STVQA/icdar/img_375.jpg", "model_name": "BLIP2" }, { "question": "what is advertised on the display window to the right in red writing?", "answer": "minikin", "gt_answers": [ "instinct takes over" ], "image_path": "./data/STVQA/icdar/img_375.jpg", "model_name": "BLIP2" }, { "question": "What is this store called?", "answer": "marie hubert", "gt_answers": [ "Marie-Hubert" ], "image_path": "./data/STVQA/imageNet/n04200800_4494.JPEG", "model_name": "BLIP2" }, { "question": "What number train is this?", "answer": "a train", "gt_answers": [ "82304" ], "image_path": "./data/STVQA/VisualGenome/2/2404158.jpg", "model_name": "BLIP2" }, { "question": "What two words are written directly after U.S. on the plane?", "answer": "air force", "gt_answers": [ "AIR FORCE" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000115862.jpg", "model_name": "BLIP2" }, { "question": "What four digit number is written directly under the window ?", "answer": "person", "gt_answers": [ "9211" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000115862.jpg", "model_name": "BLIP2" }, { "question": "What is the Name of the Ad?", "answer": "paris", "gt_answers": [ "Paribas" ], "image_path": "./data/STVQA/VisualGenome/1/2349488.jpg", "model_name": "BLIP2" }, { "question": "What brand of TV remote is this?", "answer": "sony", "gt_answers": [ "ARCHOS" ], "image_path": "./data/STVQA/imageNet/n04074963_18951.JPEG", "model_name": "BLIP2" }, { "question": "What is written at the bottom of the picture?", "answer": "a man on a skateboard", "gt_answers": [ "Foto: Leo Gomes" ], "image_path": "./data/STVQA/VisualGenome/1/2355050.jpg", "model_name": "BLIP2" }, { "question": "What is the name of website being displayed in screen?", "answer": "starbucks", "gt_answers": [ "Starbucks coffee company" ], "image_path": "./data/STVQA/VisualGenome/1/2369607.jpg", "model_name": "BLIP2" }, { "question": "what is the country name written on the doll?", "answer": "brazil", "gt_answers": [ "BRASIL" ], "image_path": "./data/STVQA/VisualGenome/1/2369607.jpg", "model_name": "BLIP2" }, { "question": "What are the numbers written in red on the plane?", "answer": "f-16", "gt_answers": [ "211" ], "image_path": "./data/STVQA/VisualGenome/2/2415778.jpg", "model_name": "BLIP2" }, { "question": "What color is the door?", "answer": "blue", "gt_answers": [ "Blue" ], "image_path": "./data/STVQA/VisualGenome/1/2327872.jpg", "model_name": "BLIP2" }, { "question": "what does the fire truck have written on it?", "answer": "san diego fire department", "gt_answers": [ "Rescue 9" ], "image_path": "./data/STVQA/imageNet/n03345487_2906.JPEG", "model_name": "BLIP2" }, { "question": "What is the text in the bottom left corner?", "answer": "chevrolet silverado towing a boat", "gt_answers": [ "1stpix" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000344998.jpg", "model_name": "BLIP2" }, { "question": "What is the occupation listed on the side of the green truck?", "answer": "person", "gt_answers": [ "PARK RANGER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000344998.jpg", "model_name": "BLIP2" }, { "question": "What is the photo tag?", "answer": "mushrooms", "gt_answers": [ "Raymond McNeil" ], "image_path": "./data/STVQA/imageNet/n13037406_478.JPEG", "model_name": "BLIP2" }, { "question": "WHAT IS THE LETTER WRITTEN", "answer": "nec", "gt_answers": [ "NEC" ], "image_path": "./data/STVQA/imageNet/n04238763_19010.JPEG", "model_name": "BLIP2" }, { "question": "WHAT COLOUR IS ON BOTH SIDE", "answer": "black and silver", "gt_answers": [ "BLACK" ], "image_path": "./data/STVQA/imageNet/n04238763_19010.JPEG", "model_name": "BLIP2" }, { "question": "What number is shown on the closest boat in the frame to the camera?", "answer": "33", "gt_answers": [ "33" ], "image_path": "./data/STVQA/VisualGenome/1/2316102.jpg", "model_name": "BLIP2" }, { "question": "What is the last name of the person who owns the copyright of this photo?", "answer": "mcdonald", "gt_answers": [ "Marbach" ], "image_path": "./data/STVQA/VisualGenome/1/2316102.jpg", "model_name": "BLIP2" }, { "question": "Which letter is seen to the left of the large \"33\" on the first boat?", "answer": "e", "gt_answers": [ "E" ], "image_path": "./data/STVQA/VisualGenome/1/2316102.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the ski trail?", "answer": "black diamond", "gt_answers": [ "Charlie Banks Trail", "CHARLIE BANKS" ], "image_path": "./data/STVQA/VisualGenome/1/2367935.jpg", "model_name": "BLIP2" }, { "question": "What is the distance of the trail?", "answer": "5 km", "gt_answers": [ "3K" ], "image_path": "./data/STVQA/VisualGenome/1/2367935.jpg", "model_name": "BLIP2" }, { "question": "What ski level is the trail rated?", "answer": "intermediate", "gt_answers": [ "Advanced-Intermediate" ], "image_path": "./data/STVQA/VisualGenome/1/2367935.jpg", "model_name": "BLIP2" }, { "question": "What is written on the label of each bottle?", "answer": "the name of the beard oil", "gt_answers": [ "Feel Good" ], "image_path": "./data/STVQA/imageNet/n03937543_1013.JPEG", "model_name": "BLIP2" }, { "question": "what website is advertised here?", "answer": "chester zoo", "gt_answers": [ "ChesterZoo.org" ], "image_path": "./data/STVQA/VisualGenome/2/2404232.jpg", "model_name": "BLIP2" }, { "question": "What is in the image?", "answer": "a jar of salsa", "gt_answers": [ "Nutrition Facts" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013746.jpg", "model_name": "BLIP2" }, { "question": "How many calories are in this package?", "answer": "0 calories per serving", "gt_answers": [ "10 calories", "10" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000013746.jpg", "model_name": "BLIP2" }, { "question": "What type of Indian Institute is this building?", "answer": "institute of technology", "gt_answers": [ "technology" ], "image_path": "./data/STVQA/IIIT_text/img_000059.jpg", "model_name": "BLIP2" }, { "question": "What word comes after \"Indian Institute of\"?", "answer": "engineering", "gt_answers": [ "technology" ], "image_path": "./data/STVQA/IIIT_text/img_000059.jpg", "model_name": "BLIP2" }, { "question": "What is the word on the sign above the arrow?", "answer": "stop", "gt_answers": [ "wa" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000264456.jpg", "model_name": "BLIP2" }, { "question": "What is the word below \"ON' on the sign?", "answer": "on", "gt_answers": [ "wa" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000264456.jpg", "model_name": "BLIP2" }, { "question": "What are the numbers under the street sign \"Fuller\"", "answer": "the numbers under the street sign are the street names", "gt_answers": [ "1600" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000264456.jpg", "model_name": "BLIP2" }, { "question": "What does the arrow's label read?", "answer": "old man grinds", "gt_answers": [ "old man grinds" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000095924.jpg", "model_name": "BLIP2" }, { "question": "What AVE. is this?", "answer": "dead end street", "gt_answers": [ "MOODY" ], "image_path": "./data/STVQA/VisualGenome/1/2346768.jpg", "model_name": "BLIP2" }, { "question": "Who is this photo by?", "answer": "person", "gt_answers": [ "JUAN N ONLY" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000000109.jpg", "model_name": "BLIP2" }, { "question": "What company name is printed on the guitar pick?", "answer": "fender", "gt_answers": [ "Fender" ], "image_path": "./data/STVQA/imageNet/n03929660_17906.JPEG", "model_name": "BLIP2" }, { "question": "What word is written in all capital letters on the guitar pick?", "answer": "rock", "gt_answers": [ "MEDIUM" ], "image_path": "./data/STVQA/imageNet/n03929660_17906.JPEG", "model_name": "BLIP2" }, { "question": "What number is on the side of the fire truck?", "answer": "9", "gt_answers": [ "41" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000359695.jpg", "model_name": "BLIP2" }, { "question": "What is the shop at the top of the sign?", "answer": "wool shop", "gt_answers": [ "Fromagerie" ], "image_path": "./data/STVQA/VisualGenome/1/2355017.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the bus in the picture?", "answer": "yellow cab", "gt_answers": [ "City Sights NY" ], "image_path": "./data/STVQA/VisualGenome/2/2427.jpg", "model_name": "BLIP2" }, { "question": "What sport is being played?", "answer": "baseball", "gt_answers": [ "Baseball" ], "image_path": "./data/STVQA/VisualGenome/1/2337458.jpg", "model_name": "BLIP2" }, { "question": "How many signs are there?", "answer": "three", "gt_answers": [ "4", "three" ], "image_path": "./data/STVQA/VisualGenome/1/2323113.jpg", "model_name": "BLIP2" }, { "question": "What number is on the blue hoodie?", "answer": "person", "gt_answers": [ "83" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000308470.jpg", "model_name": "BLIP2" }, { "question": "What number is on the player in yellow?", "answer": "person", "gt_answers": [ "13" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000308470.jpg", "model_name": "BLIP2" }, { "question": "What number is in red?", "answer": "person", "gt_answers": [ "3" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000308470.jpg", "model_name": "BLIP2" }, { "question": "What website is listed?", "answer": "person", "gt_answers": [ "FreakyFrugalite.com" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000491932.jpg", "model_name": "BLIP2" }, { "question": "What is the name on the cake?", "answer": "christopher", "gt_answers": [ "Christopher" ], "image_path": "./data/STVQA/VisualGenome/2/2416833.jpg", "model_name": "BLIP2" }, { "question": "What kind type facts are on this can?", "answer": "nutrition facts", "gt_answers": [ "Nutrition" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000000584.jpg", "model_name": "BLIP2" }, { "question": "What kind of game system is this person playing with?", "answer": "wii", "gt_answers": [ "Wii" ], "image_path": "./data/STVQA/VisualGenome/2/2402476.jpg", "model_name": "BLIP2" }, { "question": "What are the letters stated on the hat ?", "answer": "babe", "gt_answers": [ "N Y", "NY" ], "image_path": "./data/STVQA/VisualGenome/1/2349876.jpg", "model_name": "BLIP2" }, { "question": "What are the numbers on the monitor?", "answer": "the numbers on the monitor are the date and time", "gt_answers": [ "10 11" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000383893.jpg", "model_name": "BLIP2" }, { "question": "What is written in white on the sign?", "answer": "art", "gt_answers": [ "Stop" ], "image_path": "./data/STVQA/VisualGenome/1/2342774.jpg", "model_name": "BLIP2" }, { "question": "What is written in yellow on the sign?", "answer": "art", "gt_answers": [ "Art" ], "image_path": "./data/STVQA/VisualGenome/1/2342774.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boat (written on white board) ?", "answer": "person", "gt_answers": [ "ternen gilleleje" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000164000.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the window?", "answer": "11", "gt_answers": [ "112" ], "image_path": "./data/STVQA/imageNet/n02701002_10365.JPEG", "model_name": "BLIP2" }, { "question": "What is the license plate number?", "answer": "111", "gt_answers": [ "SFA RK 352" ], "image_path": "./data/STVQA/imageNet/n02701002_10365.JPEG", "model_name": "BLIP2" }, { "question": "What brand of car is shown in the image?", "answer": "audi", "gt_answers": [ "Ford" ], "image_path": "./data/STVQA/VisualGenome/1/2362899.jpg", "model_name": "BLIP2" }, { "question": "What brand name is on the guys shirt?", "answer": "person", "gt_answers": [ "Harley Davidson", "HARLEY-DAVIDSON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000167644.jpg", "model_name": "BLIP2" }, { "question": "What is written on the top of the green wall?", "answer": "fed cup", "gt_answers": [ "Rolex" ], "image_path": "./data/STVQA/VisualGenome/1/2377304.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the boat?", "answer": "no name", "gt_answers": [ "Viking" ], "image_path": "./data/STVQA/VisualGenome/1/2358259.jpg", "model_name": "BLIP2" }, { "question": "What are the letters in the circle?", "answer": "a b c d", "gt_answers": [ "abc" ], "image_path": "./data/STVQA/imageNet/n03141823_20027.JPEG", "model_name": "BLIP2" }, { "question": "What is the month date-stamped on the picture?", "answer": "august", "gt_answers": [ "06" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000515249.jpg", "model_name": "BLIP2" }, { "question": "What color is his jacket?", "answer": "blue", "gt_answers": [ "Blue", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2373518.jpg", "model_name": "BLIP2" }, { "question": "What color is his helmet?", "answer": "blue", "gt_answers": [ "silver", "{}" ], "image_path": "./data/STVQA/VisualGenome/1/2373518.jpg", "model_name": "BLIP2" }, { "question": "What is the name of this business?", "answer": "la bottega del cafe", "gt_answers": [ "La Bottega del Caffe" ], "image_path": "./data/STVQA/IIIT_text/1274.jpg", "model_name": "BLIP2" }, { "question": "What number can be found on the sign?", "answer": "5", "gt_answers": [ "5" ], "image_path": "./data/STVQA/IIIT_text/1274.jpg", "model_name": "BLIP2" }, { "question": "What word appears on the top left corner?", "answer": "i", "gt_answers": [ "Nickeeth" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000044998.jpg", "model_name": "BLIP2" }, { "question": "WHAT IS WRITTEN IN THIS IMAGE?", "answer": "welcome to hope", "gt_answers": [ "HOMAGE TO HAPA" ], "image_path": "./data/STVQA/VisualGenome/1/2323348.jpg", "model_name": "BLIP2" }, { "question": "what is the sign on top refer", "answer": "the sign on top refers", "gt_answers": [ "2828 yes optus", "YES OPTUS" ], "image_path": "./data/STVQA/VisualGenome/1/2338963.jpg", "model_name": "BLIP2" }, { "question": "Is there more coffee or pizza here?", "answer": "coffee", "gt_answers": [ "Coffee" ], "image_path": "./data/STVQA/vizwiz/VizWiz_val_000000028068.jpg", "model_name": "BLIP2" }, { "question": "What does the sign tell you to do?", "answer": "stop", "gt_answers": [ "STOP" ], "image_path": "./data/STVQA/VisualGenome/1/2328833.jpg", "model_name": "BLIP2" }, { "question": "What is the label on the drink cup?", "answer": "person", "gt_answers": [ "Coca-Cola" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000509509.jpg", "model_name": "BLIP2" }, { "question": "What does the yellow sign say?", "answer": "no smoking", "gt_answers": [ "Low Prices Daily" ], "image_path": "./data/STVQA/icdar/img_606.jpg", "model_name": "BLIP2" }, { "question": "What does the store front read?", "answer": "nike", "gt_answers": [ "collectpoint" ], "image_path": "./data/STVQA/icdar/img_870.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store in the back?", "answer": "zara", "gt_answers": [ "Collectpoint" ], "image_path": "./data/STVQA/icdar/img_870.jpg", "model_name": "BLIP2" }, { "question": "What letter is on the hat?", "answer": "person", "gt_answers": [ "C" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000435695.jpg", "model_name": "BLIP2" }, { "question": "What airline is this plane from?", "answer": "united airlines", "gt_answers": [ "UNITED" ], "image_path": "./data/STVQA/VisualGenome/2/2417823.jpg", "model_name": "BLIP2" }, { "question": "What is company name written on bus ?", "answer": "brighton and hove", "gt_answers": [ "BRIGHTON&HOVE" ], "image_path": "./data/STVQA/VisualGenome/1/2327873.jpg", "model_name": "BLIP2" }, { "question": "What is the number in the green jersey facing away from the camera?", "answer": "person", "gt_answers": [ "8" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000048442.jpg", "model_name": "BLIP2" }, { "question": "What word is written under STOP?", "answer": "driving", "gt_answers": [ "Driving" ], "image_path": "./data/STVQA/VisualGenome/2/2409386.jpg", "model_name": "BLIP2" }, { "question": "What color is the no-parking sign?", "answer": "yellow", "gt_answers": [ "Yellow" ], "image_path": "./data/STVQA/VisualGenome/1/2361163.jpg", "model_name": "BLIP2" }, { "question": "What car brand is represented in this photograph?", "answer": "honda", "gt_answers": [ "Honda" ], "image_path": "./data/STVQA/IIIT_text/img_000591.jpg", "model_name": "BLIP2" }, { "question": "What letter represents this store front's brand?", "answer": "h", "gt_answers": [ "H" ], "image_path": "./data/STVQA/IIIT_text/img_000591.jpg", "model_name": "BLIP2" }, { "question": "What word is written in red in the photograph?", "answer": "honda", "gt_answers": [ "Honda" ], "image_path": "./data/STVQA/IIIT_text/img_000591.jpg", "model_name": "BLIP2" }, { "question": "What is the primary color of the parrot?", "answer": "green", "gt_answers": [ "Green" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000137420.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the second bus from the left?", "answer": "person", "gt_answers": [ "8257" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000515252.jpg", "model_name": "BLIP2" }, { "question": "What bank is in this picture?", "answer": "bank of america", "gt_answers": [ "Citizens Bank" ], "image_path": "./data/STVQA/VisualGenome/2/1859.jpg", "model_name": "BLIP2" }, { "question": "what is present in top of the left corner", "answer": "a basket of vegetables", "gt_answers": [ "Ben spark" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000224759.jpg", "model_name": "BLIP2" }, { "question": "What letters are in the bottom right of this label?", "answer": "ef", "gt_answers": [ "Z5" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002660.jpg", "model_name": "BLIP2" }, { "question": "what brand is this helmet?", "answer": "ed hardy", "gt_answers": [ "Doeri" ], "image_path": "./data/STVQA/imageNet/n03127747_1420.JPEG", "model_name": "BLIP2" }, { "question": "What type of lettuce is included in the package?", "answer": "romaine lettuce", "gt_answers": [ "Iceberg" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000004881.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of the lotion?", "answer": "body shop", "gt_answers": [ "Bath & Body Works" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002192.jpg", "model_name": "BLIP2" }, { "question": "What is the scent of the lotion?", "answer": "it smells like sweet peas", "gt_answers": [ "Velvet Sugar" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000002192.jpg", "model_name": "BLIP2" }, { "question": "What type of store is in the background?", "answer": "comic book store", "gt_answers": [ "Bank" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000295505.jpg", "model_name": "BLIP2" }, { "question": "What brand is being advertised in the background?", "answer": "lego", "gt_answers": [ "Aegon" ], "image_path": "./data/STVQA/VisualGenome/2/2406522.jpg", "model_name": "BLIP2" }, { "question": "Who is the owner's first love?", "answer": "mom", "gt_answers": [ "Mom" ], "image_path": "./data/STVQA/VisualGenome/2/2402502.jpg", "model_name": "BLIP2" }, { "question": "What is the auto show on the truck?", "answer": "detroit water", "gt_answers": [ "42 ANNUAL DETROIT AUTO SHOW" ], "image_path": "./data/STVQA/VisualGenome/1/2360185.jpg", "model_name": "BLIP2" }, { "question": "What does the wooden panel say?", "answer": "detroit water", "gt_answers": [ "Quality motors since 1911" ], "image_path": "./data/STVQA/VisualGenome/1/2360185.jpg", "model_name": "BLIP2" }, { "question": "What word is after the word JOHN in this picture?", "answer": "little", "gt_answers": [ "LITTLE" ], "image_path": "./data/STVQA/icdar/img_658.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the street?", "answer": "season street", "gt_answers": [ "Sheldon St.", "SHELDON ST" ], "image_path": "./data/STVQA/VisualGenome/1/2344214.jpg", "model_name": "BLIP2" }, { "question": "What is the address of the street?", "answer": "spencer street", "gt_answers": [ "12400" ], "image_path": "./data/STVQA/VisualGenome/1/2344214.jpg", "model_name": "BLIP2" }, { "question": "What is the percentage of alcohol in the bottle?", "answer": "12%", "gt_answers": [ "14.5" ], "image_path": "./data/STVQA/imageNet/n04591713_5298.JPEG", "model_name": "BLIP2" }, { "question": "What is she playing?", "answer": "tennis", "gt_answers": [ "Tennis" ], "image_path": "./data/STVQA/VisualGenome/2/2408817.jpg", "model_name": "BLIP2" }, { "question": "What is the street name?", "answer": "swan street", "gt_answers": [ "Swan", "SWAN st" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000045335.jpg", "model_name": "BLIP2" }, { "question": "What does the upper sign say?", "answer": "no outlet", "gt_answers": [ "SWAN ST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000045335.jpg", "model_name": "BLIP2" }, { "question": "What word is written up the side of the building?", "answer": "elang", "gt_answers": [ "Bang" ], "image_path": "./data/STVQA/IIIT_text/3611.jpg", "model_name": "BLIP2" }, { "question": "What brand name is on the bag?", "answer": "banquet", "gt_answers": [ "Banquet" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000018091.jpg", "model_name": "BLIP2" }, { "question": "What company owns the train?", "answer": "toronto transit commission", "gt_answers": [ "ttc" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355054.jpg", "model_name": "BLIP2" }, { "question": "Who is the photographer of this picture?", "answer": "person", "gt_answers": [ "christian flores" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355054.jpg", "model_name": "BLIP2" }, { "question": "What is title of the photo?", "answer": "subway train", "gt_answers": [ "RYAN'S TTC PHOTOS" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000355054.jpg", "model_name": "BLIP2" }, { "question": "What metal are the snaps made out of?", "answer": "hardened steel", "gt_answers": [ "Steel" ], "image_path": "./data/STVQA/imageNet/n02916936_7426.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the warship shown?", "answer": "uss midway", "gt_answers": [ "U.S.S. Midway CVB-41" ], "image_path": "./data/STVQA/imageNet/n02687172_21055.JPEG", "model_name": "BLIP2" }, { "question": "What company is advertised on the boards?", "answer": "coca cola", "gt_answers": [ "Bank of America" ], "image_path": "./data/STVQA/VisualGenome/2/2411834.jpg", "model_name": "BLIP2" }, { "question": "How much is acorn squash?", "answer": "acorn squash is a type of squash", "gt_answers": [ "99 lb." ], "image_path": "./data/STVQA/imageNet/n07717410_11533.JPEG", "model_name": "BLIP2" }, { "question": "What farm market is this at?", "answer": "santa monica farmers market", "gt_answers": [ "Burris Farm Market" ], "image_path": "./data/STVQA/imageNet/n07717410_11533.JPEG", "model_name": "BLIP2" }, { "question": "What kind of onion is being sold?", "answer": "yellow onion", "gt_answers": [ "Sweet", "Sweet Onion" ], "image_path": "./data/STVQA/imageNet/n07717410_11533.JPEG", "model_name": "BLIP2" }, { "question": "What is the name of the company on the sign?", "answer": "google", "gt_answers": [ "Google" ], "image_path": "./data/STVQA/IIIT_text/img_000481.jpg", "model_name": "BLIP2" }, { "question": "What is the name printed on the cup in the foreground?", "answer": "yelp", "gt_answers": [ "Yotel" ], "image_path": "./data/STVQA/VisualGenome/1/2347935.jpg", "model_name": "BLIP2" }, { "question": "What is the letter shown on the building in the background?", "answer": "y", "gt_answers": [ "W" ], "image_path": "./data/STVQA/VisualGenome/1/2347935.jpg", "model_name": "BLIP2" }, { "question": "What is the bank?", "answer": "first national bank", "gt_answers": [ "First national", "FIRST NATIONAL BANK" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000577723.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the orange drink ?", "answer": "person", "gt_answers": [ "Fanta" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000310518.jpg", "model_name": "BLIP2" }, { "question": "When was this picture taken?", "answer": "in the spring", "gt_answers": [ "11:08 AM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000357235.jpg", "model_name": "BLIP2" }, { "question": "What date was this picture taken", "answer": "1 july 2015", "gt_answers": [ "06/01/2011" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000357235.jpg", "model_name": "BLIP2" }, { "question": "What is the opposite to YOUNGEST?", "answer": "oldest", "gt_answers": [ "OLDEST" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000329134.jpg", "model_name": "BLIP2" }, { "question": "What year was this picture taken?", "answer": "2010", "gt_answers": [ "2010" ], "image_path": "./data/STVQA/imageNet/n02107908_11083.JPEG", "model_name": "BLIP2" }, { "question": "What type of sauce is in the picture?", "answer": "hot sauce", "gt_answers": [ "Tabasco" ], "image_path": "./data/STVQA/VisualGenome/1/2373519.jpg", "model_name": "BLIP2" }, { "question": "what time are today's specials?", "answer": "the traffic light", "gt_answers": [ "9-2pm", "9-2 pm" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000449066.jpg", "model_name": "BLIP2" }, { "question": "What year is written on the disc?", "answer": "2002", "gt_answers": [ "2002" ], "image_path": "./data/STVQA/imageNet/n04019541_11817.JPEG", "model_name": "BLIP2" }, { "question": "What letter is on the tennis racquets?", "answer": "person", "gt_answers": [ "W" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000353653.jpg", "model_name": "BLIP2" }, { "question": "What is written in red on the sign on the left?", "answer": "person", "gt_answers": [ "exit" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000047972.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the company that made this vehicle?", "answer": "person", "gt_answers": [ "Dodge" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000380171.jpg", "model_name": "BLIP2" }, { "question": "What is written in green on the plane?", "answer": "air france", "gt_answers": [ "transavia" ], "image_path": "./data/STVQA/VisualGenome/1/2358084.jpg", "model_name": "BLIP2" }, { "question": "What is the registration of the plane?", "answer": "msn 002", "gt_answers": [ "F-GZNC", "F-GZMC" ], "image_path": "./data/STVQA/VisualGenome/1/2358084.jpg", "model_name": "BLIP2" }, { "question": "What is the first word on the mans shirt?", "answer": "person", "gt_answers": [ "be" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000362184.jpg", "model_name": "BLIP2" }, { "question": "What does the mans shirt say?", "answer": "person", "gt_answers": [ "be the change" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000362184.jpg", "model_name": "BLIP2" }, { "question": "What does the sign represent ?", "answer": "bus stop", "gt_answers": [ "Bus Stop" ], "image_path": "./data/STVQA/VisualGenome/2/2404132.jpg", "model_name": "BLIP2" }, { "question": "What does the red sticker say?", "answer": "funding war", "gt_answers": [ "FUNDING WAR!" ], "image_path": "./data/STVQA/VisualGenome/2/2417731.jpg", "model_name": "BLIP2" }, { "question": "What website is being shown in front of the computer screeen?", "answer": "youtube", "gt_answers": [ "www.bizsyscon.com" ], "image_path": "./data/STVQA/imageNet/n04152593_5249.JPEG", "model_name": "BLIP2" }, { "question": "What brand is on the white banner?", "answer": "person", "gt_answers": [ "basic" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000514186.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the black emblem on the far right?", "answer": "person", "gt_answers": [ "BILLABON" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000514186.jpg", "model_name": "BLIP2" }, { "question": "What grain does this food contain?", "answer": "wheat", "gt_answers": [ "Whole grain", "WHOLE" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007235.jpg", "model_name": "BLIP2" }, { "question": "What does the red push pin say?", "answer": "i love you", "gt_answers": [ "Child hunger ends here", "CHILD HUNGER ENDS HERE." ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007235.jpg", "model_name": "BLIP2" }, { "question": "What product is this?", "answer": "pizza", "gt_answers": [ "Coffee steamers" ], "image_path": "./data/STVQA/vizwiz/VizWiz_train_000000007235.jpg", "model_name": "BLIP2" }, { "question": "What brand is the keyboard?", "answer": "logitech", "gt_answers": [ "Logitech" ], "image_path": "./data/STVQA/VisualGenome/1/2334019.jpg", "model_name": "BLIP2" }, { "question": "What type of media desktop is this?", "answer": "a computer desktop", "gt_answers": [ "Dinovo" ], "image_path": "./data/STVQA/VisualGenome/1/2334019.jpg", "model_name": "BLIP2" }, { "question": "Who has the copyright on the photo?", "answer": "the photographer", "gt_answers": [ "Robert J. Steiner" ], "image_path": "./data/STVQA/imageNet/n02776631_24991.JPEG", "model_name": "BLIP2" }, { "question": "What is printed word on the white pot?", "answer": "love", "gt_answers": [ "Love" ], "image_path": "./data/STVQA/VisualGenome/1/2364598.jpg", "model_name": "BLIP2" }, { "question": "What is the brand of camera?", "answer": "canon", "gt_answers": [ "Canon" ], "image_path": "./data/STVQA/imageNet/n04069434_4504.JPEG", "model_name": "BLIP2" }, { "question": "What is the number of the parking meter?", "answer": "4", "gt_answers": [ "303" ], "image_path": "./data/STVQA/imageNet/n03891332_5430.JPEG", "model_name": "BLIP2" }, { "question": "what color is the hat?", "answer": "white", "gt_answers": [ "white" ], "image_path": "./data/STVQA/VisualGenome/1/2328765.jpg", "model_name": "BLIP2" }, { "question": "What is the number on the bumper of the school bus?", "answer": "0", "gt_answers": [ "253" ], "image_path": "./data/STVQA/VisualGenome/1/2372661.jpg", "model_name": "BLIP2" }, { "question": "What is the street name on the bottom sign?", "answer": "wilson street", "gt_answers": [ "Fun street", "FUN" ], "image_path": "./data/STVQA/VisualGenome/2/2415089.jpg", "model_name": "BLIP2" }, { "question": "What is written on the billboard closest to the stop sign?", "answer": "it is a billboard", "gt_answers": [ "American Apparel" ], "image_path": "./data/STVQA/VisualGenome/1/2326693.jpg", "model_name": "BLIP2" }, { "question": "Who is the green sponsor of the hockey team?", "answer": "nhl", "gt_answers": [ "Rasmussen college" ], "image_path": "./data/STVQA/imageNet/n04019541_59273.JPEG", "model_name": "BLIP2" }, { "question": "What number is the player Makaroy?", "answer": "17", "gt_answers": [ "26" ], "image_path": "./data/STVQA/imageNet/n04019541_59273.JPEG", "model_name": "BLIP2" }, { "question": "What beer brand sponsors this team?", "answer": "bud light", "gt_answers": [ "Bud light" ], "image_path": "./data/STVQA/imageNet/n04019541_59273.JPEG", "model_name": "BLIP2" }, { "question": "What is the date and time written in the notebook?", "answer": "the date and time is written in the notebook", "gt_answers": [ "8/23/10 12:55 PM" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365879.jpg", "model_name": "BLIP2" }, { "question": "What language is being studied?", "answer": "german", "gt_answers": [ "SPANISH" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000365879.jpg", "model_name": "BLIP2" }, { "question": "What are the words printed in the top area, describing the brand associated with the toy?", "answer": "doctor who", "gt_answers": [ "Doctor Who" ], "image_path": "./data/STVQA/VisualGenome/1/2317268.jpg", "model_name": "BLIP2" }, { "question": "What is the message in white lettering, at the bottom of the toy's packaging?", "answer": "do not drink and drive", "gt_answers": [ "Try Me" ], "image_path": "./data/STVQA/VisualGenome/1/2317268.jpg", "model_name": "BLIP2" }, { "question": "What is written at the bottom of the poster on teh right?", "answer": "i love you", "gt_answers": [ "SHORTBUS" ], "image_path": "./data/STVQA/VisualGenome/1/2356182.jpg", "model_name": "BLIP2" }, { "question": "What is printed on the sweatshirt?", "answer": "shinobi", "gt_answers": [ "Shiamak" ], "image_path": "./data/STVQA/VisualGenome/1/2368645.jpg", "model_name": "BLIP2" }, { "question": "What is the first word after the copyright symbol?", "answer": "", "gt_answers": [ "Majomo" ], "image_path": "./data/STVQA/VisualGenome/1/2368645.jpg", "model_name": "BLIP2" }, { "question": "What is the second word after the copyright symbol?", "answer": "", "gt_answers": [ "Photography" ], "image_path": "./data/STVQA/VisualGenome/1/2368645.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the store?", "answer": "person", "gt_answers": [ "COLDCORNER" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000069486.jpg", "model_name": "BLIP2" }, { "question": "When was the photo copyrighted?", "answer": "2010", "gt_answers": [ "2011" ], "image_path": "./data/STVQA/VisualGenome/1/2319670.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the photographer?", "answer": "michael keller", "gt_answers": [ "Miguel Navaza" ], "image_path": "./data/STVQA/VisualGenome/1/2319670.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the network of hospitals advertised?", "answer": "person", "gt_answers": [ "AM+Ri" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000029909.jpg", "model_name": "BLIP2" }, { "question": "What two words are in red letters?", "answer": "coco rabbit", "gt_answers": [ "Coco Rabbit" ], "image_path": "./data/STVQA/imageNet/n03594734_45012.JPEG", "model_name": "BLIP2" }, { "question": "Which word is the name of an animal?", "answer": "rabbit", "gt_answers": [ "Rabbit" ], "image_path": "./data/STVQA/imageNet/n03594734_45012.JPEG", "model_name": "BLIP2" }, { "question": "What word has four letters?", "answer": "rabbit", "gt_answers": [ "Coco" ], "image_path": "./data/STVQA/imageNet/n03594734_45012.JPEG", "model_name": "BLIP2" }, { "question": "What brand of airline is this?", "answer": "american airlines", "gt_answers": [ "American" ], "image_path": "./data/STVQA/IIIT_text/img_000096.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the movie advertised on the bus?", "answer": "kingsman", "gt_answers": [ "prisoners" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000114204.jpg", "model_name": "BLIP2" }, { "question": "Whats the first name on they cubical", "answer": "person", "gt_answers": [ "barbara" ], "image_path": "./data/STVQA/coco-text/COCO_train2014_000000371295.jpg", "model_name": "BLIP2" }, { "question": "What does the business to the left sell?", "answer": "beer", "gt_answers": [ "Chocolate" ], "image_path": "./data/STVQA/icdar/test_img_244.jpg", "model_name": "BLIP2" }, { "question": "What is the name of the business?", "answer": "tax brewers", "gt_answers": [ "AX BRENNER CHOCOLATE BAR" ], "image_path": "./data/STVQA/icdar/test_img_244.jpg", "model_name": "BLIP2" } ]