Files
Grounded-SAM-2/lib/test/evaluation/votdataset.py
2024-11-19 22:12:54 -08:00

349 lines
12 KiB
Python

from typing import Union, TextIO
import numpy as np
from numba import jit
from lib.test.evaluation.data import SequenceList, BaseDataset, Sequence
class VOTDataset(BaseDataset):
    """
    VOT tracking datasets, selected by two-digit year (18, 20 or 22).

    Publication:
        The sixth Visual Object Tracking VOT2018 challenge results.
        Matej Kristan, Ales Leonardis, Jiri Matas, Michael Felsberg, Roman Pflugfelder, Luka Cehovin Zajc, Tomas Vojir,
        Goutam Bhat, Alan Lukezic et al.
        ECCV, 2018
        https://prints.vicos.si/publications/365

    Download the dataset from http://www.votchallenge.net/vot2018/dataset.html

    Each sequence is expected at ``<base_path>/<name>/`` with a ``groundtruth.txt``
    annotation file and frames stored as ``color/<8-digit index>.jpg`` starting at 1.
    """

    # Sequence names per supported year; the keys double as the set of supported years.
    _SEQUENCE_NAMES = {
        18: (
            'ants1', 'ants3', 'bag', 'ball1', 'ball2', 'basketball', 'birds1', 'blanket',
            'bmx', 'bolt1', 'bolt2', 'book', 'butterfly', 'car1', 'conduction1', 'crabs1',
            'crossing', 'dinosaur', 'drone_across', 'drone_flip', 'drone1', 'fernando',
            'fish1', 'fish2', 'fish3', 'flamingo1', 'frisbee', 'girl', 'glove', 'godfather',
            'graduate', 'gymnastics1', 'gymnastics2', 'gymnastics3', 'hand', 'handball1',
            'handball2', 'helicopter', 'iceskater1', 'iceskater2', 'leaves', 'matrix',
            'motocross1', 'motocross2', 'nature', 'pedestrian1', 'rabbit', 'racing', 'road',
            'shaking', 'sheep', 'singer2', 'singer3', 'soccer1', 'soccer2', 'soldier',
            'tiger', 'traffic', 'wiper', 'zebrafish1'
        ),
        20: (
            'agility', 'ants1', 'ball2', 'ball3', 'basketball', 'birds1', 'bolt1', 'book',
            'butterfly', 'car1', 'conduction1', 'crabs1', 'dinosaur', 'dribble', 'drone1',
            'drone_across', 'drone_flip', 'fernando', 'fish1', 'fish2', 'flamingo1',
            'frisbee', 'girl', 'glove', 'godfather', 'graduate', 'gymnastics1',
            'gymnastics2', 'gymnastics3', 'hand', 'hand02', 'hand2', 'handball1',
            'handball2', 'helicopter', 'iceskater1', 'iceskater2', 'lamb', 'leaves',
            'marathon', 'matrix', 'monkey', 'motocross1', 'nature', 'polo', 'rabbit',
            'rabbit2', 'road', 'rowing', 'shaking', 'singer2', 'singer3', 'soccer1',
            'soccer2', 'soldier', 'surfing', 'tiger', 'wheel', 'wiper', 'zebrafish1'
        ),
        22: (
            'agility', 'animal', 'ants1', 'bag', 'ball2', 'ball3', 'basketball', 'birds1',
            'birds2', 'bolt1', 'book', 'bubble', 'butterfly', 'car1', 'conduction1',
            'crabs1', 'dinosaur', 'diver', 'drone1', 'drone_across', 'fernando', 'fish1',
            'fish2', 'flamingo1', 'frisbee', 'girl', 'graduate', 'gymnastics1',
            'gymnastics2', 'gymnastics3', 'hand', 'hand2', 'handball1', 'handball2',
            'helicopter', 'iceskater1', 'iceskater2', 'kangaroo', 'lamb', 'leaves',
            'marathon', 'matrix', 'monkey', 'motocross1', 'nature', 'polo', 'rabbit',
            'rabbit2', 'rowing', 'shaking', 'singer2', 'singer3', 'snake', 'soccer1',
            'soccer2', 'soldier', 'surfing', 'tennis', 'tiger', 'wheel', 'wiper',
            'zebrafish1'
        ),
    }

    def __init__(self, year=18):
        """
        Select the dataset root and sequence names for the requested year.

        Args:
            year: 18, 20 or 22.

        Raises:
            NotImplementedError: if ``year`` is not one of the supported values.
        """
        super().__init__()
        self.year = year
        # NOTE(review): env_settings is presumably populated by BaseDataset.__init__ -- confirm.
        if year == 18:
            self.base_path = self.env_settings.vot18_path
        elif year == 20:
            self.base_path = self.env_settings.vot20_path
        elif year == 22:
            self.base_path = self.env_settings.vot22_path
        else:
            # Fail here rather than leaving base_path undefined for later code to trip over.
            raise NotImplementedError('Unsupported VOT year: {!r}'.format(year))
        self.sequence_list = self._get_sequence_list(year)

    def get_sequence_list(self):
        """Return a SequenceList holding one constructed Sequence per sequence name."""
        return SequenceList([self._construct_sequence(s) for s in self.sequence_list])

    def _frame_paths(self, sequence_name, num_frames):
        """Build the image paths for frames 1..num_frames of one sequence."""
        nz = 8  # zero-padded width of the frame index in the file name
        ext = 'jpg'
        start_frame = 1
        return ['{base_path}/{sequence_path}/color/{frame:0{nz}}.{ext}'.format(
                    base_path=self.base_path, sequence_path=sequence_name,
                    frame=frame_num, nz=nz, ext=ext)
                for frame_num in range(start_frame, start_frame + num_frames)]

    @staticmethod
    def _load_box_annotations(anno_path):
        """Load a numeric annotation file and return one (x, y, w, h) row per frame."""
        # ndmin=2 keeps a single-line annotation file two-dimensional so the column
        # checks below still work.
        try:
            boxes = np.loadtxt(str(anno_path), dtype=np.float64, ndmin=2)
        except ValueError:
            # Whitespace parsing failed; the file is comma separated instead.
            boxes = np.loadtxt(str(anno_path), delimiter=',', dtype=np.float64, ndmin=2)

        if boxes.shape[1] > 4:
            # More than four columns: treat them as four (x, y) corner points and
            # replace them by the axis-aligned bounding box of those points.
            xs = boxes[:, [0, 2, 4, 6]]
            ys = boxes[:, [1, 3, 5, 7]]
            x1 = np.amin(xs, 1).reshape(-1, 1)
            y1 = np.amin(ys, 1).reshape(-1, 1)
            x2 = np.amax(xs, 1).reshape(-1, 1)
            y2 = np.amax(ys, 1).reshape(-1, 1)
            boxes = np.concatenate((x1, y1, x2 - x1, y2 - y1), 1)
        return boxes

    def _construct_sequence(self, sequence_name):
        """
        Build the Sequence object for one named sequence.

        Years 18 and 22 read ``groundtruth.txt`` as a numeric table; year 20 reads it
        through ``read_file``, which yields one region per line.

        Raises:
            NotImplementedError: if ``self.year`` is not 18, 20 or 22.
        """
        anno_path = '{}/{}/groundtruth.txt'.format(self.base_path, sequence_name)

        if self.year in (18, 22):
            ground_truth_rect = self._load_box_annotations(anno_path)
        elif self.year == 20:
            ground_truth_rect = np.array(read_file(str(anno_path)), dtype=np.float64)
        else:
            raise NotImplementedError

        # One frame per annotation row.
        frames = self._frame_paths(sequence_name, ground_truth_rect.shape[0])
        return Sequence(sequence_name, frames, 'vot', ground_truth_rect)

    def __len__(self):
        """Return the number of sequences in the selected year's list."""
        return len(self.sequence_list)

    def _get_sequence_list(self, year):
        """
        Return a new list with the sequence names of the given year.

        Raises:
            NotImplementedError: if ``year`` is not 18, 20 or 22.
        """
        try:
            names = self._SEQUENCE_NAMES[year]
        except KeyError:
            raise NotImplementedError('Unsupported VOT year: {!r}'.format(year)) from None
        return list(names)
def parse(string):
    """
    Parse one annotation line and return its region.

    Only the mask encoding is supported: a line starting with ``'m'`` followed by
    comma-separated integers. The line is decoded with ``create_mask_from_string``
    and only the region it returns (``[x, y, w, h]``) is passed on; the decoded mask
    and its offset are discarded.

    Args:
        string: a single, already stripped, annotation line.

    Returns:
        The region list produced by ``create_mask_from_string``.

    Raises:
        NotImplementedError: if the line is empty or does not start with ``'m'``
            (special, rectangle and polygon formats are not handled).
    """
    # startswith() also rejects an empty line cleanly instead of failing on string[0].
    if not string.startswith('m'):
        raise NotImplementedError('Unknown region format.')

    _mask, _offset, region = create_mask_from_string(string[1:].split(','))
    return region
def read_file(fp: Union[str, TextIO]):
    """
    Read an annotation file and parse every line into a region.

    Args:
        fp: either a path (opened and closed here) or an already open text stream
            (read but left open).

    Returns:
        A list with the result of ``parse`` for each stripped line, in file order.
    """
    if not isinstance(fp, str):
        raw_lines = fp.readlines()
    else:
        with open(fp) as handle:
            raw_lines = handle.readlines()

    return [parse(raw.strip()) for raw in raw_lines]
def create_mask_from_string(mask_encoding):
    """
    Decode a mask annotation into a mask, its offset and its bounding region.

    mask_encoding: iterable of integer-convertible items ordered as
        x0, y0, w, h, followed by the run-length (RLE) values
    output: mask, offset, region
        mask: result of rle_to_mask for the RLE values with size (w, h)
        offset: (x0, y0) tuple
        region: [x0, y0, w, h] list
    """
    values = [int(item) for item in mask_encoding]
    tl_x, tl_y, region_w, region_h = values[:4]

    # everything after the four header values is the run-length data
    run_lengths = np.array(values[4:], dtype=np.int32)
    mask = rle_to_mask(run_lengths, region_w, region_h)

    return mask, (tl_x, tl_y), [tl_x, tl_y, region_w, region_h]
@jit(nopython=True)
def rle_to_mask(rle, width, height):
"""
rle: input rle mask encoding
each evenly-indexed element represents number of consecutive 0s
each oddly indexed element represents number of consecutive 1s
width and height are dimensions of the mask
output: 2-D binary mask
"""
# allocate list of zeros
v = [0] * (width * height)
# set id of the last different element to the beginning of the vector
idx_ = 0
for i in range(len(rle)):
if i % 2 != 0:
# write as many 1s as RLE says (zeros are already in the vector)
for j in range(rle[i]):
v[idx_+j] = 1
idx_ += rle[i]