init commit of samurai

This commit is contained in:
Cheng-Yen Yang
2024-11-19 22:12:54 -08:00
parent f65f4ba181
commit c17e4cecc0
679 changed files with 123982 additions and 0 deletions

View File

@@ -0,0 +1,437 @@
__author__ = 'tylin'
__version__ = '2.0'
# Interface for accessing the Microsoft COCO dataset.
# Microsoft COCO is a large image dataset designed for object detection,
# segmentation, and caption generation. pycocotools is a Python API that
# assists in loading, parsing and visualizing the annotations in COCO.
# Please visit http://mscoco.org/ for more information on COCO, including
# for the data, paper, and tutorials. The exact format of the annotations
# is also described on the COCO website. For example usage of the pycocotools
# please see pycocotools_demo.ipynb. In addition to this API, please download both
# the COCO images and annotations in order to run the demo.
# An alternative to using the API is to load the annotations directly
# into a Python dictionary.
# Using the API provides additional utility functions. Note that this API
# supports both *instance* and *caption* annotations. In the case of
# captions not all functions are defined (e.g. categories are undefined).
# The following API functions are defined:
# COCO - COCO api class that loads COCO annotation file and prepare data structures.
# decodeMask - Decode binary mask M encoded via run-length encoding.
# encodeMask - Encode binary mask M using run-length encoding.
# getAnnIds - Get ann ids that satisfy given filter conditions.
# getCatIds - Get cat ids that satisfy given filter conditions.
# getImgIds - Get img ids that satisfy given filter conditions.
# loadAnns - Load anns with the specified ids.
# loadCats - Load cats with the specified ids.
# loadImgs - Load imgs with the specified ids.
# annToMask - Convert segmentation in an annotation to binary mask.
# showAnns - Display the specified annotations.
# loadRes - Load algorithm results and create API for accessing them.
# download - Download COCO images from mscoco.org server.
# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
# Help on each function can be accessed by: "help COCO>function".
# See also COCO>decodeMask,
# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
# COCO>loadImgs, COCO>annToMask, COCO>showAnns
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
# Licensed under the Simplified BSD License [see bsd.txt]
import json
import time
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
import numpy as np
import copy
import itertools
from pycocotools import mask as maskUtils
import os
from collections import defaultdict
import sys
PYTHON_VERSION = sys.version_info[0]
if PYTHON_VERSION == 2:
from urllib import urlretrieve
elif PYTHON_VERSION == 3:
from urllib.request import urlretrieve
def _isArrayLike(obj):
    """Return True when ``obj`` exposes both ``__iter__`` and ``__len__``."""
    required = ('__iter__', '__len__')
    return all(hasattr(obj, attr) for attr in required)
class COCO:
    """Index and helper API over an already decoded COCO-style annotation dict."""

    def __init__(self, dataset):
        """
        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
        :param dataset (dict): decoded annotation data; the keys 'annotations', 'images',
                               'categories' and 'info' are read when present
        :return:
        """
        # load dataset
        self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
        assert isinstance(dataset, dict), 'annotation file format {} not supported'.format(type(dataset))
        self.dataset = dataset
        self.createIndex()

    def createIndex(self):
        """
        (Re)build the lookup tables from self.dataset:
        anns / imgs / cats map an id to its record, imgToAnns maps an image id to its
        annotations, catToImgs maps a category id to image ids (one entry per annotation).
        :return:
        """
        print('creating index...')
        anns, cats, imgs = {}, {}, {}
        imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
        has_anns = 'annotations' in self.dataset
        has_cats = 'categories' in self.dataset

        if has_anns:
            for ann in self.dataset['annotations']:
                imgToAnns[ann['image_id']].append(ann)
                anns[ann['id']] = ann

        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img

        if has_cats:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat

        if has_anns and has_cats:
            for ann in self.dataset['annotations']:
                catToImgs[ann['category_id']].append(ann['image_id'])

        print('index created!')

        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats

    def info(self):
        """
        Print information about the annotation file.
        :return:
        """
        for key, value in self.dataset['info'].items():
            print('{}: {}'.format(key, value))

    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        """
        Get ann ids that satisfy given filter conditions. default skips that filter
        :param imgIds  (int array)     : get anns for given imgs
               catIds  (int array)     : get anns for given cats
               areaRng (float array)   : get anns for given area range (e.g. [0 inf]), bounds exclusive
               iscrowd (boolean)       : get anns for given crowd label (False or True)
        :return: ids (int array)       : integer array of ann ids
        """
        # NOTE: the list defaults are never mutated here. Emptiness is tested with
        # len() rather than truthiness so that numpy arrays are accepted as filters.
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(imgIds) == len(catIds) == len(areaRng) == 0:
            anns = self.dataset['annotations']
        else:
            if len(imgIds) != 0:
                lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
                anns = list(itertools.chain.from_iterable(lists))
            else:
                anns = self.dataset['annotations']
            if len(catIds) != 0:
                anns = [ann for ann in anns if ann['category_id'] in catIds]
            if len(areaRng) != 0:
                anns = [ann for ann in anns if areaRng[0] < ann['area'] < areaRng[1]]

        if iscrowd is not None:
            return [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
        return [ann['id'] for ann in anns]

    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
        """
        filtering parameters. default skips that filter.
        :param catNms (str array)  : get cats for given cat names
        :param supNms (str array)  : get cats for given supercategory names
        :param catIds (int array)  : get cats for given cat ids
        :return: ids (int array)   : integer array of cat ids
        """
        catNms = catNms if _isArrayLike(catNms) else [catNms]
        supNms = supNms if _isArrayLike(supNms) else [supNms]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        # An empty filter leaves the list untouched, so no special case is needed
        # for "no filters at all".
        cats = self.dataset['categories']
        if len(catNms) != 0:
            cats = [cat for cat in cats if cat['name'] in catNms]
        if len(supNms) != 0:
            cats = [cat for cat in cats if cat['supercategory'] in supNms]
        if len(catIds) != 0:
            cats = [cat for cat in cats if cat['id'] in catIds]
        return [cat['id'] for cat in cats]

    def getImgIds(self, imgIds=[], catIds=[]):
        '''
        Get img ids that satisfy given filter conditions.
        :param imgIds (int array) : get imgs for given ids
        :param catIds (int array) : get imgs with all given cats
        :return: ids (int array)  : integer array of img ids
        '''
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(imgIds) == len(catIds) == 0:
            ids = self.imgs.keys()
        else:
            ids = set(imgIds)
            for i, catId in enumerate(catIds):
                if i == 0 and len(ids) == 0:
                    # no explicit image filter: seed with the first category's images
                    ids = set(self.catToImgs[catId])
                else:
                    ids &= set(self.catToImgs[catId])
        return list(ids)

    @staticmethod
    def _lookup(table, ids):
        """Shared id lookup used by loadAnns / loadCats / loadImgs."""
        if _isArrayLike(ids):
            return [table[i] for i in ids]
        # numpy integer scalars are not instances of int, but they hash and
        # compare like the equivalent Python int, so they are valid dict keys.
        if isinstance(ids, (int, np.integer)):
            return [table[ids]]
        # Any other type keeps the historical behaviour of returning None.
        return None

    def loadAnns(self, ids=[]):
        """
        Load anns with the specified ids.
        :param ids (int array)       : integer ids specifying anns (a single int is also accepted)
        :return: anns (object array) : loaded ann objects
        """
        return self._lookup(self.anns, ids)

    def loadCats(self, ids=[]):
        """
        Load cats with the specified ids.
        :param ids (int array)       : integer ids specifying cats (a single int is also accepted)
        :return: cats (object array) : loaded cat objects
        """
        return self._lookup(self.cats, ids)

    def loadImgs(self, ids=[]):
        """
        Load imgs with the specified ids.
        :param ids (int array)       : integer ids specifying img (a single int is also accepted)
        :return: imgs (object array) : loaded img objects
        """
        return self._lookup(self.imgs, ids)

    def showAnns(self, anns, draw_bbox=False):
        """
        Display the specified annotations.
        :param anns (array of object): annotations to display
        :param draw_bbox (bool): additionally draw each annotation's 'bbox' as a rectangle
        :return: 0 when anns is empty, otherwise None
        """
        if len(anns) == 0:
            return 0
        if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
            datasetType = 'instances'
        elif 'caption' in anns[0]:
            datasetType = 'captions'
        else:
            raise Exception('datasetType not supported')

        if datasetType == 'captions':
            for ann in anns:
                print(ann['caption'])
            return None

        ax = plt.gca()
        ax.set_autoscale_on(False)
        polygons = []
        color = []
        for ann in anns:
            c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0]
            if 'segmentation' in ann:
                if isinstance(ann['segmentation'], list):
                    # polygon
                    for seg in ann['segmentation']:
                        poly = np.array(seg).reshape((len(seg) // 2, 2))
                        polygons.append(Polygon(poly))
                        color.append(c)
                else:
                    # mask
                    t = self.imgs[ann['image_id']]
                    if isinstance(ann['segmentation']['counts'], list):
                        rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
                    else:
                        rle = [ann['segmentation']]
                    m = maskUtils.decode(rle)
                    img = np.ones((m.shape[0], m.shape[1], 3))
                    # Fixed colour for crowd regions, random colour otherwise. Using
                    # else (instead of a second test for 0) keeps color_mask bound
                    # for any other iscrowd value.
                    if ann['iscrowd'] == 1:
                        color_mask = np.array([2.0, 166.0, 101.0]) / 255
                    else:
                        color_mask = np.random.random((1, 3)).tolist()[0]
                    for i in range(3):
                        img[:, :, i] = color_mask[i]
                    ax.imshow(np.dstack((img, m * 0.5)))
            if 'keypoints' in ann and isinstance(ann['keypoints'], list):
                # turn skeleton into zero-based index
                sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton']) - 1
                kp = np.array(ann['keypoints'])
                # keypoints are read as flat (x, y, v) triplets
                x = kp[0::3]
                y = kp[1::3]
                v = kp[2::3]
                for sk in sks:
                    if np.all(v[sk] > 0):
                        plt.plot(x[sk], y[sk], linewidth=3, color=c)
                plt.plot(x[v > 0], y[v > 0], 'o', markersize=8, markerfacecolor=c,
                         markeredgecolor='k', markeredgewidth=2)
                plt.plot(x[v > 1], y[v > 1], 'o', markersize=8, markerfacecolor=c,
                         markeredgecolor=c, markeredgewidth=2)
            if draw_bbox:
                [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox']
                poly = [[bbox_x, bbox_y], [bbox_x, bbox_y + bbox_h],
                        [bbox_x + bbox_w, bbox_y + bbox_h], [bbox_x + bbox_w, bbox_y]]
                np_poly = np.array(poly).reshape((4, 2))
                polygons.append(Polygon(np_poly))
                color.append(c)

        # translucent fill first, then solid outlines on top
        p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
        ax.add_collection(p)
        p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
        ax.add_collection(p)
        return None

    def loadRes(self, resFile):
        """
        Load result file and return a result api object.
        :param resFile (str | numpy.ndarray | list) : file name of a JSON result file, an Nx7
                       result array (see loadNumpyAnnotations), or a list of result dicts.
                       The result dicts are modified in place ('id', 'area', ... are added).
        :return: res (obj)         : result api object
        """
        # The constructor requires a dataset dict; start from an empty one and fill it in.
        res = COCO({})
        res.dataset['images'] = list(self.dataset['images'])

        print('Loading and preparing results...')
        tic = time.time()
        if isinstance(resFile, str) or (sys.version_info[0] == 2 and isinstance(resFile, unicode)):
            with open(resFile) as f:
                anns = json.load(f)
        elif isinstance(resFile, np.ndarray):
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert isinstance(anns, list), 'results in not an array of objects'
        annsImgIds = [ann['image_id'] for ann in anns]
        assert set(annsImgIds).issubset(self.imgs), \
            'Results do not correspond to current coco set'

        # The result type is inferred from the first entry; an empty result list
        # matches no branch and yields an index without annotations.
        first = anns[0] if anns else {}
        if 'caption' in first:
            imgIds = set(img['id'] for img in res.dataset['images']) & set(annsImgIds)
            res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
            for new_id, ann in enumerate(anns, 1):
                ann['id'] = new_id
        elif 'bbox' in first and not first['bbox'] == []:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for new_id, ann in enumerate(anns, 1):
                bb = ann['bbox']
                x1, x2, y1, y2 = bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]
                if 'segmentation' not in ann:
                    # fall back to the box outline as a polygon
                    ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
                ann['area'] = bb[2] * bb[3]
                ann['id'] = new_id
                ann['iscrowd'] = 0
        elif 'segmentation' in first:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for new_id, ann in enumerate(anns, 1):
                # now only support compressed RLE format as segmentation results
                ann['area'] = maskUtils.area(ann['segmentation'])
                if 'bbox' not in ann:
                    ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
                ann['id'] = new_id
                ann['iscrowd'] = 0
        elif 'keypoints' in first:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for new_id, ann in enumerate(anns, 1):
                s = ann['keypoints']
                x = s[0::3]
                y = s[1::3]
                x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
                ann['area'] = (x1 - x0) * (y1 - y0)
                ann['id'] = new_id
                ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
        print('DONE (t={:0.2f}s)'.format(time.time() - tic))

        res.dataset['annotations'] = anns
        res.createIndex()
        return res

    def download(self, tarDir=None, imgIds=[]):
        '''
        Download COCO images from mscoco.org server.
        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded
        :return: -1 when tarDir is missing, otherwise None
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if len(imgIds) == 0:
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        # count from 1 so the last progress line reads N/N
        for done, img in enumerate(imgs, 1):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            if not os.path.exists(fname):
                urlretrieve(img['coco_url'], fname)
            print('downloaded {}/{} images (t={:0.1f}s)'.format(done, N, time.time() - tic))

    def loadNumpyAnnotations(self, data):
        """
        Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
        :param  data (numpy.ndarray)
        :return: annotations (python nested list)
        """
        print('Converting ndarray to lists...')
        assert isinstance(data, np.ndarray)
        print(data.shape)
        assert data.shape[1] == 7
        N = data.shape[0]
        ann = []
        for i in range(N):
            if i % 1000000 == 0:
                print('{}/{}'.format(i, N))
            ann.append({
                'image_id': int(data[i, 0]),
                'bbox': [data[i, 1], data[i, 2], data[i, 3], data[i, 4]],
                'score': data[i, 5],
                'category_id': int(data[i, 6]),
            })
        return ann

    def annToRLE(self, ann):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: rle (the annotation's segmentation in RLE form)
        """
        t = self.imgs[ann['image_id']]
        h, w = t['height'], t['width']
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, h, w)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, h, w)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann)
        return maskUtils.decode(rle)

View File

@@ -0,0 +1,11 @@
from .lasot import Lasot
from .got10k import Got10k
from .tracking_net import TrackingNet
from .imagenetvid import ImagenetVID
from .coco import MSCOCO
from .coco_seq import MSCOCOSeq
from .got10k_lmdb import Got10k_lmdb
from .lasot_lmdb import Lasot_lmdb
from .imagenetvid_lmdb import ImagenetVID_lmdb
from .coco_seq_lmdb import MSCOCOSeq_lmdb
from .tracking_net_lmdb import TrackingNet_lmdb

View File

@@ -0,0 +1,92 @@
import torch.utils.data
from lib.train.data.image_loader import jpeg4py_loader
class BaseImageDataset(torch.utils.data.Dataset):
    """ Common interface for image datasets; concrete datasets override the accessors below. """

    def __init__(self, name, root, image_loader=jpeg4py_loader):
        """
        args:
            name - Name stored on the instance as self.name
            root - The root path to the dataset
            image_loader (jpeg4py_loader) - The function used to read the images. jpeg4py
                                            (https://github.com/ajkxyz/jpeg4py) is used by default.
        """
        self.image_loader = image_loader
        self.root = root
        self.name = name

        # Both lists start empty; subclasses are expected to fill them.
        self.class_list = []
        self.image_list = []

    def __len__(self):
        """ Size of the dataset.

        returns:
            int - number of images, as reported by get_num_images()
        """
        num_images = self.get_num_images()
        return num_images

    def __getitem__(self, index):
        """ Not to be used! Indexing always yields None; use get_image() instead.
        """
        return None

    def get_name(self):
        """ Name of the dataset (to be provided by the subclass).

        returns:
            string - Name of the dataset
        """
        raise NotImplementedError

    def get_num_images(self):
        """ Number of images in the dataset.

        returns:
            int - length of self.image_list
        """
        return len(self.image_list)

    def has_class_info(self):
        """ The base class carries no class labels. """
        return False

    def get_class_name(self, image_id):
        """ The base class has no class name for any image. """
        return None

    def get_num_classes(self):
        """ Number of entries in self.class_list. """
        return len(self.class_list)

    def get_class_list(self):
        """ The list of class names held by this dataset. """
        return self.class_list

    def get_images_in_class(self, class_name):
        """ Images belonging to class_name (to be provided by the subclass). """
        raise NotImplementedError

    def has_segmentation_info(self):
        """ The base class carries no segmentation masks. """
        return False

    def get_image_info(self, seq_id):
        """ Returns information about a particular image (to be provided by the subclass).

        args:
            seq_id - index of the image

        returns:
            Dict
        """
        raise NotImplementedError

    def get_image(self, image_id, anno=None):
        """ Get an image together with its annotation (to be provided by the subclass).

        args:
            image_id - index of image
            anno(None) - The annotation for the image (see get_image_info). If None, it will be loaded.

        returns:
            image -
            anno -
            dict - A dict containing meta information about the image, e.g. class of the target object.
        """
        raise NotImplementedError

View File

@@ -0,0 +1,110 @@
import torch.utils.data
# 2021.1.5 use jpeg4py_loader_w_failsafe as default
from lib.train.data.image_loader import jpeg4py_loader_w_failsafe
class BaseVideoDataset(torch.utils.data.Dataset):
    """ Common interface for video datasets; concrete datasets override the accessors below. """

    def __init__(self, name, root, image_loader=jpeg4py_loader_w_failsafe):
        """
        args:
            name - Name stored on the instance as self.name
            root - The root path to the dataset
            image_loader (jpeg4py_loader_w_failsafe) - The function used to read the images.
        """
        self.image_loader = image_loader
        self.root = root
        self.name = name

        # Both lists start empty; subclasses are expected to fill them.
        self.class_list = []
        self.sequence_list = []

    def __len__(self):
        """ Size of the dataset.

        returns:
            int - number of sequences, as reported by get_num_sequences()
        """
        num_sequences = self.get_num_sequences()
        return num_sequences

    def __getitem__(self, index):
        """ Not to be used! Indexing always yields None; use get_frames() instead.
        """
        return None

    def is_video_sequence(self):
        """ Returns whether the dataset is a video dataset or an image dataset

        returns:
            bool - True if a video dataset
        """
        return True

    def is_synthetic_video_dataset(self):
        """ Returns whether the dataset contains real videos or synthetic ones

        returns:
            bool - True if the videos are synthetic (the base class reports False)
        """
        return False

    def get_name(self):
        """ Name of the dataset (to be provided by the subclass).

        returns:
            string - Name of the dataset
        """
        raise NotImplementedError

    def get_num_sequences(self):
        """ Number of sequences in the dataset.

        returns:
            int - length of self.sequence_list
        """
        return len(self.sequence_list)

    def has_class_info(self):
        """ The base class carries no class labels. """
        return False

    def has_occlusion_info(self):
        """ The base class carries no occlusion labels. """
        return False

    def get_num_classes(self):
        """ Number of entries in self.class_list. """
        return len(self.class_list)

    def get_class_list(self):
        """ The list of class names held by this dataset. """
        return self.class_list

    def get_sequences_in_class(self, class_name):
        """ Sequences belonging to class_name (to be provided by the subclass). """
        raise NotImplementedError

    def has_segmentation_info(self):
        """ The base class carries no segmentation masks. """
        return False

    def get_sequence_info(self, seq_id):
        """ Returns information about a particular sequence (to be provided by the subclass).

        args:
            seq_id - index of the sequence

        returns:
            Dict
        """
        raise NotImplementedError

    def get_frames(self, seq_id, frame_ids, anno=None):
        """ Get a set of frames from a particular sequence (to be provided by the subclass).

        args:
            seq_id      - index of sequence
            frame_ids   - a list of frame numbers
            anno(None)  - The annotation for the sequence (see get_sequence_info). If None, they will be loaded.

        returns:
            list - List of frames corresponding to frame_ids
            list - List of dicts for each frame
            dict - A dict containing meta information about the sequence, e.g. class of the target object.
        """
        raise NotImplementedError

156
lib/train/dataset/coco.py Normal file
View File

@@ -0,0 +1,156 @@
import os
from .base_image_dataset import BaseImageDataset
import torch
import random
from collections import OrderedDict
from lib.train.data import jpeg4py_loader
from lib.train.admin import env_settings
from pycocotools.coco import COCO
class MSCOCO(BaseImageDataset):
    """ The COCO object detection dataset. Every non-crowd annotation is exposed as one sample.

    Publication:
        Microsoft COCO: Common Objects in Context.
        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
        ECCV, 2014
        https://arxiv.org/pdf/1405.0312.pdf

    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
    organized as follows.
        - coco_root
            - annotations
                - instances_train2014.json
                - instances_train2017.json
            - images
                - train2014
                - train2017

    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None,
                 split="train", version="2014"):
        """
        args:
            root - path to coco root folder. If None, env_settings().coco_dir is used.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
            min_area - Objects whose area is not greater than min_area are filtered out.
                       No area filtering is done when None (the default).
            split - 'train' or 'val'.
            version - version of coco dataset (2014 or 2017)
        """
        root = env_settings().coco_dir if root is None else root
        super().__init__('COCO', root, image_loader)

        self.img_pth = os.path.join(root, 'images/{}{}/'.format(split, version))
        self.anno_path = os.path.join(root, 'annotations/instances_{}{}.json'.format(split, version))

        self.coco_set = COCO(self.anno_path)
        self.cats = self.coco_set.cats

        self.class_list = self.get_class_list()  # the parent class thing would happen in the sampler
        self.image_list = self._get_image_list(min_area=min_area)

        if data_fraction is not None:
            self.image_list = random.sample(self.image_list, int(len(self.image_list) * data_fraction))
        # built last so the indices refer to the (possibly sub-sampled) image_list
        self.im_per_class = self._build_im_per_class()

    def _get_image_list(self, min_area=None):
        """ Ids of all non-crowd annotations, optionally restricted to area > min_area. """
        anns = self.coco_set.anns
        image_list = [a for a in anns if anns[a]['iscrowd'] == 0]
        if min_area is not None:
            image_list = [a for a in image_list if anns[a]['area'] > min_area]
        return image_list

    def get_num_classes(self):
        return len(self.class_list)

    def get_name(self):
        return 'coco'

    def has_class_info(self):
        return True

    def has_segmentation_info(self):
        return True

    def get_class_list(self):
        """ Names of all categories in the annotation file. """
        return [cat['name'] for cat in self.cats.values()]

    def _build_im_per_class(self):
        """ Map each class name to the list of sample indices (into image_list) of that class. """
        im_per_class = {}
        for i, im in enumerate(self.image_list):
            class_name = self.cats[self.coco_set.anns[im]['category_id']]['name']
            im_per_class.setdefault(class_name, []).append(i)
        return im_per_class

    def get_images_in_class(self, class_name):
        return self.im_per_class[class_name]

    def get_image_info(self, im_id):
        """ Box, mask and validity flags of sample im_id.

        returns:
            dict with 'bbox' (tensor viewed as 4 values), 'mask' (tensor built from annToMask),
            'valid' (box width and height both > 0) and 'visible' (byte copy of 'valid')
        """
        anno = self._get_anno(im_id)

        bbox = torch.Tensor(anno['bbox']).view(4,)
        mask = torch.Tensor(self.coco_set.annToMask(anno))

        valid = (bbox[2] > 0) & (bbox[3] > 0)
        visible = valid.clone().byte()

        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}

    def _get_anno(self, im_id):
        """ The raw COCO annotation dict behind sample index im_id. """
        return self.coco_set.anns[self.image_list[im_id]]

    def _get_category(self, im_id):
        """ The category dict of the annotation behind sample index im_id. """
        return self.cats[self._get_anno(im_id)['category_id']]

    def _get_image(self, im_id):
        """ Load the image that contains the annotation behind sample index im_id. """
        image_id = self._get_anno(im_id)['image_id']
        path = self.coco_set.loadImgs([image_id])[0]['file_name']
        return self.image_loader(os.path.join(self.img_pth, path))

    def get_meta_info(self, im_id):
        """ Class meta data of sample im_id; all fields are None when the lookup fails. """
        object_meta = OrderedDict({'object_class_name': None,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})
        try:
            cat_dict_current = self._get_category(im_id)
            class_name = cat_dict_current['name']
            major_class = cat_dict_current['supercategory']
        except (KeyError, IndexError, TypeError):
            # Best effort: a failed lookup yields the all-None record. Only lookup
            # errors are caught so that KeyboardInterrupt / SystemExit still propagate.
            return object_meta
        object_meta['object_class_name'] = class_name
        object_meta['major_class'] = major_class
        return object_meta

    def get_class_name(self, im_id):
        return self._get_category(im_id)['name']

    def get_image(self, image_id, anno=None):
        """ Load sample image_id.

        args:
            image_id - index of the sample
            anno(None) - pre-computed get_image_info() result; computed when None

        returns:
            image, annotation dict, meta information dict
        """
        frame = self._get_image(image_id)

        if anno is None:
            anno = self.get_image_info(image_id)

        object_meta = self.get_meta_info(image_id)

        return frame, anno, object_meta

View File

@@ -0,0 +1,170 @@
import os
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
import torch
import random
from pycocotools.coco import COCO
from collections import OrderedDict
from lib.train.admin import env_settings
class MSCOCOSeq(BaseVideoDataset):
    """ The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.

    Publication:
        Microsoft COCO: Common Objects in Context.
        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
        ECCV, 2014
        https://arxiv.org/pdf/1405.0312.pdf

    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
    organized as follows.
        - coco_root
            - annotations
                - instances_train2014.json
                - instances_train2017.json
            - images
                - train2014
                - train2017

    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"):
        """
        args:
            root - path to the coco dataset. If None, env_settings().coco_dir is used.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py
                                            (https://github.com/ajkxyz/jpeg4py) is used by default.
            data_fraction (None) - Fraction of images to be used. The images are selected randomly. If None, all the
                                  images will be used
            split - 'train' or 'val'.
            version - version of coco dataset (2014 or 2017)
        """
        root = env_settings().coco_dir if root is None else root
        super().__init__('COCO', root, image_loader)

        self.img_pth = os.path.join(root, 'images/{}{}/'.format(split, version))
        self.anno_path = os.path.join(root, 'annotations/instances_{}{}.json'.format(split, version))

        # Load the COCO set.
        self.coco_set = COCO(self.anno_path)
        self.cats = self.coco_set.cats

        self.class_list = self.get_class_list()
        self.sequence_list = self._get_sequence_list()

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
        # built last so the indices refer to the (possibly sub-sampled) sequence_list
        self.seq_per_class = self._build_seq_per_class()

    def _get_sequence_list(self):
        """ Ids of all non-crowd annotations; each one becomes a single-frame sequence. """
        anns = self.coco_set.anns
        return [a for a in anns if anns[a]['iscrowd'] == 0]

    def is_video_sequence(self):
        return False

    def get_num_classes(self):
        return len(self.class_list)

    def get_name(self):
        return 'coco'

    def has_class_info(self):
        return True

    def get_class_list(self):
        """ Names of all categories in the annotation file. """
        return [cat['name'] for cat in self.cats.values()]

    def has_segmentation_info(self):
        return True

    def get_num_sequences(self):
        return len(self.sequence_list)

    def _build_seq_per_class(self):
        """ Map each class name to the list of sequence indices (into sequence_list) of that class. """
        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = self.cats[self.coco_set.anns[seq]['category_id']]['name']
            seq_per_class.setdefault(class_name, []).append(i)
        return seq_per_class

    def get_sequences_in_class(self, class_name):
        return self.seq_per_class[class_name]

    def get_sequence_info(self, seq_id):
        """ Box, mask and validity flags of sequence seq_id, each with a leading frame dimension of 1.

        returns:
            dict with 'bbox' (1x4 tensor), 'mask' (annToMask result with a leading dim added),
            'valid' and 'visible' (byte copy of 'valid')
        """
        anno = self._get_anno(seq_id)

        bbox = torch.Tensor(anno['bbox']).view(1, 4)
        mask = torch.Tensor(self.coco_set.annToMask(anno)).unsqueeze(dim=0)

        # 2021.1.3 To avoid too small bounding boxes. Here we change the threshold to 50 pixels
        valid = (bbox[:, 2] > 50) & (bbox[:, 3] > 50)
        visible = valid.clone().byte()

        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}

    def _get_anno(self, seq_id):
        """ The raw COCO annotation dict behind sequence index seq_id. """
        return self.coco_set.anns[self.sequence_list[seq_id]]

    def _get_category(self, seq_id):
        """ The category dict of the annotation behind sequence index seq_id. """
        return self.cats[self._get_anno(seq_id)['category_id']]

    def _get_frames(self, seq_id):
        """ Load the image that contains the annotation behind sequence index seq_id. """
        image_id = self._get_anno(seq_id)['image_id']
        path = self.coco_set.loadImgs([image_id])[0]['file_name']
        return self.image_loader(os.path.join(self.img_pth, path))

    def get_meta_info(self, seq_id):
        """ Class meta data of sequence seq_id; all fields are None when the lookup fails. """
        object_meta = OrderedDict({'object_class_name': None,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})
        try:
            cat_dict_current = self._get_category(seq_id)
            class_name = cat_dict_current['name']
            major_class = cat_dict_current['supercategory']
        except (KeyError, IndexError, TypeError):
            # Best effort: a failed lookup yields the all-None record. Only lookup
            # errors are caught so that KeyboardInterrupt / SystemExit still propagate.
            return object_meta
        object_meta['object_class_name'] = class_name
        object_meta['major_class'] = major_class
        return object_meta

    def get_class_name(self, seq_id):
        return self._get_category(seq_id)['name']

    def get_frames(self, seq_id=None, frame_ids=None, anno=None):
        """ Return the image of sequence seq_id once per entry of frame_ids.

        args:
            seq_id - index of the sequence
            frame_ids - iterable of frame numbers; only its length matters here
            anno(None) - pre-computed get_sequence_info() result; computed when None

        returns:
            list of image copies, dict of per-frame annotation lists, meta information dict
        """
        # COCO is an image dataset. Thus we replicate the image denoted by seq_id len(frame_ids) times, and return a
        # list containing these replicated images.
        frame = self._get_frames(seq_id)

        frame_list = [frame.copy() for _ in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        # drop the leading frame dimension and repeat the entry for every requested frame
        anno_frames = {key: [value[0, ...] for _ in frame_ids] for key, value in anno.items()}

        object_meta = self.get_meta_info(seq_id)

        return frame_list, anno_frames, object_meta

View File

@@ -0,0 +1,177 @@
import os
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
import torch
import random
from collections import OrderedDict
from lib.train.admin import env_settings
from lib.train.dataset.COCO_tool import COCO
from lib.utils.lmdb_utils import decode_img, decode_json
import time
class MSCOCOSeq_lmdb(BaseVideoDataset):
""" The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.
Publication:
Microsoft COCO: Common Objects in Context.
Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
ECCV, 2014
https://arxiv.org/pdf/1405.0312.pdf
Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
organized as follows.
- coco_root
- annotations
- instances_train2014.json
- instances_train2017.json
- images
- train2014
- train2017
Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
"""
def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"):
    """
    args:
        root - path to the coco dataset; passed to decode_json as the first argument.
               If None, env_settings().coco_dir is used.
        image_loader (jpeg4py_loader) - forwarded to the base class constructor.
        data_fraction (None) - Fraction of sequences to keep, sampled randomly. If None, all of them are used.
        split - 'train' or 'val'.
        version - version of coco dataset (2014 or 2017)
    """
    if root is None:
        root = env_settings().coco_dir
    super().__init__('COCO_lmdb', root, image_loader)
    self.root = root

    # Relative keys, resolved against root by the decode helpers.
    self.img_pth = 'images/{}{}/'.format(split, version)
    self.anno_path = 'annotations/instances_{}{}.json'.format(split, version)

    # Decode the annotation json and report how long it took.
    print('loading annotations into memory...')
    started = time.time()
    coco_json = decode_json(root, self.anno_path)
    print('Done (t={:0.2f}s)'.format(time.time() - started))

    self.coco_set = COCO(coco_json)
    self.cats = self.coco_set.cats

    self.class_list = self.get_class_list()
    self.sequence_list = self._get_sequence_list()

    if data_fraction is not None:
        num_kept = int(len(self.sequence_list)*data_fraction)
        self.sequence_list = random.sample(self.sequence_list, num_kept)
    # built last so the indices refer to the (possibly sub-sampled) sequence_list
    self.seq_per_class = self._build_seq_per_class()
def _get_sequence_list(self):
    """ Ids of all annotations whose 'iscrowd' flag equals 0. """
    return [ann_id for ann_id, ann in self.coco_set.anns.items() if ann['iscrowd'] == 0]
def is_video_sequence(self):
    """ Always False for this dataset. """
    return False
def get_num_classes(self):
    """ Number of entries in self.class_list. """
    num_classes = len(self.class_list)
    return num_classes
def get_name(self):
    """ Identifier string of this dataset. """
    return 'coco_lmdb'
def has_class_info(self):
    """ Always True: class names are available through get_class_name(). """
    return True
def get_class_list(self):
    """ The 'name' of every category in self.cats, in iteration order. """
    return [category['name'] for category in self.cats.values()]
def has_segmentation_info(self):
    """ Always True: get_sequence_info() returns a 'mask' entry. """
    return True
def get_num_sequences(self):
    """ Number of entries in self.sequence_list. """
    num_sequences = len(self.sequence_list)
    return num_sequences
def _build_seq_per_class(self):
seq_per_class = {}
for i, seq in enumerate(self.sequence_list):
class_name = self.cats[self.coco_set.anns[seq]['category_id']]['name']
if class_name not in seq_per_class:
seq_per_class[class_name] = [i]
else:
seq_per_class[class_name].append(i)
return seq_per_class
def get_sequences_in_class(self, class_name):
    """Return the sequence indices stored for class_name (raises KeyError for an unknown class)."""
    sequences = self.seq_per_class[class_name]
    return sequences
def get_sequence_info(self, seq_id):
    """Return box, mask and validity flags of the single annotation behind seq_id.

    returns:
        dict with 'bbox' (1x4 tensor), 'mask' (annotation mask with a leading dimension of size 1),
        'valid' (True when columns 2 and 3 of the box both exceed 50) and 'visible' (byte copy of 'valid').
    """
    anno = self._get_anno(seq_id)
    bbox = torch.Tensor(anno['bbox']).view(1, 4)
    mask = torch.Tensor(self.coco_set.annToMask(anno)).unsqueeze(dim=0)

    # 2021.1.3 To avoid too small bounding boxes. Here we change the threshold to 50 pixels
    min_size = 50
    wide_enough = bbox[:, 2] > min_size
    tall_enough = bbox[:, 3] > min_size
    valid = wide_enough & tall_enough
    visible = valid.clone().byte()

    return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}
def _get_anno(self, seq_id):
anno = self.coco_set.anns[self.sequence_list[seq_id]]
return anno
def _get_frames(self, seq_id):
    """Decode the image that the seq_id-th annotation belongs to from the lmdb store."""
    ann_id = self.sequence_list[seq_id]
    image_id = self.coco_set.anns[ann_id]['image_id']
    file_name = self.coco_set.loadImgs([image_id])[0]['file_name']
    # The image is decoded from the lmdb store; self.image_loader is not used here.
    return decode_img(self.root, os.path.join(self.img_pth, file_name))
def get_meta_info(self, seq_id):
    """Return the class meta information of the annotation behind seq_id.

    returns:
        OrderedDict with the keys 'object_class_name', 'motion_class', 'major_class', 'root_class' and
        'motion_adverb'. Only the class name and the supercategory ('major_class') are filled in; if the
        category cannot be looked up, every value is None.
    """
    try:
        category = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
        class_name = category['name']
        major_class = category['supercategory']
    except Exception:
        # Best effort: a failed lookup yields empty meta info. Unlike a bare `except:`, this lets
        # KeyboardInterrupt / SystemExit propagate.
        class_name = None
        major_class = None

    return OrderedDict({'object_class_name': class_name,
                        'motion_class': None,
                        'major_class': major_class,
                        'root_class': None,
                        'motion_adverb': None})
def get_class_name(self, seq_id):
    """Return the category name of the annotation behind seq_id."""
    ann_id = self.sequence_list[seq_id]
    category_id = self.coco_set.anns[ann_id]['category_id']
    return self.cats[category_id]['name']
def get_frames(self, seq_id=None, frame_ids=None, anno=None):
    """Return one copy of the image and of its annotation per requested frame id.

    COCO is an image dataset, so the image denoted by seq_id is replicated len(frame_ids) times.

    returns:
        (frame_list, anno_frames, object_meta), where anno_frames maps every annotation key to a list
        holding the first row of that annotation, once per entry of frame_ids.
    """
    image = self._get_frames(seq_id)
    frame_list = [image.copy() for _ in frame_ids]

    if anno is None:
        anno = self.get_sequence_info(seq_id)

    anno_frames = {key: [value[0, ...] for _ in frame_ids] for key, value in anno.items()}

    object_meta = self.get_meta_info(seq_id)
    return frame_list, anno_frames, object_meta

186
lib/train/dataset/got10k.py Normal file
View File

@@ -0,0 +1,186 @@
import os
import os.path
import numpy as np
import torch
import csv
import pandas
import random
from collections import OrderedDict
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
from lib.train.admin import env_settings
class Got10k(BaseVideoDataset):
    """ GOT-10k dataset.

    Publication:
        GOT-10k: A Large High-Diversity Benchmark for Generic Object Tracking in the Wild
        Lianghua Huang, Xin Zhao, and Kaiqi Huang
        arXiv:1810.11981, 2018
        https://arxiv.org/pdf/1810.11981.pdf

    Download dataset from http://got-10k.aitestunion.com/downloads
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, split=None, seq_ids=None, data_fraction=None):
        """
        args:
            root - path to the got-10k training data. Note: This should point to the 'train' folder inside GOT-10k
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            split - 'train', 'val', 'train_full', 'vottrain' or 'votval'. Note: The validation split here is a
                    subset of the official got-10k train split, NOT the official got-10k validation split. To use
                    the official validation split, provide that as the root folder instead.
            seq_ids - List containing the ids of the videos to be used for training. Note: Only one of 'split' or
                      'seq_ids' options can be used at the same time.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default

        raises:
            ValueError - if both split and seq_ids are given, or if the split name is unknown.
        """
        root = env_settings().got10k_dir if root is None else root
        super().__init__('GOT10k', root, image_loader)

        # all folders inside the root
        self.sequence_list = self._get_sequence_list()

        # seq_id is the index of the folder inside the got10k root path
        if split is not None:
            if seq_ids is not None:
                raise ValueError('Cannot set both split_name and seq_ids.')
            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
            if split == 'train':
                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_train_split.txt')
            elif split == 'val':
                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_val_split.txt')
            elif split == 'train_full':
                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_train_full_split.txt')
            elif split == 'vottrain':
                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_vot_train_split.txt')
            elif split == 'votval':
                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_vot_val_split.txt')
            else:
                raise ValueError('Unknown split name.')
            # read_csv(squeeze=True) no longer exists in pandas 2.x; squeeze the single column explicitly
            seq_ids = pandas.read_csv(file_path, header=None, dtype=np.int64).squeeze("columns").values.tolist()
        elif seq_ids is None:
            seq_ids = list(range(0, len(self.sequence_list)))

        self.sequence_list = [self.sequence_list[i] for i in seq_ids]

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))

        self.sequence_meta_info = self._load_meta_info()
        self.seq_per_class = self._build_seq_per_class()

        self.class_list = list(self.seq_per_class.keys())
        self.class_list.sort()

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'got10k'

    def has_class_info(self):
        """Class names are read from the per-sequence meta files, so this is always True."""
        return True

    def has_occlusion_info(self):
        """Absence / cover labels are read for every sequence, so this is always True."""
        return True

    def _load_meta_info(self):
        """Read the meta info of every selected sequence; returns {sequence name: OrderedDict}."""
        sequence_meta_info = {s: self._read_meta(os.path.join(self.root, s)) for s in self.sequence_list}
        return sequence_meta_info

    def _read_meta(self, seq_path):
        """Parse 'meta_info.ini' inside seq_path.

        The five values are taken from fixed line positions (lines 5-9, zero based) as the text after the
        last ': '. If the file cannot be read or is too short, every value is None.
        """
        fields = (('object_class_name', 5),
                  ('motion_class', 6),
                  ('major_class', 7),
                  ('root_class', 8),
                  ('motion_adverb', 9))
        try:
            with open(os.path.join(seq_path, 'meta_info.ini')) as f:
                meta_info = f.readlines()
            # Strip only the line terminator, so that a final line without one is not truncated
            object_meta = OrderedDict((key, meta_info[line_no].split(': ')[-1].rstrip('\n'))
                                      for key, line_no in fields)
        except Exception:
            # Best effort: missing or malformed meta files give empty meta info. Unlike a bare `except:`,
            # this lets KeyboardInterrupt / SystemExit propagate.
            object_meta = OrderedDict((key, None) for key, _ in fields)
        return object_meta

    def _build_seq_per_class(self):
        """Map every object class name to the indices (into self.sequence_list) of its sequences."""
        seq_per_class = {}
        for i, s in enumerate(self.sequence_list):
            object_class = self.sequence_meta_info[s]['object_class_name']
            seq_per_class.setdefault(object_class, []).append(i)
        return seq_per_class

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices stored for class_name (raises KeyError for an unknown class)."""
        return self.seq_per_class[class_name]

    def _get_sequence_list(self):
        """Return the first column of every row of 'list.txt' in the dataset root."""
        with open(os.path.join(self.root, 'list.txt')) as f:
            dir_list = [row[0] for row in csv.reader(f)]
        return dir_list

    def _read_bb_anno(self, seq_path):
        """Read 'groundtruth.txt' of a sequence as a float32 tensor with one row per line."""
        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False,
                             low_memory=False).values
        return torch.tensor(gt)

    def _read_target_visible(self, seq_path):
        """Read the per-frame absence and cover labels of a sequence.

        returns:
            target_visible - byte tensor, 1 where the absence label is 0 and the cover label is positive.
            visible_ratio - cover label divided by 8, as float tensor.
        """
        # Read full occlusion and out_of_view
        occlusion_file = os.path.join(seq_path, "absence.label")
        cover_file = os.path.join(seq_path, "cover.label")

        with open(occlusion_file, 'r', newline='') as f:
            occlusion = torch.ByteTensor([int(v[0]) for v in csv.reader(f)])
        with open(cover_file, 'r', newline='') as f:
            cover = torch.ByteTensor([int(v[0]) for v in csv.reader(f)])

        # The AND with a 0/1 mask keeps only the lowest bit of the inverted absence label
        target_visible = ~occlusion & (cover>0).byte()
        visible_ratio = cover.float() / 8
        return target_visible, visible_ratio

    def _get_sequence_path(self, seq_id):
        """Return the folder of the seq_id-th sequence."""
        return os.path.join(self.root, self.sequence_list[seq_id])

    def get_sequence_info(self, seq_id):
        """Return boxes and validity / visibility flags for all frames of a sequence.

        returns:
            dict with 'bbox', 'valid' (columns 2 and 3 of the box both positive), 'visible' (visible
            according to the label files and valid) and 'visible_ratio'.
        """
        seq_path = self._get_sequence_path(seq_id)
        bbox = self._read_bb_anno(seq_path)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible, visible_ratio = self._read_target_visible(seq_path)
        visible = visible & valid.byte()

        return {'bbox': bbox, 'valid': valid, 'visible': visible, 'visible_ratio': visible_ratio}

    def _get_frame_path(self, seq_path, frame_id):
        """Return the path of the image for the zero-based frame_id."""
        return os.path.join(seq_path, '{:08}.jpg'.format(frame_id+1))    # frames start from 1

    def _get_frame(self, seq_path, frame_id):
        """Load one frame with the configured image loader."""
        return self.image_loader(self._get_frame_path(seq_path, frame_id))

    def get_class_name(self, seq_id):
        """Return the object class name stored in the meta info of the sequence (may be None)."""
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]
        return obj_meta['object_class_name']

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Load the requested frames of a sequence together with their annotations.

        args:
            seq_id - index of the sequence.
            frame_ids - zero-based frame indices to load.
            anno - result of get_sequence_info for this sequence; computed here when None.

        returns:
            (frame_list, anno_frames, obj_meta), where anno_frames maps every annotation key to a list of
            cloned per-frame entries.
        """
        seq_path = self._get_sequence_path(seq_id)
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]

        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        return frame_list, anno_frames, obj_meta

View File

@@ -0,0 +1,183 @@
import os
import os.path
import numpy as np
import torch
import csv
import pandas
import random
from collections import OrderedDict
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
from lib.train.admin import env_settings
'''2021.1.16 Got10k for loading lmdb dataset'''
from lib.utils.lmdb_utils import *
class Got10k_lmdb(BaseVideoDataset):
    """GOT-10k dataset read from an lmdb store (annotations, labels and images are decoded from self.root)."""

    def __init__(self, root=None, image_loader=jpeg4py_loader, split=None, seq_ids=None, data_fraction=None):
        """
        args:
            root - path to the got-10k lmdb store. Falls back to env_settings().got10k_lmdb_dir when None.
            image_loader (jpeg4py_loader) - image reading function forwarded to the base class. Frames
                                            themselves are decoded from the lmdb store.
            split - 'train', 'val', 'train_full', 'vottrain' or 'votval'. Note: The validation split here is a
                    subset of the official got-10k train split, NOT the official got-10k validation split.
            seq_ids - List containing the ids of the videos to be used for training. Note: Only one of 'split' or
                      'seq_ids' options can be used at the same time.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default

        raises:
            ValueError - if both split and seq_ids are given, or if the split name is unknown.
        """
        root = env_settings().got10k_lmdb_dir if root is None else root
        super().__init__('GOT10k_lmdb', root, image_loader)

        # all folders inside the root
        self.sequence_list = self._get_sequence_list()

        # seq_id is the index of the folder inside the got10k root path
        if split is not None:
            if seq_ids is not None:
                raise ValueError('Cannot set both split_name and seq_ids.')
            train_lib_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
            if split == 'train':
                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_train_split.txt')
            elif split == 'val':
                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_val_split.txt')
            elif split == 'train_full':
                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_train_full_split.txt')
            elif split == 'vottrain':
                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_vot_train_split.txt')
            elif split == 'votval':
                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_vot_val_split.txt')
            else:
                raise ValueError('Unknown split name.')
            # read_csv(squeeze=True) was removed in pandas 2.0; squeeze the single column explicitly
            seq_ids = pandas.read_csv(file_path, header=None, dtype=np.int64).squeeze("columns").values.tolist()
        elif seq_ids is None:
            seq_ids = list(range(0, len(self.sequence_list)))

        self.sequence_list = [self.sequence_list[i] for i in seq_ids]

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))

        self.sequence_meta_info = self._load_meta_info()
        self.seq_per_class = self._build_seq_per_class()

        self.class_list = list(self.seq_per_class.keys())
        self.class_list.sort()

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'got10k_lmdb'

    def has_class_info(self):
        """Class names are read from the per-sequence meta entries, so this is always True."""
        return True

    def has_occlusion_info(self):
        """Absence / cover labels are read for every sequence, so this is always True."""
        return True

    def _load_meta_info(self):
        """Decode the meta info of every selected sequence; returns {sequence name: OrderedDict}.

        The five values are taken from fixed line positions (lines 5-9, zero based) of
        'train/<sequence>/meta_info.ini' as the text after the last ': '. If the entry cannot be decoded or
        is too short, every value is None.
        """
        fields = (('object_class_name', 5),
                  ('motion_class', 6),
                  ('major_class', 7),
                  ('root_class', 8),
                  ('motion_adverb', 9))

        def _read_meta(meta_info):
            return OrderedDict((key, meta_info[line_no].split(': ')[-1]) for key, line_no in fields)

        sequence_meta_info = {}
        for s in self.sequence_list:
            try:
                meta_str = decode_str(self.root, "train/%s/meta_info.ini" % s)
                sequence_meta_info[s] = _read_meta(meta_str.split('\n'))
            except Exception:
                # Best effort: missing or malformed entries give empty meta info. Unlike a bare `except:`,
                # this lets KeyboardInterrupt / SystemExit propagate.
                sequence_meta_info[s] = OrderedDict((key, None) for key, _ in fields)
        return sequence_meta_info

    def _build_seq_per_class(self):
        """Map every object class name to the indices (into self.sequence_list) of its sequences."""
        seq_per_class = {}
        for i, s in enumerate(self.sequence_list):
            object_class = self.sequence_meta_info[s]['object_class_name']
            seq_per_class.setdefault(object_class, []).append(i)
        return seq_per_class

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices stored for class_name (raises KeyError for an unknown class)."""
        return self.seq_per_class[class_name]

    def _get_sequence_list(self):
        """Return the sequence names listed, one per line, in 'train/list.txt' of the lmdb store."""
        dir_str = decode_str(self.root, 'train/list.txt')
        # Drop trailing newlines first so that a final line break does not create an empty sequence name
        dir_list = dir_str.rstrip('\n').split('\n')
        return dir_list

    def _read_bb_anno(self, seq_path):
        """Decode 'groundtruth.txt' of a sequence as a float32 tensor with one row per line."""
        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
        gt_str_list = decode_str(self.root, bb_anno_file).split('\n')[:-1]  # the last line in got10k is empty
        gt_list = [list(map(float, line.split(','))) for line in gt_str_list]
        gt_arr = np.array(gt_list).astype(np.float32)
        return torch.tensor(gt_arr)

    def _read_target_visible(self, seq_path):
        """Decode the per-frame absence and cover labels of a sequence.

        returns:
            target_visible - byte tensor, 1 where the absence label is 0 and the cover label is positive.
            visible_ratio - cover label divided by 8, as float tensor.
        """
        # full occlusion and out_of_view files
        occlusion_file = os.path.join(seq_path, "absence.label")
        cover_file = os.path.join(seq_path, "cover.label")

        # Read these files
        occ_list = list(map(int, decode_str(self.root, occlusion_file).split('\n')[:-1]))  # the last line in got10k is empty
        occlusion = torch.ByteTensor(occ_list)
        cover_list = list(map(int, decode_str(self.root, cover_file).split('\n')[:-1]))  # the last line in got10k is empty
        cover = torch.ByteTensor(cover_list)

        # The AND with a 0/1 mask keeps only the lowest bit of the inverted absence label
        target_visible = ~occlusion & (cover>0).byte()
        visible_ratio = cover.float() / 8
        return target_visible, visible_ratio

    def _get_sequence_path(self, seq_id):
        """Return the lmdb key prefix ('train/<sequence name>') of the seq_id-th sequence."""
        return os.path.join("train", self.sequence_list[seq_id])

    def get_sequence_info(self, seq_id):
        """Return boxes and validity / visibility flags for all frames of a sequence.

        returns:
            dict with 'bbox', 'valid' (columns 2 and 3 of the box both positive), 'visible' (visible
            according to the label entries and valid) and 'visible_ratio'.
        """
        seq_path = self._get_sequence_path(seq_id)
        bbox = self._read_bb_anno(seq_path)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible, visible_ratio = self._read_target_visible(seq_path)
        visible = visible & valid.byte()

        return {'bbox': bbox, 'valid': valid, 'visible': visible, 'visible_ratio': visible_ratio}

    def _get_frame_path(self, seq_path, frame_id):
        """Return the key of the image for the zero-based frame_id."""
        return os.path.join(seq_path, '{:08}.jpg'.format(frame_id+1))    # frames start from 1

    def _get_frame(self, seq_path, frame_id):
        """Decode one frame from the lmdb store."""
        return decode_img(self.root, self._get_frame_path(seq_path, frame_id))

    def get_class_name(self, seq_id):
        """Return the object class name stored in the meta info of the sequence (may be None)."""
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]
        return obj_meta['object_class_name']

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Decode the requested frames of a sequence together with their annotations.

        args:
            seq_id - index of the sequence.
            frame_ids - zero-based frame indices to load.
            anno - result of get_sequence_info for this sequence; computed here when None.

        returns:
            (frame_list, anno_frames, obj_meta), where anno_frames maps every annotation key to a list of
            cloned per-frame entries.
        """
        seq_path = self._get_sequence_path(seq_id)
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]

        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        return frame_list, anno_frames, obj_meta

View File

@@ -0,0 +1,159 @@
import os
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
import xml.etree.ElementTree as ET
import json
import torch
from collections import OrderedDict
from lib.train.admin import env_settings
def get_target_to_image_ratio(seq):
    """Return sqrt(product of entries 2:4 of the first box / product of the image size) as a tensor."""
    first_box = torch.Tensor(seq['anno'])[0, 2:4]
    image_area = torch.Tensor(seq['image_size']).prod()
    area_ratio = first_box.prod() / image_area
    return area_ratio.sqrt()
class ImagenetVID(BaseVideoDataset):
    """ Imagenet VID dataset.

    Publication:
        ImageNet Large Scale Visual Recognition Challenge
        Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy,
        Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei
        IJCV, 2015
        https://arxiv.org/pdf/1409.0575.pdf

    Download the dataset from http://image-net.org/
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1):
        """
        args:
            root - path to the imagenet vid dataset. Falls back to env_settings().imagenet_dir when None.
            image_loader (jpeg4py_loader) - The function to read the images.
            min_length - Minimum allowed sequence length.
            max_target_area - max allowed ratio between target area and image area. Can be used to filter out targets
                              which cover complete image.
        """
        if root is None:
            root = env_settings().imagenet_dir
        super().__init__("imagenetvid", root, image_loader)

        cache_file = os.path.join(root, 'cache.json')
        if not os.path.isfile(cache_file):
            # No cache yet: process the imagenet annotations and write the cache file
            self.sequence_list = self._process_anno(root)
            with open(cache_file, 'w') as f:
                json.dump(self.sequence_list, f)
        else:
            # Load the pre-processed cache file containing meta-info for each sequence
            with open(cache_file, 'r') as f:
                self.sequence_list = json.load(f)

        # Filter the sequences based on min_length and max_target_area in the first frame
        kept = []
        for seq in self.sequence_list:
            if len(seq['anno']) >= min_length and get_target_to_image_ratio(seq) < max_target_area:
                kept.append(seq)
        self.sequence_list = kept

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'imagenetvid'

    def get_num_sequences(self):
        """Return the number of tracklets kept after filtering."""
        return len(self.sequence_list)

    def get_sequence_info(self, seq_id):
        """Return boxes and validity / visibility flags for all frames of a tracklet.

        returns:
            dict with 'bbox', 'valid' (columns 2 and 3 of the box both positive) and 'visible' (stored
            visibility flag and valid).
        """
        sequence = self.sequence_list[seq_id]
        bb_anno = torch.Tensor(sequence['anno'])
        has_width = bb_anno[:, 2] > 0
        has_height = bb_anno[:, 3] > 0
        valid = has_width & has_height
        visible = torch.ByteTensor(sequence['target_visible']) & valid.byte()
        return {'bbox': bb_anno, 'valid': valid, 'visible': visible}

    def _get_frame(self, sequence, frame_id):
        """Load the frame_id-th frame of a tracklet (frame_id is relative to the tracklet start frame)."""
        set_name = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id'])
        vid_name = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id'])
        frame_number = frame_id + sequence['start_frame']
        file_name = '{:06d}.JPEG'.format(frame_number)
        frame_path = os.path.join(self.root, 'Data', 'VID', 'train', set_name, vid_name, file_name)
        return self.image_loader(frame_path)

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Load the requested frames of a tracklet together with their annotations.

        returns:
            (frame_list, anno_frames, object_meta), where anno_frames maps every annotation key to a list of
            cloned per-frame entries.
        """
        sequence = self.sequence_list[seq_id]

        frame_list = [self._get_frame(sequence, f) for f in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        # Create anno dict
        anno_frames = {key: [value[f_id, ...].clone() for f_id in frame_ids] for key, value in anno.items()}

        # added the class info to the meta info
        # NOTE(review): the key is 'object_class' here while other datasets in this package use
        # 'object_class_name' - confirm which one the consumers expect.
        object_meta = OrderedDict({'object_class': sequence['class_name'],
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta

    def _process_anno(self, root):
        """Build the list of individual tracklets from the xml annotations below root.

        A tracklet starts at the first frame its track id appears in and ends right before the first frame
        in which that id is missing.

        returns:
            list of dicts with the keys 'set_id', 'vid_id', 'class_name', 'start_frame', 'anno' (one
            [xmin, ymin, width, height] box per frame), 'target_visible' and 'image_size'.
        """
        base_vid_anno_path = os.path.join(root, 'Annotations', 'VID', 'train')

        all_sequences = []
        for set_name in sorted(os.listdir(base_vid_anno_path)):
            set_id = int(set_name.split('_')[-1])
            set_dir = os.path.join(base_vid_anno_path, set_name)
            for vid_name in sorted(os.listdir(set_dir)):
                vid_id = int(vid_name.split('_')[-1])
                vid_dir = os.path.join(base_vid_anno_path, set_name, vid_name)
                anno_files = sorted(os.listdir(vid_dir))

                # The image size is read from the annotation of the first frame only
                frame1_anno = ET.parse(os.path.join(vid_dir, anno_files[0]))
                image_size = [int(frame1_anno.find('size/width').text), int(frame1_anno.find('size/height').text)]

                objects = [ET.ElementTree(file=os.path.join(vid_dir, f)).findall('object')
                           for f in anno_files]

                # Find all tracklets along with start frame
                tracklets = {}
                for f_id, all_targets in enumerate(objects):
                    for target in all_targets:
                        tracklets.setdefault(target.find('trackid').text, f_id)

                for tracklet_id, tracklet_start in tracklets.items():
                    tracklet_anno = []
                    target_visible = []
                    class_name_id = None

                    for f_id in range(tracklet_start, len(objects)):
                        # First object of this frame that carries the track id, if any
                        target = next((t for t in objects[f_id] if t.find('trackid').text == tracklet_id), None)
                        if target is None:
                            break

                        if not class_name_id:
                            class_name_id = target.find('name').text
                        x1 = int(target.find('bndbox/xmin').text)
                        y1 = int(target.find('bndbox/ymin').text)
                        x2 = int(target.find('bndbox/xmax').text)
                        y2 = int(target.find('bndbox/ymax').text)

                        tracklet_anno.append([x1, y1, x2 - x1, y2 - y1])
                        target_visible.append(target.find('occluded').text == '0')

                    new_sequence = {'set_id': set_id, 'vid_id': vid_id, 'class_name': class_name_id,
                                    'start_frame': tracklet_start, 'anno': tracklet_anno,
                                    'target_visible': target_visible, 'image_size': image_size}
                    all_sequences.append(new_sequence)

        return all_sequences

View File

@@ -0,0 +1,90 @@
import os
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
import torch
from collections import OrderedDict
from lib.train.admin import env_settings
from lib.utils.lmdb_utils import decode_img, decode_json
def get_target_to_image_ratio(seq):
    """Return sqrt(product of entries 2:4 of the first box / product of the image size) as a tensor."""
    first_box = torch.Tensor(seq['anno'])[0, 2:4]
    image_area = torch.Tensor(seq['image_size']).prod()
    area_ratio = first_box.prod() / image_area
    return area_ratio.sqrt()
class ImagenetVID_lmdb(BaseVideoDataset):
    """ Imagenet VID dataset, read from an lmdb store.

    Publication:
        ImageNet Large Scale Visual Recognition Challenge
        Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy,
        Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei
        IJCV, 2015
        https://arxiv.org/pdf/1409.0575.pdf

    Download the dataset from http://image-net.org/
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1):
        """
        args:
            root - path to the imagenet vid lmdb store. Falls back to env_settings().imagenet_dir when None.
            image_loader (jpeg4py_loader) - image reading function forwarded to the base class. Frames
                                            themselves are decoded from the lmdb store.
            min_length - Minimum allowed sequence length.
            max_target_area - max allowed ratio between target area and image area. Can be used to filter out targets
                              which cover complete image.
        """
        # NOTE(review): the default is imagenet_dir, not a dedicated lmdb setting - confirm this is intended.
        if root is None:
            root = env_settings().imagenet_dir
        super().__init__("imagenetvid_lmdb", root, image_loader)

        # The pre-processed meta-info of every sequence is stored as "cache.json" inside the lmdb store
        self.sequence_list = decode_json(root, "cache.json")

        # Filter the sequences based on min_length and max_target_area in the first frame
        kept = []
        for seq in self.sequence_list:
            if len(seq['anno']) >= min_length and get_target_to_image_ratio(seq) < max_target_area:
                kept.append(seq)
        self.sequence_list = kept

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'imagenetvid_lmdb'

    def get_num_sequences(self):
        """Return the number of tracklets kept after filtering."""
        return len(self.sequence_list)

    def get_sequence_info(self, seq_id):
        """Return boxes and validity / visibility flags for all frames of a tracklet.

        returns:
            dict with 'bbox', 'valid' (columns 2 and 3 of the box both positive) and 'visible' (stored
            visibility flag and valid).
        """
        sequence = self.sequence_list[seq_id]
        bb_anno = torch.Tensor(sequence['anno'])
        has_width = bb_anno[:, 2] > 0
        has_height = bb_anno[:, 3] > 0
        valid = has_width & has_height
        visible = torch.ByteTensor(sequence['target_visible']) & valid.byte()
        return {'bbox': bb_anno, 'valid': valid, 'visible': visible}

    def _get_frame(self, sequence, frame_id):
        """Decode the frame_id-th frame of a tracklet (frame_id is relative to the tracklet start frame)."""
        set_name = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id'])
        vid_name = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id'])
        frame_number = frame_id + sequence['start_frame']
        file_name = '{:06d}.JPEG'.format(frame_number)
        frame_path = os.path.join('Data', 'VID', 'train', set_name, vid_name, file_name)
        return decode_img(self.root, frame_path)

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Decode the requested frames of a tracklet together with their annotations.

        returns:
            (frame_list, anno_frames, object_meta), where anno_frames maps every annotation key to a list of
            cloned per-frame entries.
        """
        sequence = self.sequence_list[seq_id]

        frame_list = [self._get_frame(sequence, f) for f in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        # Create anno dict
        anno_frames = {key: [value[f_id, ...].clone() for f_id in frame_ids] for key, value in anno.items()}

        # added the class info to the meta info
        object_meta = OrderedDict({'object_class': sequence['class_name'],
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta

169
lib/train/dataset/lasot.py Normal file
View File

@@ -0,0 +1,169 @@
import os
import os.path
import torch
import numpy as np
import pandas
import csv
import random
from collections import OrderedDict
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
from lib.train.admin import env_settings
class Lasot(BaseVideoDataset):
    """ LaSOT dataset.

    Publication:
        LaSOT: A High-quality Benchmark for Large-scale Single Object Tracking
        Heng Fan, Liting Lin, Fan Yang, Peng Chu, Ge Deng, Sijia Yu, Hexin Bai, Yong Xu, Chunyuan Liao and Haibin Ling
        CVPR, 2019
        https://arxiv.org/pdf/1809.07845.pdf

    Download the dataset from https://cis.temple.edu/lasot/download.html
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, vid_ids=None, split=None, data_fraction=None):
        """
        args:
            root - path to the lasot dataset.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            vid_ids - List containing the ids of the videos (1 - 20) used for training. If vid_ids = [1, 3, 5], then the
                    videos with subscripts -1, -3, and -5 from each class will be used for training.
            split - If split='train', the official train split (protocol-II) is used for training. Note: Only one of
                    vid_ids or split option can be used at a time.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
        """
        root = env_settings().lasot_dir if root is None else root
        super().__init__('LaSOT', root, image_loader)

        # Keep a list of all classes (every entry of the root folder counts as a class)
        self.class_list = [f for f in os.listdir(self.root)]
        self.class_to_id = {cls_name: cls_id for cls_id, cls_name in enumerate(self.class_list)}

        self.sequence_list = self._build_sequence_list(vid_ids, split)

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))

        self.seq_per_class = self._build_class_list()

    def _build_sequence_list(self, vid_ids=None, split=None):
        """Return the sequence names ('<class>-<video id>') selected by either split or vid_ids.

        raises:
            ValueError - if both or neither of split / vid_ids are given, or if the split name is unknown.
        """
        if split is not None:
            if vid_ids is not None:
                raise ValueError('Cannot set both split_name and vid_ids.')
            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
            if split == 'train':
                file_path = os.path.join(ltr_path, 'data_specs', 'lasot_train_split.txt')
            else:
                raise ValueError('Unknown split name.')
            # read_csv(squeeze=True) no longer exists in pandas 2.x; squeeze the single column explicitly
            sequence_list = pandas.read_csv(file_path, header=None).squeeze("columns").values.tolist()
        elif vid_ids is not None:
            sequence_list = [c+'-'+str(v) for c in self.class_list for v in vid_ids]
        else:
            raise ValueError('Set either split_name or vid_ids.')

        return sequence_list

    def _build_class_list(self):
        """Map every class name to the indices (into self.sequence_list) of its sequences."""
        seq_per_class = {}
        for seq_id, seq_name in enumerate(self.sequence_list):
            class_name = seq_name.split('-')[0]
            seq_per_class.setdefault(class_name, []).append(seq_id)
        return seq_per_class

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'lasot'

    def has_class_info(self):
        """Class names are encoded in the sequence names, so this is always True."""
        return True

    def has_occlusion_info(self):
        """Occlusion / out-of-view flags are read for every sequence, so this is always True."""
        return True

    def get_num_sequences(self):
        """Return the number of selected sequences."""
        return len(self.sequence_list)

    def get_num_classes(self):
        """Return the number of entries in self.class_list."""
        return len(self.class_list)

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices stored for class_name (raises KeyError for an unknown class)."""
        return self.seq_per_class[class_name]

    def _read_bb_anno(self, seq_path):
        """Read 'groundtruth.txt' of a sequence as a float32 tensor with one row per line."""
        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False,
                             low_memory=False).values
        return torch.tensor(gt)

    def _read_target_visible(self, seq_path):
        """Return a 0/1 byte tensor that is 1 for frames flagged neither fully occluded nor out of view.

        Both label files are expected to hold all per-frame flags, comma separated, on their first line.
        """
        # Read full occlusion and out_of_view
        occlusion_file = os.path.join(seq_path, "full_occlusion.txt")
        out_of_view_file = os.path.join(seq_path, "out_of_view.txt")

        with open(occlusion_file, 'r', newline='') as f:
            occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]])
        with open(out_of_view_file, 'r', newline='') as f:
            out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]])

        # `~` on a uint8 tensor is a bitwise NOT (0 -> 255, 1 -> 254), which would give a mask that is
        # non-zero for every frame. Compare against zero to get a clean 0/1 mask instead.
        target_visible = ((occlusion == 0) & (out_of_view == 0)).byte()

        return target_visible

    def _get_sequence_path(self, seq_id):
        """Return '<root>/<class>/<class>-<video id>' for the seq_id-th sequence."""
        seq_name = self.sequence_list[seq_id]
        class_name = seq_name.split('-')[0]
        vid_id = seq_name.split('-')[1]

        return os.path.join(self.root, class_name, class_name + '-' + vid_id)

    def get_sequence_info(self, seq_id):
        """Return boxes and validity / visibility flags for all frames of a sequence.

        returns:
            dict with 'bbox', 'valid' (columns 2 and 3 of the box both positive) and 'visible' (visible
            according to the label files and valid).
        """
        seq_path = self._get_sequence_path(seq_id)
        bbox = self._read_bb_anno(seq_path)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = self._read_target_visible(seq_path) & valid.byte()

        return {'bbox': bbox, 'valid': valid, 'visible': visible}

    def _get_frame_path(self, seq_path, frame_id):
        """Return the path of the image for the zero-based frame_id."""
        return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_id+1))    # frames start from 1

    def _get_frame(self, seq_path, frame_id):
        """Load one frame with the configured image loader."""
        return self.image_loader(self._get_frame_path(seq_path, frame_id))

    def _get_class(self, seq_path):
        """Return the name of the folder that contains the sequence folder, i.e. the class name."""
        # os.path handles the platform's separators, unlike splitting on a hard-coded '/'
        raw_class = os.path.basename(os.path.dirname(seq_path))
        return raw_class

    def get_class_name(self, seq_id):
        """Return the class name of the seq_id-th sequence."""
        seq_path = self._get_sequence_path(seq_id)
        obj_class = self._get_class(seq_path)

        return obj_class

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Load the requested frames of a sequence together with their annotations.

        args:
            seq_id - index of the sequence.
            frame_ids - zero-based frame indices to load.
            anno - result of get_sequence_info for this sequence; computed here when None.

        returns:
            (frame_list, anno_frames, object_meta), where anno_frames maps every annotation key to a list of
            cloned per-frame entries and object_meta only carries the class name.
        """
        seq_path = self._get_sequence_path(seq_id)

        obj_class = self._get_class(seq_path)
        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        object_meta = OrderedDict({'object_class_name': obj_class,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta

View File

@@ -0,0 +1,165 @@
import os
import os.path
import torch
import numpy as np
import pandas
import csv
import random
from collections import OrderedDict
from .base_video_dataset import BaseVideoDataset
from lib.train.data import jpeg4py_loader
from lib.train.admin import env_settings
'''2021.1.16 Lasot for loading lmdb dataset'''
from lib.utils.lmdb_utils import *
class Lasot_lmdb(BaseVideoDataset):
def __init__(self, root=None, image_loader=jpeg4py_loader, vid_ids=None, split=None, data_fraction=None):
    """
    args:
        root - path to the lasot lmdb store. Falls back to env_settings().lasot_lmdb_dir when None.
        image_loader (jpeg4py_loader) - image reading function forwarded to the base class.
        vid_ids - List containing the ids of the videos (1 - 20) used for training. If vid_ids = [1, 3, 5], then the
                videos with subscripts -1, -3, and -5 from each class will be used for training.
        split - If split='train', the official train split (protocol-II) is used for training. Note: Only one of
                vid_ids or split option can be used at a time.
        data_fraction - Fraction of dataset to be used. The complete dataset is used by default
    """
    if root is None:
        root = env_settings().lasot_lmdb_dir
    super().__init__('LaSOT_lmdb', root, image_loader)

    # NOTE(review): with vid_ids, _build_sequence_list reads self.class_list, which is only filled in
    # below - check what the base class initialises it to before relying on the vid_ids option.
    self.sequence_list = self._build_sequence_list(vid_ids, split)

    # Keep a list of all classes, in order of first appearance in the sequence list
    self.class_list = list(dict.fromkeys(seq_name.split('-')[0] for seq_name in self.sequence_list))
    self.class_to_id = {cls_name: cls_id for cls_id, cls_name in enumerate(self.class_list)}

    if data_fraction is not None:
        num_kept = int(len(self.sequence_list)*data_fraction)
        self.sequence_list = random.sample(self.sequence_list, num_kept)

    self.seq_per_class = self._build_class_list()
def _build_sequence_list(self, vid_ids=None, split=None):
if split is not None:
if vid_ids is not None:
raise ValueError('Cannot set both split_name and vid_ids.')
ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
if split == 'train':
file_path = os.path.join(ltr_path, 'data_specs', 'lasot_train_split.txt')
else:
raise ValueError('Unknown split name.')
sequence_list = pandas.read_csv(file_path, header=None, squeeze=True).values.tolist()
elif vid_ids is not None:
sequence_list = [c+'-'+str(v) for c in self.class_list for v in vid_ids]
else:
raise ValueError('Set either split_name or vid_ids.')
return sequence_list
def _build_class_list(self):
seq_per_class = {}
for seq_id, seq_name in enumerate(self.sequence_list):
class_name = seq_name.split('-')[0]
if class_name in seq_per_class:
seq_per_class[class_name].append(seq_id)
else:
seq_per_class[class_name] = [seq_id]
return seq_per_class
def get_name(self):
    """Return the identifier string of this dataset."""
    return 'lasot_lmdb'
def has_class_info(self):
    """Class names are encoded in the sequence names, so this is always True."""
    return True
def has_occlusion_info(self):
    """ Report that this dataset provides occlusion information (always True). """
    provides_occlusion_info = True
    return provides_occlusion_info
def get_num_sequences(self):
    """ Return the number of entries in self.sequence_list. """
    num_sequences = len(self.sequence_list)
    return num_sequences
def get_num_classes(self):
    """ Return the number of entries in self.class_list. """
    num_classes = len(self.class_list)
    return num_classes
def get_sequences_in_class(self, class_name):
    """ Return the sequence indices stored for class_name in self.seq_per_class.

    raises:
        KeyError - If class_name is not a key of self.seq_per_class.
    """
    sequence_ids = self.seq_per_class[class_name]
    return sequence_ids
def _read_bb_anno(self, seq_path):
    """ Read the ground-truth annotation of a sequence through decode_str.

    args:
        seq_path - Sequence path; 'groundtruth.txt' is looked up below it.

    returns:
        torch.Tensor - float32 tensor with one row per annotation line, each line parsed as
                       comma separated floats.
    """
    bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
    gt_str_list = decode_str(self.root, bb_anno_file).split('\n')
    # Drop the final element only when it is actually empty. Unconditionally discarding it
    # would silently lose the last annotation if the stored text has no trailing newline.
    if gt_str_list[-1].strip() == '':
        gt_str_list.pop()
    gt_list = [list(map(float, line.split(','))) for line in gt_str_list]
    gt_arr = np.array(gt_list).astype(np.float32)
    return torch.tensor(gt_arr)
def _read_target_visible(self, seq_path):
    """ Compute the per-frame visibility mask of a sequence.

    Reads the comma separated integer flags stored in 'full_occlusion.txt' and
    'out_of_view.txt' below seq_path (through decode_str).

    args:
        seq_path - Sequence path under which the two flag files are looked up.

    returns:
        torch.Tensor - uint8 tensor that is 1 where both flags are 0 and 0 otherwise.
    """
    # Read full occlusion and out_of_view
    occlusion_file = os.path.join(seq_path, "full_occlusion.txt")
    out_of_view_file = os.path.join(seq_path, "out_of_view.txt")
    occ_list = list(map(int, decode_str(self.root, occlusion_file).split(',')))
    occlusion = torch.ByteTensor(occ_list)
    out_view_list = list(map(int, decode_str(self.root, out_of_view_file).split(',')))
    out_of_view = torch.ByteTensor(out_view_list)
    # On uint8 tensors `~` is a bitwise NOT in current PyTorch (0 -> 255, 1 -> 254), so
    # `~occlusion & ~out_of_view` is non-zero for every frame. Comparing against zero
    # yields a clean 0/1 mask while keeping the uint8 dtype callers combine with `&`.
    target_visible = ((occlusion == 0) & (out_of_view == 0)).byte()
    return target_visible
def _get_sequence_path(self, seq_id):
seq_name = self.sequence_list[seq_id]
class_name = seq_name.split('-')[0]
vid_id = seq_name.split('-')[1]
return os.path.join(class_name, class_name + '-' + vid_id)
def get_sequence_info(self, seq_id):
    """ Collect the per-frame annotations of a sequence.

    args:
        seq_id - Index of the sequence.

    returns:
        dict - 'bbox': the annotation tensor from _read_bb_anno,
               'valid': boolean mask that is True where columns 2 and 3 of bbox are both > 0,
               'visible': the mask from _read_target_visible AND-ed with 'valid' (as uint8).
    """
    seq_path = self._get_sequence_path(seq_id)
    boxes = self._read_bb_anno(seq_path)
    has_positive_col2 = boxes[:, 2] > 0
    has_positive_col3 = boxes[:, 3] > 0
    valid = has_positive_col2 & has_positive_col3
    target_visible = self._read_target_visible(seq_path)
    visible = target_visible & valid.byte()
    return dict(bbox=boxes, valid=valid, visible=visible)
def _get_frame_path(self, seq_path, frame_id):
return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_id+1)) # frames start from 1
def _get_frame(self, seq_path, frame_id):
    """ Load one frame of a sequence through decode_img.

    args:
        seq_path - Path of the sequence.
        frame_id - Zero-based frame index.

    returns:
        The value returned by decode_img for the frame's path.
    """
    frame_path = self._get_frame_path(seq_path, frame_id)
    return decode_img(self.root, frame_path)
def _get_class(self, seq_path):
raw_class = seq_path.split('/')[-2]
return raw_class
def get_class_name(self, seq_id):
    """ Return the class name of the sequence with index seq_id. """
    return self._get_class(self._get_sequence_path(seq_id))
def get_frames(self, seq_id, frame_ids, anno=None):
    """ Fetch the requested frames of a sequence together with their annotations.

    args:
        seq_id - Index of the sequence.
        frame_ids - Iterable of frame indices to load.
        anno - Optional sequence annotation dict; when None it is obtained from get_sequence_info.

    returns:
        list - The loaded frames, one per entry of frame_ids.
        dict - For every key of anno, a list with a cloned slice of its value per frame id.
        OrderedDict - Object meta data; only 'object_class_name' is filled, the rest is None.
    """
    seq_path = self._get_sequence_path(seq_id)
    obj_class = self._get_class(seq_path)

    frame_list = []
    for f_id in frame_ids:
        frame_list.append(self._get_frame(seq_path, f_id))

    if anno is None:
        anno = self.get_sequence_info(seq_id)

    anno_frames = {
        key: [value[f_id, ...].clone() for f_id in frame_ids]
        for key, value in anno.items()
    }

    object_meta = OrderedDict([
        ('object_class_name', obj_class),
        ('motion_class', None),
        ('major_class', None),
        ('root_class', None),
        ('motion_adverb', None),
    ])
    return frame_list, anno_frames, object_meta

View File

@@ -0,0 +1,151 @@
import torch
import os
import os.path
import numpy as np
import pandas
import random
from collections import OrderedDict
from lib.train.data import jpeg4py_loader
from .base_video_dataset import BaseVideoDataset
from lib.train.admin import env_settings
def list_sequences(root, set_ids):
    """ Collect the videos of the requested TrackingNet sets.

    For every set id, the '.txt' files inside '<root>/TRAIN_<set_id>/anno' are listed and their
    names (without extension) are taken as video names.

    args:
        root: Root directory to TrackingNet
        set_ids: Ids of the sets which are to be used

    returns:
        list - list of tuples (set_id, video_name), one for each annotation file found
    """
    sequence_list = []
    for set_id in set_ids:
        anno_dir = os.path.join(root, "TRAIN_" + str(set_id), "anno")
        for file_name in os.listdir(anno_dir):
            if not file_name.endswith('.txt'):
                continue
            video_name = os.path.splitext(file_name)[0]
            sequence_list.append((set_id, video_name))
    return sequence_list
class TrackingNet(BaseVideoDataset):
    """ TrackingNet dataset.

    Publication:
        TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild.
        Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem
        ECCV, 2018
        https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf

    Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, set_ids=None, data_fraction=None):
        """
        args:
            root - The path to the TrackingNet folder, containing the training sets.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            set_ids (None) - List containing the ids of the TrackingNet sets to be used for training. If None, all the
                            sets (0 - 11) will be used.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
        """
        root = env_settings().trackingnet_dir if root is None else root
        super().__init__('TrackingNet', root, image_loader)

        if set_ids is None:
            set_ids = list(range(12))

        self.set_ids = set_ids

        # Keep a list of all videos. Sequence list is a list of tuples (set_id, video_name) containing the set_id and
        # video_name for each sequence
        self.sequence_list = list_sequences(self.root, self.set_ids)

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list) * data_fraction))

        self.seq_to_class_map, self.seq_per_class = self._load_class_info()

        # we do not have the class_lists for the tracking net
        self.class_list = list(self.seq_per_class.keys())
        self.class_list.sort()

    def _load_class_info(self):
        """ Load the sequence-to-class mapping and group the sequence indices by class.

        The mapping is read from data_specs/trackingnet_classmap.txt (located relative to this file), which holds
        one tab separated '<sequence name>\\t<class name>' pair per line.

        returns:
            dict - Maps sequence name to class name, for every line of the class map file.
            dict - Maps class name to the list of indices (into self.sequence_list) of its sequences. Sequences
                   missing from the class map are grouped under 'Unknown'.
        """
        ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
        class_map_path = os.path.join(ltr_path, 'data_specs', 'trackingnet_classmap.txt')

        seq_to_class_map = {}
        with open(class_map_path, 'r') as f:
            for seq_class in f:
                # A blank line (e.g. at the end of the file) carries no mapping; skip it instead of failing
                if not seq_class.strip():
                    continue
                seq_to_class_map[seq_class.split('\t')[0]] = seq_class.rstrip().split('\t')[1]

        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = seq_to_class_map.get(seq[1], 'Unknown')
            if class_name not in seq_per_class:
                seq_per_class[class_name] = [i]
            else:
                seq_per_class[class_name].append(i)

        return seq_to_class_map, seq_per_class

    def get_name(self):
        """ Return the identifier string of this dataset. """
        return 'trackingnet'

    def has_class_info(self):
        """ Report that this dataset provides object class information (always True). """
        return True

    def get_sequences_in_class(self, class_name):
        """ Return the list of sequence indices stored for class_name. Raises KeyError for an unknown class name. """
        return self.seq_per_class[class_name]

    def _read_bb_anno(self, seq_id):
        """ Read '<root>/TRAIN_<set_id>/anno/<video_name>.txt' as a float32 tensor, one row per line of the file. """
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        bb_anno_file = os.path.join(self.root, "TRAIN_" + str(set_id), "anno", vid_name + ".txt")
        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False,
                             low_memory=False).values
        return torch.tensor(gt)

    def get_sequence_info(self, seq_id):
        """ Return the per-frame annotations of a sequence.

        returns:
            dict - 'bbox': the annotation tensor,
                   'valid': boolean mask that is True where columns 2 and 3 of bbox are both > 0,
                   'visible': uint8 copy of 'valid'.
        """
        bbox = self._read_bb_anno(seq_id)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = valid.clone().byte()
        return {'bbox': bbox, 'valid': valid, 'visible': visible}

    def _get_frame(self, seq_id, frame_id):
        """ Load '<root>/TRAIN_<set_id>/frames/<video_name>/<frame_id>.jpg' with self.image_loader. """
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        frame_path = os.path.join(self.root, "TRAIN_" + str(set_id), "frames", vid_name, str(frame_id) + ".jpg")
        return self.image_loader(frame_path)

    def _get_class(self, seq_id):
        """ Return the class name of a sequence, or 'Unknown' if it is missing from the class map. """
        seq_name = self.sequence_list[seq_id][1]
        # Use the same 'Unknown' fallback as _load_class_info, so that sequences grouped under 'Unknown' there
        # do not raise a KeyError here (and therefore in get_frames).
        return self.seq_to_class_map.get(seq_name, 'Unknown')

    def get_class_name(self, seq_id):
        """ Return the class name of the sequence with index seq_id. """
        obj_class = self._get_class(seq_id)

        return obj_class

    def get_frames(self, seq_id, frame_ids, anno=None):
        """ Fetch the requested frames of a sequence together with their annotations.

        args:
            seq_id - Index of the sequence.
            frame_ids - Iterable of frame indices to load.
            anno - Optional sequence annotation dict; when None it is obtained from get_sequence_info.

        returns:
            list - The loaded frames, one per entry of frame_ids.
            dict - For every key of anno, a list with a cloned slice of its value per frame id.
            OrderedDict - Object meta data; only 'object_class_name' is filled, the rest is None.
        """
        frame_list = [self._get_frame(seq_id, f) for f in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        obj_class = self._get_class(seq_id)

        object_meta = OrderedDict({'object_class_name': obj_class,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta

View File

@@ -0,0 +1,147 @@
import torch
import os
import os.path
import numpy as np
import random
from collections import OrderedDict
from lib.train.data import jpeg4py_loader
from .base_video_dataset import BaseVideoDataset
from lib.train.admin import env_settings
import json
from lib.utils.lmdb_utils import decode_img, decode_str
def list_sequences(root):
    """ Load the sequence list stored in '<root>/seq_list.json'.

    args:
        root: Root directory of the TrackingNet LMDB data, containing seq_list.json

    returns:
        The decoded JSON content. The dataset class indexes every entry as
        (set_id, video_name).
    """
    with open(os.path.join(root, "seq_list.json"), "r") as f:
        return json.load(f)
class TrackingNet_lmdb(BaseVideoDataset):
    """ TrackingNet dataset.

    Publication:
        TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild.
        Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem
        ECCV, 2018
        https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf

    Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, set_ids=None, data_fraction=None):
        """
        args:
            root - The path to the TrackingNet folder, containing the training sets.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            set_ids (None) - List containing the ids of the TrackingNet sets to be used for training. If None, all the
                            sets (0 - 11) will be used.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
        """
        root = env_settings().trackingnet_lmdb_dir if root is None else root
        super().__init__('TrackingNet_lmdb', root, image_loader)

        # Keep a list of all videos. Sequence list is a list of tuples (set_id, video_name) containing the set_id and
        # video_name for each sequence
        self.sequence_list = list_sequences(self.root)

        if set_ids is None:
            set_ids = list(range(12))
        else:
            # seq_list.json lists the sequences of every set, so an explicitly requested subset has to be applied
            # here; otherwise the set_ids argument would have no effect at all.
            requested_sets = set(set_ids)
            self.sequence_list = [seq for seq in self.sequence_list if seq[0] in requested_sets]

        self.set_ids = set_ids

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list) * data_fraction))

        self.seq_to_class_map, self.seq_per_class = self._load_class_info()

        # we do not have the class_lists for the tracking net
        self.class_list = list(self.seq_per_class.keys())
        self.class_list.sort()

    def _load_class_info(self):
        """ Load the sequence-to-class mapping and group the sequence indices by class.

        The mapping is read from data_specs/trackingnet_classmap.txt (located relative to this file), which holds
        one tab separated '<sequence name>\\t<class name>' pair per line.

        returns:
            dict - Maps sequence name to class name, for every line of the class map file.
            dict - Maps class name to the list of indices (into self.sequence_list) of its sequences. Sequences
                   missing from the class map are grouped under 'Unknown'.
        """
        ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
        class_map_path = os.path.join(ltr_path, 'data_specs', 'trackingnet_classmap.txt')

        seq_to_class_map = {}
        with open(class_map_path, 'r') as f:
            for seq_class in f:
                # A blank line (e.g. at the end of the file) carries no mapping; skip it instead of failing
                if not seq_class.strip():
                    continue
                seq_to_class_map[seq_class.split('\t')[0]] = seq_class.rstrip().split('\t')[1]

        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = seq_to_class_map.get(seq[1], 'Unknown')
            if class_name not in seq_per_class:
                seq_per_class[class_name] = [i]
            else:
                seq_per_class[class_name].append(i)

        return seq_to_class_map, seq_per_class

    def get_name(self):
        """ Return the identifier string of this dataset. """
        return 'trackingnet_lmdb'

    def has_class_info(self):
        """ Report that this dataset provides object class information (always True). """
        return True

    def get_sequences_in_class(self, class_name):
        """ Return the list of sequence indices stored for class_name. Raises KeyError for an unknown class name. """
        return self.seq_per_class[class_name]

    def _read_bb_anno(self, seq_id):
        """ Read 'anno/<video_name>.txt' from '<root>/TRAIN_<set_id>_lmdb' (through decode_str).

        returns:
            torch.Tensor - float32 tensor with one row per annotation line, each line parsed as comma
                           separated floats.
        """
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        gt_str_list = decode_str(os.path.join(self.root, "TRAIN_%d_lmdb" % set_id),
                                 os.path.join("anno", vid_name + ".txt")).split('\n')
        # Drop the final element only when it is actually empty. Unconditionally discarding it would silently
        # lose the last annotation if the stored text has no trailing newline.
        if gt_str_list[-1].strip() == '':
            gt_str_list.pop()
        gt_list = [list(map(float, line.split(','))) for line in gt_str_list]
        gt_arr = np.array(gt_list).astype(np.float32)
        return torch.tensor(gt_arr)

    def get_sequence_info(self, seq_id):
        """ Return the per-frame annotations of a sequence.

        returns:
            dict - 'bbox': the annotation tensor,
                   'valid': boolean mask that is True where columns 2 and 3 of bbox are both > 0,
                   'visible': uint8 copy of 'valid'.
        """
        bbox = self._read_bb_anno(seq_id)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = valid.clone().byte()
        return {'bbox': bbox, 'valid': valid, 'visible': visible}

    def _get_frame(self, seq_id, frame_id):
        """ Load 'frames/<video_name>/<frame_id>.jpg' from '<root>/TRAIN_<set_id>_lmdb' through decode_img. """
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        return decode_img(os.path.join(self.root, "TRAIN_%d_lmdb" % set_id),
                          os.path.join("frames", vid_name, str(frame_id) + ".jpg"))

    def _get_class(self, seq_id):
        """ Return the class name of a sequence, or 'Unknown' if it is missing from the class map. """
        seq_name = self.sequence_list[seq_id][1]
        # Use the same 'Unknown' fallback as _load_class_info, so that sequences grouped under 'Unknown' there
        # do not raise a KeyError here (and therefore in get_frames).
        return self.seq_to_class_map.get(seq_name, 'Unknown')

    def get_class_name(self, seq_id):
        """ Return the class name of the sequence with index seq_id. """
        obj_class = self._get_class(seq_id)

        return obj_class

    def get_frames(self, seq_id, frame_ids, anno=None):
        """ Fetch the requested frames of a sequence together with their annotations.

        args:
            seq_id - Index of the sequence.
            frame_ids - Iterable of frame indices to load.
            anno - Optional sequence annotation dict; when None it is obtained from get_sequence_info.

        returns:
            list - The loaded frames, one per entry of frame_ids.
            dict - For every key of anno, a list with a cloned slice of its value per frame id.
            OrderedDict - Object meta data; only 'object_class_name' is filled, the rest is None.
        """
        frame_list = [self._get_frame(seq_id, f) for f in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        obj_class = self._get_class(seq_id)

        object_meta = OrderedDict({'object_class_name': obj_class,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta