init commit of samurai
This commit is contained in:
437
lib/train/dataset/COCO_tool.py
Normal file
437
lib/train/dataset/COCO_tool.py
Normal file
@@ -0,0 +1,437 @@
|
||||
__author__ = 'tylin'
|
||||
__version__ = '2.0'
|
||||
# Interface for accessing the Microsoft COCO dataset.
|
||||
|
||||
# Microsoft COCO is a large image dataset designed for object detection,
|
||||
# segmentation, and caption generation. pycocotools is a Python API that
|
||||
# assists in loading, parsing and visualizing the annotations in COCO.
|
||||
# Please visit http://mscoco.org/ for more information on COCO, including
|
||||
# for the data, paper, and tutorials. The exact format of the annotations
|
||||
# is also described on the COCO website. For example usage of the pycocotools
|
||||
# please see pycocotools_demo.ipynb. In addition to this API, please download both
|
||||
# the COCO images and annotations in order to run the demo.
|
||||
|
||||
# An alternative to using the API is to load the annotations directly
|
||||
# into a Python dictionary.
|
||||
# Using the API provides additional utility functions. Note that this API
|
||||
# supports both *instance* and *caption* annotations. In the case of
|
||||
# captions not all functions are defined (e.g. categories are undefined).
|
||||
|
||||
# The following API functions are defined:
|
||||
# COCO - COCO api class that loads COCO annotation file and prepare data structures.
|
||||
# decodeMask - Decode binary mask M encoded via run-length encoding.
|
||||
# encodeMask - Encode binary mask M using run-length encoding.
|
||||
# getAnnIds - Get ann ids that satisfy given filter conditions.
|
||||
# getCatIds - Get cat ids that satisfy given filter conditions.
|
||||
# getImgIds - Get img ids that satisfy given filter conditions.
|
||||
# loadAnns - Load anns with the specified ids.
|
||||
# loadCats - Load cats with the specified ids.
|
||||
# loadImgs - Load imgs with the specified ids.
|
||||
# annToMask - Convert segmentation in an annotation to binary mask.
|
||||
# showAnns - Display the specified annotations.
|
||||
# loadRes - Load algorithm results and create API for accessing them.
|
||||
# download - Download COCO images from mscoco.org server.
|
||||
# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
|
||||
# Help on each function can be accessed by: "help COCO>function".
|
||||
|
||||
# See also COCO>decodeMask,
|
||||
# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
|
||||
# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
|
||||
# COCO>loadImgs, COCO>annToMask, COCO>showAnns
|
||||
|
||||
# Microsoft COCO Toolbox. version 2.0
|
||||
# Data, paper, and tutorials available at: http://mscoco.org/
|
||||
# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
|
||||
# Licensed under the Simplified BSD License [see bsd.txt]
|
||||
|
||||
import json
|
||||
import time
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.collections import PatchCollection
|
||||
from matplotlib.patches import Polygon
|
||||
import numpy as np
|
||||
import copy
|
||||
import itertools
|
||||
from pycocotools import mask as maskUtils
|
||||
import os
|
||||
from collections import defaultdict
|
||||
import sys
|
||||
PYTHON_VERSION = sys.version_info[0]
|
||||
if PYTHON_VERSION == 2:
|
||||
from urllib import urlretrieve
|
||||
elif PYTHON_VERSION == 3:
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
|
||||
def _isArrayLike(obj):
|
||||
return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
|
||||
|
||||
|
||||
class COCO:
    """
    Helper for querying and visualizing COCO-style annotations held in memory.

    The constructor receives the already-parsed annotation dictionary (not a file path)
    and builds lookup tables over its 'annotations', 'images' and 'categories' entries.
    """

    def __init__(self, dataset):
        """
        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
        :param dataset (dict): parsed annotation structure; the keys 'annotations', 'images'
                               and 'categories' are indexed when they are present
        :return:
        """
        # start from empty containers so every attribute exists before indexing
        self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
        # isinstance (rather than an exact type match) also admits dict subclasses
        assert isinstance(dataset, dict), 'annotation file format {} not supported'.format(type(dataset))
        self.dataset = dataset
        self.createIndex()

    def createIndex(self):
        """
        (Re)build the lookup tables from ``self.dataset``.
        Fills ``anns`` (ann id -> ann), ``imgs`` (img id -> img), ``cats`` (cat id -> cat),
        ``imgToAnns`` (img id -> list of anns) and ``catToImgs`` (cat id -> list of img ids).
        :return:
        """
        # create index
        print('creating index...')
        anns, cats, imgs = {}, {}, {}
        imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
        if 'annotations' in self.dataset:
            for ann in self.dataset['annotations']:
                imgToAnns[ann['image_id']].append(ann)
                anns[ann['id']] = ann

        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img

        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat

        # the category -> images table is only built when categories are available
        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in self.dataset['annotations']:
                catToImgs[ann['category_id']].append(ann['image_id'])

        print('index created!')

        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats

    def info(self):
        """
        Print information about the annotation file.
        :return:
        """
        for key, value in self.dataset['info'].items():
            print('{}: {}'.format(key, value))

    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        """
        Get ann ids that satisfy given filter conditions. default skips that filter
        :param imgIds  (int array)     : get anns for given imgs
               catIds  (int array)     : get anns for given cats
               areaRng (float array)   : get anns for given area range (e.g. [0 inf])
               iscrowd (boolean)       : get anns for given crowd label (False or True)
        :return: ids (int array)       : integer array of ann ids
        """
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        # len() is used instead of truthiness so array-like inputs are handled too
        if len(imgIds) == len(catIds) == len(areaRng) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(imgIds) == 0:
                lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
                anns = list(itertools.chain.from_iterable(lists))
            else:
                anns = self.dataset['annotations']
            anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds]
            # area bounds are exclusive on both ends
            anns = anns if len(areaRng) == 0 else [ann for ann in anns
                                                   if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
        if iscrowd is not None:
            ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
        else:
            ids = [ann['id'] for ann in anns]
        return ids

    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
        """
        filtering parameters. default skips that filter.
        :param catNms (str array)  : get cats for given cat names
        :param supNms (str array)  : get cats for given supercategory names
        :param catIds (int array)  : get cats for given cat ids
        :return: ids (int array)   : integer array of cat ids
        """
        catNms = catNms if _isArrayLike(catNms) else [catNms]
        supNms = supNms if _isArrayLike(supNms) else [supNms]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        cats = self.dataset['categories']
        if not (len(catNms) == len(supNms) == len(catIds) == 0):
            cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms]
            cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms]
            cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds]
        ids = [cat['id'] for cat in cats]
        return ids

    def getImgIds(self, imgIds=[], catIds=[]):
        '''
        Get img ids that satisfy given filter conditions.
        :param imgIds (int array) : get imgs for given ids
        :param catIds (int array) : get imgs with all given cats
        :return: ids (int array)  : integer array of img ids
        '''
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]

        if len(imgIds) == len(catIds) == 0:
            ids = self.imgs.keys()
        else:
            ids = set(imgIds)
            for i, catId in enumerate(catIds):
                # with no explicit image ids, the first category seeds the set;
                # every further category narrows it by intersection
                if i == 0 and len(ids) == 0:
                    ids = set(self.catToImgs[catId])
                else:
                    ids &= set(self.catToImgs[catId])
        return list(ids)

    def loadAnns(self, ids=[]):
        """
        Load anns with the specified ids.
        :param ids (int array)       : integer ids specifying anns
        :return: anns (object array) : loaded ann objects
        """
        if _isArrayLike(ids):
            return [self.anns[ann_id] for ann_id in ids]
        elif isinstance(ids, int):
            return [self.anns[ids]]

    def loadCats(self, ids=[]):
        """
        Load cats with the specified ids.
        :param ids (int array)       : integer ids specifying cats
        :return: cats (object array) : loaded cat objects
        """
        if _isArrayLike(ids):
            return [self.cats[cat_id] for cat_id in ids]
        elif isinstance(ids, int):
            return [self.cats[ids]]

    def loadImgs(self, ids=[]):
        """
        Load imgs with the specified ids.
        :param ids (int array)       : integer ids specifying img
        :return: imgs (object array) : loaded img objects
        """
        if _isArrayLike(ids):
            return [self.imgs[img_id] for img_id in ids]
        elif isinstance(ids, int):
            return [self.imgs[ids]]

    def showAnns(self, anns, draw_bbox=False):
        """
        Display the specified annotations.
        :param anns (array of object): annotations to display
        :param draw_bbox (bool): when True, also draw each annotation's 'bbox' as a rectangle
        :return: 0 when ``anns`` is empty, otherwise None
        """
        if len(anns) == 0:
            return 0
        if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
            datasetType = 'instances'
        elif 'caption' in anns[0]:
            datasetType = 'captions'
        else:
            raise Exception('datasetType not supported')
        if datasetType == 'instances':
            ax = plt.gca()
            ax.set_autoscale_on(False)
            polygons = []
            color = []
            for ann in anns:
                # one random, fairly bright color per annotation
                c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
                if 'segmentation' in ann:
                    if type(ann['segmentation']) == list:
                        # polygon
                        for seg in ann['segmentation']:
                            poly = np.array(seg).reshape((int(len(seg)/2), 2))
                            polygons.append(Polygon(poly))
                            color.append(c)
                    else:
                        # mask
                        t = self.imgs[ann['image_id']]
                        if type(ann['segmentation']['counts']) == list:
                            rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
                        else:
                            rle = [ann['segmentation']]
                        m = maskUtils.decode(rle)
                        img = np.ones((m.shape[0], m.shape[1], 3))
                        # crowd regions get a fixed color; anything else (including a
                        # missing 'iscrowd' flag) gets a random one, so color_mask is
                        # always bound before use
                        if ann.get('iscrowd', 0) == 1:
                            color_mask = np.array([2.0, 166.0, 101.0])/255
                        else:
                            color_mask = np.random.random((1, 3)).tolist()[0]
                        for i in range(3):
                            img[:, :, i] = color_mask[i]
                        ax.imshow(np.dstack((img, m*0.5)))
                if 'keypoints' in ann and type(ann['keypoints']) == list:
                    # turn skeleton into zero-based index
                    sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
                    kp = np.array(ann['keypoints'])
                    x = kp[0::3]
                    y = kp[1::3]
                    v = kp[2::3]
                    for sk in sks:
                        if np.all(v[sk] > 0):
                            plt.plot(x[sk], y[sk], linewidth=3, color=c)
                    plt.plot(x[v > 0], y[v > 0], 'o', markersize=8, markerfacecolor=c,
                             markeredgecolor='k', markeredgewidth=2)
                    plt.plot(x[v > 1], y[v > 1], 'o', markersize=8, markerfacecolor=c,
                             markeredgecolor=c, markeredgewidth=2)

                if draw_bbox:
                    [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox']
                    poly = [[bbox_x, bbox_y], [bbox_x, bbox_y+bbox_h],
                            [bbox_x+bbox_w, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y]]
                    np_poly = np.array(poly).reshape((4, 2))
                    polygons.append(Polygon(np_poly))
                    color.append(c)

            # translucent fill first, then solid outlines on top
            p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
            ax.add_collection(p)
            p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
            ax.add_collection(p)
        elif datasetType == 'captions':
            for ann in anns:
                print(ann['caption'])

    def loadRes(self, resFile):
        """
        Load result file and return a result api object.
        :param   resFile (str)     : file name of result file; a numpy array (see
                                     loadNumpyAnnotations) or a list of result dicts is
                                     accepted as well
        :return: res (obj)         : result api object
        """
        # the constructor requires a dict, so start the result object from an empty one
        res = COCO({})
        res.dataset['images'] = list(self.dataset['images'])

        print('Loading and preparing results...')
        tic = time.time()
        # type(u'') is ``str`` on Python 3 and ``unicode`` on Python 2
        if isinstance(resFile, (str, type(u''))):
            with open(resFile) as f:
                anns = json.load(f)
        elif type(resFile) == np.ndarray:
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert type(anns) == list, 'results in not an array of objects'
        annsImgIds = [ann['image_id'] for ann in anns]
        # every result must refer to an image known to this dataset
        assert set(annsImgIds) <= set(self.imgs.keys()), \
            'Results do not correspond to current coco set'
        # an empty result list matches none of the branches below instead of raising
        first = anns[0] if anns else {}
        if 'caption' in first:
            imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
            res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
            for idx, ann in enumerate(anns):
                ann['id'] = idx+1
        elif 'bbox' in first and not first['bbox'] == []:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for idx, ann in enumerate(anns):
                bb = ann['bbox']
                x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
                if 'segmentation' not in ann:
                    # fall back to the box outline as a 4-point polygon
                    ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
                ann['area'] = bb[2]*bb[3]
                ann['id'] = idx+1
                ann['iscrowd'] = 0
        elif 'segmentation' in first:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for idx, ann in enumerate(anns):
                # now only support compressed RLE format as segmentation results
                ann['area'] = maskUtils.area(ann['segmentation'])
                if 'bbox' not in ann:
                    ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
                ann['id'] = idx+1
                ann['iscrowd'] = 0
        elif 'keypoints' in first:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for idx, ann in enumerate(anns):
                s = ann['keypoints']
                x = s[0::3]
                y = s[1::3]
                x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
                ann['area'] = (x1-x0)*(y1-y0)
                ann['id'] = idx + 1
                ann['bbox'] = [x0, y0, x1-x0, y1-y0]
        print('DONE (t={:0.2f}s)'.format(time.time() - tic))

        res.dataset['annotations'] = anns
        res.createIndex()
        return res

    def download(self, tarDir=None, imgIds=[]):
        '''
        Download COCO images from mscoco.org server.
        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded
        :return: -1 when no target directory is given, otherwise None
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if len(imgIds) == 0:
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            # files that already exist are not fetched again
            if not os.path.exists(fname):
                urlretrieve(img['coco_url'], fname)
            # report a 1-based count so the final message reads N/N
            print('downloaded {}/{} images (t={:0.1f}s)'.format(i + 1, N, time.time() - tic))

    def loadNumpyAnnotations(self, data):
        """
        Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
        :param  data (numpy.ndarray)
        :return: annotations (python nested list)
        """
        print('Converting ndarray to lists...')
        assert(type(data) == np.ndarray)
        print(data.shape)
        assert(data.shape[1] == 7)
        N = data.shape[0]
        ann = []
        for i in range(N):
            if i % 1000000 == 0:
                print('{}/{}'.format(i, N))
            ann.append({
                'image_id': int(data[i, 0]),
                'bbox': [data[i, 1], data[i, 2], data[i, 3], data[i, 4]],
                'score': data[i, 5],
                'category_id': int(data[i, 6]),
            })
        return ann

    def annToRLE(self, ann):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: rle (the run-length encoding of the annotation's segmentation)
        """
        t = self.imgs[ann['image_id']]
        h, w = t['height'], t['width']
        segm = ann['segmentation']
        if type(segm) == list:
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, h, w)
            rle = maskUtils.merge(rles)
        elif type(segm['counts']) == list:
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, h, w)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann)
        m = maskUtils.decode(rle)
        return m
|
11
lib/train/dataset/__init__.py
Normal file
11
lib/train/dataset/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from .lasot import Lasot
|
||||
from .got10k import Got10k
|
||||
from .tracking_net import TrackingNet
|
||||
from .imagenetvid import ImagenetVID
|
||||
from .coco import MSCOCO
|
||||
from .coco_seq import MSCOCOSeq
|
||||
from .got10k_lmdb import Got10k_lmdb
|
||||
from .lasot_lmdb import Lasot_lmdb
|
||||
from .imagenetvid_lmdb import ImagenetVID_lmdb
|
||||
from .coco_seq_lmdb import MSCOCOSeq_lmdb
|
||||
from .tracking_net_lmdb import TrackingNet_lmdb
|
92
lib/train/dataset/base_image_dataset.py
Normal file
92
lib/train/dataset/base_image_dataset.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import torch.utils.data
|
||||
from lib.train.data.image_loader import jpeg4py_loader
|
||||
|
||||
|
||||
class BaseImageDataset(torch.utils.data.Dataset):
    """ Common base for image datasets; concrete datasets override the query methods. """

    def __init__(self, name, root, image_loader=jpeg4py_loader):
        """
        args:
            name - Identifier stored on the instance as self.name
            root - The root path to the dataset
            image_loader (jpeg4py_loader) - Callable used to read the images.
                                            jpeg4py (https://github.com/ajkxyz/jpeg4py) is the default.
        """
        self.name, self.root = name, root
        self.image_loader = image_loader

        # Both lists start empty and are expected to be filled in by subclasses.
        self.image_list = []     # Contains the list of sequences.
        self.class_list = []

    def __len__(self):
        """ Size of the dataset, delegated to get_num_images().

        returns:
            int - number of samples in the dataset
        """
        size = self.get_num_images()
        return size

    def __getitem__(self, index):
        """ Not to be used! Always yields None; see get_image() for fetching a sample.
        """
        return None

    def get_name(self):
        """ Name of the dataset. Must be provided by the subclass.

        returns:
            string - Name of the dataset
        """
        raise NotImplementedError

    def get_num_images(self):
        """ Number of entries currently held in self.image_list.

        returns:
            int - length of self.image_list
        """
        return len(self.image_list)

    def has_class_info(self):
        """ The base class reports no class information. """
        return False

    def get_class_name(self, image_id):
        """ The base class has no class name to report and returns None. """
        return None

    def get_num_classes(self):
        """ Number of entries in self.class_list. """
        return len(self.class_list)

    def get_class_list(self):
        """ Returns self.class_list itself (not a copy). """
        return self.class_list

    def get_images_in_class(self, class_name):
        """ Must be provided by the subclass. """
        raise NotImplementedError

    def has_segmentation_info(self):
        """ The base class reports no segmentation information. """
        return False

    def get_image_info(self, seq_id):
        """ Returns information about a particular image. Must be provided by the subclass.

        args:
            seq_id - index of the image

        returns:
            Dict
        """
        raise NotImplementedError

    def get_image(self, image_id, anno=None):
        """ Get an image. Must be provided by the subclass.

        args:
            image_id - index of image
            anno(None) - The annotation for the image. If None, it is expected to be loaded.

        returns:
            image -
            anno -
            dict - A dict containing meta information about the image, e.g. class of the target object.
        """
        raise NotImplementedError
|
||||
|
110
lib/train/dataset/base_video_dataset.py
Normal file
110
lib/train/dataset/base_video_dataset.py
Normal file
@@ -0,0 +1,110 @@
|
||||
import torch.utils.data
|
||||
# 2021.1.5 use jpeg4py_loader_w_failsafe as default
|
||||
from lib.train.data.image_loader import jpeg4py_loader_w_failsafe
|
||||
|
||||
|
||||
class BaseVideoDataset(torch.utils.data.Dataset):
    """ Common base for video datasets; concrete datasets override the query methods. """

    def __init__(self, name, root, image_loader=jpeg4py_loader_w_failsafe):
        """
        args:
            name - Identifier stored on the instance as self.name
            root - The root path to the dataset
            image_loader (jpeg4py_loader_w_failsafe) - Callable used to read the images.
                                            jpeg4py (https://github.com/ajkxyz/jpeg4py) based by default.
        """
        self.name, self.root = name, root
        self.image_loader = image_loader

        # Both lists start empty and are expected to be filled in by subclasses.
        self.sequence_list = []     # Contains the list of sequences.
        self.class_list = []

    def __len__(self):
        """ Size of the dataset, delegated to get_num_sequences().

        returns:
            int - number of samples in the dataset
        """
        size = self.get_num_sequences()
        return size

    def __getitem__(self, index):
        """ Not to be used! Always yields None; check get_frames() instead.
        """
        return None

    def is_video_sequence(self):
        """ Returns whether the dataset is a video dataset or an image dataset

        returns:
            bool - True if a video dataset (the base class always answers True)
        """
        return True

    def is_synthetic_video_dataset(self):
        """ Returns whether the dataset contains real videos or synthetic ones

        returns:
            bool - the base class always answers False
        """
        return False

    def get_name(self):
        """ Name of the dataset. Must be provided by the subclass.

        returns:
            string - Name of the dataset
        """
        raise NotImplementedError

    def get_num_sequences(self):
        """ Number of sequences in a dataset

        returns:
            int - length of self.sequence_list
        """
        return len(self.sequence_list)

    def has_class_info(self):
        """ The base class reports no class information. """
        return False

    def has_occlusion_info(self):
        """ The base class reports no occlusion information. """
        return False

    def get_num_classes(self):
        """ Number of entries in self.class_list. """
        return len(self.class_list)

    def get_class_list(self):
        """ Returns self.class_list itself (not a copy). """
        return self.class_list

    def get_sequences_in_class(self, class_name):
        """ Must be provided by the subclass. """
        raise NotImplementedError

    def has_segmentation_info(self):
        """ The base class reports no segmentation information. """
        return False

    def get_sequence_info(self, seq_id):
        """ Returns information about a particular sequence. Must be provided by the subclass.

        args:
            seq_id - index of the sequence

        returns:
            Dict
        """
        raise NotImplementedError

    def get_frames(self, seq_id, frame_ids, anno=None):
        """ Get a set of frames from a particular sequence. Must be provided by the subclass.

        args:
            seq_id      - index of sequence
            frame_ids   - a list of frame numbers
            anno(None)  - The annotation for the sequence (see get_sequence_info). If None, they will be loaded.

        returns:
            list - List of frames corresponding to frame_ids
            list - List of dicts for each frame
            dict - A dict containing meta information about the sequence, e.g. class of the target object.
        """
        raise NotImplementedError
|
||||
|
156
lib/train/dataset/coco.py
Normal file
156
lib/train/dataset/coco.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import os
|
||||
from .base_image_dataset import BaseImageDataset
|
||||
import torch
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
from lib.train.data import jpeg4py_loader
|
||||
from lib.train.admin import env_settings
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
|
||||
class MSCOCO(BaseImageDataset):
    """ The COCO object detection dataset.

    Publication:
        Microsoft COCO: Common Objects in Context.
        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
        ECCV, 2014
        https://arxiv.org/pdf/1405.0312.pdf

    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
    organized as follows.
        - coco_root
            - annotations
                - instances_train2014.json
                - instances_train2017.json
            - images
                - train2014
                - train2017

    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.

    Each entry of the dataset is one non-crowd annotation (object instance), not one image file.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None,
                 split="train", version="2014"):
        """
        args:
            root - path to coco root folder. When None, env_settings().coco_dir is used.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
            min_area - Objects with area not greater than min_area are filtered out. With the default
                       (None) no area filtering is applied.
            split - 'train' or 'val'.
            version - version of coco dataset (2014 or 2017)
        """
        root = env_settings().coco_dir if root is None else root
        super().__init__('COCO', root, image_loader)

        self.img_pth = os.path.join(root, 'images/{}{}/'.format(split, version))
        self.anno_path = os.path.join(root, 'annotations/instances_{}{}.json'.format(split, version))

        self.coco_set = COCO(self.anno_path)

        self.cats = self.coco_set.cats

        self.class_list = self.get_class_list()  # the parent class thing would happen in the sampler

        self.image_list = self._get_image_list(min_area=min_area)

        if data_fraction is not None:
            # random subset without replacement; the size is truncated towards zero
            self.image_list = random.sample(self.image_list, int(len(self.image_list) * data_fraction))
        # built after sub-sampling so the stored indices refer to the final image_list
        self.im_per_class = self._build_im_per_class()

    def _get_image_list(self, min_area=None):
        """ Collect the ids of all non-crowd annotations, optionally keeping only those with area > min_area. """
        anns = self.coco_set.anns
        image_list = [a for a in anns if anns[a]['iscrowd'] == 0]

        if min_area is not None:
            image_list = [a for a in image_list if anns[a]['area'] > min_area]

        return image_list

    def get_num_classes(self):
        """ Number of category names in self.class_list. """
        return len(self.class_list)

    def get_name(self):
        return 'coco'

    def has_class_info(self):
        return True

    def has_segmentation_info(self):
        return True

    def get_class_list(self):
        """ Names of all categories, in the iteration order of self.cats. """
        return [cat['name'] for cat in self.cats.values()]

    def _build_im_per_class(self):
        """ Map each category name to the list of positions in self.image_list that belong to it. """
        im_per_class = {}
        for i, im in enumerate(self.image_list):
            class_name = self.cats[self.coco_set.anns[im]['category_id']]['name']
            im_per_class.setdefault(class_name, []).append(i)

        return im_per_class

    def get_images_in_class(self, class_name):
        """ Positions in self.image_list whose annotation has the given category name. """
        return self.im_per_class[class_name]

    def get_image_info(self, im_id):
        """ Build the annotation dict for entry im_id.

        returns:
            dict with 'bbox' (tensor viewed as 4 values, taken from the annotation's 'bbox'),
            'mask' (tensor built from coco_set.annToMask), 'valid' (True when bbox[2] and
            bbox[3] are both positive) and 'visible' (byte copy of 'valid').
        """
        anno = self._get_anno(im_id)

        bbox = torch.Tensor(anno['bbox']).view(4,)

        mask = torch.Tensor(self.coco_set.annToMask(anno))

        valid = (bbox[2] > 0) & (bbox[3] > 0)
        visible = valid.clone().byte()

        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}

    def _get_anno(self, im_id):
        """ Raw annotation dict for position im_id of self.image_list. """
        anno = self.coco_set.anns[self.image_list[im_id]]

        return anno

    def _get_image(self, im_id):
        """ Load the image file that the annotation at position im_id belongs to. """
        image_id = self.coco_set.anns[self.image_list[im_id]]['image_id']
        path = self.coco_set.loadImgs([image_id])[0]['file_name']
        img = self.image_loader(os.path.join(self.img_pth, path))
        return img

    def get_meta_info(self, im_id):
        """ Meta information for entry im_id; every field is None when the lookup fails. """
        # start from the all-None fallback and fill in what can be resolved
        object_meta = OrderedDict({'object_class_name': None,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})
        try:
            cat_dict_current = self.cats[self.coco_set.anns[self.image_list[im_id]]['category_id']]
            class_name = cat_dict_current['name']
            major_class = cat_dict_current['supercategory']
        except Exception:
            # best effort: keep the fallback, but let KeyboardInterrupt/SystemExit propagate
            return object_meta

        object_meta['object_class_name'] = class_name
        object_meta['major_class'] = major_class
        return object_meta

    def get_class_name(self, im_id):
        """ Category name of the annotation at position im_id. """
        cat_dict_current = self.cats[self.coco_set.anns[self.image_list[im_id]]['category_id']]
        return cat_dict_current['name']

    def get_image(self, image_id, anno=None):
        """ Fetch one entry.

        args:
            image_id - position in self.image_list
            anno(None) - annotation dict as returned by get_image_info. If None, it is computed.

        returns:
            frame - the loaded image
            anno - the annotation dict
            object_meta - OrderedDict with meta information (see get_meta_info)
        """
        frame = self._get_image(image_id)

        if anno is None:
            anno = self.get_image_info(image_id)

        object_meta = self.get_meta_info(image_id)

        return frame, anno, object_meta
|
170
lib/train/dataset/coco_seq.py
Normal file
170
lib/train/dataset/coco_seq.py
Normal file
@@ -0,0 +1,170 @@
|
||||
import os
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
import torch
|
||||
import random
|
||||
from pycocotools.coco import COCO
|
||||
from collections import OrderedDict
|
||||
from lib.train.admin import env_settings
|
||||
|
||||
|
||||
class MSCOCOSeq(BaseVideoDataset):
|
||||
""" The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.
|
||||
|
||||
Publication:
|
||||
Microsoft COCO: Common Objects in Context.
|
||||
Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
|
||||
Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
|
||||
ECCV, 2014
|
||||
https://arxiv.org/pdf/1405.0312.pdf
|
||||
|
||||
Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
|
||||
organized as follows.
|
||||
- coco_root
|
||||
- annotations
|
||||
- instances_train2014.json
|
||||
- instances_train2017.json
|
||||
- images
|
||||
- train2014
|
||||
- train2017
|
||||
|
||||
Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
|
||||
"""
|
||||
|
||||
def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"):
|
||||
"""
|
||||
args:
|
||||
root - path to the coco dataset.
|
||||
image_loader (default_image_loader) - The function to read the images. If installed,
|
||||
jpeg4py (https://github.com/ajkxyz/jpeg4py) is used by default. Else,
|
||||
opencv's imread is used.
|
||||
data_fraction (None) - Fraction of images to be used. The images are selected randomly. If None, all the
|
||||
images will be used
|
||||
split - 'train' or 'val'.
|
||||
version - version of coco dataset (2014 or 2017)
|
||||
"""
|
||||
root = env_settings().coco_dir if root is None else root
|
||||
super().__init__('COCO', root, image_loader)
|
||||
|
||||
self.img_pth = os.path.join(root, 'images/{}{}/'.format(split, version))
|
||||
self.anno_path = os.path.join(root, 'annotations/instances_{}{}.json'.format(split, version))
|
||||
|
||||
# Load the COCO set.
|
||||
self.coco_set = COCO(self.anno_path)
|
||||
|
||||
self.cats = self.coco_set.cats
|
||||
|
||||
self.class_list = self.get_class_list()
|
||||
|
||||
self.sequence_list = self._get_sequence_list()
|
||||
|
||||
if data_fraction is not None:
|
||||
self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
|
||||
self.seq_per_class = self._build_seq_per_class()
|
||||
|
||||
def _get_sequence_list(self):
    """Return the ids of all annotations whose 'iscrowd' flag equals 0."""
    annotations = self.coco_set.anns
    return [ann_id for ann_id, ann in annotations.items() if ann['iscrowd'] == 0]
|
||||
|
||||
def is_video_sequence(self):
    """Always False: this dataset does not report its sequences as videos."""
    return False
|
||||
|
||||
def get_num_classes(self):
    """Return how many entries self.class_list holds."""
    class_names = self.class_list
    return len(class_names)
|
||||
|
||||
def get_name(self):
    """Return the identifier string of this dataset."""
    return 'coco'
|
||||
|
||||
def has_class_info(self):
    """Always True: class names are available through the category table."""
    return True
|
||||
|
||||
def get_class_list(self):
    """Return the 'name' of every category in self.cats, in the dictionary's iteration order."""
    return [category['name'] for category in self.cats.values()]
|
||||
|
||||
def has_segmentation_info(self):
    """Always True: get_sequence_info returns a 'mask' entry for this dataset."""
    return True
|
||||
|
||||
def get_num_sequences(self):
    """Return the number of entries in self.sequence_list."""
    sequences = self.sequence_list
    return len(sequences)
|
||||
|
||||
def _build_seq_per_class(self):
    """Group the positions of self.sequence_list by category name.

    returns:
        dict mapping a category name to the list of indices (into self.sequence_list) of the
        annotations that belong to that category, in ascending order.
    """
    annotations = self.coco_set.anns
    index_by_class = {}
    for position, ann_id in enumerate(self.sequence_list):
        category = self.cats[annotations[ann_id]['category_id']]
        index_by_class.setdefault(category['name'], []).append(position)
    return index_by_class
|
||||
|
||||
def get_sequences_in_class(self, class_name):
    """Return the sequence indices stored for class_name; raises KeyError for an unknown class."""
    per_class = self.seq_per_class
    return per_class[class_name]
|
||||
|
||||
def get_sequence_info(self, seq_id):
    """Build the annotation tensors for the single-frame sequence seq_id.

    returns:
        dict with
            'bbox'    - the annotation's box viewed as a (1, 4) tensor
            'mask'    - the result of coco_set.annToMask with a leading dimension of size 1
            'valid'   - bool tensor, True when box entries 2 and 3 both exceed 50
            'visible' - uint8 copy of 'valid'
    """
    anno = self._get_anno(seq_id)
    box = torch.Tensor(anno['bbox']).view(1, 4)
    segmentation = torch.Tensor(self.coco_set.annToMask(anno)).unsqueeze(dim=0)

    # 2021.1.3: to avoid too small bounding boxes the threshold is 50 pixels.
    # Entries 2 and 3 are presumably the box width and height - confirm against the COCO format.
    extent_2_ok = box[:, 2] > 50
    extent_3_ok = box[:, 3] > 50
    is_valid = extent_2_ok & extent_3_ok

    return {'bbox': box, 'mask': segmentation, 'valid': is_valid, 'visible': is_valid.clone().byte()}
|
||||
|
||||
def _get_anno(self, seq_id):
    """Return the annotation dict of the annotation stored at position seq_id of self.sequence_list."""
    ann_id = self.sequence_list[seq_id]
    return self.coco_set.anns[ann_id]
|
||||
|
||||
def _get_frames(self, seq_id):
    """Load the image that the annotation behind seq_id belongs to, using self.image_loader."""
    annotation = self.coco_set.anns[self.sequence_list[seq_id]]
    image_record = self.coco_set.loadImgs([annotation['image_id']])[0]
    image_file = os.path.join(self.img_pth, image_record['file_name'])
    return self.image_loader(image_file)
|
||||
|
||||
def get_meta_info(self, seq_id):
    """Return the meta information of sequence seq_id.

    returns:
        OrderedDict with the keys 'object_class_name', 'motion_class', 'major_class', 'root_class' and
        'motion_adverb'. 'object_class_name' and 'major_class' carry the category's 'name' and
        'supercategory'; the remaining entries are always None. When the category cannot be looked up,
        every entry is None.
    """
    class_name, major_class = None, None
    try:
        category = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
        # Both fields are read before either is stored, so a partially described category
        # still yields an all-None result.
        class_name, major_class = category['name'], category['supercategory']
    except Exception:
        # Best effort: fall back to empty meta info. Unlike a bare `except:` this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    return OrderedDict({'object_class_name': class_name,
                        'motion_class': None,
                        'major_class': major_class,
                        'root_class': None,
                        'motion_adverb': None})
|
||||
|
||||
|
||||
def get_class_name(self, seq_id):
    """Return the category name of the annotation behind sequence seq_id."""
    ann_id = self.sequence_list[seq_id]
    category_id = self.coco_set.anns[ann_id]['category_id']
    return self.cats[category_id]['name']
|
||||
|
||||
def get_frames(self, seq_id=None, frame_ids=None, anno=None):
    """Return the image of sequence seq_id replicated once per entry of frame_ids.

    COCO is an image dataset, so the one available image (and its annotation) is repeated
    len(frame_ids) times.

    args:
        seq_id - index of the sequence.
        frame_ids - iterable whose length decides how many copies are returned.
        anno (None) - annotation dict as returned by get_sequence_info. Computed when None.

    returns:
        (list of image copies, dict mapping each annotation key to a list with element 0 of its value
        repeated per frame, meta info of the sequence)
    """
    image = self._get_frames(seq_id)
    copies = [image.copy() for _ in frame_ids]

    if anno is None:
        anno = self.get_sequence_info(seq_id)

    per_frame_anno = {name: [entry[0, ...] for _ in frame_ids] for name, entry in anno.items()}

    return copies, per_frame_anno, self.get_meta_info(seq_id)
|
177
lib/train/dataset/coco_seq_lmdb.py
Normal file
177
lib/train/dataset/coco_seq_lmdb.py
Normal file
@@ -0,0 +1,177 @@
|
||||
import os
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
import torch
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
from lib.train.admin import env_settings
|
||||
from lib.train.dataset.COCO_tool import COCO
|
||||
from lib.utils.lmdb_utils import decode_img, decode_json
|
||||
import time
|
||||
|
||||
class MSCOCOSeq_lmdb(BaseVideoDataset):
    """ The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.

    In this variant the annotation file and the images are fetched through decode_json / decode_img
    from lib.utils.lmdb_utils, using paths relative to the dataset root.

    Publication:
        Microsoft COCO: Common Objects in Context.
        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
        ECCV, 2014
        https://arxiv.org/pdf/1405.0312.pdf

    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
    organized as follows.
        - coco_root
            - annotations
                - instances_train2014.json
                - instances_train2017.json
            - images
                - train2014
                - train2017

    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"):
        """
        args:
            root - path to the coco dataset. When None, env_settings().coco_dir is used.
            image_loader (jpeg4py_loader) - Passed on to the base class. Images themselves are read
                                            with decode_img in this class.
            data_fraction (None) - Fraction of the sequences to keep, drawn with random.sample. If None,
                                   all of them are used.
            split - 'train' or 'val'.
            version - version of coco dataset (2014 or 2017)
        """
        root = env_settings().coco_dir if root is None else root
        super().__init__('COCO_lmdb', root, image_loader)
        self.root = root
        # Both paths are relative to the root; they are combined with it by the lmdb helpers.
        self.img_pth = 'images/{}{}/'.format(split, version)
        self.anno_path = 'annotations/instances_{}{}.json'.format(split, version)

        # Load the COCO set.
        print('loading annotations into memory...')
        tic = time.time()
        coco_json = decode_json(root, self.anno_path)
        print('Done (t={:0.2f}s)'.format(time.time() - tic))

        self.coco_set = COCO(coco_json)

        self.cats = self.coco_set.cats

        self.class_list = self.get_class_list()

        self.sequence_list = self._get_sequence_list()

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
        # Built after the optional sub-sampling so the indices refer to the final sequence list.
        self.seq_per_class = self._build_seq_per_class()

    def _get_sequence_list(self):
        """Return the ids of all annotations whose 'iscrowd' flag equals 0."""
        anns = self.coco_set.anns
        return [a for a in anns.keys() if anns[a]['iscrowd'] == 0]

    def is_video_sequence(self):
        """Always False for this dataset."""
        return False

    def get_num_classes(self):
        """Return the number of category names in self.class_list."""
        return len(self.class_list)

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'coco_lmdb'

    def has_class_info(self):
        """Always True: class names are available through the category table."""
        return True

    def get_class_list(self):
        """Return the 'name' of every category in self.cats."""
        return [self.cats[cat_id]['name'] for cat_id in self.cats.keys()]

    def has_segmentation_info(self):
        """Always True: get_sequence_info returns a 'mask' entry."""
        return True

    def get_num_sequences(self):
        """Return the number of entries in self.sequence_list."""
        return len(self.sequence_list)

    def _build_seq_per_class(self):
        """Return a dict mapping each category name to the indices of its sequences."""
        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = self.cats[self.coco_set.anns[seq]['category_id']]['name']
            seq_per_class.setdefault(class_name, []).append(i)

        return seq_per_class

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices stored for class_name; raises KeyError for an unknown class."""
        return self.seq_per_class[class_name]

    def get_sequence_info(self, seq_id):
        """Return a dict with the 'bbox', 'mask', 'valid' and 'visible' tensors of sequence seq_id."""
        anno = self._get_anno(seq_id)

        bbox = torch.Tensor(anno['bbox']).view(1, 4)

        mask = torch.Tensor(self.coco_set.annToMask(anno)).unsqueeze(dim=0)

        # 2021.1.3: to avoid too small bounding boxes the threshold is 50 pixels.
        valid = (bbox[:, 2] > 50) & (bbox[:, 3] > 50)

        visible = valid.clone().byte()

        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}

    def _get_anno(self, seq_id):
        """Return the annotation dict behind position seq_id of self.sequence_list."""
        return self.coco_set.anns[self.sequence_list[seq_id]]

    def _get_frames(self, seq_id):
        """Decode the image that the annotation behind seq_id belongs to."""
        path = self.coco_set.loadImgs([self.coco_set.anns[self.sequence_list[seq_id]]['image_id']])[0]['file_name']
        return decode_img(self.root, os.path.join(self.img_pth, path))

    def get_meta_info(self, seq_id):
        """Return the meta information of sequence seq_id as an OrderedDict.

        'object_class_name' and 'major_class' carry the category's 'name' and 'supercategory'; the other
        entries are always None. When the category cannot be looked up every entry is None.
        """
        try:
            cat_dict_current = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
            object_meta = OrderedDict({'object_class_name': cat_dict_current['name'],
                                       'motion_class': None,
                                       'major_class': cat_dict_current['supercategory'],
                                       'root_class': None,
                                       'motion_adverb': None})
        except Exception:
            # Best effort fallback. `except Exception` (instead of a bare `except:`) lets
            # KeyboardInterrupt and SystemExit propagate.
            object_meta = OrderedDict({'object_class_name': None,
                                       'motion_class': None,
                                       'major_class': None,
                                       'root_class': None,
                                       'motion_adverb': None})
        return object_meta

    def get_class_name(self, seq_id):
        """Return the category name of the annotation behind sequence seq_id."""
        cat_dict_current = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
        return cat_dict_current['name']

    def get_frames(self, seq_id=None, frame_ids=None, anno=None):
        """Return (frames, per-frame annotations, meta info) for sequence seq_id.

        COCO is an image dataset. Thus we replicate the image denoted by seq_id len(frame_ids) times, and
        return a list containing these replicated images. The annotation is replicated the same way.
        """
        frame = self._get_frames(seq_id)

        frame_list = [frame.copy() for _ in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[0, ...] for _ in frame_ids]

        object_meta = self.get_meta_info(seq_id)

        return frame_list, anno_frames, object_meta
|
186
lib/train/dataset/got10k.py
Normal file
186
lib/train/dataset/got10k.py
Normal file
@@ -0,0 +1,186 @@
|
||||
import os
|
||||
import os.path
|
||||
import numpy as np
|
||||
import torch
|
||||
import csv
|
||||
import pandas
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
from lib.train.admin import env_settings
|
||||
|
||||
|
||||
class Got10k(BaseVideoDataset):
    """ GOT-10k dataset.

    Publication:
        GOT-10k: A Large High-Diversity Benchmark for Generic Object Tracking in the Wild
        Lianghua Huang, Xin Zhao, and Kaiqi Huang
        arXiv:1810.11981, 2018
        https://arxiv.org/pdf/1810.11981.pdf

    Download dataset from http://got-10k.aitestunion.com/downloads
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, split=None, seq_ids=None, data_fraction=None):
        """
        args:
            root - path to the got-10k training data. Note: This should point to the 'train' folder inside GOT-10k
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            split - 'train', 'val', 'train_full', 'vottrain' or 'votval'. Note: The validation split here is a
                    subset of the official got-10k train split, NOT the official got-10k validation split. To use
                    the official validation split, provide that as the root folder instead.
            seq_ids - List containing the ids of the videos to be used for training. Note: Only one of 'split' or 'seq_ids'
                        options can be used at the same time.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default

        raises:
            ValueError - if both split and seq_ids are given, or if split is not a known name.
        """
        root = env_settings().got10k_dir if root is None else root
        super().__init__('GOT10k', root, image_loader)

        # all folders inside the root
        self.sequence_list = self._get_sequence_list()

        # seq_id is the index of the folder inside the got10k root path
        if split is not None:
            if seq_ids is not None:
                raise ValueError('Cannot set both split_name and seq_ids.')
            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
            # Split name -> file (inside data_specs) listing the sequence indices of that split.
            split_files = {'train': 'got10k_train_split.txt',
                           'val': 'got10k_val_split.txt',
                           'train_full': 'got10k_train_full_split.txt',
                           'vottrain': 'got10k_vot_train_split.txt',
                           'votval': 'got10k_vot_val_split.txt'}
            if split not in split_files:
                raise ValueError('Unknown split name.')
            file_path = os.path.join(ltr_path, 'data_specs', split_files[split])
            # `squeeze=True` is no longer accepted by pandas.read_csv; squeeze the frame afterwards instead.
            seq_ids = pandas.read_csv(file_path, header=None, dtype=np.int64).squeeze("columns").values.tolist()
        elif seq_ids is None:
            seq_ids = list(range(0, len(self.sequence_list)))

        self.sequence_list = [self.sequence_list[i] for i in seq_ids]

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))

        self.sequence_meta_info = self._load_meta_info()
        self.seq_per_class = self._build_seq_per_class()

        self.class_list = list(self.seq_per_class.keys())
        self.class_list.sort()

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'got10k'

    def has_class_info(self):
        """Always True: class names are read from each sequence's meta_info.ini."""
        return True

    def has_occlusion_info(self):
        """Always True: visibility is read from absence.label and cover.label."""
        return True

    def _load_meta_info(self):
        """Return a dict mapping every sequence name to its meta information."""
        sequence_meta_info = {s: self._read_meta(os.path.join(self.root, s)) for s in self.sequence_list}
        return sequence_meta_info

    def _read_meta(self, seq_path):
        """Parse meta_info.ini of one sequence folder.

        Lines 5-9 of the file are used; from each the text after the last ': ' is kept with its final
        character (the line break) dropped. If the file cannot be read or parsed, every entry is None.
        """
        try:
            with open(os.path.join(seq_path, 'meta_info.ini')) as f:
                meta_info = f.readlines()
            object_meta = OrderedDict({'object_class_name': meta_info[5].split(': ')[-1][:-1],
                                       'motion_class': meta_info[6].split(': ')[-1][:-1],
                                       'major_class': meta_info[7].split(': ')[-1][:-1],
                                       'root_class': meta_info[8].split(': ')[-1][:-1],
                                       'motion_adverb': meta_info[9].split(': ')[-1][:-1]})
        except Exception:
            # Best effort fallback. `except Exception` (instead of a bare `except:`) lets
            # KeyboardInterrupt and SystemExit propagate while meta info is being loaded.
            object_meta = OrderedDict({'object_class_name': None,
                                       'motion_class': None,
                                       'major_class': None,
                                       'root_class': None,
                                       'motion_adverb': None})
        return object_meta

    def _build_seq_per_class(self):
        """Return a dict mapping each object class name to the indices of its sequences."""
        seq_per_class = {}

        for i, s in enumerate(self.sequence_list):
            object_class = self.sequence_meta_info[s]['object_class_name']
            seq_per_class.setdefault(object_class, []).append(i)

        return seq_per_class

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices stored for class_name; raises KeyError for an unknown class."""
        return self.seq_per_class[class_name]

    def _get_sequence_list(self):
        """Return the first column of every row of list.txt in the dataset root."""
        with open(os.path.join(self.root, 'list.txt')) as f:
            dir_list = list(csv.reader(f))
        dir_list = [dir_name[0] for dir_name in dir_list]
        return dir_list

    def _read_bb_anno(self, seq_path):
        """Read groundtruth.txt of a sequence into a float32 tensor with one row per line."""
        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values
        return torch.tensor(gt)

    def _read_target_visible(self, seq_path):
        """Read the per-frame absence and cover labels of a sequence.

        returns:
            target_visible - uint8 tensor, 1 where the absence flag is 0 and the cover value is positive.
            visible_ratio - float tensor, cover value divided by 8.
        """
        # Read full occlusion and out_of_view
        occlusion_file = os.path.join(seq_path, "absence.label")
        cover_file = os.path.join(seq_path, "cover.label")

        with open(occlusion_file, 'r', newline='') as f:
            occlusion = torch.ByteTensor([int(v[0]) for v in csv.reader(f)])
        with open(cover_file, 'r', newline='') as f:
            cover = torch.ByteTensor([int(v[0]) for v in csv.reader(f)])

        # Bitwise NOT of a 0/1 byte keeps the lowest bit inverted, which is all the AND below looks at.
        target_visible = ~occlusion & (cover>0).byte()

        visible_ratio = cover.float() / 8
        return target_visible, visible_ratio

    def _get_sequence_path(self, seq_id):
        """Return the folder of sequence seq_id inside the dataset root."""
        return os.path.join(self.root, self.sequence_list[seq_id])

    def get_sequence_info(self, seq_id):
        """Return a dict with the 'bbox', 'valid', 'visible' and 'visible_ratio' tensors of a sequence.

        A frame is valid when box entries 2 and 3 are both positive, and visible only when it is also
        marked visible by the label files.
        """
        seq_path = self._get_sequence_path(seq_id)
        bbox = self._read_bb_anno(seq_path)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible, visible_ratio = self._read_target_visible(seq_path)
        visible = visible & valid.byte()

        return {'bbox': bbox, 'valid': valid, 'visible': visible, 'visible_ratio': visible_ratio}

    def _get_frame_path(self, seq_path, frame_id):
        """Return the path of the image for the zero-based frame_id."""
        return os.path.join(seq_path, '{:08}.jpg'.format(frame_id+1))    # frames start from 1

    def _get_frame(self, seq_path, frame_id):
        """Load one frame with self.image_loader."""
        return self.image_loader(self._get_frame_path(seq_path, frame_id))

    def get_class_name(self, seq_id):
        """Return the 'object_class_name' meta entry of sequence seq_id."""
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]

        return obj_meta['object_class_name']

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return (frames, per-frame annotations, meta info) for the given frames of a sequence.

        args:
            seq_id - index of the sequence.
            frame_ids - zero-based indices of the frames to load.
            anno (None) - annotation dict as returned by get_sequence_info. Computed when None.
        """
        seq_path = self._get_sequence_path(seq_id)
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]

        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        return frame_list, anno_frames, obj_meta
|
183
lib/train/dataset/got10k_lmdb.py
Normal file
183
lib/train/dataset/got10k_lmdb.py
Normal file
@@ -0,0 +1,183 @@
|
||||
import os
|
||||
import os.path
|
||||
import numpy as np
|
||||
import torch
|
||||
import csv
|
||||
import pandas
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
from lib.train.admin import env_settings
|
||||
|
||||
'''2021.1.16 Got10k for loading lmdb dataset'''
|
||||
from lib.utils.lmdb_utils import *
|
||||
|
||||
|
||||
class Got10k_lmdb(BaseVideoDataset):
    """GOT-10k dataset whose files are read through the lmdb helpers (decode_str / decode_img).

    All paths handed to the helpers are relative and start with 'train/'.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, split=None, seq_ids=None, data_fraction=None):
        """
        args:
            root - path to the got-10k lmdb data. When None, env_settings().got10k_lmdb_dir is used.
            image_loader (jpeg4py_loader) - Passed on to the base class. Frames themselves are read with
                                            decode_img in this class.
            split - 'train', 'val', 'train_full', 'vottrain' or 'votval'. Note: The validation split here is a
                    subset of the official got-10k train split, NOT the official got-10k validation split. To use
                    the official validation split, provide that as the root folder instead.
            seq_ids - List containing the ids of the videos to be used for training. Note: Only one of 'split' or 'seq_ids'
                        options can be used at the same time.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default

        raises:
            ValueError - if both split and seq_ids are given, or if split is not a known name.
        """
        root = env_settings().got10k_lmdb_dir if root is None else root
        super().__init__('GOT10k_lmdb', root, image_loader)

        # all folders inside the root
        self.sequence_list = self._get_sequence_list()

        # seq_id is the index of the folder inside the got10k root path
        if split is not None:
            if seq_ids is not None:
                raise ValueError('Cannot set both split_name and seq_ids.')
            train_lib_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
            # Split name -> file (inside data_specs) listing the sequence indices of that split.
            split_files = {'train': 'got10k_train_split.txt',
                           'val': 'got10k_val_split.txt',
                           'train_full': 'got10k_train_full_split.txt',
                           'vottrain': 'got10k_vot_train_split.txt',
                           'votval': 'got10k_vot_val_split.txt'}
            if split not in split_files:
                raise ValueError('Unknown split name.')
            file_path = os.path.join(train_lib_path, 'data_specs', split_files[split])
            # The `squeeze` keyword of pandas.read_csv was removed in pandas 2.0; squeezing the
            # resulting frame is the documented replacement and matches the non-lmdb Got10k class.
            seq_ids = pandas.read_csv(file_path, header=None, dtype=np.int64).squeeze("columns").values.tolist()
        elif seq_ids is None:
            seq_ids = list(range(0, len(self.sequence_list)))

        self.sequence_list = [self.sequence_list[i] for i in seq_ids]

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))

        self.sequence_meta_info = self._load_meta_info()
        self.seq_per_class = self._build_seq_per_class()

        self.class_list = list(self.seq_per_class.keys())
        self.class_list.sort()

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'got10k_lmdb'

    def has_class_info(self):
        """Always True: class names are read from each sequence's meta_info.ini."""
        return True

    def has_occlusion_info(self):
        """Always True: visibility is read from absence.label and cover.label."""
        return True

    def _load_meta_info(self):
        """Return a dict mapping every sequence name to its meta information.

        Sequences whose meta_info.ini cannot be decoded or parsed get an entry with all values None.
        """
        def _read_meta(meta_info):
            # meta_info holds the lines of meta_info.ini (already split on '\n'); lines 5-9 are
            # used and the text after the last ': ' of each is kept.
            object_meta = OrderedDict({'object_class_name': meta_info[5].split(': ')[-1],
                                       'motion_class': meta_info[6].split(': ')[-1],
                                       'major_class': meta_info[7].split(': ')[-1],
                                       'root_class': meta_info[8].split(': ')[-1],
                                       'motion_adverb': meta_info[9].split(': ')[-1]})

            return object_meta

        sequence_meta_info = {}
        for s in self.sequence_list:
            try:
                meta_str = decode_str(self.root, "train/%s/meta_info.ini" %s)
                sequence_meta_info[s] = _read_meta(meta_str.split('\n'))
            except Exception:
                # Best effort fallback. `except Exception` (instead of a bare `except:`) lets
                # KeyboardInterrupt and SystemExit propagate out of this potentially long loop.
                sequence_meta_info[s] = OrderedDict({'object_class_name': None,
                                                     'motion_class': None,
                                                     'major_class': None,
                                                     'root_class': None,
                                                     'motion_adverb': None})
        return sequence_meta_info

    def _build_seq_per_class(self):
        """Return a dict mapping each object class name to the indices of its sequences."""
        seq_per_class = {}

        for i, s in enumerate(self.sequence_list):
            object_class = self.sequence_meta_info[s]['object_class_name']
            seq_per_class.setdefault(object_class, []).append(i)

        return seq_per_class

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices stored for class_name; raises KeyError for an unknown class."""
        return self.seq_per_class[class_name]

    def _get_sequence_list(self):
        """Return the lines of 'train/list.txt' as a list of sequence names."""
        dir_str = decode_str(self.root, 'train/list.txt')
        # NOTE(review): if the stored text ends with a line break, the last entry is an empty
        # string - confirm against how the lmdb was written.
        dir_list = dir_str.split('\n')
        return dir_list

    def _read_bb_anno(self, seq_path):
        """Decode groundtruth.txt of a sequence into a float32 tensor with one row per line."""
        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
        gt_str_list = decode_str(self.root, bb_anno_file).split('\n')[:-1]  # the last line in got10k is empty
        gt_list = [list(map(float, line.split(','))) for line in gt_str_list]
        gt_arr = np.array(gt_list).astype(np.float32)

        return torch.tensor(gt_arr)

    def _read_target_visible(self, seq_path):
        """Decode the per-frame absence and cover labels of a sequence.

        returns:
            target_visible - uint8 tensor, 1 where the absence flag is 0 and the cover value is positive.
            visible_ratio - float tensor, cover value divided by 8.
        """
        # full occlusion and out_of_view files
        occlusion_file = os.path.join(seq_path, "absence.label")
        cover_file = os.path.join(seq_path, "cover.label")
        # Read these files
        occ_list = list(map(int, decode_str(self.root, occlusion_file).split('\n')[:-1]))  # the last line in got10k is empty
        occlusion = torch.ByteTensor(occ_list)
        cover_list = list(map(int, decode_str(self.root, cover_file).split('\n')[:-1]))  # the last line in got10k is empty
        cover = torch.ByteTensor(cover_list)

        # Bitwise NOT of a 0/1 byte keeps the lowest bit inverted, which is all the AND below looks at.
        target_visible = ~occlusion & (cover>0).byte()

        visible_ratio = cover.float() / 8
        return target_visible, visible_ratio

    def _get_sequence_path(self, seq_id):
        """Return the relative path ('train/<sequence name>') of sequence seq_id."""
        return os.path.join("train", self.sequence_list[seq_id])

    def get_sequence_info(self, seq_id):
        """Return a dict with the 'bbox', 'valid', 'visible' and 'visible_ratio' tensors of a sequence.

        A frame is valid when box entries 2 and 3 are both positive, and visible only when it is also
        marked visible by the label files.
        """
        seq_path = self._get_sequence_path(seq_id)
        bbox = self._read_bb_anno(seq_path)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible, visible_ratio = self._read_target_visible(seq_path)
        visible = visible & valid.byte()

        return {'bbox': bbox, 'valid': valid, 'visible': visible, 'visible_ratio': visible_ratio}

    def _get_frame_path(self, seq_path, frame_id):
        """Return the relative path of the image for the zero-based frame_id."""
        return os.path.join(seq_path, '{:08}.jpg'.format(frame_id+1))    # frames start from 1

    def _get_frame(self, seq_path, frame_id):
        """Decode one frame with decode_img."""
        return decode_img(self.root, self._get_frame_path(seq_path, frame_id))

    def get_class_name(self, seq_id):
        """Return the 'object_class_name' meta entry of sequence seq_id."""
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]

        return obj_meta['object_class_name']

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return (frames, per-frame annotations, meta info) for the given frames of a sequence.

        args:
            seq_id - index of the sequence.
            frame_ids - zero-based indices of the frames to load.
            anno (None) - annotation dict as returned by get_sequence_info. Computed when None.
        """
        seq_path = self._get_sequence_path(seq_id)
        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]

        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        return frame_list, anno_frames, obj_meta
|
159
lib/train/dataset/imagenetvid.py
Normal file
159
lib/train/dataset/imagenetvid.py
Normal file
@@ -0,0 +1,159 @@
|
||||
import os
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
import xml.etree.ElementTree as ET
|
||||
import json
|
||||
import torch
|
||||
from collections import OrderedDict
|
||||
from lib.train.admin import env_settings
|
||||
|
||||
|
||||
def get_target_to_image_ratio(seq):
    """Return sqrt(target area / image area) for the first annotated frame of seq.

    The target area is the product of entries 2 and 3 of the first row of seq['anno']; the image area
    is the product of seq['image_size']. The result is a zero-dimensional tensor.
    """
    boxes = torch.Tensor(seq['anno'])
    target_area = boxes[0, 2:4].prod()
    image_area = torch.Tensor(seq['image_size']).prod()
    return (target_area / image_area).sqrt()
|
||||
|
||||
|
||||
class ImagenetVID(BaseVideoDataset):
    """ Imagenet VID dataset.

    Publication:
        ImageNet Large Scale Visual Recognition Challenge
        Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy,
        Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei
        IJCV, 2015
        https://arxiv.org/pdf/1409.0575.pdf

    Download the dataset from http://image-net.org/
    """
    def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1):
        """
        args:
            root - path to the imagenet vid dataset. When None, env_settings().imagenet_dir is used.
            image_loader (jpeg4py_loader) - The function to read the images.
            min_length - Minimum allowed sequence length.
            max_target_area - upper bound (exclusive) on get_target_to_image_ratio for the first frame.
                                Can be used to filter out targets which cover complete image.
        """
        root = env_settings().imagenet_dir if root is None else root
        super().__init__("imagenetvid", root, image_loader)

        cache_file = os.path.join(root, 'cache.json')
        if os.path.isfile(cache_file):
            # If available, load the pre-processed cache file containing meta-info for each sequence
            with open(cache_file, 'r') as f:
                sequence_list_dict = json.load(f)

            self.sequence_list = sequence_list_dict
        else:
            # Else process the imagenet annotations and generate the cache file
            self.sequence_list = self._process_anno(root)

            with open(cache_file, 'w') as f:
                json.dump(self.sequence_list, f)

        # Filter the sequences based on min_length and max_target_area in the first frame
        self.sequence_list = [x for x in self.sequence_list if len(x['anno']) >= min_length and
                              get_target_to_image_ratio(x) < max_target_area]

    def get_name(self):
        """Return the identifier string of this dataset."""
        return 'imagenetvid'

    def get_num_sequences(self):
        """Return the number of sequences left after filtering."""
        return len(self.sequence_list)

    def get_sequence_info(self, seq_id):
        """Return a dict with the 'bbox', 'valid' and 'visible' tensors of sequence seq_id.

        A frame is valid when box width and height are both positive, and visible only when it is also
        flagged in the sequence's 'target_visible' list.
        """
        bb_anno = torch.Tensor(self.sequence_list[seq_id]['anno'])
        valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0)
        visible = torch.ByteTensor(self.sequence_list[seq_id]['target_visible']) & valid.byte()
        return {'bbox': bb_anno, 'valid': valid, 'visible': visible}

    def _get_frame(self, sequence, frame_id):
        """Load frame frame_id of a tracklet; frame_id is relative to the tracklet's 'start_frame'."""
        set_name = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id'])
        vid_name = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id'])
        frame_number = frame_id + sequence['start_frame']
        frame_path = os.path.join(self.root, 'Data', 'VID', 'train', set_name, vid_name,
                                  '{:06d}.JPEG'.format(frame_number))
        return self.image_loader(frame_path)

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return (frames, per-frame annotations, meta info) for the given frames of a sequence.

        args:
            seq_id - index of the sequence.
            frame_ids - indices of the frames to load, relative to the start of the tracklet.
            anno (None) - annotation dict as returned by get_sequence_info. Computed when None.
        """
        sequence = self.sequence_list[seq_id]

        frame_list = [self._get_frame(sequence, f) for f in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        # Create anno dict
        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        # added the class info to the meta info
        # NOTE(review): the key here is 'object_class', while the COCO and GOT-10k datasets use
        # 'object_class_name' - confirm which key the consumers of the meta info expect.
        object_meta = OrderedDict({'object_class': sequence['class_name'],
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta

    def _process_anno(self, root):
        """Build one sequence description per tracklet from the XML annotations below root.

        A tracklet starts at the first frame in which its 'trackid' appears and ends right before the
        first later frame in which it is missing.

        returns:
            list of dicts with the keys 'set_id', 'vid_id', 'class_name', 'start_frame', 'anno'
            (list of [x, y, w, h]), 'target_visible' (list of bool) and 'image_size' ([width, height]).
        """
        # Builds individual tracklets
        base_vid_anno_path = os.path.join(root, 'Annotations', 'VID', 'train')

        all_sequences = []
        # `set_dir` rather than `set`, so the builtin is not shadowed.
        for set_dir in sorted(os.listdir(base_vid_anno_path)):
            set_id = int(set_dir.split('_')[-1])
            for vid in sorted(os.listdir(os.path.join(base_vid_anno_path, set_dir))):

                vid_id = int(vid.split('_')[-1])
                vid_anno_path = os.path.join(base_vid_anno_path, set_dir, vid)
                anno_files = sorted(os.listdir(vid_anno_path))

                # The image size is taken from the first frame's annotation only.
                frame1_anno = ET.parse(os.path.join(vid_anno_path, anno_files[0]))
                image_size = [int(frame1_anno.find('size/width').text), int(frame1_anno.find('size/height').text)]

                objects = [ET.ElementTree(file=os.path.join(vid_anno_path, f)).findall('object')
                           for f in anno_files]

                tracklets = {}

                # Find all tracklets along with start frame
                for f_id, all_targets in enumerate(objects):
                    for target in all_targets:
                        tracklet_id = target.find('trackid').text
                        if tracklet_id not in tracklets:
                            tracklets[tracklet_id] = f_id

                for tracklet_id, tracklet_start in tracklets.items():
                    tracklet_anno = []
                    target_visible = []
                    class_name_id = None

                    for f_id in range(tracklet_start, len(objects)):
                        found = False
                        for target in objects[f_id]:
                            if target.find('trackid').text == tracklet_id:
                                if not class_name_id:
                                    class_name_id = target.find('name').text
                                x1 = int(target.find('bndbox/xmin').text)
                                y1 = int(target.find('bndbox/ymin').text)
                                x2 = int(target.find('bndbox/xmax').text)
                                y2 = int(target.find('bndbox/ymax').text)

                                # Corner format -> [x, y, w, h]
                                tracklet_anno.append([x1, y1, x2 - x1, y2 - y1])
                                target_visible.append(target.find('occluded').text == '0')

                                found = True
                                break
                        if not found:
                            # The tracklet ends at the first frame that no longer contains it.
                            break

                    new_sequence = {'set_id': set_id, 'vid_id': vid_id, 'class_name': class_name_id,
                                    'start_frame': tracklet_start, 'anno': tracklet_anno,
                                    'target_visible': target_visible, 'image_size': image_size}
                    all_sequences.append(new_sequence)

        return all_sequences
|
90
lib/train/dataset/imagenetvid_lmdb.py
Normal file
90
lib/train/dataset/imagenetvid_lmdb.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import os
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
import torch
|
||||
from collections import OrderedDict
|
||||
from lib.train.admin import env_settings
|
||||
from lib.utils.lmdb_utils import decode_img, decode_json
|
||||
|
||||
|
||||
def get_target_to_image_ratio(seq):
    """Return sqrt(first-frame target area / image area) for a sequence.

    args:
        seq - sequence dict. seq['anno'] is a list of boxes; entries 2 and 3 of the first box are multiplied
              to obtain the target area. seq['image_size'] is a list whose entries are multiplied to obtain
              the image area.

    returns:
        torch.Tensor - 0-dim tensor holding the ratio.
    """
    first_box_size = torch.Tensor(seq['anno'])[0, 2:4]
    image_area = torch.Tensor(seq['image_size']).prod()
    target_area = first_box_size.prod()
    return (target_area / image_area).sqrt()
|
||||
|
||||
|
||||
class ImagenetVID_lmdb(BaseVideoDataset):
    """ Imagenet VID dataset (variant that reads its data through the lmdb_utils decode helpers).

    Publication:
        ImageNet Large Scale Visual Recognition Challenge
        Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy,
        Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei
        IJCV, 2015
        https://arxiv.org/pdf/1409.0575.pdf

    Download the dataset from http://image-net.org/
    """
    def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1):
        """
        args:
            root - path to the imagenet vid dataset. env_settings().imagenet_dir is used when None.
            image_loader - forwarded to the base class constructor. Frames returned by this class are decoded
                           with decode_img.
            min_length - Minimum allowed sequence length.
            max_target_area - max allowed ratio between target area and image area. Can be used to filter out targets
                              which cover complete image.
        """
        if root is None:
            root = env_settings().imagenet_dir
        super().__init__("imagenetvid_lmdb", root, image_loader)

        # Sequence descriptions are decoded from the "cache.json" entry.
        self.sequence_list = decode_json(root, "cache.json")

        # Keep only sequences that are long enough and whose first-frame target is small enough.
        kept_sequences = []
        for seq in self.sequence_list:
            if len(seq['anno']) < min_length:
                continue
            if get_target_to_image_ratio(seq) < max_target_area:
                kept_sequences.append(seq)
        self.sequence_list = kept_sequences

    def get_name(self):
        """Return the dataset identifier string."""
        return 'imagenetvid_lmdb'

    def get_num_sequences(self):
        """Return the number of sequences left after filtering."""
        return len(self.sequence_list)

    def get_sequence_info(self, seq_id):
        """Return the box, validity and visibility tensors of a sequence.

        args:
            seq_id - index into self.sequence_list.

        returns:
            dict - 'bbox' (stored boxes as a tensor), 'valid' (columns 2 and 3 both positive) and
                   'visible' (stored 'target_visible' flags AND-ed with 'valid').
        """
        boxes = torch.Tensor(self.sequence_list[seq_id]['anno'])
        has_width = boxes[:, 2] > 0
        has_height = boxes[:, 3] > 0
        valid = has_width & has_height
        visible_flags = torch.ByteTensor(self.sequence_list[seq_id]['target_visible'])
        visible = visible_flags & valid.byte()
        return {'bbox': boxes, 'valid': valid, 'visible': visible}

    def _get_frame(self, sequence, frame_id):
        """Decode one frame; frame_id is relative to the sequence's 'start_frame'."""
        set_dir = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id'])
        vid_dir = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id'])
        file_name = '{:06d}.JPEG'.format(frame_id + sequence['start_frame'])
        key = os.path.join('Data', 'VID', 'train', set_dir, vid_dir, file_name)
        return decode_img(self.root, key)

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return the requested frames with their annotations and object meta info.

        args:
            seq_id - index into self.sequence_list.
            frame_ids - frame indices (relative to the start of the sequence) to fetch.
            anno - optional result of get_sequence_info; it is computed when None.

        returns:
            tuple - (list of frames, dict of per-frame annotation lists, OrderedDict of object meta info).
        """
        sequence = self.sequence_list[seq_id]

        frames = []
        for f_id in frame_ids:
            frames.append(self._get_frame(sequence, f_id))

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        # One cloned entry per requested frame for every annotation field.
        anno_frames = {key: [value[f_id, ...].clone() for f_id in frame_ids]
                       for key, value in anno.items()}

        # The class name is the only meta field that is filled in.
        object_meta = OrderedDict([('object_class', sequence['class_name']),
                                   ('motion_class', None),
                                   ('major_class', None),
                                   ('root_class', None),
                                   ('motion_adverb', None)])

        return frames, anno_frames, object_meta
|
||||
|
169
lib/train/dataset/lasot.py
Normal file
169
lib/train/dataset/lasot.py
Normal file
@@ -0,0 +1,169 @@
|
||||
import os
|
||||
import os.path
|
||||
import torch
|
||||
import numpy as np
|
||||
import pandas
|
||||
import csv
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
from lib.train.admin import env_settings
|
||||
|
||||
|
||||
class Lasot(BaseVideoDataset):
    """ LaSOT dataset.

    Publication:
        LaSOT: A High-quality Benchmark for Large-scale Single Object Tracking
        Heng Fan, Liting Lin, Fan Yang, Peng Chu, Ge Deng, Sijia Yu, Hexin Bai, Yong Xu, Chunyuan Liao and Haibin Ling
        CVPR, 2019
        https://arxiv.org/pdf/1809.07845.pdf

    Download the dataset from https://cis.temple.edu/lasot/download.html
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader, vid_ids=None, split=None, data_fraction=None):
        """
        args:
            root - path to the lasot dataset. env_settings().lasot_dir is used when None.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            vid_ids - List containing the ids of the videos (1 - 20) used for training. If vid_ids = [1, 3, 5], then the
                    videos with subscripts -1, -3, and -5 from each class will be used for training.
            split - If split='train', the official train split (protocol-II) is used for training. Note: Only one of
                    vid_ids or split option can be used at a time.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
        """
        root = env_settings().lasot_dir if root is None else root
        super().__init__('LaSOT', root, image_loader)

        # Keep a list of all classes: every entry found directly under the dataset root.
        self.class_list = os.listdir(self.root)
        self.class_to_id = {cls_name: cls_id for cls_id, cls_name in enumerate(self.class_list)}

        self.sequence_list = self._build_sequence_list(vid_ids, split)

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))

        # Built after sub-sampling so the stored indices refer to the final sequence_list.
        self.seq_per_class = self._build_class_list()

    def _build_sequence_list(self, vid_ids=None, split=None):
        """Return the sequence names ('<class>-<id>') selected by either split or vid_ids.

        raises:
            ValueError - if both or neither of split / vid_ids are given, or the split name is unknown.
        """
        if split is not None:
            if vid_ids is not None:
                raise ValueError('Cannot set both split_name and vid_ids.')
            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
            if split == 'train':
                file_path = os.path.join(ltr_path, 'data_specs', 'lasot_train_split.txt')
            else:
                raise ValueError('Unknown split name.')
            # DataFrame.squeeze("columns") is used instead of read_csv(squeeze=True), which pandas 2.x rejects.
            sequence_list = pandas.read_csv(file_path, header=None).squeeze("columns").values.tolist()
        elif vid_ids is not None:
            sequence_list = [c+'-'+str(v) for c in self.class_list for v in vid_ids]
        else:
            raise ValueError('Set either split_name or vid_ids.')

        return sequence_list

    def _build_class_list(self):
        """Map each class name to the list of sequence indices belonging to it."""
        seq_per_class = {}
        for seq_id, seq_name in enumerate(self.sequence_list):
            class_name = seq_name.split('-')[0]
            seq_per_class.setdefault(class_name, []).append(seq_id)

        return seq_per_class

    def get_name(self):
        """Return the dataset identifier string."""
        return 'lasot'

    def has_class_info(self):
        return True

    def has_occlusion_info(self):
        return True

    def get_num_sequences(self):
        return len(self.sequence_list)

    def get_num_classes(self):
        return len(self.class_list)

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices of class_name (KeyError if the class has no sequences)."""
        return self.seq_per_class[class_name]

    def _read_bb_anno(self, seq_path):
        """Read groundtruth.txt of a sequence into a float32 tensor with one row per line."""
        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values
        return torch.tensor(gt)

    def _read_target_visible(self, seq_path):
        """Combine the full-occlusion and out-of-view flags of a sequence.

        The first csv row of each file is read as integer flags. The result is the bitwise
        `~occlusion & ~out_of_view` on uint8 tensors; get_sequence_info ANDs it with a 0/1 mask.
        """
        # Read full occlusion and out_of_view
        occlusion_file = os.path.join(seq_path, "full_occlusion.txt")
        out_of_view_file = os.path.join(seq_path, "out_of_view.txt")

        # newline='' is what the csv module recommends for files handed to csv.reader.
        with open(occlusion_file, 'r', newline='') as f:
            occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]])
        with open(out_of_view_file, 'r', newline='') as f:
            out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]])

        target_visible = ~occlusion & ~out_of_view

        return target_visible

    def _get_sequence_path(self, seq_id):
        """Return '<root>/<class>/<class>-<vid>' for the sequence at index seq_id."""
        seq_name = self.sequence_list[seq_id]
        class_name = seq_name.split('-')[0]
        vid_id = seq_name.split('-')[1]

        return os.path.join(self.root, class_name, class_name + '-' + vid_id)

    def get_sequence_info(self, seq_id):
        """Return the 'bbox', 'valid' and 'visible' tensors of a sequence.

        'valid' marks rows whose columns 2 and 3 are both positive; 'visible' is the occlusion /
        out-of-view information AND-ed with 'valid'.
        """
        seq_path = self._get_sequence_path(seq_id)
        bbox = self._read_bb_anno(seq_path)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = self._read_target_visible(seq_path) & valid.byte()

        return {'bbox': bbox, 'valid': valid, 'visible': visible}

    def _get_frame_path(self, seq_path, frame_id):
        return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_id+1))    # frames start from 1

    def _get_frame(self, seq_path, frame_id):
        return self.image_loader(self._get_frame_path(seq_path, frame_id))

    def _get_class(self, seq_path):
        """Return the class name, i.e. the name of the directory that contains the sequence directory."""
        # seq_path is built with os.path.join, so take it apart with os.path as well instead of
        # splitting on a hard-coded '/', which fails when the platform separator differs.
        raw_class = os.path.basename(os.path.dirname(seq_path))
        return raw_class

    def get_class_name(self, seq_id):
        seq_path = self._get_sequence_path(seq_id)
        obj_class = self._get_class(seq_path)

        return obj_class

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return the requested frames with their annotations and object meta info.

        args:
            seq_id - index into self.sequence_list.
            frame_ids - zero-based frame indices to load.
            anno - optional result of get_sequence_info; it is computed when None.

        returns:
            tuple - (list of frames, dict of per-frame annotation lists, OrderedDict of object meta info).
        """
        seq_path = self._get_sequence_path(seq_id)

        obj_class = self._get_class(seq_path)
        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        object_meta = OrderedDict({'object_class_name': obj_class,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta
|
165
lib/train/dataset/lasot_lmdb.py
Normal file
165
lib/train/dataset/lasot_lmdb.py
Normal file
@@ -0,0 +1,165 @@
|
||||
import os
|
||||
import os.path
|
||||
import torch
|
||||
import numpy as np
|
||||
import pandas
|
||||
import csv
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.data import jpeg4py_loader
|
||||
from lib.train.admin import env_settings
|
||||
'''2021.1.16 Lasot for loading lmdb dataset'''
|
||||
from lib.utils.lmdb_utils import *
|
||||
|
||||
|
||||
class Lasot_lmdb(BaseVideoDataset):
    """ LaSOT dataset, with annotations and frames read through the lmdb_utils decode helpers. """

    def __init__(self, root=None, image_loader=jpeg4py_loader, vid_ids=None, split=None, data_fraction=None):
        """
        args:
            root - path to the lasot dataset. env_settings().lasot_lmdb_dir is used when None.
            image_loader (jpeg4py_loader) - forwarded to the base class constructor. Frames returned by this
                                            class are decoded with decode_img.
            vid_ids - List containing the ids of the videos (1 - 20) used for training. If vid_ids = [1, 3, 5], then the
                    videos with subscripts -1, -3, and -5 from each class will be used for training.
            split - If split='train', the official train split (protocol-II) is used for training. Note: Only one of
                    vid_ids or split option can be used at a time.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
        """
        root = env_settings().lasot_lmdb_dir if root is None else root
        super().__init__('LaSOT_lmdb', root, image_loader)

        self.sequence_list = self._build_sequence_list(vid_ids, split)

        # Keep a list of all classes: unique class prefixes of the sequence names, in first-seen order.
        self.class_list = list(OrderedDict.fromkeys(seq_name.split('-')[0] for seq_name in self.sequence_list))
        self.class_to_id = {cls_name: cls_id for cls_id, cls_name in enumerate(self.class_list)}

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))

        # Built after sub-sampling so the stored indices refer to the final sequence_list.
        self.seq_per_class = self._build_class_list()

    def _build_sequence_list(self, vid_ids=None, split=None):
        """Return the sequence names ('<class>-<id>') selected by either split or vid_ids.

        raises:
            ValueError - if both or neither of split / vid_ids are given, or the split name is unknown.
        """
        if split is not None:
            if vid_ids is not None:
                raise ValueError('Cannot set both split_name and vid_ids.')
            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
            if split == 'train':
                file_path = os.path.join(ltr_path, 'data_specs', 'lasot_train_split.txt')
            else:
                raise ValueError('Unknown split name.')
            # DataFrame.squeeze("columns") is used instead of read_csv(squeeze=True), which pandas 2.x rejects.
            sequence_list = pandas.read_csv(file_path, header=None).squeeze("columns").values.tolist()
        elif vid_ids is not None:
            # NOTE(review): this reads self.class_list, but __init__ calls this method before it derives
            # class_list from the sequence names - confirm whether vid_ids is supported for this variant.
            sequence_list = [c+'-'+str(v) for c in self.class_list for v in vid_ids]
        else:
            raise ValueError('Set either split_name or vid_ids.')

        return sequence_list

    def _build_class_list(self):
        """Map each class name to the list of sequence indices belonging to it."""
        seq_per_class = {}
        for seq_id, seq_name in enumerate(self.sequence_list):
            class_name = seq_name.split('-')[0]
            seq_per_class.setdefault(class_name, []).append(seq_id)

        return seq_per_class

    def get_name(self):
        """Return the dataset identifier string."""
        return 'lasot_lmdb'

    def has_class_info(self):
        return True

    def has_occlusion_info(self):
        return True

    def get_num_sequences(self):
        return len(self.sequence_list)

    def get_num_classes(self):
        return len(self.class_list)

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices of class_name (KeyError if the class has no sequences)."""
        return self.seq_per_class[class_name]

    def _read_bb_anno(self, seq_path):
        """Decode groundtruth.txt of a sequence into a float32 tensor with one row per non-empty line."""
        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
        gt_text = decode_str(self.root, bb_anno_file)
        # Skip blank lines instead of blindly dropping the last one, so the final box is not lost
        # when the stored text has no trailing newline.
        gt_list = [list(map(float, line.split(','))) for line in gt_text.split('\n') if line.strip()]
        gt_arr = np.array(gt_list).astype(np.float32)
        return torch.tensor(gt_arr)

    def _read_target_visible(self, seq_path):
        """Combine the full-occlusion and out-of-view flags of a sequence.

        Each entry is decoded as one comma-separated list of integer flags. The result is the bitwise
        `~occlusion & ~out_of_view` on uint8 tensors; get_sequence_info ANDs it with a 0/1 mask.
        """
        # Read full occlusion and out_of_view
        occlusion_file = os.path.join(seq_path, "full_occlusion.txt")
        out_of_view_file = os.path.join(seq_path, "out_of_view.txt")

        occ_list = list(map(int, decode_str(self.root, occlusion_file).split(',')))
        occlusion = torch.ByteTensor(occ_list)
        out_view_list = list(map(int, decode_str(self.root, out_of_view_file).split(',')))
        out_of_view = torch.ByteTensor(out_view_list)

        target_visible = ~occlusion & ~out_of_view

        return target_visible

    def _get_sequence_path(self, seq_id):
        """Return the relative key prefix '<class>/<class>-<vid>' of the sequence at index seq_id."""
        seq_name = self.sequence_list[seq_id]
        class_name = seq_name.split('-')[0]
        vid_id = seq_name.split('-')[1]

        return os.path.join(class_name, class_name + '-' + vid_id)

    def get_sequence_info(self, seq_id):
        """Return the 'bbox', 'valid' and 'visible' tensors of a sequence.

        'valid' marks rows whose columns 2 and 3 are both positive; 'visible' is the occlusion /
        out-of-view information AND-ed with 'valid'.
        """
        seq_path = self._get_sequence_path(seq_id)
        bbox = self._read_bb_anno(seq_path)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = self._read_target_visible(seq_path) & valid.byte()

        return {'bbox': bbox, 'valid': valid, 'visible': visible}

    def _get_frame_path(self, seq_path, frame_id):
        return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_id+1))    # frames start from 1

    def _get_frame(self, seq_path, frame_id):
        return decode_img(self.root, self._get_frame_path(seq_path, frame_id))

    def _get_class(self, seq_path):
        """Return the class name, i.e. the first component of the '<class>/<class>-<vid>' path."""
        # seq_path is built with os.path.join, so take it apart with os.path as well instead of
        # splitting on a hard-coded '/', which fails when the platform separator differs.
        raw_class = os.path.basename(os.path.dirname(seq_path))
        return raw_class

    def get_class_name(self, seq_id):
        seq_path = self._get_sequence_path(seq_id)
        obj_class = self._get_class(seq_path)

        return obj_class

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return the requested frames with their annotations and object meta info.

        args:
            seq_id - index into self.sequence_list.
            frame_ids - zero-based frame indices to load.
            anno - optional result of get_sequence_info; it is computed when None.

        returns:
            tuple - (list of frames, dict of per-frame annotation lists, OrderedDict of object meta info).
        """
        seq_path = self._get_sequence_path(seq_id)

        obj_class = self._get_class(seq_path)
        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        object_meta = OrderedDict({'object_class_name': obj_class,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta
|
151
lib/train/dataset/tracking_net.py
Normal file
151
lib/train/dataset/tracking_net.py
Normal file
@@ -0,0 +1,151 @@
|
||||
import torch
|
||||
import os
|
||||
import os.path
|
||||
import numpy as np
|
||||
import pandas
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
|
||||
from lib.train.data import jpeg4py_loader
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.admin import env_settings
|
||||
|
||||
|
||||
def list_sequences(root, set_ids):
    """ Collect every annotated video of the requested TrackingNet training sets.

    args:
        root: Root directory to TrackingNet
        set_ids: Ids of the sets to scan; each one is looked up as "<root>/TRAIN_<id>/anno"

    returns:
        list - list of tuples (set_id, video_name), one per '.txt' file found in the anno directories,
               where video_name is the file name without its extension
    """
    sequence_list = []

    for set_id in set_ids:
        anno_dir = os.path.join(root, "TRAIN_" + str(set_id), "anno")

        for file_name in os.listdir(anno_dir):
            if not file_name.endswith('.txt'):
                continue
            video_name = os.path.splitext(file_name)[0]
            sequence_list.append((set_id, video_name))

    return sequence_list
|
||||
|
||||
|
||||
class TrackingNet(BaseVideoDataset):
    """ TrackingNet dataset.

    Publication:
        TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild.
        Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem
        ECCV, 2018
        https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf

    Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit.
    """
    def __init__(self, root=None, image_loader=jpeg4py_loader, set_ids=None, data_fraction=None):
        """
        args:
            root - The path to the TrackingNet folder, containing the training sets.
                   env_settings().trackingnet_dir is used when None.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            set_ids (None) - List containing the ids of the TrackingNet sets to be used for training. If None, all the
                            sets (0 - 11) will be used.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
        """
        root = env_settings().trackingnet_dir if root is None else root
        super().__init__('TrackingNet', root, image_loader)

        if set_ids is None:
            set_ids = list(range(12))

        self.set_ids = set_ids

        # Keep a list of all videos. Sequence list is a list of tuples (set_id, video_name) containing the set_id and
        # video_name for each sequence
        self.sequence_list = list_sequences(self.root, self.set_ids)

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list) * data_fraction))

        # Built after sub-sampling so the stored indices refer to the final sequence_list.
        self.seq_to_class_map, self.seq_per_class = self._load_class_info()

        # we do not have the class_lists for the tracking net
        self.class_list = sorted(self.seq_per_class.keys())

    def _load_class_info(self):
        """Load the sequence-name -> class-name map and group the sequence indices by class.

        returns:
            tuple - (seq_to_class_map, seq_per_class). Sequences that are missing from the class map file
                    are grouped under 'Unknown'.
        """
        ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
        class_map_path = os.path.join(ltr_path, 'data_specs', 'trackingnet_classmap.txt')

        # Each line of the class map is "<sequence name>\t<class name>".
        with open(class_map_path, 'r') as f:
            seq_to_class_map = {seq_class.split('\t')[0]: seq_class.rstrip().split('\t')[1] for seq_class in f}

        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = seq_to_class_map.get(seq[1], 'Unknown')
            seq_per_class.setdefault(class_name, []).append(i)

        return seq_to_class_map, seq_per_class

    def get_name(self):
        """Return the dataset identifier string."""
        return 'trackingnet'

    def has_class_info(self):
        return True

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices of class_name (KeyError if the class has no sequences)."""
        return self.seq_per_class[class_name]

    def _read_bb_anno(self, seq_id):
        """Read the annotation file of a sequence into a float32 tensor with one row per line."""
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        bb_anno_file = os.path.join(self.root, "TRAIN_" + str(set_id), "anno", vid_name + ".txt")
        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False,
                             low_memory=False).values
        return torch.tensor(gt)

    def get_sequence_info(self, seq_id):
        """Return the 'bbox', 'valid' and 'visible' tensors of a sequence.

        'valid' marks rows whose columns 2 and 3 are both positive. No separate visibility data is read
        here, so 'visible' is a uint8 copy of 'valid'.
        """
        bbox = self._read_bb_anno(seq_id)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = valid.clone().byte()
        return {'bbox': bbox, 'valid': valid, 'visible': visible}

    def _get_frame(self, seq_id, frame_id):
        """Load frame "<frame_id>.jpg" of the sequence with the configured image loader."""
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        frame_path = os.path.join(self.root, "TRAIN_" + str(set_id), "frames", vid_name, str(frame_id) + ".jpg")
        return self.image_loader(frame_path)

    def _get_class(self, seq_id):
        """Return the class name of a sequence, or 'Unknown' when it is missing from the class map."""
        seq_name = self.sequence_list[seq_id][1]
        # Same fallback as _load_class_info, so a sequence grouped under 'Unknown' there does not
        # raise KeyError here.
        return self.seq_to_class_map.get(seq_name, 'Unknown')

    def get_class_name(self, seq_id):
        obj_class = self._get_class(seq_id)

        return obj_class

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return the requested frames with their annotations and object meta info.

        args:
            seq_id - index into self.sequence_list.
            frame_ids - frame indices to load.
            anno - optional result of get_sequence_info; it is computed when None.

        returns:
            tuple - (list of frames, dict of per-frame annotation lists, OrderedDict of object meta info).
        """
        frame_list = [self._get_frame(seq_id, f) for f in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        obj_class = self._get_class(seq_id)

        object_meta = OrderedDict({'object_class_name': obj_class,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta
|
147
lib/train/dataset/tracking_net_lmdb.py
Normal file
147
lib/train/dataset/tracking_net_lmdb.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import torch
|
||||
import os
|
||||
import os.path
|
||||
import numpy as np
|
||||
import random
|
||||
from collections import OrderedDict
|
||||
|
||||
from lib.train.data import jpeg4py_loader
|
||||
from .base_video_dataset import BaseVideoDataset
|
||||
from lib.train.admin import env_settings
|
||||
import json
|
||||
from lib.utils.lmdb_utils import decode_img, decode_str
|
||||
|
||||
|
||||
def list_sequences(root):
    """ Load the list of sequences stored in "<root>/seq_list.json".

    args:
        root: Root directory to TrackingNet

    returns:
        The parsed JSON content of the file. Callers in this module index each entry as
        entry[0] (set id) and entry[1] (video name).
    """
    list_path = os.path.join(root, "seq_list.json")
    with open(list_path, "r") as list_file:
        return json.load(list_file)
|
||||
|
||||
|
||||
class TrackingNet_lmdb(BaseVideoDataset):
    """ TrackingNet dataset, with annotations and frames read through the lmdb_utils decode helpers.

    Publication:
        TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild.
        Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem
        ECCV, 2018
        https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf

    Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit.
    """
    def __init__(self, root=None, image_loader=jpeg4py_loader, set_ids=None, data_fraction=None):
        """
        args:
            root - The path to the TrackingNet folder, containing the training sets.
                   env_settings().trackingnet_lmdb_dir is used when None.
            image_loader (jpeg4py_loader) - forwarded to the base class constructor. Frames returned by this
                                            class are decoded with decode_img.
            set_ids (None) - List containing the ids of the TrackingNet sets. Defaults to 0 - 11 and is stored
                            on the instance. NOTE(review): the sequence list is read from seq_list.json and is
                            not filtered by set_ids here - confirm whether that is intended.
            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
        """
        root = env_settings().trackingnet_lmdb_dir if root is None else root
        super().__init__('TrackingNet_lmdb', root, image_loader)

        if set_ids is None:
            set_ids = list(range(12))

        self.set_ids = set_ids

        # Keep a list of all videos. Each entry of the sequence list holds the set_id and the
        # video_name of one sequence
        self.sequence_list = list_sequences(self.root)

        if data_fraction is not None:
            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list) * data_fraction))

        # Built after sub-sampling so the stored indices refer to the final sequence_list.
        self.seq_to_class_map, self.seq_per_class = self._load_class_info()

        # we do not have the class_lists for the tracking net
        self.class_list = sorted(self.seq_per_class.keys())

    def _load_class_info(self):
        """Load the sequence-name -> class-name map and group the sequence indices by class.

        returns:
            tuple - (seq_to_class_map, seq_per_class). Sequences that are missing from the class map file
                    are grouped under 'Unknown'.
        """
        ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
        class_map_path = os.path.join(ltr_path, 'data_specs', 'trackingnet_classmap.txt')

        # Each line of the class map is "<sequence name>\t<class name>".
        with open(class_map_path, 'r') as f:
            seq_to_class_map = {seq_class.split('\t')[0]: seq_class.rstrip().split('\t')[1] for seq_class in f}

        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = seq_to_class_map.get(seq[1], 'Unknown')
            seq_per_class.setdefault(class_name, []).append(i)

        return seq_to_class_map, seq_per_class

    def get_name(self):
        """Return the dataset identifier string."""
        return 'trackingnet_lmdb'

    def has_class_info(self):
        return True

    def get_sequences_in_class(self, class_name):
        """Return the sequence indices of class_name (KeyError if the class has no sequences)."""
        return self.seq_per_class[class_name]

    def _read_bb_anno(self, seq_id):
        """Decode the annotation text of a sequence into a float32 tensor with one row per non-empty line."""
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        gt_text = decode_str(os.path.join(self.root, "TRAIN_%d_lmdb" % set_id),
                             os.path.join("anno", vid_name + ".txt"))
        # Skip blank lines instead of blindly dropping the last one, so the final box is not lost
        # when the stored text has no trailing newline.
        gt_list = [list(map(float, line.split(','))) for line in gt_text.split('\n') if line.strip()]
        gt_arr = np.array(gt_list).astype(np.float32)
        return torch.tensor(gt_arr)

    def get_sequence_info(self, seq_id):
        """Return the 'bbox', 'valid' and 'visible' tensors of a sequence.

        'valid' marks rows whose columns 2 and 3 are both positive. No separate visibility data is read
        here, so 'visible' is a uint8 copy of 'valid'.
        """
        bbox = self._read_bb_anno(seq_id)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = valid.clone().byte()
        return {'bbox': bbox, 'valid': valid, 'visible': visible}

    def _get_frame(self, seq_id, frame_id):
        """Decode frame "frames/<video>/<frame_id>.jpg" from the "TRAIN_<set_id>_lmdb" store of the sequence."""
        set_id = self.sequence_list[seq_id][0]
        vid_name = self.sequence_list[seq_id][1]
        return decode_img(os.path.join(self.root, "TRAIN_%d_lmdb" % set_id),
                          os.path.join("frames", vid_name, str(frame_id) + ".jpg"))

    def _get_class(self, seq_id):
        """Return the class name of a sequence, or 'Unknown' when it is missing from the class map."""
        seq_name = self.sequence_list[seq_id][1]
        # Same fallback as _load_class_info, so a sequence grouped under 'Unknown' there does not
        # raise KeyError here.
        return self.seq_to_class_map.get(seq_name, 'Unknown')

    def get_class_name(self, seq_id):
        obj_class = self._get_class(seq_id)

        return obj_class

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return the requested frames with their annotations and object meta info.

        args:
            seq_id - index into self.sequence_list.
            frame_ids - frame indices to load.
            anno - optional result of get_sequence_info; it is computed when None.

        returns:
            tuple - (list of frames, dict of per-frame annotation lists, OrderedDict of object meta info).
        """
        frame_list = [self._get_frame(seq_id, f) for f in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        anno_frames = {}
        for key, value in anno.items():
            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]

        obj_class = self._get_class(seq_id)

        object_meta = OrderedDict({'object_class_name': obj_class,
                                   'motion_class': None,
                                   'major_class': None,
                                   'root_class': None,
                                   'motion_adverb': None})

        return frame_list, anno_frames, object_meta
|
Reference in New Issue
Block a user