init commit of samurai

2024-11-19 22:12:54 -08:00
parent f65f4ba181
commit c17e4cecc0
679 changed files with 123982 additions and 0 deletions
--- a/lib/train/dataset/COCO_tool.py
+++ b/lib/train/dataset/COCO_tool.py
@@ -0,0 +1,437 @@
+__author__ = 'tylin'
+__version__ = '2.0'
+# Interface for accessing the Microsoft COCO dataset.
+
+# Microsoft COCO is a large image dataset designed for object detection,
+# segmentation, and caption generation. pycocotools is a Python API that
+# assists in loading, parsing and visualizing the annotations in COCO.
+# Please visit http://mscoco.org/ for more information on COCO, including
+# for the data, paper, and tutorials. The exact format of the annotations
+# is also described on the COCO website. For example usage of the pycocotools
+# please see pycocotools_demo.ipynb. In addition to this API, please download both
+# the COCO images and annotations in order to run the demo.
+
+# An alternative to using the API is to load the annotations directly
+# into Python dictionary
+# Using the API provides additional utility functions. Note that this API
+# supports both *instance* and *caption* annotations. In the case of
+# captions not all functions are defined (e.g. categories are undefined).
+
+# The following API functions are defined:
+#  COCO       - COCO api class that loads COCO annotation file and prepare data structures.
+#  decodeMask - Decode binary mask M encoded via run-length encoding.
+#  encodeMask - Encode binary mask M using run-length encoding.
+#  getAnnIds  - Get ann ids that satisfy given filter conditions.
+#  getCatIds  - Get cat ids that satisfy given filter conditions.
+#  getImgIds  - Get img ids that satisfy given filter conditions.
+#  loadAnns   - Load anns with the specified ids.
+#  loadCats   - Load cats with the specified ids.
+#  loadImgs   - Load imgs with the specified ids.
+#  annToMask  - Convert segmentation in an annotation to binary mask.
+#  showAnns   - Display the specified annotations.
+#  loadRes    - Load algorithm results and create API for accessing them.
+#  download   - Download COCO images from mscoco.org server.
+# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
+# Help on each function can be accessed by: "help COCO>function".
+
+# See also COCO>decodeMask,
+# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
+# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
+# COCO>loadImgs, COCO>annToMask, COCO>showAnns
+
+# Microsoft COCO Toolbox.      version 2.0
+# Data, paper, and tutorials available at:  http://mscoco.org/
+# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
+# Licensed under the Simplified BSD License [see bsd.txt]
+
+import json
+import time
+import matplotlib.pyplot as plt
+from matplotlib.collections import PatchCollection
+from matplotlib.patches import Polygon
+import numpy as np
+import copy
+import itertools
+from pycocotools import mask as maskUtils
+import os
+from collections import defaultdict
+import sys
+PYTHON_VERSION = sys.version_info[0]
+if PYTHON_VERSION == 2:
+    from urllib import urlretrieve
+elif PYTHON_VERSION == 3:
+    from urllib.request import urlretrieve
+
+
+def _isArrayLike(obj):
+    return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
+
+
+class COCO:
+    def __init__(self, dataset):
+        """
+        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
+        :param annotation_file (str): location of annotation file
+        :param image_folder (str): location to the folder that hosts images.
+        :return:
+        """
+        # load dataset
+        self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
+        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
+        assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
+        self.dataset = dataset
+        self.createIndex()
+
+    def createIndex(self):
+        # create index
+        print('creating index...')
+        anns, cats, imgs = {}, {}, {}
+        imgToAnns,catToImgs = defaultdict(list),defaultdict(list)
+        if 'annotations' in self.dataset:
+            for ann in self.dataset['annotations']:
+                imgToAnns[ann['image_id']].append(ann)
+                anns[ann['id']] = ann
+
+        if 'images' in self.dataset:
+            for img in self.dataset['images']:
+                imgs[img['id']] = img
+
+        if 'categories' in self.dataset:
+            for cat in self.dataset['categories']:
+                cats[cat['id']] = cat
+
+        if 'annotations' in self.dataset and 'categories' in self.dataset:
+            for ann in self.dataset['annotations']:
+                catToImgs[ann['category_id']].append(ann['image_id'])
+
+        print('index created!')
+
+        # create class members
+        self.anns = anns
+        self.imgToAnns = imgToAnns
+        self.catToImgs = catToImgs
+        self.imgs = imgs
+        self.cats = cats
+
+    def info(self):
+        """
+        Print information about the annotation file.
+        :return:
+        """
+        for key, value in self.dataset['info'].items():
+            print('{}: {}'.format(key, value))
+
+    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
+        """
+        Get ann ids that satisfy given filter conditions. default skips that filter
+        :param imgIds  (int array)     : get anns for given imgs
+               catIds  (int array)     : get anns for given cats
+               areaRng (float array)   : get anns for given area range (e.g. [0 inf])
+               iscrowd (boolean)       : get anns for given crowd label (False or True)
+        :return: ids (int array)       : integer array of ann ids
+        """
+        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(imgIds) == len(catIds) == len(areaRng) == 0:
+            anns = self.dataset['annotations']
+        else:
+            if not len(imgIds) == 0:
+                lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
+                anns = list(itertools.chain.from_iterable(lists))
+            else:
+                anns = self.dataset['annotations']
+            anns = anns if len(catIds)  == 0 else [ann for ann in anns if ann['category_id'] in catIds]
+            anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
+        if not iscrowd == None:
+            ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
+        else:
+            ids = [ann['id'] for ann in anns]
+        return ids
+
+    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
+        """
+        filtering parameters. default skips that filter.
+        :param catNms (str array)  : get cats for given cat names
+        :param supNms (str array)  : get cats for given supercategory names
+        :param catIds (int array)  : get cats for given cat ids
+        :return: ids (int array)   : integer array of cat ids
+        """
+        catNms = catNms if _isArrayLike(catNms) else [catNms]
+        supNms = supNms if _isArrayLike(supNms) else [supNms]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(catNms) == len(supNms) == len(catIds) == 0:
+            cats = self.dataset['categories']
+        else:
+            cats = self.dataset['categories']
+            cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name']          in catNms]
+            cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms]
+            cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id']            in catIds]
+        ids = [cat['id'] for cat in cats]
+        return ids
+
+    def getImgIds(self, imgIds=[], catIds=[]):
+        '''
+        Get img ids that satisfy given filter conditions.
+        :param imgIds (int array) : get imgs for given ids
+        :param catIds (int array) : get imgs with all given cats
+        :return: ids (int array)  : integer array of img ids
+        '''
+        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(imgIds) == len(catIds) == 0:
+            ids = self.imgs.keys()
+        else:
+            ids = set(imgIds)
+            for i, catId in enumerate(catIds):
+                if i == 0 and len(ids) == 0:
+                    ids = set(self.catToImgs[catId])
+                else:
+                    ids &= set(self.catToImgs[catId])
+        return list(ids)
+
+    def loadAnns(self, ids=[]):
+        """
+        Load anns with the specified ids.
+        :param ids (int array)       : integer ids specifying anns
+        :return: anns (object array) : loaded ann objects
+        """
+        if _isArrayLike(ids):
+            return [self.anns[id] for id in ids]
+        elif type(ids) == int:
+            return [self.anns[ids]]
+
+    def loadCats(self, ids=[]):
+        """
+        Load cats with the specified ids.
+        :param ids (int array)       : integer ids specifying cats
+        :return: cats (object array) : loaded cat objects
+        """
+        if _isArrayLike(ids):
+            return [self.cats[id] for id in ids]
+        elif type(ids) == int:
+            return [self.cats[ids]]
+
+    def loadImgs(self, ids=[]):
+        """
+        Load anns with the specified ids.
+        :param ids (int array)       : integer ids specifying img
+        :return: imgs (object array) : loaded img objects
+        """
+        if _isArrayLike(ids):
+            return [self.imgs[id] for id in ids]
+        elif type(ids) == int:
+            return [self.imgs[ids]]
+
+    def showAnns(self, anns, draw_bbox=False):
+        """
+        Display the specified annotations on the current matplotlib axes.
+        Annotations holding 'segmentation' or 'keypoints' are drawn; annotations holding
+        'caption' are printed instead. The kind is decided from the first annotation only.
+        :param anns (array of object): annotations to display
+        :param draw_bbox (bool): when True, also draw each annotation's 'bbox' rectangle
+        :return: None (0 when anns is empty)
+        :raises Exception: when the first annotation matches none of the supported kinds
+        """
+        if len(anns) == 0:
+            return 0
+        if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
+            datasetType = 'instances'
+        elif 'caption' in anns[0]:
+            datasetType = 'captions'
+        else:
+            raise Exception('datasetType not supported')
+        if datasetType == 'instances':
+            ax = plt.gca()
+            ax.set_autoscale_on(False)
+            # polygons and their colors are collected here and drawn together after the loop
+            polygons = []
+            color = []
+            for ann in anns:
+                # one random RGB color per annotation, each channel in [0.4, 1.0)
+                c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
+                if 'segmentation' in ann:
+                    if type(ann['segmentation']) == list:
+                        # polygon
+                        for seg in ann['segmentation']:
+                            # flat coordinate list -> (num_points, 2) array
+                            poly = np.array(seg).reshape((int(len(seg)/2), 2))
+                            polygons.append(Polygon(poly))
+                            color.append(c)
+                    else:
+                        # mask
+                        t = self.imgs[ann['image_id']]
+                        if type(ann['segmentation']['counts']) == list:
+                            # 'counts' given as a list: convert with the image's height/width first
+                            rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
+                        else:
+                            rle = [ann['segmentation']]
+                        m = maskUtils.decode(rle)
+                        img = np.ones( (m.shape[0], m.shape[1], 3) )
+                        # NOTE(review): color_mask is only assigned for iscrowd values 0 and 1;
+                        # any other value reuses a previous color or raises NameError
+                        if ann['iscrowd'] == 1:
+                            color_mask = np.array([2.0,166.0,101.0])/255
+                        if ann['iscrowd'] == 0:
+                            color_mask = np.random.random((1, 3)).tolist()[0]
+                        for i in range(3):
+                            img[:,:,i] = color_mask[i]
+                        # the decoded mask scaled by 0.5 is stacked as the alpha channel
+                        ax.imshow(np.dstack( (img, m*0.5) ))
+                if 'keypoints' in ann and type(ann['keypoints']) == list:
+                    # turn skeleton into zero-based index
+                    sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
+                    kp = np.array(ann['keypoints'])
+                    # keypoints are stored as flat triplets: x, y, v
+                    # (v is presumably the visibility flag - confirm against the annotation format)
+                    x = kp[0::3]
+                    y = kp[1::3]
+                    v = kp[2::3]
+                    for sk in sks:
+                        # draw a skeleton edge only when all of its keypoints have v > 0
+                        if np.all(v[sk]>0):
+                            plt.plot(x[sk],y[sk], linewidth=3, color=c)
+                    # keypoints with v > 0 get a black edge; those with v > 1 are redrawn with a colored edge
+                    plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2)
+                    plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2)
+
+                if draw_bbox:
+                    # bbox is unpacked as [x, y, width, height] and turned into its four corners
+                    [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox']
+                    poly = [[bbox_x, bbox_y], [bbox_x, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y]]
+                    np_poly = np.array(poly).reshape((4,2))
+                    polygons.append(Polygon(np_poly))
+                    color.append(c)
+
+            # first pass: translucent fill; second pass: solid outline in the same colors
+            p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
+            ax.add_collection(p)
+            p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
+            ax.add_collection(p)
+        elif datasetType == 'captions':
+            for ann in anns:
+                print(ann['caption'])
+
+    def loadRes(self, resFile):
+        """
+        Load result file and return a result api object.
+        :param   resFile (str)     : file name of result file
+        :return: res (obj)         : result api object
+        """
+        res = COCO()
+        res.dataset['images'] = [img for img in self.dataset['images']]
+
+        print('Loading and preparing results...')
+        tic = time.time()
+        if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode):
+            with open(resFile) as f:
+                anns = json.load(f)
+        elif type(resFile) == np.ndarray:
+            anns = self.loadNumpyAnnotations(resFile)
+        else:
+            anns = resFile
+        assert type(anns) == list, 'results in not an array of objects'
+        annsImgIds = [ann['image_id'] for ann in anns]
+        assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
+               'Results do not correspond to current coco set'
+        if 'caption' in anns[0]:
+            imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
+            res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
+            for id, ann in enumerate(anns):
+                ann['id'] = id+1
+        elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
+            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+            for id, ann in enumerate(anns):
+                bb = ann['bbox']
+                x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
+                if not 'segmentation' in ann:
+                    ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+                ann['area'] = bb[2]*bb[3]
+                ann['id'] = id+1
+                ann['iscrowd'] = 0
+        elif 'segmentation' in anns[0]:
+            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+            for id, ann in enumerate(anns):
+                # now only support compressed RLE format as segmentation results
+                ann['area'] = maskUtils.area(ann['segmentation'])
+                if not 'bbox' in ann:
+                    ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
+                ann['id'] = id+1
+                ann['iscrowd'] = 0
+        elif 'keypoints' in anns[0]:
+            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+            for id, ann in enumerate(anns):
+                s = ann['keypoints']
+                x = s[0::3]
+                y = s[1::3]
+                x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+                ann['area'] = (x1-x0)*(y1-y0)
+                ann['id'] = id + 1
+                ann['bbox'] = [x0,y0,x1-x0,y1-y0]
+        print('DONE (t={:0.2f}s)'.format(time.time()- tic))
+
+        res.dataset['annotations'] = anns
+        res.createIndex()
+        return res
+
+    def download(self, tarDir = None, imgIds = [] ):
+        '''
+        Download COCO images from mscoco.org server.
+        :param tarDir (str): COCO results directory name
+               imgIds (list): images to be downloaded
+        :return:
+        '''
+        if tarDir is None:
+            print('Please specify target directory')
+            return -1
+        if len(imgIds) == 0:
+            imgs = self.imgs.values()
+        else:
+            imgs = self.loadImgs(imgIds)
+        N = len(imgs)
+        if not os.path.exists(tarDir):
+            os.makedirs(tarDir)
+        for i, img in enumerate(imgs):
+            tic = time.time()
+            fname = os.path.join(tarDir, img['file_name'])
+            if not os.path.exists(fname):
+                urlretrieve(img['coco_url'], fname)
+            print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic))
+
+    def loadNumpyAnnotations(self, data):
+        """
+        Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
+        :param  data (numpy.ndarray)
+        :return: annotations (python nested list)
+        """
+        print('Converting ndarray to lists...')
+        assert(type(data) == np.ndarray)
+        print(data.shape)
+        assert(data.shape[1] == 7)
+        N = data.shape[0]
+        ann = []
+        for i in range(N):
+            if i % 1000000 == 0:
+                print('{}/{}'.format(i,N))
+            ann += [{
+                'image_id'  : int(data[i, 0]),
+                'bbox'  : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ],
+                'score' : data[i, 5],
+                'category_id': int(data[i, 6]),
+                }]
+        return ann
+
+    def annToRLE(self, ann):
+        """
+        Convert annotation which can be polygons, uncompressed RLE to RLE.
+        :return: binary mask (numpy 2D array)
+        """
+        t = self.imgs[ann['image_id']]
+        h, w = t['height'], t['width']
+        segm = ann['segmentation']
+        if type(segm) == list:
+            # polygon -- a single object might consist of multiple parts
+            # we merge all parts into one mask rle code
+            rles = maskUtils.frPyObjects(segm, h, w)
+            rle = maskUtils.merge(rles)
+        elif type(segm['counts']) == list:
+            # uncompressed RLE
+            rle = maskUtils.frPyObjects(segm, h, w)
+        else:
+            # rle
+            rle = ann['segmentation']
+        return rle
+
+    def annToMask(self, ann):
+        """
+        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
+        :return: binary mask (numpy 2D array)
+        """
+        rle = self.annToRLE(ann)
+        m = maskUtils.decode(rle)
+        return m
--- a/lib/train/dataset/init.py
+++ b/lib/train/dataset/init.py
@@ -0,0 +1,11 @@
+from .lasot import Lasot
+from .got10k import Got10k
+from .tracking_net import TrackingNet
+from .imagenetvid import ImagenetVID
+from .coco import MSCOCO
+from .coco_seq import MSCOCOSeq
+from .got10k_lmdb import Got10k_lmdb
+from .lasot_lmdb import Lasot_lmdb
+from .imagenetvid_lmdb import ImagenetVID_lmdb
+from .coco_seq_lmdb import MSCOCOSeq_lmdb
+from .tracking_net_lmdb import TrackingNet_lmdb
--- a/lib/train/dataset/base_image_dataset.py
+++ b/lib/train/dataset/base_image_dataset.py
@@ -0,0 +1,92 @@
+import torch.utils.data
+from lib.train.data.image_loader import jpeg4py_loader
+
+
+class BaseImageDataset(torch.utils.data.Dataset):
+    """ Base class for image datasets """
+
+    def __init__(self, name, root, image_loader=jpeg4py_loader):
+        """
+        args:
+            root - The root path to the dataset
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+        """
+        self.name = name
+        self.root = root
+        self.image_loader = image_loader
+
+        self.image_list = []     # Contains the list of sequences.
+        self.class_list = []
+
+    def __len__(self):
+        """ Returns size of the dataset
+        returns:
+            int - number of samples in the dataset
+        """
+        return self.get_num_images()
+
+    def __getitem__(self, index):
+        """ Not to be used! Check get_frames() instead.
+        """
+        return None
+
+    def get_name(self):
+        """ Name of the dataset
+
+        returns:
+            string - Name of the dataset
+        """
+        raise NotImplementedError
+
+    def get_num_images(self):
+        """ Number of sequences in a dataset
+
+        returns:
+            int - number of sequences in the dataset."""
+        return len(self.image_list)
+
+    def has_class_info(self):
+        return False
+
+    def get_class_name(self, image_id):
+        return None
+
+    def get_num_classes(self):
+        return len(self.class_list)
+
+    def get_class_list(self):
+        return self.class_list
+
+    def get_images_in_class(self, class_name):
+        raise NotImplementedError
+
+    def has_segmentation_info(self):
+        return False
+
+    def get_image_info(self, seq_id):
+        """ Returns information about a particular image,
+
+        args:
+            seq_id - index of the image
+
+        returns:
+            Dict
+            """
+        raise NotImplementedError
+
+    def get_image(self, image_id, anno=None):
+        """ Get a image
+
+        args:
+            image_id      - index of image
+            anno(None)  - The annotation for the sequence (see get_sequence_info). If None, they will be loaded.
+
+        returns:
+            image -
+            anno -
+            dict - A dict containing meta information about the sequence, e.g. class of the target object.
+
+        """
+        raise NotImplementedError
+
--- a/lib/train/dataset/base_video_dataset.py
+++ b/lib/train/dataset/base_video_dataset.py
@@ -0,0 +1,110 @@
+import torch.utils.data
+# 2021.1.5 use jpeg4py_loader_w_failsafe as default
+from lib.train.data.image_loader import jpeg4py_loader_w_failsafe
+
+
+class BaseVideoDataset(torch.utils.data.Dataset):
+    """ Base class for video datasets """
+
+    def __init__(self, name, root, image_loader=jpeg4py_loader_w_failsafe):
+        """
+        args:
+            root - The root path to the dataset
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+        """
+        self.name = name
+        self.root = root
+        self.image_loader = image_loader
+
+        self.sequence_list = []     # Contains the list of sequences.
+        self.class_list = []
+
+    def __len__(self):
+        """ Returns size of the dataset
+        returns:
+            int - number of samples in the dataset
+        """
+        return self.get_num_sequences()
+
+    def __getitem__(self, index):
+        """ Not to be used! Check get_frames() instead.
+        """
+        return None
+
+    def is_video_sequence(self):
+        """ Returns whether the dataset is a video dataset or an image dataset
+
+        returns:
+            bool - True if a video dataset
+        """
+        return True
+
+    def is_synthetic_video_dataset(self):
+        """ Returns whether the dataset contains real videos or synthetic
+
+        returns:
+            bool - True if a video dataset
+        """
+        return False
+
+    def get_name(self):
+        """ Name of the dataset
+
+        returns:
+            string - Name of the dataset
+        """
+        raise NotImplementedError
+
+    def get_num_sequences(self):
+        """ Number of sequences in a dataset
+
+        returns:
+            int - number of sequences in the dataset."""
+        return len(self.sequence_list)
+
+    def has_class_info(self):
+        return False
+
+    def has_occlusion_info(self):
+        return False
+
+    def get_num_classes(self):
+        return len(self.class_list)
+
+    def get_class_list(self):
+        return self.class_list
+
+    def get_sequences_in_class(self, class_name):
+        raise NotImplementedError
+
+    def has_segmentation_info(self):
+        return False
+
+    def get_sequence_info(self, seq_id):
+        """ Returns information about a particular sequences,
+
+        args:
+            seq_id - index of the sequence
+
+        returns:
+            Dict
+            """
+        raise NotImplementedError
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        """ Get a set of frames from a particular sequence
+
+        args:
+            seq_id      - index of sequence
+            frame_ids   - a list of frame numbers
+            anno(None)  - The annotation for the sequence (see get_sequence_info). If None, they will be loaded.
+
+        returns:
+            list - List of frames corresponding to frame_ids
+            list - List of dicts for each frame
+            dict - A dict containing meta information about the sequence, e.g. class of the target object.
+
+        """
+        raise NotImplementedError
+
--- a/lib/train/dataset/coco.py
+++ b/lib/train/dataset/coco.py
@@ -0,0 +1,156 @@
+import os
+from .base_image_dataset import BaseImageDataset
+import torch
+import random
+from collections import OrderedDict
+from lib.train.data import jpeg4py_loader
+from lib.train.admin import env_settings
+from pycocotools.coco import COCO
+
+
+class MSCOCO(BaseImageDataset):
+    """ The COCO object detection dataset.
+
+    Publication:
+        Microsoft COCO: Common Objects in Context.
+        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
+        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
+        ECCV, 2014
+        https://arxiv.org/pdf/1405.0312.pdf
+
+    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
+    organized as follows.
+        - coco_root
+            - annotations
+                - instances_train2014.json
+                - instances_train2017.json
+            - images
+                - train2014
+                - train2017
+
+    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
+    """
+
+    def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None,
+                 split="train", version="2014"):
+        """
+        args:
+            root - path to coco root folder
+            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
+            min_area - Objects with area less than min_area are filtered out. Default is 0.0
+            split - 'train' or 'val'.
+            version - version of coco dataset (2014 or 2017)
+        """
+
+        root = env_settings().coco_dir if root is None else root
+        super().__init__('COCO', root, image_loader)
+
+        self.img_pth = os.path.join(root, 'images/{}{}/'.format(split, version))
+        self.anno_path = os.path.join(root, 'annotations/instances_{}{}.json'.format(split, version))
+
+        self.coco_set = COCO(self.anno_path)
+
+        self.cats = self.coco_set.cats
+
+        self.class_list = self.get_class_list()  # the parent class thing would happen in the sampler
+
+        self.image_list = self._get_image_list(min_area=min_area)
+
+        if data_fraction is not None:
+            self.image_list = random.sample(self.image_list, int(len(self.image_list) * data_fraction))
+        self.im_per_class = self._build_im_per_class()
+
+    def _get_image_list(self, min_area=None):
+        ann_list = list(self.coco_set.anns.keys())
+        image_list = [a for a in ann_list if self.coco_set.anns[a]['iscrowd'] == 0]
+
+        if min_area is not None:
+            image_list = [a for a in image_list if self.coco_set.anns[a]['area'] > min_area]
+
+        return image_list
+
+    def get_num_classes(self):
+        return len(self.class_list)
+
+    def get_name(self):
+        return 'coco'
+
+    def has_class_info(self):
+        return True
+
+    def has_segmentation_info(self):
+        return True
+
+    def get_class_list(self):
+        class_list = []
+        for cat_id in self.cats.keys():
+            class_list.append(self.cats[cat_id]['name'])
+        return class_list
+
+    def _build_im_per_class(self):
+        im_per_class = {}
+        for i, im in enumerate(self.image_list):
+            class_name = self.cats[self.coco_set.anns[im]['category_id']]['name']
+            if class_name not in im_per_class:
+                im_per_class[class_name] = [i]
+            else:
+                im_per_class[class_name].append(i)
+
+        return im_per_class
+
+    def get_images_in_class(self, class_name):
+        return self.im_per_class[class_name]
+
+    def get_image_info(self, im_id):
+        anno = self._get_anno(im_id)
+
+        bbox = torch.Tensor(anno['bbox']).view(4,)
+
+        mask = torch.Tensor(self.coco_set.annToMask(anno))
+
+        valid = (bbox[2] > 0) & (bbox[3] > 0)
+        visible = valid.clone().byte()
+
+        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}
+
+    def _get_anno(self, im_id):
+        anno = self.coco_set.anns[self.image_list[im_id]]
+
+        return anno
+
+    def _get_image(self, im_id):
+        path = self.coco_set.loadImgs([self.coco_set.anns[self.image_list[im_id]]['image_id']])[0]['file_name']
+        img = self.image_loader(os.path.join(self.img_pth, path))
+        return img
+
+    def get_meta_info(self, im_id):
+        try:
+            cat_dict_current = self.cats[self.coco_set.anns[self.image_list[im_id]]['category_id']]
+            object_meta = OrderedDict({'object_class_name': cat_dict_current['name'],
+                                       'motion_class': None,
+                                       'major_class': cat_dict_current['supercategory'],
+                                       'root_class': None,
+                                       'motion_adverb': None})
+        except:
+            object_meta = OrderedDict({'object_class_name': None,
+                                       'motion_class': None,
+                                       'major_class': None,
+                                       'root_class': None,
+                                       'motion_adverb': None})
+        return object_meta
+
+    def get_class_name(self, im_id):
+        cat_dict_current = self.cats[self.coco_set.anns[self.image_list[im_id]]['category_id']]
+        return cat_dict_current['name']
+
+    def get_image(self, image_id, anno=None):
+        frame = self._get_image(image_id)
+
+        if anno is None:
+            anno = self.get_image_info(image_id)
+
+        object_meta = self.get_meta_info(image_id)
+
+        return frame, anno, object_meta
--- a/lib/train/dataset/coco_seq.py
+++ b/lib/train/dataset/coco_seq.py
@@ -0,0 +1,170 @@
+import os
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+import torch
+import random
+from pycocotools.coco import COCO
+from collections import OrderedDict
+from lib.train.admin import env_settings
+
+
+class MSCOCOSeq(BaseVideoDataset):
+    """ The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.
+
+    Publication:
+        Microsoft COCO: Common Objects in Context.
+        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
+        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
+        ECCV, 2014
+        https://arxiv.org/pdf/1405.0312.pdf
+
+    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
+    organized as follows.
+        - coco_root
+            - annotations
+                - instances_train2014.json
+                - instances_train2017.json
+            - images
+                - train2014
+                - train2017
+
+    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
+    """
+
+    def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"):
+        """
+        args:
+            root - path to the coco dataset.
+            image_loader (default_image_loader) -  The function to read the images. If installed,
+                                                   jpeg4py (https://github.com/ajkxyz/jpeg4py) is used by default. Else,
+                                                   opencv's imread is used.
+            data_fraction (None) - Fraction of images to be used. The images are selected randomly. If None, all the
+                                  images  will be used
+            split - 'train' or 'val'.
+            version - version of coco dataset (2014 or 2017)
+        """
+        root = env_settings().coco_dir if root is None else root
+        super().__init__('COCO', root, image_loader)
+
+        self.img_pth = os.path.join(root, 'images/{}{}/'.format(split, version))
+        self.anno_path = os.path.join(root, 'annotations/instances_{}{}.json'.format(split, version))
+
+        # Load the COCO set.
+        self.coco_set = COCO(self.anno_path)
+
+        self.cats = self.coco_set.cats
+
+        self.class_list = self.get_class_list()
+
+        self.sequence_list = self._get_sequence_list()
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
+        self.seq_per_class = self._build_seq_per_class()
+
+    def _get_sequence_list(self):
+        ann_list = list(self.coco_set.anns.keys())
+        seq_list = [a for a in ann_list if self.coco_set.anns[a]['iscrowd'] == 0]
+
+        return seq_list
+
+    def is_video_sequence(self):
+        return False
+
+    def get_num_classes(self):
+        return len(self.class_list)
+
+    def get_name(self):
+        return 'coco'
+
+    def has_class_info(self):
+        return True
+
+    def get_class_list(self):
+        class_list = []
+        for cat_id in self.cats.keys():
+            class_list.append(self.cats[cat_id]['name'])
+        return class_list
+
+    def has_segmentation_info(self):
+        return True
+
+    def get_num_sequences(self):
+        return len(self.sequence_list)
+
+    def _build_seq_per_class(self):
+        seq_per_class = {}
+        for i, seq in enumerate(self.sequence_list):
+            class_name = self.cats[self.coco_set.anns[seq]['category_id']]['name']
+            if class_name not in seq_per_class:
+                seq_per_class[class_name] = [i]
+            else:
+                seq_per_class[class_name].append(i)
+
+        return seq_per_class
+
+    def get_sequences_in_class(self, class_name):
+        return self.seq_per_class[class_name]
+
+    def get_sequence_info(self, seq_id):
+        anno = self._get_anno(seq_id)
+
+        bbox = torch.Tensor(anno['bbox']).view(1, 4)
+
+        mask = torch.Tensor(self.coco_set.annToMask(anno)).unsqueeze(dim=0)
+
+        '''2021.1.3 To avoid too small bounding boxes. Here we change the threshold to 50 pixels'''
+        valid = (bbox[:, 2] > 50) & (bbox[:, 3] > 50)
+
+        visible = valid.clone().byte()
+
+        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}
+
+    def _get_anno(self, seq_id):
+        anno = self.coco_set.anns[self.sequence_list[seq_id]]
+
+        return anno
+
+    def _get_frames(self, seq_id):
+        path = self.coco_set.loadImgs([self.coco_set.anns[self.sequence_list[seq_id]]['image_id']])[0]['file_name']
+        img = self.image_loader(os.path.join(self.img_pth, path))
+        return img
+
+    def get_meta_info(self, seq_id):
+        try:
+            cat_dict_current = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
+            object_meta = OrderedDict({'object_class_name': cat_dict_current['name'],
+                                       'motion_class': None,
+                                       'major_class': cat_dict_current['supercategory'],
+                                       'root_class': None,
+                                       'motion_adverb': None})
+        except:
+            object_meta = OrderedDict({'object_class_name': None,
+                                       'motion_class': None,
+                                       'major_class': None,
+                                       'root_class': None,
+                                       'motion_adverb': None})
+        return object_meta
+
+
+    def get_class_name(self, seq_id):
+        cat_dict_current = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
+        return cat_dict_current['name']
+
+    def get_frames(self, seq_id=None, frame_ids=None, anno=None):
+        # COCO is an image dataset. Thus we replicate the image denoted by seq_id len(frame_ids) times, and return a
+        # list containing these replicated images.
+        frame = self._get_frames(seq_id)
+
+        frame_list = [frame.copy() for _ in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[0, ...] for _ in frame_ids]
+
+        object_meta = self.get_meta_info(seq_id)
+
+        return frame_list, anno_frames, object_meta
--- a/lib/train/dataset/coco_seq_lmdb.py
+++ b/lib/train/dataset/coco_seq_lmdb.py
@@ -0,0 +1,177 @@
+import os
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+import torch
+import random
+from collections import OrderedDict
+from lib.train.admin import env_settings
+from lib.train.dataset.COCO_tool import COCO
+from lib.utils.lmdb_utils import decode_img, decode_json
+import time
+
+class MSCOCOSeq_lmdb(BaseVideoDataset):
+    """ The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.
+
+    Publication:
+        Microsoft COCO: Common Objects in Context.
+        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
+        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
+        ECCV, 2014
+        https://arxiv.org/pdf/1405.0312.pdf
+
+    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
+    organized as follows.
+        - coco_root
+            - annotations
+                - instances_train2014.json
+                - instances_train2017.json
+            - images
+                - train2014
+                - train2017
+
+    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
+    """
+
+    def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"):
+        """
+        args:
+            root - path to the coco dataset.
+            image_loader (default_image_loader) -  The function to read the images. If installed,
+                                                   jpeg4py (https://github.com/ajkxyz/jpeg4py) is used by default. Else,
+                                                   opencv's imread is used.
+            data_fraction (None) - Fraction of images to be used. The images are selected randomly. If None, all the
+                                  images  will be used
+            split - 'train' or 'val'.
+            version - version of coco dataset (2014 or 2017)
+        """
+        root = env_settings().coco_dir if root is None else root
+        super().__init__('COCO_lmdb', root, image_loader)
+        self.root = root
+        self.img_pth = 'images/{}{}/'.format(split, version)
+        self.anno_path = 'annotations/instances_{}{}.json'.format(split, version)
+
+        # Load the COCO set.
+        print('loading annotations into memory...')
+        tic = time.time()
+        coco_json = decode_json(root, self.anno_path)
+        print('Done (t={:0.2f}s)'.format(time.time() - tic))
+
+        self.coco_set = COCO(coco_json)
+
+        self.cats = self.coco_set.cats
+
+        self.class_list = self.get_class_list()
+
+        self.sequence_list = self._get_sequence_list()
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
+        self.seq_per_class = self._build_seq_per_class()
+
+    def _get_sequence_list(self):
+        ann_list = list(self.coco_set.anns.keys())
+        seq_list = [a for a in ann_list if self.coco_set.anns[a]['iscrowd'] == 0]
+
+        return seq_list
+
+    def is_video_sequence(self):
+        return False
+
+    def get_num_classes(self):
+        return len(self.class_list)
+
+    def get_name(self):
+        return 'coco_lmdb'
+
+    def has_class_info(self):
+        return True
+
+    def get_class_list(self):
+        class_list = []
+        for cat_id in self.cats.keys():
+            class_list.append(self.cats[cat_id]['name'])
+        return class_list
+
+    def has_segmentation_info(self):
+        return True
+
+    def get_num_sequences(self):
+        return len(self.sequence_list)
+
+    def _build_seq_per_class(self):
+        seq_per_class = {}
+        for i, seq in enumerate(self.sequence_list):
+            class_name = self.cats[self.coco_set.anns[seq]['category_id']]['name']
+            if class_name not in seq_per_class:
+                seq_per_class[class_name] = [i]
+            else:
+                seq_per_class[class_name].append(i)
+
+        return seq_per_class
+
+    def get_sequences_in_class(self, class_name):
+        return self.seq_per_class[class_name]
+
+    def get_sequence_info(self, seq_id):
+        anno = self._get_anno(seq_id)
+
+        bbox = torch.Tensor(anno['bbox']).view(1, 4)
+
+        mask = torch.Tensor(self.coco_set.annToMask(anno)).unsqueeze(dim=0)
+
+        '''2021.1.3 To avoid too small bounding boxes. Here we change the threshold to 50 pixels'''
+        valid = (bbox[:, 2] > 50) & (bbox[:, 3] > 50)
+
+        visible = valid.clone().byte()
+
+        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}
+
+    def _get_anno(self, seq_id):
+        anno = self.coco_set.anns[self.sequence_list[seq_id]]
+
+        return anno
+
+    def _get_frames(self, seq_id):
+        path = self.coco_set.loadImgs([self.coco_set.anns[self.sequence_list[seq_id]]['image_id']])[0]['file_name']
+        # img = self.image_loader(os.path.join(self.img_pth, path))
+        img = decode_img(self.root, os.path.join(self.img_pth, path))
+        return img
+
+    def get_meta_info(self, seq_id):
+        try:
+            cat_dict_current = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
+            object_meta = OrderedDict({'object_class_name': cat_dict_current['name'],
+                                       'motion_class': None,
+                                       'major_class': cat_dict_current['supercategory'],
+                                       'root_class': None,
+                                       'motion_adverb': None})
+        except:
+            object_meta = OrderedDict({'object_class_name': None,
+                                       'motion_class': None,
+                                       'major_class': None,
+                                       'root_class': None,
+                                       'motion_adverb': None})
+        return object_meta
+
+
+    def get_class_name(self, seq_id):
+        cat_dict_current = self.cats[self.coco_set.anns[self.sequence_list[seq_id]]['category_id']]
+        return cat_dict_current['name']
+
+    def get_frames(self, seq_id=None, frame_ids=None, anno=None):
+        # COCO is an image dataset. Thus we replicate the image denoted by seq_id len(frame_ids) times, and return a
+        # list containing these replicated images.
+        frame = self._get_frames(seq_id)
+
+        frame_list = [frame.copy() for _ in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[0, ...] for _ in frame_ids]
+
+        object_meta = self.get_meta_info(seq_id)
+
+        return frame_list, anno_frames, object_meta
--- a/lib/train/dataset/got10k.py
+++ b/lib/train/dataset/got10k.py
@@ -0,0 +1,186 @@
+import os
+import os.path
+import numpy as np
+import torch
+import csv
+import pandas
+import random
+from collections import OrderedDict
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+from lib.train.admin import env_settings
+
+
+class Got10k(BaseVideoDataset):
+    """ GOT-10k dataset.
+
+    Publication:
+        GOT-10k: A Large High-Diversity Benchmark for Generic Object Tracking in the Wild
+        Lianghua Huang, Xin Zhao, and Kaiqi Huang
+        arXiv:1810.11981, 2018
+        https://arxiv.org/pdf/1810.11981.pdf
+
+    Download dataset from http://got-10k.aitestunion.com/downloads
+    """
+
+    def __init__(self, root=None, image_loader=jpeg4py_loader, split=None, seq_ids=None, data_fraction=None):
+        """
+        args:
+            root - path to the got-10k training data. Note: This should point to the 'train' folder inside GOT-10k
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+            split - 'train' or 'val'. Note: The validation split here is a subset of the official got-10k train split,
+                    not NOT the official got-10k validation split. To use the official validation split, provide that as
+                    the root folder instead.
+            seq_ids - List containing the ids of the videos to be used for training. Note: Only one of 'split' or 'seq_ids'
+                        options can be used at the same time.
+            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
+        """
+        root = env_settings().got10k_dir if root is None else root
+        super().__init__('GOT10k', root, image_loader)
+
+        # all folders inside the root
+        self.sequence_list = self._get_sequence_list()
+
+        # seq_id is the index of the folder inside the got10k root path
+        if split is not None:
+            if seq_ids is not None:
+                raise ValueError('Cannot set both split_name and seq_ids.')
+            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
+            if split == 'train':
+                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_train_split.txt')
+            elif split == 'val':
+                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_val_split.txt')
+            elif split == 'train_full':
+                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_train_full_split.txt')
+            elif split == 'vottrain':
+                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_vot_train_split.txt')
+            elif split == 'votval':
+                file_path = os.path.join(ltr_path, 'data_specs', 'got10k_vot_val_split.txt')
+            else:
+                raise ValueError('Unknown split name.')
+            # seq_ids = pandas.read_csv(file_path, header=None, squeeze=True, dtype=np.int64).values.tolist()
+            seq_ids = pandas.read_csv(file_path, header=None, dtype=np.int64).squeeze("columns").values.tolist()
+        elif seq_ids is None:
+            seq_ids = list(range(0, len(self.sequence_list)))
+
+        self.sequence_list = [self.sequence_list[i] for i in seq_ids]
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
+
+        self.sequence_meta_info = self._load_meta_info()
+        self.seq_per_class = self._build_seq_per_class()
+
+        self.class_list = list(self.seq_per_class.keys())
+        self.class_list.sort()
+
+    def get_name(self):
+        return 'got10k'
+
+    def has_class_info(self):
+        return True
+
+    def has_occlusion_info(self):
+        return True
+
+    def _load_meta_info(self):
+        sequence_meta_info = {s: self._read_meta(os.path.join(self.root, s)) for s in self.sequence_list}
+        return sequence_meta_info
+
+    def _read_meta(self, seq_path):
+        try:
+            with open(os.path.join(seq_path, 'meta_info.ini')) as f:
+                meta_info = f.readlines()
+            object_meta = OrderedDict({'object_class_name': meta_info[5].split(': ')[-1][:-1],
+                                       'motion_class': meta_info[6].split(': ')[-1][:-1],
+                                       'major_class': meta_info[7].split(': ')[-1][:-1],
+                                       'root_class': meta_info[8].split(': ')[-1][:-1],
+                                       'motion_adverb': meta_info[9].split(': ')[-1][:-1]})
+        except:
+            object_meta = OrderedDict({'object_class_name': None,
+                                       'motion_class': None,
+                                       'major_class': None,
+                                       'root_class': None,
+                                       'motion_adverb': None})
+        return object_meta
+
+    def _build_seq_per_class(self):
+        seq_per_class = {}
+
+        for i, s in enumerate(self.sequence_list):
+            object_class = self.sequence_meta_info[s]['object_class_name']
+            if object_class in seq_per_class:
+                seq_per_class[object_class].append(i)
+            else:
+                seq_per_class[object_class] = [i]
+
+        return seq_per_class
+
+    def get_sequences_in_class(self, class_name):
+        return self.seq_per_class[class_name]
+
+    def _get_sequence_list(self):
+        with open(os.path.join(self.root, 'list.txt')) as f:
+            dir_list = list(csv.reader(f))
+        dir_list = [dir_name[0] for dir_name in dir_list]
+        return dir_list
+
+    def _read_bb_anno(self, seq_path):
+        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
+        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values
+        return torch.tensor(gt)
+
+    def _read_target_visible(self, seq_path):
+        # Read full occlusion and out_of_view
+        occlusion_file = os.path.join(seq_path, "absence.label")
+        cover_file = os.path.join(seq_path, "cover.label")
+
+        with open(occlusion_file, 'r', newline='') as f:
+            occlusion = torch.ByteTensor([int(v[0]) for v in csv.reader(f)])
+        with open(cover_file, 'r', newline='') as f:
+            cover = torch.ByteTensor([int(v[0]) for v in csv.reader(f)])
+
+        target_visible = ~occlusion & (cover>0).byte()
+
+        visible_ratio = cover.float() / 8
+        return target_visible, visible_ratio
+
+    def _get_sequence_path(self, seq_id):
+        return os.path.join(self.root, self.sequence_list[seq_id])
+
+    def get_sequence_info(self, seq_id):
+        seq_path = self._get_sequence_path(seq_id)
+        bbox = self._read_bb_anno(seq_path)
+
+        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
+        visible, visible_ratio = self._read_target_visible(seq_path)
+        visible = visible & valid.byte()
+
+        return {'bbox': bbox, 'valid': valid, 'visible': visible, 'visible_ratio': visible_ratio}
+
+    def _get_frame_path(self, seq_path, frame_id):
+        return os.path.join(seq_path, '{:08}.jpg'.format(frame_id+1))    # frames start from 1
+
+    def _get_frame(self, seq_path, frame_id):
+        return self.image_loader(self._get_frame_path(seq_path, frame_id))
+
+    def get_class_name(self, seq_id):
+        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]
+
+        return obj_meta['object_class_name']
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        seq_path = self._get_sequence_path(seq_id)
+        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]
+
+        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]
+
+        return frame_list, anno_frames, obj_meta
--- a/lib/train/dataset/got10k_lmdb.py
+++ b/lib/train/dataset/got10k_lmdb.py
@@ -0,0 +1,183 @@
+import os
+import os.path
+import numpy as np
+import torch
+import csv
+import pandas
+import random
+from collections import OrderedDict
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+from lib.train.admin import env_settings
+
+'''2021.1.16 Got10k for loading lmdb dataset'''
+from lib.utils.lmdb_utils import *
+
+
+class Got10k_lmdb(BaseVideoDataset):
+
+    def __init__(self, root=None, image_loader=jpeg4py_loader, split=None, seq_ids=None, data_fraction=None):
+        """
+        args:
+            root - path to the got-10k training data. Note: This should point to the 'train' folder inside GOT-10k
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+            split - 'train' or 'val'. Note: The validation split here is a subset of the official got-10k train split,
+                    not NOT the official got-10k validation split. To use the official validation split, provide that as
+                    the root folder instead.
+            seq_ids - List containing the ids of the videos to be used for training. Note: Only one of 'split' or 'seq_ids'
+                        options can be used at the same time.
+            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
+            use_lmdb - whether the dataset is stored in lmdb format
+        """
+        root = env_settings().got10k_lmdb_dir if root is None else root
+        super().__init__('GOT10k_lmdb', root, image_loader)
+
+        # all folders inside the root
+        self.sequence_list = self._get_sequence_list()
+
+        # seq_id is the index of the folder inside the got10k root path
+        if split is not None:
+            if seq_ids is not None:
+                raise ValueError('Cannot set both split_name and seq_ids.')
+            train_lib_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
+            if split == 'train':
+                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_train_split.txt')
+            elif split == 'val':
+                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_val_split.txt')
+            elif split == 'train_full':
+                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_train_full_split.txt')
+            elif split == 'vottrain':
+                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_vot_train_split.txt')
+            elif split == 'votval':
+                file_path = os.path.join(train_lib_path, 'data_specs', 'got10k_vot_val_split.txt')
+            else:
+                raise ValueError('Unknown split name.')
+            seq_ids = pandas.read_csv(file_path, header=None, squeeze=True, dtype=np.int64).values.tolist()
+        elif seq_ids is None:
+            seq_ids = list(range(0, len(self.sequence_list)))
+
+        self.sequence_list = [self.sequence_list[i] for i in seq_ids]
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
+
+        self.sequence_meta_info = self._load_meta_info()
+        self.seq_per_class = self._build_seq_per_class()
+
+        self.class_list = list(self.seq_per_class.keys())
+        self.class_list.sort()
+
+    def get_name(self):
+        return 'got10k_lmdb'
+
+    def has_class_info(self):
+        return True
+
+    def has_occlusion_info(self):
+        return True
+
+    def _load_meta_info(self):
+        def _read_meta(meta_info):
+
+            object_meta = OrderedDict({'object_class_name': meta_info[5].split(': ')[-1],
+                                       'motion_class': meta_info[6].split(': ')[-1],
+                                       'major_class': meta_info[7].split(': ')[-1],
+                                       'root_class': meta_info[8].split(': ')[-1],
+                                       'motion_adverb': meta_info[9].split(': ')[-1]})
+
+            return object_meta
+        sequence_meta_info = {}
+        for s in self.sequence_list:
+            try:
+                meta_str = decode_str(self.root, "train/%s/meta_info.ini" %s)
+                sequence_meta_info[s] = _read_meta(meta_str.split('\n'))
+            except:
+                sequence_meta_info[s] = OrderedDict({'object_class_name': None,
+                                                     'motion_class': None,
+                                                     'major_class': None,
+                                                     'root_class': None,
+                                                     'motion_adverb': None})
+        return sequence_meta_info
+
+    def _build_seq_per_class(self):
+        seq_per_class = {}
+
+        for i, s in enumerate(self.sequence_list):
+            object_class = self.sequence_meta_info[s]['object_class_name']
+            if object_class in seq_per_class:
+                seq_per_class[object_class].append(i)
+            else:
+                seq_per_class[object_class] = [i]
+
+        return seq_per_class
+
+    def get_sequences_in_class(self, class_name):
+        return self.seq_per_class[class_name]
+
+    def _get_sequence_list(self):
+        dir_str = decode_str(self.root, 'train/list.txt')
+        dir_list = dir_str.split('\n')
+        return dir_list
+
+    def _read_bb_anno(self, seq_path):
+        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
+        gt_str_list = decode_str(self.root, bb_anno_file).split('\n')[:-1]  # the last line in got10k is empty
+        gt_list = [list(map(float, line.split(','))) for line in gt_str_list]
+        gt_arr = np.array(gt_list).astype(np.float32)
+
+        return torch.tensor(gt_arr)
+
+    def _read_target_visible(self, seq_path):
+        # full occlusion and out_of_view files
+        occlusion_file = os.path.join(seq_path, "absence.label")
+        cover_file = os.path.join(seq_path, "cover.label")
+        # Read these files
+        occ_list = list(map(int, decode_str(self.root, occlusion_file).split('\n')[:-1]))  # the last line in got10k is empty
+        occlusion = torch.ByteTensor(occ_list)
+        cover_list = list(map(int, decode_str(self.root, cover_file).split('\n')[:-1]))  # the last line in got10k is empty
+        cover = torch.ByteTensor(cover_list)
+
+        target_visible = ~occlusion & (cover>0).byte()
+
+        visible_ratio = cover.float() / 8
+        return target_visible, visible_ratio
+
+    def _get_sequence_path(self, seq_id):
+        return os.path.join("train", self.sequence_list[seq_id])
+
+    def get_sequence_info(self, seq_id):
+        seq_path = self._get_sequence_path(seq_id)
+        bbox = self._read_bb_anno(seq_path)
+
+        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
+        visible, visible_ratio = self._read_target_visible(seq_path)
+        visible = visible & valid.byte()
+
+        return {'bbox': bbox, 'valid': valid, 'visible': visible, 'visible_ratio': visible_ratio}
+
+    def _get_frame_path(self, seq_path, frame_id):
+        return os.path.join(seq_path, '{:08}.jpg'.format(frame_id+1))    # frames start from 1
+
+    def _get_frame(self, seq_path, frame_id):
+        return decode_img(self.root, self._get_frame_path(seq_path, frame_id))
+
+    def get_class_name(self, seq_id):
+        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]
+
+        return obj_meta['object_class_name']
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        seq_path = self._get_sequence_path(seq_id)
+        obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]]
+
+        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]
+
+        return frame_list, anno_frames, obj_meta
--- a/lib/train/dataset/imagenetvid.py
+++ b/lib/train/dataset/imagenetvid.py
@@ -0,0 +1,159 @@
+import os
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+import xml.etree.ElementTree as ET
+import json
+import torch
+from collections import OrderedDict
+from lib.train.admin import env_settings
+
+
+def get_target_to_image_ratio(seq):
+    anno = torch.Tensor(seq['anno'])
+    img_sz = torch.Tensor(seq['image_size'])
+    return (anno[0, 2:4].prod() / (img_sz.prod())).sqrt()
+
+
+class ImagenetVID(BaseVideoDataset):
+    """ Imagenet VID dataset.
+
+    Publication:
+        ImageNet Large Scale Visual Recognition Challenge
+        Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy,
+        Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei
+        IJCV, 2015
+        https://arxiv.org/pdf/1409.0575.pdf
+
+    Download the dataset from http://image-net.org/
+    """
+    def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1):
+        """
+        args:
+            root - path to the imagenet vid dataset.
+            image_loader (default_image_loader) -  The function to read the images. If installed,
+                                                   jpeg4py (https://github.com/ajkxyz/jpeg4py) is used by default. Else,
+                                                   opencv's imread is used.
+            min_length - Minimum allowed sequence length.
+            max_target_area - max allowed ratio between target area and image area. Can be used to filter out targets
+                                which cover complete image.
+        """
+        root = env_settings().imagenet_dir if root is None else root
+        super().__init__("imagenetvid", root, image_loader)
+
+        cache_file = os.path.join(root, 'cache.json')
+        if os.path.isfile(cache_file):
+            # If available, load the pre-processed cache file containing meta-info for each sequence
+            with open(cache_file, 'r') as f:
+                sequence_list_dict = json.load(f)
+
+            self.sequence_list = sequence_list_dict
+        else:
+            # Else process the imagenet annotations and generate the cache file
+            self.sequence_list = self._process_anno(root)
+
+            with open(cache_file, 'w') as f:
+                json.dump(self.sequence_list, f)
+
+        # Filter the sequences based on min_length and max_target_area in the first frame
+        self.sequence_list = [x for x in self.sequence_list if len(x['anno']) >= min_length and
+                              get_target_to_image_ratio(x) < max_target_area]
+
+    def get_name(self):
+        return 'imagenetvid'
+
+    def get_num_sequences(self):
+        return len(self.sequence_list)
+
+    def get_sequence_info(self, seq_id):
+        bb_anno = torch.Tensor(self.sequence_list[seq_id]['anno'])
+        valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0)
+        visible = torch.ByteTensor(self.sequence_list[seq_id]['target_visible']) & valid.byte()
+        return {'bbox': bb_anno, 'valid': valid, 'visible': visible}
+
+    def _get_frame(self, sequence, frame_id):
+        set_name = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id'])
+        vid_name = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id'])
+        frame_number = frame_id + sequence['start_frame']
+        frame_path = os.path.join(self.root, 'Data', 'VID', 'train', set_name, vid_name,
+                                  '{:06d}.JPEG'.format(frame_number))
+        return self.image_loader(frame_path)
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        sequence = self.sequence_list[seq_id]
+
+        frame_list = [self._get_frame(sequence, f) for f in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        # Create anno dict
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]
+
+        # added the class info to the meta info
+        object_meta = OrderedDict({'object_class': sequence['class_name'],
+                                   'motion_class': None,
+                                   'major_class': None,
+                                   'root_class': None,
+                                   'motion_adverb': None})
+
+        return frame_list, anno_frames, object_meta
+
+    def _process_anno(self, root):
+        # Builds individual tracklets
+        base_vid_anno_path = os.path.join(root, 'Annotations', 'VID', 'train')
+
+        all_sequences = []
+        for set in sorted(os.listdir(base_vid_anno_path)):
+            set_id = int(set.split('_')[-1])
+            for vid in sorted(os.listdir(os.path.join(base_vid_anno_path, set))):
+
+                vid_id = int(vid.split('_')[-1])
+                anno_files = sorted(os.listdir(os.path.join(base_vid_anno_path, set, vid)))
+
+                frame1_anno = ET.parse(os.path.join(base_vid_anno_path, set, vid, anno_files[0]))
+                image_size = [int(frame1_anno.find('size/width').text), int(frame1_anno.find('size/height').text)]
+
+                objects = [ET.ElementTree(file=os.path.join(base_vid_anno_path, set, vid, f)).findall('object')
+                           for f in anno_files]
+
+                tracklets = {}
+
+                # Find all tracklets along with start frame
+                for f_id, all_targets in enumerate(objects):
+                    for target in all_targets:
+                        tracklet_id = target.find('trackid').text
+                        if tracklet_id not in tracklets:
+                            tracklets[tracklet_id] = f_id
+
+                for tracklet_id, tracklet_start in tracklets.items():
+                    tracklet_anno = []
+                    target_visible = []
+                    class_name_id = None
+
+                    for f_id in range(tracklet_start, len(objects)):
+                        found = False
+                        for target in objects[f_id]:
+                            if target.find('trackid').text == tracklet_id:
+                                if not class_name_id:
+                                    class_name_id = target.find('name').text
+                                x1 = int(target.find('bndbox/xmin').text)
+                                y1 = int(target.find('bndbox/ymin').text)
+                                x2 = int(target.find('bndbox/xmax').text)
+                                y2 = int(target.find('bndbox/ymax').text)
+
+                                tracklet_anno.append([x1, y1, x2 - x1, y2 - y1])
+                                target_visible.append(target.find('occluded').text == '0')
+
+                                found = True
+                                break
+                        if not found:
+                            break
+
+                    new_sequence = {'set_id': set_id, 'vid_id': vid_id, 'class_name': class_name_id,
+                                    'start_frame': tracklet_start, 'anno': tracklet_anno,
+                                    'target_visible': target_visible, 'image_size': image_size}
+                    all_sequences.append(new_sequence)
+
+        return all_sequences
--- a/lib/train/dataset/imagenetvid_lmdb.py
+++ b/lib/train/dataset/imagenetvid_lmdb.py
@@ -0,0 +1,90 @@
+import os
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+import torch
+from collections import OrderedDict
+from lib.train.admin import env_settings
+from lib.utils.lmdb_utils import decode_img, decode_json
+
+
+def get_target_to_image_ratio(seq):
+    anno = torch.Tensor(seq['anno'])
+    img_sz = torch.Tensor(seq['image_size'])
+    return (anno[0, 2:4].prod() / (img_sz.prod())).sqrt()
+
+
+class ImagenetVID_lmdb(BaseVideoDataset):
+    """ Imagenet VID dataset.
+
+    Publication:
+        ImageNet Large Scale Visual Recognition Challenge
+        Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy,
+        Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei
+        IJCV, 2015
+        https://arxiv.org/pdf/1409.0575.pdf
+
+    Download the dataset from http://image-net.org/
+    """
+    def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1):
+        """
+        args:
+            root - path to the imagenet vid dataset.
+            image_loader (default_image_loader) -  The function to read the images. If installed,
+                                                   jpeg4py (https://github.com/ajkxyz/jpeg4py) is used by default. Else,
+                                                   opencv's imread is used.
+            min_length - Minimum allowed sequence length.
+            max_target_area - max allowed ratio between target area and image area. Can be used to filter out targets
+                                which cover complete image.
+        """
+        root = env_settings().imagenet_dir if root is None else root
+        super().__init__("imagenetvid_lmdb", root, image_loader)
+
+        sequence_list_dict = decode_json(root, "cache.json")
+        self.sequence_list = sequence_list_dict
+
+        # Filter the sequences based on min_length and max_target_area in the first frame
+        self.sequence_list = [x for x in self.sequence_list if len(x['anno']) >= min_length and
+                              get_target_to_image_ratio(x) < max_target_area]
+
+    def get_name(self):
+        return 'imagenetvid_lmdb'
+
+    def get_num_sequences(self):
+        return len(self.sequence_list)
+
+    def get_sequence_info(self, seq_id):
+        bb_anno = torch.Tensor(self.sequence_list[seq_id]['anno'])
+        valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0)
+        visible = torch.ByteTensor(self.sequence_list[seq_id]['target_visible']) & valid.byte()
+        return {'bbox': bb_anno, 'valid': valid, 'visible': visible}
+
+    def _get_frame(self, sequence, frame_id):
+        set_name = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id'])
+        vid_name = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id'])
+        frame_number = frame_id + sequence['start_frame']
+        frame_path = os.path.join('Data', 'VID', 'train', set_name, vid_name,
+                                  '{:06d}.JPEG'.format(frame_number))
+        return decode_img(self.root, frame_path)
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        sequence = self.sequence_list[seq_id]
+
+        frame_list = [self._get_frame(sequence, f) for f in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        # Create anno dict
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]
+
+        # added the class info to the meta info
+        object_meta = OrderedDict({'object_class': sequence['class_name'],
+                                   'motion_class': None,
+                                   'major_class': None,
+                                   'root_class': None,
+                                   'motion_adverb': None})
+
+        return frame_list, anno_frames, object_meta
+
--- a/lib/train/dataset/lasot.py
+++ b/lib/train/dataset/lasot.py
@@ -0,0 +1,169 @@
+import os
+import os.path
+import torch
+import numpy as np
+import pandas
+import csv
+import random
+from collections import OrderedDict
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+from lib.train.admin import env_settings
+
+
+class Lasot(BaseVideoDataset):
+    """ LaSOT dataset.
+
+    Publication:
+        LaSOT: A High-quality Benchmark for Large-scale Single Object Tracking
+        Heng Fan, Liting Lin, Fan Yang, Peng Chu, Ge Deng, Sijia Yu, Hexin Bai, Yong Xu, Chunyuan Liao and Haibin Ling
+        CVPR, 2019
+        https://arxiv.org/pdf/1809.07845.pdf
+
+    Download the dataset from https://cis.temple.edu/lasot/download.html
+    """
+
+    def __init__(self, root=None, image_loader=jpeg4py_loader, vid_ids=None, split=None, data_fraction=None):
+        """
+        args:
+            root - path to the lasot dataset.
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+            vid_ids - List containing the ids of the videos (1 - 20) used for training. If vid_ids = [1, 3, 5], then the
+                    videos with subscripts -1, -3, and -5 from each class will be used for training.
+            split - If split='train', the official train split (protocol-II) is used for training. Note: Only one of
+                    vid_ids or split option can be used at a time.
+            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
+        """
+        root = env_settings().lasot_dir if root is None else root
+        super().__init__('LaSOT', root, image_loader)
+
+        # Keep a list of all classes
+        self.class_list = [f for f in os.listdir(self.root)]
+        self.class_to_id = {cls_name: cls_id for cls_id, cls_name in enumerate(self.class_list)}
+
+        self.sequence_list = self._build_sequence_list(vid_ids, split)
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
+
+        self.seq_per_class = self._build_class_list()
+
+    def _build_sequence_list(self, vid_ids=None, split=None):
+        if split is not None:
+            if vid_ids is not None:
+                raise ValueError('Cannot set both split_name and vid_ids.')
+            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
+            if split == 'train':
+                file_path = os.path.join(ltr_path, 'data_specs', 'lasot_train_split.txt')
+            else:
+                raise ValueError('Unknown split name.')
+            # sequence_list = pandas.read_csv(file_path, header=None, squeeze=True).values.tolist()
+            sequence_list = pandas.read_csv(file_path, header=None).squeeze("columns").values.tolist()
+        elif vid_ids is not None:
+            sequence_list = [c+'-'+str(v) for c in self.class_list for v in vid_ids]
+        else:
+            raise ValueError('Set either split_name or vid_ids.')
+
+        return sequence_list
+
+    def _build_class_list(self):
+        seq_per_class = {}
+        for seq_id, seq_name in enumerate(self.sequence_list):
+            class_name = seq_name.split('-')[0]
+            if class_name in seq_per_class:
+                seq_per_class[class_name].append(seq_id)
+            else:
+                seq_per_class[class_name] = [seq_id]
+
+        return seq_per_class
+
+    def get_name(self):
+        return 'lasot'
+
+    def has_class_info(self):
+        return True
+
+    def has_occlusion_info(self):
+        return True
+
+    def get_num_sequences(self):
+        return len(self.sequence_list)
+
+    def get_num_classes(self):
+        return len(self.class_list)
+
+    def get_sequences_in_class(self, class_name):
+        return self.seq_per_class[class_name]
+
+    def _read_bb_anno(self, seq_path):
+        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
+        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values
+        return torch.tensor(gt)
+
+    def _read_target_visible(self, seq_path):
+        # Read full occlusion and out_of_view
+        occlusion_file = os.path.join(seq_path, "full_occlusion.txt")
+        out_of_view_file = os.path.join(seq_path, "out_of_view.txt")
+
+        with open(occlusion_file, 'r', newline='') as f:
+            occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]])
+        with open(out_of_view_file, 'r') as f:
+            out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]])
+
+        target_visible = ~occlusion & ~out_of_view
+
+        return target_visible
+
+    def _get_sequence_path(self, seq_id):
+        seq_name = self.sequence_list[seq_id]
+        class_name = seq_name.split('-')[0]
+        vid_id = seq_name.split('-')[1]
+
+        return os.path.join(self.root, class_name, class_name + '-' + vid_id)
+
+    def get_sequence_info(self, seq_id):
+        seq_path = self._get_sequence_path(seq_id)
+        bbox = self._read_bb_anno(seq_path)
+
+        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
+        visible = self._read_target_visible(seq_path) & valid.byte()
+
+        return {'bbox': bbox, 'valid': valid, 'visible': visible}
+
+    def _get_frame_path(self, seq_path, frame_id):
+        return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_id+1))    # frames start from 1
+
+    def _get_frame(self, seq_path, frame_id):
+        return self.image_loader(self._get_frame_path(seq_path, frame_id))
+
+    def _get_class(self, seq_path):
+        raw_class = seq_path.split('/')[-2]
+        return raw_class
+
+    def get_class_name(self, seq_id):
+        seq_path = self._get_sequence_path(seq_id)
+        obj_class = self._get_class(seq_path)
+
+        return obj_class
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        seq_path = self._get_sequence_path(seq_id)
+
+        obj_class = self._get_class(seq_path)
+        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]
+
+        object_meta = OrderedDict({'object_class_name': obj_class,
+                                   'motion_class': None,
+                                   'major_class': None,
+                                   'root_class': None,
+                                   'motion_adverb': None})
+
+        return frame_list, anno_frames, object_meta
--- a/lib/train/dataset/lasot_lmdb.py
+++ b/lib/train/dataset/lasot_lmdb.py
@@ -0,0 +1,165 @@
+import os
+import os.path
+import torch
+import numpy as np
+import pandas
+import csv
+import random
+from collections import OrderedDict
+from .base_video_dataset import BaseVideoDataset
+from lib.train.data import jpeg4py_loader
+from lib.train.admin import env_settings
+'''2021.1.16 Lasot for loading lmdb dataset'''
+from lib.utils.lmdb_utils import *
+
+
+class Lasot_lmdb(BaseVideoDataset):
+
+    def __init__(self, root=None, image_loader=jpeg4py_loader, vid_ids=None, split=None, data_fraction=None):
+        """
+        args:
+            root - path to the lasot dataset.
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+            vid_ids - List containing the ids of the videos (1 - 20) used for training. If vid_ids = [1, 3, 5], then the
+                    videos with subscripts -1, -3, and -5 from each class will be used for training.
+            split - If split='train', the official train split (protocol-II) is used for training. Note: Only one of
+                    vid_ids or split option can be used at a time.
+            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
+        """
+        root = env_settings().lasot_lmdb_dir if root is None else root
+        super().__init__('LaSOT_lmdb', root, image_loader)
+
+        self.sequence_list = self._build_sequence_list(vid_ids, split)
+        class_list = [seq_name.split('-')[0] for seq_name in self.sequence_list]
+        self.class_list = []
+        for ele in class_list:
+            if ele not in self.class_list:
+                self.class_list.append(ele)
+        # Keep a list of all classes
+        self.class_to_id = {cls_name: cls_id for cls_id, cls_name in enumerate(self.class_list)}
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list)*data_fraction))
+
+        self.seq_per_class = self._build_class_list()
+
+    def _build_sequence_list(self, vid_ids=None, split=None):
+        if split is not None:
+            if vid_ids is not None:
+                raise ValueError('Cannot set both split_name and vid_ids.')
+            ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
+            if split == 'train':
+                file_path = os.path.join(ltr_path, 'data_specs', 'lasot_train_split.txt')
+            else:
+                raise ValueError('Unknown split name.')
+            sequence_list = pandas.read_csv(file_path, header=None, squeeze=True).values.tolist()
+        elif vid_ids is not None:
+            sequence_list = [c+'-'+str(v) for c in self.class_list for v in vid_ids]
+        else:
+            raise ValueError('Set either split_name or vid_ids.')
+
+        return sequence_list
+
+    def _build_class_list(self):
+        seq_per_class = {}
+        for seq_id, seq_name in enumerate(self.sequence_list):
+            class_name = seq_name.split('-')[0]
+            if class_name in seq_per_class:
+                seq_per_class[class_name].append(seq_id)
+            else:
+                seq_per_class[class_name] = [seq_id]
+
+        return seq_per_class
+
+    def get_name(self):
+        return 'lasot_lmdb'
+
+    def has_class_info(self):
+        return True
+
+    def has_occlusion_info(self):
+        return True
+
+    def get_num_sequences(self):
+        return len(self.sequence_list)
+
+    def get_num_classes(self):
+        return len(self.class_list)
+
+    def get_sequences_in_class(self, class_name):
+        return self.seq_per_class[class_name]
+
+    def _read_bb_anno(self, seq_path):
+        bb_anno_file = os.path.join(seq_path, "groundtruth.txt")
+        gt_str_list = decode_str(self.root, bb_anno_file).split('\n')[:-1]  # the last line is empty
+        gt_list = [list(map(float, line.split(','))) for line in gt_str_list]
+        gt_arr = np.array(gt_list).astype(np.float32)
+        return torch.tensor(gt_arr)
+
+    def _read_target_visible(self, seq_path):
+        # Read full occlusion and out_of_view
+        occlusion_file = os.path.join(seq_path, "full_occlusion.txt")
+        out_of_view_file = os.path.join(seq_path, "out_of_view.txt")
+
+        occ_list = list(map(int, decode_str(self.root, occlusion_file).split(',')))
+        occlusion = torch.ByteTensor(occ_list)
+        out_view_list = list(map(int, decode_str(self.root, out_of_view_file).split(',')))
+        out_of_view = torch.ByteTensor(out_view_list)
+
+        target_visible = ~occlusion & ~out_of_view
+
+        return target_visible
+
+    def _get_sequence_path(self, seq_id):
+        seq_name = self.sequence_list[seq_id]
+        class_name = seq_name.split('-')[0]
+        vid_id = seq_name.split('-')[1]
+
+        return os.path.join(class_name, class_name + '-' + vid_id)
+
+    def get_sequence_info(self, seq_id):
+        seq_path = self._get_sequence_path(seq_id)
+        bbox = self._read_bb_anno(seq_path)
+
+        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
+        visible = self._read_target_visible(seq_path) & valid.byte()
+
+        return {'bbox': bbox, 'valid': valid, 'visible': visible}
+
+    def _get_frame_path(self, seq_path, frame_id):
+        return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_id+1))    # frames start from 1
+
+    def _get_frame(self, seq_path, frame_id):
+        return decode_img(self.root, self._get_frame_path(seq_path, frame_id))
+
+    def _get_class(self, seq_path):
+        raw_class = seq_path.split('/')[-2]
+        return raw_class
+
+    def get_class_name(self, seq_id):
+        seq_path = self._get_sequence_path(seq_id)
+        obj_class = self._get_class(seq_path)
+
+        return obj_class
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        seq_path = self._get_sequence_path(seq_id)
+
+        obj_class = self._get_class(seq_path)
+        frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]
+
+        object_meta = OrderedDict({'object_class_name': obj_class,
+                                   'motion_class': None,
+                                   'major_class': None,
+                                   'root_class': None,
+                                   'motion_adverb': None})
+
+        return frame_list, anno_frames, object_meta
--- a/lib/train/dataset/tracking_net.py
+++ b/lib/train/dataset/tracking_net.py
@@ -0,0 +1,151 @@
+import torch
+import os
+import os.path
+import numpy as np
+import pandas
+import random
+from collections import OrderedDict
+
+from lib.train.data import jpeg4py_loader
+from .base_video_dataset import BaseVideoDataset
+from lib.train.admin import env_settings
+
+
+def list_sequences(root, set_ids):
+    """ Lists all the videos in the input set_ids. Returns a list of tuples (set_id, video_name)
+
+    args:
+        root: Root directory to TrackingNet
+        set_ids: Sets (0-11) which are to be used
+
+    returns:
+        list - list of tuples (set_id, video_name) containing the set_id and video_name for each sequence
+    """
+    sequence_list = []
+
+    for s in set_ids:
+        anno_dir = os.path.join(root, "TRAIN_" + str(s), "anno")
+
+        sequences_cur_set = [(s, os.path.splitext(f)[0]) for f in os.listdir(anno_dir) if f.endswith('.txt')]
+        sequence_list += sequences_cur_set
+
+    return sequence_list
+
+
+class TrackingNet(BaseVideoDataset):
+    """ TrackingNet dataset.
+
+    Publication:
+        TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild.
+        Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem
+        ECCV, 2018
+        https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf
+
+    Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit.
+    """
+    def __init__(self, root=None, image_loader=jpeg4py_loader, set_ids=None, data_fraction=None):
+        """
+        args:
+            root        - The path to the TrackingNet folder, containing the training sets.
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+            set_ids (None) - List containing the ids of the TrackingNet sets to be used for training. If None, all the
+                            sets (0 - 11) will be used.
+            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
+        """
+        root = env_settings().trackingnet_dir if root is None else root
+        super().__init__('TrackingNet', root, image_loader)
+
+        if set_ids is None:
+            set_ids = [i for i in range(12)]
+
+        self.set_ids = set_ids
+
+        # Keep a list of all videos. Sequence list is a list of tuples (set_id, video_name) containing the set_id and
+        # video_name for each sequence
+        self.sequence_list = list_sequences(self.root, self.set_ids)
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list) * data_fraction))
+
+        self.seq_to_class_map, self.seq_per_class = self._load_class_info()
+
+        # we do not have the class_lists for the tracking net
+        self.class_list = list(self.seq_per_class.keys())
+        self.class_list.sort()
+
+    def _load_class_info(self):
+        ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
+        class_map_path = os.path.join(ltr_path, 'data_specs', 'trackingnet_classmap.txt')
+
+        with open(class_map_path, 'r') as f:
+            seq_to_class_map = {seq_class.split('\t')[0]: seq_class.rstrip().split('\t')[1] for seq_class in f}
+
+        seq_per_class = {}
+        for i, seq in enumerate(self.sequence_list):
+            class_name = seq_to_class_map.get(seq[1], 'Unknown')
+            if class_name not in seq_per_class:
+                seq_per_class[class_name] = [i]
+            else:
+                seq_per_class[class_name].append(i)
+
+        return seq_to_class_map, seq_per_class
+
+    def get_name(self):
+        return 'trackingnet'
+
+    def has_class_info(self):
+        return True
+
+    def get_sequences_in_class(self, class_name):
+        return self.seq_per_class[class_name]
+
+    def _read_bb_anno(self, seq_id):
+        set_id = self.sequence_list[seq_id][0]
+        vid_name = self.sequence_list[seq_id][1]
+        bb_anno_file = os.path.join(self.root, "TRAIN_" + str(set_id), "anno", vid_name + ".txt")
+        gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False,
+                             low_memory=False).values
+        return torch.tensor(gt)
+
+    def get_sequence_info(self, seq_id):
+        bbox = self._read_bb_anno(seq_id)
+
+        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
+        visible = valid.clone().byte()
+        return {'bbox': bbox, 'valid': valid, 'visible': visible}
+
+    def _get_frame(self, seq_id, frame_id):
+        set_id = self.sequence_list[seq_id][0]
+        vid_name = self.sequence_list[seq_id][1]
+        frame_path = os.path.join(self.root, "TRAIN_" + str(set_id), "frames", vid_name, str(frame_id) + ".jpg")
+        return self.image_loader(frame_path)
+
+    def _get_class(self, seq_id):
+        seq_name = self.sequence_list[seq_id][1]
+        return self.seq_to_class_map[seq_name]
+
+    def get_class_name(self, seq_id):
+        obj_class = self._get_class(seq_id)
+
+        return obj_class
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        frame_list = [self._get_frame(seq_id, f) for f in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        anno_frames = {}
+        for key, value in anno.items():
+            anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids]
+
+        obj_class = self._get_class(seq_id)
+
+        object_meta = OrderedDict({'object_class_name': obj_class,
+                                   'motion_class': None,
+                                   'major_class': None,
+                                   'root_class': None,
+                                   'motion_adverb': None})
+
+        return frame_list, anno_frames, object_meta
--- a/lib/train/dataset/tracking_net_lmdb.py
+++ b/lib/train/dataset/tracking_net_lmdb.py
@@ -0,0 +1,147 @@
+import torch
+import os
+import os.path
+import numpy as np
+import random
+from collections import OrderedDict
+
+from lib.train.data import jpeg4py_loader
+from .base_video_dataset import BaseVideoDataset
+from lib.train.admin import env_settings
+import json
+from lib.utils.lmdb_utils import decode_img, decode_str
+
+
+def list_sequences(root):
+    """ Reads the sequence index stored in <root>/seq_list.json.
+
+    args:
+        root: Root directory to TrackingNet; it must contain the file seq_list.json
+
+    returns:
+        the decoded JSON content of seq_list.json. Callers in this file index every entry as
+        (set_id, video_name).
+    """
+    index_path = os.path.join(root, "seq_list.json")
+    with open(index_path, "r") as index_file:
+        return json.load(index_file)
+
+
+class TrackingNet_lmdb(BaseVideoDataset):
+    """ TrackingNet dataset, read from lmdb databases.
+
+    Publication:
+        TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild.
+        Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem
+        ECCV, 2018
+        https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf
+
+    Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit.
+
+    Data read by this class, relative to root:
+        seq_list.json       - sequence index, every entry is indexed as (set_id, video_name)
+        TRAIN_<set_id>_lmdb - one database per set, queried for 'anno/<video_name>.txt' and
+                              'frames/<video_name>/<frame_id>.jpg'
+    """
+    def __init__(self, root=None, image_loader=jpeg4py_loader, set_ids=None, data_fraction=None):
+        """
+        args:
+            root        - The path to the TrackingNet folder, containing the training sets. If None, the
+                          trackingnet_lmdb_dir entry of env_settings() is used.
+            image_loader (jpeg4py_loader) -  The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
+                                            is used by default.
+            set_ids (None) - List containing the ids of the TrackingNet sets to be used for training. If None, all the
+                            sets (0 - 11) are assumed and every sequence of seq_list.json is kept. Otherwise only
+                            the sequences whose set_id is listed are kept.
+            data_fraction - Fraction of dataset to be used. The complete dataset is used by default
+        """
+        root = env_settings().trackingnet_lmdb_dir if root is None else root
+        super().__init__('TrackingNet_lmdb', root, image_loader)
+
+        # Keep a list of all videos. Sequence list is a list of (set_id, video_name) entries containing the set_id
+        # and video_name for each sequence
+        self.sequence_list = list_sequences(self.root)
+
+        if set_ids is None:
+            set_ids = list(range(12))
+        else:
+            # Honour an explicit selection: without this filter the set_ids argument had no effect at all.
+            wanted_sets = set(set_ids)
+            self.sequence_list = [seq for seq in self.sequence_list if seq[0] in wanted_sets]
+
+        self.set_ids = set_ids
+
+        if data_fraction is not None:
+            self.sequence_list = random.sample(self.sequence_list, int(len(self.sequence_list) * data_fraction))
+
+        self.seq_to_class_map, self.seq_per_class = self._load_class_info()
+
+        # we do not have the class_lists for the tracking net
+        self.class_list = list(self.seq_per_class.keys())
+        self.class_list.sort()
+
+    def _load_class_info(self):
+        """ Reads the sequence -> class mapping and groups the sequence indices by class.
+
+        The mapping is read from data_specs/trackingnet_classmap.txt (one directory above this file), a
+        tab-separated text file with the sequence name in the first column and the class name in the second.
+
+        returns:
+            dict - sequence name -> class name, for every line of the class map file
+            dict - class name -> list of indices into self.sequence_list. Sequences missing from the class map
+                   are collected under 'Unknown'.
+        """
+        ltr_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
+        class_map_path = os.path.join(ltr_path, 'data_specs', 'trackingnet_classmap.txt')
+
+        seq_to_class_map = {}
+        with open(class_map_path, 'r') as f:
+            for line in f:
+                fields = line.rstrip().split('\t')
+                # Skip blank or incomplete lines (e.g. a trailing empty line) instead of raising IndexError.
+                if len(fields) < 2:
+                    continue
+                seq_to_class_map[fields[0]] = fields[1]
+
+        seq_per_class = {}
+        for i, seq in enumerate(self.sequence_list):
+            class_name = seq_to_class_map.get(seq[1], 'Unknown')
+            seq_per_class.setdefault(class_name, []).append(i)
+
+        return seq_to_class_map, seq_per_class
+
+    def get_name(self):
+        """ Returns the identifier of this dataset, 'trackingnet_lmdb'. """
+        return 'trackingnet_lmdb'
+
+    def has_class_info(self):
+        """ Returns True: class names are available through the class map file. """
+        return True
+
+    def get_sequences_in_class(self, class_name):
+        """ Returns the list of sequence indices that belong to class_name.
+
+        raises:
+            KeyError - if no sequence of the dataset belongs to class_name
+        """
+        return self.seq_per_class[class_name]
+
+    def _read_bb_anno(self, seq_id):
+        """ Reads the bounding box annotations of a sequence from its lmdb database.
+
+        args:
+            seq_id - index of the sequence in self.sequence_list
+
+        returns:
+            torch.Tensor - float32 tensor with one row per annotation line, parsed from comma-separated values
+        """
+        set_id = self.sequence_list[seq_id][0]
+        vid_name = self.sequence_list[seq_id][1]
+        raw_anno = decode_str(os.path.join(self.root, "TRAIN_%d_lmdb" % set_id),
+                              os.path.join("anno", vid_name + ".txt"))
+        # splitlines() plus the blank-line filter keeps the last box even when the annotation text does not end
+        # with a newline; the previous split('\n')[:-1] silently dropped it in that case.
+        gt_list = [list(map(float, line.split(','))) for line in raw_anno.splitlines() if line.strip()]
+        gt_arr = np.array(gt_list, dtype=np.float32)
+        return torch.tensor(gt_arr)
+
+    def get_sequence_info(self, seq_id):
+        """ Collects the per-frame annotation tensors of one sequence.
+
+        args:
+            seq_id - index of the sequence in self.sequence_list
+
+        returns:
+            dict - 'bbox': the tensor returned by _read_bb_anno,
+                   'valid': boolean mask of the frames whose box columns 2 and 3 are both positive,
+                   'visible': a byte (uint8) copy of 'valid'
+        """
+        bbox = self._read_bb_anno(seq_id)
+
+        # NOTE(review): columns 2 and 3 are presumably box width and height (x, y, w, h layout) - confirm.
+        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
+        # No separate visibility annotation is read here, so visibility mirrors validity.
+        visible = valid.clone().byte()
+        return {'bbox': bbox, 'valid': valid, 'visible': visible}
+
+    def _get_frame(self, seq_id, frame_id):
+        """ Decodes a single frame of a sequence from its lmdb database.
+
+        args:
+            seq_id - index of the sequence in self.sequence_list
+            frame_id - frame number; the image is stored under '<frame_id>.jpg'
+
+        returns:
+            the value returned by decode_img for the frame
+        """
+        set_id = self.sequence_list[seq_id][0]
+        vid_name = self.sequence_list[seq_id][1]
+        return decode_img(os.path.join(self.root, "TRAIN_%d_lmdb" % set_id),
+                          os.path.join("frames", vid_name, str(frame_id) + ".jpg"))
+
+    def _get_class(self, seq_id):
+        """ Looks up the object class of a sequence.
+
+        Sequences that are missing from the class map yield 'Unknown', matching the class under which
+        _load_class_info groups them (a plain lookup raised KeyError for them).
+        """
+        seq_name = self.sequence_list[seq_id][1]
+        return self.seq_to_class_map.get(seq_name, 'Unknown')
+
+    def get_class_name(self, seq_id):
+        """ Returns the object class name of the sequence at index seq_id. """
+        return self._get_class(seq_id)
+
+    def get_frames(self, seq_id, frame_ids, anno=None):
+        """ Fetches the requested frames of a sequence together with their annotations.
+
+        args:
+            seq_id - index of the sequence in self.sequence_list
+            frame_ids - frame indices to fetch
+            anno (None) - annotation dict as returned by get_sequence_info. If None, it is computed here.
+
+        returns:
+            list - the decoded frames, in the order of frame_ids
+            dict - for every key of anno, a list holding a cloned slice of its value for each frame id
+            OrderedDict - object meta data; only 'object_class_name' is filled, the other fields are None
+        """
+        frame_list = [self._get_frame(seq_id, f) for f in frame_ids]
+
+        if anno is None:
+            anno = self.get_sequence_info(seq_id)
+
+        # Clone every slice so that callers cannot modify the sequence-level annotation tensors.
+        anno_frames = {key: [value[f_id, ...].clone() for f_id in frame_ids] for key, value in anno.items()}
+
+        obj_class = self._get_class(seq_id)
+
+        object_meta = OrderedDict({'object_class_name': obj_class,
+                                   'motion_class': None,
+                                   'major_class': None,
+                                   'root_class': None,
+                                   'motion_adverb': None})
+
+        return frame_list, anno_frames, object_meta