diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..37ed2f4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+# Folders
+__pycache__/
+build/
+*.egg-info
+
+
+# Files
+*.weights
+*.t7
+*.mp4
+*.avi
+*.so
+*.txt
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c2fccb5
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# deep_count
diff --git a/configs/deep_sort.yaml b/configs/deep_sort.yaml
new file mode 100644
index 0000000..8aa24ad
--- /dev/null
+++ b/configs/deep_sort.yaml
@@ -0,0 +1,11 @@
+DEEPSORT:
+  REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7"
+  REID_CKPT_Car: "./deep_sort/deep/checkpoint/ckpt_car.t7"
+  MAX_DIST: 0.2
+  MIN_CONFIDENCE: 0.3
+  NMS_MAX_OVERLAP: 1.0
+  MAX_IOU_DISTANCE: 0.7
+  MAX_AGE: 70
+  N_INIT: 3
+  NN_BUDGET: 100
+
\ No newline at end of file
diff --git a/configs/yolov3.yaml b/configs/yolov3.yaml
new file mode 100644
index 0000000..a46e474
--- /dev/null
+++ b/configs/yolov3.yaml
@@ -0,0 +1,7 @@
+YOLOV3:
+  CFG: "./detector/YOLOv3/cfg/yolov4.cfg"
+  WEIGHT: "./detector/YOLOv3/weight/yolov4.weights"
+  CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names"
+
+  SCORE_THRESH: 0.1
+  NMS_THRESH: 0.4
diff --git a/configs/yolov3_tiny.yaml b/configs/yolov3_tiny.yaml
new file mode 100644
index 0000000..1261e68
--- /dev/null
+++ b/configs/yolov3_tiny.yaml
@@ -0,0 +1,7 @@
+YOLOV3:
+  CFG: "./detector/YOLOv3/cfg/yolov3-tiny.cfg"
+  WEIGHT: "./detector/YOLOv3/weight/yolov3-tiny.weights"
+  CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names"
+
+  SCORE_THRESH: 0.5
+  NMS_THRESH: 0.4
\ No newline at end of file
diff --git a/configs/yolov4_onnx.yaml b/configs/yolov4_onnx.yaml
new file mode 100644
index 0000000..d63eb08
--- /dev/null
+++ b/configs/yolov4_onnx.yaml
@@ -0,0 +1,7 @@
+YOLOV4:
+  CFG: "./detector/YOLOv3/cfg/yolov4.cfg"
+  WEIGHT: "./detector/YOLOv3/weight/yolov4_1_3_416_416_static.onnx"
+  CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names"
+
+  SCORE_THRESH: 0.1
+  NMS_THRESH: 0.4
diff --git a/configs/yolov4_trt.yaml b/configs/yolov4_trt.yaml
new file mode 100644
index 0000000..367509d
--- /dev/null
+++ b/configs/yolov4_trt.yaml
@@ -0,0 +1,7 @@
+YOLOV4:
+  CFG: "./detector/YOLOv3/cfg/yolov4.cfg"
+  WEIGHT: "./detector/YOLOv3/weight/yolov4.engine"
+  CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names"
+
+  SCORE_THRESH: 0.4
+  NMS_THRESH: 0.4
diff --git a/dataset.py b/dataset.py
new file mode 100644
index 0000000..5cfdf29
--- /dev/null
+++ b/dataset.py
@@ -0,0 +1,111 @@
+import cv2
+import numpy as np
+from threading import Thread
+import time
+import os
+class LoadStreams:  # multiple IP or RTSP cameras
+    def __init__(self, sources='streams.txt', img_size=640):
+        self.mode = 'images'
+        self.img_size = img_size
+        sources = [sources]
+
+        n = len(sources)
+        self.imgs = [None] * n
+        self.sources = sources
+        for i, s in enumerate(sources):
+            # Start the thread to read frames from the video stream
+            print('%g/%g: %s... ' % (i + 1, n, s), end='')
+            cap = cv2.VideoCapture(0 if s == '0' else s)
+            assert cap.isOpened(), 'Failed to open %s' % s
+            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS) % 100
+            _, self.imgs[i] = cap.read()  # guarantee first frame
+            thread = Thread(target=self.update, args=(i, cap), daemon=True)
+            print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
+            thread.start()
+        print('')  # newline
+
+    def update(self, index, cap):
+        # Read next stream frame in a daemon thread
+        n = 0
+        while cap.isOpened():
+            n += 1
+            # _, self.imgs[index] = cap.read()
+            cap.grab()
+            if n == 4:  # read every 4th frame
+                _, self.imgs[index] = cap.retrieve()
+                n = 0
+            time.sleep(0.01)  # wait time
+
+    def __iter__(self):
+        self.count = -1
+        return self
+
+    def __next__(self):
+        self.count += 1
+        img0 = self.imgs.copy()
+        if cv2.waitKey(1) == ord('q'):  # q to quit
+            cv2.destroyAllWindows()
+            raise StopIteration
+        return self.sources, img0, None
+
+    def __len__(self):
+        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years
+
+
+
+class datasets:  # multiple IP or RTSP cameras
+    def __init__(self, sources='streams.txt', img_size=640):
+        self.mode = 'images'
+        self.img_size = img_size
+
+        if os.path.isfile(sources):
+            with open(sources, 'r') as f:
+                sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
+        else:
+            sources = [sources]
+
+        n = len(sources)
+        self.imgs = [None] * n
+        self.sources = sources
+        for i, s in enumerate(sources):
+            # Start the thread to read frames from the video stream
+            print('%g/%g: %s... ' % (i + 1, n, s), end='')
+            cap = cv2.VideoCapture(0 if s == '0' else s)
+            assert cap.isOpened(), 'Failed to open %s' % s
+            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS) % 100
+            _, self.imgs[i] = cap.read()  # guarantee first frame
+            thread = Thread(target=self.update, args=(i, cap), daemon=True)
+            print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
+            thread.start()
+        print('')  # newline
+
+    def update(self, index, cap):
+        # Read next stream frame in a daemon thread
+        n = 0
+        while cap.isOpened():
+            n += 1
+            # _, self.imgs[index] = cap.read()
+            cap.grab()
+            if n == 1:  # retrieve every frame (LoadStreams above keeps only every 4th)
+                _, self.imgs[index] = cap.retrieve()
+                n = 0
+            time.sleep(0.01)  # wait time
+
+    def __iter__(self):
+        self.count = -1
+        return self
+
+    def __next__(self):
+        self.count += 1
+        img0 = self.imgs.copy()
+        if cv2.waitKey(1) == ord('q'):  # q to quit
+            cv2.destroyAllWindows()
+            raise StopIteration
+        return self.sources, img0, None
+
+    def __len__(self):
+        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years
\ No newline at end of file
diff --git a/deep_sort/README.md b/deep_sort/README.md
new file mode 100644
index 0000000..e89c9b3
--- /dev/null
+++ b/deep_sort/README.md
@@ -0,0 +1,3 @@
+# Deep Sort
+
+This is the implementation of Deep SORT with PyTorch.
\ No newline at end of file
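
Both loaders expose the same iterator protocol, returning `(sources, imgs, None)` per step, so downstream code can swap them freely. A minimal usage sketch (not part of the diff; assumes a local webcam at index 0 and `dataset.py` on the import path):

```python
import cv2

from dataset import datasets

loader = datasets(sources='0')          # a path to a streams.txt also works
for sources, imgs, _ in loader:         # imgs: one latest frame per stream
    for src, frame in zip(sources, imgs):
        if frame is not None:           # the background thread may lag briefly
            cv2.imshow(src, frame)
    # pressing 'q' raises StopIteration inside __next__ and ends the loop
```
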
diff --git a/deep_sort/__init__.py b/deep_sort/__init__.py
new file mode 100644
index 0000000..8bb81a7
--- /dev/null
+++ b/deep_sort/__init__.py
@@ -0,0 +1,24 @@
+from deep_sort.deep_sort import DeepSort  # the working directory does not change on import (even for files in sub-directories), so the full package path is required
+
+
+__all__ = ['DeepSort', 'build_tracker', 'build_tracker_car']  # restrict `from deep_sort import *` to these names so it does not pull in extra variables
+
+def build_tracker(cfg, use_cuda):
+    return DeepSort(cfg.DEEPSORT.REID_CKPT,
+                    max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
+                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
+                    max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
+
+
+def build_tracker_car(cfg, use_cuda):
+    return DeepSort(cfg.DEEPSORT.REID_CKPT_Car,
+                    max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
+                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
+                    max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda, num_class=685)
+
+
+
+
+
+
+
diff --git a/deep_sort/deep/__init__.py b/deep_sort/deep/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/deep_sort/deep/checkpoint/.gitkeep b/deep_sort/deep/checkpoint/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/deep_sort/deep/evaluate.py b/deep_sort/deep/evaluate.py
new file mode 100644
index 0000000..85eaa6f
--- /dev/null
+++ b/deep_sort/deep/evaluate.py
@@ -0,0 +1,15 @@
+import torch
+
+features = torch.load("features.pth")
+qf = features["qf"]
+ql = features["ql"]
+gf = features["gf"]
+gl = features["gl"]
+
+scores = qf.mm(gf.t())
+res = scores.topk(5, dim=1)[1][:, 0]
+top1correct = gl[res].eq(ql).sum().item()
+
+print("Acc top1:{:.3f}".format(top1correct / ql.size(0)))
+
+
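
For reference, a sketch of how `build_tracker` is meant to be driven by `configs/deep_sort.yaml`. The yacs loader below is an assumption; any attribute-style config object that exposes `cfg.DEEPSORT.*` would work:

```python
from yacs.config import CfgNode  # assumed config library, not pinned by this diff

from deep_sort import build_tracker

with open("configs/deep_sort.yaml") as f:
    cfg = CfgNode.load_cfg(f)                  # exposes cfg.DEEPSORT.MAX_DIST, etc.

tracker = build_tracker(cfg, use_cuda=False)   # requires ckpt.t7 to be in place
```
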
diff --git a/deep_sort/deep/feature_extractor.py b/deep_sort/deep/feature_extractor.py
new file mode 100644
index 0000000..1fb8ea2
--- /dev/null
+++ b/deep_sort/deep/feature_extractor.py
@@ -0,0 +1,55 @@
+import torch
+import torchvision.transforms as transforms
+import numpy as np
+import cv2
+import logging
+
+from deep_sort.deep.model import Net
+
+class Extractor(object):
+    def __init__(self, model_path, use_cuda=True, num_class=751):
+        self.net = Net(reid=True, num_classes=num_class)
+        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
+        state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict']
+        self.net.load_state_dict(state_dict)
+        logger = logging.getLogger("root.tracker")
+        logger.info("Loading weights from {}... Done!".format(model_path))
+        self.net.to(self.device)
+        self.size = (64, 128)
+        self.norm = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ])
+
+    def _preprocess(self, im_crops):
+        """
+        TODO:
+            1. to float with scale from 0 to 1
+            2. resize to (64, 128) as Market1501 dataset did
+            3. concatenate to a numpy array
+            4. to torch Tensor
+            5. normalize
+        """
+        def _resize(im, size):
+            return cv2.resize(im.astype(np.float32) / 255., size)
+
+        im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
+        return im_batch
+
+    def __call__(self, im_crops):
+        im_batch = self._preprocess(im_crops)
+        with torch.no_grad():
+            im_batch = im_batch.to(self.device)
+            features = self.net(im_batch)
+        return features.cpu().numpy()
+
+
+if __name__ == '__main__':
+    img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)]
+    extr = Extractor("checkpoint/ckpt.t7")
+    feature = extr([img])  # __call__ expects a list of crops, not a single image
+    print(feature.shape)
diff --git a/deep_sort/deep/model.py b/deep_sort/deep/model.py
new file mode 100644
index 0000000..0427f71
--- /dev/null
+++ b/deep_sort/deep/model.py
@@ -0,0 +1,104 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class BasicBlock(nn.Module):
+    def __init__(self, c_in, c_out, is_downsample=False):
+        super(BasicBlock, self).__init__()
+        self.is_downsample = is_downsample
+        if is_downsample:
+            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
+        else:
+            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(c_out)
+        self.relu = nn.ReLU(True)
+        self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(c_out)
+        if is_downsample:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
+                nn.BatchNorm2d(c_out)
+            )
+        elif c_in != c_out:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
+                nn.BatchNorm2d(c_out)
+            )
+            self.is_downsample = True
+
+    def forward(self, x):
+        y = self.conv1(x)
+        y = self.bn1(y)
+        y = self.relu(y)
+        y = self.conv2(y)
+        y = self.bn2(y)
+        if self.is_downsample:
+            x = self.downsample(x)
+        return F.relu(x.add(y), True)  # the residual connection
+
+def make_layers(c_in, c_out, repeat_times, is_downsample=False):
+    blocks = []
+    for i in range(repeat_times):
+        if i == 0:
+            blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
+        else:
+            blocks += [BasicBlock(c_out, c_out), ]
+    return nn.Sequential(*blocks)
+
+class Net(nn.Module):
+    def __init__(self, num_classes=751, reid=False):
+        super(Net, self).__init__()
+        # input: 3 x 128 x 64
+        self.conv = nn.Sequential(
+            nn.Conv2d(3, 64, 3, stride=1, padding=1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True),
+            # nn.Conv2d(32,32,3,stride=1,padding=1),
+            # nn.BatchNorm2d(32),
+            # nn.ReLU(inplace=True),
+            nn.MaxPool2d(3, 2, padding=1),
+        )
+        # 64 x 64 x 32
+        self.layer1 = make_layers(64, 64, 2, False)
+        # 64 x 64 x 32
+        self.layer2 = make_layers(64, 128, 2, True)
+        # 128 x 32 x 16
+        self.layer3 = make_layers(128, 256, 2, True)
+        # 256 x 16 x 8
+        self.layer4 = make_layers(256, 512, 2, True)
+        # 512 x 8 x 4
+        self.avgpool = nn.AvgPool2d((8, 4), 1)
+        # 512 x 1 x 1
+        self.reid = reid
+        self.classifier = nn.Sequential(
+            nn.Linear(512, 256),
+            nn.BatchNorm1d(256),
+            nn.ReLU(inplace=True),
+            nn.Dropout(),
+            nn.Linear(256, num_classes),
+        )
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        # B x 512
+        if self.reid:
+            x = x.div(x.norm(p=2, dim=1, keepdim=True))
+            return x
+        # classifier
+        x = self.classifier(x)
+        return x
+
+
+if __name__ == '__main__':
+    net = Net()
+    x = torch.randn(4, 3, 128, 64)
+    y = net(x)
+    # import ipdb; ipdb.set_trace()
+
+
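
A quick shape check of the reid branch (a sketch, not part of the diff): with `reid=True` the network returns L2-normalised 512-dim embeddings for 128x64 crops.

```python
import torch

from deep_sort.deep.model import Net

net = Net(reid=True).eval()
x = torch.randn(4, 3, 128, 64)        # a batch of 128x64 (HxW) crops
with torch.no_grad():
    emb = net(x)
print(emb.shape)                      # torch.Size([4, 512])
print(emb.norm(dim=1))                # each row has unit L2 norm
```
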
diff --git a/deep_sort/deep/original_model.py b/deep_sort/deep/original_model.py
new file mode 100644
index 0000000..72453a6
--- /dev/null
+++ b/deep_sort/deep/original_model.py
@@ -0,0 +1,106 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class BasicBlock(nn.Module):
+    def __init__(self, c_in, c_out, is_downsample=False):
+        super(BasicBlock, self).__init__()
+        self.is_downsample = is_downsample
+        if is_downsample:
+            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
+        else:
+            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(c_out)
+        self.relu = nn.ReLU(True)
+        self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(c_out)
+        if is_downsample:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
+                nn.BatchNorm2d(c_out)
+            )
+        elif c_in != c_out:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
+                nn.BatchNorm2d(c_out)
+            )
+            self.is_downsample = True
+
+    def forward(self, x):
+        y = self.conv1(x)
+        y = self.bn1(y)
+        y = self.relu(y)
+        y = self.conv2(y)
+        y = self.bn2(y)
+        if self.is_downsample:
+            x = self.downsample(x)
+        return F.relu(x.add(y), True)
+
+def make_layers(c_in, c_out, repeat_times, is_downsample=False):
+    blocks = []
+    for i in range(repeat_times):
+        if i == 0:
+            blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
+        else:
+            blocks += [BasicBlock(c_out, c_out), ]
+    return nn.Sequential(*blocks)
+
+class Net(nn.Module):
+    def __init__(self, num_classes=625, reid=False):
+        super(Net, self).__init__()
+        # 3 128 64
+        self.conv = nn.Sequential(
+            nn.Conv2d(3, 32, 3, stride=1, padding=1),
+            nn.BatchNorm2d(32),
+            nn.ELU(inplace=True),
+            nn.Conv2d(32, 32, 3, stride=1, padding=1),
+            nn.BatchNorm2d(32),
+            nn.ELU(inplace=True),
+            nn.MaxPool2d(3, 2, padding=1),
+        )
+        # 32 64 32
+        self.layer1 = make_layers(32, 32, 2, False)
+        # 32 64 32
+        self.layer2 = make_layers(32, 64, 2, True)
+        # 64 32 16
+        self.layer3 = make_layers(64, 128, 2, True)
+        # 128 16 8
+        self.dense = nn.Sequential(
+            nn.Dropout(p=0.6),
+            nn.Linear(128 * 16 * 8, 128),
+            nn.BatchNorm1d(128),
+            nn.ELU(inplace=True)
+        )
+        # 128
+        self.reid = reid
+        self.batch_norm = nn.BatchNorm1d(128)
+        self.classifier = nn.Sequential(
+            nn.Linear(128, num_classes),
+        )
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+
+        x = x.view(x.size(0), -1)
+        if self.reid:
+            x = self.dense[0](x)
+            x = self.dense[1](x)
+            x = x.div(x.norm(p=2, dim=1, keepdim=True))
+            return x
+        x = self.dense(x)
+        # B x 128
+        # classifier
+        x = self.classifier(x)
+        return x
+
+
+if __name__ == '__main__':
+    net = Net(reid=True)
+    x = torch.randn(4, 3, 128, 64)
+    y = net(x)
+    import ipdb; ipdb.set_trace()
+
+
diff --git a/deep_sort/deep/test.py b/deep_sort/deep/test.py
new file mode 100644
index 0000000..ecac0ad
--- /dev/null
+++ b/deep_sort/deep/test.py
@@ -0,0 +1,77 @@
+import torch
+import torch.backends.cudnn as cudnn
+import torchvision
+
+import argparse
+import os
+
+from model import Net
+
+parser = argparse.ArgumentParser(description="Test on market1501")
+parser.add_argument("--data-dir", default='data', type=str)
+parser.add_argument("--no-cuda", action="store_true")
+parser.add_argument("--gpu-id", default=0, type=int)
+args = parser.parse_args()
+
+# device
+device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
+if torch.cuda.is_available() and not args.no_cuda:
+    cudnn.benchmark = True
+
+# data loader
+root = args.data_dir
+query_dir = os.path.join(root, "query")
+gallery_dir = os.path.join(root, "gallery")
+transform = torchvision.transforms.Compose([
+    torchvision.transforms.Resize((128, 64)),
+    torchvision.transforms.ToTensor(),
+    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+queryloader = torch.utils.data.DataLoader(
+    torchvision.datasets.ImageFolder(query_dir, transform=transform),
+    batch_size=64, shuffle=False
+)
+galleryloader = torch.utils.data.DataLoader(
+    torchvision.datasets.ImageFolder(gallery_dir, transform=transform),
+    batch_size=64, shuffle=False
+)
+
+# net definition
+net = Net(reid=True)
+assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
+print('Loading from checkpoint/ckpt.t7')
+checkpoint = torch.load("./checkpoint/ckpt.t7")
+net_dict = checkpoint['net_dict']
+net.load_state_dict(net_dict, strict=False)
+net.eval()
+net.to(device)
+
+# compute features
+query_features = torch.tensor([]).float()
+query_labels = torch.tensor([]).long()
+gallery_features = torch.tensor([]).float()
+gallery_labels = torch.tensor([]).long()
+
+with torch.no_grad():
+    for idx, (inputs, labels) in enumerate(queryloader):
+        inputs = inputs.to(device)
+        features = net(inputs).cpu()
+        query_features = torch.cat((query_features, features), dim=0)
+        query_labels = torch.cat((query_labels, labels))
+
+    for idx, (inputs, labels) in enumerate(galleryloader):
+        inputs = inputs.to(device)
+        features = net(inputs).cpu()
+        gallery_features = torch.cat((gallery_features, features), dim=0)
+        gallery_labels = torch.cat((gallery_labels, labels))
+
+gallery_labels -= 2
+
+# save features
+features = {
+    "qf": query_features,
+    "ql": query_labels,
+    "gf": gallery_features,
+    "gl": gallery_labels
+}
+torch.save(features, "features.pth")
\ No newline at end of file
diff --git a/deep_sort/deep/train.jpg b/deep_sort/deep/train.jpg
new file mode 100644
index 0000000..3635a61
Binary files /dev/null and b/deep_sort/deep/train.jpg differ
diff --git a/deep_sort/deep/train.py b/deep_sort/deep/train.py
new file mode 100644
index 0000000..5322af9
--- /dev/null
+++ b/deep_sort/deep/train.py
@@ -0,0 +1,189 @@
+import argparse
+import os
+import time
+
+import numpy as np
+import matplotlib.pyplot as plt
+import torch
+import torch.backends.cudnn as cudnn
+import torchvision
+
+from model import Net
+
+parser = argparse.ArgumentParser(description="Train on market1501")
+parser.add_argument("--data-dir", default='/home/hncr/workspace/MOT_TRACKING/deep_sort_pytorch-master/deep_sort/deep/data', type=str)
+parser.add_argument("--no-cuda", action="store_true")
+parser.add_argument("--gpu-id", default=0, type=int)
+parser.add_argument("--lr", default=0.01, type=float)
+parser.add_argument("--interval", '-i', default=20, type=int)
+parser.add_argument('--resume', '-r', default=False, action='store_true')
+args = parser.parse_args()
+
+# device
+device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
+if torch.cuda.is_available() and not args.no_cuda:
+    cudnn.benchmark = True
+
+# data loading
+root = args.data_dir
+train_dir = os.path.join(root, "train/")
+test_dir = os.path.join(root, "test/")
+transform_train = torchvision.transforms.Compose([
+    torchvision.transforms.RandomCrop((128, 64), padding=4),
+    torchvision.transforms.RandomHorizontalFlip(),
+    torchvision.transforms.ToTensor(),
+    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+transform_test = torchvision.transforms.Compose([
+    torchvision.transforms.Resize((128, 64)),
+    torchvision.transforms.ToTensor(),
+    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+trainloader = torch.utils.data.DataLoader(
+    torchvision.datasets.ImageFolder(train_dir, transform=transform_train),
+    batch_size=64, shuffle=True
+)
+testloader = torch.utils.data.DataLoader(
+    torchvision.datasets.ImageFolder(test_dir, transform=transform_test),
+    batch_size=64, shuffle=True
+)
+num_classes = max(len(trainloader.dataset.classes), len(testloader.dataset.classes))
+
+# net definition
+start_epoch = 0
+best_acc = 0.  # initialised here so the accuracy restored from a checkpoint is not reset below
+net = Net(num_classes=num_classes)
+if args.resume:
+    assert os.path.isfile("/home/hncr/workspace/MOT_TRACKING/deep_sort_pytorch-master/checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
+    print('Loading from checkpoint/ckpt.t7')
+    checkpoint = torch.load("/home/hncr/workspace/MOT_TRACKING/deep_sort_pytorch-master/checkpoint/ckpt.t7")
+    # import ipdb; ipdb.set_trace()
+    net_dict = checkpoint['net_dict']
+    net.load_state_dict(net_dict)
+    best_acc = checkpoint['acc']
+    start_epoch = checkpoint['epoch']
+net.to(device)
+
+# loss and optimizer
+criterion = torch.nn.CrossEntropyLoss()
+optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4)
+
+# train function for each epoch
+def train(epoch):
+    print("\nEpoch : %d" % (epoch + 1))
+    net.train()
+    training_loss = 0.
+    train_loss = 0.
+    correct = 0
+    total = 0
+    interval = args.interval
+    start = time.time()
+    for idx, (inputs, labels) in enumerate(trainloader):
+        # forward
+        inputs, labels = inputs.to(device), labels.to(device)
+        outputs = net(inputs)
+        loss = criterion(outputs, labels)
+
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        # accumulating
+        training_loss += loss.item()
+        train_loss += loss.item()
+        correct += outputs.max(dim=1)[1].eq(labels).sum().item()
+        total += labels.size(0)
+
+        # print
+        if (idx + 1) % interval == 0:
+            end = time.time()
+            print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
+                100. * (idx + 1) / len(trainloader), end - start, training_loss / interval, correct, total, 100. * correct / total
+            ))
+            training_loss = 0.
+            start = time.time()
+
+    return train_loss / len(trainloader), 1. - correct / total
+
+def test(epoch):
+    global best_acc
+    net.eval()
+    test_loss = 0.
+    correct = 0
+    total = 0
+    start = time.time()
+    with torch.no_grad():
+        for idx, (inputs, labels) in enumerate(testloader):
+            inputs, labels = inputs.to(device), labels.to(device)
+            outputs = net(inputs)
+            loss = criterion(outputs, labels)
+
+            test_loss += loss.item()
+            correct += outputs.max(dim=1)[1].eq(labels).sum().item()
+            total += labels.size(0)
+
+        print("Testing ...")
+        end = time.time()
+        print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
+            100. * (idx + 1) / len(testloader), end - start, test_loss / len(testloader), correct, total, 100. * correct / total
+        ))
+
+    # saving checkpoint
+    acc = 100. * correct / total
+    if acc > best_acc:
+        best_acc = acc
+        print("Saving parameters to checkpoint/ckpt.t7")
+        checkpoint = {
+            'net_dict': net.state_dict(),
+            'acc': acc,
+            'epoch': epoch,
+        }
+        if not os.path.isdir('checkpoint'):
+            os.mkdir('checkpoint')
+        torch.save(checkpoint, './checkpoint/ckpt.t7')
+
+    return test_loss / len(testloader), 1. - correct / total
+
+# plot figure
+x_epoch = []
+record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
+fig = plt.figure()
+ax0 = fig.add_subplot(121, title="loss")
+ax1 = fig.add_subplot(122, title="top1err")
+def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
+    global record
+    record['train_loss'].append(train_loss)
+    record['train_err'].append(train_err)
+    record['test_loss'].append(test_loss)
+    record['test_err'].append(test_err)
+
+    x_epoch.append(epoch)
+    ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
+    ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
+    ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
+    ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
+    if epoch == 0:
+        ax0.legend()
+        ax1.legend()
+    fig.savefig("train.jpg")
+
+# lr decay
+def lr_decay():
+    global optimizer
+    for params in optimizer.param_groups:
+        params['lr'] *= 0.1
+        lr = params['lr']
+        print("Learning rate adjusted to {}".format(lr))
+
+def main():
+    for epoch in range(start_epoch, start_epoch + 70):
+        train_loss, train_err = train(epoch)
+        test_loss, test_err = test(epoch)
+        draw_curve(epoch, train_loss, train_err, test_loss, test_err)
+        if (epoch + 1) % 20 == 0:
+            lr_decay()
+
+
+if __name__ == '__main__':
+    main()
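
The schedule in `main()` multiplies the learning rate by 0.1 every 20 epochs. A quick sanity check of the decay arithmetic (a sketch, not part of the diff):

```python
# With --lr 0.01 over 70 epochs, the decay fires after epochs 20, 40 and 60:
lr = 0.01
for epoch in range(70):
    if (epoch + 1) % 20 == 0:
        lr *= 0.1
print(lr)  # ~1e-05: three decays, i.e. 0.01 -> 1e-3 -> 1e-4 -> 1e-5
```
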
diff --git a/deep_sort/deep_sort.py b/deep_sort/deep_sort.py
new file mode 100644
index 0000000..e5476f6
--- /dev/null
+++ b/deep_sort/deep_sort.py
@@ -0,0 +1,118 @@
+import numpy as np
+import torch
+
+from deep_sort.deep.feature_extractor import Extractor
+from deep_sort.sort.nn_matching import NearestNeighborDistanceMetric
+from deep_sort.sort.preprocessing import non_max_suppression
+from deep_sort.sort.detection import Detection
+from deep_sort.sort.tracker import Tracker
+
+
+__all__ = ['DeepSort']
+
+
+class DeepSort(object):
+    def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True, num_class=751):
+        self.min_confidence = min_confidence
+        self.nms_max_overlap = nms_max_overlap
+
+        self.extractor = Extractor(model_path, use_cuda=use_cuda, num_class=num_class)
+
+        max_cosine_distance = max_dist
+        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)  # uses the nn_budget argument (it was previously re-hardcoded to 100 here)
+        self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)
+
+    def update(self, bbox_xywh, confidences, ori_img, count):
+        self.height, self.width = ori_img.shape[:2]
+        # generate detections
+        features = self._get_features(bbox_xywh, ori_img)  # ReID network: one appearance embedding per crop
+        bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)  # convert center (x, y, w, h) to top-left (x, y, w, h)
+        detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate(confidences) if conf > self.min_confidence]
+
+        # run non-maximum suppression
+        boxes = np.array([d.tlwh for d in detections])
+        scores = np.array([d.confidence for d in detections])
+        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
+        detections = [detections[i] for i in indices]
+
+        # update tracker
+        self.tracker.predict()
+        self.tracker.update(detections)
+
+        # output bbox identities
+        outputs = []
+        detection_id = len(bbox_xywh)
+
+        for track in self.tracker.tracks:
+            if not track.is_confirmed() or track.time_since_update > 1:
+                continue
+            box = track.to_tlwh()
+            x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
+            track_id = track.track_id
+            count.append(int(track_id))
+            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))  # np.int is deprecated in recent NumPy
+        if len(outputs) > 0:
+            outputs = np.stack(outputs, axis=0)
+        return outputs, count, detection_id
+
+    """
+    TODO:
+        Convert bbox from xc_yc_w_h to xtl_ytl_w_h
+    Thanks JieChen91@github.com for reporting this bug!
+    """
+    @staticmethod  # no instance (and no `self`) is required to call this
+    def _xywh_to_tlwh(bbox_xywh):
+        if isinstance(bbox_xywh, np.ndarray):
+            bbox_tlwh = bbox_xywh.copy()
+        elif isinstance(bbox_xywh, torch.Tensor):
+            bbox_tlwh = bbox_xywh.clone()
+        bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.
+        bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.
+        return bbox_tlwh
+
+    def _xywh_to_xyxy(self, bbox_xywh):
+        x, y, w, h = bbox_xywh
+        x1 = max(int(x - w / 2), 0)
+        x2 = min(int(x + w / 2), self.width - 1)
+        y1 = max(int(y - h / 2), 0)
+        y2 = min(int(y + h / 2), self.height - 1)
+        return x1, y1, x2, y2
+
+    def _tlwh_to_xyxy(self, bbox_tlwh):
+        """
+        Convert bbox from xtl_ytl_w_h to x1_y1_x2_y2.
+        Thanks JieChen91@github.com for reporting this bug!
+        """
+        x, y, w, h = bbox_tlwh
+        x1 = max(int(x), 0)
+        x2 = min(int(x + w), self.width - 1)
+        y1 = max(int(y), 0)
+        y2 = min(int(y + h), self.height - 1)
+        return x1, y1, x2, y2
+
+    def _xyxy_to_tlwh(self, bbox_xyxy):
+        x1, y1, x2, y2 = bbox_xyxy
+
+        t = x1
+        l = y1
+        w = int(x2 - x1)
+        h = int(y2 - y1)
+        return t, l, w, h
+
+    def _get_features(self, bbox_xywh, ori_img):
+        im_crops = []
+        for box in bbox_xywh:
+            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
+            im = ori_img[y1:y2, x1:x2]
+            im_crops.append(im)
+        if im_crops:
+            features = self.extractor(im_crops)
+        else:
+            features = np.array([])
+        return features
+
+
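
A sketch of driving `DeepSort.update` from a detector (not part of the diff; assumes the reid checkpoint exists and `frame.jpg` is any BGR image). Note that `outputs` stays empty until a track survives `n_init` consecutive frames:

```python
import cv2
import numpy as np

from deep_sort.deep_sort import DeepSort

deepsort = DeepSort("deep_sort/deep/checkpoint/ckpt.t7", use_cuda=False)
frame = cv2.imread("frame.jpg")                      # any BGR frame
bbox_xywh = np.array([[320., 240., 60., 160.]])      # detector boxes, center format
confidences = np.array([0.9])
count = []                                           # accumulates every track id seen

outputs, count, det_id = deepsort.update(bbox_xywh, confidences, frame, count)
# outputs: Nx5 rows of (x1, y1, x2, y2, track_id) for confirmed tracks
```
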
diff --git a/deep_sort/sort/__init__.py b/deep_sort/sort/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/deep_sort/sort/detection.py b/deep_sort/sort/detection.py
new file mode 100644
index 0000000..87fc5fd
--- /dev/null
+++ b/deep_sort/sort/detection.py
@@ -0,0 +1,49 @@
+# vim: expandtab:ts=4:sw=4
+import numpy as np
+
+
+class Detection(object):
+    """
+    This class represents a bounding box detection in a single image.
+
+    Parameters
+    ----------
+    tlwh : array_like
+        Bounding box in format `(x, y, w, h)`.
+    confidence : float
+        Detector confidence score.
+    feature : array_like
+        A feature vector that describes the object contained in this image.
+
+    Attributes
+    ----------
+    tlwh : ndarray
+        Bounding box in format `(top left x, top left y, width, height)`.
+    confidence : float
+        Detector confidence score.
+    feature : ndarray | NoneType
+        A feature vector that describes the object contained in this image.
+
+    """
+
+    def __init__(self, tlwh, confidence, feature):
+        self.tlwh = np.asarray(tlwh, dtype=float)  # np.float is deprecated in recent NumPy
+        self.confidence = float(confidence)
+        self.feature = np.asarray(feature, dtype=np.float32)
+
+    def to_tlbr(self):
+        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
+        `(top left, bottom right)`.
+        """
+        ret = self.tlwh.copy()
+        ret[2:] += ret[:2]
+        return ret
+
+    def to_xyah(self):
+        """Convert bounding box to format `(center x, center y, aspect ratio,
+        height)`, where the aspect ratio is `width / height`.
+        """
+        ret = self.tlwh.copy()
+        ret[:2] += ret[2:] / 2
+        ret[2] /= ret[3]
+        return ret
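
A quick check of the two conversions (a sketch, not part of the diff), for a 40x80 box whose top-left corner is at (10, 20):

```python
import numpy as np

from deep_sort.sort.detection import Detection

det = Detection([10, 20, 40, 80], 0.9, np.zeros(512))
print(det.to_tlbr())   # [10. 20. 50. 100.] -> top-left / bottom-right corners
print(det.to_xyah())   # [30. 60. 0.5 80.]  -> center x, center y, w/h, height
```
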
+ + """ + + def __init__(self, tlwh, confidence, feature): + self.tlwh = np.asarray(tlwh, dtype=np.float) + self.confidence = float(confidence) + self.feature = np.asarray(feature, dtype=np.float32) + + def to_tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret diff --git a/deep_sort/sort/iou_matching.py b/deep_sort/sort/iou_matching.py new file mode 100644 index 0000000..481e930 --- /dev/null +++ b/deep_sort/sort/iou_matching.py @@ -0,0 +1,81 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from . import linear_assignment + + +def iou(bbox, candidates): + """Computer intersection over union. + + Parameters + ---------- + bbox : ndarray + A bounding box in format `(top left x, top left y, width, height)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. + + """ + bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + + +def iou_cost(tracks, detections, track_indices=None, + detection_indices=None): + """An intersection over union distance metric. + + Parameters + ---------- + tracks : List[deep_sort.track.Track] + A list of tracks. + detections : List[deep_sort.detection.Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. 
diff --git a/deep_sort/sort/kalman_filter.py b/deep_sort/sort/kalman_filter.py
new file mode 100644
index 0000000..1e50a7c
--- /dev/null
+++ b/deep_sort/sort/kalman_filter.py
@@ -0,0 +1,241 @@
+# vim: expandtab:ts=4:sw=4
+import numpy as np
+import scipy.linalg
+
+
+"""
+Table for the 0.95 quantile of the chi-square distribution with N degrees of
+freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
+function and used as Mahalanobis gating threshold.
+"""
+chi2inv95 = {
+    1: 3.8415,
+    2: 5.9915,
+    3: 7.8147,
+    4: 9.4877,
+    5: 11.070,
+    6: 12.592,
+    7: 14.067,
+    8: 15.507,
+    9: 16.919}
+
+
+class KalmanFilter(object):
+    """
+    A simple Kalman filter for tracking bounding boxes in image space.
+
+    The 8-dimensional state space
+
+        x, y, a, h, vx, vy, va, vh
+
+    contains the bounding box center position (x, y), aspect ratio a, height h,
+    and their respective velocities.
+
+    Object motion follows a constant velocity model. The bounding box location
+    (x, y, a, h) is taken as direct observation of the state space (linear
+    observation model).
+
+    """
+
+    def __init__(self):
+        ndim, dt = 4, 1.
+
+        # Create Kalman filter model matrices.
+        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
+        for i in range(ndim):
+            self._motion_mat[i, ndim + i] = dt
+        self._update_mat = np.eye(ndim, 2 * ndim)
+
+        # Motion and observation uncertainty are chosen relative to the current
+        # state estimate. These weights control the amount of uncertainty in
+        # the model. This is a bit hacky.
+        self._std_weight_position = 1. / 20
+        self._std_weight_velocity = 1. / 160
+
+    def initiate(self, measurement):
+        """Create track from unassociated measurement.
+
+        Parameters
+        ----------
+        measurement : ndarray
+            Bounding box coordinates (x, y, a, h) with center position (x, y),
+            aspect ratio a, and height h.
+
+        Returns
+        -------
+        (ndarray, ndarray)
+            Returns the mean vector (8 dimensional) and covariance matrix (8x8
+            dimensional) of the new track. Unobserved velocities are initialized
+            to 0 mean.
+
+        """
+        mean_pos = measurement
+        mean_vel = np.zeros_like(mean_pos)
+        mean = np.r_[mean_pos, mean_vel]
+
+        std = [
+            2 * self._std_weight_position * measurement[3],
+            2 * self._std_weight_position * measurement[3],
+            1e-2,
+            2 * self._std_weight_position * measurement[3],
+            10 * self._std_weight_velocity * measurement[3],
+            10 * self._std_weight_velocity * measurement[3],
+            1e-5,
+            10 * self._std_weight_velocity * measurement[3]]
+        covariance = np.diag(np.square(std))
+        return mean, covariance
+ + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + # np.r_ 按列连接两个矩阵 + # 初始化噪声矩阵 Q 对角矩阵 + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + ## x' = Fx + mean = np.dot(self._motion_mat, mean) + # P' = FPF^T+Q + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + # 将均值向量映射到检测空间,即 Hx' + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + #y = z − Hx ′ + #S = HP ′ H T + R + #K = P ′ H T S −1 + #x = x ′ + Ky + #P = (I − KH)P ′ + # 将均值和协方差映射到检测空间,得到 Hx' 和 S + projected_mean, projected_cov = self.project(mean, covariance) + # 矩阵分解 + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + # 计算卡尔曼增益 K + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + # z - Hx' + innovation = measurement - projected_mean + #x = x ′ + Ky + new_mean = mean + np.dot(innovation, kalman_gain.T) + #P = (I − KH)P ′ ???? + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. 
+
+    def gating_distance(self, mean, covariance, measurements,
+                        only_position=False):
+        """Compute gating distance between state distribution and measurements.
+
+        A suitable distance threshold can be obtained from `chi2inv95`. If
+        `only_position` is False, the chi-square distribution has 4 degrees of
+        freedom, otherwise 2.
+
+        Parameters
+        ----------
+        mean : ndarray
+            Mean vector over the state distribution (8 dimensional).
+        covariance : ndarray
+            Covariance of the state distribution (8x8 dimensional).
+        measurements : ndarray
+            An Nx4 dimensional matrix of N measurements, each in
+            format (x, y, a, h) where (x, y) is the bounding box center
+            position, a the aspect ratio, and h the height.
+        only_position : Optional[bool]
+            If True, distance computation is done with respect to the bounding
+            box center position only.
+
+        Returns
+        -------
+        ndarray
+            Returns an array of length N, where the i-th element contains the
+            squared Mahalanobis distance between (mean, covariance) and
+            `measurements[i]`.
+
+        """
+        mean, covariance = self.project(mean, covariance)
+        if only_position:
+            mean, covariance = mean[:2], covariance[:2, :2]
+            measurements = measurements[:, :2]
+
+        cholesky_factor = np.linalg.cholesky(covariance)
+        d = measurements - mean
+        z = scipy.linalg.solve_triangular(
+            cholesky_factor, d.T, lower=True, check_finite=False,
+            overwrite_b=True)
+        squared_maha = np.sum(z * z, axis=0)
+        return squared_maha
diff --git a/deep_sort/sort/linear_assignment.py b/deep_sort/sort/linear_assignment.py
new file mode 100644
index 0000000..f1238dc
--- /dev/null
+++ b/deep_sort/sort/linear_assignment.py
@@ -0,0 +1,193 @@
+# vim: expandtab:ts=4:sw=4
+from __future__ import absolute_import
+import numpy as np
+# from sklearn.utils.linear_assignment_ import linear_assignment
+from scipy.optimize import linear_sum_assignment as linear_assignment
+from . import kalman_filter
+
+
+INFTY_COST = 1e+5
+
+
+def min_cost_matching(
+        distance_metric, max_distance, tracks, detections, track_indices=None,
+        detection_indices=None):
+    """Solve linear assignment problem.
+
+    Parameters
+    ----------
+    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
+        The distance metric is given a list of tracks and detections as well as
+        a list of N track indices and M detection indices. The metric should
+        return the NxM dimensional cost matrix, where element (i, j) is the
+        association cost between the i-th track in the given track indices and
+        the j-th detection in the given detection_indices.
+    max_distance : float
+        Gating threshold. Associations with cost larger than this value are
+        disregarded.
+    tracks : List[track.Track]
+        A list of predicted tracks at the current time step.
+    detections : List[detection.Detection]
+        A list of detections at the current time step.
+    track_indices : List[int]
+        List of track indices that maps rows in `cost_matrix` to tracks in
+        `tracks` (see description above).
+    detection_indices : List[int]
+        List of detection indices that maps columns in `cost_matrix` to
+        detections in `detections` (see description above).
+
+    Returns
+    -------
+    (List[(int, int)], List[int], List[int])
+        Returns a tuple with the following three entries:
+        * A list of matched track and detection indices.
+        * A list of unmatched track indices.
+        * A list of unmatched detection indices.
+
+    """
+    if track_indices is None:
+        track_indices = np.arange(len(tracks))
+    if detection_indices is None:
+        detection_indices = np.arange(len(detections))
+
+    if len(detection_indices) == 0 or len(track_indices) == 0:
+        return [], track_indices, detection_indices  # Nothing to match.
+
+    cost_matrix = distance_metric(
+        tracks, detections, track_indices, detection_indices)
+    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
+
+    # Hungarian (Kuhn-Munkres) assignment
+    row_indices, col_indices = linear_assignment(cost_matrix)
+
+    matches, unmatched_tracks, unmatched_detections = [], [], []
+    for col, detection_idx in enumerate(detection_indices):
+        if col not in col_indices:
+            unmatched_detections.append(detection_idx)
+    for row, track_idx in enumerate(track_indices):
+        if row not in row_indices:
+            unmatched_tracks.append(track_idx)
+    for row, col in zip(row_indices, col_indices):
+        track_idx = track_indices[row]
+        detection_idx = detection_indices[col]
+        if cost_matrix[row, col] > max_distance:
+            unmatched_tracks.append(track_idx)
+            unmatched_detections.append(detection_idx)
+        else:
+            matches.append((track_idx, detection_idx))
+    return matches, unmatched_tracks, unmatched_detections
+
+
+def matching_cascade(
+        distance_metric, max_distance, cascade_depth, tracks, detections,
+        track_indices=None, detection_indices=None):
+    """Run matching cascade.
+
+    Parameters
+    ----------
+    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
+        The distance metric is given a list of tracks and detections as well as
+        a list of N track indices and M detection indices. The metric should
+        return the NxM dimensional cost matrix, where element (i, j) is the
+        association cost between the i-th track in the given track indices and
+        the j-th detection in the given detection indices.
+    max_distance : float
+        Gating threshold. Associations with cost larger than this value are
+        disregarded.
+    cascade_depth: int
+        The cascade depth, should be set to the maximum track age.
+    tracks : List[track.Track]
+        A list of predicted tracks at the current time step.
+    detections : List[detection.Detection]
+        A list of detections at the current time step.
+    track_indices : Optional[List[int]]
+        List of track indices that maps rows in `cost_matrix` to tracks in
+        `tracks` (see description above). Defaults to all tracks.
+    detection_indices : Optional[List[int]]
+        List of detection indices that maps columns in `cost_matrix` to
+        detections in `detections` (see description above). Defaults to all
+        detections.
+
+    Returns
+    -------
+    (List[(int, int)], List[int], List[int])
+        Returns a tuple with the following three entries:
+        * A list of matched track and detection indices.
+        * A list of unmatched track indices.
+        * A list of unmatched detection indices.
+
+    """
+    if track_indices is None:
+        track_indices = list(range(len(tracks)))
+    if detection_indices is None:
+        detection_indices = list(range(len(detections)))
+
+    unmatched_detections = detection_indices
+    matches = []
+    for level in range(cascade_depth):
+        if len(unmatched_detections) == 0:  # No detections left
+            break
+
+        track_indices_l = [
+            k for k in track_indices
+            if tracks[k].time_since_update == 1 + level  # match in age order, level 0 first: time_since_update resets to 0 on every update
+        ]
+        if len(track_indices_l) == 0:  # Nothing to match at this level; each level keeps a different track_indices_l
+            continue
+
+        matches_l, _, unmatched_detections = \
+            min_cost_matching(
+                distance_metric, max_distance, tracks, detections,
+                track_indices_l, unmatched_detections)
+        matches += matches_l
+    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
+    return matches, unmatched_tracks, unmatched_detections
+
+
+def gate_cost_matrix(
+        kf, cost_matrix, tracks, detections, track_indices, detection_indices,
+        gated_cost=INFTY_COST, only_position=False):
+    """Invalidate infeasible entries in cost matrix based on the state
+    distributions obtained by Kalman filtering.
+
+    Parameters
+    ----------
+    kf : The Kalman filter.
+    cost_matrix : ndarray
+        The NxM dimensional cost matrix, where N is the number of track indices
+        and M is the number of detection indices, such that entry (i, j) is the
+        association cost between `tracks[track_indices[i]]` and
+        `detections[detection_indices[j]]`.
+    tracks : List[track.Track]
+        A list of predicted tracks at the current time step.
+    detections : List[detection.Detection]
+        A list of detections at the current time step.
+    track_indices : List[int]
+        List of track indices that maps rows in `cost_matrix` to tracks in
+        `tracks` (see description above).
+    detection_indices : List[int]
+        List of detection indices that maps columns in `cost_matrix` to
+        detections in `detections` (see description above).
+    gated_cost : Optional[float]
+        Entries in the cost matrix corresponding to infeasible associations are
+        set to this value. Defaults to a very large value.
+    only_position : Optional[bool]
+        If True, only the x, y position of the state distribution is considered
+        during gating. Defaults to False.
+
+    Returns
+    -------
+    ndarray
+        Returns the modified cost matrix.
+
+    """
+    gating_dim = 2 if only_position else 4
+    gating_threshold = kalman_filter.chi2inv95[gating_dim]
+    measurements = np.asarray(
+        [detections[i].to_xyah() for i in detection_indices])
+    for row, track_idx in enumerate(track_indices):
+        track = tracks[track_idx]
+        gating_distance = kf.gating_distance(
+            track.mean, track.covariance, measurements, only_position)
+        cost_matrix[row, gating_distance > gating_threshold] = gated_cost
+    return cost_matrix
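
At its core `min_cost_matching` delegates to SciPy's Hungarian solver; a toy example (a sketch, not part of the diff):

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.1, 0.9],    # track 0 is close to detection 0
                 [0.8, 0.2]])   # track 1 is close to detection 1
rows, cols = linear_sum_assignment(cost)
print([(int(r), int(c)) for r, c in zip(rows, cols)])  # [(0, 0), (1, 1)]
```
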
+ + """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [ + k for k in track_indices + if tracks[k].time_since_update == 1 + level #按照匹配先后顺序进行匹配,首先是level=0,因为每次update time_since_update归0 + ] + if len(track_indices_l) == 0: # Nothing to match at this level 不同level 保留的track_indices_l不同 + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix( + kf, cost_matrix, tracks, detections, track_indices, detection_indices, + gated_cost=INFTY_COST, only_position=False): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + + Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + + Returns + ------- + ndarray + Returns the modified cost matrix. + + """ + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray( + [detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix diff --git a/deep_sort/sort/nn_matching.py b/deep_sort/sort/nn_matching.py new file mode 100644 index 0000000..6c7d68c --- /dev/null +++ b/deep_sort/sort/nn_matching.py @@ -0,0 +1,178 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +def _pdist(a, b): + """Compute pair-wise squared distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. 
+ + """ + a, b = np.asarray(a), np.asarray(b) + if len(a) == 0 or len(b) == 0: + return np.zeros((len(a), len(b))) + a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) + r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] + r2 = np.clip(r2, 0., float(np.inf)) + return r2 + + +def _cosine_distance(a, b, data_is_normalized=False): + """Compute pair-wise cosine distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + data_is_normalized : Optional[bool] + If True, assumes rows in a and b are unit length vectors. + Otherwise, a and b are explicitly normalized to lenght 1. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + + """ + if not data_is_normalized: + a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) ##求取二范数 + b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) + return 1. - np.dot(a, b.T) + + +def _nn_euclidean_distance(x, y): + """ Helper function for nearest neighbor distance metric (Euclidean). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest Euclidean distance to a sample in `x`. + + """ + distances = _pdist(x, y) + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(x, y): + """ Helper function for nearest neighbor distance metric (cosine). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest cosine distance to a sample in `x`. + + """ + distances = _cosine_distance(x, y) + #smallest cosine distance to a sample in `x` [1 , ...] + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + Parameters + ---------- + metric : str + Either "euclidean" or "cosine". + matching_threshold: float + The matching threshold. Samples with larger distance are considered an + invalid match. + budget : Optional[int] + If not None, fix samples per class to at most this number. Removes + the oldest samples when the budget is reached. + + Attributes + ---------- + samples : Dict[int -> List[ndarray]] + A dictionary that maps from target identities to the list of samples + that have been observed so far. + + """ + + def __init__(self, metric, matching_threshold, budget=None): + + + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """Update the distance metric with new data. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : ndarray + An integer array of associated target identities. 
diff --git a/deep_sort/sort/track.py b/deep_sort/sort/track.py
new file mode 100644
index 0000000..550f9ca
--- /dev/null
+++ b/deep_sort/sort/track.py
@@ -0,0 +1,172 @@
+# vim: expandtab:ts=4:sw=4
+
+
+class TrackState:
+    """
+    Enumeration type for the single target track state. Newly created tracks are
+    classified as `tentative` until enough evidence has been collected. Then,
+    the track state is changed to `confirmed`. Tracks that are no longer alive
+    are classified as `deleted` to mark them for removal from the set of active
+    tracks.
+
+    """
+
+    Tentative = 1
+    Confirmed = 2
+    Deleted = 3
+
+
+class Track:
+    """
+    A single target track with state space `(x, y, a, h)` and associated
+    velocities, where `(x, y)` is the center of the bounding box, `a` is the
+    aspect ratio and `h` is the height.
+
+    Parameters
+    ----------
+    mean : ndarray
+        Mean vector of the initial state distribution.
+    covariance : ndarray
+        Covariance matrix of the initial state distribution.
+    track_id : int
+        A unique track identifier.
+    n_init : int
+        Number of consecutive detections before the track is confirmed. The
+        track state is set to `Deleted` if a miss occurs within the first
+        `n_init` frames.
+    max_age : int
+        The maximum number of consecutive misses before the track state is
+        set to `Deleted`.
+    feature : Optional[ndarray]
+        Feature vector of the detection this track originates from. If not None,
+        this feature is added to the `features` cache.
+
+    Attributes
+    ----------
+    mean : ndarray
+        Mean vector of the initial state distribution.
+    covariance : ndarray
+        Covariance matrix of the initial state distribution.
+    track_id : int
+        A unique track identifier.
+    hits : int
+        Total number of measurement updates.
+    age : int
+        Total number of frames since first occurrence.
+    time_since_update : int
+        Total number of frames since last measurement update.
+    state : TrackState
+        The current track state.
+    features : List[ndarray]
+        A cache of features. On each measurement update, the associated feature
+        vector is added to this list.
+
+    """
+
+    def __init__(self, mean, covariance, track_id, n_init, max_age,
+                 feature=None):
+        self.mean = mean
+        self.covariance = covariance
+        self.track_id = track_id
+        # hits is compared against n_init: it is incremented on every update()
+        # call (updates only happen on a match), so hits counts successful
+        # matches; once hits reaches n_init the track becomes Confirmed
+        self.hits = 1
+        self.age = 1  # effectively unused; duplicates time_since_update bookkeeping
+        # incremented on every predict() call, reset to 0 on every update() call
+        self.time_since_update = 0
+
+        self.state = TrackState.Tentative  # newly created tracks are classified as `tentative` until enough evidence has been collected
+        # each track caches several features; every update appends the newest feature to the list
+        self.features = []
+        if feature is not None:
+            self.features.append(feature)
+
+        self._n_init = n_init
+        self._max_age = max_age
+
+    def to_tlwh(self):
+        """Get current position in bounding box format `(top left x, top left y,
+        width, height)`.
+
+        Returns
+        -------
+        ndarray
+            The bounding box.
+
+        """
+        ret = self.mean[:4].copy()
+        ret[2] *= ret[3]
+        ret[:2] -= ret[2:] / 2
+        return ret
+
+    def to_tlbr(self):
+        """Get current position in bounding box format `(min x, min y, max x,
+        max y)`.
+
+        Returns
+        -------
+        ndarray
+            The bounding box.
+
+        """
+        ret = self.to_tlwh()
+        ret[2:] = ret[:2] + ret[2:]
+        return ret
+
+    def predict(self, kf):
+        """Propagate the state distribution to the current time step using a
+        Kalman filter prediction step.
+
+        Parameters
+        ----------
+        kf : kalman_filter.KalmanFilter
+            The Kalman filter.
+
+        """
+        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
+        self.age += 1
+        self.time_since_update += 1
+
+    def update(self, kf, detection):
+        """Perform Kalman filter measurement update step and update the feature
+        cache.
+
+        Parameters
+        ----------
+        kf : kalman_filter.KalmanFilter
+            The Kalman filter.
+        detection : Detection
+            The associated detection.
+
+        """
+        self.mean, self.covariance = kf.update(
+            self.mean, self.covariance, detection.to_xyah())
+        self.features.append(detection.feature)
+
+        self.hits += 1
+        self.time_since_update = 0
+        if self.state == TrackState.Tentative and self.hits >= self._n_init:
+            self.state = TrackState.Confirmed
+
+    def mark_missed(self):
+        """Mark this track as missed (no association at the current time step).
+        """
+        if self.state == TrackState.Tentative:  # a track that was never matched by a detection is deleted outright (rare)
+            self.state = TrackState.Deleted
+        elif self.time_since_update > self._max_age:  # a previously matched track that has gone unmatched for too long is deleted
+            self.state = TrackState.Deleted
+
+    def is_tentative(self):
+        """Returns True if this track is tentative (unconfirmed).
+        """
+        return self.state == TrackState.Tentative
+
+    def is_confirmed(self):
+        """Returns True if this track is confirmed."""
+        return self.state == TrackState.Confirmed
+
+    def is_deleted(self):
+        """Returns True if this track is dead and should be deleted."""
+        return self.state == TrackState.Deleted
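
The state machine above can be exercised directly. A sketch (not part of the diff) showing a track moving from Tentative to Confirmed after `n_init` matched frames:

```python
import numpy as np

from deep_sort.sort.detection import Detection
from deep_sort.sort.kalman_filter import KalmanFilter
from deep_sort.sort.track import Track

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([30., 60., 0.5, 80.]))  # (x, y, a, h)
track = Track(mean, cov, track_id=1, n_init=3, max_age=70)
print(track.is_tentative())        # True: one hit so far

det = Detection([10, 20, 40, 80], 0.9, np.zeros(512))
for _ in range(2):                 # two more matched frames
    track.predict(kf)
    track.update(kf, det)
print(track.is_confirmed())        # True: hits reached n_init
```
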
+ + """ + + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track: + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + Parameters + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age : int + The maximum number of consecutive misses before the track state is + set to `Deleted`. + feature : Optional[ndarray] + Feature vector of the detection this track originates from. If not None, + this feature is added to the `features` cache. + + Attributes + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + hits : int + Total number of measurement updates. + age : int + Total number of frames since first occurance. + time_since_update : int + Total number of frames since last measurement update. + state : TrackState + The current track state. + features : List[ndarray] + A cache of features. On each measurement update, the associated feature + vector is added to this list. + + """ + + def __init__(self, mean, covariance, track_id, n_init, max_age, + feature=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + # hits 和 n_init 进行比较 + # hits 每次 update 的时候进行一次更新(只有 match 的时候才进行 update ) + # hits 代表匹配上了多少次,匹配次数超过 n_init 就会设置为 confirmed 状态 + self.hits = 1 + self.age = 1 # 没有用到,和 time_since_update 功能重复 + # 每次调用 predict 函数的时候就会 +1 + # 每次调用 update 函数的时候就会设置为 0 + self.time_since_update = 0 + + self.state = TrackState.Tentative ##Newly created tracks areclassified as `tentative` until enough evidence has been collected. + # 每个 track 对应多个 features, 每次更新都将最新的 feature 添加到列表中 + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get current position in bounding box format `(min x, miny, max x, + max y)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def predict(self, kf): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + + """ + self.mean, self.covariance = kf.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kf, detection): + """Perform Kalman filter measurement update step and update the feature + cache. + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + detection : Detection + The associated detection. 
+
+        """
+        self.mean, self.covariance = kf.update(
+            self.mean, self.covariance, detection.to_xyah())
+        self.features.append(detection.feature)
+
+        self.hits += 1
+        self.time_since_update = 0
+        if self.state == TrackState.Tentative and self.hits >= self._n_init:
+            self.state = TrackState.Confirmed
+
+    def mark_missed(self):
+        """Mark this track as missed (no association at the current time step).
+        """
+        if self.state == TrackState.Tentative:  # a tentative track that was never matched to a detection is deleted right away (a rare case)
+            self.state = TrackState.Deleted
+        elif self.time_since_update > self._max_age:  # a track that was matched before but has gone unmatched for too long is deleted
+            self.state = TrackState.Deleted
+
+    def is_tentative(self):
+        """Returns True if this track is tentative (unconfirmed).
+        """
+        return self.state == TrackState.Tentative
+
+    def is_confirmed(self):
+        """Returns True if this track is confirmed."""
+        return self.state == TrackState.Confirmed
+
+    def is_deleted(self):
+        """Returns True if this track is dead and should be deleted."""
+        return self.state == TrackState.Deleted
diff --git a/deep_sort/sort/tracker.py b/deep_sort/sort/tracker.py
new file mode 100644
index 0000000..6eb0a82
--- /dev/null
+++ b/deep_sort/sort/tracker.py
@@ -0,0 +1,141 @@
+# vim: expandtab:ts=4:sw=4
+from __future__ import absolute_import
+import numpy as np
+from . import kalman_filter
+from . import linear_assignment
+from . import iou_matching
+from .track import Track
+
+
+class Tracker:
+    """
+    This is the multi-target tracker.
+
+    Parameters
+    ----------
+    metric : nn_matching.NearestNeighborDistanceMetric
+        A distance metric for measurement-to-track association.
+    max_age : int
+        Maximum number of consecutive misses before a track is deleted.
+    n_init : int
+        Number of consecutive detections before the track is confirmed. The
+        track state is set to `Deleted` if a miss occurs within the first
+        `n_init` frames.
+
+    Attributes
+    ----------
+    metric : nn_matching.NearestNeighborDistanceMetric
+        The distance metric used for measurement-to-track association.
+    max_age : int
+        Maximum number of consecutive misses before a track is deleted.
+    n_init : int
+        Number of frames that a track remains in initialization phase.
+    kf : kalman_filter.KalmanFilter
+        A Kalman filter to filter target trajectories in image space.
+    tracks : List[Track]
+        The list of active tracks at the current time step.
+
+    """
+
+    def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
+        # `metric` is a distance-metric object (cosine or Mahalanobis
+        # distance) used for measurement-to-track association.
+        self.metric = metric
+        self.max_iou_distance = max_iou_distance
+        self.max_age = max_age
+        self.n_init = n_init
+
+        self.kf = kalman_filter.KalmanFilter()
+        self.tracks = []
+        self._next_id = 1
+
+    def predict(self):
+        """Propagate track state distributions one time step forward.
+
+        This function should be called once every time step, before `update`.
+        """
+        for track in self.tracks:
+            track.predict(self.kf)
+
+    def update(self, detections):
+        """Perform measurement update and track management.
+
+        Parameters
+        ----------
+        detections : List[deep_sort.detection.Detection]
+            A list of detections at the current time step.
+
+        """
+        # Run matching cascade.
+        matches, unmatched_tracks, unmatched_detections = \
+            self._match(detections)
+
+        # Update track set.
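+        # Matched tracks get a Kalman measurement update; unmatched tracks
+        # are marked missed (and possibly deleted); unmatched detections
+        # spawn new tentative tracks.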
+        for track_idx, detection_idx in matches:
+            self.tracks[track_idx].update(
+                self.kf, detections[detection_idx])
+        for track_idx in unmatched_tracks:
+            self.tracks[track_idx].mark_missed()
+        for detection_idx in unmatched_detections:
+            self._initiate_track(detections[detection_idx])
+        self.tracks = [t for t in self.tracks if not t.is_deleted()]
+
+        # Update distance metric.
+        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
+        features, targets = [], []
+        for track in self.tracks:
+            if not track.is_confirmed():
+                continue
+            features += track.features
+            targets += [track.track_id for _ in track.features]
+            track.features = []  # reset the cache so the next update collects fresh features
+        self.metric.partial_fit(
+            np.asarray(features), np.asarray(targets), active_targets)
+
+    def _match(self, detections):
+
+        def gated_metric(tracks, dets, track_indices, detection_indices):
+            features = np.array([dets[i].feature for i in detection_indices])
+            targets = np.array([tracks[i].track_id for i in track_indices])
+            # 1. Appearance term: build the cost matrix from nearest-neighbor
+            #    cosine distance between detection embeddings and track features.
+            cost_matrix = self.metric.distance(features, targets)
+            # 2. Motion term: gate the cost matrix with the Mahalanobis
+            #    distance to the Kalman-predicted track state.
+            cost_matrix = linear_assignment.gate_cost_matrix(
+                self.kf, cost_matrix, tracks, dets, track_indices,
+                detection_indices)
+
+            return cost_matrix
+
+        # Split track set into confirmed and unconfirmed tracks.
+        confirmed_tracks = [
+            i for i, t in enumerate(self.tracks) if t.is_confirmed()]
+        unconfirmed_tracks = [
+            i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
+
+        # Associate confirmed tracks using appearance features. Note that
+        # `gated_metric` is passed uncalled (no parentheses); it is invoked
+        # inside `matching_cascade`.
+        matches_a, unmatched_tracks_a, unmatched_detections = \
+            linear_assignment.matching_cascade(
+                gated_metric, self.metric.matching_threshold, self.max_age,
+                self.tracks, detections, confirmed_tracks)
+
+        # Associate remaining tracks together with unconfirmed tracks using IOU.
+        iou_track_candidates = unconfirmed_tracks + [
+            k for k in unmatched_tracks_a if
+            self.tracks[k].time_since_update == 1]
+        unmatched_tracks_a = [
+            k for k in unmatched_tracks_a if
+            self.tracks[k].time_since_update != 1]
+        matches_b, unmatched_tracks_b, unmatched_detections = \
+            linear_assignment.min_cost_matching(
+                iou_matching.iou_cost, self.max_iou_distance, self.tracks,
+                detections, iou_track_candidates, unmatched_detections)
+
+        matches = matches_a + matches_b
+        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
+        return matches, unmatched_tracks, unmatched_detections
+
+    def _initiate_track(self, detection):
+        mean, covariance = self.kf.initiate(detection.to_xyah())
+        self.tracks.append(Track(
+            mean, covariance, self._next_id, self.n_init, self.max_age,
+            detection.feature))
+        self._next_id += 1
diff --git a/detector/YOLOv3/README.md b/detector/YOLOv3/README.md
new file mode 100644
index 0000000..ef8e168
--- /dev/null
+++ b/detector/YOLOv3/README.md
@@ -0,0 +1,11 @@
+# YOLOv3 for detection
+
+This is an implementation of YOLOv3 with only the forward part.
+
+If you want to train YOLOv3 on your custom dataset, please search `YOLOv3` on GitHub.
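+
+For a quick sanity check of the forward pass, something like the sketch below
+should work from the repository root. The paths mirror `configs/yolov3.yaml`;
+the exact constructor signature and return convention are defined in
+`detector.py`, so the keyword names and outputs shown here are assumptions,
+not a reference:
+
+```python
+import cv2
+from detector.YOLOv3 import YOLOv3
+
+# Paths taken from configs/yolov3.yaml; the keyword names are assumed.
+detector = YOLOv3("detector/YOLOv3/cfg/yolov4.cfg",
+                  "detector/YOLOv3/weight/yolov4.weights",
+                  "detector/YOLOv3/cfg/coco.names",
+                  score_thresh=0.1, nms_thresh=0.4)
+
+img = cv2.imread("demo.jpg")  # any BGR test image (hypothetical file)
+bbox_xywh, cls_conf, cls_ids = detector(img)  # assumed return: boxes, scores, class ids
+```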
+
+## Quick forward
+```bash
+cd YOLOv3
+python
+```
\ No newline at end of file
diff --git a/detector/YOLOv3/__init__.py b/detector/YOLOv3/__init__.py
new file mode 100644
index 0000000..fff6a61
--- /dev/null
+++ b/detector/YOLOv3/__init__.py
@@ -0,0 +1,9 @@
+import sys
+sys.path.append("detector/YOLOv3")
+
+
+from .detector import YOLOv3
+__all__ = ['YOLOv3']
+
+
+
diff --git a/detector/YOLOv3/cfg.py b/detector/YOLOv3/cfg.py
new file mode 100644
index 0000000..9b2a0e7
--- /dev/null
+++ b/detector/YOLOv3/cfg.py
@@ -0,0 +1,248 @@
+import torch
+from .yolo_utils import convert2cpu
+
+
+def parse_cfg(cfgfile):
+    blocks = []
+    fp = open(cfgfile)
+    block = None
+    line = fp.readline()
+    while line != '':
+        line = line.rstrip()
+        if line == '' or line[0] == '#':
+            line = fp.readline()
+            continue
+        elif line[0] == '[':
+            if block:
+                blocks.append(block)
+            block = dict()
+            block['type'] = line.lstrip('[').rstrip(']')
+            # set default value
+            if block['type'] == 'convolutional':
+                block['batch_normalize'] = 0
+        else:
+            key, value = line.split('=')
+            key = key.strip()
+            if key == 'type':
+                key = '_type'
+            value = value.strip()
+            block[key] = value
+        line = fp.readline()
+
+    if block:
+        blocks.append(block)
+    fp.close()
+    return blocks
+
+
+def print_cfg(blocks):
+    print('layer filters size input output')
+    prev_width = 416
+    prev_height = 416
+    prev_filters = 3
+    out_filters = []
+    out_widths = []
+    out_heights = []
+    ind = -2
+    for block in blocks:
+        ind += 1
+        if block['type'] == 'net':
+            prev_width = int(block['width'])
+            prev_height = int(block['height'])
+            continue
+        elif block['type'] == 'convolutional':
+            filters = int(block['filters'])
+            kernel_size = int(block['size'])
+            stride = int(block['stride'])
+            is_pad = int(block['pad'])
+            pad = (kernel_size - 1) // 2 if is_pad else 0
+            width = (prev_width + 2 * pad - kernel_size) // stride + 1
+            height = (prev_height + 2 * pad - kernel_size) // stride + 1
+            print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
+                ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width,
+                height, filters))
+            prev_width = width
+            prev_height = height
+            prev_filters = filters
+            out_widths.append(prev_width)
+            out_heights.append(prev_height)
+            out_filters.append(prev_filters)
+        elif block['type'] == 'maxpool':
+            pool_size = int(block['size'])
+            stride = int(block['stride'])
+            width = prev_width // stride
+            height = prev_height // stride
+            # a maxpool layer preserves the channel count
+            print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
+                ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, prev_filters))
+            prev_width = width
+            prev_height = height
+            out_widths.append(prev_width)
+            out_heights.append(prev_height)
+            out_filters.append(prev_filters)
+        elif block['type'] == 'avgpool':
+            width = 1
+            height = 1
+            # global average pooling also preserves the channel count
+            print('%5d %-6s %3d x %3d x%4d -> %3d' % (
+                ind, 'avg', prev_width, prev_height, prev_filters, prev_filters))
+            prev_width = width
+            prev_height = height
+            out_widths.append(prev_width)
+            out_heights.append(prev_height)
+            out_filters.append(prev_filters)
+        elif block['type'] == 'softmax':
+            print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters))
+            out_widths.append(prev_width)
+            out_heights.append(prev_height)
+            out_filters.append(prev_filters)
+        elif block['type'] == 'cost':
+            print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters))
+            out_widths.append(prev_width)
+            out_heights.append(prev_height)
+            out_filters.append(prev_filters)
+        elif
block['type'] == 'reorg': + stride = int(block['stride']) + filters = stride * stride * prev_filters + width = prev_width // stride + height = prev_height // stride + print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( + ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) + prev_width = width + prev_height = height + prev_filters = filters + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'upsample': + stride = int(block['stride']) + filters = prev_filters + width = prev_width * stride + height = prev_height * stride + print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( + ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters)) + prev_width = width + prev_height = height + prev_filters = filters + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'route': + layers = block['layers'].split(',') + layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] + if len(layers) == 1: + print('%5d %-6s %d' % (ind, 'route', layers[0])) + prev_width = out_widths[layers[0]] + prev_height = out_heights[layers[0]] + prev_filters = out_filters[layers[0]] + elif len(layers) == 2: + print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) + prev_width = out_widths[layers[0]] + prev_height = out_heights[layers[0]] + assert (prev_width == out_widths[layers[1]]) + assert (prev_height == out_heights[layers[1]]) + prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] in ['region', 'yolo']: + print('%5d %-6s' % (ind, 'detection')) + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'shortcut': + from_id = int(block['from']) + from_id = from_id if from_id > 0 else from_id + ind + print('%5d %-6s %d' % (ind, 'shortcut', from_id)) + prev_width = out_widths[from_id] + prev_height = out_heights[from_id] + prev_filters = out_filters[from_id] + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'connected': + filters = int(block['output']) + print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters)) + prev_filters = filters + out_widths.append(1) + out_heights.append(1) + out_filters.append(prev_filters) + else: + print('unknown type %s' % (block['type'])) + + +def load_conv(buf, start, conv_model): + num_w = conv_model.weight.numel() + num_b = conv_model.bias.numel() + # print("start: {}, num_w: {}, num_b: {}".format(start, num_w, num_b)) + # by ysyun, use .view_as() + conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]).view_as(conv_model.bias.data)); + start = start + num_b + conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); + start = start + num_w + return start + + +def save_conv(fp, conv_model): + if conv_model.bias.is_cuda: + convert2cpu(conv_model.bias.data).numpy().tofile(fp) + convert2cpu(conv_model.weight.data).numpy().tofile(fp) + else: + conv_model.bias.data.numpy().tofile(fp) + conv_model.weight.data.numpy().tofile(fp) + + +def load_conv_bn(buf, start, conv_model, bn_model): + num_w = conv_model.weight.numel() + num_b = bn_model.bias.numel() + bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); + 
start = start + num_b + bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])); + start = start + num_b + bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])); + start = start + num_b + bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])); + start = start + num_b + # conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w + conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); + start = start + num_w + return start + + +def save_conv_bn(fp, conv_model, bn_model): + if bn_model.bias.is_cuda: + convert2cpu(bn_model.bias.data).numpy().tofile(fp) + convert2cpu(bn_model.weight.data).numpy().tofile(fp) + convert2cpu(bn_model.running_mean).numpy().tofile(fp) + convert2cpu(bn_model.running_var).numpy().tofile(fp) + convert2cpu(conv_model.weight.data).numpy().tofile(fp) + else: + bn_model.bias.data.numpy().tofile(fp) + bn_model.weight.data.numpy().tofile(fp) + bn_model.running_mean.numpy().tofile(fp) + bn_model.running_var.numpy().tofile(fp) + conv_model.weight.data.numpy().tofile(fp) + + +def load_fc(buf, start, fc_model): + num_w = fc_model.weight.numel() + num_b = fc_model.bias.numel() + fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); + start = start + num_b + fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w])); + start = start + num_w + return start + + +def save_fc(fp, fc_model): + fc_model.bias.data.numpy().tofile(fp) + fc_model.weight.data.numpy().tofile(fp) + + +if __name__ == '__main__': + import sys + + blocks = parse_cfg('cfg/yolo.cfg') + if len(sys.argv) == 2: + blocks = parse_cfg(sys.argv[1]) + print_cfg(blocks) diff --git a/detector/YOLOv3/cfg/coco.data b/detector/YOLOv3/cfg/coco.data new file mode 100644 index 0000000..b7e31be --- /dev/null +++ b/detector/YOLOv3/cfg/coco.data @@ -0,0 +1,5 @@ +train = coco_train.txt +valid = coco_test.txt +names = data/coco.names +backup = backup +gpus = 0,1,2,3 diff --git a/detector/YOLOv3/cfg/coco.names b/detector/YOLOv3/cfg/coco.names new file mode 100644 index 0000000..ca76c80 --- /dev/null +++ b/detector/YOLOv3/cfg/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/detector/YOLOv3/cfg/darknet19_448.cfg b/detector/YOLOv3/cfg/darknet19_448.cfg new file mode 100644 index 0000000..133c688 --- /dev/null +++ b/detector/YOLOv3/cfg/darknet19_448.cfg @@ -0,0 +1,200 @@ +[net] +batch=128 +subdivisions=4 +height=448 +width=448 +max_crop=512 +channels=3 +momentum=0.9 +decay=0.0005 + +learning_rate=0.001 +policy=poly +power=4 +max_batches=100000 + +angle=7 +hue = .1 +saturation=.75 +exposure=.75 +aspect=.75 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 
+pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=1 +activation=linear + +[avgpool] + +[softmax] +groups=1 + +[cost] +type=sse + diff --git a/detector/YOLOv3/cfg/tiny-yolo-voc.cfg b/detector/YOLOv3/cfg/tiny-yolo-voc.cfg new file mode 100644 index 0000000..ab2c066 --- /dev/null +++ b/detector/YOLOv3/cfg/tiny-yolo-voc.cfg @@ -0,0 +1,134 @@ +[net] +batch=64 +subdivisions=8 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +max_batches = 40200 +policy=steps +steps=-1,100,20000,30000 +scales=.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + +[region] +anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git 
a/detector/YOLOv3/cfg/tiny-yolo.cfg b/detector/YOLOv3/cfg/tiny-yolo.cfg new file mode 100644 index 0000000..ac5770e --- /dev/null +++ b/detector/YOLOv3/cfg/tiny-yolo.cfg @@ -0,0 +1,140 @@ +[net] +# Training +# batch=64 +# subdivisions=2 +# Testing +batch=1 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.2 +rescore=0 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 + diff --git a/detector/YOLOv3/cfg/voc.data b/detector/YOLOv3/cfg/voc.data new file mode 100644 index 0000000..3329357 --- /dev/null +++ b/detector/YOLOv3/cfg/voc.data @@ -0,0 +1,5 @@ +train = data/voc_train.txt +valid = data/2007_test.txt +names = data/voc.names +backup = backup +gpus = 3 diff --git a/detector/YOLOv3/cfg/voc.names b/detector/YOLOv3/cfg/voc.names new file mode 100644 index 0000000..8420ab3 --- /dev/null +++ b/detector/YOLOv3/cfg/voc.names @@ -0,0 +1,20 @@ +aeroplane +bicycle +bird +boat +bottle +bus +car +cat +chair +cow +diningtable +dog +horse +motorbike +person +pottedplant +sheep +sofa +train +tvmonitor diff --git a/detector/YOLOv3/cfg/voc_gaotie.data b/detector/YOLOv3/cfg/voc_gaotie.data new file mode 100644 index 0000000..66495ec --- /dev/null +++ b/detector/YOLOv3/cfg/voc_gaotie.data @@ -0,0 +1,5 @@ +train = data/gaotie_trainval.txt +valid = data/gaotie_test.txt +names = data/voc.names +backup = backup +gpus = 3 \ No newline at end of file diff --git a/detector/YOLOv3/cfg/yolo-voc.cfg b/detector/YOLOv3/cfg/yolo-voc.cfg new file mode 100644 index 0000000..d5bdfc1 --- /dev/null +++ b/detector/YOLOv3/cfg/yolo-voc.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=64 +subdivisions=8 +# Training +# batch=64 +# subdivisions=8 +height=416 +width=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 80200 +policy=steps +steps=-1,500,40000,60000 +scales=0.1,10,.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 
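+# Each 2x2/2 maxpool halves the feature map; the five of them in this
+# backbone take the 416x416 input down to the 13x13 [region] grid.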
+ +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=125 +activation=linear + + +[region] +anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 +bias_match=1 +classes=20 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/detector/YOLOv3/cfg/yolo.cfg b/detector/YOLOv3/cfg/yolo.cfg new file mode 100644 index 0000000..2a0cd98 --- /dev/null +++ b/detector/YOLOv3/cfg/yolo.cfg @@ -0,0 +1,258 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=8 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + + +####### + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[route] +layers=-9 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=64 +activation=leaky + +[reorg] +stride=2 + +[route] +layers=-1,-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=425 +activation=linear + + +[region] +anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 +bias_match=1 +classes=80 +coords=4 +num=5 +softmax=1 +jitter=.3 +rescore=1 + +object_scale=5 +noobject_scale=1 +class_scale=1 +coord_scale=1 + +absolute=1 +thresh = .6 +random=1 diff --git a/detector/YOLOv3/cfg/yolo_v3.cfg b/detector/YOLOv3/cfg/yolo_v3.cfg new file mode 100644 index 0000000..f6a3d22 --- /dev/null +++ b/detector/YOLOv3/cfg/yolo_v3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=16 +subdivisions=4 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=20,25 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + 
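+# YOLOv3 has no pooling layers; every downsampling step is a stride-2
+# 3x3 convolution, and each residual unit that follows is a 1x1 -> 3x3
+# pair merged back via [shortcut].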
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +random=1 + diff --git a/detector/YOLOv3/cfg/yolov3-tiny.cfg b/detector/YOLOv3/cfg/yolov3-tiny.cfg new file mode 100644 index 0000000..cfca3cf --- /dev/null +++ b/detector/YOLOv3/cfg/yolov3-tiny.cfg @@ -0,0 +1,182 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +# batch=64 +# subdivisions=2 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 8 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/detector/YOLOv3/cfg/yolov4-tiny.cfg b/detector/YOLOv3/cfg/yolov4-tiny.cfg new file mode 100644 index 0000000..dc6f5bf --- /dev/null +++ b/detector/YOLOv3/cfg/yolov4-tiny.cfg @@ -0,0 +1,281 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 
+stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +################################## + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +scale_x_y = 1.05 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +nms_kind=greedynms +beta_nms=0.6 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 23 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +scale_x_y = 1.05 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +nms_kind=greedynms +beta_nms=0.6 diff --git a/detector/YOLOv3/cfg/yolov4.cfg b/detector/YOLOv3/cfg/yolov4.cfg new file mode 100644 index 0000000..2a1d171 --- /dev/null +++ b/detector/YOLOv3/cfg/yolov4.cfg @@ -0,0 +1,1157 @@ +[net] +batch=64 +subdivisions=8 +# Training +#width=512 +#height=512 +width=416 +height=416 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0013 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 
+stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 54 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 
+iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = -1, -16 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=leaky + +[route] +layers = -1, -37 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 diff --git a/detector/YOLOv3/darknet.py b/detector/YOLOv3/darknet.py new file mode 100644 index 0000000..9cef048 --- /dev/null +++ b/detector/YOLOv3/darknet.py @@ -0,0 +1,453 @@ +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from .cfg import * +from .region_layer import RegionLayer +from .yolo_layer import YoloLayer + +class MaxPoolStride1(nn.Module): + def __init__(self): + super(MaxPoolStride1, self).__init__() + + def forward(self, x): + x = F.max_pool2d(F.pad(x, (0,1,0,1), mode='replicate'), 2, stride=1) + return x + +class Upsample(nn.Module): + def __init__(self, stride=2): + super(Upsample, self).__init__() + self.stride = stride + def forward(self, x): + stride = self.stride + assert(x.data.dim() == 4) + B = x.data.size(0) + C = x.data.size(1) + H = x.data.size(2) + W = x.data.size(3) + ws = stride + hs = stride + x = x.view(B, C, H, 1, W, 1).expand(B, C, H, hs, W, ws).contiguous().view(B, C, H*hs, W*ws) + return x + +class Reorg(nn.Module): + def __init__(self, stride=2): + super(Reorg, self).__init__() + self.stride = stride + def forward(self, x): + stride = self.stride + assert(x.data.dim() == 4) + B = x.data.size(0) + C = x.data.size(1) + H = x.data.size(2) + W = 
x.data.size(3)
+        assert(H % stride == 0)
+        assert(W % stride == 0)
+        ws = stride
+        hs = stride
+        x = x.view(B, C, H//hs, hs, W//ws, ws).transpose(3,4).contiguous()
+        x = x.view(B, C, (H//hs)*(W//ws), hs*ws).transpose(2,3).contiguous()
+        x = x.view(B, C, hs*ws, H//hs, W//ws).transpose(1,2).contiguous()
+        x = x.view(B, hs*ws*C, H//hs, W//ws)
+        return x
+
+class GlobalAvgPool2d(nn.Module):
+    def __init__(self):
+        super(GlobalAvgPool2d, self).__init__()
+
+    def forward(self, x):
+        N = x.data.size(0)
+        C = x.data.size(1)
+        H = x.data.size(2)
+        W = x.data.size(3)
+        x = F.avg_pool2d(x, (H, W))
+        x = x.view(N, C)
+        return x
+
+# for route and shortcut
+class EmptyModule(nn.Module):
+    def __init__(self):
+        super(EmptyModule, self).__init__()
+
+    def forward(self, x):
+        return x
+
+class Mish(nn.Module):
+    def __init__(self):
+        super(Mish, self).__init__()
+
+    def forward(self, x):
+        return x * torch.tanh(F.softplus(x))
+
+# supports route, shortcut and reorg
+class Darknet(nn.Module):
+    def getLossLayers(self):
+        loss_layers = []
+        for m in self.models:
+            if isinstance(m, RegionLayer) or isinstance(m, YoloLayer):
+                loss_layers.append(m)
+        return loss_layers
+
+    def __init__(self, cfgfile, use_cuda=True):
+        super(Darknet, self).__init__()
+        self.use_cuda = use_cuda
+        self.blocks = parse_cfg(cfgfile)
+        self.models = self.create_network(self.blocks)  # merges conv, bn, leaky
+        self.loss_layers = self.getLossLayers()
+
+        #self.width = int(self.blocks[0]['width'])
+        #self.height = int(self.blocks[0]['height'])
+
+        if len(self.loss_layers) > 0:
+            last = len(self.loss_layers)-1
+            self.anchors = self.loss_layers[last].anchors
+            self.num_anchors = self.loss_layers[last].num_anchors
+            self.anchor_step = self.loss_layers[last].anchor_step
+            self.num_classes = self.loss_layers[last].num_classes
+
+        # default format : major=0, minor=1
+        self.header = torch.IntTensor([0,1,0,0])
+        self.seen = 0
+
+    def forward(self, x):
+        ind = -2
+        self.loss_layers = None
+        outputs = dict()
+        out_boxes = dict()
+        outno = 0
+        for block in self.blocks:
+            ind = ind + 1
+
+            if block['type'] == 'net':
+                continue
+            elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']:
+                x = self.models[ind](x)
+                outputs[ind] = x
+            elif block['type'] == 'route':
+                layers = block['layers'].split(',')
+                layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
+                if len(layers) == 1:
+                    x = outputs[layers[0]]
+                elif len(layers) == 2:
+                    x1 = outputs[layers[0]]
+                    x2 = outputs[layers[1]]
+                    x = torch.cat((x1, x2), 1)
+                elif len(layers) == 4:
+                    x1 = outputs[layers[0]]
+                    x2 = outputs[layers[1]]
+                    x3 = outputs[layers[2]]
+                    x4 = outputs[layers[3]]
+                    x = torch.cat((x1, x2, x3, x4), 1)  # concatenate along the channel dimension
+                outputs[ind] = x
+            elif block['type'] == 'shortcut':
+                from_layer = int(block['from'])
+                activation = block['activation']
+                from_layer = from_layer if from_layer > 0 else from_layer + ind
+                x1 = outputs[from_layer]
+                x2 = outputs[ind-1]
+                x = x1 + x2
+                if activation == 'leaky':
+                    x = F.leaky_relu(x, 0.1, inplace=True)
+                elif activation == 'relu':
+                    x = F.relu(x, inplace=True)
+                outputs[ind] = x
+            elif block['type'] in ['region', 'yolo']:
+                boxes = self.models[ind].get_mask_boxes(x)
+                out_boxes[outno] = boxes
+                outno += 1
+                outputs[ind] = None
+            elif block['type'] == 'cost':
+                continue
+            else:
+                print('unknown type %s' % (block['type']))
+        return x if outno == 0 else out_boxes
+
+    def print_network(self):
+        print_cfg(self.blocks)
+
+    def create_network(self, blocks):
+        models =
nn.ModuleList()
+
+        prev_filters = 3
+        out_filters = []
+        prev_stride = 1
+        out_strides = []
+        conv_id = 0
+        ind = -2
+        for block in blocks:
+            ind += 1
+            if block['type'] == 'net':
+                prev_filters = int(block['channels'])
+                self.width = int(block['width'])
+                self.height = int(block['height'])
+                continue
+            elif block['type'] == 'convolutional':
+                conv_id = conv_id + 1
+                batch_normalize = int(block['batch_normalize'])
+                filters = int(block['filters'])
+                kernel_size = int(block['size'])
+                stride = int(block['stride'])
+                is_pad = int(block['pad'])
+                pad = (kernel_size-1)//2 if is_pad else 0
+                activation = block['activation']
+                model = nn.Sequential()
+                if batch_normalize:
+                    model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
+                    model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters))
+                    #model.add_module('bn{0}'.format(conv_id), BN2d(filters))
+                else:
+                    model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
+                if activation == 'leaky':
+                    model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
+                elif activation == 'relu':
+                    model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
+                elif activation == 'mish':
+                    model.add_module("mish{0}".format(conv_id), Mish())
+                prev_filters = filters
+                out_filters.append(prev_filters)
+                prev_stride = stride * prev_stride
+                out_strides.append(prev_stride)
+                models.append(model)
+            elif block['type'] == 'maxpool':
+                pool_size = int(block['size'])
+                stride = int(block['stride'])
+                if stride > 1:
+                    model = nn.MaxPool2d(pool_size, stride)
+                else:
+                    model = MaxPoolStride1()
+                out_filters.append(prev_filters)
+                prev_stride = stride * prev_stride
+                out_strides.append(prev_stride)
+                models.append(model)
+            elif block['type'] == 'avgpool':
+                model = GlobalAvgPool2d()
+                out_filters.append(prev_filters)
+                models.append(model)
+            elif block['type'] == 'softmax':
+                model = nn.Softmax()
+                out_strides.append(prev_stride)
+                out_filters.append(prev_filters)
+                models.append(model)
+            elif block['type'] == 'cost':
+                if block['_type'] == 'sse':
+                    model = nn.MSELoss(size_average=True)
+                elif block['_type'] == 'L1':
+                    model = nn.L1Loss(size_average=True)
+                elif block['_type'] == 'smooth':
+                    model = nn.SmoothL1Loss(size_average=True)
+                out_filters.append(1)
+                out_strides.append(prev_stride)
+                models.append(model)
+            elif block['type'] == 'reorg':
+                stride = int(block['stride'])
+                prev_filters = stride * stride * prev_filters
+                out_filters.append(prev_filters)
+                prev_stride = prev_stride * stride
+                out_strides.append(prev_stride)
+                models.append(Reorg(stride))
+            elif block['type'] == 'upsample':
+                stride = int(block['stride'])
+                out_filters.append(prev_filters)
+                prev_stride = prev_stride / stride
+                out_strides.append(prev_stride)
+                #models.append(nn.Upsample(scale_factor=stride, mode='nearest'))
+                models.append(Upsample(stride))
+            elif block['type'] == 'route':
+                layers = block['layers'].split(',')
+                ind = len(models)
+                layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
+                if len(layers) == 1:
+                    prev_filters = out_filters[layers[0]]
+                    prev_stride = out_strides[layers[0]]
+                elif len(layers) == 2:
+                    assert(layers[0] == ind - 1)
+                    prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
+                    prev_stride = out_strides[layers[0]]
+                elif len(layers) == 4:
+                    prev_filters = out_filters[layers[0]] + \
+                        out_filters[layers[1]] + out_filters[layers[2]] + out_filters[layers[3]]  # channel counts of the routed layers are summed
+                    prev_stride = out_strides[ind-1]
+
out_filters.append(prev_filters) + out_strides.append(prev_stride) + models.append(EmptyModule()) + elif block['type'] == 'shortcut': + ind = len(models) + prev_filters = out_filters[ind-1] + out_filters.append(prev_filters) + prev_stride = out_strides[ind-1] + out_strides.append(prev_stride) + models.append(EmptyModule()) + elif block['type'] == 'connected': + filters = int(block['output']) + if block['activation'] == 'linear': + model = nn.Linear(prev_filters, filters) + elif block['activation'] == 'leaky': + model = nn.Sequential( + nn.Linear(prev_filters, filters), + nn.LeakyReLU(0.1, inplace=True)) + elif block['activation'] == 'relu': + model = nn.Sequential( + nn.Linear(prev_filters, filters), + nn.ReLU(inplace=True)) + prev_filters = filters + out_filters.append(prev_filters) + out_strides.append(prev_stride) + models.append(model) + elif block['type'] == 'region': + region_layer = RegionLayer(use_cuda=self.use_cuda) + anchors = block['anchors'].split(',') + region_layer.anchors = [float(i) for i in anchors] + region_layer.num_classes = int(block['classes']) + region_layer.num_anchors = int(block['num']) + region_layer.anchor_step = len(region_layer.anchors)//region_layer.num_anchors + region_layer.rescore = int(block['rescore']) + region_layer.object_scale = float(block['object_scale']) + region_layer.noobject_scale = float(block['noobject_scale']) + region_layer.class_scale = float(block['class_scale']) + region_layer.coord_scale = float(block['coord_scale']) + region_layer.thresh = float(block['thresh']) + out_filters.append(prev_filters) + out_strides.append(prev_stride) + models.append(region_layer) + elif block['type'] == 'yolo': + yolo_layer = YoloLayer(use_cuda=self.use_cuda) + anchors = block['anchors'].split(',') + anchor_mask = block['mask'].split(',') + yolo_layer.anchor_mask = [int(i) for i in anchor_mask] + yolo_layer.anchors = [float(i) for i in anchors] + yolo_layer.num_classes = int(block['classes']) + yolo_layer.num_anchors = int(block['num']) + yolo_layer.anchor_step = len(yolo_layer.anchors)//yolo_layer.num_anchors + try: + yolo_layer.rescore = int(block['rescore']) + except: + pass + yolo_layer.ignore_thresh = float(block['ignore_thresh']) + yolo_layer.truth_thresh = float(block['truth_thresh']) + yolo_layer.stride = prev_stride + yolo_layer.nth_layer = ind + yolo_layer.net_width = self.width + yolo_layer.net_height = self.height + out_filters.append(prev_filters) + out_strides.append(prev_stride) + models.append(yolo_layer) + else: + print('unknown type %s' % (block['type'])) + + return models + + def load_binfile(self, weightfile): + fp = open(weightfile, 'rb') + + version = np.fromfile(fp, count=3, dtype=np.int32) + version = [int(i) for i in version] + if version[0]*10+version[1] >=2 and version[0] < 1000 and version[1] < 1000: + seen = np.fromfile(fp, count=1, dtype=np.int64) + else: + seen = np.fromfile(fp, count=1, dtype=np.int32) + self.header = torch.from_numpy(np.concatenate((version, seen), axis=0)) + self.seen = int(seen) + body = np.fromfile(fp, dtype=np.float32) + fp.close() + return body + + def load_weights(self, weightfile): + buf = self.load_binfile(weightfile) + + start = 0 + ind = -2 + for block in self.blocks: + if start >= buf.size: + break + ind = ind + 1 + if block['type'] == 'net': + continue + elif block['type'] == 'convolutional': + model = self.models[ind] + batch_normalize = int(block['batch_normalize']) + if batch_normalize: + start = load_conv_bn(buf, start, model[0], model[1]) + else: + start = load_conv(buf, start, model[0]) + 
elif block['type'] == 'connected':
+                model = self.models[ind]
+                if block['activation'] != 'linear':
+                    start = load_fc(buf, start, model[0])
+                else:
+                    start = load_fc(buf, start, model)
+            elif block['type'] == 'maxpool':
+                pass
+            elif block['type'] == 'reorg':
+                pass
+            elif block['type'] == 'upsample':
+                pass
+            elif block['type'] == 'route':
+                pass
+            elif block['type'] == 'shortcut':
+                pass
+            elif block['type'] == 'region':
+                pass
+            elif block['type'] == 'yolo':
+                pass
+            elif block['type'] == 'avgpool':
+                pass
+            elif block['type'] == 'softmax':
+                pass
+            elif block['type'] == 'cost':
+                pass
+            else:
+                print('unknown type %s' % (block['type']))
+
+    def save_weights(self, outfile, cutoff=0):
+        if cutoff <= 0:
+            cutoff = len(self.blocks)-1
+
+        fp = open(outfile, 'wb')
+        self.header[3] = self.seen
+        header = np.array(self.header[0:3].numpy(), np.int32)
+        header.tofile(fp)
+        if (self.header[0]*10+self.header[1]) >= 2:
+            seen = np.array(self.seen, np.int64)
+        else:
+            seen = np.array(self.seen, np.int32)
+        seen.tofile(fp)
+
+        ind = -1
+        for blockId in range(1, cutoff+1):
+            ind = ind + 1
+            block = self.blocks[blockId]
+            if block['type'] == 'convolutional':
+                model = self.models[ind]
+                batch_normalize = int(block['batch_normalize'])
+                if batch_normalize:
+                    save_conv_bn(fp, model[0], model[1])
+                else:
+                    save_conv(fp, model[0])
+            elif block['type'] == 'connected':
+                model = self.models[ind]
+                if block['activation'] != 'linear':
+                    save_fc(fp, model[0])
+                else:
+                    save_fc(fp, model)
+            elif block['type'] == 'maxpool':
+                pass
+            elif block['type'] == 'reorg':
+                pass
+            elif block['type'] == 'upsample':
+                pass
+            elif block['type'] == 'route':
+                pass
+            elif block['type'] == 'shortcut':
+                pass
+            elif block['type'] == 'region':
+                pass
+            elif block['type'] == 'yolo':
+                pass
+            elif block['type'] == 'avgpool':
+                pass
+            elif block['type'] == 'softmax':
+                pass
+            elif block['type'] == 'cost':
+                pass
+            else:
+                print('unknown type %s' % (block['type']))
+        fp.close()
diff --git a/detector/YOLOv3/demo/004545.jpg b/detector/YOLOv3/demo/004545.jpg
new file mode 100644
index 0000000..4e06c20
Binary files /dev/null and b/detector/YOLOv3/demo/004545.jpg differ
diff --git a/detector/YOLOv3/demo/results/004545.jpg b/detector/YOLOv3/demo/results/004545.jpg
new file mode 100644
index 0000000..4f8f75b
Binary files /dev/null and b/detector/YOLOv3/demo/results/004545.jpg differ
diff --git a/detector/YOLOv3/detect.py b/detector/YOLOv3/detect.py
new file mode 100644
index 0000000..9a091a3
--- /dev/null
+++ b/detector/YOLOv3/detect.py
@@ -0,0 +1,133 @@
+import os
+import sys
+import time
+import numpy as np
+from PIL import Image, ImageDraw
+#from models.tiny_yolo import TinyYoloNet
+from yolo_utils import *
+from darknet import Darknet
+
+import cv2
+
+namesfile = None
+def detect(cfgfile, weightfile, imgfolder):
+    m = Darknet(cfgfile)
+
+    #m.print_network()
+    m.load_weights(weightfile)
+    print('Loading weights from %s... Done!' % (weightfile))
+
+    # if m.num_classes == 20:
+    #     namesfile = 'data/voc.names'
+    # elif m.num_classes == 80:
+    #     namesfile = 'data/coco.names'
+    # else:
+    #     namesfile = 'data/names'
+
+    use_cuda = True
+    if use_cuda:
+        m.cuda()
+
+    imgfiles = [x for x in os.listdir(imgfolder) if x[-4:] == '.jpg']
+    imgfiles.sort()
+    for imgname in imgfiles:
+        imgfile = os.path.join(imgfolder, imgname)
+
+        img = Image.open(imgfile).convert('RGB')
+        sized = img.resize((m.width, m.height))
+
+        #for i in range(2):
+        start = time.time()
+        boxes = do_detect(m, sized, 0.5, 0.4, use_cuda)
+        finish = time.time()
+        #if i == 1:
+        print('%s: Predicted in %f seconds.'
% (imgfile, (finish-start))) + + class_names = load_class_names(namesfile) + img = plot_boxes(img, boxes, 'result/{}'.format(os.path.basename(imgfile)), class_names) + img = np.array(img) + cv2.imshow('{}'.format(os.path.basename(imgfolder)), img) + cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000,800) + cv2.waitKey(1000) + +def detect_cv2(cfgfile, weightfile, imgfile): + import cv2 + m = Darknet(cfgfile) + + m.print_network() + m.load_weights(weightfile) + print('Loading weights from %s... Done!' % (weightfile)) + + if m.num_classes == 20: + namesfile = 'data/voc.names' + elif m.num_classes == 80: + namesfile = 'data/coco.names' + else: + namesfile = 'data/names' + + use_cuda = True + if use_cuda: + m.cuda() + + img = cv2.imread(imgfile) + sized = cv2.resize(img, (m.width, m.height)) + sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB) + + for i in range(2): + start = time.time() + boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) + finish = time.time() + if i == 1: + print('%s: Predicted in %f seconds.' % (imgfile, (finish-start))) + + class_names = load_class_names(namesfile) + plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) + +def detect_skimage(cfgfile, weightfile, imgfile): + from skimage import io + from skimage.transform import resize + m = Darknet(cfgfile) + + m.print_network() + m.load_weights(weightfile) + print('Loading weights from %s... Done!' % (weightfile)) + + if m.num_classes == 20: + namesfile = 'data/voc.names' + elif m.num_classes == 80: + namesfile = 'data/coco.names' + else: + namesfile = 'data/names' + + use_cuda = True + if use_cuda: + m.cuda() + + img = io.imread(imgfile) + sized = resize(img, (m.width, m.height)) * 255 + + for i in range(2): + start = time.time() + boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) + finish = time.time() + if i == 1: + print('%s: Predicted in %f seconds.' % (imgfile, (finish-start))) + + class_names = load_class_names(namesfile) + plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) + +if __name__ == '__main__': + if len(sys.argv) == 5: + cfgfile = sys.argv[1] + weightfile = sys.argv[2] + imgfolder = sys.argv[3] + cv2.namedWindow('{}'.format(os.path.basename(imgfolder)), cv2.WINDOW_NORMAL ) + cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000,800) + globals()["namesfile"] = sys.argv[4] + detect(cfgfile, weightfile, imgfolder) + #detect_cv2(cfgfile, weightfile, imgfile) + #detect_skimage(cfgfile, weightfile, imgfile) + else: + print('Usage: ') + print(' python detect.py cfgfile weightfile imgfolder names') + #detect('cfg/tiny-yolo-voc.cfg', 'tiny-yolo-voc.weights', 'data/person.jpg', version=1) diff --git a/detector/YOLOv3/detector.py b/detector/YOLOv3/detector.py new file mode 100644 index 0000000..1dcbb93 --- /dev/null +++ b/detector/YOLOv3/detector.py @@ -0,0 +1,107 @@ +import torch +import logging +import numpy as np +import cv2 + +from .darknet import Darknet +from .yolo_utils import get_all_boxes, nms, post_process, xywh_to_xyxy, xyxy_to_xywh +from .nms import boxes_nms +import time + +class YOLOv3(object): + def __init__(self, cfgfile, weightfile, namesfile, score_thresh=0.7, conf_thresh=0.01, nms_thresh=0.45, + is_xywh=False, use_cuda=True): + # net definition + self.net = Darknet(cfgfile) + self.net.load_weights(weightfile) + logger = logging.getLogger("root.detector") + logger.info('Loading weights from %s... Done!' 
% (weightfile))
+        self.device = "cuda" if use_cuda else "cpu"
+        self.net.eval()
+
+        self.net.to(self.device)
+
+        # constants
+        self.size = self.net.width, self.net.height
+        self.score_thresh = score_thresh
+        self.conf_thresh = conf_thresh
+        self.nms_thresh = nms_thresh
+        self.use_cuda = use_cuda
+        self.is_xywh = is_xywh
+        self.num_classes = self.net.num_classes
+        self.class_names = self.load_class_names(namesfile)
+
+    def __call__(self, ori_img):
+        # img to tensor
+        assert isinstance(ori_img, np.ndarray), "input must be a numpy array!"
+        img = ori_img.astype(float) / 255.
+
+        img = cv2.resize(img, self.size)
+
+        img = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0)  # the incoming channel order (BGR if fed straight from OpenCV) is kept as-is for inference
+
+        # forward
+        with torch.no_grad():
+            img = img.to(self.device)
+            # t5 = time.time()
+            out_boxes = self.net(img)
+            # t6 = time.time()
+            # print(' -------------infer----------------: %f' % (t5 - t6))
+            boxes = get_all_boxes(out_boxes, self.conf_thresh, self.num_classes,
+                                  use_cuda=self.use_cuda)  # batch size is 1
+            # boxes = nms(boxes, self.nms_thresh)
+
+            boxes = post_process(boxes, self.net.num_classes, self.conf_thresh, self.nms_thresh)[0].cpu()
+            boxes = boxes[boxes[:, -2] > self.score_thresh, :]  # bbox xmin ymin xmax ymax
+
+        if len(boxes) == 0:
+            bbox = torch.FloatTensor([]).reshape([0, 4])
+            cls_conf = torch.FloatTensor([])
+            cls_ids = torch.LongTensor([])
+        else:
+            height, width = ori_img.shape[:2]
+            bbox = boxes[:, :4]
+            if self.is_xywh:
+                # bbox x y w h
+                bbox = xyxy_to_xywh(bbox)
+
+            bbox *= torch.FloatTensor([[width, height, width, height]])
+            cls_conf = boxes[:, 5]
+            cls_ids = boxes[:, 6].long()
+        return bbox.numpy(), cls_conf.numpy(), cls_ids.numpy()
+
+    def load_class_names(self, namesfile):
+        with open(namesfile, 'r', encoding='utf8') as fp:
+            class_names = [line.strip() for line in fp.readlines()]
+        return class_names
+
+
+def demo():
+    import os
+    from vizer.draw import draw_boxes
+
+    yolo = YOLOv3("cfg/yolo_v3.cfg", "weight/yolov3.weights", "cfg/coco.names")
+    print("yolo.size =", yolo.size)
+    root = "./demo"
+    resdir = os.path.join(root, "results")
+    os.makedirs(resdir, exist_ok=True)
+    files = [os.path.join(root, file) for file in os.listdir(root) if file.endswith('.jpg')]
+    files.sort()
+    for filename in files:
+        img = cv2.imread(filename)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        bbox, cls_conf, cls_ids = yolo(img)
+
+        if bbox is not None:
+            img = draw_boxes(img, bbox, cls_ids, cls_conf, class_name_map=yolo.class_names)
+        # save results
+        cv2.imwrite(os.path.join(resdir, os.path.basename(filename)), img[:, :, (2, 1, 0)])
+        # imshow
+        # cv2.namedWindow("yolo", cv2.WINDOW_NORMAL)
+        # cv2.resizeWindow("yolo", 600,600)
+        # cv2.imshow("yolo", res[:,:,(2,1,0)])
+        # cv2.waitKey(0)
+
+
+if __name__ == "__main__":
+    demo()
diff --git a/detector/YOLOv3/nms/__init__.py b/detector/YOLOv3/nms/__init__.py
new file mode 100644
index 0000000..4da7007
--- /dev/null
+++ b/detector/YOLOv3/nms/__init__.py
@@ -0,0 +1 @@
+from .nms import boxes_nms
\ No newline at end of file
diff --git a/detector/YOLOv3/nms/build.sh b/detector/YOLOv3/nms/build.sh
new file mode 100644
index 0000000..44766a2
--- /dev/null
+++ b/detector/YOLOv3/nms/build.sh
@@ -0,0 +1,5 @@
+cd ext
+
+python build.py build_ext develop
+
+cd ..
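Once the optional C++ extension is compiled with build.sh above (or torchvision's NMS is picked up as the fallback), the YOLOv3 wrapper in detector/YOLOv3/detector.py can be driven directly. A minimal sketch, assuming the config/weight/name paths from configs/yolov3.yaml and the demo image shipped in the repo:

    import cv2
    from detector.YOLOv3.detector import YOLOv3

    detector = YOLOv3("./detector/YOLOv3/cfg/yolov4.cfg",
                      "./detector/YOLOv3/weight/yolov4.weights",
                      "./detector/YOLOv3/cfg/coco.names",
                      score_thresh=0.5, use_cuda=False)

    # demo() above feeds RGB, so convert the BGR frame OpenCV returns
    img = cv2.cvtColor(cv2.imread("./detector/YOLOv3/demo/004545.jpg"), cv2.COLOR_BGR2RGB)
    bbox, cls_conf, cls_ids = detector(img)  # xyxy boxes in absolute pixels
    print(bbox.shape, cls_conf, [detector.class_names[i] for i in cls_ids])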
diff --git a/detector/YOLOv3/nms/ext/__init__.py b/detector/YOLOv3/nms/ext/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/detector/YOLOv3/nms/ext/build.py b/detector/YOLOv3/nms/ext/build.py new file mode 100644 index 0000000..66973bc --- /dev/null +++ b/detector/YOLOv3/nms/ext/build.py @@ -0,0 +1,58 @@ +import glob +import os + +import torch +from setuptools import setup +from torch.utils.cpp_extension import CUDA_HOME +from torch.utils.cpp_extension import CppExtension +from torch.utils.cpp_extension import CUDAExtension + +requirements = ["torch"] + + +def get_extensions(): + extensions_dir = os.path.dirname(os.path.abspath(__file__)) + + main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) + source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) + source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) + + sources = main_file + source_cpu + extension = CppExtension + + extra_compile_args = {"cxx": []} + define_macros = [] + + if torch.cuda.is_available() and CUDA_HOME is not None: + extension = CUDAExtension + sources += source_cuda + define_macros += [("WITH_CUDA", None)] + extra_compile_args["nvcc"] = [ + "-DCUDA_HAS_FP16=1", + "-D__CUDA_NO_HALF_OPERATORS__", + "-D__CUDA_NO_HALF_CONVERSIONS__", + "-D__CUDA_NO_HALF2_OPERATORS__", + ] + + sources = [os.path.join(extensions_dir, s) for s in sources] + + include_dirs = [extensions_dir] + + ext_modules = [ + extension( + "torch_extension", + sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + ] + + return ext_modules + + +setup( + name="torch_extension", + version="0.1", + ext_modules=get_extensions(), + cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}) diff --git a/detector/YOLOv3/nms/ext/cpu/nms_cpu.cpp b/detector/YOLOv3/nms/ext/cpu/nms_cpu.cpp new file mode 100644 index 0000000..5b3f93c --- /dev/null +++ b/detector/YOLOv3/nms/ext/cpu/nms_cpu.cpp @@ -0,0 +1,75 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
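+//
+// CPU reference NMS: detections are visited in descending score order and each
+// survivor suppresses every later box whose IoU with it reaches the threshold,
+// so the kernel below is O(n^2) in the number of detections.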
+#include "cpu/vision.h" + + +template +at::Tensor nms_cpu_kernel(const at::Tensor& dets, + const at::Tensor& scores, + const float threshold) { + AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); + AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); + AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); + + if (dets.numel() == 0) { + return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); + } + + auto x1_t = dets.select(1, 0).contiguous(); + auto y1_t = dets.select(1, 1).contiguous(); + auto x2_t = dets.select(1, 2).contiguous(); + auto y2_t = dets.select(1, 3).contiguous(); + + at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t); + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + + auto ndets = dets.size(0); + at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); + + auto suppressed = suppressed_t.data(); + auto order = order_t.data(); + auto x1 = x1_t.data(); + auto y1 = y1_t.data(); + auto x2 = x2_t.data(); + auto y2 = y2_t.data(); + auto areas = areas_t.data(); + + for (int64_t _i = 0; _i < ndets; _i++) { + auto i = order[_i]; + if (suppressed[i] == 1) + continue; + auto ix1 = x1[i]; + auto iy1 = y1[i]; + auto ix2 = x2[i]; + auto iy2 = y2[i]; + auto iarea = areas[i]; + + for (int64_t _j = _i + 1; _j < ndets; _j++) { + auto j = order[_j]; + if (suppressed[j] == 1) + continue; + auto xx1 = std::max(ix1, x1[j]); + auto yy1 = std::max(iy1, y1[j]); + auto xx2 = std::min(ix2, x2[j]); + auto yy2 = std::min(iy2, y2[j]); + + auto w = std::max(static_cast(0), xx2 - xx1); + auto h = std::max(static_cast(0), yy2 - yy1); + auto inter = w * h; + auto ovr = inter / (iarea + areas[j] - inter); + if (ovr >= threshold) + suppressed[j] = 1; + } + } + return at::nonzero(suppressed_t == 0).squeeze(1); +} + +at::Tensor nms_cpu(const at::Tensor& dets, + const at::Tensor& scores, + const float threshold) { + at::Tensor result; + AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { + result = nms_cpu_kernel(dets, scores, threshold); + }); + return result; +} \ No newline at end of file diff --git a/detector/YOLOv3/nms/ext/cpu/vision.h b/detector/YOLOv3/nms/ext/cpu/vision.h new file mode 100644 index 0000000..b3529ad --- /dev/null +++ b/detector/YOLOv3/nms/ext/cpu/vision.h @@ -0,0 +1,7 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#pragma once +#include + +at::Tensor nms_cpu(const at::Tensor& dets, + const at::Tensor& scores, + const float threshold); diff --git a/detector/YOLOv3/nms/ext/cuda/nms.cu b/detector/YOLOv3/nms/ext/cuda/nms.cu new file mode 100644 index 0000000..2eb4525 --- /dev/null +++ b/detector/YOLOv3/nms/ext/cuda/nms.cu @@ -0,0 +1,131 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
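+//
+// GPU NMS via a 64-bit suppression mask: threadsPerBlock equals the bit width
+// of unsigned long long, the grid tiles the (score-sorted) boxes into 64x64
+// comparison blocks, and each thread sets a bit for every box in its column
+// tile that overlaps its row box above the threshold; the host then scans the
+// mask to collect the surviving indices.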
+#include +#include + +#include +#include + +#include +#include + +int const threadsPerBlock = sizeof(unsigned long long) * 8; + +__device__ inline float devIoU(float const * const a, float const * const b) { + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float width = max(right - left, 0.f), height = max(bottom - top, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0]) * (a[3] - a[1]); + float Sb = (b[2] - b[0]) * (b[3] - b[1]); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, + const float *dev_boxes, unsigned long long *dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const float *cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +// boxes is a N x 5 tensor +at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { + using scalar_t = float; + AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); + auto scores = boxes.select(1, 4); + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + auto boxes_sorted = boxes.index_select(0, order_t); + + int boxes_num = boxes.size(0); + + const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); + + scalar_t* boxes_dev = boxes_sorted.data(); + + THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState + + unsigned long long* mask_dev = NULL; + //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, + // boxes_num * col_blocks * sizeof(unsigned long long))); + + mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); + + dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), + THCCeilDiv(boxes_num, threadsPerBlock)); + dim3 threads(threadsPerBlock); + nms_kernel<<>>(boxes_num, + nms_overlap_thresh, + boxes_dev, + mask_dev); + + std::vector mask_host(boxes_num * col_blocks); + THCudaCheck(cudaMemcpy(&mask_host[0], + mask_dev, + sizeof(unsigned long long) * boxes_num * col_blocks, + cudaMemcpyDeviceToHost)); + + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + at::Tensor keep = at::empty({boxes_num}, 
boxes.options().dtype(at::kLong).device(at::kCPU)); + int64_t* keep_out = keep.data(); + + int num_to_keep = 0; + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long *p = &mask_host[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + + THCudaFree(state, mask_dev); + // TODO improve this part + return std::get<0>(order_t.index({ + keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( + order_t.device(), keep.scalar_type()) + }).sort(0, false)); +} \ No newline at end of file diff --git a/detector/YOLOv3/nms/ext/cuda/vision.h b/detector/YOLOv3/nms/ext/cuda/vision.h new file mode 100644 index 0000000..b5bd907 --- /dev/null +++ b/detector/YOLOv3/nms/ext/cuda/vision.h @@ -0,0 +1,7 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#pragma once +#include + +at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); + + diff --git a/detector/YOLOv3/nms/ext/nms.h b/detector/YOLOv3/nms/ext/nms.h new file mode 100644 index 0000000..312fed4 --- /dev/null +++ b/detector/YOLOv3/nms/ext/nms.h @@ -0,0 +1,28 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#pragma once +#include "cpu/vision.h" + +#ifdef WITH_CUDA +#include "cuda/vision.h" +#endif + + +at::Tensor nms(const at::Tensor& dets, + const at::Tensor& scores, + const float threshold) { + + if (dets.type().is_cuda()) { +#ifdef WITH_CUDA + // TODO raise error if not compiled with CUDA + if (dets.numel() == 0) + return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); + auto b = at::cat({dets, scores.unsqueeze(1)}, 1); + return nms_cuda(b, threshold); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + + at::Tensor result = nms_cpu(dets, scores, threshold); + return result; +} diff --git a/detector/YOLOv3/nms/ext/vision.cpp b/detector/YOLOv3/nms/ext/vision.cpp new file mode 100644 index 0000000..726b77b --- /dev/null +++ b/detector/YOLOv3/nms/ext/vision.cpp @@ -0,0 +1,7 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#include "nms.h" + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("nms", &nms, "non-maximum suppression"); +} diff --git a/detector/YOLOv3/nms/nms.py b/detector/YOLOv3/nms/nms.py new file mode 100644 index 0000000..1b4a2db --- /dev/null +++ b/detector/YOLOv3/nms/nms.py @@ -0,0 +1,34 @@ +import warnings +import torchvision + +try: + import torch + import torch_extension + + _nms = torch_extension.nms +except ImportError: + if torchvision.__version__ >= '0.3.0': + _nms = torchvision.ops.nms + else: + from .python_nms import python_nms + + _nms = python_nms + warnings.warn('You are using python version NMS, which is very very slow. Try compile c++ NMS ' + 'using `cd ext & python build.py build_ext develop`') + + +def boxes_nms(boxes, scores, nms_thresh, max_count=-1): + """ Performs non-maximum suppression, run on GPU or CPU according to + boxes's device. + Args: + boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(or relative coordinates), shape is (n, 4) + scores(Tensor): scores, shape is (n, ) + nms_thresh(float): thresh + max_count (int): if > 0, then only the top max_proposals are kept after non-maximum suppression + Returns: + indices kept. 
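+    Example (illustrative values only):
+        >>> boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 10., 10.]])
+        >>> scores = torch.tensor([0.9, 0.8])
+        >>> boxes_nms(boxes, scores, nms_thresh=0.5)
+        tensor([0])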
+ """ + keep = _nms(boxes, scores, nms_thresh) + if max_count > 0: + keep = keep[:max_count] + return keep diff --git a/detector/YOLOv3/nms/python_nms.py b/detector/YOLOv3/nms/python_nms.py new file mode 100644 index 0000000..bd8a4ba --- /dev/null +++ b/detector/YOLOv3/nms/python_nms.py @@ -0,0 +1,59 @@ +import torch +import numpy as np + + +def python_nms(boxes, scores, nms_thresh): + """ Performs non-maximum suppression using numpy + Args: + boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(not support relative coordinates), + shape is (n, 4) + scores(Tensor): scores, shape is (n, ) + nms_thresh(float): thresh + Returns: + indices kept. + """ + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.long) + # Use numpy to run nms. Running nms in PyTorch code on CPU is really slow. + origin_device = boxes.device + cpu_device = torch.device('cpu') + boxes = boxes.to(cpu_device).numpy() + scores = scores.to(cpu_device).numpy() + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + areas = (x2 - x1) * (y2 - y1) + order = np.argsort(scores)[::-1] + num_detections = boxes.shape[0] + suppressed = np.zeros((num_detections,), dtype=np.bool) + for _i in range(num_detections): + i = order[_i] + if suppressed[i]: + continue + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + + for _j in range(_i + 1, num_detections): + j = order[_j] + if suppressed[j]: + continue + + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0, xx2 - xx1) + h = max(0, yy2 - yy1) + + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= nms_thresh: + suppressed[j] = True + keep = np.nonzero(suppressed == 0)[0] + keep = torch.from_numpy(keep).to(origin_device) + return keep diff --git a/detector/YOLOv3/region_layer.py b/detector/YOLOv3/region_layer.py new file mode 100644 index 0000000..c55ef37 --- /dev/null +++ b/detector/YOLOv3/region_layer.py @@ -0,0 +1,185 @@ +import math +import sys +import time +import torch +import torch.nn as nn +from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu + + +class RegionLayer(nn.Module): + def __init__(self, num_classes=0, anchors=[], num_anchors=1, use_cuda=None): + super(RegionLayer, self).__init__() + use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) + self.device = torch.device("cuda" if use_cuda else "cpu") + self.num_classes = num_classes + self.num_anchors = num_anchors + self.anchor_step = len(anchors) // num_anchors + # self.anchors = torch.stack(torch.FloatTensor(anchors).split(self.anchor_step)).to(self.device) + self.anchors = torch.FloatTensor(anchors).view(self.num_anchors, self.anchor_step).to(self.device) + self.rescore = 1 + self.coord_scale = 1 + self.noobject_scale = 1 + self.object_scale = 5 + self.class_scale = 1 + self.thresh = 0.6 + self.seen = 0 + + def build_targets(self, pred_boxes, target, nH, nW): + nB = target.size(0) + nA = self.num_anchors + conf_mask = torch.ones(nB, nA, nH, nW) * self.noobject_scale + coord_mask = torch.zeros(nB, nA, nH, nW) + cls_mask = torch.zeros(nB, nA, nH, nW) + tcoord = torch.zeros(4, nB, nA, nH, nW) + tconf = torch.zeros(nB, nA, nH, nW) + tcls = torch.zeros(nB, nA, nH, nW) + + nAnchors = nA * nH * nW + nPixels = nH * nW + nGT = 0 # number of ground truth + nRecall = 0 + # it works faster on CPU than on GPU. 
+ anchors = self.anchors.to("cpu") + + if self.seen < 12800: + tcoord[0].fill_(0.5) + tcoord[1].fill_(0.5) + coord_mask.fill_(1) + + for b in range(nB): + cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t() + cur_ious = torch.zeros(nAnchors) + tbox = target[b].view(-1, 5).to("cpu") + for t in range(50): + if tbox[t][1] == 0: + break + gx, gw = [i * nW for i in (tbox[t][1], tbox[t][3])] + gy, gh = [i * nH for i in (tbox[t][2], tbox[t][4])] + cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t() + cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) + ignore_ix = cur_ious > self.thresh + conf_mask[b][ignore_ix.view(nA, nH, nW)] = 0 + + for t in range(50): + if tbox[t][1] == 0: + break + nGT += 1 + gx, gw = [i * nW for i in (tbox[t][1], tbox[t][3])] + gy, gh = [i * nH for i in (tbox[t][2], tbox[t][4])] + gw, gh = gw.float(), gh.float() + gi, gj = int(gx), int(gy) + + tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA, 1).t() + anchor_boxes = torch.cat((torch.zeros(nA, 2), anchors), 1).t() + tmp_ious = multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False) + best_iou, best_n = torch.max(tmp_ious, 0) + + if self.anchor_step == 4: # this part is not tested. + tmp_ious_mask = (tmp_ious == best_iou) + if tmp_ious_mask.sum() > 0: + gt_pos = torch.FloatTensor([gi, gj, gx, gy]).repeat(nA, 1).t() + an_pos = anchor_boxes[4:6] # anchor_boxes are consisted of [0 0 aw ah ax ay] + dist = pow(((gt_pos[0] + an_pos[0]) - gt_pos[2]), 2) + pow( + ((gt_pos[1] + an_pos[1]) - gt_pos[3]), 2) + dist[1 - tmp_ious_mask] = 10000 # set the large number for the small ious + _, best_n = torch.min(dist, 0) + + gt_box = torch.FloatTensor([gx, gy, gw, gh]) + pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi] + iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) + + coord_mask[b][best_n][gj][gi] = 1 + cls_mask[b][best_n][gj][gi] = 1 + conf_mask[b][best_n][gj][gi] = self.object_scale + tcoord[0][b][best_n][gj][gi] = gx - gi + tcoord[1][b][best_n][gj][gi] = gy - gj + tcoord[2][b][best_n][gj][gi] = math.log(gw / anchors[best_n][0]) + tcoord[3][b][best_n][gj][gi] = math.log(gh / anchors[best_n][1]) + tcls[b][best_n][gj][gi] = tbox[t][0] + tconf[b][best_n][gj][gi] = iou if self.rescore else 1. 
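+                # confidence target: the live IoU when rescoring, otherwise a
+                # hard 1; matches with IoU above 0.5 feed the recall stat below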
+ if iou > 0.5: + nRecall += 1 + + return nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls + + def get_mask_boxes(self, output): + if not isinstance(self.anchors, torch.Tensor): + self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) + masked_anchors = self.anchors.view(-1) + num_anchors = torch.IntTensor([self.num_anchors]).to(self.device) + return {'x': output, 'a': masked_anchors, 'n': num_anchors} + + def forward(self, output, target): + # output : BxAs*(4+1+num_classes)*H*W + t0 = time.time() + nB = output.data.size(0) # batch size + nA = self.num_anchors + nC = self.num_classes + nH = output.data.size(2) + nW = output.data.size(3) + cls_anchor_dim = nB * nA * nH * nW + + if not isinstance(self.anchors, torch.Tensor): + self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) + + output = output.view(nB, nA, (5 + nC), nH, nW) + cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device) + ix = torch.LongTensor(range(0, 5)).to(self.device) + pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) + + coord = output.index_select(2, ix[0:4]).view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(-1, + cls_anchor_dim) # x, y, w, h + coord[0:2] = coord[0:2].sigmoid() # x, y + conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() + cls = output.index_select(2, cls_grid) + cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC) + + t1 = time.time() + grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(self.device) + grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to( + self.device) + anchor_w = self.anchors.index_select(1, ix[0]).repeat(1, nB * nH * nW).view(cls_anchor_dim) + anchor_h = self.anchors.index_select(1, ix[1]).repeat(1, nB * nH * nW).view(cls_anchor_dim) + + pred_boxes[0] = coord[0] + grid_x + pred_boxes[1] = coord[1] + grid_y + pred_boxes[2] = coord[2].exp() * anchor_w + pred_boxes[3] = coord[3].exp() * anchor_h + # for build_targets. it works faster on CPU than on GPU + pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach() + + t2 = time.time() + nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ + self.build_targets(pred_boxes, target.detach(), nH, nW) + + cls_mask = (cls_mask == 1) + tcls = tcls[cls_mask].long().view(-1) + cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device) + cls = cls[cls_mask].view(-1, nC) + + nProposals = int((conf > 0.25).sum()) + + tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) + tconf, tcls = tconf.to(self.device), tcls.to(self.device) + coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.sqrt().to(self.device) + + t3 = time.time() + loss_coord = self.coord_scale * nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / 2 + # sqrt(object_scale)/2 is almost equal to 1. 
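+        # (conf_mask was square-rooted above, so object_scale ends up weighting
+        # the squared confidence error linearly once MSELoss squares it again)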
+ loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2 + loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 + loss = loss_coord + loss_conf + loss_cls + t4 = time.time() + if False: + print('-' * 30) + print(' activation : %f' % (t1 - t0)) + print(' create pred_boxes : %f' % (t2 - t1)) + print(' build targets : %f' % (t3 - t2)) + print(' create loss : %f' % (t4 - t3)) + print(' total : %f' % (t4 - t0)) + print('%d: nGT %3d, nRC %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' + % (self.seen, nGT, nRecall, nProposals, loss_coord, loss_conf, loss_cls, loss)) + if math.isnan(loss.item()): + print(conf, tconf) + sys.exit(0) + return loss diff --git a/detector/YOLOv3/weight/.gitkeep b/detector/YOLOv3/weight/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/detector/YOLOv3/yolo_layer.py b/detector/YOLOv3/yolo_layer.py new file mode 100644 index 0000000..578969f --- /dev/null +++ b/detector/YOLOv3/yolo_layer.py @@ -0,0 +1,181 @@ +import math +import sys +import time +import torch +import torch.nn as nn +from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu + + +class YoloLayer(nn.Module): + def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, use_cuda=None): + super(YoloLayer, self).__init__() + use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) + self.device = torch.device("cuda" if use_cuda else "cpu") + + self.anchor_mask = anchor_mask + self.num_classes = num_classes + self.anchors = anchors + self.num_anchors = num_anchors + self.anchor_step = len(anchors) // num_anchors + self.rescore = 0 + self.ignore_thresh = 0.5 + self.truth_thresh = 1. + self.stride = 32 + self.nth_layer = 0 + self.seen = 0 + self.net_width = 0 + self.net_height = 0 + + def get_mask_boxes(self, output): + masked_anchors = [] + for m in self.anchor_mask: + masked_anchors += self.anchors[m * self.anchor_step:(m + 1) * self.anchor_step] + masked_anchors = [anchor / self.stride for anchor in masked_anchors] + + masked_anchors = torch.FloatTensor(masked_anchors).to(self.device) + num_anchors = torch.IntTensor([len(self.anchor_mask)]).to(self.device) + return {'x': output, 'a': masked_anchors, 'n': num_anchors} + + def build_targets(self, pred_boxes, target, anchors, nA, nH, nW): + nB = target.size(0) + anchor_step = anchors.size(1) # anchors[nA][anchor_step] + conf_mask = torch.ones(nB, nA, nH, nW) + coord_mask = torch.zeros(nB, nA, nH, nW) + cls_mask = torch.zeros(nB, nA, nH, nW) + tcoord = torch.zeros(4, nB, nA, nH, nW) + tconf = torch.zeros(nB, nA, nH, nW) + tcls = torch.zeros(nB, nA, nH, nW) + twidth, theight = self.net_width / self.stride, self.net_height / self.stride + + nAnchors = nA * nH * nW + nPixels = nH * nW + nGT = 0 + nRecall = 0 + nRecall75 = 0 + + # it works faster on CPU than on GPU. 
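+        # (these anchors were already divided by the stride in get_mask_boxes,
+        # so they are in feature-grid units, directly comparable to gw/gh below)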
+ anchors = anchors.to("cpu") + + for b in range(nB): + cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t() + cur_ious = torch.zeros(nAnchors) + tbox = target[b].view(-1, 5).to("cpu") + for t in range(50): + if tbox[t][1] == 0: + break + gx, gy = tbox[t][1] * nW, tbox[t][2] * nH + gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight + cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t() + cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) + ignore_ix = cur_ious > self.ignore_thresh + conf_mask[b][ignore_ix.view(nA, nH, nW)] = 0 + + for t in range(50): + if tbox[t][1] == 0: + break + nGT += 1 + gx, gy = tbox[t][1] * nW, tbox[t][2] * nH + gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight + gw, gh = gw.float(), gh.float() + gi, gj = int(gx), int(gy) + + tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA, 1).t() + anchor_boxes = torch.cat((torch.zeros(nA, anchor_step), anchors), 1).t() + _, best_n = torch.max(multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False), 0) + + gt_box = torch.FloatTensor([gx, gy, gw, gh]) + pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi] + iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) + + coord_mask[b][best_n][gj][gi] = 1 + cls_mask[b][best_n][gj][gi] = 1 + conf_mask[b][best_n][gj][gi] = 1 + tcoord[0][b][best_n][gj][gi] = gx - gi + tcoord[1][b][best_n][gj][gi] = gy - gj + tcoord[2][b][best_n][gj][gi] = math.log(gw / anchors[best_n][0]) + tcoord[3][b][best_n][gj][gi] = math.log(gh / anchors[best_n][1]) + tcls[b][best_n][gj][gi] = tbox[t][0] + tconf[b][best_n][gj][gi] = iou if self.rescore else 1. + + if iou > 0.5: + nRecall += 1 + if iou > 0.75: + nRecall75 += 1 + + return nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls + + def forward(self, output, target): + # output : BxAs*(4+1+num_classes)*H*W + mask_tuple = self.get_mask_boxes(output) + t0 = time.time() + nB = output.data.size(0) # batch size + nA = mask_tuple['n'].item() # num_anchors + nC = self.num_classes + nH = output.data.size(2) + nW = output.data.size(3) + anchor_step = mask_tuple['a'].size(0) // nA + anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device) + cls_anchor_dim = nB * nA * nH * nW + + output = output.view(nB, nA, (5 + nC), nH, nW) + cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device) + ix = torch.LongTensor(range(0, 5)).to(self.device) + pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) + + coord = output.index_select(2, ix[0:4]).view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(-1, + cls_anchor_dim) # x, y, w, h + coord[0:2] = coord[0:2].sigmoid() # x, y + conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() + cls = output.index_select(2, cls_grid) + cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC) + + t1 = time.time() + grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(self.device) + grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to( + self.device) + anchor_w = anchors.index_select(1, ix[0]).repeat(1, nB * nH * nW).view(cls_anchor_dim) + anchor_h = anchors.index_select(1, ix[1]).repeat(1, nB * nH * nW).view(cls_anchor_dim) + + pred_boxes[0] = coord[0] + grid_x + pred_boxes[1] = coord[1] + grid_y + pred_boxes[2] = coord[2].exp() * anchor_w + pred_boxes[3] = coord[3].exp() * anchor_h + # for build_targets. 
it works faster on CPU than on GPU + pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach() + + t2 = time.time() + nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ + self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW) + + cls_mask = (cls_mask == 1) + tcls = tcls[cls_mask].long().view(-1) + cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device) + cls = cls[cls_mask].view(-1, nC) + + nProposals = int((conf > 0.25).sum()) + + tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) + tconf, tcls = tconf.to(self.device), tcls.to(self.device) + coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.to(self.device) + + t3 = time.time() + loss_coord = nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / 2 + loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) + loss_cls = nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 + loss = loss_coord + loss_conf + loss_cls + + t4 = time.time() + if False: + print('-' * 30) + print(' activation : %f' % (t1 - t0)) + print(' create pred_boxes : %f' % (t2 - t1)) + print(' build targets : %f' % (t3 - t2)) + print(' create loss : %f' % (t4 - t3)) + print(' total : %f' % (t4 - t0)) + print( + '%d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' + % (self.seen, self.nth_layer, nGT, nRecall, nRecall75, nProposals, loss_coord, loss_conf, loss_cls, loss)) + if math.isnan(loss.item()): + print(conf, tconf) + sys.exit(0) + return loss diff --git a/detector/YOLOv3/yolo_utils.py b/detector/YOLOv3/yolo_utils.py new file mode 100644 index 0000000..b546eef --- /dev/null +++ b/detector/YOLOv3/yolo_utils.py @@ -0,0 +1,589 @@ +import os +import time +import math +import torch +import numpy as np +from PIL import Image, ImageDraw +import struct # get_image_size +import imghdr # get_image_size + + +def sigmoid(x): + return 1.0 / (math.exp(-x) + 1.) 
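+
+# Quick sanity check for the two activations here (illustrative values only):
+#   sigmoid(0.0)              -> 0.5
+#   softmax(torch.zeros(3))   -> tensor([0.3333, 0.3333, 0.3333])
+# softmax subtracts the max before exponentiating, the standard trick to avoid
+# overflow on large logits.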
+
+
+def softmax(x):
+    x = torch.exp(x - torch.max(x))
+    x /= x.sum()
+    return x
+
+
+def bbox_iou(box1, box2, x1y1x2y2=True):
+    if x1y1x2y2:
+        x1_min = min(box1[0], box2[0])
+        x2_max = max(box1[2], box2[2])
+        y1_min = min(box1[1], box2[1])
+        y2_max = max(box1[3], box2[3])
+        w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
+        w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
+    else:
+        w1, h1 = box1[2], box1[3]
+        w2, h2 = box2[2], box2[3]
+        x1_min = min(box1[0] - w1 / 2.0, box2[0] - w2 / 2.0)
+        x2_max = max(box1[0] + w1 / 2.0, box2[0] + w2 / 2.0)
+        y1_min = min(box1[1] - h1 / 2.0, box2[1] - h2 / 2.0)
+        y2_max = max(box1[1] + h1 / 2.0, box2[1] + h2 / 2.0)
+
+    w_union = x2_max - x1_min
+    h_union = y2_max - y1_min
+    w_cross = w1 + w2 - w_union
+    h_cross = h1 + h2 - h_union
+    carea = 0
+    if w_cross <= 0 or h_cross <= 0:
+        return 0.0
+
+    area1 = w1 * h1
+    area2 = w2 * h2
+    carea = w_cross * h_cross
+    uarea = area1 + area2 - carea
+    return float(carea / uarea)
+
+
+def multi_bbox_ious(boxes1, boxes2, x1y1x2y2=True):
+    if x1y1x2y2:
+        x1_min = torch.min(boxes1[0], boxes2[0])
+        x2_max = torch.max(boxes1[2], boxes2[2])
+        y1_min = torch.min(boxes1[1], boxes2[1])
+        y2_max = torch.max(boxes1[3], boxes2[3])
+        w1, h1 = boxes1[2] - boxes1[0], boxes1[3] - boxes1[1]
+        w2, h2 = boxes2[2] - boxes2[0], boxes2[3] - boxes2[1]
+    else:
+        w1, h1 = boxes1[2], boxes1[3]
+        w2, h2 = boxes2[2], boxes2[3]
+        x1_min = torch.min(boxes1[0] - w1 / 2.0, boxes2[0] - w2 / 2.0)
+        x2_max = torch.max(boxes1[0] + w1 / 2.0, boxes2[0] + w2 / 2.0)
+        y1_min = torch.min(boxes1[1] - h1 / 2.0, boxes2[1] - h2 / 2.0)
+        y2_max = torch.max(boxes1[1] + h1 / 2.0, boxes2[1] + h2 / 2.0)
+
+    w_union = x2_max - x1_min
+    h_union = y2_max - y1_min
+    w_cross = w1 + w2 - w_union
+    h_cross = h1 + h2 - h_union
+    mask = (((w_cross <= 0) + (h_cross <= 0)) > 0)
+    area1 = w1 * h1
+    area2 = w2 * h2
+    carea = w_cross * h_cross
+    carea[mask] = 0
+    uarea = area1 + area2 - carea
+    return carea / uarea
+
+
+from .nms import boxes_nms
+
+
+def post_process(boxes, num_classes, conf_thresh=0.01, nms_thresh=0.45, obj_thresh=0.3):
+    batch_size = boxes.size(0)
+
+    # nms
+    results_boxes = []
+    for batch_id in range(batch_size):
+        processed_boxes = []
+        for cls_id in range(num_classes):
+            mask = (boxes[batch_id, :, -1] == cls_id) * (boxes[batch_id, :, 4] > obj_thresh)
+            masked_boxes = boxes[batch_id, mask]
+
+            keep = boxes_nms(masked_boxes[:, :4], masked_boxes[:, 5], nms_thresh)
+
+            nmsed_boxes = masked_boxes[keep, :]
+
+            processed_boxes.append(nmsed_boxes)
+        processed_boxes = torch.cat(processed_boxes, dim=0)
+
+        results_boxes.append(processed_boxes)
+
+    return results_boxes
+
+
+def xywh_to_xyxy(boxes_xywh):
+    boxes_xyxy = boxes_xywh.copy()
+    boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2.
+    boxes_xyxy[:, 1] = boxes_xywh[:, 1] - boxes_xywh[:, 3] / 2.
+    boxes_xyxy[:, 2] = boxes_xywh[:, 0] + boxes_xywh[:, 2] / 2.
+    boxes_xyxy[:, 3] = boxes_xywh[:, 1] + boxes_xywh[:, 3] / 2.
+
+    return boxes_xyxy
+
+
+def xyxy_to_xywh(boxes_xyxy):
+    if isinstance(boxes_xyxy, torch.Tensor):
+        boxes_xywh = boxes_xyxy.clone()
+    elif isinstance(boxes_xyxy, np.ndarray):
+        boxes_xywh = boxes_xyxy.copy()
+
+    boxes_xywh[:, 0] = (boxes_xyxy[:, 0] + boxes_xyxy[:, 2]) / 2.
+    boxes_xywh[:, 1] = (boxes_xyxy[:, 1] + boxes_xyxy[:, 3]) / 2.
+ boxes_xywh[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0] + boxes_xywh[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1] + + return boxes_xywh + + +def nms(boxes, nms_thresh): + if len(boxes) == 0: + return boxes + + det_confs = torch.zeros(len(boxes)) + for i in range(len(boxes)): + det_confs[i] = boxes[i][4] + + _, sortIds = torch.sort(det_confs, descending=True) + out_boxes = [] + for i in range(len(boxes)): + box_i = boxes[sortIds[i]] + if box_i[4] > 0: + out_boxes.append(box_i) + for j in range(i + 1, len(boxes)): + box_j = boxes[sortIds[j]] + if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh: + # print(box_i, box_j, bbox_iou(box_i, box_j, x1y1x2y2=False)) + box_j[4] = 0 + return out_boxes + + +def convert2cpu(gpu_matrix): + return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) + + +def convert2cpu_long(gpu_matrix): + return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) + + +def get_all_boxes(output, conf_thresh, num_classes, only_objectness=1, validation=False, use_cuda=True): + # total number of inputs (batch size) + # first element (x) for first tuple (x, anchor_mask, num_anchor) + batchsize = output[0]['x'].data.size(0) + + all_boxes = [] + for i in range(len(output)): + pred, anchors, num_anchors = output[i]['x'].data, output[i]['a'], output[i]['n'].item() + boxes = get_region_boxes(pred, conf_thresh, num_classes, anchors, num_anchors, \ + only_objectness=only_objectness, validation=validation, use_cuda=use_cuda) + + all_boxes.append(boxes) + return torch.cat(all_boxes, dim=1) + + +def get_region_boxes(output, obj_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False, + use_cuda=True): + device = torch.device("cuda" if use_cuda else "cpu") + anchors = anchors.to(device) + anchor_step = anchors.size(0) // num_anchors + if output.dim() == 3: + output = output.unsqueeze(0) + batch = output.size(0) + assert (output.size(1) == (5 + num_classes) * num_anchors) + h = output.size(2) + w = output.size(3) + cls_anchor_dim = batch * num_anchors * h * w + + # all_boxes = [] + output = output.view(batch * num_anchors, 5 + num_classes, h * w).transpose(0, 1).contiguous().view(5 + num_classes, + cls_anchor_dim) + + grid_x = torch.linspace(0, w - 1, w).repeat(batch * num_anchors, h, 1).view(cls_anchor_dim).to(device) + grid_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().repeat(batch * num_anchors, 1, 1).view(cls_anchor_dim).to( + device) + ix = torch.LongTensor(range(0, 2)).to(device) + anchor_w = anchors.view(num_anchors, anchor_step).index_select(1, ix[0]).repeat(1, batch, h * w).view( + cls_anchor_dim) + anchor_h = anchors.view(num_anchors, anchor_step).index_select(1, ix[1]).repeat(1, batch, h * w).view( + cls_anchor_dim) + + xs, ys = torch.sigmoid(output[0]) + grid_x, torch.sigmoid(output[1]) + grid_y + ws, hs = torch.exp(output[2]) * anchor_w.detach(), torch.exp(output[3]) * anchor_h.detach() + det_confs = torch.sigmoid(output[4]) + + # by ysyun, dim=1 means input is 2D or even dimension else dim=0 + cls_confs = torch.nn.Softmax(dim=1)(output[5:5 + num_classes].transpose(0, 1)).detach() + cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) + cls_max_confs = cls_max_confs.view(-1) + cls_max_ids = cls_max_ids.view(-1).float() + + # sz_hw = h*w + # sz_hwa = sz_hw*num_anchors + # det_confs = convert2cpu(det_confs) + # cls_max_confs = convert2cpu(cls_max_confs) + # cls_max_ids = convert2cpu_long(cls_max_ids) + # xs, ys = convert2cpu(xs), convert2cpu(ys) + # ws, hs = convert2cpu(ws), convert2cpu(hs) + + cls_confs = det_confs * cls_max_confs + + # boxes = 
[xs/w, ys/h, ws/w, hs/h, det_confs, cls_confs, cls_max_ids] + xs, ys, ws, hs = xs / w, ys / h, ws / w, hs / h + x1, y1, x2, y2 = torch.clamp_min(xs - ws / 2., 0.), torch.clamp_min(ys - hs / 2., 0.), torch.clamp_max(xs + ws / 2., + 1.), torch.clamp_max( + ys + hs / 2., 1.) + boxes = [x1, y1, x2, y2, det_confs, cls_confs, cls_max_ids] + boxes = list(map(lambda x: x.view(batch, -1), boxes)) + boxes = torch.stack(boxes, dim=2) + + # for b in range(batch): + # boxes = [] + # for cy in range(h): + # for cx in range(w): + # for i in range(num_anchors): + # ind = b*sz_hwa + i*sz_hw + cy*w + cx + # det_conf = det_confs[ind] + # if only_objectness: + # conf = det_confs[ind] + # else: + # conf = det_confs[ind] * cls_max_confs[ind] + + # if conf > conf_thresh: + # bcx = xs[ind] + # bcy = ys[ind] + # bw = ws[ind] + # bh = hs[ind] + # cls_max_conf = cls_max_confs[ind] + # cls_max_id = cls_max_ids[ind] + # box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] + + # boxes.append(box) + # all_boxes.append(boxes) + return boxes + + +# def get_all_boxes(output, conf_thresh, num_classes, only_objectness=1, validation=False, use_cuda=True): +# # total number of inputs (batch size) +# # first element (x) for first tuple (x, anchor_mask, num_anchor) +# tot = output[0]['x'].data.size(0) +# all_boxes = [[] for i in range(tot)] +# for i in range(len(output)): +# pred, anchors, num_anchors = output[i]['x'].data, output[i]['a'], output[i]['n'].item() +# b = get_region_boxes(pred, conf_thresh, num_classes, anchors, num_anchors, \ +# only_objectness=only_objectness, validation=validation, use_cuda=use_cuda) +# for t in range(tot): +# all_boxes[t] += b[t] +# return all_boxes + +# def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False, use_cuda=True): +# device = torch.device("cuda" if use_cuda else "cpu") +# anchors = anchors.to(device) +# anchor_step = anchors.size(0)//num_anchors +# if output.dim() == 3: +# output = output.unsqueeze(0) +# batch = output.size(0) +# assert(output.size(1) == (5+num_classes)*num_anchors) +# h = output.size(2) +# w = output.size(3) +# cls_anchor_dim = batch*num_anchors*h*w + +# t0 = time.time() +# all_boxes = [] +# output = output.view(batch*num_anchors, 5+num_classes, h*w).transpose(0,1).contiguous().view(5+num_classes, cls_anchor_dim) + +# grid_x = torch.linspace(0, w-1, w).repeat(batch*num_anchors, h, 1).view(cls_anchor_dim).to(device) +# grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(batch*num_anchors, 1, 1).view(cls_anchor_dim).to(device) +# ix = torch.LongTensor(range(0,2)).to(device) +# anchor_w = anchors.view(num_anchors, anchor_step).index_select(1, ix[0]).repeat(1, batch, h*w).view(cls_anchor_dim) +# anchor_h = anchors.view(num_anchors, anchor_step).index_select(1, ix[1]).repeat(1, batch, h*w).view(cls_anchor_dim) + +# xs, ys = torch.sigmoid(output[0]) + grid_x, torch.sigmoid(output[1]) + grid_y +# ws, hs = torch.exp(output[2]) * anchor_w.detach(), torch.exp(output[3]) * anchor_h.detach() +# det_confs = torch.sigmoid(output[4]) + +# # by ysyun, dim=1 means input is 2D or even dimension else dim=0 +# cls_confs = torch.nn.Softmax(dim=1)(output[5:5+num_classes].transpose(0,1)).detach() +# cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) +# cls_max_confs = cls_max_confs.view(-1) +# cls_max_ids = cls_max_ids.view(-1) +# t1 = time.time() + +# sz_hw = h*w +# sz_hwa = sz_hw*num_anchors +# det_confs = convert2cpu(det_confs) +# cls_max_confs = convert2cpu(cls_max_confs) +# cls_max_ids = 
convert2cpu_long(cls_max_ids) +# xs, ys = convert2cpu(xs), convert2cpu(ys) +# ws, hs = convert2cpu(ws), convert2cpu(hs) +# if validation: +# cls_confs = convert2cpu(cls_confs.view(-1, num_classes)) + +# t2 = time.time() +# for b in range(batch): +# boxes = [] +# for cy in range(h): +# for cx in range(w): +# for i in range(num_anchors): +# ind = b*sz_hwa + i*sz_hw + cy*w + cx +# det_conf = det_confs[ind] +# if only_objectness: +# conf = det_confs[ind] +# else: +# conf = det_confs[ind] * cls_max_confs[ind] + +# if conf > conf_thresh: +# bcx = xs[ind] +# bcy = ys[ind] +# bw = ws[ind] +# bh = hs[ind] +# cls_max_conf = cls_max_confs[ind] +# cls_max_id = cls_max_ids[ind] +# box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] +# if (not only_objectness) and validation: +# for c in range(num_classes): +# tmp_conf = cls_confs[ind][c] +# if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh: +# box.append(tmp_conf) +# box.append(c) +# boxes.append(box) +# all_boxes.append(boxes) +# t3 = time.time() +# if False: +# print('---------------------------------') +# print('matrix computation : %f' % (t1-t0)) +# print(' gpu to cpu : %f' % (t2-t1)) +# print(' boxes filter : %f' % (t3-t2)) +# print('---------------------------------') +# return all_boxes + +def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None): + import cv2 + colors = torch.FloatTensor([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]) + + def get_color(c, x, max_val): + ratio = float(x) / max_val * 5 + i = int(math.floor(ratio)) + j = int(math.ceil(ratio)) + ratio -= i + r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] + return int(r * 255) + + width = img.shape[1] + height = img.shape[0] + for i in range(len(boxes)): + box = boxes[i] + x1 = int(round((box[0] - box[2] / 2.0) * width)) + y1 = int(round((box[1] - box[3] / 2.0) * height)) + x2 = int(round((box[0] + box[2] / 2.0) * width)) + y2 = int(round((box[1] + box[3] / 2.0) * height)) + + if color: + rgb = color + else: + rgb = (255, 0, 0) + if len(box) >= 7 and class_names: + cls_conf = box[5] + cls_id = box[6] + # print('%s: %f' % (class_names[cls_id], cls_conf)) + classes = len(class_names) + offset = cls_id * 123457 % classes + red = get_color(2, offset, classes) + green = get_color(1, offset, classes) + blue = get_color(0, offset, classes) + if color is None: + rgb = (red, green, blue) + img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1) + img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1) + if savename: + print("save plot results to %s" % savename) + cv2.imwrite(savename, img) + return img + + +def plot_boxes(img, boxes, savename=None, class_names=None): + colors = torch.FloatTensor([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]) + + def get_color(c, x, max_val): + ratio = float(x) / max_val * 5 + i = int(math.floor(ratio)) + j = int(math.ceil(ratio)) + ratio -= i + r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] + return int(r * 255) + + width = img.width + height = img.height + draw = ImageDraw.Draw(img) + print("%d box(es) is(are) found" % len(boxes)) + for i in range(len(boxes)): + box = boxes[i] + x1 = (box[0] - box[2] / 2.0) * width + y1 = (box[1] - box[3] / 2.0) * height + x2 = (box[0] + box[2] / 2.0) * width + y2 = (box[1] + box[3] / 2.0) * height + + rgb = (255, 0, 0) + if len(box) >= 7 and class_names: + cls_conf = box[5] + cls_id = box[6] + print('%s: %f' % (class_names[cls_id], cls_conf)) + classes = len(class_names) + offset = cls_id * 
123457 % classes + red = get_color(2, offset, classes) + green = get_color(1, offset, classes) + blue = get_color(0, offset, classes) + rgb = (red, green, blue) + draw.text((x1, y1), class_names[cls_id], fill=rgb) + draw.rectangle([x1, y1, x2, y2], outline=rgb) + if savename: + print("save plot results to %s" % savename) + img.save(savename) + return img + + +def read_truths(lab_path): + if not os.path.exists(lab_path): + return np.array([]) + if os.path.getsize(lab_path): + truths = np.loadtxt(lab_path) + truths = truths.reshape(truths.size // 5, 5) # to avoid single truth problem + return truths + else: + return np.array([]) + + +def read_truths_args(lab_path, min_box_scale): + truths = read_truths(lab_path) + new_truths = [] + for i in range(truths.shape[0]): + if truths[i][3] < min_box_scale: + continue + new_truths.append([truths[i][0], truths[i][1], truths[i][2], truths[i][3], truths[i][4]]) + return np.array(new_truths) + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r', encoding='utf8') as fp: + lines = fp.readlines() + for line in lines: + class_names.append(line.strip()) + return class_names + + +def image2torch(img): + if isinstance(img, Image.Image): + width = img.width + height = img.height + img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) + img = img.view(height, width, 3).transpose(0, 1).transpose(0, 2).contiguous() + img = img.view(1, 3, height, width) + img = img.float().div(255.0) + elif type(img) == np.ndarray: # cv2 image + img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0) + else: + print("unknown image type") + exit(-1) + return img + + +def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=True): + model.eval() + t0 = time.time() + img = image2torch(img) + t1 = time.time() + + img = img.to(torch.device("cuda" if use_cuda else "cpu")) + t2 = time.time() + + out_boxes = model(img) + boxes = get_all_boxes(out_boxes, conf_thresh, model.num_classes, use_cuda=use_cuda)[0] + + t3 = time.time() + boxes = nms(boxes, nms_thresh) + t4 = time.time() + + if False: + print('-----------------------------------') + print(' image to tensor : %f' % (t1 - t0)) + print(' tensor to cuda : %f' % (t2 - t1)) + print(' predict : %f' % (t3 - t2)) + print(' nms : %f' % (t4 - t3)) + print(' total : %f' % (t4 - t0)) + print('-----------------------------------') + return boxes + + +def read_data_cfg(datacfg): + options = dict() + options['gpus'] = '0,1,2,3' + options['num_workers'] = '10' + with open(datacfg) as fp: + lines = fp.readlines() + + for line in lines: + line = line.strip() + if line == '': + continue + key, value = line.split('=') + key = key.strip() + value = value.strip() + options[key] = value + return options + + +def scale_bboxes(bboxes, width, height): + import copy + dets = copy.deepcopy(bboxes) + for i in range(len(dets)): + dets[i][0] = dets[i][0] * width + dets[i][1] = dets[i][1] * height + dets[i][2] = dets[i][2] * width + dets[i][3] = dets[i][3] * height + return dets + + +def file_lines(thefilepath): + count = 0 + thefile = open(thefilepath, 'rb') + while True: + buffer = thefile.read(8192 * 1024) + if not buffer: + break + count += buffer.count(b'\n') + thefile.close() + return count + + +def get_image_size(fname): + """ + Determine the image type of fhandle and return its size. 
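+    Handles png, gif and jpeg headers (sniffed with imghdr).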
+    from draco
+    """
+    with open(fname, 'rb') as fhandle:
+        head = fhandle.read(24)
+        if len(head) != 24:
+            return
+        if imghdr.what(fname) == 'png':
+            check = struct.unpack('>i', head[4:8])[0]
+            if check != 0x0d0a1a0a:
+                return
+            width, height = struct.unpack('>ii', head[16:24])
+        elif imghdr.what(fname) == 'gif':
+            width, height = struct.unpack('<HH', head[6:10])
+        elif imghdr.what(fname) == 'jpeg':
+            try:
+                fhandle.seek(0)  # Read 0xff next
+                size = 2
+                ftype = 0
+                while not 0xc0 <= ftype <= 0xcf:
+                    fhandle.seek(size, 1)
+                    byte = fhandle.read(1)
+                    while ord(byte) == 0xff:
+                        byte = fhandle.read(1)
+                    ftype = ord(byte)
+                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
+                # We are at a SOFn block
+                fhandle.seek(1, 1)  # Skip `precision' byte.
+                height, width = struct.unpack('>HH', fhandle.read(4))
+            except Exception:  # IGNORE:W0703
+                return
+        else:
+            return
+    return width, height
+
+
+def logging(message):
+    print('%s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), message))
diff --git a/detector/__init__.py b/detector/__init__.py
new file mode 100644
index 0000000..0e57c92
--- /dev/null
+++ b/detector/__init__.py
@@ -0,0 +1,132 @@
+from .YOLOv3 import YOLOv3
+import onnxruntime
+import numpy as np
+import time
+import cv2
+import torch
+from detector.YOLOv3.nms import boxes_nms
+
+__all__ = ['build_detector', 'build_onnx']
+
+
+def xyxy_to_xywh(boxes_xyxy):
+    # convert corner boxes (x1, y1, x2, y2) to centre format (cx, cy, w, h)
+    if isinstance(boxes_xyxy, torch.Tensor):
+        boxes_xywh = boxes_xyxy.clone()
+    elif isinstance(boxes_xyxy, np.ndarray):
+        boxes_xywh = boxes_xyxy.copy()
+
+    boxes_xywh[:, 0] = (boxes_xyxy[:, 0] + boxes_xyxy[:, 2]) / 2.
+    boxes_xywh[:, 1] = (boxes_xyxy[:, 1] + boxes_xyxy[:, 3]) / 2.
+    boxes_xywh[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0]
+    boxes_xywh[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1]
+
+    return boxes_xywh
+
+
+def build_detector(cfg, use_cuda):
+    return YOLOv3(cfg.YOLOV3.CFG, cfg.YOLOV3.WEIGHT, cfg.YOLOV3.CLASS_NAMES,
+                  score_thresh=cfg.YOLOV3.SCORE_THRESH, nms_thresh=cfg.YOLOV3.NMS_THRESH,
+                  is_xywh=True, use_cuda=use_cuda)
+
+
+class build_onnx():
+    def __init__(self, cfg):
+        self.session = onnxruntime.InferenceSession(cfg.YOLOV4.WEIGHT)
+        print("The model expects input shape: ", self.session.get_inputs()[0].shape)
+        self.class_names = self.load_class_names(cfg.YOLOV4.CLASS_NAMES)
+
+    def forward(self, img, video_width, video_height):
+        IN_IMAGE_H = self.session.get_inputs()[0].shape[2]
+        IN_IMAGE_W = self.session.get_inputs()[0].shape[3]
+
+        # Input: resize, BGR->RGB, NCHW float32 in [0, 1]
+        resized = cv2.resize(img, (IN_IMAGE_W, IN_IMAGE_H), interpolation=cv2.INTER_LINEAR)
+        img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
+        img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
+        img_in = np.expand_dims(img_in, axis=0)
+        img_in /= 255.0
+        print("Shape of the network input: ", img_in.shape)
+
+        # Compute
+        input_name = self.session.get_inputs()[0].name
+        t5 = time.time()
+        outputs = self.session.run(None, {input_name: img_in})
+        t6 = time.time()
+        print(' -------------infer----------------: %f' % (t6 - t5))
+
+        self.boxes = np.array(self.post_processing(img_in, 0.4, 0.6, outputs))[0]
+        self.box = xyxy_to_xywh(self.boxes[:, 0:4])
+        # the network outputs normalized coordinates; scale back to the frame size
+        self.box = self.box * np.array([video_width, video_height, video_width, video_height])
+
+        self.cls = self.boxes[:, 5]
+        self.id = self.boxes[:, 6]
+        return self.box, self.cls, self.id
+
+    def load_class_names(self, namesfile):
+        with open(namesfile, 'r', encoding='utf8') as fp:
+            class_names = [line.strip() for line in fp.readlines()]
+        return class_names
+
+    def post_processing(self, img, conf_thresh, nms_thresh, output):
+        # [batch, num, 1, 4]
+        box_array = output[0]
+        # [batch, num, num_classes]
+        confs = output[1]
+
+        t1 = time.time()
+
+        if type(box_array).__name__ != 'ndarray':
+            box_array = box_array.cpu().detach().numpy()
+            confs = confs.cpu().detach().numpy()
+
+        num_classes
= confs.shape[2] + + # [batch, num, 4] + box_array = box_array[:, :, 0] + + # [batch, num, num_classes] --> [batch, num] + max_conf = np.max(confs, axis=2) + max_id = np.argmax(confs, axis=2) + + t2 = time.time() + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = np.array(boxes_nms(torch.tensor(ll_box_array), torch.tensor(ll_max_conf), nms_thresh)) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + t3 = time.time() + + print('-----------------------------------') + print(' max and argmax : %f' % (t2 - t1)) + print(' nms : %f' % (t3 - t2)) + print('Post processing total : %f' % (t3 - t1)) + print('-----------------------------------') + + return bboxes_batch diff --git a/detector/trt.py b/detector/trt.py new file mode 100644 index 0000000..746ca8f --- /dev/null +++ b/detector/trt.py @@ -0,0 +1,212 @@ +import sys +import os +import time +import argparse +import numpy as np +import cv2 +# from PIL import Image +import tensorrt as trt +import pycuda.driver as cuda +import pycuda.autoinit +from detector.YOLOv3.nms import boxes_nms +import torch + +try: + # Sometimes python2 does not understand FileNotFoundError + FileNotFoundError +except NameError: + FileNotFoundError = IOError + +# __all__ = ['trt'] + +class tensorrt(): + def __init__(self , cfg, img_size = [416,416]): + self.cfg = cfg + self.engine = self.get_engine(cfg.YOLOV4.WEIGHT) + self.context = self.engine.create_execution_context() + self.buffers = self.allocate_buffers(self.engine, 1) + IN_IMAGE_H, IN_IMAGE_W = img_size + self.context.set_binding_shape(0, (1, 3, IN_IMAGE_H, IN_IMAGE_W)) + self.num_classes = 80 + self.image_size = img_size + # return context , buffers + + def get_engine(self,engine_path): + # If a serialized engine exists, use it instead of building an engine. 
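+        # A serialized .engine file is specific to the GPU and TensorRT version it
+        # was built with. An assumed way to produce it from the static ONNX export
+        # referenced in configs/yolov4_onnx.yaml (not shown in this diff) is
+        # TensorRT's trtexec tool:
+        #   trtexec --onnx=yolov4_1_3_416_416_static.onnx --saveEngine=yolov4.engine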
+ print("Reading engine from file {}".format(engine_path)) + TRT_LOGGER = trt.Logger() + with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: + return runtime.deserialize_cuda_engine(f.read()) + + def detect(self,context , buffers , image_src,video_width=416,video_height=416): + IN_IMAGE_H, IN_IMAGE_W = self.image_size + ta = time.time() + # Input + # image_src = cv2.imread(image_src) + resized = cv2.resize(image_src, (IN_IMAGE_W, IN_IMAGE_H), interpolation=cv2.INTER_LINEAR) + img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32) + img_in = np.expand_dims(img_in, axis=0) + img_in /= 255.0 + img_in = np.ascontiguousarray(img_in) + print("Shape of the network input: ", img_in.shape) + # print(img_in) + + inputs, outputs, bindings, stream = buffers + # print('Length of inputs: ', len(inputs)) + inputs[0].host = img_in + + trt_outputs = self.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) + + # print('Len of outputs: ', len(trt_outputs)) + + trt_outputs[0] = trt_outputs[0].reshape(1, -1, 1, 4) + trt_outputs[1] = trt_outputs[1].reshape(1, -1, self.num_classes) + + tb = time.time() + + # print('-----------------------------------') + # print(' TRT inference time: %f' % (tb - ta)) + # print('-----------------------------------') + + # boxes = post_processing(img_in, 0.4, 0.6, trt_outputs) + self.boxes = np.array(self.post_processing(img_in, self.cfg.YOLOV4.SCORE_THRESH, self.cfg.YOLOV4.NMS_THRESH, trt_outputs))[0] + # assert self.boxes[:,0:4] + self.box =self.xyxy_to_xywh(self.boxes[:,0:4]) + self.box = self.box * np.array([video_width, video_height, video_width, video_height]) + + self.cls = self.boxes[:,5] + self.id = self.boxes[:,6] + return self.box , self.cls , self.id + + def post_processing(self,img, conf_thresh, nms_thresh, output): + + box_array = output[0] + # [batch, num, num_classes] + confs = output[1] + + t1 = time.time() + + if type(box_array).__name__ != 'ndarray': + box_array = box_array.cpu().detach().numpy() + confs = confs.cpu().detach().numpy() + + num_classes = confs.shape[2] + + # [batch, num, 4] + box_array = box_array[:, :, 0] + + # [batch, num, num_classes] --> [batch, num] + max_conf = np.max(confs, axis=2) + max_id = np.argmax(confs, axis=2) + + t2 = time.time() + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = np.array(boxes_nms(torch.tensor(ll_box_array), torch.tensor(ll_max_conf), nms_thresh)) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + t3 = time.time() + + # print('-----------------------------------') + # print(' max and argmax : %f' % (t2 - t1)) + # print(' nms : %f' % (t3 - t2)) + # print('Post processing total : %f' % (t3 - t1)) + # print('-----------------------------------') + + return bboxes_batch + + # Allocates all buffers required for an engine, 
i.e. host/device inputs/outputs. + # Simple helper data class that's a little nicer to use than a 2-tuple. + class HostDeviceMem(object): + def __init__(self, host_mem, device_mem): + self.host = host_mem + self.device = device_mem + + def __str__(self): + return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) + + def __repr__(self): + return self.__str__() + + def allocate_buffers(self,engine, batch_size): + inputs = [] + outputs = [] + bindings = [] + stream = cuda.Stream() + for binding in engine: + + size = trt.volume(engine.get_binding_shape(binding)) * batch_size + dims = engine.get_binding_shape(binding) + + # in case batch dimension is -1 (dynamic) + if dims[0] < 0: + size *= -1 + + dtype = trt.nptype(engine.get_binding_dtype(binding)) + # Allocate host and device buffers + host_mem = cuda.pagelocked_empty(size, dtype) + device_mem = cuda.mem_alloc(host_mem.nbytes) + # Append the device buffer to device bindings. + bindings.append(int(device_mem)) + # Append to the appropriate list. + if engine.binding_is_input(binding): + inputs.append(self.HostDeviceMem(host_mem, device_mem)) + else: + outputs.append(self.HostDeviceMem(host_mem, device_mem)) + return inputs, outputs, bindings, stream + +# This function is generalized for multiple inputs/outputs. +# inputs and outputs are expected to be lists of HostDeviceMem objects. + def do_inference(self,context, bindings, inputs, outputs, stream): + # Transfer input data to the GPU. + [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] + # Run inference. + context.execute_async(bindings=bindings, stream_handle=stream.handle) + # Transfer predictions back from the GPU. + [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] + # Synchronize the stream + stream.synchronize() + # Return only the host outputs. + return [out.host for out in outputs] + def GiB(self,val): + return val * 1 << 30 + + def xyxy_to_xywh(self,boxes_xyxy): + if isinstance(boxes_xyxy, torch.Tensor): + boxes_xywh = boxes_xyxy.clone() + elif isinstance(boxes_xyxy, np.ndarray): + boxes_xywh = boxes_xyxy.copy() + + boxes_xywh[:, 0] = (boxes_xyxy[:, 0] + boxes_xyxy[:, 2]) / 2. + boxes_xywh[:, 1] = (boxes_xyxy[:, 1] + boxes_xyxy[:, 3]) / 2. 
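+        # the two assignments below complete the centre format (cx, cy, w, h)
+        # that deepsort_person.update() consumes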
+        boxes_xywh[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0]
+        boxes_xywh[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1]
+
+        return boxes_xywh
+
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/asserts.py b/utils/asserts.py
new file mode 100644
index 0000000..59a73cc
--- /dev/null
+++ b/utils/asserts.py
@@ -0,0 +1,13 @@
+from os import environ
+
+
+def assert_in(file, files_to_check):
+    if file not in files_to_check:
+        raise AssertionError("{} does not exist in the list".format(str(file)))
+    return True
+
+
+def assert_in_env(check_list: list):
+    for item in check_list:
+        assert_in(item, environ.keys())
+    return True
diff --git a/utils/draw.py b/utils/draw.py
new file mode 100644
index 0000000..3366048
--- /dev/null
+++ b/utils/draw.py
@@ -0,0 +1,56 @@
+import numpy as np
+import cv2
+
+palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
+
+
+def compute_color_for_labels(label):
+    """
+    Simple function that assigns a fixed color depending on the class
+    """
+    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
+    return tuple(color)
+
+
+def draw_boxes(img, output=None, count=[], detection_id=0, Type='car', offset=(0, 0)):
+    track_num = len(set(count))
+    if len(output) != 0:
+        bbox = output[:, :4]
+        identities = output[:, -1]
+        detection_id = len(identities)
+        for i, box in enumerate(bbox):
+            x1, y1, x2, y2 = [int(i) for i in box]
+            x1 += offset[0]
+            x2 += offset[0]
+            y1 += offset[1]
+            y2 += offset[1]
+            # box text and bar
+            id = int(identities[i]) if identities is not None else 0
+            color = compute_color_for_labels(id)
+            label = '{}{:d}'.format("", id)
+            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
+            cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
+            cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)  # filled label background
+            cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
+        # put the cumulative track count on the frame below
+    else:
+        detection_id = 0
+    puttxt_height = img.shape[0]
+    puttxt_width = img.shape[1]
+    if Type == 'car':
+        cv2.putText(img, "Total Car: " + str(track_num), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
+        cv2.putText(img, "Current Car Counter: " + str(detection_id), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
+    else:
+        cv2.putText(img, "Total Person: " + str(track_num), (int(4), int(25)), 0, 1, (255, 0, 255), 2)
+        cv2.putText(img, "Current Person Counter: " + str(detection_id), (int(4), int(50)), 0, 1, (255, 0, 255), 2)
+    # cv2.putText(img, "FPS: %.2f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
+    return img, track_num, detection_id
+
+
+if __name__ == '__main__':
+    for i in range(82):
+        print(compute_color_for_labels(i))
diff --git a/utils/evaluation.py b/utils/evaluation.py
new file mode 100644
index 0000000..1001794
--- /dev/null
+++ b/utils/evaluation.py
@@ -0,0 +1,103 @@
+import os
+import numpy as np
+import copy
+import motmetrics as mm
+mm.lap.default_solver = 'lap'
+from utils.io import read_results, unzip_objs
+
+
+class Evaluator(object):
+
+    def __init__(self, data_root, seq_name, data_type):
+        self.data_root = data_root
+        self.seq_name = seq_name
+        self.data_type = data_type
+
+        self.load_annotations()
+        self.reset_accumulator()
+
+    def load_annotations(self):
+        assert self.data_type == 'mot'
+
+        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
+        self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
+        self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
+
+    def 
reset_accumulator(self): + self.acc = mm.MOTAccumulator(auto_id=True) + + def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): + # results + trk_tlwhs = np.copy(trk_tlwhs) + trk_ids = np.copy(trk_ids) + + # gts + gt_objs = self.gt_frame_dict.get(frame_id, []) + gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] + + # ignore boxes + ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) + ignore_tlwhs = unzip_objs(ignore_objs)[0] + + + # remove ignored results + keep = np.ones(len(trk_tlwhs), dtype=bool) + iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) + if len(iou_distance) > 0: + match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + match_ious = iou_distance[match_is, match_js] + + match_js = np.asarray(match_js, dtype=int) + match_js = match_js[np.logical_not(np.isnan(match_ious))] + keep[match_js] = False + trk_tlwhs = trk_tlwhs[keep] + trk_ids = trk_ids[keep] + + # get distance matrix + iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) + + # acc + self.acc.update(gt_ids, trk_ids, iou_distance) + + if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): + events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics + else: + events = None + return events + + def eval_file(self, filename): + self.reset_accumulator() + + result_frame_dict = read_results(filename, self.data_type, is_gt=False) + frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) + for frame_id in frames: + trk_objs = result_frame_dict.get(frame_id, []) + trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] + self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) + + return self.acc + + @staticmethod + def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): + names = copy.deepcopy(names) + if metrics is None: + metrics = mm.metrics.motchallenge_metrics + metrics = copy.deepcopy(metrics) + + mh = mm.metrics.create() + summary = mh.compute_many( + accs, + metrics=metrics, + names=names, + generate_overall=True + ) + + return summary + + @staticmethod + def save_summary(summary, filename): + import pandas as pd + writer = pd.ExcelWriter(filename) + summary.to_excel(writer) + writer.save() diff --git a/utils/io.py b/utils/io.py new file mode 100644 index 0000000..2dc9afd --- /dev/null +++ b/utils/io.py @@ -0,0 +1,133 @@ +import os +from typing import Dict +import numpy as np + +# from utils.log import get_logger + + +def write_results(filename, results, data_type): + if data_type == 'mot': + save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' + elif data_type == 'kitti': + save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' + else: + raise ValueError(data_type) + + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids in results: + if data_type == 'kitti': + frame_id -= 1 + for tlwh, track_id in zip(tlwhs, track_ids): + if track_id < 0: + continue + x1, y1, w, h = tlwh + x2, y2 = x1 + w, y1 + h + line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) + f.write(line) + + +# def write_results(filename, results_dict: Dict, data_type: str): +# if not filename: +# return +# path = os.path.dirname(filename) +# if not os.path.exists(path): +# os.makedirs(path) + +# if data_type in ('mot', 'mcmot', 'lab'): +# save_format = 
'{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' +# elif data_type == 'kitti': +# save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' +# else: +# raise ValueError(data_type) + +# with open(filename, 'w') as f: +# for frame_id, frame_data in results_dict.items(): +# if data_type == 'kitti': +# frame_id -= 1 +# for tlwh, track_id in frame_data: +# if track_id < 0: +# continue +# x1, y1, w, h = tlwh +# x2, y2 = x1 + w, y1 + h +# line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) +# f.write(line) +# logger.info('Save results to {}'.format(filename)) + + +def read_results(filename, data_type: str, is_gt=False, is_ignore=False): + if data_type in ('mot', 'lab'): + read_fun = read_mot_results + else: + raise ValueError('Unknown data type: {}'.format(data_type)) + + return read_fun(filename, is_gt, is_ignore) + + +""" +labels={'ped', ... % 1 +'person_on_vhcl', ... % 2 +'car', ... % 3 +'bicycle', ... % 4 +'mbike', ... % 5 +'non_mot_vhcl', ... % 6 +'static_person', ... % 7 +'distractor', ... % 8 +'occluder', ... % 9 +'occluder_on_grnd', ... %10 +'occluder_full', ... % 11 +'reflection', ... % 12 +'crowd' ... % 13 +}; +""" + + +def read_mot_results(filename, is_gt, is_ignore): + valid_labels = {1} + ignore_labels = {2, 7, 8, 12} + results_dict = dict() + if os.path.isfile(filename): + with open(filename, 'r') as f: + for line in f.readlines(): + linelist = line.split(',') + if len(linelist) < 7: + continue + fid = int(linelist[0]) + if fid < 1: + continue + results_dict.setdefault(fid, list()) + + if is_gt: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + mark = int(float(linelist[6])) + if mark == 0 or label not in valid_labels: + continue + score = 1 + elif is_ignore: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + vis_ratio = float(linelist[8]) + if label not in ignore_labels and vis_ratio >= 0: + continue + else: + continue + score = 1 + else: + score = float(linelist[6]) + + tlwh = tuple(map(float, linelist[2:6])) + target_id = int(linelist[1]) + + results_dict[fid].append((tlwh, target_id, score)) + + return results_dict + + +def unzip_objs(objs): + if len(objs) > 0: + tlwhs, ids, scores = zip(*objs) + else: + tlwhs, ids, scores = [], [], [] + tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) + + return tlwhs, ids, scores \ No newline at end of file diff --git a/utils/json_logger.py b/utils/json_logger.py new file mode 100644 index 0000000..0afd0b4 --- /dev/null +++ b/utils/json_logger.py @@ -0,0 +1,383 @@ +""" +References: + https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f +""" +import json +from os import makedirs +from os.path import exists, join +from datetime import datetime + + +class JsonMeta(object): + HOURS = 3 + MINUTES = 59 + SECONDS = 59 + PATH_TO_SAVE = 'LOGS' + DEFAULT_FILE_NAME = 'remaining' + + +class BaseJsonLogger(object): + """ + This is the base class that returns __dict__ of its own + it also returns the dicts of objects in the attributes that are list instances + + """ + + def dic(self): + # returns dicts of objects + out = {} + for k, v in self.__dict__.items(): + if hasattr(v, 'dic'): + out[k] = v.dic() + elif isinstance(v, list): + out[k] = self.list(v) + else: + out[k] = v + return out + + @staticmethod + def list(values): + # applies the dic method on items in the list + return [v.dic() if hasattr(v, 'dic') 
else v for v in values]
+
+
+class Label(BaseJsonLogger):
+    """
+    For each bounding box there are various categories with confidences. Label class keeps track of that information.
+    """
+
+    def __init__(self, category: str, confidence: float):
+        self.category = category
+        self.confidence = confidence
+
+
+class Bbox(BaseJsonLogger):
+    """
+    Stores a single bounding box of a frame together with its candidate labels; used by JsonParser.
+    Attributes:
+        labels (list): List of Label objects.
+        top (int):
+        left (int):
+        width (int):
+        height (int):
+
+    Args:
+        bbox_id (int):
+        top (int):
+        left (int):
+        width (int):
+        height (int):
+
+    References:
+        Check Label module for better understanding.
+
+    """
+
+    def __init__(self, bbox_id, top, left, width, height):
+        self.labels = []
+        self.bbox_id = bbox_id
+        self.top = top
+        self.left = left
+        self.width = width
+        self.height = height
+
+    def add_label(self, category, confidence):
+        # appends a label; the top_k check is done by the caller via labels_full()
+        self.labels.append(Label(category, confidence))
+
+    def labels_full(self, value):
+        return len(self.labels) == value
+
+
+class Frame(BaseJsonLogger):
+    """
+    Stores the information of a single frame; used by JsonParser.
+    Attributes:
+        timestamp (float): The elapsed time of the captured frame
+        frame_id (int): The frame number of the captured video
+        bboxes (list of Bbox objects): Stores the list of bbox objects.
+
+    References:
+        Check Bbox class for better information
+
+    Args:
+        timestamp (float):
+        frame_id (int):
+
+    """
+
+    def __init__(self, frame_id: int, timestamp: float = None):
+        self.frame_id = frame_id
+        self.timestamp = timestamp
+        self.bboxes = []
+
+    def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
+        bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
+        if bbox_id not in bboxes_ids:
+            self.bboxes.append(Bbox(bbox_id, top, left, width, height))
+        else:
+            raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
+
+    def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
+        bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}
+        if bbox_id in bboxes.keys():
+            res = bboxes.get(bbox_id)
+            res.add_label(category, confidence)
+        else:
+            raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))
+
+
+class BboxToJsonLogger(BaseJsonLogger):
+    """
+    This module is designed to automate the task of logging jsons. An example json is used
+    to briefly show the contents of the json file.
+    Example:
+        {
+            "video_details": {
+                "frame_width": 1920,
+                "frame_height": 1080,
+                "frame_rate": 20,
+                "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
+            },
+            "frames": [
+                {
+                    "frame_id": 329,
+                    "timestamp": 3365.1254,
+                    "bboxes": [
+                        {
+                            "labels": [
+                                {
+                                    "category": "pedestrian",
+                                    "confidence": 0.9
+                                }
+                            ],
+                            "bbox_id": 0,
+                            "top": 1257,
+                            "left": 138,
+                            "width": 68,
+                            "height": 109
+                        }
+                    ]
+                }],
+
+    Attributes:
+        frames (dict): It's a dictionary that maps each frame_id to json attributes.
+        video_details (dict): information about the video file.
+        top_k_labels (int): shows the allowed number of labels
+        start_time (datetime object): we use it to automate the json output by time.
+ + Args: + top_k_labels (int): shows the allowed number of labels + + """ + + def __init__(self, top_k_labels: int = 1): + self.frames = {} + self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, + video_name=None) + self.top_k_labels = top_k_labels + self.start_time = datetime.now() + + def set_top_k(self, value): + self.top_k_labels = value + + def frame_exists(self, frame_id: int) -> bool: + """ + Args: + frame_id (int): + + Returns: + bool: true if frame_id is recognized + """ + return frame_id in self.frames.keys() + + def add_frame(self, frame_id: int, timestamp: float = None) -> None: + """ + Args: + frame_id (int): + timestamp (float): opencv captured frame time property + + Raises: + ValueError: if frame_id would not exist in class frames attribute + + Returns: + None + + """ + if not self.frame_exists(frame_id): + self.frames[frame_id] = Frame(frame_id, timestamp) + else: + raise ValueError("Frame id: {} already exists".format(frame_id)) + + def bbox_exists(self, frame_id: int, bbox_id: int) -> bool: + """ + Args: + frame_id: + bbox_id: + + Returns: + bool: if bbox exists in frame bboxes list + """ + bboxes = [] + if self.frame_exists(frame_id=frame_id): + bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes] + return bbox_id in bboxes + + def find_bbox(self, frame_id: int, bbox_id: int): + """ + + Args: + frame_id: + bbox_id: + + Returns: + bbox_id (int): + + Raises: + ValueError: if bbox_id does not exist in the bbox list of specific frame. + """ + if not self.bbox_exists(frame_id, bbox_id): + raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id)) + bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes} + return bboxes.get(bbox_id) + + def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None: + """ + + Args: + frame_id (int): + bbox_id (int): + top (int): + left (int): + width (int): + height (int): + + Returns: + None + + Raises: + ValueError: if bbox_id already exist in frame information with frame_id + ValueError: if frame_id does not exist in frames attribute + """ + if self.frame_exists(frame_id): + frame = self.frames[frame_id] + if not self.bbox_exists(frame_id, bbox_id): + frame.add_bbox(bbox_id, top, left, width, height) + else: + raise ValueError( + "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id)) + else: + raise ValueError("frame with frame_id: {} does not exist".format(frame_id)) + + def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float): + """ + Args: + frame_id: + bbox_id: + category: + confidence: the confidence value returned from yolo detection + + Returns: + None + + Raises: + ValueError: if labels quota (top_k_labels) exceeds. 
+ """ + bbox = self.find_bbox(frame_id, bbox_id) + if not bbox.labels_full(self.top_k_labels): + bbox.add_label(category, confidence) + else: + raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id)) + + def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None, + video_name: str = None): + self.video_details['frame_width'] = frame_width + self.video_details['frame_height'] = frame_height + self.video_details['frame_rate'] = frame_rate + self.video_details['video_name'] = video_name + + def output(self): + output = {'video_details': self.video_details} + result = list(self.frames.values()) + output['frames'] = [item.dic() for item in result] + return output + + def json_output(self, output_name): + """ + Args: + output_name: + + Returns: + None + + Notes: + It creates the json output with `output_name` name. + """ + if not output_name.endswith('.json'): + output_name += '.json' + with open(output_name, 'w') as file: + json.dump(self.output(), file) + file.close() + + def set_start(self): + self.start_time = datetime.now() + + def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0, + seconds: int = 60) -> None: + """ + Notes: + Creates folder and then periodically stores the jsons on that address. + + Args: + output_dir (str): the directory where output files will be stored + hours (int): + minutes (int): + seconds (int): + + Returns: + None + + """ + end = datetime.now() + interval = 0 + interval += abs(min([hours, JsonMeta.HOURS]) * 3600) + interval += abs(min([minutes, JsonMeta.MINUTES]) * 60) + interval += abs(min([seconds, JsonMeta.SECONDS])) + diff = (end - self.start_time).seconds + + if diff > interval: + output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json' + if not exists(output_dir): + makedirs(output_dir) + output = join(output_dir, output_name) + self.json_output(output_name=output) + self.frames = {} + self.start_time = datetime.now() + + def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE): + """ + saves as the number of frames quota increases higher. + :param frames_quota: + :param frame_counter: + :param output_dir: + :return: + """ + pass + + def flush(self, output_dir): + """ + Notes: + We use this function to output jsons whenever possible. + like the time that we exit the while loop of opencv. + + Args: + output_dir: + + Returns: + None + + """ + filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json' + output = join(output_dir, filename) + self.json_output(output_name=output) diff --git a/utils/log.py b/utils/log.py new file mode 100644 index 0000000..5b8c940 --- /dev/null +++ b/utils/log.py @@ -0,0 +1,17 @@ +import logging + + +def get_logger(name='root'): + formatter = logging.Formatter( + # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') + fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + + handler = logging.StreamHandler() + handler.setFormatter(formatter) + + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + logger.addHandler(handler) + return logger + + diff --git a/utils/parser.py b/utils/parser.py new file mode 100644 index 0000000..27fcf50 --- /dev/null +++ b/utils/parser.py @@ -0,0 +1,38 @@ +import os +import yaml +from easydict import EasyDict as edict + +class YamlParser(edict): + """ + This is yaml parser based on EasyDict. 
+ """ + def __init__(self, cfg_dict=None, config_file=None): + if cfg_dict is None: + cfg_dict = {} + + if config_file is not None: + assert(os.path.isfile(config_file)) + with open(config_file, 'r') as fo: + cfg_dict.update(yaml.load(fo.read())) + + super(YamlParser, self).__init__(cfg_dict) + + + def merge_from_file(self, config_file): + with open(config_file, 'r') as fo: + self.update(yaml.load(fo.read())) + + + def merge_from_dict(self, config_dict): + self.update(config_dict) + + +def get_config(config_file=None): + return YamlParser(config_file=config_file) + + +if __name__ == "__main__": + cfg = YamlParser(config_file="../configs/yolov3.yaml") + cfg.merge_from_file("../configs/deep_sort.yaml") + + import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/utils/tools.py b/utils/tools.py new file mode 100644 index 0000000..965fb69 --- /dev/null +++ b/utils/tools.py @@ -0,0 +1,39 @@ +from functools import wraps +from time import time + + +def is_video(ext: str): + """ + Returns true if ext exists in + allowed_exts for video files. + + Args: + ext: + + Returns: + + """ + + allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') + return any((ext.endswith(x) for x in allowed_exts)) + + +def tik_tok(func): + """ + keep track of time for each process. + Args: + func: + + Returns: + + """ + @wraps(func) + def _time_it(*args, **kwargs): + start = time() + try: + return func(*args, **kwargs) + finally: + end_ = time() + print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) + + return _time_it diff --git a/yolov4_deepsort.py b/yolov4_deepsort.py new file mode 100644 index 0000000..20cadf1 --- /dev/null +++ b/yolov4_deepsort.py @@ -0,0 +1,141 @@ +import os +import cv2 +import time +import argparse +import torch +import warnings +import numpy as np + +from detector import build_detector,build_onnx +from deep_sort import build_tracker,build_tracker_car +from utils.draw import draw_boxes +from utils.parser import get_config +from utils.log import get_logger +from utils.io import write_results +# from threading import Thread +from dataset import LoadStreams +from detector.trt import tensorrt +import shutil + +class VideoTracker(object): + def __init__(self, cfg, args, video_path): + self.cfg = cfg + self.args = args + self.video_path = video_path + self.logger = get_logger("root") + self.cuda_ctx = None + + use_cuda = args.use_cuda and torch.cuda.is_available() + if not use_cuda: + warnings.warn("Running in cpu mode which maybe very slow!", UserWarning) + + if args.display: + cv2.namedWindow("test", cv2.WINDOW_NORMAL) + cv2.resizeWindow("test", args.display_width, args.display_height) + + if args.cam != -1: + print("Using webcam " + str(args.cam)) + self.datasets = LoadStreams(args.cam) + self.cap = cv2.VideoCapture(args.cam) + else: + self.datasets = LoadStreams(args.VIDEO_PATH) + self.cap = cv2.VideoCapture() + self.deepsort_person= build_tracker(cfg, use_cuda=use_cuda) + + def __enter__(self): #__enter__(self):当with开始运行的时候触发此方法的运行 + if isinstance(self.args.cam , int): + if self.args.cam != -1: + ret, frame = self.cap.read() + assert ret, "Error: Camera error" + self.im_width = frame.shape[0] + self.im_height = frame.shape[1] + else: + assert os.path.isfile(self.video_path), "Path error" + self.cap.open(self.video_path) + self.im_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + self.im_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + assert self.cap.isOpened() + elif isinstance(self.args.cam , str): + self.cap.open(self.args.cam) 
+            self.im_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            self.im_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            assert self.cap.isOpened()
+
+        if self.args.save_path:
+            os.makedirs(self.args.save_path, exist_ok=True)
+            # path of saved video and results
+            self.save_video_path = os.path.join(self.args.save_path, "results.avi")
+            self.save_results_path = os.path.join(self.args.save_path, "results.txt")
+            # create video writer
+            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
+            self.writer = cv2.VideoWriter(self.save_video_path, fourcc, 20, (self.im_width, self.im_height))
+            # logging
+            self.logger.info("Save results to {}".format(self.args.save_path))
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        if exc_type:
+            print(exc_type, exc_value, exc_traceback)
+
+    def run(self):
+        count_P = []
+        trt_person = tensorrt(self.cfg, [416, 416])
+
+        while self.cap.grab():
+            for _, im0s, _ in self.datasets:  # iterating the dataset calls its __next__ method
+                start = time.time()
+                # _, ori_im = self.cap.retrieve()
+                ori_im = im0s[0]
+                im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
+                # bbox_xywh, cls_conf, cls_ids = self.session.forward(ori_im, self.im_width, self.im_height)
+                # Important: the TensorRT context and buffers must all be created
+                # and used inside the same thread.
+                bbox_xywh, cls_conf, cls_ids = trt_person.detect(trt_person.context, trt_person.buffers, ori_im, self.im_width, self.im_height)
+                # select person class TODO
+                class_det_P = [0]
+                save_id = []
+                for i, id in enumerate(cls_ids):
+                    if id not in class_det_P:
+                        save_id.append(i)
+                # delete the non-person rows from the numpy arrays
+                bbox_xywh_P = np.delete(bbox_xywh, [save_id], axis=0)
+                cls_conf_P = np.delete(cls_conf, [save_id])
+                outputs_P, count_num_P, detection_id_P = self.deepsort_person.update(bbox_xywh_P, cls_conf_P, im, count_P)
+
+                # if len(outputs_P) > 0:
+                ori_im, track_num, detection_id = draw_boxes(ori_im, outputs_P, count_num_P, detection_id_P, Type='person')
+
+                end = time.time()
+                fps = 1 / (end - start)
+                cv2.putText(ori_im, "FPS: %.2f" % (fps), (int(1050), int(200)), 0, 10e-3 * 200, (0, 255, 0), 2)
+
+                if self.args.display:
+                    cv2.imshow("test", ori_im)
+                    write_img = cv2.resize(ori_im, (800, 600))
+                    cv2.waitKey(10)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--VIDEO_PATH", type=str, default='MOT16-03.mp4')
+    parser.add_argument("--config_detection", type=str, default="./configs/yolov4_trt.yaml")
+    parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml")
+    parser.add_argument("--ignore_display", dest="display", action="store_false", default=True)
+    parser.add_argument("--display", action="store_true", default=True)
+    parser.add_argument("--frame_interval", type=int, default=2)
+    parser.add_argument("--display_width", type=int, default=800)
+    parser.add_argument("--display_height", type=int, default=600)
+    parser.add_argument("--save_path", type=str, default="./output/")
+    parser.add_argument("--cpu", dest="use_cuda", action="store_false", default=True)
+    parser.add_argument("--camera", action="store", dest="cam", type=int, default=-1)
+    # parser.add_argument("--camera", action="store", dest="cam", type=str, default="rtsp://admin:abc12345@192.168.1.64/ch2/main/av_stream")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    cfg = get_config()
+    cfg.merge_from_file(args.config_detection)
+    cfg.merge_from_file(args.config_deepsort)
+
+    with VideoTracker(cfg, args, video_path=args.VIDEO_PATH) as vdo_trk:
+        vdo_trk.run()
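+
+# Example invocation (a sketch; the video file and the yolov4.engine referenced by
+# configs/yolov4_trt.yaml must exist, and the engine must match the local GPU and
+# TensorRT version):
+#   python yolov4_deepsort.py --VIDEO_PATH MOT16-03.mp4 --config_detection ./configs/yolov4_trt.yaml --save_path ./output/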