Commit 3571f887 by wudiao

Init

FROM registry.cn-beijing.aliyuncs.com/aliyunfc/runtime-python3.6:build-1.9.13 as python3
RUN fun-install pip install torch
RUN fun-install pip install torchvision
RUN fun-install pip install easydict
RUN fun-install pip install pillow
RUN fun-install pip install requests
.env
template.yml
.funignore
.fun
.idea
nasMappings:
  faces:
    - localNasDir: .fun
      remoteNasDir: /mnt/nas
    - localNasDir: work_space/save
      remoteNasDir: /mnt/nas
RUNTIME python3
RUN fun-install pip install torch
RUN fun-install pip install torchvision
RUN fun-install pip install easydict
RUN fun-install pip install pillow
RUN fun-install pip install requests
from easydict import EasyDict as edict
from pathlib import Path
import torch
from torch.nn import CrossEntropyLoss
from torchvision import transforms as trans
def get_config(training = True):
conf = edict()
conf.data_path = Path('data')
conf.work_path = Path('work_space/')
conf.model_path = conf.work_path/'models'
conf.log_path = conf.work_path/'log'
conf.save_path = conf.work_path/'save'
conf.input_size = [112,112]
conf.embedding_size = 512
    # set this to True to use MobileFaceNet
conf.use_mobilfacenet = False
conf.net_depth = 50
conf.drop_ratio = 0.6
conf.net_mode = 'ir_se' # or 'ir'
conf.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
conf.test_transform = trans.Compose([
trans.ToTensor(),
trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
conf.data_mode = 'emore'
conf.vgg_folder = conf.data_path/'faces_vgg_112x112'
conf.ms1m_folder = conf.data_path/'faces_ms1m_112x112'
conf.emore_folder = conf.data_path/'faces_emore'
conf.batch_size = 100 # irse net depth 50
# conf.batch_size = 200 # mobilefacenet
#--------------------Training Config ------------------------
if training:
conf.log_path = conf.work_path/'log'
conf.save_path = conf.work_path/'save'
# conf.weight_decay = 5e-4
conf.lr = 1e-3
conf.milestones = [12,15,18]
conf.momentum = 0.9
conf.pin_memory = True
conf.num_workers = 3
conf.ce_loss = CrossEntropyLoss()
#--------------------Inference Config ------------------------
else:
# conf.facebank_path = conf.data_path/'facebank'
conf.facebank_path = Path('/Users/wda/IdeaProjects/oss/src/main/resources/1315584165094813698')
conf.threshold = 1.5
conf.face_limit = 10
        # the larger this value, the faster detection runs
conf.min_face_size = 30
return conf
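For context, a minimal usage sketch of this config object (assuming this file is importable as `config` from the repository root and that torch and easydict are installed):

from config import get_config

conf = get_config(training=False)           # inference-mode settings
print(conf.device)                          # cuda:0 if a GPU is visible, otherwise cpu
print(conf.threshold, conf.min_face_size)   # 1.5 and 30 by default in inference mode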
cfg_mnet = {
'name': 'mobilenet0.25',
'min_sizes': [[16, 32], [64, 128], [256, 512]],
'steps': [8, 16, 32],
'variance': [0.1, 0.2],
'clip': False,
'loc_weight': 2.0,
#------------------------------------------------------------------#
#   The training image size seen in the video is 640; to improve recognition of hard
#   samples in large images, the training image size is enlarged here.
#------------------------------------------------------------------#
'train_image_size': 840,
'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
'in_channel': 32,
'out_channel': 64
}
def de_preprocess(tensor):
return tensor*0.5 + 0.5
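A quick sanity check (a sketch, assuming torch and torchvision are available) that de_preprocess undoes the Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) step used in conf.test_transform:

import torch
from torchvision import transforms as trans

x = torch.rand(3, 112, 112)                             # fake image tensor in [0, 1]
normalized = trans.Normalize([0.5] * 3, [0.5] * 3)(x)   # (x - 0.5) / 0.5, values in [-1, 1]
assert torch.allclose(de_preprocess(normalized), x, atol=1e-6)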
# -*- coding: utf-8 -*-
import logging
import json
from Learner import face_learner
from config import get_config
from PIL import Image
from mtcnn import MTCNN
from io import BytesIO
import requests
import oss2
learner = None
conf = None
mtcnn = None
bucket = None
auth = None
index_map = {}
bucket_map = {}
def initializer(context):
global mtcnn
mtcnn = MTCNN()
global conf
conf = get_config(False)
global learner
learner = face_learner(conf, True)
learner.threshold = 1.44
learner.load_state(conf, 'ir_se50.pth', True, True)
learner.model.eval()
global auth
    # the key LTAI5tFb87uVi1B4BcW6SJfH can only access dev and rc
# auth = oss2.Auth("LTAI5t7B2jc6QFnZRi3kC3rd", "m127LkW55ha0LAWZThid94ojtAOQ2j")
auth = oss2.Auth("LTAI5tFb87uVi1B4BcW6SJfH", "3PdUomiQg6yAduq97KX4rBNpkhzdml")
global index_map
index_map = {"prod": "faces", "dev": "faces_dev", "rc": "faces_rc"}
global bucket_map
bucket_map = {"prod": "xm-prod-resource", "dev": "xmdev-resource", "rc": "xmrc-resource"}
def handler(environ, start_response):
request_method = environ['REQUEST_METHOD']
recognized = {}
unknow = {}
bboxs_map = {}
message = "complete"
if request_method == 'POST':
try:
request_body_size = int(environ.get('CONTENT_LENGTH', 0))
except (ValueError):
request_body_size = 0
request_body = environ['wsgi.input'].read(request_body_size)
req = json.loads(request_body)
try:
threshold = req['threshold']
except KeyError:
threshold = 0.5
        # register a face
if 'student_id' in req and 'inst_id' in req and 'img_url' in req and 'env' in req:
message = ""
faces = None
bboxes = None
env_param = req['env']
            # decide which ES index to use based on the environment
env = index_map[env_param]
            # the request carries a face (student) ID
            # insert the new face feature into ES: update if it already exists, otherwise insert, keeping the operation idempotent
try:
                # decide which OSS bucket to query based on the environment
bucket_name = bucket_map[env_param]
bucket = oss2.Bucket(auth, "https://oss-cn-hangzhou.aliyuncs.com", bucket_name)
url = bucket.sign_url('GET', req['img_url'], 60) if not str(req['img_url']).startswith("http") else req['img_url']
print(url)
response = requests.get(url)
image = Image.open(BytesIO(response.content))
bboxes, faces = mtcnn.align_multi(image, conf.face_limit, conf.min_face_size)
except Exception as e:
message = "detect face error " + str(e)
if faces is not None and len(faces) > 1:
message = "find multi faces"
elif message != "" and faces is None:
message = message
elif message == "" and len(faces)==0:
message = 'find no face'
elif len(faces) == 1 and len(bboxes) == 1:
success = learner.update_student_feature(conf, faces, req['student_id'], req['inst_id'], env)
if success != 0:
                    # a face was already registered, so this is a successful update
message = "update face feature successful"
else:
                    # check whether a similar face is already registered under a different student_id
similar_student_id = has_similar_face(env, faces[0], req, threshold)
if similar_student_id!='unknow':
message = 'detect similar face with different student_id {0}'.format(similar_student_id)
else:
                        # no existing face, so insert a new one
success = learner.insert_student_feature(conf, faces, req['student_id'], req['inst_id'], env)
if success == 1:
message = "insert new face feature successful"
else:
message = "insert new face feature failed"
                # if the update or insert succeeded, return the detected box, the requested student ID and the message; otherwise everything except message stays {}
if message == "insert new face feature successful" or message == "update face feature successful":
bboxs_map[0] = {"left": list(bboxes[0])[0], "top": list(bboxes[0])[1], "right": list(bboxes[0])[2],
"bottom": list(bboxes[0])[3], "confidence": list(bboxes[0])[4]}
recognized[0] = req['student_id']
        # recognize faces
elif 'img_url' in req and 'env' in req and 'inst_id' in req and 'student_id' not in req:
inst_id = req['inst_id']
env_param = req['env']
env = index_map[env_param]
bucket_name = bucket_map[env_param]
bucket = oss2.Bucket(auth, "https://oss-cn-hangzhou.aliyuncs.com", bucket_name)
url = bucket.sign_url('GET', req['img_url'], 60) if not str(req['img_url']).startswith("http") else req['img_url']
response = requests.get(url)
image = Image.open(BytesIO(response.content))
bboxes, faces = [], []
try:
bboxes, faces = mtcnn.align_multi(image, conf.face_limit, conf.min_face_size)
assert len(bboxes) == len(faces)
except Exception as e:
message = "face detect failed, no faces find"
if (len(bboxes)) != 0:
for i in range(len(bboxes)):
try:
bboxs_map[i] = {"left": list(bboxes[i])[0], "top": list(bboxes[i])[1],
"right": list(bboxes[i])[2],
"bottom": list(bboxes[i])[3], "confidence": list(bboxes[i])[4]}
face = faces[i]
result = learner.infer_es(conf, face, inst_id, env,threshold=threshold)
if result != 'unknow':
recognized[i] = result
else:
unknow[i] = 'unknow'
except Exception as e:
unknow[i] = "failed" + str(e)
continue
# # do something here
status = '200 OK'
response_headers = [('Content-type', 'application/json')]
start_response(status, response_headers)
results = json.dumps({"faces": bboxs_map, "recognized": recognized, "unknown": unknow, "message": message})
return [bytes(results, encoding='utf-8')]
def has_similar_face(env, face, req, threshold):
try:
result = learner.infer_es(conf, face, req['inst_id'], env, threshold=threshold)
return result
except Exception as e:
return "unknow"
import numpy as np
import torch
from PIL import Image
from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from mtcnn_pytorch.src.first_stage import run_first_stage
from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class MTCNN():
def __init__(self):
self.pnet = PNet().to(device)
self.rnet = RNet().to(device)
self.onet = ONet().to(device)
self.pnet.eval()
self.rnet.eval()
self.onet.eval()
self.refrence = get_reference_facial_points(default_square= True)
def align(self, img):
_, landmarks = self.detect_faces(img)
facial5points = [[landmarks[0][j],landmarks[0][j+5]] for j in range(5)]
warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=(112,112))
return Image.fromarray(warped_face)
def align_multi(self, img, limit=None, min_face_size=30.0):
boxes, landmarks = self.detect_faces(img, min_face_size)
if limit:
boxes = boxes[:limit]
landmarks = landmarks[:limit]
faces = []
for landmark in landmarks:
facial5points = [[landmark[j],landmark[j+5]] for j in range(5)]
warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=(112,112))
faces.append(Image.fromarray(warped_face))
return boxes, faces
def detect_faces(self, image, min_face_size=20.0,
thresholds=[0.5, 0.6, 0.7],
nms_thresholds=[0.5, 0.5, 0.5]):
"""
Arguments:
image: an instance of PIL.Image.
min_face_size: a float number.
thresholds: a list of length 3.
nms_thresholds: a list of length 3.
Returns:
two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
bounding boxes and facial landmarks.
"""
# BUILD AN IMAGE PYRAMID
width, height = image.size
min_length = min(height, width)
min_detection_size = 12
factor = 0.707 # sqrt(0.5)
# scales for scaling the image
scales = []
        # scale the image so that the minimum size we can detect
        # equals the minimum face size we want to detect
m = min_detection_size/min_face_size
min_length *= m
factor_count = 0
while min_length > min_detection_size:
scales.append(m*factor**factor_count)
min_length *= factor
factor_count += 1
# STAGE 1
# it will be returned
bounding_boxes = []
with torch.no_grad():
# run P-Net on different scales
for s in scales:
boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
bounding_boxes.append(boxes)
# collect boxes (and offsets, and scores) from different scales
bounding_boxes = [i for i in bounding_boxes if i is not None]
bounding_boxes = np.vstack(bounding_boxes)
keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
bounding_boxes = bounding_boxes[keep]
# use offsets predicted by pnet to transform bounding boxes
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
# shape [n_boxes, 5]
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 2
img_boxes = get_image_boxes(bounding_boxes, image, size=24)
img_boxes = torch.FloatTensor(img_boxes).to(device)
output = self.rnet(img_boxes)
offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4]
probs = output[1].cpu().data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[1])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
keep = nms(bounding_boxes, nms_thresholds[1])
bounding_boxes = bounding_boxes[keep]
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 3
img_boxes = get_image_boxes(bounding_boxes, image, size=48)
if len(img_boxes) == 0:
return [], []
img_boxes = torch.FloatTensor(img_boxes).to(device)
output = self.onet(img_boxes)
landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10]
offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4]
probs = output[2].cpu().data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[2])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
landmarks = landmarks[keep]
# compute landmark points
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5]
landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10]
bounding_boxes = calibrate_box(bounding_boxes, offsets)
keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
bounding_boxes = bounding_boxes[keep]
landmarks = landmarks[keep]
return bounding_boxes, landmarks
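A minimal usage sketch of this class (assuming the pnet/rnet/onet .npy weight files are present under mtcnn_pytorch/src/weights and that a test image exists at the hypothetical path below):

from PIL import Image
from mtcnn import MTCNN

mtcnn = MTCNN()
img = Image.open('data/sample.jpg')                # hypothetical test image
bboxes, faces = mtcnn.align_multi(img, limit=10, min_face_size=30.0)
print(len(faces), 'aligned face(s)')               # each face is a 112x112 PIL.Image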
from .visualization_utils import show_bboxes
from .detector import detect_faces
import numpy as np
from PIL import Image
def nms(boxes, overlap_threshold=0.5, mode='union'):
"""Non-maximum suppression.
Arguments:
boxes: a float numpy array of shape [n, 5],
where each row is (xmin, ymin, xmax, ymax, score).
overlap_threshold: a float number.
mode: 'union' or 'min'.
Returns:
list with indices of the selected boxes
"""
# if there are no boxes, return the empty list
if len(boxes) == 0:
return []
# list of picked indices
pick = []
# grab the coordinates of the bounding boxes
x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0)
ids = np.argsort(score) # in increasing order
while len(ids) > 0:
# grab index of the largest value
last = len(ids) - 1
i = ids[last]
pick.append(i)
# compute intersections
# of the box with the largest score
# with the rest of boxes
# left top corner of intersection boxes
ix1 = np.maximum(x1[i], x1[ids[:last]])
iy1 = np.maximum(y1[i], y1[ids[:last]])
# right bottom corner of intersection boxes
ix2 = np.minimum(x2[i], x2[ids[:last]])
iy2 = np.minimum(y2[i], y2[ids[:last]])
# width and height of intersection boxes
w = np.maximum(0.0, ix2 - ix1 + 1.0)
h = np.maximum(0.0, iy2 - iy1 + 1.0)
# intersections' areas
inter = w * h
if mode == 'min':
overlap = inter/np.minimum(area[i], area[ids[:last]])
elif mode == 'union':
# intersection over union (IoU)
overlap = inter/(area[i] + area[ids[:last]] - inter)
# delete all boxes where overlap is too big
ids = np.delete(
ids,
np.concatenate([[last], np.where(overlap > overlap_threshold)[0]])
)
return pick
def convert_to_square(bboxes):
"""Convert bounding boxes to a square form.
Arguments:
bboxes: a float numpy array of shape [n, 5].
Returns:
a float numpy array of shape [n, 5],
squared bounding boxes.
"""
square_bboxes = np.zeros_like(bboxes)
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
h = y2 - y1 + 1.0
w = x2 - x1 + 1.0
max_side = np.maximum(h, w)
square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5
square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5
square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
return square_bboxes
def calibrate_box(bboxes, offsets):
"""Transform bounding boxes to be more like true bounding boxes.
'offsets' is one of the outputs of the nets.
Arguments:
bboxes: a float numpy array of shape [n, 5].
offsets: a float numpy array of shape [n, 4].
Returns:
a float numpy array of shape [n, 5].
"""
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
w = x2 - x1 + 1.0
h = y2 - y1 + 1.0
w = np.expand_dims(w, 1)
h = np.expand_dims(h, 1)
    # this is what's happening here:
# tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
# x1_true = x1 + tx1*w
# y1_true = y1 + ty1*h
# x2_true = x2 + tx2*w
# y2_true = y2 + ty2*h
# below is just more compact form of this
# are offsets always such that
# x1 < x2 and y1 < y2 ?
translation = np.hstack([w, h, w, h])*offsets
bboxes[:, 0:4] = bboxes[:, 0:4] + translation
return bboxes
def get_image_boxes(bounding_boxes, img, size=24):
"""Cut out boxes from the image.
Arguments:
bounding_boxes: a float numpy array of shape [n, 5].
img: an instance of PIL.Image.
size: an integer, size of cutouts.
Returns:
a float numpy array of shape [n, 3, size, size].
"""
num_boxes = len(bounding_boxes)
width, height = img.size
[dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height)
img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')
for i in range(num_boxes):
img_box = np.zeros((h[i], w[i], 3), 'uint8')
img_array = np.asarray(img, 'uint8')
img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\
img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]
# resize
img_box = Image.fromarray(img_box)
img_box = img_box.resize((size, size), Image.BILINEAR)
img_box = np.asarray(img_box, 'float32')
img_boxes[i, :, :, :] = _preprocess(img_box)
return img_boxes
def correct_bboxes(bboxes, width, height):
"""Crop boxes that are too big and get coordinates
with respect to cutouts.
Arguments:
bboxes: a float numpy array of shape [n, 5],
where each row is (xmin, ymin, xmax, ymax, score).
width: a float number.
height: a float number.
Returns:
        dy, dx, edy, edx: int numpy arrays of shape [n],
            coordinates of the boxes with respect to the cutouts.
        y, x, ey, ex: int numpy arrays of shape [n],
            corrected ymin, xmin, ymax, xmax.
        h, w: int numpy arrays of shape [n],
            just the heights and widths of the boxes.
in the following order:
[dy, edy, dx, edx, y, ey, x, ex, w, h].
"""
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
num_boxes = bboxes.shape[0]
# 'e' stands for end
# (x, y) -> (ex, ey)
x, y, ex, ey = x1, y1, x2, y2
# we need to cut out a box from the image.
# (x, y, ex, ey) are corrected coordinates of the box
# in the image.
# (dx, dy, edx, edy) are coordinates of the box in the cutout
# from the image.
dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,))
edx, edy = w.copy() - 1.0, h.copy() - 1.0
# if box's bottom right corner is too far right
ind = np.where(ex > width - 1.0)[0]
edx[ind] = w[ind] + width - 2.0 - ex[ind]
ex[ind] = width - 1.0
# if box's bottom right corner is too low
ind = np.where(ey > height - 1.0)[0]
edy[ind] = h[ind] + height - 2.0 - ey[ind]
ey[ind] = height - 1.0
# if box's top left corner is too far left
ind = np.where(x < 0.0)[0]
dx[ind] = 0.0 - x[ind]
x[ind] = 0.0
# if box's top left corner is too high
ind = np.where(y < 0.0)[0]
dy[ind] = 0.0 - y[ind]
y[ind] = 0.0
return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
return_list = [i.astype('int32') for i in return_list]
return return_list
def _preprocess(img):
"""Preprocessing step before feeding the network.
Arguments:
img: a float numpy array of shape [h, w, c].
Returns:
a float numpy array of shape [1, c, h, w].
"""
img = img.transpose((2, 0, 1))
img = np.expand_dims(img, 0)
img = (img - 127.5)*0.0078125
return img
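A small worked example of the nms function defined above (a sketch with made-up numbers, assuming the package layout mtcnn_pytorch/src): two heavily overlapping boxes collapse onto the higher-scoring one, while a distant box survives.

import numpy as np
from mtcnn_pytorch.src.box_utils import nms

boxes = np.array([
    [10, 10, 50, 50, 0.9],      # kept: highest score
    [12, 12, 52, 52, 0.8],      # suppressed: IoU with the first box is about 0.83
    [200, 200, 240, 240, 0.7],  # kept: no overlap with the first box
], dtype=np.float32)
print(nms(boxes, overlap_threshold=0.5))   # indices of the kept boxes: 0 and 2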
import numpy as np
import torch
from torch.autograd import Variable
from .get_nets import PNet, RNet, ONet
from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from .first_stage import run_first_stage
def detect_faces(image, min_face_size=20.0,
thresholds=[0.6, 0.7, 0.8],
nms_thresholds=[0.7, 0.7, 0.7]):
"""
Arguments:
image: an instance of PIL.Image.
min_face_size: a float number.
thresholds: a list of length 3.
nms_thresholds: a list of length 3.
Returns:
two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
bounding boxes and facial landmarks.
"""
# LOAD MODELS
pnet = PNet()
rnet = RNet()
onet = ONet()
onet.eval()
# BUILD AN IMAGE PYRAMID
width, height = image.size
min_length = min(height, width)
min_detection_size = 12
factor = 0.707 # sqrt(0.5)
# scales for scaling the image
scales = []
    # scale the image so that the minimum size we can detect
    # equals the minimum face size we want to detect
m = min_detection_size/min_face_size
min_length *= m
factor_count = 0
while min_length > min_detection_size:
scales.append(m*factor**factor_count)
min_length *= factor
factor_count += 1
# STAGE 1
# it will be returned
bounding_boxes = []
with torch.no_grad():
# run P-Net on different scales
for s in scales:
boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
bounding_boxes.append(boxes)
# collect boxes (and offsets, and scores) from different scales
bounding_boxes = [i for i in bounding_boxes if i is not None]
bounding_boxes = np.vstack(bounding_boxes)
keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
bounding_boxes = bounding_boxes[keep]
# use offsets predicted by pnet to transform bounding boxes
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
# shape [n_boxes, 5]
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 2
img_boxes = get_image_boxes(bounding_boxes, image, size=24)
img_boxes = torch.FloatTensor(img_boxes)
output = rnet(img_boxes)
offsets = output[0].data.numpy() # shape [n_boxes, 4]
probs = output[1].data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[1])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
keep = nms(bounding_boxes, nms_thresholds[1])
bounding_boxes = bounding_boxes[keep]
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 3
img_boxes = get_image_boxes(bounding_boxes, image, size=48)
if len(img_boxes) == 0:
return [], []
img_boxes = torch.FloatTensor(img_boxes)
output = onet(img_boxes)
landmarks = output[0].data.numpy() # shape [n_boxes, 10]
offsets = output[1].data.numpy() # shape [n_boxes, 4]
probs = output[2].data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[2])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
landmarks = landmarks[keep]
# compute landmark points
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5]
landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10]
bounding_boxes = calibrate_box(bounding_boxes, offsets)
keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
bounding_boxes = bounding_boxes[keep]
landmarks = landmarks[keep]
return bounding_boxes, landmarks
import torch
from torch.autograd import Variable
import math
from PIL import Image
import numpy as np
from .box_utils import nms, _preprocess
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = 'cpu'
def run_first_stage(image, net, scale, threshold):
"""Run P-Net, generate bounding boxes, and do NMS.
Arguments:
image: an instance of PIL.Image.
net: an instance of pytorch's nn.Module, P-Net.
scale: a float number,
scale width and height of the image by this number.
threshold: a float number,
threshold on the probability of a face when generating
bounding boxes from predictions of the net.
Returns:
a float numpy array of shape [n_boxes, 9],
bounding boxes with scores and offsets (4 + 1 + 4).
"""
# scale the image and convert it to a float array
width, height = image.size
sw, sh = math.ceil(width*scale), math.ceil(height*scale)
img = image.resize((sw, sh), Image.BILINEAR)
img = np.asarray(img, 'float32')
img = torch.FloatTensor(_preprocess(img)).to(device)
with torch.no_grad():
output = net(img)
probs = output[1].cpu().data.numpy()[0, 1, :, :]
offsets = output[0].cpu().data.numpy()
# probs: probability of a face at each sliding window
# offsets: transformations to true bounding boxes
boxes = _generate_bboxes(probs, offsets, scale, threshold)
if len(boxes) == 0:
return None
keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
return boxes[keep]
def _generate_bboxes(probs, offsets, scale, threshold):
"""Generate bounding boxes at places
where there is probably a face.
Arguments:
probs: a float numpy array of shape [n, m].
offsets: a float numpy array of shape [1, 4, n, m].
scale: a float number,
width and height of the image were scaled by this number.
threshold: a float number.
Returns:
a float numpy array of shape [n_boxes, 9]
"""
# applying P-Net is equivalent, in some sense, to
# moving 12x12 window with stride 2
stride = 2
cell_size = 12
# indices of boxes where there is probably a face
inds = np.where(probs > threshold)
if inds[0].size == 0:
return np.array([])
# transformations of bounding boxes
tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
# they are defined as:
# w = x2 - x1 + 1
# h = y2 - y1 + 1
# x1_true = x1 + tx1*w
# x2_true = x2 + tx2*w
# y1_true = y1 + ty1*h
# y2_true = y2 + ty2*h
offsets = np.array([tx1, ty1, tx2, ty2])
score = probs[inds[0], inds[1]]
# P-Net is applied to scaled images
# so we need to rescale bounding boxes back
bounding_boxes = np.vstack([
np.round((stride*inds[1] + 1.0)/scale),
np.round((stride*inds[0] + 1.0)/scale),
np.round((stride*inds[1] + 1.0 + cell_size)/scale),
np.round((stride*inds[0] + 1.0 + cell_size)/scale),
score, offsets
])
# why one is added?
return bounding_boxes.T
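To make the index-to-coordinate mapping in _generate_bboxes concrete: P-Net behaves like a 12x12 window sliding over the scaled image with stride 2, so an output cell (row, col) maps back to the original image as sketched below (illustrative numbers only).

stride, cell_size, scale = 2, 12, 0.5
row, col = 7, 3                                    # an output cell where probs > threshold
x1 = (stride * col + 1.0) / scale                  # 14.0 in original-image coordinates
y1 = (stride * row + 1.0) / scale                  # 30.0
x2 = (stride * col + 1.0 + cell_size) / scale      # 38.0
y2 = (stride * row + 1.0 + cell_size) / scale      # 54.0
print(x1, y1, x2, y2)                              # one 12x12 window, scaled back by 1/scale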
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
import numpy as np
class Flatten(nn.Module):
def __init__(self):
super(Flatten, self).__init__()
def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, c, h, w].
Returns:
a float tensor with shape [batch_size, c*h*w].
"""
        # without this the pretrained model doesn't work
x = x.transpose(3, 2).contiguous()
return x.view(x.size(0), -1)
class PNet(nn.Module):
def __init__(self):
super(PNet, self).__init__()
# suppose we have input with size HxW, then
# after first layer: H - 2,
# after pool: ceil((H - 2)/2),
# after second conv: ceil((H - 2)/2) - 2,
# after last conv: ceil((H - 2)/2) - 4,
# and the same for W
self.features = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(3, 10, 3, 1)),
('prelu1', nn.PReLU(10)),
('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)),
('conv2', nn.Conv2d(10, 16, 3, 1)),
('prelu2', nn.PReLU(16)),
('conv3', nn.Conv2d(16, 32, 3, 1)),
('prelu3', nn.PReLU(32))
]))
self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
self.conv4_2 = nn.Conv2d(32, 4, 1, 1)
weights = np.load('mtcnn_pytorch/src/weights/pnet.npy',allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])
def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
b: a float tensor with shape [batch_size, 4, h', w'].
a: a float tensor with shape [batch_size, 2, h', w'].
"""
x = self.features(x)
a = self.conv4_1(x)
b = self.conv4_2(x)
        a = F.softmax(a, dim=1)  # softmax over the two face/background channels of the [b, 2, h', w'] map
return b, a
class RNet(nn.Module):
def __init__(self):
super(RNet, self).__init__()
self.features = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(3, 28, 3, 1)),
('prelu1', nn.PReLU(28)),
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv2', nn.Conv2d(28, 48, 3, 1)),
('prelu2', nn.PReLU(48)),
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv3', nn.Conv2d(48, 64, 2, 1)),
('prelu3', nn.PReLU(64)),
('flatten', Flatten()),
('conv4', nn.Linear(576, 128)),
('prelu4', nn.PReLU(128))
]))
self.conv5_1 = nn.Linear(128, 2)
self.conv5_2 = nn.Linear(128, 4)
weights = np.load('mtcnn_pytorch/src/weights/rnet.npy',allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])
def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
b: a float tensor with shape [batch_size, 4].
a: a float tensor with shape [batch_size, 2].
"""
x = self.features(x)
a = self.conv5_1(x)
b = self.conv5_2(x)
a = F.softmax(a, dim=-1)
return b, a
class ONet(nn.Module):
def __init__(self):
super(ONet, self).__init__()
self.features = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(3, 32, 3, 1)),
('prelu1', nn.PReLU(32)),
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv2', nn.Conv2d(32, 64, 3, 1)),
('prelu2', nn.PReLU(64)),
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv3', nn.Conv2d(64, 64, 3, 1)),
('prelu3', nn.PReLU(64)),
('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)),
('conv4', nn.Conv2d(64, 128, 2, 1)),
('prelu4', nn.PReLU(128)),
('flatten', Flatten()),
('conv5', nn.Linear(1152, 256)),
('drop5', nn.Dropout(0.25)),
('prelu5', nn.PReLU(256)),
]))
self.conv6_1 = nn.Linear(256, 2)
self.conv6_2 = nn.Linear(256, 4)
self.conv6_3 = nn.Linear(256, 10)
weights = np.load('mtcnn_pytorch/src/weights/onet.npy',allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])
def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
c: a float tensor with shape [batch_size, 10].
b: a float tensor with shape [batch_size, 4].
a: a float tensor with shape [batch_size, 2].
"""
x = self.features(x)
a = self.conv6_1(x)
b = self.conv6_2(x)
c = self.conv6_3(x)
a = F.softmax(a, dim = -1)
return c, b, a
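A quick check of the output-size formula in the PNet comment above (a sketch in plain arithmetic, no weights required): a 12x12 input yields a 1x1 output map, which is why min_detection_size is 12 in the detectors.

import math

def pnet_output_size(n):
    # conv1 (3x3): n - 2; pool (2x2, ceil_mode): ceil((n - 2) / 2); conv2 + conv3 (3x3 each): -4
    return math.ceil((n - 2) / 2) - 4

print(pnet_output_size(12))   # 1
print(pnet_output_size(24))   # 7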
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 11 06:54:28 2017
@author: zhaoyafei
"""
import numpy as np
from numpy.linalg import inv, norm, lstsq
from numpy.linalg import matrix_rank as rank
class MatlabCp2tormException(Exception):
def __str__(self):
return 'In File {}:{}'.format(
            __file__, super().__str__())
def tformfwd(trans, uv):
"""
Function:
----------
apply affine transform 'trans' to uv
Parameters:
----------
@trans: 3x3 np.array
transform matrix
@uv: Kx2 np.array
each row is a pair of coordinates (x, y)
Returns:
----------
@xy: Kx2 np.array
each row is a pair of transformed coordinates (x, y)
"""
uv = np.hstack((
uv, np.ones((uv.shape[0], 1))
))
xy = np.dot(uv, trans)
xy = xy[:, 0:-1]
return xy
def tforminv(trans, uv):
"""
Function:
----------
apply the inverse of affine transform 'trans' to uv
Parameters:
----------
@trans: 3x3 np.array
transform matrix
@uv: Kx2 np.array
each row is a pair of coordinates (x, y)
Returns:
----------
@xy: Kx2 np.array
each row is a pair of inverse-transformed coordinates (x, y)
"""
Tinv = inv(trans)
xy = tformfwd(Tinv, uv)
return xy
def findNonreflectiveSimilarity(uv, xy, options=None):
options = {'K': 2}
K = options['K']
M = xy.shape[0]
x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
# print('--->x, y:\n', x, y
tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
X = np.vstack((tmp1, tmp2))
# print('--->X.shape: ', X.shape
# print('X:\n', X
u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
U = np.vstack((u, v))
# print('--->U.shape: ', U.shape
# print('U:\n', U
# We know that X * r = U
if rank(X) >= 2 * K:
r, _, _, _ = lstsq(X, U)
r = np.squeeze(r)
else:
raise Exception('cp2tform:twoUniquePointsReq')
# print('--->r:\n', r
sc = r[0]
ss = r[1]
tx = r[2]
ty = r[3]
Tinv = np.array([
[sc, -ss, 0],
[ss, sc, 0],
[tx, ty, 1]
])
# print('--->Tinv:\n', Tinv
T = inv(Tinv)
# print('--->T:\n', T
T[:, 2] = np.array([0, 0, 1])
return T, Tinv
def findSimilarity(uv, xy, options=None):
options = {'K': 2}
# uv = np.array(uv)
# xy = np.array(xy)
# Solve for trans1
trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)
# Solve for trans2
# manually reflect the xy data across the Y-axis
    xyR = xy.copy()  # copy so the reflection below does not modify the caller's xy
xyR[:, 0] = -1 * xyR[:, 0]
trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)
# manually reflect the tform to undo the reflection done on xyR
TreflectY = np.array([
[-1, 0, 0],
[0, 1, 0],
[0, 0, 1]
])
trans2 = np.dot(trans2r, TreflectY)
# Figure out if trans1 or trans2 is better
xy1 = tformfwd(trans1, uv)
norm1 = norm(xy1 - xy)
xy2 = tformfwd(trans2, uv)
norm2 = norm(xy2 - xy)
if norm1 <= norm2:
return trans1, trans1_inv
else:
trans2_inv = inv(trans2)
return trans2, trans2_inv
def get_similarity_transform(src_pts, dst_pts, reflective=True):
"""
Function:
----------
Find Similarity Transform Matrix 'trans':
u = src_pts[:, 0]
v = src_pts[:, 1]
x = dst_pts[:, 0]
y = dst_pts[:, 1]
[x, y, 1] = [u, v, 1] * trans
Parameters:
----------
@src_pts: Kx2 np.array
source points, each row is a pair of coordinates (x, y)
@dst_pts: Kx2 np.array
destination points, each row is a pair of transformed
coordinates (x, y)
@reflective: True or False
if True:
use reflective similarity transform
else:
use non-reflective similarity transform
Returns:
----------
@trans: 3x3 np.array
transform matrix from uv to xy
trans_inv: 3x3 np.array
inverse of trans, transform matrix from xy to uv
"""
if reflective:
trans, trans_inv = findSimilarity(src_pts, dst_pts)
else:
trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)
return trans, trans_inv
def cvt_tform_mat_for_cv2(trans):
"""
Function:
----------
Convert Transform Matrix 'trans' into 'cv2_trans' which could be
directly used by cv2.warpAffine():
u = src_pts[:, 0]
v = src_pts[:, 1]
x = dst_pts[:, 0]
y = dst_pts[:, 1]
[x, y].T = cv_trans * [u, v, 1].T
Parameters:
----------
@trans: 3x3 np.array
transform matrix from uv to xy
Returns:
----------
@cv2_trans: 2x3 np.array
transform matrix from src_pts to dst_pts, could be directly used
for cv2.warpAffine()
"""
cv2_trans = trans[:, 0:2].T
return cv2_trans
def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
"""
Function:
----------
Find Similarity Transform Matrix 'cv2_trans' which could be
directly used by cv2.warpAffine():
u = src_pts[:, 0]
v = src_pts[:, 1]
x = dst_pts[:, 0]
y = dst_pts[:, 1]
[x, y].T = cv_trans * [u, v, 1].T
Parameters:
----------
@src_pts: Kx2 np.array
source points, each row is a pair of coordinates (x, y)
@dst_pts: Kx2 np.array
destination points, each row is a pair of transformed
coordinates (x, y)
reflective: True or False
if True:
use reflective similarity transform
else:
use non-reflective similarity transform
Returns:
----------
@cv2_trans: 2x3 np.array
transform matrix from src_pts to dst_pts, could be directly used
for cv2.warpAffine()
"""
trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
cv2_trans = cvt_tform_mat_for_cv2(trans)
return cv2_trans
if __name__ == '__main__':
"""
u = [0, 6, -2]
v = [0, 3, 5]
x = [-1, 0, 4]
y = [-1, -10, 4]
# In Matlab, run:
#
# uv = [u'; v'];
# xy = [x'; y'];
# tform_sim=cp2tform(uv,xy,'similarity');
#
# trans = tform_sim.tdata.T
# ans =
# -0.0764 -1.6190 0
# 1.6190 -0.0764 0
# -3.2156 0.0290 1.0000
# trans_inv = tform_sim.tdata.Tinv
# ans =
#
# -0.0291 0.6163 0
# -0.6163 -0.0291 0
# -0.0756 1.9826 1.0000
# xy_m=tformfwd(tform_sim, u,v)
#
# xy_m =
#
# -3.2156 0.0290
# 1.1833 -9.9143
# 5.0323 2.8853
# uv_m=tforminv(tform_sim, x,y)
#
# uv_m =
#
# 0.5698 1.3953
# 6.0872 2.2733
# -2.6570 4.3314
"""
u = [0, 6, -2]
v = [0, 3, 5]
x = [-1, 0, 4]
y = [-1, -10, 4]
uv = np.array((u, v)).T
xy = np.array((x, y)).T
print('\n--->uv:')
print(uv)
print('\n--->xy:')
print(xy)
trans, trans_inv = get_similarity_transform(uv, xy)
print('\n--->trans matrix:')
print(trans)
print('\n--->trans_inv matrix:')
print(trans_inv)
print('\n---> apply transform to uv')
print('\nxy_m = uv_augmented * trans')
uv_aug = np.hstack((
uv, np.ones((uv.shape[0], 1))
))
xy_m = np.dot(uv_aug, trans)
print(xy_m)
print('\nxy_m = tformfwd(trans, uv)')
xy_m = tformfwd(trans, uv)
print(xy_m)
print('\n---> apply inverse transform to xy')
print('\nuv_m = xy_augmented * trans_inv')
xy_aug = np.hstack((
xy, np.ones((xy.shape[0], 1))
))
uv_m = np.dot(xy_aug, trans_inv)
print(uv_m)
print('\nuv_m = tformfwd(trans_inv, xy)')
uv_m = tformfwd(trans_inv, xy)
print(uv_m)
uv_m = tforminv(trans, xy)
print('\nuv_m = tforminv(trans, xy)')
print(uv_m)
from PIL import ImageDraw
def show_bboxes(img, bounding_boxes, facial_landmarks=[]):
"""Draw bounding boxes and facial landmarks.
Arguments:
img: an instance of PIL.Image.
bounding_boxes: a float numpy array of shape [n, 5].
facial_landmarks: a float numpy array of shape [n, 10].
Returns:
an instance of PIL.Image.
"""
img_copy = img.copy()
draw = ImageDraw.Draw(img_copy)
for b in bounding_boxes:
draw.rectangle([
(b[0], b[1]), (b[2], b[3])
], outline='white')
for p in facial_landmarks:
for i in range(5):
draw.ellipse([
(p[i] - 1.0, p[i + 5] - 1.0),
(p[i] + 1.0, p[i + 5] + 1.0)
], outline='blue')
return img_copy
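A small end-to-end sketch combining detect_faces and show_bboxes (assuming the mtcnn_pytorch/src package layout above, the .npy weight files, and a hypothetical test image path):

from PIL import Image
from mtcnn_pytorch.src.detector import detect_faces
from mtcnn_pytorch.src.visualization_utils import show_bboxes

img = Image.open('data/sample.jpg')            # hypothetical test image
bboxes, landmarks = detect_faces(img)
show_bboxes(img, bboxes, landmarks).save('annotated.jpg')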
import requests
import json
import base64
def getByte(path):
with open(path, 'rb') as f:
        img_byte = base64.b64encode(f.read())  # read the file bytes and base64-encode them
img_str = img_byte.decode('ascii')
return img_str
# an image without a face
requestsss = {'inst_id': '1315584165094813698', 'img_url': "https://img0.baidu.com/it/u=52681052,678098948&fm=253&fmt=auto&app=120&f=JPEG?w=1000&h=400"}
# response: {'faces': {}, 'recognized': {}, 'unknown': {}, 'message': 'face detect failed,no faces find'}
# a face that is not registered
# requestsss = {'inst_id': '1315584165094813698', 'img_url': "https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fwww.n63.com%2Fphotodir%2Fn63img%2F%3FN%3DX2hiJTI2ZGRXZ1drV2xXJTVEV2xXZldlZyU1QiUyNiUyQi5mJTI2b29vJTI3YXJfZmdfZllhbGYlNUQlNjBrJTI3ZmZZaFliJTI3%26v%3D.jpg&refer=http%3A%2F%2Fwww.n63.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1630835867&t=717f70fd50bc5ccc7b81ad1f2fd9e938"}
# response: {'faces': {'0': {'left': 194.52226984500885, 'top': 104.55831748247147, 'right': 409.6213800907135, 'bottom': 377.34358298778534, 'confidence': 0.9952797889709473}}, 'recognized': {}, 'unknown': {'0': 'unknow'}, 'message': 'complete'}
req = json.dumps(requestsss)  # serialize the dict to a JSON string
res = requests.post('https://1290825930774375.cn-hangzhou.fc.aliyuncs.com/2016-08-15/proxy/faces/faces/', data=req)
print(res.json())
import cv2
import argparse
from pathlib import Path
from PIL import Image
from mtcnn import MTCNN
from datetime import datetime
from PIL import Image
import numpy as np
from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
parser = argparse.ArgumentParser(description='take a picture')
parser.add_argument('--name', '-n', default='unknown', type=str, help='input the name of the recording person')
args = parser.parse_args()
from pathlib import Path
data_path = Path('data')
save_path = data_path / 'facebank' / args.name
if not save_path.exists():
save_path.mkdir()
# initialize the camera
cap = cv2.VideoCapture(0)
# my camera defaults to 640*480; adjust the resolution to match your camera's capability
cap.set(3, 1280)
cap.set(4, 720)
mtcnn = MTCNN()
while cap.isOpened():
    # capture frames one by one
isSuccess, frame = cap.read()
    # display the captured frame in real time
if isSuccess:
frame_text = cv2.putText(frame,
'Press t to take a picture,q to quit.....',
(5, 100),
cv2.FONT_HERSHEY_SIMPLEX,
2,
(0, 255, 0),
3,
cv2.LINE_AA)
cv2.imshow("My Capture", frame_text)
        # take a picture when the 't' key is pressed
if cv2.waitKey(1) & 0xFF == ord('t'):
p = Image.fromarray(frame[..., ::-1])
try:
warped_face = np.array(mtcnn.align(p))[..., ::-1]
print(save_path / '{}.jpg'.format(str(datetime.now())[:-7].replace(":", "-").replace(" ", "-")))
cv2.imwrite(str(save_path / '{}.jpg'.format(str(datetime.now())[:-7].replace(":", "-").replace(" ", "-"))),
warped_face)
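                # note: the prompt below asks (in Chinese) for the name of the next person to
                # photograph; answering "继续" ("continue") keeps shooting the same person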
name = input("输入接下来要拍摄的人名")
if name == "继续":
continue
save_path = data_path / 'facebank' / name
if not save_path.exists():
save_path.mkdir()
except:
print('no face captured')
if cv2.waitKey(1) & 0xFF == ord('q'):
cap.release()
cv2.destroyAllWindows()
break
# the camera resources are released above when 'q' is pressed
ROSTemplateFormatVersion: '2015-09-01'
Transform: 'Aliyun::Serverless-2018-04-03'
Resources:
  faces:
    Type: 'Aliyun::Serverless::Service'
    Properties:
      VpcConfig:
        VpcId: 'vpc-bp1tl9xp5tiofeobcev6n'
        VSwitchIds: [ 'vsw-bp1hcr2yhzer8j6vt4u7i' ]
        SecurityGroupId: 'sg-bp18y43m16vy66b1axzm'
      NasConfig:
        UserId: 10003
        GroupId: 10003
        MountPoints:
          - ServerAddr: '0df8f4b72f-hoj6.cn-hangzhou.nas.aliyuncs.com:/'
            MountDir: '/mnt/nas'
      Description: 'face extract'
    faces:
      Type: 'Aliyun::Serverless::Function'
      Properties:
        Initializer: 'index.initializer'
        Handler: index.handler
        Runtime: python3
        CodeUri: './'
        InitializationTimeout: 13
        MemorySize: 1024
        Timeout: 15
        Events:
          httpTrigger:
            Type: HTTP
            Properties:
              AuthType: ANONYMOUS
              Methods: ['POST', 'GET']
from Learner import face_learner
from config import get_config
from mtcnn import MTCNN
from utils import insert_feature_to_es
from PIL import Image
import cv2
import numpy as np
from datetime import datetime
from pathlib import Path
import os
import time
from utils import prepare_facebank_to_es
from elasticsearch7 import Elasticsearch
# run the face photos through MTCNN detection, crop the face regions and save them under the facebank folder
for root,dir,files in os.walk("/Users/wda/PycharmProjects/faces/data/tmp"):
for file in files:
i = 1
if(file.endswith(".jpg") or file.endswith(".jpeg")):
# data_path = Path('data')
name = root.split("/")[-1]
save_path = '/Users/wda/PycharmProjects/faces/data/facebank/1404722168218124290/'
try:
frame = cv2.imread(os.path.join(root,file))
p = Image.fromarray(frame[...,::-1])
mtcnn = MTCNN()
warped_face = np.array(mtcnn.align(p))[..., ::-1]
# if not save_path.exists():
# save_path.mkdir()
cv2.imwrite("/Users/wda/PycharmProjects/faces/data/facebank/1404722168218124290/image0_112*112.jpg",warped_face)
# cv2.imwrite(str(save_path/'{}.jpg'.format(str(datetime.now())[:-7].replace(":","-").replace(" ","-")+"-"+str(i))), warped_face)
i = i + 1
except IndexError as e:
print(e)
continue
except ValueError as e:
print(e)
continue
except TypeError as e:
print(e)
continue
if i == 2:
break;
# host = "es-cn-7mz28jd5z001oqrc9.public.elasticsearch.aliyuncs.com"
# port = 9200
# user = "elastic"
# password = "4rfv%TGB"
# authen = None
# uripart = host + ':' + str(port)
# if user is not None:
# authen = user
# if authen is not None and password is not None:
# authen += ':' + password
# if authen is not None:
# uripart = authen + '@' + uripart
# protocol = 'http'
# if protocol is not None:
# protocol = protocol
# uri = protocol + '://' + uripart
# es = Elasticsearch(uri)
# print(es.ping())
# test that inserting the 512-dim feature into ES succeeds
conf = get_config(False)
learner = face_learner(conf, True)
learner.threshold = 1.44
if conf.device.type == 'cpu':
learner.load_state(conf, 'ir_se50.pth', True, True)
else:
    # this branch is unused; the cpu version above is always used
learner.load_state(conf, 'final.pth', True, True)
learner.model.eval()
print('learner loaded')
#
mtcnn = MTCNN()
# import oss2
# auth = oss2.Auth("LTAI5tFb87uVi1B4BcW6SJfH", "3PdUomiQg6yAduq97KX4rBNpkhzdml")
# bucket = oss2.Bucket(auth, "https://oss-cn-hangzhou.aliyuncs.com", "xmrc-resource")
# url = bucket.sign_url('GET','inst/1031482107660075104/student/1176335046671904770/face/20190924140010/3Fnks2mGnZAxzCtc',60)
# import requests
# from io import BytesIO
# response = requests.get(url)
# image = Image.open(BytesIO(response.content))
# bboxes, faces = mtcnn.align_multi(image, conf.face_limit, conf.min_face_size)
# success = learner.update_student_feature(conf, faces, 1349974664509480962, 1349972667086348289, "faces_rc")
# print(success)
#