diff --git a/README.md b/README.md index f146e4c..394f51e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,14 @@ +## This repository is a fork of [thearn/webcam-pulse-detector](https://github.com/thearn/webcam-pulse-detector) with a few modifications for automated video processing support: + +- Tracking starts automatically once the face has been stable for N seconds. +- Tracking restarts automatically when the face is not detected for some time or moves out of range. +- The face rectangle is stabilized for more reliable measurements. +- Added a video processing example in `get_pulse_from_video.py`. + +### Enjoy! + +--- + ![Alt text](http://i.imgur.com/2ngZopS.jpg "Screenshot") webcam-pulse-detector diff --git a/dl_face_detection/deploy.prototxt.txt b/dl_face_detection/deploy.prototxt.txt new file mode 100644 index 0000000..905580e --- /dev/null +++ b/dl_face_detection/deploy.prototxt.txt @@ -0,0 +1,1789 @@ +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 300 +} + +layer { + name: "data_bn" + type: "BatchNorm" + bottom: "data" + top: "data_bn" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "data_scale" + type: "Scale" + bottom: "data_bn" + top: "data_bn" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "conv1_h" + type: "Convolution" + bottom: "data_bn" + top: "conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + pad: 3 + kernel_size: 7 + stride: 2 + weight_filler { + type: "msra" + variance_norm: FAN_OUT + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv1_bn_h" + type: "BatchNorm" + bottom: "conv1_h" + top: "conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "conv1_scale_h" + type: "Scale" + bottom: "conv1_h" + top: "conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "conv1_relu" + type: "ReLU" + bottom: "conv1_h" + top: "conv1_h" +} +layer { + name: "conv1_pool" + type: "Pooling" + bottom: "conv1_h" + top: "conv1_pool" + pooling_param { + kernel_size: 3 + stride: 2 + } +} +layer { + name: "layer_64_1_conv1_h" + type: "Convolution" + bottom: "conv1_pool" + top: "layer_64_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_64_1_bn2_h" + type: "BatchNorm" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_64_1_scale2_h" + type: "Scale" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_64_1_relu2" + type: "ReLU" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" +} +layer { + name: "layer_64_1_conv2_h" + type: "Convolution" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv2_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1
+ weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_64_1_sum" + type: "Eltwise" + bottom: "layer_64_1_conv2_h" + bottom: "conv1_pool" + top: "layer_64_1_sum" +} +layer { + name: "layer_128_1_bn1_h" + type: "BatchNorm" + bottom: "layer_64_1_sum" + top: "layer_128_1_bn1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_128_1_scale1_h" + type: "Scale" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_bn1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_128_1_relu1" + type: "ReLU" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_bn1_h" +} +layer { + name: "layer_128_1_conv1_h" + type: "Convolution" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_bn2" + type: "BatchNorm" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_128_1_scale2" + type: "Scale" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_128_1_relu2" + type: "ReLU" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" +} +layer { + name: "layer_128_1_conv2" + type: "Convolution" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_conv_expand_h" + type: "Convolution" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_conv_expand_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_sum" + type: "Eltwise" + bottom: "layer_128_1_conv2" + bottom: "layer_128_1_conv_expand_h" + top: "layer_128_1_sum" +} +layer { + name: "layer_256_1_bn1" + type: "BatchNorm" + bottom: "layer_128_1_sum" + top: "layer_256_1_bn1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_256_1_scale1" + type: "Scale" + bottom: "layer_256_1_bn1" + top: "layer_256_1_bn1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_256_1_relu1" + type: "ReLU" + bottom: "layer_256_1_bn1" + top: "layer_256_1_bn1" +} +layer { + name: "layer_256_1_conv1" + type: "Convolution" + bottom: "layer_256_1_bn1" + top: "layer_256_1_conv1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_bn2" + type: 
"BatchNorm" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_256_1_scale2" + type: "Scale" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_256_1_relu2" + type: "ReLU" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" +} +layer { + name: "layer_256_1_conv2" + type: "Convolution" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_conv_expand" + type: "Convolution" + bottom: "layer_256_1_bn1" + top: "layer_256_1_conv_expand" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_sum" + type: "Eltwise" + bottom: "layer_256_1_conv2" + bottom: "layer_256_1_conv_expand" + top: "layer_256_1_sum" +} +layer { + name: "layer_512_1_bn1" + type: "BatchNorm" + bottom: "layer_256_1_sum" + top: "layer_512_1_bn1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_512_1_scale1" + type: "Scale" + bottom: "layer_512_1_bn1" + top: "layer_512_1_bn1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_512_1_relu1" + type: "ReLU" + bottom: "layer_512_1_bn1" + top: "layer_512_1_bn1" +} +layer { + name: "layer_512_1_conv1_h" + type: "Convolution" + bottom: "layer_512_1_bn1" + top: "layer_512_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 # 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_bn2_h" + type: "BatchNorm" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_512_1_scale2_h" + type: "Scale" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_512_1_relu2" + type: "ReLU" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" +} +layer { + name: "layer_512_1_conv2_h" + type: "Convolution" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv2_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 2 # 1 + kernel_size: 3 + stride: 1 + dilation: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_conv_expand_h" + type: "Convolution" + bottom: "layer_512_1_bn1" + top: "layer_512_1_conv_expand_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 # 2 + weight_filler { + 
type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_sum" + type: "Eltwise" + bottom: "layer_512_1_conv2_h" + bottom: "layer_512_1_conv_expand_h" + top: "layer_512_1_sum" +} +layer { + name: "last_bn_h" + type: "BatchNorm" + bottom: "layer_512_1_sum" + top: "layer_512_1_sum" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "last_scale_h" + type: "Scale" + bottom: "layer_512_1_sum" + top: "layer_512_1_sum" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "last_relu" + type: "ReLU" + bottom: "layer_512_1_sum" + top: "fc7" +} + +layer { + name: "conv6_1_h" + type: "Convolution" + bottom: "fc7" + top: "conv6_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1_h" + top: "conv6_1_h" +} +layer { + name: "conv6_2_h" + type: "Convolution" + bottom: "conv6_1_h" + top: "conv6_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2_h" + top: "conv6_2_h" +} +layer { + name: "conv7_1_h" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv7_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1_h" + top: "conv7_1_h" +} +layer { + name: "conv7_2_h" + type: "Convolution" + bottom: "conv7_1_h" + top: "conv7_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2_h" + top: "conv7_2_h" +} +layer { + name: "conv8_1_h" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv8_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1_h" + top: "conv8_1_h" +} +layer { + name: "conv8_2_h" + type: "Convolution" + bottom: "conv8_1_h" + top: "conv8_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_relu" + type: "ReLU" + bottom: "conv8_2_h" + top: "conv8_2_h" +} +layer { + name: "conv9_1_h" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv9_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + 
decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_1_relu" + type: "ReLU" + bottom: "conv9_1_h" + top: "conv9_1_h" +} +layer { + name: "conv9_2_h" + type: "Convolution" + bottom: "conv9_1_h" + top: "conv9_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_relu" + type: "ReLU" + bottom: "conv9_2_h" + top: "conv9_2_h" +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "layer_256_1_bn1" + top: "conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + max_size: 60.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 8 + offset: 0.5 + } +} +layer { + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: "fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + type: "Convolution" + bottom: "fc7" + 
top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 111.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 16 + offset: 0.5 + } +} +layer { + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv6_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2_h" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 111.0 + max_size: 162.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 32 + offset: 0.5 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_conf" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv7_2_mbox_conf" + 
param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2_h" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 162.0 + max_size: 213.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 64 + offset: 0.5 + } +} +layer { + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2_h" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 213.0 + max_size: 264.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 100 + offset: 0.5 + } +} +layer { + name: "conv9_2_mbox_loc" + type: "Convolution" + bottom: "conv9_2_h" + top: "conv9_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_loc_perm" + type: "Permute" + bottom: "conv9_2_mbox_loc" + top: "conv9_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv9_2_mbox_loc_perm" + top: "conv9_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_conf" + type: "Convolution" + bottom: "conv9_2_h" + top: "conv9_2_mbox_conf" + param { 
+ lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_conf_perm" + type: "Permute" + bottom: "conv9_2_mbox_conf" + top: "conv9_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv9_2_mbox_conf_perm" + top: "conv9_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv9_2_h" + bottom: "data" + top: "conv9_2_mbox_priorbox" + prior_box_param { + min_size: 264.0 + max_size: 315.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 300 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "conv9_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "conv9_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "conv9_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } +} + +layer { + name: "mbox_conf_reshape" + type: "Reshape" + bottom: "mbox_conf" + top: "mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: -1 + dim: 2 + } + } +} +layer { + name: "mbox_conf_softmax" + type: "Softmax" + bottom: "mbox_conf_reshape" + top: "mbox_conf_softmax" + softmax_param { + axis: 2 + } +} +layer { + name: "mbox_conf_flatten" + type: "Flatten" + bottom: "mbox_conf_softmax" + top: "mbox_conf_flatten" + flatten_param { + axis: 1 + } +} + +layer { + name: "detection_out" + type: "DetectionOutput" + bottom: "mbox_loc" + bottom: "mbox_conf_flatten" + bottom: "mbox_priorbox" + top: "detection_out" + include { + phase: TEST + } + detection_output_param { + num_classes: 2 + share_location: true + background_label_id: 0 + nms_param { + nms_threshold: 0.45 + top_k: 400 + } + code_type: CENTER_SIZE + keep_top_k: 200 + confidence_threshold: 0.01 + } +} diff --git a/get_pulse.py b/get_pulse.py index 5ba151d..d4585d9 100644 --- a/get_pulse.py +++ b/get_pulse.py @@ -10,6 +10,7 @@ import socket import sys + class getPulseApp(object): """ @@ -67,9 +68,7 @@ def __init__(self, args): # Basically, everything that isn't communication # to the camera device or part of the GUI - self.processor = findFaceGetPulse(bpm_limits=[50, 160], - data_spike_limit=2500., - face_detector_smoothness=10.) 
+ self.processor = findFaceGetPulse() # Init parameters for the cardiac data plot self.bpm_plot = False diff --git a/get_pulse_from_video.py b/get_pulse_from_video.py new file mode 100644 index 0000000..f52c5fe --- /dev/null +++ b/get_pulse_from_video.py @@ -0,0 +1,218 @@ +from lib.processors_noopenmdao import findFaceGetPulse +from lib.interface import plotXY, imshow, waitKey, destroyWindow +import cv2 +from cv2 import moveWindow +import argparse +import numpy as np +import datetime +import socket +import sys +from PIL import Image + + +class getPulseApp(object): + + """ + Python application that finds a face in a video then isolates the + forehead. + + Then the average green-light intensity in the forehead region is gathered + over time, and the detected person's pulse is estimated. + """ + + def __init__(self, args, fps): + serial = args.serial + baud = args.baud + self.send_serial = False + self.send_udp = False + if serial: + self.send_serial = True + if not baud: + baud = 9600 + else: + baud = int(baud) + self.serial = Serial(port=serial, baudrate=baud) + + udp = args.udp + if udp: + self.send_udp = True + if ":" not in udp: + ip = udp + port = 5005 + else: + ip, port = udp.split(":") + port = int(port) + self.udp = (ip, port) + self.sock = socket.socket(socket.AF_INET, # Internet + socket.SOCK_DGRAM) # UDP + + self.w, self.h = 0, 0 + self.pressed = 0 + + # Containerized analysis of recieved image frames (an openMDAO assembly) + # is defined next. + + # This assembly is designed to handle all image & signal analysis, + # such as face detection, forehead isolation, time series collection, + # heart-beat detection, etc. + + # Basically, everything that isn't communication + # to the camera device or part of the GUI + self.processor = findFaceGetPulse(fps, True) + + # Init parameters for the cardiac data plot + self.bpm_plot = False + self.plot_title = "Data display - raw signal (top) and PSD (bottom)" + + # Maps keystrokes to specified methods + # (A GUI window must have focus for these to work) + self.key_controls = {"s": self.toggle_search, + "d": self.toggle_display_plot, + "f": self.write_csv} + + def write_csv(self): + """ + Writes current data to a csv file + """ + fn = "video-pulse" + str(datetime.datetime.now()) + fn = fn.replace(":", "_").replace(".", "_") + data = np.vstack((self.processor.times, self.processor.samples)).T + np.savetxt(fn + ".csv", data, delimiter=',') + print("Writing csv") + + def toggle_search(self): + """ + Toggles a motion lock on the processor's face detection component. + + Locking the forehead location in place significantly improves + data quality, once a forehead has been sucessfully isolated. + """ + #state = self.processor.find_faces.toggle() + state = self.processor.find_faces_toggle() + print("face detection lock =", not state) + + def toggle_display_plot(self): + """ + Toggles the data display. 
+ """ + if self.bpm_plot: + print("bpm plot disabled") + self.bpm_plot = False + destroyWindow(self.plot_title) + else: + print("bpm plot enabled") + if self.processor.find_faces: + self.toggle_search() + self.bpm_plot = True + self.make_bpm_plot() + moveWindow(self.plot_title, self.w, 0) + + def make_bpm_plot(self): + """ + Creates and/or updates the data display + """ + plotXY([[self.processor.times, + self.processor.samples], + [self.processor.freqs, + self.processor.fft]], + labels=[False, True], + showmax=[False, "bpm"], + label_ndigits=[0, 0], + showmax_digits=[0, 1], + skip=[3, 3], + name=self.plot_title, + bg=self.processor.slices[0]) + + def key_handler(self): + """ + Handle keystrokes, as set at the bottom of __init__() + + A plotting or camera frame window must have focus for keypresses to be + detected. + """ + + self.pressed = waitKey(10) & 255 # wait for keypress for 10 ms + if self.pressed == 27: # exit program on 'esc' + print("Exiting") + if self.send_serial: + self.serial.close() + sys.exit() + + for key in self.key_controls.keys(): + if chr(self.pressed) == key: + self.key_controls[key]() + + def main_loop(self, frame): + """ + Single iteration of the application's main loop. + """ + # Get current image frame from the video file + self.h, self.w, _c = frame.shape + + # display unaltered frame + # imshow("Original",frame) + + # set current image frame to the processor's input + self.processor.frame_in = frame + # process the image frame to perform all needed analysis + self.processor.run() + # collect the output frame for display + output_frame = self.processor.frame_out + + # show the processed/annotated output frame + imshow("Processed", output_frame) + + # create and/or update the raw data display if needed + if self.bpm_plot: + self.make_bpm_plot() + + if self.send_serial: + self.serial.write(str(self.processor.bpm) + "\r\n") + + if self.send_udp: + self.sock.sendto(str(self.processor.bpm), self.udp) + + # handle any key presses + self.key_handler() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Webcam pulse detector.') + parser.add_argument('--serial', default=None, + help='serial port destination for bpm data') + parser.add_argument('--baud', default=None, + help='Baud rate for serial transmission') + parser.add_argument('--udp', default=None, + help='udp address:port destination for bpm data') + args = parser.parse_args() + + # Find OpenCV version + (major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.') + + video_cap = cv2.VideoCapture('data/67_bpm_cut.mp4') + + # get FPS + if int(major_ver) < 3: + fps = video_cap.get(cv2.cv.CV_CAP_PROP_FPS) + print("Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps)) + else: + fps = video_cap.get(cv2.CAP_PROP_FPS) + print("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps)) + + App = getPulseApp(args, fps) + + plot_enabled = False + + while video_cap.isOpened(): + ret, frame = video_cap.read() + + if frame is None: + break + + App.main_loop(frame) + + if not plot_enabled: + App.toggle_display_plot() + plot_enabled = True + + print([int(val) for val in App.processor.results]) + print("mean: ", np.mean(App.processor.results)) diff --git a/lib/dl_face_detector.py b/lib/dl_face_detector.py new file mode 100644 index 0000000..e3af9b5 --- /dev/null +++ b/lib/dl_face_detector.py @@ -0,0 +1,68 @@ +# check this amazing tutorial for more details: +# https://www.pyimagesearch.com/2018/02/26/face-detection-with-opencv-and-deep-learning/ + +import numpy as 
np +import cv2 + +min_confidence = 0.6 + +# path to Caffe 'deploy' prototxt file +prototxt_path = './dl_face_detection/deploy.prototxt.txt' + +# path to Caffe pre-trained model +model_path = './dl_face_detection/res10_300x300_ssd_iter_140000.caffemodel' + +# load our serialized model from disk +net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path) + +# use a deep learning model to detect the face + + +def get_face_from_img(image): + # add padding to make the image square + (h, w) = image.shape[:2] + + pad_right, pad_bottom = (0, 0) + + # pad right if image is too tall + if h > w: + pad_right = h - w + + # pad bottom if image is too wide + if w > h: + pad_bottom = w - h + + if pad_right or pad_bottom: + color = [0, 0, 0] + image = cv2.copyMakeBorder(image, 0, pad_bottom, 0, pad_right, cv2.BORDER_CONSTANT, + value=color) + w += pad_right + h += pad_bottom + + # construct an input blob for the image + # by resizing to a fixed 300x300 pixels and then normalizing it + resized_image = cv2.resize(image, (300, 300)) + blob = cv2.dnn.blobFromImage(resized_image, 1.0, (300, 300)) + + # pass the blob through the network and obtain the detections and + # predictions + net.setInput(blob) + detections = net.forward() + + # loop over the detections + for i in range(0, detections.shape[2]): + # extract the confidence (i.e., probability) associated with the + # prediction + confidence = detections[0, 0, i, 2] + + # filter out weak detections by ensuring the `confidence` is + # greater than the minimum confidence + if confidence > min_confidence: + # compute the (x, y)-coordinates of the bounding box for the + # object + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, startY, endX, endY) = box.astype("int") + + return (startX, startY, endX - startX, endY - startY) + + return None diff --git a/lib/processors_noopenmdao.py b/lib/processors_noopenmdao.py index 2dc39db..0c4f59d 100644 --- a/lib/processors_noopenmdao.py +++ b/lib/processors_noopenmdao.py @@ -1,10 +1,13 @@ import numpy as np +import pandas as pd import time import cv2 import pylab import os import sys +from .dl_face_detector import get_face_from_img + def resource_path(relative_path): """ Get absolute path to resource, works for dev and for PyInstaller """ @@ -17,36 +20,59 @@ def resource_path(relative_path): return os.path.join(base_path, relative_path) +default_color = (100, 255, 100) +red_color = (100, 100, 255) + + class findFaceGetPulse(object): - def __init__(self, bpm_limits=[], data_spike_limit=250, - face_detector_smoothness=10): + def __init__(self, fps=None, running_on_video=False): + self.running_on_video = running_on_video + self.fps = fps + self.seconds_per_frame = 1 / fps if running_on_video else None + + self.fps_calculator_ticks = None + self.fps_calculator_start = None + # we need a few seconds to calculate FPS + self.fps_calculator_min_seconds = 3 self.frame_in = np.zeros((10, 10)) self.frame_out = np.zeros((10, 10)) - self.fps = 0 self.buffer_size = 250 - #self.window = np.hamming(self.buffer_size) + + self.last_face_rects = pd.DataFrame(columns=['x', 'y', 'h', 'w']) + self.fixed_face = None + + # restart tracking if face can't be detected for 0.5 seconds + self.no_face_tolerance = 0.5 + self.no_face_counter = 0 + self.tracking_running = False + + # start calculating heart rate if face is stable for 2 seconds + self.stable_face_threshold = 2 + + self.tracking_batch_size = 50 + self.results = [] + self.bpm_buffer = [] + self.stable_face_counter = 0 + + self.frame_i = None + self.gray = None self.data_buffer
= [] self.times = [] - self.ttimes = [] self.samples = [] self.freqs = [] self.fft = [] self.slices = [[0]] - self.t0 = time.time() - self.bpms = [] + self.t0 = None self.bpm = 0 dpath = resource_path("haarcascade_frontalface_alt.xml") if not os.path.exists(dpath): print("Cascade file not present!") self.face_cascade = cv2.CascadeClassifier(dpath) - self.face_rect = [1, 1, 2, 2] - self.last_center = np.array([0, 0]) - self.last_wh = np.array([0, 0]) + self.face_rect = None self.output_dim = 13 - self.trained = False self.idx = 1 self.find_faces = True @@ -58,17 +84,9 @@ def find_faces_toggle(self): def get_faces(self): return - def shift(self, detected): - x, y, w, h = detected - center = np.array([x + 0.5 * w, y + 0.5 * h]) - shift = np.linalg.norm(center - self.last_center) - - self.last_center = center - return shift - def draw_rect(self, rect, col=(0, 255, 0)): x, y, w, h = rect - cv2.rectangle(self.frame_out, (x, y), (x + w, y + h), col, 1) + cv2.rectangle(self.frame_out, (x, y), (x + w, y + h), default_color, 1) def get_subface_coord(self, fh_x, fh_y, fh_w, fh_h): x, y, w, h = self.face_rect @@ -86,10 +104,6 @@ def get_subface_means(self, coord): return (v1 + v2 + v3) / 3. - def train(self): - self.trained = not self.trained - return self.trained - def plot(self): data = np.array(self.data_buffer).T np.savetxt("data.dat", data) @@ -115,66 +129,182 @@ def plot(self): pylab.savefig("data_fft.png") quit() - def run(self, cam): - self.times.append(time.time() - self.t0) + def run(self, cam=None): + if self.t0 is None: + self.t0 = time.time() + self.frame_i = 0 + else: + self.frame_i += 1 + self.frame_out = self.frame_in - self.gray = cv2.equalizeHist(cv2.cvtColor(self.frame_in, - cv2.COLOR_BGR2GRAY)) - col = (100, 255, 100) - if self.find_faces: + self.bpm = 0 + + # first we have to calculate fps + # if it's not ready - skip everything else + if not self.calculate_fps(): + cv2.putText( + self.frame_out, "Calculating FPS", + (10, 450), cv2.FONT_HERSHEY_PLAIN, 1.5, red_color, 2) + return + + # try to detect face + self.detect_face() + + if self.face_rect is None: + self.no_face_counter += 1 + + # too long without a face, restart tracking + if self.no_face_counter > self.no_face_tolerance * self.fps: + print('no face reset') + self.clear_buffers() + + # otherwise - skip this frame but don't stop reset tracking just yet + else: + self.no_face_counter = 0 + + # if face is out of range - clear buffers and stop tracking + if self.current_face_out_of_range(): + #if self.stable_face_counter: + print('out of range reset') + self.clear_buffers() + else: + # we've got a stable face + if not self.tracking_running: + self.stable_face_counter += 1 + + # check if face is stable long enough, start tracking if it's + if self.stable_face_counter >= self.stable_face_threshold * self.fps: + self.tracking_running = True + print('stabilized') + + # tracking is running + if self.tracking_running: + self.track_rate() + + if self.bpm: + self.bpm_buffer.append(self.bpm) + # print("(BPM estimate: %0.1f bpm)" % (self.bpm)) + + # if we have enough data - store the mean bpm + if len(self.bpm_buffer) >= self.tracking_batch_size: + new_mean = np.mean(self.bpm_buffer) + print("(BPM estimate: %0.1f bpm. 
fps: %d)" % (new_mean, self.fps)) + self.results.append(new_mean) + self.bpm_buffer = [] + + self.draw_face_rect() + + # print menu + if not self.tracking_running: + self.print_start_menu(cam) + else: + self.print_tracking_menu(cam) + + def print_start_menu(self, cam): + if cam is not None: cv2.putText( self.frame_out, "Press 'C' to change camera (current: %s)" % str( cam), - (10, 25), cv2.FONT_HERSHEY_PLAIN, 1.25, col) + (10, 25), cv2.FONT_HERSHEY_PLAIN, 1.25, default_color) + + cv2.putText(self.frame_out, "Press 'Esc' to quit", + (10, 75), cv2.FONT_HERSHEY_PLAIN, 1.25, default_color) + + def print_tracking_menu(self, cam): + if cam is not None: cv2.putText( - self.frame_out, "Press 'S' to lock face and begin", - (10, 50), cv2.FONT_HERSHEY_PLAIN, 1.25, col) - cv2.putText(self.frame_out, "Press 'Esc' to quit", - (10, 75), cv2.FONT_HERSHEY_PLAIN, 1.25, col) - self.data_buffer, self.times, self.trained = [], [], False - detected = list(self.face_cascade.detectMultiScale(self.gray, - scaleFactor=1.3, - minNeighbors=4, - minSize=( - 50, 50), - flags=cv2.CASCADE_SCALE_IMAGE)) - - if len(detected) > 0: - detected.sort(key=lambda a: a[-1] * a[-2]) - - if self.shift(detected[-1]) > 10: - self.face_rect = detected[-1] - forehead1 = self.get_subface_coord(0.5, 0.18, 0.25, 0.15) - self.draw_rect(self.face_rect, col=(255, 0, 0)) - x, y, w, h = self.face_rect - cv2.putText(self.frame_out, "Face", - (x, y), cv2.FONT_HERSHEY_PLAIN, 1.5, col) - self.draw_rect(forehead1) - x, y, w, h = forehead1 - cv2.putText(self.frame_out, "Forehead", - (x, y), cv2.FONT_HERSHEY_PLAIN, 1.5, col) - return - if set(self.face_rect) == set([1, 1, 2, 2]): - return - cv2.putText( - self.frame_out, "Press 'C' to change camera (current: %s)" % str( - cam), - (10, 25), cv2.FONT_HERSHEY_PLAIN, 1.25, col) + self.frame_out, "Press 'C' to change camera (current: %s)" % str( + cam), + (10, 25), cv2.FONT_HERSHEY_PLAIN, 1.25, default_color) + cv2.putText( self.frame_out, "Press 'S' to restart", - (10, 50), cv2.FONT_HERSHEY_PLAIN, 1.5, col) + (10, 50), cv2.FONT_HERSHEY_PLAIN, 1.5, default_color) cv2.putText(self.frame_out, "Press 'D' to toggle data plot", - (10, 75), cv2.FONT_HERSHEY_PLAIN, 1.5, col) + (10, 75), cv2.FONT_HERSHEY_PLAIN, 1.5, default_color) cv2.putText(self.frame_out, "Press 'Esc' to quit", - (10, 100), cv2.FONT_HERSHEY_PLAIN, 1.5, col) + (10, 100), cv2.FONT_HERSHEY_PLAIN, 1.5, default_color) + + def clear_buffers(self): + self.data_buffer, self.times = [], [] + self.last_face_rects = self.last_face_rects.iloc[0:0] + self.bpm_buffer = [] + self.stable_face_counter = 0 + + def detect_face(self): + self.gray = cv2.equalizeHist(cv2.cvtColor(self.frame_in, + cv2.COLOR_BGR2GRAY)) + + self.face_rect = get_face_from_img(self.frame_in) + + def draw_face_rect(self): + forehead1 = self.get_subface_coord(0.5, 0.18, 0.25, 0.15) + self.draw_rect(self.face_rect, col=(255, 0, 0)) + x, y, w, h = self.face_rect + cv2.putText(self.frame_out, "Face", + (x, y), cv2.FONT_HERSHEY_PLAIN, 1.5, default_color) + self.draw_rect(forehead1) + + def face_dict_to_rect(selfs, face_dict): + return (int(face_dict['x']), + int(face_dict['y']), + int(face_dict['w']), + int(face_dict['h'])) + + def is_face_close(self, face1, face2): + delta = .07 + d_width = face1['w'] * delta + d_height = face1['h'] * delta + + return abs(face1['x'] - face2['x']) < d_width and \ + abs(face1['w'] - face2['w']) < d_width and \ + abs(face1['y'] - face2['y']) < d_height and \ + abs(face1['h'] - face2['h']) < d_height + + def current_face_out_of_range(self): + out_of_range = False 
+ + # take current face rectangle + x, y, w, h = self.face_rect + face_dict = {'x': x, 'y': y, 'h': h, 'w': w} + + # append current face rectangle to a list + self.last_face_rects = self.last_face_rects.append(face_dict, ignore_index=True) + self.last_face_rects = self.last_face_rects.tail(20) + # get the average rectangle as a new candidate + face_candidate = self.last_face_rects.mean() + + # if we don't have a fixed rectangle, or the current candidate is too far away - reset + if self.fixed_face is None or not self.is_face_close(face_candidate, self.fixed_face): + out_of_range = True + self.fixed_face = face_candidate + + self.face_rect = self.face_dict_to_rect(self.fixed_face) + + return out_of_range + + def track_rate(self): forehead1 = self.get_subface_coord(0.5, 0.18, 0.25, 0.15) self.draw_rect(forehead1) - vals = self.get_subface_means(forehead1) + x, y, w, h = forehead1 + avg_val = self.get_subface_means(forehead1) / (h * w) + + # use fps-based timestamps for video + # and real time when working with a camera + if self.running_on_video: + self.times.append(self.frame_i * self.seconds_per_frame) + else: + self.times.append(time.time() - self.t0) + + self.data_buffer.append(avg_val) + + self.data_buffer[-1] = np.mean(self.data_buffer[-2:]) - self.data_buffer.append(vals) L = len(self.data_buffer) + + # trim the data and times buffers; we don't need more than buffer_size if L > self.buffer_size: self.data_buffer = self.data_buffer[-self.buffer_size:] self.times = self.times[-self.buffer_size:] @@ -185,7 +315,6 @@ def run(self, cam): if L > 10: self.output_dim = processed.shape[0] - self.fps = float(L) / (self.times[-1] - self.times[0]) even_times = np.linspace(self.times[0], self.times[-1], L) interpolated = np.interp(even_times, self.times, processed) interpolated = np.hamming(L) * interpolated @@ -211,7 +340,7 @@ def run(self, cam): alpha = t beta = 1 - t - self.bpm = self.freqs[idx2] + bpm_estimate = self.freqs[idx2] self.idx += 1 x, y, w, h = self.get_subface_coord(0.5, 0.18, 0.25, 0.15) @@ -225,14 +354,36 @@ def run(self, cam): b]) x1, y1, w1, h1 = self.face_rect self.slices = [np.copy(self.frame_out[y1:y1 + h1, x1:x1 + w1, 1])] - col = (100, 255, 100) gap = (self.buffer_size - L) / self.fps - # self.bpms.append(bpm) - # self.ttimes.append(time.time()) if gap: - text = "(estimate: %0.1f bpm, wait %0.0f s)" % (self.bpm, gap) + text = "(estimate: %0.1f bpm, wait %0.0f s)" % (bpm_estimate, gap) else: - text = "(estimate: %0.1f bpm)" % (self.bpm) + text = "(estimate: %0.1f bpm)" % (bpm_estimate) + self.bpm = bpm_estimate + + tsize = 1 cv2.putText(self.frame_out, text, - (int(x - w / 2), int(y)), cv2.FONT_HERSHEY_PLAIN, tsize, col) + (int(x - w / 2), int(y)), cv2.FONT_HERSHEY_PLAIN, tsize, default_color) + + def calculate_fps(self): + if self.fps: + return True + + # if we didn't start counting - let's start + if not self.fps_calculator_start: + self.fps_calculator_start = time.time() + self.fps_calculator_ticks = 0 + + return False + + # we started calculating the FPS + self.fps_calculator_ticks += 1 + + # time elapsed + seconds = time.time() - self.fps_calculator_start + + if seconds >= self.fps_calculator_min_seconds: + # calculate frames per second + self.fps = self.fps_calculator_ticks / seconds + + return True
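---

### Usage and implementation notes

`get_pulse_from_video.py` reads a hard-coded file, `data/67_bpm_cut.mp4`, queries the container for its FPS, and prints the per-batch BPM estimates plus their mean when the video ends. A minimal sketch of how the input path could instead be taken from the command line; the `--video` flag is an assumption, not part of this diff:

```python
# Hypothetical tweak to the argparse block at the bottom of get_pulse_from_video.py.
parser.add_argument('--video', default='data/67_bpm_cut.mp4',
                    help='path to the input video file')
args = parser.parse_args()

video_cap = cv2.VideoCapture(args.video)  # replaces the hard-coded path
```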
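`lib/dl_face_detector.py` loads `dl_face_detection/deploy.prototxt.txt` together with `res10_300x300_ssd_iter_140000.caffemodel` at import time; the weights file is referenced but not added by this diff, so it must already exist at that path relative to the working directory. A minimal sketch of exercising the detector on a single image, run from the repository root (the image paths are placeholders):

```python
import cv2
from lib.dl_face_detector import get_face_from_img

frame = cv2.imread("some_test_frame.jpg")   # any BGR image containing a face
face = get_face_from_img(frame)             # returns (x, y, w, h) in pixels, or None
if face is not None:
    x, y, w, h = face
    cv2.rectangle(frame, (x, y), (x + w, y + h), (100, 255, 100), 2)
    cv2.imwrite("face_check.jpg", frame)
```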
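Face stabilization in `current_face_out_of_range()` keeps the last 20 detected rectangles in a pandas DataFrame, averages them into a candidate, and restarts tracking when the candidate drifts by more than 7% of the face size (`is_face_close`). Note that `DataFrame.append` was removed in pandas 2.0; a standalone sketch of the same buffering with `pd.concat` instead:

```python
import pandas as pd

rects = pd.DataFrame(columns=['x', 'y', 'h', 'w'], dtype=float)
face_dict = {'x': 10, 'y': 20, 'h': 100, 'w': 100}   # example detection
# pandas >= 2.0: concat replaces the removed DataFrame.append
rects = pd.concat([rects, pd.DataFrame([face_dict])], ignore_index=True)
rects = rects.tail(20)      # keep only the most recent rectangles
candidate = rects.mean()    # averaged (stabilized) face rectangle
```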
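The pulse estimate itself is essentially unchanged from the upstream project: the average forehead intensity is buffered per frame, resampled onto an even time grid, windowed, and the dominant FFT peak inside a plausible heart-rate band is reported as BPM. A condensed, self-contained sketch of that step (the band limits here are illustrative, not taken from the diff):

```python
import numpy as np

def estimate_bpm(times, samples, bpm_band=(50, 180)):
    """Pick the dominant frequency of the forehead-brightness signal, in BPM."""
    L = len(samples)
    fps = float(L) / (times[-1] - times[0])            # effective sample rate
    even_times = np.linspace(times[0], times[-1], L)   # uniform time grid
    resampled = np.interp(even_times, times, samples)  # interpolate onto it
    windowed = np.hamming(L) * resampled               # reduce spectral leakage
    windowed = windowed - np.mean(windowed)            # remove the DC offset
    spectrum = np.abs(np.fft.rfft(windowed))
    freqs = 60.0 * fps / L * np.arange(len(spectrum))  # bin frequencies in BPM
    band = np.where((freqs > bpm_band[0]) & (freqs < bpm_band[1]))
    if band[0].size == 0:
        return None
    return freqs[band][np.argmax(spectrum[band])]
```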