From 078a6ce66deae8f19b9334bafa8842aa5b57e0ab Mon Sep 17 00:00:00 2001 From: Jasper Date: Fri, 28 May 2021 12:27:12 +0200 Subject: [PATCH 01/24] [ADD] added all the files --- .gitignore | 2 + res/pose/coco/pose_deploy_linevec.prototxt | 2976 +++++++++++++++++ ...se_deploy_linevec_faster_4_stages.prototxt | 2081 ++++++++++++ src/computervision/ObjectDetection.cpp | 6 +- src/computervision/OpenPoseImage.cpp | 149 + src/computervision/OpenPoseVideo.cpp | 144 + src/computervision/OpenPoseVideo.h | 19 + src/main.cpp | 11 +- 8 files changed, 5383 insertions(+), 5 deletions(-) create mode 100644 res/pose/coco/pose_deploy_linevec.prototxt create mode 100644 res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt create mode 100644 src/computervision/OpenPoseImage.cpp create mode 100644 src/computervision/OpenPoseVideo.cpp create mode 100644 src/computervision/OpenPoseVideo.h diff --git a/.gitignore b/.gitignore index 9232336..488f5fe 100644 --- a/.gitignore +++ b/.gitignore @@ -428,4 +428,6 @@ FodyWeavers.xsd **/docs/* **/doc/* +**/pose_iter_160000.caffemodel + # End of https://www.toptal.com/developers/gitignore/api/c++,visualstudio,visualstudiocode,opencv diff --git a/res/pose/coco/pose_deploy_linevec.prototxt b/res/pose/coco/pose_deploy_linevec.prototxt new file mode 100644 index 0000000..90a54fd --- /dev/null +++ b/res/pose/coco/pose_deploy_linevec.prototxt @@ -0,0 +1,2976 @@ +input: "image" +input_dim: 1 +input_dim: 3 +input_dim: 1 # This value will be defined at runtime +input_dim: 1 # This value will be defined at runtime +layer { + name: "conv1_1" + type: "Convolution" + bottom: "image" + top: "conv1_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1_stage1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1_stage1" + top: "conv2_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2_stage1" + type: "Pooling" + bottom: "conv2_2" + top: "pool2_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2_stage1" + top: "conv3_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "conv3_4" + type: "Convolution" + bottom: "conv3_3" + top: "conv3_4" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_4" + type: "ReLU" + bottom: "conv3_4" + top: "conv3_4" +} +layer { + name: "pool3_stage1" + type: "Pooling" + bottom: "conv3_4" + top: "pool3_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3_stage1" + top: "conv4_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3_CPM" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_3_CPM" + type: "ReLU" + bottom: "conv4_3_CPM" + top: "conv4_3_CPM" +} +layer { + name: "conv4_4_CPM" + type: "Convolution" + bottom: "conv4_3_CPM" + top: "conv4_4_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_4_CPM" + type: "ReLU" + bottom: "conv4_4_CPM" + top: "conv4_4_CPM" +} +layer { + name: "conv5_1_CPM_L1" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L1" + type: "ReLU" + bottom: "conv5_1_CPM_L1" + top: "conv5_1_CPM_L1" +} +layer { + name: "conv5_1_CPM_L2" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L2" + type: "ReLU" + bottom: "conv5_1_CPM_L2" + top: "conv5_1_CPM_L2" +} +layer { + name: "conv5_2_CPM_L1" + type: "Convolution" + bottom: "conv5_1_CPM_L1" + top: "conv5_2_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L1" + type: "ReLU" + bottom: "conv5_2_CPM_L1" + top: "conv5_2_CPM_L1" +} +layer { + name: "conv5_2_CPM_L2" + type: "Convolution" + bottom: "conv5_1_CPM_L2" + top: "conv5_2_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L2" + type: "ReLU" + bottom: "conv5_2_CPM_L2" + top: "conv5_2_CPM_L2" +} +layer { + name: "conv5_3_CPM_L1" + type: "Convolution" + bottom: "conv5_2_CPM_L1" + top: "conv5_3_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L1" + type: "ReLU" + bottom: "conv5_3_CPM_L1" + top: "conv5_3_CPM_L1" +} +layer { + name: "conv5_3_CPM_L2" + type: "Convolution" + bottom: "conv5_2_CPM_L2" + top: "conv5_3_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L2" + type: "ReLU" + bottom: "conv5_3_CPM_L2" + top: "conv5_3_CPM_L2" +} +layer { + name: "conv5_4_CPM_L1" + type: "Convolution" + bottom: "conv5_3_CPM_L1" + top: "conv5_4_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L1" + type: "ReLU" + bottom: "conv5_4_CPM_L1" + top: "conv5_4_CPM_L1" +} +layer { + name: "conv5_4_CPM_L2" + type: "Convolution" + bottom: "conv5_3_CPM_L2" + top: "conv5_4_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L2" + type: "ReLU" + bottom: "conv5_4_CPM_L2" + top: "conv5_4_CPM_L2" +} +layer { + name: "conv5_5_CPM_L1" + type: "Convolution" + bottom: "conv5_4_CPM_L1" + top: "conv5_5_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "conv5_5_CPM_L2" + type: "Convolution" + bottom: "conv5_4_CPM_L2" + top: "conv5_5_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage2" + type: "Concat" + bottom: "conv5_5_CPM_L1" + bottom: "conv5_5_CPM_L2" + bottom: "conv4_4_CPM" + top: "concat_stage2" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage2_L1" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L1" + type: "ReLU" + bottom: "Mconv1_stage2_L1" + top: "Mconv1_stage2_L1" +} +layer { + name: "Mconv1_stage2_L2" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L2" + type: "ReLU" + bottom: "Mconv1_stage2_L2" + top: "Mconv1_stage2_L2" +} +layer { + name: "Mconv2_stage2_L1" + type: "Convolution" + bottom: "Mconv1_stage2_L1" + top: "Mconv2_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L1" + type: "ReLU" + bottom: "Mconv2_stage2_L1" + top: "Mconv2_stage2_L1" +} +layer { + name: "Mconv2_stage2_L2" + type: "Convolution" + bottom: "Mconv1_stage2_L2" + top: "Mconv2_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L2" + type: "ReLU" + bottom: "Mconv2_stage2_L2" + top: "Mconv2_stage2_L2" +} +layer { + name: "Mconv3_stage2_L1" + type: "Convolution" + bottom: "Mconv2_stage2_L1" + top: "Mconv3_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L1" + type: "ReLU" + bottom: "Mconv3_stage2_L1" + top: "Mconv3_stage2_L1" +} +layer { + name: "Mconv3_stage2_L2" + type: "Convolution" + bottom: "Mconv2_stage2_L2" + top: "Mconv3_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L2" + type: "ReLU" + bottom: "Mconv3_stage2_L2" + top: "Mconv3_stage2_L2" +} +layer { + name: "Mconv4_stage2_L1" + type: "Convolution" + bottom: "Mconv3_stage2_L1" + top: "Mconv4_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L1" + type: "ReLU" + bottom: "Mconv4_stage2_L1" + top: "Mconv4_stage2_L1" +} +layer { + name: "Mconv4_stage2_L2" + type: "Convolution" + bottom: "Mconv3_stage2_L2" + top: "Mconv4_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L2" + type: "ReLU" + bottom: "Mconv4_stage2_L2" + top: "Mconv4_stage2_L2" +} +layer { + name: "Mconv5_stage2_L1" + type: "Convolution" + bottom: "Mconv4_stage2_L1" + top: "Mconv5_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L1" + type: "ReLU" + bottom: "Mconv5_stage2_L1" + top: "Mconv5_stage2_L1" +} +layer { + name: "Mconv5_stage2_L2" + type: "Convolution" + bottom: "Mconv4_stage2_L2" + top: "Mconv5_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L2" + type: "ReLU" + bottom: "Mconv5_stage2_L2" + top: "Mconv5_stage2_L2" +} +layer { + name: "Mconv6_stage2_L1" + type: "Convolution" + bottom: "Mconv5_stage2_L1" + top: "Mconv6_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L1" + type: "ReLU" + bottom: "Mconv6_stage2_L1" + top: "Mconv6_stage2_L1" +} +layer { + name: "Mconv6_stage2_L2" + type: "Convolution" + bottom: "Mconv5_stage2_L2" + top: "Mconv6_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L2" + type: "ReLU" + bottom: "Mconv6_stage2_L2" + top: "Mconv6_stage2_L2" +} +layer { + name: "Mconv7_stage2_L1" + type: "Convolution" + bottom: "Mconv6_stage2_L1" + top: "Mconv7_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage2_L2" + type: "Convolution" + bottom: "Mconv6_stage2_L2" + top: "Mconv7_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage3" + type: "Concat" + bottom: "Mconv7_stage2_L1" + bottom: "Mconv7_stage2_L2" + bottom: "conv4_4_CPM" + top: "concat_stage3" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage3_L1" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L1" + type: "ReLU" + bottom: "Mconv1_stage3_L1" + top: "Mconv1_stage3_L1" +} +layer { + name: "Mconv1_stage3_L2" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L2" + type: "ReLU" + bottom: "Mconv1_stage3_L2" + top: "Mconv1_stage3_L2" +} +layer { + name: "Mconv2_stage3_L1" + type: "Convolution" + bottom: "Mconv1_stage3_L1" + top: "Mconv2_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L1" + type: "ReLU" + bottom: "Mconv2_stage3_L1" + top: "Mconv2_stage3_L1" +} +layer { + name: "Mconv2_stage3_L2" + type: "Convolution" + bottom: "Mconv1_stage3_L2" + top: "Mconv2_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L2" + type: "ReLU" + bottom: "Mconv2_stage3_L2" + top: "Mconv2_stage3_L2" +} +layer { + name: "Mconv3_stage3_L1" + type: "Convolution" + bottom: "Mconv2_stage3_L1" + top: "Mconv3_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L1" + type: "ReLU" + bottom: "Mconv3_stage3_L1" + top: "Mconv3_stage3_L1" +} +layer { + name: "Mconv3_stage3_L2" + type: "Convolution" + bottom: "Mconv2_stage3_L2" + top: "Mconv3_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L2" + type: "ReLU" + bottom: "Mconv3_stage3_L2" + top: "Mconv3_stage3_L2" +} +layer { + name: "Mconv4_stage3_L1" + type: "Convolution" + bottom: "Mconv3_stage3_L1" + top: "Mconv4_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L1" + type: "ReLU" + bottom: "Mconv4_stage3_L1" + top: "Mconv4_stage3_L1" +} +layer { + name: "Mconv4_stage3_L2" + type: "Convolution" + bottom: "Mconv3_stage3_L2" + top: "Mconv4_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L2" + type: "ReLU" + bottom: "Mconv4_stage3_L2" + top: "Mconv4_stage3_L2" +} +layer { + name: "Mconv5_stage3_L1" + type: "Convolution" + bottom: "Mconv4_stage3_L1" + top: "Mconv5_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L1" + type: "ReLU" + bottom: "Mconv5_stage3_L1" + top: "Mconv5_stage3_L1" +} +layer { + name: "Mconv5_stage3_L2" + type: "Convolution" + bottom: "Mconv4_stage3_L2" + top: "Mconv5_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L2" + type: "ReLU" + bottom: "Mconv5_stage3_L2" + top: "Mconv5_stage3_L2" +} +layer { + name: "Mconv6_stage3_L1" + type: "Convolution" + bottom: "Mconv5_stage3_L1" + top: "Mconv6_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L1" + type: "ReLU" + bottom: "Mconv6_stage3_L1" + top: "Mconv6_stage3_L1" +} +layer { + name: "Mconv6_stage3_L2" + type: "Convolution" + bottom: "Mconv5_stage3_L2" + top: "Mconv6_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L2" + type: "ReLU" + bottom: "Mconv6_stage3_L2" + top: "Mconv6_stage3_L2" +} +layer { + name: "Mconv7_stage3_L1" + type: "Convolution" + bottom: "Mconv6_stage3_L1" + top: "Mconv7_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage3_L2" + type: "Convolution" + bottom: "Mconv6_stage3_L2" + top: "Mconv7_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage4" + type: "Concat" + bottom: "Mconv7_stage3_L1" + bottom: "Mconv7_stage3_L2" + bottom: "conv4_4_CPM" + top: "concat_stage4" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage4_L1" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L1" + type: "ReLU" + bottom: "Mconv1_stage4_L1" + top: "Mconv1_stage4_L1" +} +layer { + name: "Mconv1_stage4_L2" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L2" + type: "ReLU" + bottom: "Mconv1_stage4_L2" + top: "Mconv1_stage4_L2" +} +layer { + name: "Mconv2_stage4_L1" + type: "Convolution" + bottom: "Mconv1_stage4_L1" + top: "Mconv2_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L1" + type: "ReLU" + bottom: "Mconv2_stage4_L1" + top: "Mconv2_stage4_L1" +} +layer { + name: "Mconv2_stage4_L2" + type: "Convolution" + bottom: "Mconv1_stage4_L2" + top: "Mconv2_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L2" + type: "ReLU" + bottom: "Mconv2_stage4_L2" + top: "Mconv2_stage4_L2" +} +layer { + name: "Mconv3_stage4_L1" + type: "Convolution" + bottom: "Mconv2_stage4_L1" + top: "Mconv3_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L1" + type: "ReLU" + bottom: "Mconv3_stage4_L1" + top: "Mconv3_stage4_L1" +} +layer { + name: "Mconv3_stage4_L2" + type: "Convolution" + bottom: "Mconv2_stage4_L2" + top: "Mconv3_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L2" + type: "ReLU" + bottom: "Mconv3_stage4_L2" + top: "Mconv3_stage4_L2" +} +layer { + name: "Mconv4_stage4_L1" + type: "Convolution" + bottom: "Mconv3_stage4_L1" + top: "Mconv4_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L1" + type: "ReLU" + bottom: "Mconv4_stage4_L1" + top: "Mconv4_stage4_L1" +} +layer { + name: "Mconv4_stage4_L2" + type: "Convolution" + bottom: "Mconv3_stage4_L2" + top: "Mconv4_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L2" + type: "ReLU" + bottom: "Mconv4_stage4_L2" + top: "Mconv4_stage4_L2" +} +layer { + name: "Mconv5_stage4_L1" + type: "Convolution" + bottom: "Mconv4_stage4_L1" + top: "Mconv5_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L1" + type: "ReLU" + bottom: "Mconv5_stage4_L1" + top: "Mconv5_stage4_L1" +} +layer { + name: "Mconv5_stage4_L2" + type: "Convolution" + bottom: "Mconv4_stage4_L2" + top: "Mconv5_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L2" + type: "ReLU" + bottom: "Mconv5_stage4_L2" + top: "Mconv5_stage4_L2" +} +layer { + name: "Mconv6_stage4_L1" + type: "Convolution" + bottom: "Mconv5_stage4_L1" + top: "Mconv6_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L1" + type: "ReLU" + bottom: "Mconv6_stage4_L1" + top: "Mconv6_stage4_L1" +} +layer { + name: "Mconv6_stage4_L2" + type: "Convolution" + bottom: "Mconv5_stage4_L2" + top: "Mconv6_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L2" + type: "ReLU" + bottom: "Mconv6_stage4_L2" + top: "Mconv6_stage4_L2" +} +layer { + name: "Mconv7_stage4_L1" + type: "Convolution" + bottom: "Mconv6_stage4_L1" + top: "Mconv7_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage4_L2" + type: "Convolution" + bottom: "Mconv6_stage4_L2" + top: "Mconv7_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage5" + type: "Concat" + bottom: "Mconv7_stage4_L1" + bottom: "Mconv7_stage4_L2" + bottom: "conv4_4_CPM" + top: "concat_stage5" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage5_L1" + type: "Convolution" + bottom: "concat_stage5" + top: "Mconv1_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage5_L1" + type: "ReLU" + bottom: "Mconv1_stage5_L1" + top: "Mconv1_stage5_L1" +} +layer { + name: "Mconv1_stage5_L2" + type: "Convolution" + bottom: "concat_stage5" + top: "Mconv1_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage5_L2" + type: "ReLU" + bottom: "Mconv1_stage5_L2" + top: "Mconv1_stage5_L2" +} +layer { + name: "Mconv2_stage5_L1" + type: "Convolution" + bottom: "Mconv1_stage5_L1" + top: "Mconv2_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage5_L1" + type: "ReLU" + bottom: "Mconv2_stage5_L1" + top: "Mconv2_stage5_L1" +} +layer { + name: "Mconv2_stage5_L2" + type: "Convolution" + bottom: "Mconv1_stage5_L2" + top: "Mconv2_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage5_L2" + type: "ReLU" + bottom: "Mconv2_stage5_L2" + top: "Mconv2_stage5_L2" +} +layer { + name: "Mconv3_stage5_L1" + type: "Convolution" + bottom: "Mconv2_stage5_L1" + top: "Mconv3_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage5_L1" + type: "ReLU" + bottom: "Mconv3_stage5_L1" + top: "Mconv3_stage5_L1" +} +layer { + name: "Mconv3_stage5_L2" + type: "Convolution" + bottom: "Mconv2_stage5_L2" + top: "Mconv3_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage5_L2" + type: "ReLU" + bottom: "Mconv3_stage5_L2" + top: "Mconv3_stage5_L2" +} +layer { + name: "Mconv4_stage5_L1" + type: "Convolution" + bottom: "Mconv3_stage5_L1" + top: "Mconv4_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage5_L1" + type: "ReLU" + bottom: "Mconv4_stage5_L1" + top: "Mconv4_stage5_L1" +} +layer { + name: "Mconv4_stage5_L2" + type: "Convolution" + bottom: "Mconv3_stage5_L2" + top: "Mconv4_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage5_L2" + type: "ReLU" + bottom: "Mconv4_stage5_L2" + top: "Mconv4_stage5_L2" +} +layer { + name: "Mconv5_stage5_L1" + type: "Convolution" + bottom: "Mconv4_stage5_L1" + top: "Mconv5_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage5_L1" + type: "ReLU" + bottom: "Mconv5_stage5_L1" + top: "Mconv5_stage5_L1" +} +layer { + name: "Mconv5_stage5_L2" + type: "Convolution" + bottom: "Mconv4_stage5_L2" + top: "Mconv5_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage5_L2" + type: "ReLU" + bottom: "Mconv5_stage5_L2" + top: "Mconv5_stage5_L2" +} +layer { + name: "Mconv6_stage5_L1" + type: "Convolution" + bottom: "Mconv5_stage5_L1" + top: "Mconv6_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage5_L1" + type: "ReLU" + bottom: "Mconv6_stage5_L1" + top: "Mconv6_stage5_L1" +} +layer { + name: "Mconv6_stage5_L2" + type: "Convolution" + bottom: "Mconv5_stage5_L2" + top: "Mconv6_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage5_L2" + type: "ReLU" + bottom: "Mconv6_stage5_L2" + top: "Mconv6_stage5_L2" +} +layer { + name: "Mconv7_stage5_L1" + type: "Convolution" + bottom: "Mconv6_stage5_L1" + top: "Mconv7_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage5_L2" + type: "Convolution" + bottom: "Mconv6_stage5_L2" + top: "Mconv7_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage6" + type: "Concat" + bottom: "Mconv7_stage5_L1" + bottom: "Mconv7_stage5_L2" + bottom: "conv4_4_CPM" + top: "concat_stage6" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage6_L1" + type: "Convolution" + bottom: "concat_stage6" + top: "Mconv1_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage6_L1" + type: "ReLU" + bottom: "Mconv1_stage6_L1" + top: "Mconv1_stage6_L1" +} +layer { + name: "Mconv1_stage6_L2" + type: "Convolution" + bottom: "concat_stage6" + top: "Mconv1_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage6_L2" + type: "ReLU" + bottom: "Mconv1_stage6_L2" + top: "Mconv1_stage6_L2" +} +layer { + name: "Mconv2_stage6_L1" + type: "Convolution" + bottom: "Mconv1_stage6_L1" + top: "Mconv2_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage6_L1" + type: "ReLU" + bottom: "Mconv2_stage6_L1" + top: "Mconv2_stage6_L1" +} +layer { + name: "Mconv2_stage6_L2" + type: "Convolution" + bottom: "Mconv1_stage6_L2" + top: "Mconv2_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage6_L2" + type: "ReLU" + bottom: "Mconv2_stage6_L2" + top: "Mconv2_stage6_L2" +} +layer { + name: "Mconv3_stage6_L1" + type: "Convolution" + bottom: "Mconv2_stage6_L1" + top: "Mconv3_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage6_L1" + type: "ReLU" + bottom: "Mconv3_stage6_L1" + top: "Mconv3_stage6_L1" +} +layer { + name: "Mconv3_stage6_L2" + type: "Convolution" + bottom: "Mconv2_stage6_L2" + top: "Mconv3_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage6_L2" + type: "ReLU" + bottom: "Mconv3_stage6_L2" + top: "Mconv3_stage6_L2" +} +layer { + name: "Mconv4_stage6_L1" + type: "Convolution" + bottom: "Mconv3_stage6_L1" + top: "Mconv4_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage6_L1" + type: "ReLU" + bottom: "Mconv4_stage6_L1" + top: "Mconv4_stage6_L1" +} +layer { + name: "Mconv4_stage6_L2" + type: "Convolution" + bottom: "Mconv3_stage6_L2" + top: "Mconv4_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage6_L2" + type: "ReLU" + bottom: "Mconv4_stage6_L2" + top: "Mconv4_stage6_L2" +} +layer { + name: "Mconv5_stage6_L1" + type: "Convolution" + bottom: "Mconv4_stage6_L1" + top: "Mconv5_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage6_L1" + type: "ReLU" + bottom: "Mconv5_stage6_L1" + top: "Mconv5_stage6_L1" +} +layer { + name: "Mconv5_stage6_L2" + type: "Convolution" + bottom: "Mconv4_stage6_L2" + top: "Mconv5_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage6_L2" + type: "ReLU" + bottom: "Mconv5_stage6_L2" + top: "Mconv5_stage6_L2" +} +layer { + name: "Mconv6_stage6_L1" + type: "Convolution" + bottom: "Mconv5_stage6_L1" + top: "Mconv6_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage6_L1" + type: "ReLU" + bottom: "Mconv6_stage6_L1" + top: "Mconv6_stage6_L1" +} +layer { + name: "Mconv6_stage6_L2" + type: "Convolution" + bottom: "Mconv5_stage6_L2" + top: "Mconv6_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage6_L2" + type: "ReLU" + bottom: "Mconv6_stage6_L2" + top: "Mconv6_stage6_L2" +} +layer { + name: "Mconv7_stage6_L1" + type: "Convolution" + bottom: "Mconv6_stage6_L1" + top: "Mconv7_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage6_L2" + type: "Convolution" + bottom: "Mconv6_stage6_L2" + top: "Mconv7_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage7" + type: "Concat" + bottom: "Mconv7_stage6_L2" + bottom: "Mconv7_stage6_L1" + # top: "concat_stage7" + top: "net_output" + concat_param { + axis: 1 + } +} \ No newline at end of file diff --git a/res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt b/res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt new file mode 100644 index 0000000..02ec183 --- /dev/null +++ b/res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt @@ -0,0 +1,2081 @@ +input: "image" +input_dim: 1 +input_dim: 3 +input_dim: 1 # This value will be defined at runtime +input_dim: 1 # This value will be defined at runtime +layer { + name: "conv1_1" + type: "Convolution" + bottom: "image" + top: "conv1_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1_stage1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1_stage1" + top: "conv2_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2_stage1" + type: "Pooling" + bottom: "conv2_2" + top: "pool2_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2_stage1" + top: "conv3_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "conv3_4" + type: "Convolution" + bottom: "conv3_3" + top: "conv3_4" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_4" + type: "ReLU" + bottom: "conv3_4" + top: "conv3_4" +} +layer { + name: "pool3_stage1" + type: "Pooling" + bottom: "conv3_4" + top: "pool3_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3_stage1" + top: "conv4_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3_CPM" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_3_CPM" + type: "ReLU" + bottom: "conv4_3_CPM" + top: "conv4_3_CPM" +} +layer { + name: "conv4_4_CPM" + type: "Convolution" + bottom: "conv4_3_CPM" + top: "conv4_4_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_4_CPM" + type: "ReLU" + bottom: "conv4_4_CPM" + top: "conv4_4_CPM" +} +layer { + name: "conv5_1_CPM_L1" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L1" + type: "ReLU" + bottom: "conv5_1_CPM_L1" + top: "conv5_1_CPM_L1" +} +layer { + name: "conv5_1_CPM_L2" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L2" + type: "ReLU" + bottom: "conv5_1_CPM_L2" + top: "conv5_1_CPM_L2" +} +layer { + name: "conv5_2_CPM_L1" + type: "Convolution" + bottom: "conv5_1_CPM_L1" + top: "conv5_2_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L1" + type: "ReLU" + bottom: "conv5_2_CPM_L1" + top: "conv5_2_CPM_L1" +} +layer { + name: "conv5_2_CPM_L2" + type: "Convolution" + bottom: "conv5_1_CPM_L2" + top: "conv5_2_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L2" + type: "ReLU" + bottom: "conv5_2_CPM_L2" + top: "conv5_2_CPM_L2" +} +layer { + name: "conv5_3_CPM_L1" + type: "Convolution" + bottom: "conv5_2_CPM_L1" + top: "conv5_3_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L1" + type: "ReLU" + bottom: "conv5_3_CPM_L1" + top: "conv5_3_CPM_L1" +} +layer { + name: "conv5_3_CPM_L2" + type: "Convolution" + bottom: "conv5_2_CPM_L2" + top: "conv5_3_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L2" + type: "ReLU" + bottom: "conv5_3_CPM_L2" + top: "conv5_3_CPM_L2" +} +layer { + name: "conv5_4_CPM_L1" + type: "Convolution" + bottom: "conv5_3_CPM_L1" + top: "conv5_4_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L1" + type: "ReLU" + bottom: "conv5_4_CPM_L1" + top: "conv5_4_CPM_L1" +} +layer { + name: "conv5_4_CPM_L2" + type: "Convolution" + bottom: "conv5_3_CPM_L2" + top: "conv5_4_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L2" + type: "ReLU" + bottom: "conv5_4_CPM_L2" + top: "conv5_4_CPM_L2" +} +layer { + name: "conv5_5_CPM_L1" + type: "Convolution" + bottom: "conv5_4_CPM_L1" + top: "conv5_5_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "conv5_5_CPM_L2" + type: "Convolution" + bottom: "conv5_4_CPM_L2" + top: "conv5_5_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage2" + type: "Concat" + bottom: "conv5_5_CPM_L1" + bottom: "conv5_5_CPM_L2" + bottom: "conv4_4_CPM" + top: "concat_stage2" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage2_L1" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L1" + type: "ReLU" + bottom: "Mconv1_stage2_L1" + top: "Mconv1_stage2_L1" +} +layer { + name: "Mconv1_stage2_L2" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L2" + type: "ReLU" + bottom: "Mconv1_stage2_L2" + top: "Mconv1_stage2_L2" +} +layer { + name: "Mconv2_stage2_L1" + type: "Convolution" + bottom: "Mconv1_stage2_L1" + top: "Mconv2_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L1" + type: "ReLU" + bottom: "Mconv2_stage2_L1" + top: "Mconv2_stage2_L1" +} +layer { + name: "Mconv2_stage2_L2" + type: "Convolution" + bottom: "Mconv1_stage2_L2" + top: "Mconv2_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L2" + type: "ReLU" + bottom: "Mconv2_stage2_L2" + top: "Mconv2_stage2_L2" +} +layer { + name: "Mconv3_stage2_L1" + type: "Convolution" + bottom: "Mconv2_stage2_L1" + top: "Mconv3_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L1" + type: "ReLU" + bottom: "Mconv3_stage2_L1" + top: "Mconv3_stage2_L1" +} +layer { + name: "Mconv3_stage2_L2" + type: "Convolution" + bottom: "Mconv2_stage2_L2" + top: "Mconv3_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L2" + type: "ReLU" + bottom: "Mconv3_stage2_L2" + top: "Mconv3_stage2_L2" +} +layer { + name: "Mconv4_stage2_L1" + type: "Convolution" + bottom: "Mconv3_stage2_L1" + top: "Mconv4_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L1" + type: "ReLU" + bottom: "Mconv4_stage2_L1" + top: "Mconv4_stage2_L1" +} +layer { + name: "Mconv4_stage2_L2" + type: "Convolution" + bottom: "Mconv3_stage2_L2" + top: "Mconv4_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L2" + type: "ReLU" + bottom: "Mconv4_stage2_L2" + top: "Mconv4_stage2_L2" +} +layer { + name: "Mconv5_stage2_L1" + type: "Convolution" + bottom: "Mconv4_stage2_L1" + top: "Mconv5_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L1" + type: "ReLU" + bottom: "Mconv5_stage2_L1" + top: "Mconv5_stage2_L1" +} +layer { + name: "Mconv5_stage2_L2" + type: "Convolution" + bottom: "Mconv4_stage2_L2" + top: "Mconv5_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L2" + type: "ReLU" + bottom: "Mconv5_stage2_L2" + top: "Mconv5_stage2_L2" +} +layer { + name: "Mconv6_stage2_L1" + type: "Convolution" + bottom: "Mconv5_stage2_L1" + top: "Mconv6_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L1" + type: "ReLU" + bottom: "Mconv6_stage2_L1" + top: "Mconv6_stage2_L1" +} +layer { + name: "Mconv6_stage2_L2" + type: "Convolution" + bottom: "Mconv5_stage2_L2" + top: "Mconv6_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L2" + type: "ReLU" + bottom: "Mconv6_stage2_L2" + top: "Mconv6_stage2_L2" +} +layer { + name: "Mconv7_stage2_L1" + type: "Convolution" + bottom: "Mconv6_stage2_L1" + top: "Mconv7_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage2_L2" + type: "Convolution" + bottom: "Mconv6_stage2_L2" + top: "Mconv7_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage3" + type: "Concat" + bottom: "Mconv7_stage2_L1" + bottom: "Mconv7_stage2_L2" + bottom: "conv4_4_CPM" + top: "concat_stage3" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage3_L1" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L1" + type: "ReLU" + bottom: "Mconv1_stage3_L1" + top: "Mconv1_stage3_L1" +} +layer { + name: "Mconv1_stage3_L2" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L2" + type: "ReLU" + bottom: "Mconv1_stage3_L2" + top: "Mconv1_stage3_L2" +} +layer { + name: "Mconv2_stage3_L1" + type: "Convolution" + bottom: "Mconv1_stage3_L1" + top: "Mconv2_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L1" + type: "ReLU" + bottom: "Mconv2_stage3_L1" + top: "Mconv2_stage3_L1" +} +layer { + name: "Mconv2_stage3_L2" + type: "Convolution" + bottom: "Mconv1_stage3_L2" + top: "Mconv2_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L2" + type: "ReLU" + bottom: "Mconv2_stage3_L2" + top: "Mconv2_stage3_L2" +} +layer { + name: "Mconv3_stage3_L1" + type: "Convolution" + bottom: "Mconv2_stage3_L1" + top: "Mconv3_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L1" + type: "ReLU" + bottom: "Mconv3_stage3_L1" + top: "Mconv3_stage3_L1" +} +layer { + name: "Mconv3_stage3_L2" + type: "Convolution" + bottom: "Mconv2_stage3_L2" + top: "Mconv3_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L2" + type: "ReLU" + bottom: "Mconv3_stage3_L2" + top: "Mconv3_stage3_L2" +} +layer { + name: "Mconv4_stage3_L1" + type: "Convolution" + bottom: "Mconv3_stage3_L1" + top: "Mconv4_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L1" + type: "ReLU" + bottom: "Mconv4_stage3_L1" + top: "Mconv4_stage3_L1" +} +layer { + name: "Mconv4_stage3_L2" + type: "Convolution" + bottom: "Mconv3_stage3_L2" + top: "Mconv4_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L2" + type: "ReLU" + bottom: "Mconv4_stage3_L2" + top: "Mconv4_stage3_L2" +} +layer { + name: "Mconv5_stage3_L1" + type: "Convolution" + bottom: "Mconv4_stage3_L1" + top: "Mconv5_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L1" + type: "ReLU" + bottom: "Mconv5_stage3_L1" + top: "Mconv5_stage3_L1" +} +layer { + name: "Mconv5_stage3_L2" + type: "Convolution" + bottom: "Mconv4_stage3_L2" + top: "Mconv5_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L2" + type: "ReLU" + bottom: "Mconv5_stage3_L2" + top: "Mconv5_stage3_L2" +} +layer { + name: "Mconv6_stage3_L1" + type: "Convolution" + bottom: "Mconv5_stage3_L1" + top: "Mconv6_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L1" + type: "ReLU" + bottom: "Mconv6_stage3_L1" + top: "Mconv6_stage3_L1" +} +layer { + name: "Mconv6_stage3_L2" + type: "Convolution" + bottom: "Mconv5_stage3_L2" + top: "Mconv6_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L2" + type: "ReLU" + bottom: "Mconv6_stage3_L2" + top: "Mconv6_stage3_L2" +} +layer { + name: "Mconv7_stage3_L1" + type: "Convolution" + bottom: "Mconv6_stage3_L1" + top: "Mconv7_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage3_L2" + type: "Convolution" + bottom: "Mconv6_stage3_L2" + top: "Mconv7_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage4" + type: "Concat" + bottom: "Mconv7_stage3_L1" + bottom: "Mconv7_stage3_L2" + bottom: "conv4_4_CPM" + top: "concat_stage4" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage4_L1" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L1" + type: "ReLU" + bottom: "Mconv1_stage4_L1" + top: "Mconv1_stage4_L1" +} +layer { + name: "Mconv1_stage4_L2" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L2" + type: "ReLU" + bottom: "Mconv1_stage4_L2" + top: "Mconv1_stage4_L2" +} +layer { + name: "Mconv2_stage4_L1" + type: "Convolution" + bottom: "Mconv1_stage4_L1" + top: "Mconv2_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L1" + type: "ReLU" + bottom: "Mconv2_stage4_L1" + top: "Mconv2_stage4_L1" +} +layer { + name: "Mconv2_stage4_L2" + type: "Convolution" + bottom: "Mconv1_stage4_L2" + top: "Mconv2_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L2" + type: "ReLU" + bottom: "Mconv2_stage4_L2" + top: "Mconv2_stage4_L2" +} +layer { + name: "Mconv3_stage4_L1" + type: "Convolution" + bottom: "Mconv2_stage4_L1" + top: "Mconv3_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L1" + type: "ReLU" + bottom: "Mconv3_stage4_L1" + top: "Mconv3_stage4_L1" +} +layer { + name: "Mconv3_stage4_L2" + type: "Convolution" + bottom: "Mconv2_stage4_L2" + top: "Mconv3_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L2" + type: "ReLU" + bottom: "Mconv3_stage4_L2" + top: "Mconv3_stage4_L2" +} +layer { + name: "Mconv4_stage4_L1" + type: "Convolution" + bottom: "Mconv3_stage4_L1" + top: "Mconv4_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L1" + type: "ReLU" + bottom: "Mconv4_stage4_L1" + top: "Mconv4_stage4_L1" +} +layer { + name: "Mconv4_stage4_L2" + type: "Convolution" + bottom: "Mconv3_stage4_L2" + top: "Mconv4_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L2" + type: "ReLU" + bottom: "Mconv4_stage4_L2" + top: "Mconv4_stage4_L2" +} +layer { + name: "Mconv5_stage4_L1" + type: "Convolution" + bottom: "Mconv4_stage4_L1" + top: "Mconv5_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L1" + type: "ReLU" + bottom: "Mconv5_stage4_L1" + top: "Mconv5_stage4_L1" +} +layer { + name: "Mconv5_stage4_L2" + type: "Convolution" + bottom: "Mconv4_stage4_L2" + top: "Mconv5_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L2" + type: "ReLU" + bottom: "Mconv5_stage4_L2" + top: "Mconv5_stage4_L2" +} +layer { + name: "Mconv6_stage4_L1" + type: "Convolution" + bottom: "Mconv5_stage4_L1" + top: "Mconv6_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L1" + type: "ReLU" + bottom: "Mconv6_stage4_L1" + top: "Mconv6_stage4_L1" +} +layer { + name: "Mconv6_stage4_L2" + type: "Convolution" + bottom: "Mconv5_stage4_L2" + top: "Mconv6_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L2" + type: "ReLU" + bottom: "Mconv6_stage4_L2" + top: "Mconv6_stage4_L2" +} +layer { + name: "Mconv7_stage4_L1" + type: "Convolution" + bottom: "Mconv6_stage4_L1" + top: "Mconv7_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage4_L2" + type: "Convolution" + bottom: "Mconv6_stage4_L2" + top: "Mconv7_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage7" + type: "Concat" + bottom: "Mconv7_stage4_L2" + bottom: "Mconv7_stage4_L1" + top: "net_output" + concat_param { + axis: 1 + } +} diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 62236d2..381b3cf 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -11,7 +11,7 @@ namespace computervision { - cv::VideoCapture cap(0); + cv::VideoCapture cap(1); cv::Mat img, imgGray, img2, img2Gray, img3, img4; @@ -59,10 +59,10 @@ namespace computervision putText(cameraFrame,hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255),3); imshow("camera", cameraFrame); - /* imshow("output", frameOut); + imshow("output", frameOut); imshow("foreground", foreground); imshow("handMask", handMask); - imshow("handDetection", fingerCountDebug);*/ + imshow("handDetection", fingerCountDebug); int key = waitKey(1); diff --git a/src/computervision/OpenPoseImage.cpp b/src/computervision/OpenPoseImage.cpp new file mode 100644 index 0000000..9e6578a --- /dev/null +++ b/src/computervision/OpenPoseImage.cpp @@ -0,0 +1,149 @@ +#include "OpenPoseImage.h" + +using namespace std; +using namespace cv; +using namespace cv::dnn; + +namespace computervision +{ +#define MPI + + +#ifdef MPI + const int POSE_PAIRS[14][2] = + { + {0,1}, {1,2}, {2,3}, + {3,4}, {1,5}, {5,6}, + {6,7}, {1,14}, {14,8}, {8,9}, + {9,10}, {14,11}, {11,12}, {12,13} + }; + + string protoFile = "res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"; + string weightsFile = "res/pose/mpi/pose_iter_160000.caffemodel"; + + int nPoints = 15; +#endif + +#ifdef COCO + const int POSE_PAIRS[17][2] = + { + {1,2}, {1,5}, {2,3}, + {3,4}, {5,6}, {6,7}, + {1,8}, {8,9}, {9,10}, + {1,11}, {11,12}, {12,13}, + {1,0}, {0,14}, + {14,16}, {0,15}, {15,17} + }; + + string protoFile = "pose/coco/pose_deploy_linevec.prototxt"; + string weightsFile = "pose/coco/pose_iter_440000.caffemodel"; + + int nPoints = 18; +#endif + + void OpenPoseImage::movementSkeleton(Mat inputImage) { + + + cout << "USAGE : ./OpenPose " << endl; + cout << "USAGE : ./OpenPose " << endl; + + string device = "cpu"; + + //string imageFile = "single.jpeg"; + // Take arguments from commmand line + /* if (argc == 2) + { + if ((string)argv[1] == "gpu") + device = "gpu"; + else + imageFile = argv[1]; + } + else if (argc == 3) + { + imageFile = argv[1]; + if ((string)argv[2] == "gpu") + device = "gpu"; + }*/ + + + + int inWidth = 368; + int inHeight = 368; + float thresh = 0.1; + + Mat frame = inputImage; + Mat frameCopy = frame.clone(); + int frameWidth = frame.cols; + int frameHeight = frame.rows; + + double t = (double)cv::getTickCount(); + Net net = readNetFromCaffe(protoFile, weightsFile); + + if (device == "cpu") + { + cout << "Using CPU device" << endl; + net.setPreferableBackend(DNN_TARGET_CPU); + } + else if (device == "gpu") + { + cout << "Using GPU device" << endl; + net.setPreferableBackend(DNN_BACKEND_CUDA); + net.setPreferableTarget(DNN_TARGET_CUDA); + } + + Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false); + + net.setInput(inpBlob); + + Mat output = net.forward(); + + int H = output.size[2]; + int W = output.size[3]; + + // find the position of the body parts + vector points(nPoints); + for (int n = 0; n < nPoints; n++) + { + // Probability map of corresponding body's part. + Mat probMap(H, W, CV_32F, output.ptr(0, n)); + + Point2f p(-1, -1); + Point maxLoc; + double prob; + minMaxLoc(probMap, 0, &prob, 0, &maxLoc); + if (prob > thresh) + { + p = maxLoc; + p.x *= (float)frameWidth / W; + p.y *= (float)frameHeight / H; + + circle(frameCopy, cv::Point((int)p.x, (int)p.y), 8, Scalar(0, 255, 255), -1); + cv::putText(frameCopy, cv::format("%d", n), cv::Point((int)p.x, (int)p.y), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2); + + } + points[n] = p; + } + + int nPairs = sizeof(POSE_PAIRS) / sizeof(POSE_PAIRS[0]); + + for (int n = 0; n < nPairs; n++) + { + // lookup 2 connected body/hand parts + Point2f partA = points[POSE_PAIRS[n][0]]; + Point2f partB = points[POSE_PAIRS[n][1]]; + + if (partA.x <= 0 || partA.y <= 0 || partB.x <= 0 || partB.y <= 0) + continue; + + line(frame, partA, partB, Scalar(0, 255, 255), 8); + circle(frame, partA, 8, Scalar(0, 0, 255), -1); + circle(frame, partB, 8, Scalar(0, 0, 255), -1); + } + + t = ((double)cv::getTickCount() - t) / cv::getTickFrequency(); + cout << "Time Taken = " << t << endl; + imshow("Output-Keypoints", frameCopy); + imshow("Output-Skeleton", frame); + imwrite("Output-Skeleton.jpg", frame); + } +} \ No newline at end of file diff --git a/src/computervision/OpenPoseVideo.cpp b/src/computervision/OpenPoseVideo.cpp new file mode 100644 index 0000000..882c728 --- /dev/null +++ b/src/computervision/OpenPoseVideo.cpp @@ -0,0 +1,144 @@ +#include "OpenPoseVideo.h" + +using namespace std; +using namespace cv; +using namespace cv::dnn; + +namespace computervision +{ +#define MPI + +#ifdef MPI + const int POSE_PAIRS[7][2] = + { + {0,1}, {1,2}, {2,3}, + {3,4}, {1,5}, {5,6}, + {6,7} + }; + + string protoFile = "res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"; + string weightsFile = "res/pose/mpi/pose_iter_160000.caffemodel"; + + int nPoints = 8; +#endif + +#ifdef COCO + const int POSE_PAIRS[17][2] = + { + {1,2}, {1,5}, {2,3}, + {3,4}, {5,6}, {6,7}, + {1,8}, {8,9}, {9,10}, + {1,11}, {11,12}, {12,13}, + {1,0}, {0,14}, + {14,16}, {0,15}, {15,17} + }; + + string protoFile = "pose/coco/pose_deploy_linevec.prototxt"; + string weightsFile = "pose/coco/pose_iter_440000.caffemodel"; + + int nPoints = 18; +#endif + Net net; + + void OpenPoseVideo::setup() { + net = readNetFromCaffe(protoFile, weightsFile); + } + + void OpenPoseVideo::movementSkeleton(Mat inputImage) { + + //string device = "cpu"; + //string videoFile = "sample_video.mp4"; + + // Take arguments from commmand line + /*if (argc == 2) + { + if ((string)argv[1] == "gpu") + device = "gpu"; + else + videoFile = argv[1]; + } + else if (argc == 3) + { + videoFile = argv[1]; + if ((string)argv[2] == "gpu") + device = "gpu"; + }*/ + + int inWidth = 368; + int inHeight = 368; + float thresh = 0.01; + + Mat frame, frameCopy; + int frameWidth = inputImage.size().width; + int frameHeight = inputImage.size().height; + + /*if (device == "cpu") + { + cout << "Using CPU device" << endl; + net.setPreferableBackend(DNN_TARGET_CPU); + } + else if (device == "gpu") + { + cout << "Using GPU device" << endl; + net.setPreferableBackend(DNN_BACKEND_CUDA); + net.setPreferableTarget(DNN_TARGET_CUDA); + }*/ + + double t = (double)cv::getTickCount(); + + frame = inputImage; + frameCopy = frame.clone(); + Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false); + + net.setInput(inpBlob); + + Mat output = net.forward(); + + int H = output.size[2]; + int W = output.size[3]; + + // find the position of the body parts + vector points(nPoints); + for (int n = 0; n < nPoints; n++) + { + // Probability map of corresponding body's part. + Mat probMap(H, W, CV_32F, output.ptr(0, n)); + + Point2f p(-1, -1); + Point maxLoc; + double prob; + minMaxLoc(probMap, 0, &prob, 0, &maxLoc); + if (prob > thresh) + { + p = maxLoc; + p.x *= (float)frameWidth / W; + p.y *= (float)frameHeight / H; + + circle(frameCopy, cv::Point((int)p.x, (int)p.y), 8, Scalar(0, 255, 255), -1); + cv::putText(frameCopy, cv::format("%d", n), cv::Point((int)p.x, (int)p.y), cv::FONT_HERSHEY_COMPLEX, 1.1, cv::Scalar(0, 0, 255), 2); + } + points[n] = p; + } + + int nPairs = sizeof(POSE_PAIRS) / sizeof(POSE_PAIRS[0]); + + for (int n = 0; n < nPairs; n++) + { + // lookup 2 connected body/hand parts + Point2f partA = points[POSE_PAIRS[n][0]]; + Point2f partB = points[POSE_PAIRS[n][1]]; + + if (partA.x <= 0 || partA.y <= 0 || partB.x <= 0 || partB.y <= 0) + continue; + + line(frame, partA, partB, Scalar(0, 255, 255), 8); + circle(frame, partA, 8, Scalar(0, 0, 255), -1); + circle(frame, partB, 8, Scalar(0, 0, 255), -1); + } + + t = ((double)cv::getTickCount() - t) / cv::getTickFrequency(); + cv::putText(frame, cv::format("time taken = %.2f sec", t), cv::Point(50, 50), cv::FONT_HERSHEY_COMPLEX, .8, cv::Scalar(255, 50, 0), 2); + // imshow("Output-Keypoints", frameCopy); + imshow("Output-Skeleton", frame); + } +} \ No newline at end of file diff --git a/src/computervision/OpenPoseVideo.h b/src/computervision/OpenPoseVideo.h new file mode 100644 index 0000000..eaef736 --- /dev/null +++ b/src/computervision/OpenPoseVideo.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include +#include + +using namespace cv; + +namespace computervision +{ + class OpenPoseVideo{ + private: + + public: + void movementSkeleton(Mat inputImage); + void setup(); + }; +} diff --git a/src/main.cpp b/src/main.cpp index 7bab3f9..1e78430 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -18,6 +18,8 @@ #include "toolbox/toolbox.h" #include "computervision/ObjectDetection.h" +//#include "computervision/OpenPoseImage.h" +#include "computervision/OpenPoseVideo.h" #pragma comment(lib, "glfw3.lib") #pragma comment(lib, "glew32s.lib") @@ -64,12 +66,17 @@ int main(void) // create object detection object instance computervision::ObjectDetection objDetect; + //computervision::OpenPoseImage openPoseImage; + computervision::OpenPoseVideo openPoseVideo; // set up object detection //objDetect.setup(); cv::Mat cameraFrame; + + openPoseVideo.setup(); + // Main game loop while (!glfwWindowShouldClose(window)) { @@ -87,8 +94,8 @@ int main(void) render_engine::renderer::Render(entity, shader); cameraFrame = objDetect.readCamera(); - objDetect.detectHand(cameraFrame); - + //objDetect.detectHand(cameraFrame); + openPoseVideo.movementSkeleton(cameraFrame); // Finish up shader.Stop(); From a68c6a57bf65754881f6824e6ffc8f23de7d845a Mon Sep 17 00:00:00 2001 From: Jasper Date: Fri, 28 May 2021 12:32:10 +0200 Subject: [PATCH 02/24] [EDIT] edited file --- src/computervision/OpenPoseImage.cpp | 149 --------------------------- wk2_fps.vcxproj | 2 + wk2_fps.vcxproj.filters | 6 ++ 3 files changed, 8 insertions(+), 149 deletions(-) delete mode 100644 src/computervision/OpenPoseImage.cpp diff --git a/src/computervision/OpenPoseImage.cpp b/src/computervision/OpenPoseImage.cpp deleted file mode 100644 index 9e6578a..0000000 --- a/src/computervision/OpenPoseImage.cpp +++ /dev/null @@ -1,149 +0,0 @@ -#include "OpenPoseImage.h" - -using namespace std; -using namespace cv; -using namespace cv::dnn; - -namespace computervision -{ -#define MPI - - -#ifdef MPI - const int POSE_PAIRS[14][2] = - { - {0,1}, {1,2}, {2,3}, - {3,4}, {1,5}, {5,6}, - {6,7}, {1,14}, {14,8}, {8,9}, - {9,10}, {14,11}, {11,12}, {12,13} - }; - - string protoFile = "res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"; - string weightsFile = "res/pose/mpi/pose_iter_160000.caffemodel"; - - int nPoints = 15; -#endif - -#ifdef COCO - const int POSE_PAIRS[17][2] = - { - {1,2}, {1,5}, {2,3}, - {3,4}, {5,6}, {6,7}, - {1,8}, {8,9}, {9,10}, - {1,11}, {11,12}, {12,13}, - {1,0}, {0,14}, - {14,16}, {0,15}, {15,17} - }; - - string protoFile = "pose/coco/pose_deploy_linevec.prototxt"; - string weightsFile = "pose/coco/pose_iter_440000.caffemodel"; - - int nPoints = 18; -#endif - - void OpenPoseImage::movementSkeleton(Mat inputImage) { - - - cout << "USAGE : ./OpenPose " << endl; - cout << "USAGE : ./OpenPose " << endl; - - string device = "cpu"; - - //string imageFile = "single.jpeg"; - // Take arguments from commmand line - /* if (argc == 2) - { - if ((string)argv[1] == "gpu") - device = "gpu"; - else - imageFile = argv[1]; - } - else if (argc == 3) - { - imageFile = argv[1]; - if ((string)argv[2] == "gpu") - device = "gpu"; - }*/ - - - - int inWidth = 368; - int inHeight = 368; - float thresh = 0.1; - - Mat frame = inputImage; - Mat frameCopy = frame.clone(); - int frameWidth = frame.cols; - int frameHeight = frame.rows; - - double t = (double)cv::getTickCount(); - Net net = readNetFromCaffe(protoFile, weightsFile); - - if (device == "cpu") - { - cout << "Using CPU device" << endl; - net.setPreferableBackend(DNN_TARGET_CPU); - } - else if (device == "gpu") - { - cout << "Using GPU device" << endl; - net.setPreferableBackend(DNN_BACKEND_CUDA); - net.setPreferableTarget(DNN_TARGET_CUDA); - } - - Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false); - - net.setInput(inpBlob); - - Mat output = net.forward(); - - int H = output.size[2]; - int W = output.size[3]; - - // find the position of the body parts - vector points(nPoints); - for (int n = 0; n < nPoints; n++) - { - // Probability map of corresponding body's part. - Mat probMap(H, W, CV_32F, output.ptr(0, n)); - - Point2f p(-1, -1); - Point maxLoc; - double prob; - minMaxLoc(probMap, 0, &prob, 0, &maxLoc); - if (prob > thresh) - { - p = maxLoc; - p.x *= (float)frameWidth / W; - p.y *= (float)frameHeight / H; - - circle(frameCopy, cv::Point((int)p.x, (int)p.y), 8, Scalar(0, 255, 255), -1); - cv::putText(frameCopy, cv::format("%d", n), cv::Point((int)p.x, (int)p.y), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2); - - } - points[n] = p; - } - - int nPairs = sizeof(POSE_PAIRS) / sizeof(POSE_PAIRS[0]); - - for (int n = 0; n < nPairs; n++) - { - // lookup 2 connected body/hand parts - Point2f partA = points[POSE_PAIRS[n][0]]; - Point2f partB = points[POSE_PAIRS[n][1]]; - - if (partA.x <= 0 || partA.y <= 0 || partB.x <= 0 || partB.y <= 0) - continue; - - line(frame, partA, partB, Scalar(0, 255, 255), 8); - circle(frame, partA, 8, Scalar(0, 0, 255), -1); - circle(frame, partB, 8, Scalar(0, 0, 255), -1); - } - - t = ((double)cv::getTickCount() - t) / cv::getTickFrequency(); - cout << "Time Taken = " << t << endl; - imshow("Output-Keypoints", frameCopy); - imshow("Output-Skeleton", frame); - imwrite("Output-Skeleton.jpg", frame); - } -} \ No newline at end of file diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index fe5e71a..62e66d6 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -21,6 +21,7 @@ + @@ -38,6 +39,7 @@ + diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index 54a06e5..9e83634 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -57,6 +57,9 @@ Source Files + + Source Files + @@ -104,6 +107,9 @@ Header Files + + Header Files + From 40529f84b3e504fbb6720ba2bdc972af5addb757 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 28 May 2021 15:31:21 +0200 Subject: [PATCH 03/24] [ADD] basis for async arm detection --- src/computervision/ObjectDetection.cpp | 7 +- src/computervision/ObjectDetection.h | 2 + src/computervision/OpenPoseVideo.cpp | 64 ++++--------------- src/computervision/OpenPoseVideo.h | 2 +- .../async/async_arm_detection.cpp | 43 +++++++++++++ .../async/async_arm_detection.h | 22 +++++++ src/main.cpp | 17 ++++- wk2_fps.vcxproj | 7 ++ wk2_fps.vcxproj.filters | 11 ++++ 9 files changed, 119 insertions(+), 56 deletions(-) create mode 100644 src/computervision/async/async_arm_detection.cpp create mode 100644 src/computervision/async/async_arm_detection.h diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 381b3cf..a975c51 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -11,7 +11,7 @@ namespace computervision { - cv::VideoCapture cap(1); + cv::VideoCapture cap(0); cv::Mat img, imgGray, img2, img2Gray, img3, img4; @@ -33,6 +33,11 @@ namespace computervision return img; } + cv::VideoCapture ObjectDetection::getCap() + { + return cap; + } + bool ObjectDetection::detectHand(Mat cameraFrame) { Mat inputFrame = generateHandMaskSquare(cameraFrame); diff --git a/src/computervision/ObjectDetection.h b/src/computervision/ObjectDetection.h index bddf4ba..14c2c3d 100644 --- a/src/computervision/ObjectDetection.h +++ b/src/computervision/ObjectDetection.h @@ -65,6 +65,8 @@ namespace computervision */ bool drawHandMaskRect(cv::Mat *input); + cv::VideoCapture getCap(); + }; diff --git a/src/computervision/OpenPoseVideo.cpp b/src/computervision/OpenPoseVideo.cpp index 882c728..9cf1cad 100644 --- a/src/computervision/OpenPoseVideo.cpp +++ b/src/computervision/OpenPoseVideo.cpp @@ -44,59 +44,34 @@ namespace computervision net = readNetFromCaffe(protoFile, weightsFile); } - void OpenPoseVideo::movementSkeleton(Mat inputImage) { - - //string device = "cpu"; - //string videoFile = "sample_video.mp4"; - - // Take arguments from commmand line - /*if (argc == 2) - { - if ((string)argv[1] == "gpu") - device = "gpu"; - else - videoFile = argv[1]; - } - else if (argc == 3) - { - videoFile = argv[1]; - if ((string)argv[2] == "gpu") - device = "gpu"; - }*/ + void OpenPoseVideo::movementSkeleton(Mat inputImage, std::function)> f) { + std::cout << "movement skeleton start" << std::endl; int inWidth = 368; int inHeight = 368; float thresh = 0.01; - Mat frame, frameCopy; + Mat frame; int frameWidth = inputImage.size().width; int frameHeight = inputImage.size().height; - /*if (device == "cpu") - { - cout << "Using CPU device" << endl; - net.setPreferableBackend(DNN_TARGET_CPU); - } - else if (device == "gpu") - { - cout << "Using GPU device" << endl; - net.setPreferableBackend(DNN_BACKEND_CUDA); - net.setPreferableTarget(DNN_TARGET_CUDA); - }*/ - double t = (double)cv::getTickCount(); + std::cout << "reading input image and blob" << std::endl; frame = inputImage; - frameCopy = frame.clone(); Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false); + + std::cout << "done reading image and blob" << std::endl; net.setInput(inpBlob); + std::cout << "done setting input to net" << std::endl; Mat output = net.forward(); int H = output.size[2]; int W = output.size[3]; + std::cout << "about to find position of boxy parts" << std::endl; // find the position of the body parts vector points(nPoints); for (int n = 0; n < nPoints; n++) @@ -114,31 +89,16 @@ namespace computervision p.x *= (float)frameWidth / W; p.y *= (float)frameHeight / H; - circle(frameCopy, cv::Point((int)p.x, (int)p.y), 8, Scalar(0, 255, 255), -1); - cv::putText(frameCopy, cv::format("%d", n), cv::Point((int)p.x, (int)p.y), cv::FONT_HERSHEY_COMPLEX, 1.1, cv::Scalar(0, 0, 255), 2); + circle(frame, cv::Point((int)p.x, (int)p.y), 8, Scalar(0, 255, 255), -1); + cv::putText(frame, cv::format("%d", n), cv::Point((int)p.x, (int)p.y), cv::FONT_HERSHEY_COMPLEX, 1.1, cv::Scalar(0, 0, 255), 2); } points[n] = p; } - int nPairs = sizeof(POSE_PAIRS) / sizeof(POSE_PAIRS[0]); - - for (int n = 0; n < nPairs; n++) - { - // lookup 2 connected body/hand parts - Point2f partA = points[POSE_PAIRS[n][0]]; - Point2f partB = points[POSE_PAIRS[n][1]]; - - if (partA.x <= 0 || partA.y <= 0 || partB.x <= 0 || partB.y <= 0) - continue; - - line(frame, partA, partB, Scalar(0, 255, 255), 8); - circle(frame, partA, 8, Scalar(0, 0, 255), -1); - circle(frame, partB, 8, Scalar(0, 0, 255), -1); - } - - t = ((double)cv::getTickCount() - t) / cv::getTickFrequency(); cv::putText(frame, cv::format("time taken = %.2f sec", t), cv::Point(50, 50), cv::FONT_HERSHEY_COMPLEX, .8, cv::Scalar(255, 50, 0), 2); // imshow("Output-Keypoints", frameCopy); imshow("Output-Skeleton", frame); + std::cout << "about to call points receiving method" << std::endl; + f(points); } } \ No newline at end of file diff --git a/src/computervision/OpenPoseVideo.h b/src/computervision/OpenPoseVideo.h index eaef736..8b9d75e 100644 --- a/src/computervision/OpenPoseVideo.h +++ b/src/computervision/OpenPoseVideo.h @@ -13,7 +13,7 @@ namespace computervision private: public: - void movementSkeleton(Mat inputImage); + void movementSkeleton(Mat inputImage,std::function)> f); void setup(); }; } diff --git a/src/computervision/async/async_arm_detection.cpp b/src/computervision/async/async_arm_detection.cpp new file mode 100644 index 0000000..6b27611 --- /dev/null +++ b/src/computervision/async/async_arm_detection.cpp @@ -0,0 +1,43 @@ +#include +#include "async_arm_detection.h" +#include "../OpenPoseVideo.h" +#include + + +namespace computervision +{ + AsyncArmDetection::AsyncArmDetection() + { + + } + + void AsyncArmDetection::run_arm_detection() + { + + } + + void AsyncArmDetection::start(std::function)> points_ready_func, cv::VideoCapture cap, OpenPoseVideo op) + { + + auto lambda = [](std::function)> f, cv::VideoCapture c, OpenPoseVideo op) { + std::cout << "STARTING THREAD LAMBDA" << std::endl; + cv::VideoCapture cap(0); + + if (!cap.isOpened()) + { + std::cout << "error opening video" << std::endl; + return; + } + + while (true) + { + Mat img; + cap.read(img); + op.movementSkeleton(img, f); + } + }; + + std::cout << "starting function" << std::endl; + std::thread async_arm_detect_thread(lambda, points_ready_func, cap, op); + } +} diff --git a/src/computervision/async/async_arm_detection.h b/src/computervision/async/async_arm_detection.h new file mode 100644 index 0000000..6d7572a --- /dev/null +++ b/src/computervision/async/async_arm_detection.h @@ -0,0 +1,22 @@ +#pragma once +#include +#include +#include +#include +#include "../OpenPoseVideo.h" + + +namespace computervision +{ + class AsyncArmDetection + { + public: + AsyncArmDetection(void); + + + void start(std::function)>, cv::VideoCapture cap, computervision::OpenPoseVideo op); + private: + void run_arm_detection(); + }; + +} diff --git a/src/main.cpp b/src/main.cpp index 1e78430..a14dd84 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" #include @@ -21,6 +23,8 @@ //#include "computervision/OpenPoseImage.h" #include "computervision/OpenPoseVideo.h" +#include "computervision/async/async_arm_detection.h" + #pragma comment(lib, "glfw3.lib") #pragma comment(lib, "glew32s.lib") #pragma comment(lib, "opengl32.lib") @@ -29,6 +33,11 @@ static double UpdateDelta(); static GLFWwindow* window; +void retrieve_points(std::vector arm_points) +{ + std::cout << "got points!!" << std::endl; + std::cout << "points: " << arm_points << std::endl; +} int main(void) { @@ -68,6 +77,7 @@ int main(void) computervision::ObjectDetection objDetect; //computervision::OpenPoseImage openPoseImage; computervision::OpenPoseVideo openPoseVideo; + openPoseVideo.setup(); // set up object detection @@ -75,7 +85,11 @@ int main(void) cv::Mat cameraFrame; - openPoseVideo.setup(); + //openPoseVideo.setup(); + + computervision::AsyncArmDetection as; + as.start(retrieve_points, objDetect.getCap(),openPoseVideo); + // Main game loop while (!glfwWindowShouldClose(window)) @@ -95,7 +109,6 @@ int main(void) cameraFrame = objDetect.readCamera(); //objDetect.detectHand(cameraFrame); - openPoseVideo.movementSkeleton(cameraFrame); // Finish up shader.Stop(); diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index 62e66d6..098b361 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -19,6 +19,7 @@ + @@ -36,6 +37,7 @@ + @@ -56,6 +58,11 @@ + + + + + 16.0 {A7ECF1BE-DB22-4BF7-BFF6-E3BF72691EE6} diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index 9e83634..eda7a9e 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -60,6 +60,9 @@ Source Files + + Source Files + @@ -110,8 +113,16 @@ Header Files + + Header Files + + + + + + \ No newline at end of file From cc7cb378400f8fa0f4f5c42efd9a7b67995c27e0 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Wed, 2 Jun 2021 09:44:30 +0200 Subject: [PATCH 04/24] [ADD] caffemodel project entry --- wk2_fps.vcxproj | 1 + wk2_fps.vcxproj.filters | 1 + 2 files changed, 2 insertions(+) diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index 098b361..5f36c6f 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -59,6 +59,7 @@ + diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index eda7a9e..0986f9d 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -124,5 +124,6 @@ + \ No newline at end of file From 1a149b8b7e649beb310a581ee11466f7b212fee5 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Wed, 2 Jun 2021 10:05:09 +0200 Subject: [PATCH 05/24] [ADD] static camera instance --- src/computervision/ObjectDetection.cpp | 4 +++- src/computervision/ObjectDetection.h | 1 + src/computervision/async/StaticCameraInstance.h | 12 ++++++++++++ src/computervision/async/async_arm_detection.cpp | 6 +++--- src/computervision/async/async_arm_detection.h | 1 + src/main.cpp | 2 -- wk2_fps.vcxproj | 1 + wk2_fps.vcxproj.filters | 3 +++ 8 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 src/computervision/async/StaticCameraInstance.h diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index a975c51..dc405ac 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -8,10 +8,10 @@ #include "SkinDetector.h" #include "FaceDetector.h" #include "FingerCount.h" +#include "async/StaticCameraInstance.h" namespace computervision { - cv::VideoCapture cap(0); cv::Mat img, imgGray, img2, img2Gray, img3, img4; @@ -24,6 +24,8 @@ namespace computervision FaceDetector faceDetector; FingerCount fingerCount; + cv::VideoCapture cap = computervision_async::getCap(); + ObjectDetection::ObjectDetection() { } diff --git a/src/computervision/ObjectDetection.h b/src/computervision/ObjectDetection.h index 14c2c3d..45f4c6d 100644 --- a/src/computervision/ObjectDetection.h +++ b/src/computervision/ObjectDetection.h @@ -65,6 +65,7 @@ namespace computervision */ bool drawHandMaskRect(cv::Mat *input); + cv::VideoCapture getCap(); }; diff --git a/src/computervision/async/StaticCameraInstance.h b/src/computervision/async/StaticCameraInstance.h new file mode 100644 index 0000000..625d478 --- /dev/null +++ b/src/computervision/async/StaticCameraInstance.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace static_camera +{ + + static cv::VideoCapture getCap() + { + static cv::VideoCapture cap(0); + return cap; + } +}; diff --git a/src/computervision/async/async_arm_detection.cpp b/src/computervision/async/async_arm_detection.cpp index 6b27611..2557e6e 100644 --- a/src/computervision/async/async_arm_detection.cpp +++ b/src/computervision/async/async_arm_detection.cpp @@ -21,12 +21,12 @@ namespace computervision auto lambda = [](std::function)> f, cv::VideoCapture c, OpenPoseVideo op) { std::cout << "STARTING THREAD LAMBDA" << std::endl; - cv::VideoCapture cap(0); + cv::VideoCapture cap = computervision_async::getCap(); if (!cap.isOpened()) { - std::cout << "error opening video" << std::endl; - return; + std::cout << "capture was closed, opening..." << std::endl; + cap.open(0); } while (true) diff --git a/src/computervision/async/async_arm_detection.h b/src/computervision/async/async_arm_detection.h index 6d7572a..a265ce6 100644 --- a/src/computervision/async/async_arm_detection.h +++ b/src/computervision/async/async_arm_detection.h @@ -4,6 +4,7 @@ #include #include #include "../OpenPoseVideo.h" +#include "StaticCameraInstance.h" namespace computervision diff --git a/src/main.cpp b/src/main.cpp index a14dd84..00492e6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -85,8 +85,6 @@ int main(void) cv::Mat cameraFrame; - //openPoseVideo.setup(); - computervision::AsyncArmDetection as; as.start(retrieve_points, objDetect.getCap(),openPoseVideo); diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index 5f36c6f..ddc8113 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -38,6 +38,7 @@ + diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index 0986f9d..a15b786 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -116,6 +116,9 @@ Header Files + + Header Files + From ab30c41beed21a0e5929253cf316a93e367638ab Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Wed, 2 Jun 2021 10:41:50 +0200 Subject: [PATCH 06/24] [FIX] crashing with pose detection --- src/computervision/ObjectDetection.cpp | 2 +- .../async/async_arm_detection.cpp | 40 +++++++++---------- .../async/async_arm_detection.h | 2 +- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index dc405ac..b305e71 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -24,7 +24,7 @@ namespace computervision FaceDetector faceDetector; FingerCount fingerCount; - cv::VideoCapture cap = computervision_async::getCap(); + cv::VideoCapture cap = static_camera::getCap(); ObjectDetection::ObjectDetection() { diff --git a/src/computervision/async/async_arm_detection.cpp b/src/computervision/async/async_arm_detection.cpp index 2557e6e..cd88542 100644 --- a/src/computervision/async/async_arm_detection.cpp +++ b/src/computervision/async/async_arm_detection.cpp @@ -11,33 +11,33 @@ namespace computervision } - void AsyncArmDetection::run_arm_detection() + void AsyncArmDetection::run_arm_detection(std::function)> points_ready_func, cv::VideoCapture cap, OpenPoseVideo op) { + std::cout << "STARTING THREAD LAMBDA" << std::endl; + /*cv::VideoCapture cap = static_camera::getCap();*/ + if (!cap.isOpened()) + { + std::cout << "capture was closed, opening..." << std::endl; + cap.open(0); + } + + while (true) + { + Mat img; + cap.read(img); + op.movementSkeleton(img, points_ready_func); + } } void AsyncArmDetection::start(std::function)> points_ready_func, cv::VideoCapture cap, OpenPoseVideo op) { - auto lambda = [](std::function)> f, cv::VideoCapture c, OpenPoseVideo op) { - std::cout << "STARTING THREAD LAMBDA" << std::endl; - cv::VideoCapture cap = computervision_async::getCap(); - - if (!cap.isOpened()) - { - std::cout << "capture was closed, opening..." << std::endl; - cap.open(0); - } - - while (true) - { - Mat img; - cap.read(img); - op.movementSkeleton(img, f); - } - }; - std::cout << "starting function" << std::endl; - std::thread async_arm_detect_thread(lambda, points_ready_func, cap, op); + + + std::thread async_arm_detect_thread(&AsyncArmDetection::run_arm_detection,this, points_ready_func, cap, op); + + async_arm_detect_thread.detach(); // makes sure the thread is detached from the variable. } } diff --git a/src/computervision/async/async_arm_detection.h b/src/computervision/async/async_arm_detection.h index a265ce6..c577dc7 100644 --- a/src/computervision/async/async_arm_detection.h +++ b/src/computervision/async/async_arm_detection.h @@ -17,7 +17,7 @@ namespace computervision void start(std::function)>, cv::VideoCapture cap, computervision::OpenPoseVideo op); private: - void run_arm_detection(); + void run_arm_detection(std::function)> points_ready_func, cv::VideoCapture cap, OpenPoseVideo op); }; } From fe94b0f83d5814fdd45f16ebfbe1eb46466bf84b Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 10:55:53 +0200 Subject: [PATCH 07/24] [FIX] showing of pose detection points --- src/computervision/OpenPoseVideo.cpp | 11 +++++----- src/computervision/OpenPoseVideo.h | 2 +- .../async/async_arm_detection.cpp | 9 +++++--- .../async/async_arm_detection.h | 4 ++-- src/main.cpp | 21 +++++++++++++++---- wk2_fps.vcxproj | 6 +++--- 6 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/computervision/OpenPoseVideo.cpp b/src/computervision/OpenPoseVideo.cpp index 9cf1cad..dbaf6e5 100644 --- a/src/computervision/OpenPoseVideo.cpp +++ b/src/computervision/OpenPoseVideo.cpp @@ -44,7 +44,7 @@ namespace computervision net = readNetFromCaffe(protoFile, weightsFile); } - void OpenPoseVideo::movementSkeleton(Mat inputImage, std::function)> f) { + void OpenPoseVideo::movementSkeleton(Mat inputImage, std::function,cv::Mat poinst_on_image)> f) { std::cout << "movement skeleton start" << std::endl; int inWidth = 368; @@ -60,7 +60,7 @@ namespace computervision frame = inputImage; Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false); - + std::cout << "done reading image and blob" << std::endl; net.setInput(inpBlob); @@ -96,9 +96,10 @@ namespace computervision } cv::putText(frame, cv::format("time taken = %.2f sec", t), cv::Point(50, 50), cv::FONT_HERSHEY_COMPLEX, .8, cv::Scalar(255, 50, 0), 2); - // imshow("Output-Keypoints", frameCopy); - imshow("Output-Skeleton", frame); + std::cout << "time taken: " << t << std::endl; + //imshow("Output-Keypoints", frame); + //imshow("Output-Skeleton", frame); std::cout << "about to call points receiving method" << std::endl; - f(points); + f(points,frame); } } \ No newline at end of file diff --git a/src/computervision/OpenPoseVideo.h b/src/computervision/OpenPoseVideo.h index 8b9d75e..7f61449 100644 --- a/src/computervision/OpenPoseVideo.h +++ b/src/computervision/OpenPoseVideo.h @@ -13,7 +13,7 @@ namespace computervision private: public: - void movementSkeleton(Mat inputImage,std::function)> f); + void movementSkeleton(Mat inputImage,std::function, cv::Mat poinst_on_image)> f); void setup(); }; } diff --git a/src/computervision/async/async_arm_detection.cpp b/src/computervision/async/async_arm_detection.cpp index cd88542..e9649a1 100644 --- a/src/computervision/async/async_arm_detection.cpp +++ b/src/computervision/async/async_arm_detection.cpp @@ -2,6 +2,7 @@ #include "async_arm_detection.h" #include "../OpenPoseVideo.h" #include +#include "StaticCameraInstance.h" namespace computervision @@ -11,8 +12,10 @@ namespace computervision } - void AsyncArmDetection::run_arm_detection(std::function)> points_ready_func, cv::VideoCapture cap, OpenPoseVideo op) + void AsyncArmDetection::run_arm_detection(std::function, cv::Mat poinst_on_image)> points_ready_func, OpenPoseVideo op) { + VideoCapture cap = static_camera::getCap(); + std::cout << "STARTING THREAD LAMBDA" << std::endl; /*cv::VideoCapture cap = static_camera::getCap();*/ @@ -30,13 +33,13 @@ namespace computervision } } - void AsyncArmDetection::start(std::function)> points_ready_func, cv::VideoCapture cap, OpenPoseVideo op) + void AsyncArmDetection::start(std::function, cv::Mat poinst_on_image)> points_ready_func, OpenPoseVideo op) { std::cout << "starting function" << std::endl; - std::thread async_arm_detect_thread(&AsyncArmDetection::run_arm_detection,this, points_ready_func, cap, op); + std::thread async_arm_detect_thread(&AsyncArmDetection::run_arm_detection,this, points_ready_func, op); async_arm_detect_thread.detach(); // makes sure the thread is detached from the variable. } diff --git a/src/computervision/async/async_arm_detection.h b/src/computervision/async/async_arm_detection.h index c577dc7..98fd163 100644 --- a/src/computervision/async/async_arm_detection.h +++ b/src/computervision/async/async_arm_detection.h @@ -15,9 +15,9 @@ namespace computervision AsyncArmDetection(void); - void start(std::function)>, cv::VideoCapture cap, computervision::OpenPoseVideo op); + void start(std::function, cv::Mat poinst_on_image)>, computervision::OpenPoseVideo op); private: - void run_arm_detection(std::function)> points_ready_func, cv::VideoCapture cap, OpenPoseVideo op); + void run_arm_detection(std::function, cv::Mat poinst_on_image)> points_ready_func, OpenPoseVideo op); }; } diff --git a/src/main.cpp b/src/main.cpp index 00492e6..260694c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -32,11 +32,16 @@ static double UpdateDelta(); static GLFWwindow* window; +bool points_img_available = false; +cv::Mat points_img; -void retrieve_points(std::vector arm_points) +void retrieve_points(std::vector arm_points, cv::Mat points_on_image) { + std::cout << "got points!!" << std::endl; std::cout << "points: " << arm_points << std::endl; + points_img = points_on_image; + points_img_available = true; } int main(void) @@ -82,11 +87,15 @@ int main(void) // set up object detection //objDetect.setup(); - cv::Mat cameraFrame; + cv::VideoCapture cam = objDetect.getCap(); + cv::Mat img; + cam.read(img); + imshow("camera in main loop", img); computervision::AsyncArmDetection as; - as.start(retrieve_points, objDetect.getCap(),openPoseVideo); + + as.start(retrieve_points,openPoseVideo); // Main game loop @@ -105,8 +114,12 @@ int main(void) render_engine::renderer::Render(entity, shader); - cameraFrame = objDetect.readCamera(); //objDetect.detectHand(cameraFrame); + if (points_img_available) + { + imshow("points", points_img); + points_img_available = false; + } // Finish up shader.Stop(); diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index ddc8113..64c02b8 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -131,8 +131,8 @@ false - $(VC_IncludePath);$(WindowsSDK_IncludePath);;C:\opencv\opencv\build\include - $(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);C:\opencv\opencv\build\x64\vc15\lib + C:\opencv\build\include\;$(VC_IncludePath);$(WindowsSDK_IncludePath);C:\opencv\opencv\build\include + C:\opencv\build\x64\vc15\lib;$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);C:\opencv\opencv\build\x64\vc15\lib @@ -205,7 +205,7 @@ true true $(SolutionDir)lib\glfw-3.3.2\$(Platform);$(SolutionDir)lib\glew-2.1.0\lib\Release\$(Platform);%(AdditionalLibraryDirectories) - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies); opencv_world452.lib + opencv_world452.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) From 921609de5dfcceafd2ed20206f9ecc10b19f1de2 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 11:39:23 +0200 Subject: [PATCH 08/24] [EDIT] stuff --- src/computervision/OpenPoseVideo.cpp | 5 ++++- src/computervision/OpenPoseVideo.h | 2 +- src/main.cpp | 7 ++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/computervision/OpenPoseVideo.cpp b/src/computervision/OpenPoseVideo.cpp index dbaf6e5..33527a1 100644 --- a/src/computervision/OpenPoseVideo.cpp +++ b/src/computervision/OpenPoseVideo.cpp @@ -42,9 +42,11 @@ namespace computervision void OpenPoseVideo::setup() { net = readNetFromCaffe(protoFile, weightsFile); + + net.setPreferableBackend(DNN_TARGET_CPU); } - void OpenPoseVideo::movementSkeleton(Mat inputImage, std::function,cv::Mat poinst_on_image)> f) { + void OpenPoseVideo::movementSkeleton(Mat& inputImage, std::function&, cv::Mat& poinst_on_image)> f) { std::cout << "movement skeleton start" << std::endl; int inWidth = 368; @@ -67,6 +69,7 @@ namespace computervision std::cout << "done setting input to net" << std::endl; Mat output = net.forward(); + std::cout << "time took to set input and forward: " << t << std::endl; int H = output.size[2]; int W = output.size[3]; diff --git a/src/computervision/OpenPoseVideo.h b/src/computervision/OpenPoseVideo.h index 7f61449..e05737d 100644 --- a/src/computervision/OpenPoseVideo.h +++ b/src/computervision/OpenPoseVideo.h @@ -13,7 +13,7 @@ namespace computervision private: public: - void movementSkeleton(Mat inputImage,std::function, cv::Mat poinst_on_image)> f); + void movementSkeleton(Mat& inputImage, std::function&, cv::Mat& poinst_on_image)> f); void setup(); }; } diff --git a/src/main.cpp b/src/main.cpp index 260694c..d1ef94e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -87,10 +87,11 @@ int main(void) // set up object detection //objDetect.setup(); - cv::VideoCapture cam = objDetect.getCap(); + //cv::VideoCapture cam = objDetect.getCap(); cv::Mat img; - cam.read(img); - imshow("camera in main loop", img); + cv::VideoCapture cap = objDetect.getCap(); + //cam.read(img); + //imshow("camera in main loop", img); computervision::AsyncArmDetection as; From f5926fffcbac70d8e9029ff137271041afdd7790 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 13:09:51 +0200 Subject: [PATCH 09/24] [ADD] detecting if hand is in square --- src/computervision/ObjectDetection.cpp | 13 +- src/main.cpp | 2 +- src/scenes/startup_Scene.cpp | 3 + wk2_fps.vcxproj | 2 + wk2_fps.vcxproj.filters | 220 ++++++------------------- 5 files changed, 69 insertions(+), 171 deletions(-) diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index b305e71..f0edf1e 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -9,6 +9,7 @@ #include "FaceDetector.h" #include "FingerCount.h" #include "async/StaticCameraInstance.h" +#include "HandPresentChecker.h" namespace computervision { @@ -66,10 +67,14 @@ namespace computervision putText(cameraFrame,hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255),3); imshow("camera", cameraFrame); - imshow("output", frameOut); - imshow("foreground", foreground); - imshow("handMask", handMask); - imshow("handDetection", fingerCountDebug); + //imshow("output", frameOut); + //imshow("foreground", foreground); + //imshow("handMask", handMask); + //imshow("handDetection", fingerCountDebug); + + check_if_hand_present(handMask); + + int key = waitKey(1); diff --git a/src/main.cpp b/src/main.cpp index 9bc4c80..9e0783f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -37,7 +37,7 @@ static double UpdateDelta(); -scene::Scene& current_scene; +scene::Scene* current_scene; static GLFWwindow* window; bool points_img_available = false; diff --git a/src/scenes/startup_Scene.cpp b/src/scenes/startup_Scene.cpp index fe67b0f..7d94bb0 100644 --- a/src/scenes/startup_Scene.cpp +++ b/src/scenes/startup_Scene.cpp @@ -2,9 +2,11 @@ #include #include #include "startup_Scene.h" +#include "../computervision/ObjectDetection.h" namespace scene { + computervision::ObjectDetection objDetect; scene::Scenes scene::Startup_Scene::start(GLFWwindow *window) { @@ -27,6 +29,7 @@ namespace scene void scene::Startup_Scene::update(GLFWwindow* window) { + objDetect.detectHand(objDetect.readCamera()); } diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index 50aac4f..f962a64 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -20,6 +20,7 @@ + @@ -45,6 +46,7 @@ + diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index d118db9..a60c7ce 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -1,181 +1,69 @@  - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms - + + + + + + + + + + + + + + + + + + + + + + + - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - - Header Files - - - Header Files - - - Header Files - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - \ No newline at end of file From 81dec3b9f4c27845b9bfedbc9c57cff52e90ae1b Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 13:10:11 +0200 Subject: [PATCH 10/24] [ADD] detecting if hand is in square --- src/computervision/HandPresentChecker.cpp | 18 ++++++++++++++++++ src/computervision/HandPresentChecker.h | 7 +++++++ 2 files changed, 25 insertions(+) create mode 100644 src/computervision/HandPresentChecker.cpp create mode 100644 src/computervision/HandPresentChecker.h diff --git a/src/computervision/HandPresentChecker.cpp b/src/computervision/HandPresentChecker.cpp new file mode 100644 index 0000000..2cb4784 --- /dev/null +++ b/src/computervision/HandPresentChecker.cpp @@ -0,0 +1,18 @@ +#include "HandPresentChecker.h" +#include +#include + + +namespace computervision +{ + bool check_if_hand_present(cv::Mat inputImage) + { + std::vector> points; + cv::Mat imgCont; + cv::findContours(inputImage, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); + bool hand_present = points.size() > 0; + std::cout << (hand_present ? "hey a hand!" : "damn no hand") << std::endl; + return hand_present; + } + +} diff --git a/src/computervision/HandPresentChecker.h b/src/computervision/HandPresentChecker.h new file mode 100644 index 0000000..5f0b0b7 --- /dev/null +++ b/src/computervision/HandPresentChecker.h @@ -0,0 +1,7 @@ +#pragma once + +#include +namespace computervision +{ + bool check_if_hand_present(cv::Mat inputImage); +} From ca2959bf2d1e3a3c10f4a67c93b2c8e248e847a2 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 14:48:19 +0200 Subject: [PATCH 11/24] [FIX] detecting hand when its just a finger or hair --- src/computervision/HandPresentChecker.cpp | 14 +++++++++++--- src/computervision/ObjectDetection.cpp | 4 ++-- src/computervision/ObjectDetection.h | 3 ++- src/scenes/startup_Scene.cpp | 5 ++++- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/computervision/HandPresentChecker.cpp b/src/computervision/HandPresentChecker.cpp index 2cb4784..74ec08d 100644 --- a/src/computervision/HandPresentChecker.cpp +++ b/src/computervision/HandPresentChecker.cpp @@ -2,6 +2,7 @@ #include #include +#define MIN_HAND_SIZE 10000 namespace computervision { @@ -10,9 +11,16 @@ namespace computervision std::vector> points; cv::Mat imgCont; cv::findContours(inputImage, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); - bool hand_present = points.size() > 0; - std::cout << (hand_present ? "hey a hand!" : "damn no hand") << std::endl; - return hand_present; + + if (points.size() == 0) return false; + + for (int p = 0; p < points.size(); p++) + { + int area = cv::contourArea(points[p]); + if (area > MIN_HAND_SIZE) return true; + } + + return false; } } diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index f0edf1e..d0f554f 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -41,7 +41,7 @@ namespace computervision return cap; } - bool ObjectDetection::detectHand(Mat cameraFrame) + bool ObjectDetection::detectHand(Mat cameraFrame, bool& hand_present) { Mat inputFrame = generateHandMaskSquare(cameraFrame); frameOut = inputFrame.clone(); @@ -72,7 +72,7 @@ namespace computervision //imshow("handMask", handMask); //imshow("handDetection", fingerCountDebug); - check_if_hand_present(handMask); + hand_present = check_if_hand_present(handMask); diff --git a/src/computervision/ObjectDetection.h b/src/computervision/ObjectDetection.h index 45f4c6d..5deeaa6 100644 --- a/src/computervision/ObjectDetection.h +++ b/src/computervision/ObjectDetection.h @@ -54,9 +54,10 @@ namespace computervision * @brief detects a hand based on the given hand mask input frame. * * @param inputFrame the input frame from the camera + * @param hand_present boolean that will hold true if the hand is detected, false if not. * @return true if hand is open, false if hand is closed */ - bool detectHand(cv::Mat cameraFrame); + bool detectHand(cv::Mat cameraFrame, bool& hand_present); /** * @brief draws the hand mask rectangle on the given input matrix. diff --git a/src/scenes/startup_Scene.cpp b/src/scenes/startup_Scene.cpp index 7d94bb0..3a990f1 100644 --- a/src/scenes/startup_Scene.cpp +++ b/src/scenes/startup_Scene.cpp @@ -3,6 +3,7 @@ #include #include "startup_Scene.h" #include "../computervision/ObjectDetection.h" +#include namespace scene { @@ -29,7 +30,9 @@ namespace scene void scene::Startup_Scene::update(GLFWwindow* window) { - objDetect.detectHand(objDetect.readCamera()); + bool hand_detected = false; + objDetect.detectHand(objDetect.readCamera(),hand_detected); + if (hand_detected) std::cout << "there's a hand!" << std::endl; } From b80653b668185647abe452fbc430d22331983ff9 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 15:07:36 +0200 Subject: [PATCH 12/24] [EDIT] change method names in compliance with code style guide --- src/computervision/HandPresentChecker.cpp | 5 ++--- src/computervision/HandPresentChecker.h | 8 +++++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/computervision/HandPresentChecker.cpp b/src/computervision/HandPresentChecker.cpp index 74ec08d..31a9e00 100644 --- a/src/computervision/HandPresentChecker.cpp +++ b/src/computervision/HandPresentChecker.cpp @@ -6,11 +6,10 @@ namespace computervision { - bool check_if_hand_present(cv::Mat inputImage) + bool check_if_hand_present(cv::Mat input_image) { std::vector> points; - cv::Mat imgCont; - cv::findContours(inputImage, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); + cv::findContours(input_image, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); if (points.size() == 0) return false; diff --git a/src/computervision/HandPresentChecker.h b/src/computervision/HandPresentChecker.h index 5f0b0b7..6a6a8f3 100644 --- a/src/computervision/HandPresentChecker.h +++ b/src/computervision/HandPresentChecker.h @@ -3,5 +3,11 @@ #include namespace computervision { - bool check_if_hand_present(cv::Mat inputImage); + /** + * @brief checks if the hand is present in the input image. + * + * @param input_image the image to check + * @return true if the hand was found, false if not + */ + bool CheckIfHandPresent(cv::Mat input_image); } From e4b5dc39c062fa52dda2cd642ea7e89d5fccab5f Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 15:10:19 +0200 Subject: [PATCH 13/24] [EDIT] change variable names in compliance with code style guide --- src/computervision/ObjectDetection.cpp | 51 +++++++++++++------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index d0f554f..5fcac46 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -19,11 +19,10 @@ namespace computervision int handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight; bool handMaskGenerated = false; - Mat frame, frameOut, handMask, foreground, fingerCountDebug; - BackgroundRemover backgroundRemover; - SkinDetector skinDetector; - FaceDetector faceDetector; - FingerCount fingerCount; + Mat frame, frame_out, handMask, foreground, fingerCountDebug; + BackgroundRemover background_remover; + SkinDetector skin_detector; + FingerCount finger_count; cv::VideoCapture cap = static_camera::getCap(); @@ -31,62 +30,62 @@ namespace computervision { } - cv::Mat ObjectDetection::readCamera() { + cv::Mat ObjectDetection::ReadCamera() { cap.read(img); return img; } - cv::VideoCapture ObjectDetection::getCap() + cv::VideoCapture ObjectDetection::GetCap() { return cap; } - bool ObjectDetection::detectHand(Mat cameraFrame, bool& hand_present) + bool ObjectDetection::DetectHand(Mat camera_frame, bool& hand_present) { - Mat inputFrame = generateHandMaskSquare(cameraFrame); - frameOut = inputFrame.clone(); + Mat input_frame = GenerateHandMaskSquare(camera_frame); + frame_out = input_frame.clone(); // detect skin color - skinDetector.drawSkinColorSampler(frameOut); + skin_detector.drawSkinColorSampler(frame_out); // remove background from image - foreground = backgroundRemover.getForeground(inputFrame); + foreground = background_remover.getForeground(input_frame); // detect the hand contours - handMask = skinDetector.getSkinMask(foreground); + handMask = skin_detector.getSkinMask(foreground); // count the amount of fingers and put the info on the matrix - fingerCountDebug = fingerCount.findFingersCount(handMask, frameOut); + fingerCountDebug = finger_count.findFingersCount(handMask, frame_out); // get the amount of fingers - int fingers_amount = fingerCount.getAmountOfFingers(); + int fingers_amount = finger_count.getAmountOfFingers(); // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. - drawHandMaskRect(&cameraFrame); + DrawHandMask(&camera_frame); string hand_text = fingers_amount > 0 ? "open" : "closed"; - putText(cameraFrame,hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255),3); - imshow("camera", cameraFrame); + putText(camera_frame,hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255),3); + imshow("camera", camera_frame); - //imshow("output", frameOut); + //imshow("output", frame_out); //imshow("foreground", foreground); //imshow("handMask", handMask); //imshow("handDetection", fingerCountDebug); - hand_present = check_if_hand_present(handMask); + hand_present = CheckIfHandPresent(handMask); int key = waitKey(1); if (key == 98) // b, calibrate the background - backgroundRemover.calibrate(inputFrame); + background_remover.calibrate(input_frame); else if (key == 115) // s, calibrate the skin color - skinDetector.calibrate(inputFrame); + skin_detector.calibrate(input_frame); return fingers_amount > 0; } - void ObjectDetection::calculateDifference() + void ObjectDetection::CalculateDifference() { cap.read(img); cap.read(img2); @@ -101,7 +100,7 @@ namespace computervision } - cv::Mat ObjectDetection::generateHandMaskSquare(cv::Mat img) + cv::Mat ObjectDetection::GenerateHandMaskSquare(cv::Mat img) { handMaskStartXPos = 20; handMaskStartYPos = img.rows / 5; @@ -121,14 +120,14 @@ namespace computervision } - bool ObjectDetection::drawHandMaskRect(cv::Mat* input) + bool ObjectDetection::DrawHandMask(cv::Mat* input) { if (!handMaskGenerated) return false; rectangle(*input, Rect(handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight), Scalar(255, 255, 255)); return true; } - void ObjectDetection::showWebcam() + void ObjectDetection::ShowWebcam() { imshow("Webcam image", img); } From 1ab5ae798e0a07f9d3b514dd5f9dfdcbaf6a3d28 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 16:27:30 +0200 Subject: [PATCH 14/24] [ADD] hand calibration screen --- src/computervision/FaceDetector.cpp | 53 --------------- src/computervision/FaceDetector.h | 31 --------- src/computervision/HandPresentChecker.cpp | 25 ------- src/computervision/ObjectDetection.cpp | 56 +++++++++------ src/computervision/ObjectDetection.h | 14 ++-- src/computervision/SkinDetector.cpp | 2 +- .../async/async_arm_detection.cpp | 2 +- .../calibration/HandCalibrator.cpp | 68 +++++++++++++++++++ .../calibration/HandCalibrator.h | 42 ++++++++++++ .../calibration/HandPresentChecker.cpp | 14 ++++ .../{ => calibration}/HandPresentChecker.h | 0 src/scenes/startup_Scene.cpp | 2 +- wk2_fps.vcxproj | 8 +-- wk2_fps.vcxproj.filters | 8 +-- 14 files changed, 176 insertions(+), 149 deletions(-) delete mode 100644 src/computervision/FaceDetector.cpp delete mode 100644 src/computervision/FaceDetector.h delete mode 100644 src/computervision/HandPresentChecker.cpp create mode 100644 src/computervision/calibration/HandCalibrator.cpp create mode 100644 src/computervision/calibration/HandCalibrator.h create mode 100644 src/computervision/calibration/HandPresentChecker.cpp rename src/computervision/{ => calibration}/HandPresentChecker.h (100%) diff --git a/src/computervision/FaceDetector.cpp b/src/computervision/FaceDetector.cpp deleted file mode 100644 index a628983..0000000 --- a/src/computervision/FaceDetector.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "FaceDetector.h" - - -/* - Author: Pierfrancesco Soffritti https://github.com/PierfrancescoSoffritti -*/ -namespace computervision -{ - Rect getFaceRect(Mat input); - - String faceClassifierFileName = "res/haarcascade_frontalface_alt.xml"; - CascadeClassifier faceCascadeClassifier; - - FaceDetector::FaceDetector(void) { - if (!faceCascadeClassifier.load(faceClassifierFileName)) - throw runtime_error("can't load file " + faceClassifierFileName); - } - - void FaceDetector::removeFaces(Mat input, Mat output) { - vector faces; - Mat frameGray; - - cvtColor(input, frameGray, CV_BGR2GRAY); - equalizeHist(frameGray, frameGray); - - faceCascadeClassifier.detectMultiScale(frameGray, faces, 1.1, 2, 0 | 2, Size(120, 120)); // HAAR_SCALE_IMAGE is 2 - - for (size_t i = 0; i < faces.size(); i++) { - rectangle( - output, - Point(faces[i].x, faces[i].y), - Point(faces[i].x + faces[i].width, faces[i].y + faces[i].height), - Scalar(0, 0, 0), - -1 - ); - } - } - - Rect getFaceRect(Mat input) { - vector faceRectangles; - Mat inputGray; - - cvtColor(input, inputGray, CV_BGR2GRAY); - equalizeHist(inputGray, inputGray); - - faceCascadeClassifier.detectMultiScale(inputGray, faceRectangles, 1.1, 2, 0 | 2, Size(120, 120)); // HAAR_SCALE_IMAGE is 2 - - if (faceRectangles.size() > 0) - return faceRectangles[0]; - else - return Rect(0, 0, 1, 1); - } -} \ No newline at end of file diff --git a/src/computervision/FaceDetector.h b/src/computervision/FaceDetector.h deleted file mode 100644 index 208e051..0000000 --- a/src/computervision/FaceDetector.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -/* - Author: Pierfrancesco Soffritti https://github.com/PierfrancescoSoffritti -*/ - -using namespace cv; -using namespace std; - -namespace computervision -{ - class FaceDetector { - public: - /** - * @brief Constructor for the class FaceDetector, loads training data from a file - * - */ - FaceDetector(void); - /** - * @brief Detects faces on an image and blocks them with a black rectangle - * - * @param input Input image - * @param output Output image - */ - void removeFaces(Mat input, Mat output); - }; -} \ No newline at end of file diff --git a/src/computervision/HandPresentChecker.cpp b/src/computervision/HandPresentChecker.cpp deleted file mode 100644 index 31a9e00..0000000 --- a/src/computervision/HandPresentChecker.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include "HandPresentChecker.h" -#include -#include - -#define MIN_HAND_SIZE 10000 - -namespace computervision -{ - bool check_if_hand_present(cv::Mat input_image) - { - std::vector> points; - cv::findContours(input_image, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); - - if (points.size() == 0) return false; - - for (int p = 0; p < points.size(); p++) - { - int area = cv::contourArea(points[p]); - if (area > MIN_HAND_SIZE) return true; - } - - return false; - } - -} diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 5fcac46..669a229 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -6,23 +6,24 @@ #include "ObjectDetection.h" #include "BackgroundRemover.h" #include "SkinDetector.h" -#include "FaceDetector.h" #include "FingerCount.h" #include "async/StaticCameraInstance.h" -#include "HandPresentChecker.h" +#include "calibration/HandPresentChecker.h" +#include "calibration/HandCalibrator.h" namespace computervision { - cv::Mat img, imgGray, img2, img2Gray, img3, img4; + cv::Mat img, img_gray, img2, img2_gray, img3, img4; - int handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight; - bool handMaskGenerated = false; + int hand_mask_start_x_pos, hand_mask_start_y_pos, hand_mask_width, hand_mask_height; + bool hand_mask_generated = false; Mat frame, frame_out, handMask, foreground, fingerCountDebug; BackgroundRemover background_remover; SkinDetector skin_detector; FingerCount finger_count; + handcalibration::HandCalibrator hand_calibrator; cv::VideoCapture cap = static_camera::getCap(); @@ -46,7 +47,7 @@ namespace computervision frame_out = input_frame.clone(); // detect skin color - skin_detector.drawSkinColorSampler(frame_out); + skin_detector.drawSkinColorSampler(camera_frame); // remove background from image foreground = background_remover.getForeground(input_frame); @@ -63,7 +64,9 @@ namespace computervision // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. DrawHandMask(&camera_frame); string hand_text = fingers_amount > 0 ? "open" : "closed"; - putText(camera_frame,hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255),3); + putText(camera_frame, hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255), 3); + + hand_calibrator.DrawHandCalibrationText(camera_frame); imshow("camera", camera_frame); //imshow("output", frame_out); @@ -71,16 +74,25 @@ namespace computervision //imshow("handMask", handMask); //imshow("handDetection", fingerCountDebug); - hand_present = CheckIfHandPresent(handMask); + hand_present = hand_calibrator.CheckIfHandPresent(handMask); + hand_calibrator.SetHandPresent(hand_present); int key = waitKey(1); if (key == 98) // b, calibrate the background + { background_remover.calibrate(input_frame); + hand_calibrator.SetBackGroundCalibrated(true); + } else if (key == 115) // s, calibrate the skin color + { skin_detector.calibrate(input_frame); + hand_calibrator.SetSkinCalibration(true); + + } + return fingers_amount > 0; } @@ -90,10 +102,10 @@ namespace computervision cap.read(img); cap.read(img2); - cv::cvtColor(img, imgGray, cv::COLOR_RGBA2GRAY); - cv::cvtColor(img2, img2Gray, cv::COLOR_RGBA2GRAY); + cv::cvtColor(img, img_gray, cv::COLOR_RGBA2GRAY); + cv::cvtColor(img2, img2_gray, cv::COLOR_RGBA2GRAY); - cv::absdiff(imgGray, img2Gray, img3); + cv::absdiff(img_gray, img2_gray, img3); cv::threshold(img3, img4, 50, 170, cv::THRESH_BINARY); imshow("threshold", img4); @@ -102,28 +114,28 @@ namespace computervision cv::Mat ObjectDetection::GenerateHandMaskSquare(cv::Mat img) { - handMaskStartXPos = 20; - handMaskStartYPos = img.rows / 5; - handMaskWidth = img.cols / 3; - handMaskHeight = img.cols / 3; + hand_mask_start_x_pos = 20; + hand_mask_start_y_pos = img.rows / 5; + hand_mask_width = img.cols / 3; + hand_mask_height = img.cols / 3; cv::Mat mask = cv::Mat::zeros(img.size(), img.type()); - cv::Mat dstImg = cv::Mat::zeros(img.size(), img.type()); + cv::Mat distance_img = cv::Mat::zeros(img.size(), img.type()); - cv::rectangle(mask, Rect(handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight), Scalar(255, 255, 255), -1); + cv::rectangle(mask, Rect(hand_mask_start_x_pos, hand_mask_start_y_pos, hand_mask_width, hand_mask_height), Scalar(255, 255, 255), -1); - img.copyTo(dstImg, mask); + img.copyTo(distance_img, mask); - handMaskGenerated = true; - return dstImg; + hand_mask_generated = true; + return distance_img; } bool ObjectDetection::DrawHandMask(cv::Mat* input) { - if (!handMaskGenerated) return false; - rectangle(*input, Rect(handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight), Scalar(255, 255, 255)); + if (!hand_mask_generated) return false; + rectangle(*input, Rect(hand_mask_start_x_pos, hand_mask_start_y_pos, hand_mask_width, hand_mask_height), Scalar(255, 255, 255)); return true; } diff --git a/src/computervision/ObjectDetection.h b/src/computervision/ObjectDetection.h index 5deeaa6..1b65e1f 100644 --- a/src/computervision/ObjectDetection.h +++ b/src/computervision/ObjectDetection.h @@ -27,13 +27,13 @@ namespace computervision * @brief Displays an image of the current webcam-footage * */ - void showWebcam(); + void ShowWebcam(); /** * @brief Calculates the difference between two images * and outputs an image that only shows the difference * */ - void calculateDifference(); + void CalculateDifference(); /** * @brief generates the square that will hold the mask in which the hand will be detected. @@ -41,14 +41,14 @@ namespace computervision * @param img the current camear frame * @return a matrix containing the mask */ - cv::Mat generateHandMaskSquare(cv::Mat img); + cv::Mat GenerateHandMaskSquare(cv::Mat img); /** * @brief reads the camera and returns it in a matrix. * * @return the camera frame in a matrix */ - cv::Mat readCamera(); + cv::Mat ReadCamera(); /** * @brief detects a hand based on the given hand mask input frame. @@ -57,17 +57,17 @@ namespace computervision * @param hand_present boolean that will hold true if the hand is detected, false if not. * @return true if hand is open, false if hand is closed */ - bool detectHand(cv::Mat cameraFrame, bool& hand_present); + bool DetectHand(cv::Mat camera_frame, bool& hand_present); /** * @brief draws the hand mask rectangle on the given input matrix. * * @param input the input matrix to draw the rectangle on */ - bool drawHandMaskRect(cv::Mat *input); + bool DrawHandMask(cv::Mat *input); - cv::VideoCapture getCap(); + cv::VideoCapture GetCap(); }; diff --git a/src/computervision/SkinDetector.cpp b/src/computervision/SkinDetector.cpp index 088cce0..971ff2a 100644 --- a/src/computervision/SkinDetector.cpp +++ b/src/computervision/SkinDetector.cpp @@ -23,7 +23,7 @@ namespace computervision int frameWidth = input.size().width, frameHeight = input.size().height; int rectangleSize = 25; - Scalar rectangleColor = Scalar(255, 0, 255); + Scalar rectangleColor = Scalar(0, 255, 255); skinColorSamplerRectangle1 = Rect(frameWidth / 5, frameHeight / 2, rectangleSize, rectangleSize); skinColorSamplerRectangle2 = Rect(frameWidth / 5, frameHeight / 3, rectangleSize, rectangleSize); diff --git a/src/computervision/async/async_arm_detection.cpp b/src/computervision/async/async_arm_detection.cpp index e9649a1..a43b7dc 100644 --- a/src/computervision/async/async_arm_detection.cpp +++ b/src/computervision/async/async_arm_detection.cpp @@ -17,7 +17,7 @@ namespace computervision VideoCapture cap = static_camera::getCap(); std::cout << "STARTING THREAD LAMBDA" << std::endl; - /*cv::VideoCapture cap = static_camera::getCap();*/ + /*cv::VideoCapture cap = static_camera::GetCap();*/ if (!cap.isOpened()) { diff --git a/src/computervision/calibration/HandCalibrator.cpp b/src/computervision/calibration/HandCalibrator.cpp new file mode 100644 index 0000000..9d91a46 --- /dev/null +++ b/src/computervision/calibration/HandCalibrator.cpp @@ -0,0 +1,68 @@ + +#include "HandCalibrator.h" + +#define MIN_HAND_SIZE 10000 +namespace computervision +{ + namespace handcalibration + { + + static bool background_calibrated; + static bool skintone_calibrated; + static bool hand_present; + + HandCalibrator::HandCalibrator() + { + + } + + void HandCalibrator::DrawHandCalibrationText(cv::Mat& output_frame) + { + cv::rectangle(output_frame,cv::Rect(0, 0, output_frame.cols, 40),cv::Scalar(0,0,0),-1); + cv::putText(output_frame, "Hand calibration", cv::Point(output_frame.cols/2-100, 25), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(18, 219, 65), 2); + cv::putText(output_frame, "press 'b' to calibrate background,then press 's' to calibrate skin tone", cv::Point(5, 35), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(18, 219, 65), 1); + + cv::putText(output_frame, "hand in frame:", cv::Point(5, output_frame.rows - 50), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(270, output_frame.rows - 70, 20, 20), hand_present ? cv::Scalar(0, 255, 0) : cv::Scalar(0,0,255), -1); + + + cv::putText(output_frame, (background_calibrated ? "background calibrated" : "background not calibrated"), cv::Point(5, output_frame.rows-30), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::putText(output_frame, (skintone_calibrated ? "skincolor calibrated" : "skincolor not calibrated"), cv::Point(5, output_frame.rows-10), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + } + + void HandCalibrator::SetSkinCalibration(bool val) + { + skintone_calibrated = val; + } + + void HandCalibrator::SetBackGroundCalibrated(bool val) + { + background_calibrated = val; + } + + void HandCalibrator::SetHandPresent(bool val) + { + hand_present = val; + } + + bool HandCalibrator::CheckIfHandPresent(cv::Mat input_image) + { + std::vector> points; + cv::findContours(input_image, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); + + if (points.size() == 0) return false; + + for (int p = 0; p < points.size(); p++) + { + int area = cv::contourArea(points[p]); + if (area > MIN_HAND_SIZE) return true; + } + + return false; + } + + + + + } +} diff --git a/src/computervision/calibration/HandCalibrator.h b/src/computervision/calibration/HandCalibrator.h new file mode 100644 index 0000000..40fdd95 --- /dev/null +++ b/src/computervision/calibration/HandCalibrator.h @@ -0,0 +1,42 @@ +#pragma once +#include +#include +#include + +namespace computervision +{ + namespace handcalibration + { + class HandCalibrator + { + public: + HandCalibrator(); + + /** + * @brief draws the text to show the status of the calibration on the image + * + * @param output_frame the frame to draw on. + */ + void DrawHandCalibrationText(cv::Mat& output_frame); + + /** + * @brief sets the skin calibration variable. + * + * @param val the value to set + */ + void SetSkinCalibration(bool val); + + /** + * @brief sets the background calibration variable. + * + * @param val the value to set + */ + void SetBackGroundCalibrated(bool val); + + bool CheckIfHandPresent(cv::Mat input_image); + + void SetHandPresent(bool val); + }; + + } +} diff --git a/src/computervision/calibration/HandPresentChecker.cpp b/src/computervision/calibration/HandPresentChecker.cpp new file mode 100644 index 0000000..194f6c9 --- /dev/null +++ b/src/computervision/calibration/HandPresentChecker.cpp @@ -0,0 +1,14 @@ +#include "HandPresentChecker.h" +#include +#include + + + +namespace computervision +{ + namespace handcalibration + { + + } + +} diff --git a/src/computervision/HandPresentChecker.h b/src/computervision/calibration/HandPresentChecker.h similarity index 100% rename from src/computervision/HandPresentChecker.h rename to src/computervision/calibration/HandPresentChecker.h diff --git a/src/scenes/startup_Scene.cpp b/src/scenes/startup_Scene.cpp index 3a990f1..e889139 100644 --- a/src/scenes/startup_Scene.cpp +++ b/src/scenes/startup_Scene.cpp @@ -31,7 +31,7 @@ namespace scene void scene::Startup_Scene::update(GLFWwindow* window) { bool hand_detected = false; - objDetect.detectHand(objDetect.readCamera(),hand_detected); + objDetect.DetectHand(objDetect.ReadCamera(),hand_detected); if (hand_detected) std::cout << "there's a hand!" << std::endl; } diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index f962a64..ba853d9 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -20,10 +20,10 @@ - + + - @@ -46,12 +46,12 @@ - + + - diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index a60c7ce..129165a 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -4,7 +4,6 @@ - @@ -23,7 +22,8 @@ - + + @@ -32,7 +32,6 @@ - @@ -55,7 +54,8 @@ - + + From e70d2ef19dbead7631e0b427f3c5007662c5bde3 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Fri, 4 Jun 2021 16:32:30 +0200 Subject: [PATCH 15/24] [EDIT] removed unused hand stuff --- src/computervision/ObjectDetection.cpp | 9 ++++----- .../calibration/HandPresentChecker.cpp | 14 -------------- .../calibration/HandPresentChecker.h | 13 ------------- wk2_fps.vcxproj | 2 -- wk2_fps.vcxproj.filters | 2 -- 5 files changed, 4 insertions(+), 36 deletions(-) delete mode 100644 src/computervision/calibration/HandPresentChecker.cpp delete mode 100644 src/computervision/calibration/HandPresentChecker.h diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 669a229..ba8030d 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -8,7 +8,6 @@ #include "SkinDetector.h" #include "FingerCount.h" #include "async/StaticCameraInstance.h" -#include "calibration/HandPresentChecker.h" #include "calibration/HandCalibrator.h" namespace computervision @@ -69,10 +68,10 @@ namespace computervision hand_calibrator.DrawHandCalibrationText(camera_frame); imshow("camera", camera_frame); - //imshow("output", frame_out); - //imshow("foreground", foreground); - //imshow("handMask", handMask); - //imshow("handDetection", fingerCountDebug); + /*imshow("output", frame_out); + imshow("foreground", foreground); + imshow("handMask", handMask); + imshow("handDetection", fingerCountDebug);*/ hand_present = hand_calibrator.CheckIfHandPresent(handMask); hand_calibrator.SetHandPresent(hand_present); diff --git a/src/computervision/calibration/HandPresentChecker.cpp b/src/computervision/calibration/HandPresentChecker.cpp deleted file mode 100644 index 194f6c9..0000000 --- a/src/computervision/calibration/HandPresentChecker.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include "HandPresentChecker.h" -#include -#include - - - -namespace computervision -{ - namespace handcalibration - { - - } - -} diff --git a/src/computervision/calibration/HandPresentChecker.h b/src/computervision/calibration/HandPresentChecker.h deleted file mode 100644 index 6a6a8f3..0000000 --- a/src/computervision/calibration/HandPresentChecker.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include -namespace computervision -{ - /** - * @brief checks if the hand is present in the input image. - * - * @param input_image the image to check - * @return true if the hand was found, false if not - */ - bool CheckIfHandPresent(cv::Mat input_image); -} diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index ba853d9..9658b78 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -21,7 +21,6 @@ - @@ -47,7 +46,6 @@ - diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index 129165a..de921ef 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -22,7 +22,6 @@ - @@ -54,7 +53,6 @@ - From bb68d98bfe1ebd0fd61180d60f037f260adb9cf1 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 10:43:59 +0200 Subject: [PATCH 16/24] [ADD] comments --- src/computervision/calibration/HandCalibrator.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/computervision/calibration/HandCalibrator.h b/src/computervision/calibration/HandCalibrator.h index 40fdd95..f402adb 100644 --- a/src/computervision/calibration/HandCalibrator.h +++ b/src/computervision/calibration/HandCalibrator.h @@ -33,9 +33,20 @@ namespace computervision */ void SetBackGroundCalibrated(bool val); + /** + * @brief sets the value for if the hand is present. + * + * @param val the value to set. + */ + void SetHandPresent(bool val); + + /** + * @brief checks if the hand is present in the given image + * + * @param input_image the input image to check. + */ bool CheckIfHandPresent(cv::Mat input_image); - void SetHandPresent(bool val); }; } From afd3e00ddb0e44c6584f70b72e607819fac45391 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 11:03:22 +0200 Subject: [PATCH 17/24] [ADD] contour of hand in calibration screen --- src/computervision/FingerCount.cpp | 11 +++++++---- src/computervision/FingerCount.h | 10 ++++++++++ src/computervision/ObjectDetection.cpp | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/computervision/FingerCount.cpp b/src/computervision/FingerCount.cpp index 590e2a2..bdd1938 100644 --- a/src/computervision/FingerCount.cpp +++ b/src/computervision/FingerCount.cpp @@ -14,6 +14,7 @@ namespace computervision { + FingerCount::FingerCount(void) { color_blue = Scalar(255, 0, 0); color_green = Scalar(0, 255, 0); @@ -35,9 +36,6 @@ namespace computervision if (input_image.channels() != 1) return contours_image; - vector> contours; - vector hierarchy; - findContours(input_image, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE); // we need at least one contour to work @@ -45,7 +43,7 @@ namespace computervision return contours_image; // find the biggest contour (let's suppose it's our hand) - int biggest_contour_index = -1; + biggest_contour_index = -1; double biggest_area = 0.0; for (int i = 0; i < contours.size(); i++) { @@ -156,6 +154,11 @@ namespace computervision return contours_image; } + void FingerCount::DrawHandContours(Mat& image) + { + drawContours(image, contours, biggest_contour_index, color_green, 2, 8, hierarchy); + } + int FingerCount::getAmountOfFingers() { return amount_of_fingers; diff --git a/src/computervision/FingerCount.h b/src/computervision/FingerCount.h index 3319150..4b31c92 100644 --- a/src/computervision/FingerCount.h +++ b/src/computervision/FingerCount.h @@ -31,7 +31,15 @@ namespace computervision */ int getAmountOfFingers(); + void DrawHandContours(Mat& image); + private: + + int biggest_contour_index; + vector> contours; + vector hierarchy; + + // colors to use Scalar color_blue; Scalar color_green; @@ -115,5 +123,7 @@ namespace computervision * @param with_numbers if the numbers should be drawn with the points */ void drawVectorPoints(Mat image, vector points, Scalar color, bool with_numbers); + + }; } \ No newline at end of file diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index ba8030d..1d36826 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -64,7 +64,7 @@ namespace computervision DrawHandMask(&camera_frame); string hand_text = fingers_amount > 0 ? "open" : "closed"; putText(camera_frame, hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255), 3); - + finger_count.DrawHandContours(camera_frame); hand_calibrator.DrawHandCalibrationText(camera_frame); imshow("camera", camera_frame); From ef470bd4f1db9c1eaf4908227b4f4a5c34c7bb5c Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 11:54:48 +0200 Subject: [PATCH 18/24] [ADD] start of multiple squares --- src/computervision/HandDetectRegion.cpp | 10 +++++++ src/computervision/HandDetectRegion.h | 22 ++++++++++++++ src/computervision/ObjectDetection.cpp | 5 ++-- src/computervision/ObjectDetection.h | 18 ++++++++++++ .../calibration/HandCalibrator.cpp | 29 +++++++++++++------ .../calibration/HandCalibrator.h | 12 ++++++++ 6 files changed, 85 insertions(+), 11 deletions(-) create mode 100644 src/computervision/HandDetectRegion.cpp create mode 100644 src/computervision/HandDetectRegion.h diff --git a/src/computervision/HandDetectRegion.cpp b/src/computervision/HandDetectRegion.cpp new file mode 100644 index 0000000..0b012db --- /dev/null +++ b/src/computervision/HandDetectRegion.cpp @@ -0,0 +1,10 @@ + +#include "HandDetectRegion.h" +namespace computervision +{ + HandDetectRegion::HandDetectRegion() + { + } + + +} diff --git a/src/computervision/HandDetectRegion.h b/src/computervision/HandDetectRegion.h new file mode 100644 index 0000000..4bdadf2 --- /dev/null +++ b/src/computervision/HandDetectRegion.h @@ -0,0 +1,22 @@ +#pragma once + +#include +namespace computervision +{ + class HandDetectRegion + { + public: + HandDetectRegion(); + + cv::Mat GenerateHandMaskSquare(); + + void detectHand(cv::Mat camera_frame); + + private: + int start_x_pos; + int start_y_pos; + int height; + int width; + }; + +} diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 1d36826..530e474 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -62,8 +62,9 @@ namespace computervision // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. DrawHandMask(&camera_frame); - string hand_text = fingers_amount > 0 ? "open" : "closed"; - putText(camera_frame, hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255), 3); + + + hand_calibrator.SetAmountOfFingers(fingers_amount); finger_count.DrawHandContours(camera_frame); hand_calibrator.DrawHandCalibrationText(camera_frame); imshow("camera", camera_frame); diff --git a/src/computervision/ObjectDetection.h b/src/computervision/ObjectDetection.h index 1b65e1f..92fc335 100644 --- a/src/computervision/ObjectDetection.h +++ b/src/computervision/ObjectDetection.h @@ -66,9 +66,27 @@ namespace computervision */ bool DrawHandMask(cv::Mat *input); + /** + * @brief checks if the hand of the user is open. + * + * @return true if the hand is open, false if not. + */ + bool IsHandOpen(); + + + /** + * @brief checks whether the hand is held within the detection square. + * + * @return true if the hand is in the detection square, false if not. + */ + bool IsHandPresent(); cv::VideoCapture GetCap(); + private: + bool is_hand_open; + bool is_hand_present; + }; diff --git a/src/computervision/calibration/HandCalibrator.cpp b/src/computervision/calibration/HandCalibrator.cpp index 9d91a46..36c64ea 100644 --- a/src/computervision/calibration/HandCalibrator.cpp +++ b/src/computervision/calibration/HandCalibrator.cpp @@ -7,10 +7,6 @@ namespace computervision namespace handcalibration { - static bool background_calibrated; - static bool skintone_calibrated; - static bool hand_present; - HandCalibrator::HandCalibrator() { @@ -18,16 +14,26 @@ namespace computervision void HandCalibrator::DrawHandCalibrationText(cv::Mat& output_frame) { - cv::rectangle(output_frame,cv::Rect(0, 0, output_frame.cols, 40),cv::Scalar(0,0,0),-1); - cv::putText(output_frame, "Hand calibration", cv::Point(output_frame.cols/2-100, 25), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(18, 219, 65), 2); + cv::rectangle(output_frame, cv::Rect(0, 0, output_frame.cols, 40), cv::Scalar(0, 0, 0), -1); + cv::putText(output_frame, "Hand calibration", cv::Point(output_frame.cols / 2 - 100, 25), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(18, 219, 65), 2); cv::putText(output_frame, "press 'b' to calibrate background,then press 's' to calibrate skin tone", cv::Point(5, 35), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(18, 219, 65), 1); + cv::rectangle(output_frame, cv::Rect(0, output_frame.rows - 80, 450, output_frame.cols), cv::Scalar(0, 0, 0), -1); + cv::putText(output_frame, "hand in frame:", cv::Point(5, output_frame.rows - 50), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); - cv::rectangle(output_frame, cv::Rect(270, output_frame.rows - 70, 20, 20), hand_present ? cv::Scalar(0, 255, 0) : cv::Scalar(0,0,255), -1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 67, 15, 15), hand_present ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + cv::putText(output_frame, "background calibrated:", cv::Point(5, output_frame.rows - 30), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 47, 15, 15), background_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); - cv::putText(output_frame, (background_calibrated ? "background calibrated" : "background not calibrated"), cv::Point(5, output_frame.rows-30), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); - cv::putText(output_frame, (skintone_calibrated ? "skincolor calibrated" : "skincolor not calibrated"), cv::Point(5, output_frame.rows-10), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::putText(output_frame, "skin color calibrated:", cv::Point(5, output_frame.rows - 10), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 27, 15, 15), skintone_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + + if (hand_present) + { + std::string hand_text = fingers_amount > 0 ? "open" : "closed"; + cv::putText(output_frame, hand_text, cv::Point(10, 75), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 255), 3); + } } void HandCalibrator::SetSkinCalibration(bool val) @@ -45,6 +51,11 @@ namespace computervision hand_present = val; } + void HandCalibrator::SetAmountOfFingers(int amount) + { + fingers_amount = amount; + } + bool HandCalibrator::CheckIfHandPresent(cv::Mat input_image) { std::vector> points; diff --git a/src/computervision/calibration/HandCalibrator.h b/src/computervision/calibration/HandCalibrator.h index f402adb..9b2f7ae 100644 --- a/src/computervision/calibration/HandCalibrator.h +++ b/src/computervision/calibration/HandCalibrator.h @@ -47,6 +47,18 @@ namespace computervision */ bool CheckIfHandPresent(cv::Mat input_image); + /** + * @brief sets the amount of fingers that are currently detected. + * + * @param amount the amount of fingers. + */ + void SetAmountOfFingers(int amount); + private: + + bool background_calibrated; + bool skintone_calibrated; + bool hand_present; + int fingers_amount; }; } From 1e55736615593b22471ddb6fb7bcdacbba93ff82 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 13:17:07 +0200 Subject: [PATCH 19/24] [ADD] multiple hand detection squares --- src/computervision/HandDetectRegion.cpp | 97 ++++++++++++++++++++++++- src/computervision/HandDetectRegion.h | 34 +++++++-- src/computervision/SkinDetector.cpp | 70 ++++++++++++++++++ src/computervision/SkinDetector.h | 9 +++ src/scenes/startup_Scene.cpp | 33 ++++++++- wk2_fps.vcxproj | 2 + wk2_fps.vcxproj.filters | 2 + 7 files changed, 238 insertions(+), 9 deletions(-) diff --git a/src/computervision/HandDetectRegion.cpp b/src/computervision/HandDetectRegion.cpp index 0b012db..cf09f07 100644 --- a/src/computervision/HandDetectRegion.cpp +++ b/src/computervision/HandDetectRegion.cpp @@ -1,10 +1,105 @@ #include "HandDetectRegion.h" + namespace computervision { - HandDetectRegion::HandDetectRegion() + + HandDetectRegion::HandDetectRegion(std::string id,int x_pos, int y_pos, int width, int height) { + region_id = id; + start_x_pos = x_pos; + start_y_pos = y_pos; + region_width = width; + region_height = height; + hand_mask_generated = false; + hand_present = false; } + void HandDetectRegion::DetectHand(cv::Mat& camera_frame) + { + Mat input_frame = GenerateHandMaskSquare(camera_frame); + frame_out = input_frame.clone(); + + // detect skin color + skin_detector.drawSkinColorSampler(camera_frame,start_x_pos,start_y_pos,region_width,region_height); + + // remove background from image + foreground = background_remover.getForeground(input_frame); + + // detect the hand contours + handMask = skin_detector.getSkinMask(foreground); + + // count the amount of fingers and put the info on the matrix + //fingerCountDebug = finger_count.findFingersCount(handMask, frame_out); + + //// get the amount of fingers + //int fingers_amount = finger_count.getAmountOfFingers(); + + // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. + DrawHandMask(&camera_frame); + + + //hand_calibrator.SetAmountOfFingers(fingers_amount); + //finger_count.DrawHandContours(camera_frame); + //hand_calibrator.DrawHandCalibrationText(camera_frame); + //imshow("camera", camera_frame); + + imshow("output" + region_id, frame_out); + imshow("foreground" + region_id, foreground); + imshow("handMask" + region_id, handMask); + /*imshow("handDetection", fingerCountDebug);*/ + + hand_present = hand_calibrator.CheckIfHandPresent(handMask); + std::string text = (hand_present ? "hand" : "no"); + cv::putText(camera_frame, text, cv::Point(start_x_pos, start_y_pos), cv::FONT_HERSHEY_COMPLEX, 2.0, cv::Scalar(0, 255, 255), 2); + hand_calibrator.SetHandPresent(hand_present); + + } + + cv::Mat HandDetectRegion::GenerateHandMaskSquare(cv::Mat img) + { + cv::Mat mask = cv::Mat::zeros(img.size(), img.type()); + cv::Mat distance_img = cv::Mat::zeros(img.size(), img.type()); + + cv::rectangle(mask, cv::Rect(start_x_pos, start_y_pos, region_width, region_height), cv::Scalar(255, 255, 255), -1); + + img.copyTo(distance_img, mask); + + hand_mask_generated = true; + return distance_img; + } + + bool HandDetectRegion::DrawHandMask(cv::Mat* input) + { + if (!hand_mask_generated) return false; + rectangle(*input, Rect(start_x_pos, start_y_pos, region_width, region_height), Scalar(255, 255, 255)); + return true; + } + + bool HandDetectRegion::IsHandPresent() + { + return hand_present; + } + + void HandDetectRegion::CalibrateBackground() + { + background_remover.calibrate(frame_out); + hand_calibrator.SetBackGroundCalibrated(true); + } + void HandDetectRegion::CalibrateSkin() + { + skin_detector.calibrate(frame_out); + hand_calibrator.SetSkinCalibration(true); + } + + std::vector HandDetectRegion::CalculateSkinTresholds() + { + return skin_detector.calibrateAndReturn(frame_out); + } + + void HandDetectRegion::setSkinTresholds(std::vector& tresholds) + { + skin_detector.setTresholds(tresholds); + } } diff --git a/src/computervision/HandDetectRegion.h b/src/computervision/HandDetectRegion.h index 4bdadf2..3594ff3 100644 --- a/src/computervision/HandDetectRegion.h +++ b/src/computervision/HandDetectRegion.h @@ -1,22 +1,46 @@ #pragma once #include +#include +#include "async/StaticCameraInstance.h" +#include "calibration/HandCalibrator.h" +#include "BackgroundRemover.h" +#include "SkinDetector.h" +#include "FingerCount.h" namespace computervision { class HandDetectRegion { public: - HandDetectRegion(); + HandDetectRegion(std::string id,int x_pos, int y_pos, int width, int height); - cv::Mat GenerateHandMaskSquare(); + cv::Mat GenerateHandMaskSquare(cv::Mat img); - void detectHand(cv::Mat camera_frame); + void DetectHand(cv::Mat& camera_frame); + + bool IsHandPresent(); + + void CalibrateBackground(); + void CalibrateSkin(); + + std::vector CalculateSkinTresholds(); + + void setSkinTresholds(std::vector& tresholds); private: int start_x_pos; int start_y_pos; - int height; - int width; + int region_height; + int region_width; + bool hand_mask_generated; + bool hand_present; + cv::Mat frame, frame_out, handMask, foreground, fingerCountDebug; + BackgroundRemover background_remover; + SkinDetector skin_detector; + handcalibration::HandCalibrator hand_calibrator; + std::string region_id; + + bool DrawHandMask(cv::Mat* input); }; } diff --git a/src/computervision/SkinDetector.cpp b/src/computervision/SkinDetector.cpp index 971ff2a..100f25f 100644 --- a/src/computervision/SkinDetector.cpp +++ b/src/computervision/SkinDetector.cpp @@ -1,4 +1,5 @@ #include "SkinDetector.h" +#include /* Author: Pierfrancesco Soffritti https://github.com/PierfrancescoSoffritti @@ -41,6 +42,29 @@ namespace computervision ); } + void SkinDetector::drawSkinColorSampler(Mat input,int x, int y,int width, int height) { + int frameWidth = width, frameHeight = height; + + int rectangleSize = 25; + Scalar rectangleColor = Scalar(0, 255, 255); + + skinColorSamplerRectangle1 = Rect(frameWidth / 5 + x, frameHeight / 2 + y, rectangleSize, rectangleSize); + skinColorSamplerRectangle2 = Rect(frameWidth / 5 + x, frameHeight / 3 + y, rectangleSize, rectangleSize); + + rectangle( + input, + skinColorSamplerRectangle1, + rectangleColor + ); + + rectangle( + input, + skinColorSamplerRectangle2, + rectangleColor + ); + } + + void SkinDetector::calibrate(Mat input) { Mat hsvInput; @@ -54,6 +78,19 @@ namespace computervision calibrated = true; } + std::vector SkinDetector::calibrateAndReturn(Mat input) + { + Mat hsvInput; + cvtColor(input, hsvInput, CV_BGR2HSV); + + Mat sample1 = Mat(hsvInput, skinColorSamplerRectangle1); + Mat sample2 = Mat(hsvInput, skinColorSamplerRectangle2); + + calibrated = true; + return calculateAndReturnTresholds(sample1, sample2); + + } + void SkinDetector::calculateThresholds(Mat sample1, Mat sample2) { int offsetLowThreshold = 80; int offsetHighThreshold = 30; @@ -75,6 +112,39 @@ namespace computervision //vHighThreshold = 255; } + std::vector SkinDetector::calculateAndReturnTresholds(Mat sample1, Mat sample2) + { + + calculateThresholds(sample1, sample2); + std::vector res; + res.push_back(hLowThreshold); + res.push_back(hHighThreshold); + res.push_back(sLowThreshold); + res.push_back(sHighThreshold); + res.push_back(vLowThreshold); + res.push_back(vHighThreshold); + return res; + } + + void SkinDetector::setTresholds(std::vector& tresholds) + { + if (tresholds.size() != 6) + { + std::cout << "tresholds array not the right size!" << std::endl; + return; + } + + hLowThreshold = tresholds[0]; + hHighThreshold = tresholds[1]; + sLowThreshold = tresholds[2]; + sHighThreshold = tresholds[3]; + vLowThreshold = tresholds[4]; + vHighThreshold = tresholds[5]; + + calibrated = true; + + } + Mat SkinDetector::getSkinMask(Mat input) { Mat skinMask; diff --git a/src/computervision/SkinDetector.h b/src/computervision/SkinDetector.h index c6cf158..02e9dfb 100644 --- a/src/computervision/SkinDetector.h +++ b/src/computervision/SkinDetector.h @@ -24,6 +24,9 @@ namespace computervision */ void drawSkinColorSampler(Mat input); + void drawSkinColorSampler(Mat input, int x, int y, int width, int heigth); + + /* * @brief calibrates the skin color detector with the given input frame * @@ -31,6 +34,10 @@ namespace computervision */ void calibrate(Mat input); + std::vector calibrateAndReturn(Mat input); + + void setTresholds(std::vector& tresholds); + /* * @brief gets the mask for the hand * @@ -63,6 +70,8 @@ namespace computervision */ void calculateThresholds(Mat sample1, Mat sample2); + std::vector calculateAndReturnTresholds(Mat sample1, Mat sample2); + /** * @brief the opening. it generates the structuring element and performs the morphological transformations required to detect the hand. * This needs to be done to get the skin mask. diff --git a/src/scenes/startup_Scene.cpp b/src/scenes/startup_Scene.cpp index e889139..dc9d12c 100644 --- a/src/scenes/startup_Scene.cpp +++ b/src/scenes/startup_Scene.cpp @@ -3,14 +3,20 @@ #include #include "startup_Scene.h" #include "../computervision/ObjectDetection.h" +#include "../computervision/HandDetectRegion.h" #include namespace scene { + std::vector regions; computervision::ObjectDetection objDetect; + computervision::HandDetectRegion reg1("left",20,100,150,150); + computervision::HandDetectRegion reg2("right",200,200,150,150); scene::Scenes scene::Startup_Scene::start(GLFWwindow *window) { + regions.push_back(reg1); + regions.push_back(reg2); while (return_value == scene::Scenes::STARTUP) { render(); @@ -30,9 +36,30 @@ namespace scene void scene::Startup_Scene::update(GLFWwindow* window) { - bool hand_detected = false; - objDetect.DetectHand(objDetect.ReadCamera(),hand_detected); - if (hand_detected) std::cout << "there's a hand!" << std::endl; + cv::Mat camera_frame = objDetect.ReadCamera(); + reg1.DetectHand(camera_frame); + reg2.DetectHand(camera_frame); + + cv::imshow("camera", camera_frame); + + int key = cv::waitKey(1); + + if (key == 98) // b, calibrate the background + { + for (int i = 0; i < regions.size(); i++) + { + regions[i].CalibrateBackground(); + } + } + else if (key == 115) // s, calibrate the skin color + { + std::vector tresholds = regions[0].CalculateSkinTresholds(); + for (int i = 1; i < regions.size(); i++) + { + regions[i].setSkinTresholds(tresholds); + } + + } } diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index 9658b78..4d0fabf 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -21,6 +21,7 @@ + @@ -46,6 +47,7 @@ + diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index de921ef..3e9201f 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -23,6 +23,7 @@ + @@ -54,6 +55,7 @@ + From cadee7d8e9a2a4fc5db6010f8ea996f8dbdf4a8b Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 13:38:47 +0200 Subject: [PATCH 20/24] [ADD] hand detection type enum --- src/computervision/HandDetectRegion.cpp | 19 ++++-------- .../calibration/HandCalibrator.cpp | 11 +++++-- .../calibration/HandCalibrator.h | 11 ++++++- src/scenes/in_Game_Scene.cpp | 29 ++++++++++++++++++ src/scenes/in_Game_Scene.h | 1 + src/scenes/startup_Scene.cpp | 30 ------------------- 6 files changed, 53 insertions(+), 48 deletions(-) diff --git a/src/computervision/HandDetectRegion.cpp b/src/computervision/HandDetectRegion.cpp index cf09f07..f789db6 100644 --- a/src/computervision/HandDetectRegion.cpp +++ b/src/computervision/HandDetectRegion.cpp @@ -29,23 +29,11 @@ namespace computervision // detect the hand contours handMask = skin_detector.getSkinMask(foreground); - // count the amount of fingers and put the info on the matrix - //fingerCountDebug = finger_count.findFingersCount(handMask, frame_out); - - //// get the amount of fingers - //int fingers_amount = finger_count.getAmountOfFingers(); - // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. DrawHandMask(&camera_frame); - - //hand_calibrator.SetAmountOfFingers(fingers_amount); - //finger_count.DrawHandContours(camera_frame); - //hand_calibrator.DrawHandCalibrationText(camera_frame); - //imshow("camera", camera_frame); - - imshow("output" + region_id, frame_out); - imshow("foreground" + region_id, foreground); + //imshow("output" + region_id, frame_out); + //imshow("foreground" + region_id, foreground); imshow("handMask" + region_id, handMask); /*imshow("handDetection", fingerCountDebug);*/ @@ -83,6 +71,7 @@ namespace computervision void HandDetectRegion::CalibrateBackground() { + std::cout << "calibrating background " << region_id << std::endl; background_remover.calibrate(frame_out); hand_calibrator.SetBackGroundCalibrated(true); } @@ -94,11 +83,13 @@ namespace computervision std::vector HandDetectRegion::CalculateSkinTresholds() { + std::cout << "calibrating skin " << region_id << std::endl; return skin_detector.calibrateAndReturn(frame_out); } void HandDetectRegion::setSkinTresholds(std::vector& tresholds) { + std::cout << "setting skin " << region_id << std::endl; skin_detector.setTresholds(tresholds); } diff --git a/src/computervision/calibration/HandCalibrator.cpp b/src/computervision/calibration/HandCalibrator.cpp index 36c64ea..86f3aa5 100644 --- a/src/computervision/calibration/HandCalibrator.cpp +++ b/src/computervision/calibration/HandCalibrator.cpp @@ -1,7 +1,8 @@ #include "HandCalibrator.h" -#define MIN_HAND_SIZE 10000 +#define MIN_MENU_HAND_SIZE 10000 +#define MIN_GAME_HAND_SIZE 10000 // todo change namespace computervision { namespace handcalibration @@ -56,7 +57,7 @@ namespace computervision fingers_amount = amount; } - bool HandCalibrator::CheckIfHandPresent(cv::Mat input_image) + bool HandCalibrator::CheckIfHandPresent(cv::Mat input_image, HandDetectionType type) { std::vector> points; cv::findContours(input_image, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); @@ -66,7 +67,11 @@ namespace computervision for (int p = 0; p < points.size(); p++) { int area = cv::contourArea(points[p]); - if (area > MIN_HAND_SIZE) return true; + if (type == handcalibration::HandDetectionType::MENU) + if (area > MIN_MENU_HAND_SIZE) return true; + + if (type == handcalibration::HandDetectionType::GAME) + if (area > MIN_GAME_HAND_SIZE) return true; } return false; diff --git a/src/computervision/calibration/HandCalibrator.h b/src/computervision/calibration/HandCalibrator.h index 9b2f7ae..fadc66d 100644 --- a/src/computervision/calibration/HandCalibrator.h +++ b/src/computervision/calibration/HandCalibrator.h @@ -7,11 +7,19 @@ namespace computervision { namespace handcalibration { + enum class HandDetectionType + { + MENU, + GAME + }; + class HandCalibrator { public: HandCalibrator(); + + /** * @brief draws the text to show the status of the calibration on the image * @@ -45,7 +53,7 @@ namespace computervision * * @param input_image the input image to check. */ - bool CheckIfHandPresent(cv::Mat input_image); + bool CheckIfHandPresent(cv::Mat input_image, HandDetectionType type); /** * @brief sets the amount of fingers that are currently detected. @@ -53,6 +61,7 @@ namespace computervision * @param amount the amount of fingers. */ void SetAmountOfFingers(int amount); + private: bool background_calibrated; diff --git a/src/scenes/in_Game_Scene.cpp b/src/scenes/in_Game_Scene.cpp index 0e7c268..bbadf93 100644 --- a/src/scenes/in_Game_Scene.cpp +++ b/src/scenes/in_Game_Scene.cpp @@ -10,6 +10,9 @@ #include "../renderEngine/renderer.h" #include "../shaders/entity_shader.h" #include "../toolbox/toolbox.h" +#include +#include "../computervision/HandDetectRegion.h" +#include "../computervision/ObjectDetection.h" namespace scene @@ -23,6 +26,11 @@ namespace scene entities::Camera camera(glm::vec3(0, 0, 0), glm::vec3(0, 0, 0)); std::vector guis; + std::vector regions; + computervision::ObjectDetection objDetect; + computervision::HandDetectRegion reg1("left", 20, 100, 150, 150); + computervision::HandDetectRegion reg2("right", 200, 200, 150, 150); + In_Game_Scene::In_Game_Scene() { @@ -105,6 +113,7 @@ namespace scene void scene::In_Game_Scene::update(GLFWwindow* window) { camera.Move(window); + update_hand_detection(); } void scene::In_Game_Scene::onKey(GLFWwindow* window, int key, int scancode, int action, int mods) @@ -113,6 +122,26 @@ namespace scene { return_value = scene::Scenes::STOP; } + + if (glfwGetKey(window, GLFW_KEY_B) == GLFW_PRESS) + { + reg1.CalibrateBackground(); + reg2.CalibrateBackground(); + } + + if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) + { + std::vector tresholds = reg1.CalculateSkinTresholds(); + reg2.setSkinTresholds(tresholds); + } } + void scene::In_Game_Scene::update_hand_detection() + { + cv::Mat camera_frame = objDetect.ReadCamera(); + reg1.DetectHand(camera_frame); + reg2.DetectHand(camera_frame); + + cv::imshow("camera", camera_frame); + } } diff --git a/src/scenes/in_Game_Scene.h b/src/scenes/in_Game_Scene.h index 4581855..cb420d3 100644 --- a/src/scenes/in_Game_Scene.h +++ b/src/scenes/in_Game_Scene.h @@ -8,6 +8,7 @@ namespace scene { private: scene::Scenes return_value = scene::Scenes::INGAME; + void update_hand_detection(); public: In_Game_Scene(); diff --git a/src/scenes/startup_Scene.cpp b/src/scenes/startup_Scene.cpp index dc9d12c..7ef51db 100644 --- a/src/scenes/startup_Scene.cpp +++ b/src/scenes/startup_Scene.cpp @@ -8,15 +8,9 @@ namespace scene { - std::vector regions; - computervision::ObjectDetection objDetect; - computervision::HandDetectRegion reg1("left",20,100,150,150); - computervision::HandDetectRegion reg2("right",200,200,150,150); scene::Scenes scene::Startup_Scene::start(GLFWwindow *window) { - regions.push_back(reg1); - regions.push_back(reg2); while (return_value == scene::Scenes::STARTUP) { render(); @@ -36,30 +30,6 @@ namespace scene void scene::Startup_Scene::update(GLFWwindow* window) { - cv::Mat camera_frame = objDetect.ReadCamera(); - reg1.DetectHand(camera_frame); - reg2.DetectHand(camera_frame); - - cv::imshow("camera", camera_frame); - - int key = cv::waitKey(1); - - if (key == 98) // b, calibrate the background - { - for (int i = 0; i < regions.size(); i++) - { - regions[i].CalibrateBackground(); - } - } - else if (key == 115) // s, calibrate the skin color - { - std::vector tresholds = regions[0].CalculateSkinTresholds(); - for (int i = 1; i < regions.size(); i++) - { - regions[i].setSkinTresholds(tresholds); - } - - } } From 5e137faef5ead4894838780f1a3475205a4602fe Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 14:48:46 +0200 Subject: [PATCH 21/24] [ADD] up left and right detection regions --- src/computervision/HandDetectRegion.cpp | 2 +- src/computervision/HandDetectRegion.h | 10 +++++++ src/computervision/ObjectDetection.cpp | 2 +- .../calibration/HandCalibrator.cpp | 5 +++- src/scenes/in_Game_Scene.cpp | 28 +++++++++++++------ 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/computervision/HandDetectRegion.cpp b/src/computervision/HandDetectRegion.cpp index f789db6..9c451a9 100644 --- a/src/computervision/HandDetectRegion.cpp +++ b/src/computervision/HandDetectRegion.cpp @@ -37,7 +37,7 @@ namespace computervision imshow("handMask" + region_id, handMask); /*imshow("handDetection", fingerCountDebug);*/ - hand_present = hand_calibrator.CheckIfHandPresent(handMask); + hand_present = hand_calibrator.CheckIfHandPresent(handMask,handcalibration::HandDetectionType::GAME); std::string text = (hand_present ? "hand" : "no"); cv::putText(camera_frame, text, cv::Point(start_x_pos, start_y_pos), cv::FONT_HERSHEY_COMPLEX, 2.0, cv::Scalar(0, 255, 255), 2); hand_calibrator.SetHandPresent(hand_present); diff --git a/src/computervision/HandDetectRegion.h b/src/computervision/HandDetectRegion.h index 3594ff3..7cc1a9a 100644 --- a/src/computervision/HandDetectRegion.h +++ b/src/computervision/HandDetectRegion.h @@ -14,6 +14,16 @@ namespace computervision public: HandDetectRegion(std::string id,int x_pos, int y_pos, int width, int height); + void SetXPos(int x) { start_x_pos = x; } + void SetYPos(int y) { start_y_pos = y; } + int GetXPos() { return start_x_pos; } + int GetYPos() { return start_y_pos; } + + void SetWidth(int width) { region_width = width; } + void SetHeigth(int height) { region_height = height; } + int GetWidth() { return region_width; } + int GetHeight() { return region_height; } + cv::Mat GenerateHandMaskSquare(cv::Mat img); void DetectHand(cv::Mat& camera_frame); diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 530e474..155512e 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -74,7 +74,7 @@ namespace computervision imshow("handMask", handMask); imshow("handDetection", fingerCountDebug);*/ - hand_present = hand_calibrator.CheckIfHandPresent(handMask); + hand_present = hand_calibrator.CheckIfHandPresent(handMask,handcalibration::HandDetectionType::MENU); hand_calibrator.SetHandPresent(hand_present); diff --git a/src/computervision/calibration/HandCalibrator.cpp b/src/computervision/calibration/HandCalibrator.cpp index 86f3aa5..caa4d03 100644 --- a/src/computervision/calibration/HandCalibrator.cpp +++ b/src/computervision/calibration/HandCalibrator.cpp @@ -1,8 +1,9 @@ #include "HandCalibrator.h" +#include #define MIN_MENU_HAND_SIZE 10000 -#define MIN_GAME_HAND_SIZE 10000 // todo change +#define MIN_GAME_HAND_SIZE 4000 // todo change namespace computervision { namespace handcalibration @@ -64,9 +65,11 @@ namespace computervision if (points.size() == 0) return false; + std::cout << std::endl; for (int p = 0; p < points.size(); p++) { int area = cv::contourArea(points[p]); + std::cout << area << std::endl; if (type == handcalibration::HandDetectionType::MENU) if (area > MIN_MENU_HAND_SIZE) return true; diff --git a/src/scenes/in_Game_Scene.cpp b/src/scenes/in_Game_Scene.cpp index bbadf93..0eede19 100644 --- a/src/scenes/in_Game_Scene.cpp +++ b/src/scenes/in_Game_Scene.cpp @@ -26,10 +26,10 @@ namespace scene entities::Camera camera(glm::vec3(0, 0, 0), glm::vec3(0, 0, 0)); std::vector guis; + std::vector regions; computervision::ObjectDetection objDetect; - computervision::HandDetectRegion reg1("left", 20, 100, 150, 150); - computervision::HandDetectRegion reg2("right", 200, 200, 150, 150); + computervision::HandDetectRegion reg_left("left", 0, 0, 150, 150), reg_right("right", 0, 0, 150, 150), reg_up("up", 0, 0, 150, 150); In_Game_Scene::In_Game_Scene() @@ -44,6 +44,15 @@ namespace scene scene::Scenes scene::In_Game_Scene::start(GLFWwindow* window) { + cv::Mat camera_frame = objDetect.ReadCamera(); // get camera frame to know the width and heigth + reg_left.SetXPos(10); + reg_left.SetYPos(camera_frame.rows / 2 - reg_left.GetHeight()/2); + reg_right.SetXPos(camera_frame.cols - 10 - reg_right.GetWidth()); + reg_right.SetYPos(camera_frame.rows / 2 - reg_right.GetHeight()/2); + reg_up.SetXPos(camera_frame.cols / 2 - reg_up.GetWidth() / 2); + reg_up.SetYPos(10); + + raw_model = render_engine::LoadObjModel("res/House.obj"); texture = { render_engine::loader::LoadTexture("res/Texture.png") }; texture.shine_damper = 10; @@ -125,22 +134,25 @@ namespace scene if (glfwGetKey(window, GLFW_KEY_B) == GLFW_PRESS) { - reg1.CalibrateBackground(); - reg2.CalibrateBackground(); + reg_left.CalibrateBackground(); + reg_right.CalibrateBackground(); + reg_up.CalibrateBackground(); } if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) { - std::vector tresholds = reg1.CalculateSkinTresholds(); - reg2.setSkinTresholds(tresholds); + std::vector tresholds = reg_left.CalculateSkinTresholds(); + reg_right.setSkinTresholds(tresholds); + reg_up.setSkinTresholds(tresholds); } } void scene::In_Game_Scene::update_hand_detection() { cv::Mat camera_frame = objDetect.ReadCamera(); - reg1.DetectHand(camera_frame); - reg2.DetectHand(camera_frame); + reg_left.DetectHand(camera_frame); + reg_right.DetectHand(camera_frame); + reg_up.DetectHand(camera_frame); cv::imshow("camera", camera_frame); } From 27594d466b405fe13a0798401e22304d8e7796c3 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 15:11:54 +0200 Subject: [PATCH 22/24] [ADD] better info on camera --- src/computervision/HandDetectRegion.cpp | 13 +++++++++--- .../calibration/HandCalibrator.cpp | 21 ++++++++++++------- .../calibration/HandCalibrator.h | 2 ++ src/scenes/in_Game_Scene.cpp | 1 + 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/computervision/HandDetectRegion.cpp b/src/computervision/HandDetectRegion.cpp index 9c451a9..3ef6c28 100644 --- a/src/computervision/HandDetectRegion.cpp +++ b/src/computervision/HandDetectRegion.cpp @@ -38,10 +38,15 @@ namespace computervision /*imshow("handDetection", fingerCountDebug);*/ hand_present = hand_calibrator.CheckIfHandPresent(handMask,handcalibration::HandDetectionType::GAME); - std::string text = (hand_present ? "hand" : "no"); - cv::putText(camera_frame, text, cv::Point(start_x_pos, start_y_pos), cv::FONT_HERSHEY_COMPLEX, 2.0, cv::Scalar(0, 255, 255), 2); + //std::string text = (hand_present ? "hand" : "no"); + //cv::putText(camera_frame, text, cv::Point(start_x_pos, start_y_pos), cv::FONT_HERSHEY_COMPLEX, 2.0, cv::Scalar(0, 255, 255), 2); hand_calibrator.SetHandPresent(hand_present); + //draw black rectangle behind calibration information text + cv::rectangle(camera_frame, cv::Rect(0, camera_frame.rows - 55, 450, camera_frame.cols), cv::Scalar(0, 0, 0), -1); + + hand_calibrator.DrawBackgroundSkinCalibrated(camera_frame); + } cv::Mat HandDetectRegion::GenerateHandMaskSquare(cv::Mat img) @@ -60,7 +65,7 @@ namespace computervision bool HandDetectRegion::DrawHandMask(cv::Mat* input) { if (!hand_mask_generated) return false; - rectangle(*input, Rect(start_x_pos, start_y_pos, region_width, region_height), Scalar(255, 255, 255)); + rectangle(*input, Rect(start_x_pos, start_y_pos, region_width, region_height), (hand_present ? Scalar(0, 255, 0) : Scalar(0,0,255)),2); return true; } @@ -84,6 +89,7 @@ namespace computervision std::vector HandDetectRegion::CalculateSkinTresholds() { std::cout << "calibrating skin " << region_id << std::endl; + hand_calibrator.SetSkinCalibration(true); return skin_detector.calibrateAndReturn(frame_out); } @@ -91,6 +97,7 @@ namespace computervision { std::cout << "setting skin " << region_id << std::endl; skin_detector.setTresholds(tresholds); + hand_calibrator.SetSkinCalibration(true); } } diff --git a/src/computervision/calibration/HandCalibrator.cpp b/src/computervision/calibration/HandCalibrator.cpp index caa4d03..dcf2911 100644 --- a/src/computervision/calibration/HandCalibrator.cpp +++ b/src/computervision/calibration/HandCalibrator.cpp @@ -3,7 +3,7 @@ #include #define MIN_MENU_HAND_SIZE 10000 -#define MIN_GAME_HAND_SIZE 4000 // todo change +#define MIN_GAME_HAND_SIZE 3000 // todo change namespace computervision { namespace handcalibration @@ -25,11 +25,7 @@ namespace computervision cv::putText(output_frame, "hand in frame:", cv::Point(5, output_frame.rows - 50), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 67, 15, 15), hand_present ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); - cv::putText(output_frame, "background calibrated:", cv::Point(5, output_frame.rows - 30), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); - cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 47, 15, 15), background_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); - - cv::putText(output_frame, "skin color calibrated:", cv::Point(5, output_frame.rows - 10), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); - cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 27, 15, 15), skintone_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + DrawBackgroundSkinCalibrated(output_frame); if (hand_present) { @@ -38,6 +34,16 @@ namespace computervision } } + void HandCalibrator::DrawBackgroundSkinCalibrated(cv::Mat& output_frame) + { + + cv::putText(output_frame, "background calibrated:", cv::Point(5, output_frame.rows - 30), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 47, 15, 15), background_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + + cv::putText(output_frame, "skin color calibrated:", cv::Point(5, output_frame.rows - 10), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 27, 15, 15), skintone_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + } + void HandCalibrator::SetSkinCalibration(bool val) { skintone_calibrated = val; @@ -65,11 +71,10 @@ namespace computervision if (points.size() == 0) return false; - std::cout << std::endl; for (int p = 0; p < points.size(); p++) { int area = cv::contourArea(points[p]); - std::cout << area << std::endl; + if (type == handcalibration::HandDetectionType::MENU) if (area > MIN_MENU_HAND_SIZE) return true; diff --git a/src/computervision/calibration/HandCalibrator.h b/src/computervision/calibration/HandCalibrator.h index fadc66d..ca71fde 100644 --- a/src/computervision/calibration/HandCalibrator.h +++ b/src/computervision/calibration/HandCalibrator.h @@ -62,6 +62,8 @@ namespace computervision */ void SetAmountOfFingers(int amount); + void DrawBackgroundSkinCalibrated(cv::Mat& output_frame); + private: bool background_calibrated; diff --git a/src/scenes/in_Game_Scene.cpp b/src/scenes/in_Game_Scene.cpp index 0eede19..44db2e9 100644 --- a/src/scenes/in_Game_Scene.cpp +++ b/src/scenes/in_Game_Scene.cpp @@ -44,6 +44,7 @@ namespace scene scene::Scenes scene::In_Game_Scene::start(GLFWwindow* window) { + // set up squares according to size of camera input cv::Mat camera_frame = objDetect.ReadCamera(); // get camera frame to know the width and heigth reg_left.SetXPos(10); reg_left.SetYPos(camera_frame.rows / 2 - reg_left.GetHeight()/2); From 88252f4dc8be82211652f2fdbd075623e1bff122 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 15:35:03 +0200 Subject: [PATCH 23/24] [ADD] static skin treshold --- src/computervision/HandDetectRegion.cpp | 4 +++- src/computervision/HandDetectRegion.h | 1 + src/computervision/ObjectDetection.cpp | 11 +++++++++-- src/computervision/calibration/StaticSkinTreshold.h | 10 ++++++++++ src/scenes/in_Game_Scene.cpp | 1 + 5 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 src/computervision/calibration/StaticSkinTreshold.h diff --git a/src/computervision/HandDetectRegion.cpp b/src/computervision/HandDetectRegion.cpp index 3ef6c28..87f3538 100644 --- a/src/computervision/HandDetectRegion.cpp +++ b/src/computervision/HandDetectRegion.cpp @@ -34,7 +34,7 @@ namespace computervision //imshow("output" + region_id, frame_out); //imshow("foreground" + region_id, foreground); - imshow("handMask" + region_id, handMask); + //imshow("handMask" + region_id, handMask); /*imshow("handDetection", fingerCountDebug);*/ hand_present = hand_calibrator.CheckIfHandPresent(handMask,handcalibration::HandDetectionType::GAME); @@ -47,6 +47,8 @@ namespace computervision hand_calibrator.DrawBackgroundSkinCalibrated(camera_frame); + + } cv::Mat HandDetectRegion::GenerateHandMaskSquare(cv::Mat img) diff --git a/src/computervision/HandDetectRegion.h b/src/computervision/HandDetectRegion.h index 7cc1a9a..067badd 100644 --- a/src/computervision/HandDetectRegion.h +++ b/src/computervision/HandDetectRegion.h @@ -7,6 +7,7 @@ #include "BackgroundRemover.h" #include "SkinDetector.h" #include "FingerCount.h" +#include "calibration/StaticSkinTreshold.h" namespace computervision { class HandDetectRegion diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 155512e..ad3e13e 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -10,6 +10,8 @@ #include "async/StaticCameraInstance.h" #include "calibration/HandCalibrator.h" +#include "calibration/StaticSkinTreshold.h" + namespace computervision { @@ -62,7 +64,6 @@ namespace computervision // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. DrawHandMask(&camera_frame); - hand_calibrator.SetAmountOfFingers(fingers_amount); finger_count.DrawHandContours(camera_frame); @@ -88,7 +89,13 @@ namespace computervision } else if (key == 115) // s, calibrate the skin color { - skin_detector.calibrate(input_frame); + std::vector treshold = skin_detector.calibrateAndReturn(input_frame); + StaticSkinTreshold::hLowThreshold = treshold[0]; + StaticSkinTreshold::hHighThreshold = treshold[1]; + StaticSkinTreshold::sLowThreshold = treshold[2]; + StaticSkinTreshold::sHighThreshold = treshold[3]; + StaticSkinTreshold::vLowThreshold = treshold[4]; + StaticSkinTreshold::vHighThreshold = treshold[5]; hand_calibrator.SetSkinCalibration(true); } diff --git a/src/computervision/calibration/StaticSkinTreshold.h b/src/computervision/calibration/StaticSkinTreshold.h new file mode 100644 index 0000000..cfce1f7 --- /dev/null +++ b/src/computervision/calibration/StaticSkinTreshold.h @@ -0,0 +1,10 @@ +#pragma once +namespace StaticSkinTreshold +{ + static int hLowThreshold, + hHighThreshold, + sLowThreshold, + sHighThreshold, + vLowThreshold, + vHighThreshold; +}; diff --git a/src/scenes/in_Game_Scene.cpp b/src/scenes/in_Game_Scene.cpp index 44db2e9..48d099b 100644 --- a/src/scenes/in_Game_Scene.cpp +++ b/src/scenes/in_Game_Scene.cpp @@ -130,6 +130,7 @@ namespace scene { if (glfwGetKey(window, GLFW_KEY_SPACE) == GLFW_PRESS) { + cv::destroyWindow("camera"); return_value = scene::Scenes::STOP; } From a8996f63ef50177615893c7c88e220b35c7b2ba2 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Tue, 8 Jun 2021 16:06:46 +0200 Subject: [PATCH 24/24] [ADD] start game scene fingers --- src/computervision/HandDetectRegion.h | 1 - src/computervision/ObjectDetection.cpp | 13 ++++--------- src/computervision/calibration/StaticSkinTreshold.h | 10 ---------- src/scenes/in_Game_Scene.cpp | 7 ++++--- src/scenes/startup_Scene.cpp | 6 ++++-- wk2_fps.vcxproj | 1 + wk2_fps.vcxproj.filters | 1 + 7 files changed, 14 insertions(+), 25 deletions(-) delete mode 100644 src/computervision/calibration/StaticSkinTreshold.h diff --git a/src/computervision/HandDetectRegion.h b/src/computervision/HandDetectRegion.h index 067badd..7cc1a9a 100644 --- a/src/computervision/HandDetectRegion.h +++ b/src/computervision/HandDetectRegion.h @@ -7,7 +7,6 @@ #include "BackgroundRemover.h" #include "SkinDetector.h" #include "FingerCount.h" -#include "calibration/StaticSkinTreshold.h" namespace computervision { class HandDetectRegion diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index ad3e13e..829953c 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -10,8 +10,6 @@ #include "async/StaticCameraInstance.h" #include "calibration/HandCalibrator.h" -#include "calibration/StaticSkinTreshold.h" - namespace computervision { @@ -64,12 +62,15 @@ namespace computervision // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. DrawHandMask(&camera_frame); + hand_calibrator.SetAmountOfFingers(fingers_amount); finger_count.DrawHandContours(camera_frame); hand_calibrator.DrawHandCalibrationText(camera_frame); imshow("camera", camera_frame); + + /*imshow("output", frame_out); imshow("foreground", foreground); imshow("handMask", handMask); @@ -89,13 +90,7 @@ namespace computervision } else if (key == 115) // s, calibrate the skin color { - std::vector treshold = skin_detector.calibrateAndReturn(input_frame); - StaticSkinTreshold::hLowThreshold = treshold[0]; - StaticSkinTreshold::hHighThreshold = treshold[1]; - StaticSkinTreshold::sLowThreshold = treshold[2]; - StaticSkinTreshold::sHighThreshold = treshold[3]; - StaticSkinTreshold::vLowThreshold = treshold[4]; - StaticSkinTreshold::vHighThreshold = treshold[5]; + skin_detector.calibrate(input_frame); hand_calibrator.SetSkinCalibration(true); } diff --git a/src/computervision/calibration/StaticSkinTreshold.h b/src/computervision/calibration/StaticSkinTreshold.h deleted file mode 100644 index cfce1f7..0000000 --- a/src/computervision/calibration/StaticSkinTreshold.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once -namespace StaticSkinTreshold -{ - static int hLowThreshold, - hHighThreshold, - sLowThreshold, - sHighThreshold, - vLowThreshold, - vHighThreshold; -}; diff --git a/src/scenes/in_Game_Scene.cpp b/src/scenes/in_Game_Scene.cpp index 48d099b..f0d5946 100644 --- a/src/scenes/in_Game_Scene.cpp +++ b/src/scenes/in_Game_Scene.cpp @@ -28,7 +28,6 @@ namespace scene std::vector regions; - computervision::ObjectDetection objDetect; computervision::HandDetectRegion reg_left("left", 0, 0, 150, 150), reg_right("right", 0, 0, 150, 150), reg_up("up", 0, 0, 150, 150); @@ -45,7 +44,8 @@ namespace scene scene::Scenes scene::In_Game_Scene::start(GLFWwindow* window) { // set up squares according to size of camera input - cv::Mat camera_frame = objDetect.ReadCamera(); // get camera frame to know the width and heigth + cv::Mat camera_frame; + static_camera::getCap().read(camera_frame); // get camera frame to know the width and heigth reg_left.SetXPos(10); reg_left.SetYPos(camera_frame.rows / 2 - reg_left.GetHeight()/2); reg_right.SetXPos(camera_frame.cols - 10 - reg_right.GetWidth()); @@ -151,7 +151,8 @@ namespace scene void scene::In_Game_Scene::update_hand_detection() { - cv::Mat camera_frame = objDetect.ReadCamera(); + cv::Mat camera_frame; + static_camera::getCap().read(camera_frame); reg_left.DetectHand(camera_frame); reg_right.DetectHand(camera_frame); reg_up.DetectHand(camera_frame); diff --git a/src/scenes/startup_Scene.cpp b/src/scenes/startup_Scene.cpp index 7ef51db..a492c18 100644 --- a/src/scenes/startup_Scene.cpp +++ b/src/scenes/startup_Scene.cpp @@ -8,7 +8,7 @@ namespace scene { - + computervision::ObjectDetection objDetect; scene::Scenes scene::Startup_Scene::start(GLFWwindow *window) { while (return_value == scene::Scenes::STARTUP) @@ -30,7 +30,8 @@ namespace scene void scene::Startup_Scene::update(GLFWwindow* window) { - + bool hand_present; + objDetect.DetectHand(objDetect.ReadCamera(),hand_present); } void scene::Startup_Scene::onKey(GLFWwindow* window, int key, int scancode, int action, int mods) @@ -38,6 +39,7 @@ namespace scene if (glfwGetKey(window, GLFW_KEY_SPACE) == GLFW_PRESS) { return_value = scene::Scenes::INGAME; + cv::destroyWindow("camera"); } } } diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index 4d0fabf..28b43a2 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -47,6 +47,7 @@ + diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index 3e9201f..abb170d 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -56,6 +56,7 @@ +