diff --git a/res/pose/coco/pose_deploy_linevec.prototxt b/res/pose/coco/pose_deploy_linevec.prototxt new file mode 100644 index 0000000..90a54fd --- /dev/null +++ b/res/pose/coco/pose_deploy_linevec.prototxt @@ -0,0 +1,2976 @@ +input: "image" +input_dim: 1 +input_dim: 3 +input_dim: 1 # This value will be defined at runtime +input_dim: 1 # This value will be defined at runtime +layer { + name: "conv1_1" + type: "Convolution" + bottom: "image" + top: "conv1_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1_stage1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1_stage1" + top: "conv2_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2_stage1" + type: "Pooling" + bottom: "conv2_2" + top: "pool2_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2_stage1" + top: "conv3_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "conv3_4" + type: "Convolution" + bottom: "conv3_3" + top: "conv3_4" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_4" + type: "ReLU" + bottom: "conv3_4" + top: "conv3_4" +} +layer { + name: "pool3_stage1" + type: "Pooling" + bottom: "conv3_4" + top: "pool3_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3_stage1" + top: "conv4_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3_CPM" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_3_CPM" + type: "ReLU" + bottom: "conv4_3_CPM" + top: "conv4_3_CPM" +} +layer { + name: "conv4_4_CPM" + type: "Convolution" + bottom: "conv4_3_CPM" + top: "conv4_4_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_4_CPM" + type: "ReLU" + bottom: "conv4_4_CPM" + top: "conv4_4_CPM" +} +layer { + name: "conv5_1_CPM_L1" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L1" + type: "ReLU" + bottom: "conv5_1_CPM_L1" + top: "conv5_1_CPM_L1" +} +layer { + name: "conv5_1_CPM_L2" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L2" + type: "ReLU" + bottom: "conv5_1_CPM_L2" + top: "conv5_1_CPM_L2" +} +layer { + name: "conv5_2_CPM_L1" + type: "Convolution" + bottom: "conv5_1_CPM_L1" + top: "conv5_2_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L1" + type: "ReLU" + bottom: "conv5_2_CPM_L1" + top: "conv5_2_CPM_L1" +} +layer { + name: "conv5_2_CPM_L2" + type: "Convolution" + bottom: "conv5_1_CPM_L2" + top: "conv5_2_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L2" + type: "ReLU" + bottom: "conv5_2_CPM_L2" + top: "conv5_2_CPM_L2" +} +layer { + name: "conv5_3_CPM_L1" + type: "Convolution" + bottom: "conv5_2_CPM_L1" + top: "conv5_3_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L1" + type: "ReLU" + bottom: "conv5_3_CPM_L1" + top: "conv5_3_CPM_L1" +} +layer { + name: "conv5_3_CPM_L2" + type: "Convolution" + bottom: "conv5_2_CPM_L2" + top: "conv5_3_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L2" + type: "ReLU" + bottom: "conv5_3_CPM_L2" + top: "conv5_3_CPM_L2" +} +layer { + name: "conv5_4_CPM_L1" + type: "Convolution" + bottom: "conv5_3_CPM_L1" + top: "conv5_4_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L1" + type: "ReLU" + bottom: "conv5_4_CPM_L1" + top: "conv5_4_CPM_L1" +} +layer { + name: "conv5_4_CPM_L2" + type: "Convolution" + bottom: "conv5_3_CPM_L2" + top: "conv5_4_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L2" + type: "ReLU" + bottom: "conv5_4_CPM_L2" + top: "conv5_4_CPM_L2" +} +layer { + name: "conv5_5_CPM_L1" + type: "Convolution" + bottom: "conv5_4_CPM_L1" + top: "conv5_5_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "conv5_5_CPM_L2" + type: "Convolution" + bottom: "conv5_4_CPM_L2" + top: "conv5_5_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage2" + type: "Concat" + bottom: "conv5_5_CPM_L1" + bottom: "conv5_5_CPM_L2" + bottom: "conv4_4_CPM" + top: "concat_stage2" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage2_L1" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L1" + type: "ReLU" + bottom: "Mconv1_stage2_L1" + top: "Mconv1_stage2_L1" +} +layer { + name: "Mconv1_stage2_L2" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L2" + type: "ReLU" + bottom: "Mconv1_stage2_L2" + top: "Mconv1_stage2_L2" +} +layer { + name: "Mconv2_stage2_L1" + type: "Convolution" + bottom: "Mconv1_stage2_L1" + top: "Mconv2_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L1" + type: "ReLU" + bottom: "Mconv2_stage2_L1" + top: "Mconv2_stage2_L1" +} +layer { + name: "Mconv2_stage2_L2" + type: "Convolution" + bottom: "Mconv1_stage2_L2" + top: "Mconv2_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L2" + type: "ReLU" + bottom: "Mconv2_stage2_L2" + top: "Mconv2_stage2_L2" +} +layer { + name: "Mconv3_stage2_L1" + type: "Convolution" + bottom: "Mconv2_stage2_L1" + top: "Mconv3_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L1" + type: "ReLU" + bottom: "Mconv3_stage2_L1" + top: "Mconv3_stage2_L1" +} +layer { + name: "Mconv3_stage2_L2" + type: "Convolution" + bottom: "Mconv2_stage2_L2" + top: "Mconv3_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L2" + type: "ReLU" + bottom: "Mconv3_stage2_L2" + top: "Mconv3_stage2_L2" +} +layer { + name: "Mconv4_stage2_L1" + type: "Convolution" + bottom: "Mconv3_stage2_L1" + top: "Mconv4_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L1" + type: "ReLU" + bottom: "Mconv4_stage2_L1" + top: "Mconv4_stage2_L1" +} +layer { + name: "Mconv4_stage2_L2" + type: "Convolution" + bottom: "Mconv3_stage2_L2" + top: "Mconv4_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L2" + type: "ReLU" + bottom: "Mconv4_stage2_L2" + top: "Mconv4_stage2_L2" +} +layer { + name: "Mconv5_stage2_L1" + type: "Convolution" + bottom: "Mconv4_stage2_L1" + top: "Mconv5_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L1" + type: "ReLU" + bottom: "Mconv5_stage2_L1" + top: "Mconv5_stage2_L1" +} +layer { + name: "Mconv5_stage2_L2" + type: "Convolution" + bottom: "Mconv4_stage2_L2" + top: "Mconv5_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L2" + type: "ReLU" + bottom: "Mconv5_stage2_L2" + top: "Mconv5_stage2_L2" +} +layer { + name: "Mconv6_stage2_L1" + type: "Convolution" + bottom: "Mconv5_stage2_L1" + top: "Mconv6_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L1" + type: "ReLU" + bottom: "Mconv6_stage2_L1" + top: "Mconv6_stage2_L1" +} +layer { + name: "Mconv6_stage2_L2" + type: "Convolution" + bottom: "Mconv5_stage2_L2" + top: "Mconv6_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L2" + type: "ReLU" + bottom: "Mconv6_stage2_L2" + top: "Mconv6_stage2_L2" +} +layer { + name: "Mconv7_stage2_L1" + type: "Convolution" + bottom: "Mconv6_stage2_L1" + top: "Mconv7_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage2_L2" + type: "Convolution" + bottom: "Mconv6_stage2_L2" + top: "Mconv7_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage3" + type: "Concat" + bottom: "Mconv7_stage2_L1" + bottom: "Mconv7_stage2_L2" + bottom: "conv4_4_CPM" + top: "concat_stage3" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage3_L1" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L1" + type: "ReLU" + bottom: "Mconv1_stage3_L1" + top: "Mconv1_stage3_L1" +} +layer { + name: "Mconv1_stage3_L2" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L2" + type: "ReLU" + bottom: "Mconv1_stage3_L2" + top: "Mconv1_stage3_L2" +} +layer { + name: "Mconv2_stage3_L1" + type: "Convolution" + bottom: "Mconv1_stage3_L1" + top: "Mconv2_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L1" + type: "ReLU" + bottom: "Mconv2_stage3_L1" + top: "Mconv2_stage3_L1" +} +layer { + name: "Mconv2_stage3_L2" + type: "Convolution" + bottom: "Mconv1_stage3_L2" + top: "Mconv2_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L2" + type: "ReLU" + bottom: "Mconv2_stage3_L2" + top: "Mconv2_stage3_L2" +} +layer { + name: "Mconv3_stage3_L1" + type: "Convolution" + bottom: "Mconv2_stage3_L1" + top: "Mconv3_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L1" + type: "ReLU" + bottom: "Mconv3_stage3_L1" + top: "Mconv3_stage3_L1" +} +layer { + name: "Mconv3_stage3_L2" + type: "Convolution" + bottom: "Mconv2_stage3_L2" + top: "Mconv3_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L2" + type: "ReLU" + bottom: "Mconv3_stage3_L2" + top: "Mconv3_stage3_L2" +} +layer { + name: "Mconv4_stage3_L1" + type: "Convolution" + bottom: "Mconv3_stage3_L1" + top: "Mconv4_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L1" + type: "ReLU" + bottom: "Mconv4_stage3_L1" + top: "Mconv4_stage3_L1" +} +layer { + name: "Mconv4_stage3_L2" + type: "Convolution" + bottom: "Mconv3_stage3_L2" + top: "Mconv4_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L2" + type: "ReLU" + bottom: "Mconv4_stage3_L2" + top: "Mconv4_stage3_L2" +} +layer { + name: "Mconv5_stage3_L1" + type: "Convolution" + bottom: "Mconv4_stage3_L1" + top: "Mconv5_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L1" + type: "ReLU" + bottom: "Mconv5_stage3_L1" + top: "Mconv5_stage3_L1" +} +layer { + name: "Mconv5_stage3_L2" + type: "Convolution" + bottom: "Mconv4_stage3_L2" + top: "Mconv5_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L2" + type: "ReLU" + bottom: "Mconv5_stage3_L2" + top: "Mconv5_stage3_L2" +} +layer { + name: "Mconv6_stage3_L1" + type: "Convolution" + bottom: "Mconv5_stage3_L1" + top: "Mconv6_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L1" + type: "ReLU" + bottom: "Mconv6_stage3_L1" + top: "Mconv6_stage3_L1" +} +layer { + name: "Mconv6_stage3_L2" + type: "Convolution" + bottom: "Mconv5_stage3_L2" + top: "Mconv6_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L2" + type: "ReLU" + bottom: "Mconv6_stage3_L2" + top: "Mconv6_stage3_L2" +} +layer { + name: "Mconv7_stage3_L1" + type: "Convolution" + bottom: "Mconv6_stage3_L1" + top: "Mconv7_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage3_L2" + type: "Convolution" + bottom: "Mconv6_stage3_L2" + top: "Mconv7_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage4" + type: "Concat" + bottom: "Mconv7_stage3_L1" + bottom: "Mconv7_stage3_L2" + bottom: "conv4_4_CPM" + top: "concat_stage4" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage4_L1" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L1" + type: "ReLU" + bottom: "Mconv1_stage4_L1" + top: "Mconv1_stage4_L1" +} +layer { + name: "Mconv1_stage4_L2" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L2" + type: "ReLU" + bottom: "Mconv1_stage4_L2" + top: "Mconv1_stage4_L2" +} +layer { + name: "Mconv2_stage4_L1" + type: "Convolution" + bottom: "Mconv1_stage4_L1" + top: "Mconv2_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L1" + type: "ReLU" + bottom: "Mconv2_stage4_L1" + top: "Mconv2_stage4_L1" +} +layer { + name: "Mconv2_stage4_L2" + type: "Convolution" + bottom: "Mconv1_stage4_L2" + top: "Mconv2_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L2" + type: "ReLU" + bottom: "Mconv2_stage4_L2" + top: "Mconv2_stage4_L2" +} +layer { + name: "Mconv3_stage4_L1" + type: "Convolution" + bottom: "Mconv2_stage4_L1" + top: "Mconv3_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L1" + type: "ReLU" + bottom: "Mconv3_stage4_L1" + top: "Mconv3_stage4_L1" +} +layer { + name: "Mconv3_stage4_L2" + type: "Convolution" + bottom: "Mconv2_stage4_L2" + top: "Mconv3_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L2" + type: "ReLU" + bottom: "Mconv3_stage4_L2" + top: "Mconv3_stage4_L2" +} +layer { + name: "Mconv4_stage4_L1" + type: "Convolution" + bottom: "Mconv3_stage4_L1" + top: "Mconv4_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L1" + type: "ReLU" + bottom: "Mconv4_stage4_L1" + top: "Mconv4_stage4_L1" +} +layer { + name: "Mconv4_stage4_L2" + type: "Convolution" + bottom: "Mconv3_stage4_L2" + top: "Mconv4_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L2" + type: "ReLU" + bottom: "Mconv4_stage4_L2" + top: "Mconv4_stage4_L2" +} +layer { + name: "Mconv5_stage4_L1" + type: "Convolution" + bottom: "Mconv4_stage4_L1" + top: "Mconv5_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L1" + type: "ReLU" + bottom: "Mconv5_stage4_L1" + top: "Mconv5_stage4_L1" +} +layer { + name: "Mconv5_stage4_L2" + type: "Convolution" + bottom: "Mconv4_stage4_L2" + top: "Mconv5_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L2" + type: "ReLU" + bottom: "Mconv5_stage4_L2" + top: "Mconv5_stage4_L2" +} +layer { + name: "Mconv6_stage4_L1" + type: "Convolution" + bottom: "Mconv5_stage4_L1" + top: "Mconv6_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L1" + type: "ReLU" + bottom: "Mconv6_stage4_L1" + top: "Mconv6_stage4_L1" +} +layer { + name: "Mconv6_stage4_L2" + type: "Convolution" + bottom: "Mconv5_stage4_L2" + top: "Mconv6_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L2" + type: "ReLU" + bottom: "Mconv6_stage4_L2" + top: "Mconv6_stage4_L2" +} +layer { + name: "Mconv7_stage4_L1" + type: "Convolution" + bottom: "Mconv6_stage4_L1" + top: "Mconv7_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage4_L2" + type: "Convolution" + bottom: "Mconv6_stage4_L2" + top: "Mconv7_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage5" + type: "Concat" + bottom: "Mconv7_stage4_L1" + bottom: "Mconv7_stage4_L2" + bottom: "conv4_4_CPM" + top: "concat_stage5" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage5_L1" + type: "Convolution" + bottom: "concat_stage5" + top: "Mconv1_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage5_L1" + type: "ReLU" + bottom: "Mconv1_stage5_L1" + top: "Mconv1_stage5_L1" +} +layer { + name: "Mconv1_stage5_L2" + type: "Convolution" + bottom: "concat_stage5" + top: "Mconv1_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage5_L2" + type: "ReLU" + bottom: "Mconv1_stage5_L2" + top: "Mconv1_stage5_L2" +} +layer { + name: "Mconv2_stage5_L1" + type: "Convolution" + bottom: "Mconv1_stage5_L1" + top: "Mconv2_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage5_L1" + type: "ReLU" + bottom: "Mconv2_stage5_L1" + top: "Mconv2_stage5_L1" +} +layer { + name: "Mconv2_stage5_L2" + type: "Convolution" + bottom: "Mconv1_stage5_L2" + top: "Mconv2_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage5_L2" + type: "ReLU" + bottom: "Mconv2_stage5_L2" + top: "Mconv2_stage5_L2" +} +layer { + name: "Mconv3_stage5_L1" + type: "Convolution" + bottom: "Mconv2_stage5_L1" + top: "Mconv3_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage5_L1" + type: "ReLU" + bottom: "Mconv3_stage5_L1" + top: "Mconv3_stage5_L1" +} +layer { + name: "Mconv3_stage5_L2" + type: "Convolution" + bottom: "Mconv2_stage5_L2" + top: "Mconv3_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage5_L2" + type: "ReLU" + bottom: "Mconv3_stage5_L2" + top: "Mconv3_stage5_L2" +} +layer { + name: "Mconv4_stage5_L1" + type: "Convolution" + bottom: "Mconv3_stage5_L1" + top: "Mconv4_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage5_L1" + type: "ReLU" + bottom: "Mconv4_stage5_L1" + top: "Mconv4_stage5_L1" +} +layer { + name: "Mconv4_stage5_L2" + type: "Convolution" + bottom: "Mconv3_stage5_L2" + top: "Mconv4_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage5_L2" + type: "ReLU" + bottom: "Mconv4_stage5_L2" + top: "Mconv4_stage5_L2" +} +layer { + name: "Mconv5_stage5_L1" + type: "Convolution" + bottom: "Mconv4_stage5_L1" + top: "Mconv5_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage5_L1" + type: "ReLU" + bottom: "Mconv5_stage5_L1" + top: "Mconv5_stage5_L1" +} +layer { + name: "Mconv5_stage5_L2" + type: "Convolution" + bottom: "Mconv4_stage5_L2" + top: "Mconv5_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage5_L2" + type: "ReLU" + bottom: "Mconv5_stage5_L2" + top: "Mconv5_stage5_L2" +} +layer { + name: "Mconv6_stage5_L1" + type: "Convolution" + bottom: "Mconv5_stage5_L1" + top: "Mconv6_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage5_L1" + type: "ReLU" + bottom: "Mconv6_stage5_L1" + top: "Mconv6_stage5_L1" +} +layer { + name: "Mconv6_stage5_L2" + type: "Convolution" + bottom: "Mconv5_stage5_L2" + top: "Mconv6_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage5_L2" + type: "ReLU" + bottom: "Mconv6_stage5_L2" + top: "Mconv6_stage5_L2" +} +layer { + name: "Mconv7_stage5_L1" + type: "Convolution" + bottom: "Mconv6_stage5_L1" + top: "Mconv7_stage5_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage5_L2" + type: "Convolution" + bottom: "Mconv6_stage5_L2" + top: "Mconv7_stage5_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage6" + type: "Concat" + bottom: "Mconv7_stage5_L1" + bottom: "Mconv7_stage5_L2" + bottom: "conv4_4_CPM" + top: "concat_stage6" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage6_L1" + type: "Convolution" + bottom: "concat_stage6" + top: "Mconv1_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage6_L1" + type: "ReLU" + bottom: "Mconv1_stage6_L1" + top: "Mconv1_stage6_L1" +} +layer { + name: "Mconv1_stage6_L2" + type: "Convolution" + bottom: "concat_stage6" + top: "Mconv1_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage6_L2" + type: "ReLU" + bottom: "Mconv1_stage6_L2" + top: "Mconv1_stage6_L2" +} +layer { + name: "Mconv2_stage6_L1" + type: "Convolution" + bottom: "Mconv1_stage6_L1" + top: "Mconv2_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage6_L1" + type: "ReLU" + bottom: "Mconv2_stage6_L1" + top: "Mconv2_stage6_L1" +} +layer { + name: "Mconv2_stage6_L2" + type: "Convolution" + bottom: "Mconv1_stage6_L2" + top: "Mconv2_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage6_L2" + type: "ReLU" + bottom: "Mconv2_stage6_L2" + top: "Mconv2_stage6_L2" +} +layer { + name: "Mconv3_stage6_L1" + type: "Convolution" + bottom: "Mconv2_stage6_L1" + top: "Mconv3_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage6_L1" + type: "ReLU" + bottom: "Mconv3_stage6_L1" + top: "Mconv3_stage6_L1" +} +layer { + name: "Mconv3_stage6_L2" + type: "Convolution" + bottom: "Mconv2_stage6_L2" + top: "Mconv3_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage6_L2" + type: "ReLU" + bottom: "Mconv3_stage6_L2" + top: "Mconv3_stage6_L2" +} +layer { + name: "Mconv4_stage6_L1" + type: "Convolution" + bottom: "Mconv3_stage6_L1" + top: "Mconv4_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage6_L1" + type: "ReLU" + bottom: "Mconv4_stage6_L1" + top: "Mconv4_stage6_L1" +} +layer { + name: "Mconv4_stage6_L2" + type: "Convolution" + bottom: "Mconv3_stage6_L2" + top: "Mconv4_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage6_L2" + type: "ReLU" + bottom: "Mconv4_stage6_L2" + top: "Mconv4_stage6_L2" +} +layer { + name: "Mconv5_stage6_L1" + type: "Convolution" + bottom: "Mconv4_stage6_L1" + top: "Mconv5_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage6_L1" + type: "ReLU" + bottom: "Mconv5_stage6_L1" + top: "Mconv5_stage6_L1" +} +layer { + name: "Mconv5_stage6_L2" + type: "Convolution" + bottom: "Mconv4_stage6_L2" + top: "Mconv5_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage6_L2" + type: "ReLU" + bottom: "Mconv5_stage6_L2" + top: "Mconv5_stage6_L2" +} +layer { + name: "Mconv6_stage6_L1" + type: "Convolution" + bottom: "Mconv5_stage6_L1" + top: "Mconv6_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage6_L1" + type: "ReLU" + bottom: "Mconv6_stage6_L1" + top: "Mconv6_stage6_L1" +} +layer { + name: "Mconv6_stage6_L2" + type: "Convolution" + bottom: "Mconv5_stage6_L2" + top: "Mconv6_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage6_L2" + type: "ReLU" + bottom: "Mconv6_stage6_L2" + top: "Mconv6_stage6_L2" +} +layer { + name: "Mconv7_stage6_L1" + type: "Convolution" + bottom: "Mconv6_stage6_L1" + top: "Mconv7_stage6_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 38 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage6_L2" + type: "Convolution" + bottom: "Mconv6_stage6_L2" + top: "Mconv7_stage6_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 19 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage7" + type: "Concat" + bottom: "Mconv7_stage6_L2" + bottom: "Mconv7_stage6_L1" + # top: "concat_stage7" + top: "net_output" + concat_param { + axis: 1 + } +} \ No newline at end of file diff --git a/res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt b/res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt new file mode 100644 index 0000000..02ec183 --- /dev/null +++ b/res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt @@ -0,0 +1,2081 @@ +input: "image" +input_dim: 1 +input_dim: 3 +input_dim: 1 # This value will be defined at runtime +input_dim: 1 # This value will be defined at runtime +layer { + name: "conv1_1" + type: "Convolution" + bottom: "image" + top: "conv1_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1_stage1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1_stage1" + top: "conv2_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2_stage1" + type: "Pooling" + bottom: "conv2_2" + top: "pool2_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2_stage1" + top: "conv3_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "conv3_4" + type: "Convolution" + bottom: "conv3_3" + top: "conv3_4" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu3_4" + type: "ReLU" + bottom: "conv3_4" + top: "conv3_4" +} +layer { + name: "pool3_stage1" + type: "Pooling" + bottom: "conv3_4" + top: "pool3_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3_stage1" + top: "conv4_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3_CPM" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_3_CPM" + type: "ReLU" + bottom: "conv4_3_CPM" + top: "conv4_3_CPM" +} +layer { + name: "conv4_4_CPM" + type: "Convolution" + bottom: "conv4_3_CPM" + top: "conv4_4_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu4_4_CPM" + type: "ReLU" + bottom: "conv4_4_CPM" + top: "conv4_4_CPM" +} +layer { + name: "conv5_1_CPM_L1" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L1" + type: "ReLU" + bottom: "conv5_1_CPM_L1" + top: "conv5_1_CPM_L1" +} +layer { + name: "conv5_1_CPM_L2" + type: "Convolution" + bottom: "conv4_4_CPM" + top: "conv5_1_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_1_CPM_L2" + type: "ReLU" + bottom: "conv5_1_CPM_L2" + top: "conv5_1_CPM_L2" +} +layer { + name: "conv5_2_CPM_L1" + type: "Convolution" + bottom: "conv5_1_CPM_L1" + top: "conv5_2_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L1" + type: "ReLU" + bottom: "conv5_2_CPM_L1" + top: "conv5_2_CPM_L1" +} +layer { + name: "conv5_2_CPM_L2" + type: "Convolution" + bottom: "conv5_1_CPM_L2" + top: "conv5_2_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_2_CPM_L2" + type: "ReLU" + bottom: "conv5_2_CPM_L2" + top: "conv5_2_CPM_L2" +} +layer { + name: "conv5_3_CPM_L1" + type: "Convolution" + bottom: "conv5_2_CPM_L1" + top: "conv5_3_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L1" + type: "ReLU" + bottom: "conv5_3_CPM_L1" + top: "conv5_3_CPM_L1" +} +layer { + name: "conv5_3_CPM_L2" + type: "Convolution" + bottom: "conv5_2_CPM_L2" + top: "conv5_3_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_3_CPM_L2" + type: "ReLU" + bottom: "conv5_3_CPM_L2" + top: "conv5_3_CPM_L2" +} +layer { + name: "conv5_4_CPM_L1" + type: "Convolution" + bottom: "conv5_3_CPM_L1" + top: "conv5_4_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L1" + type: "ReLU" + bottom: "conv5_4_CPM_L1" + top: "conv5_4_CPM_L1" +} +layer { + name: "conv5_4_CPM_L2" + type: "Convolution" + bottom: "conv5_3_CPM_L2" + top: "conv5_4_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "relu5_4_CPM_L2" + type: "ReLU" + bottom: "conv5_4_CPM_L2" + top: "conv5_4_CPM_L2" +} +layer { + name: "conv5_5_CPM_L1" + type: "Convolution" + bottom: "conv5_4_CPM_L1" + top: "conv5_5_CPM_L1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "conv5_5_CPM_L2" + type: "Convolution" + bottom: "conv5_4_CPM_L2" + top: "conv5_5_CPM_L2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage2" + type: "Concat" + bottom: "conv5_5_CPM_L1" + bottom: "conv5_5_CPM_L2" + bottom: "conv4_4_CPM" + top: "concat_stage2" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage2_L1" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L1" + type: "ReLU" + bottom: "Mconv1_stage2_L1" + top: "Mconv1_stage2_L1" +} +layer { + name: "Mconv1_stage2_L2" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage2_L2" + type: "ReLU" + bottom: "Mconv1_stage2_L2" + top: "Mconv1_stage2_L2" +} +layer { + name: "Mconv2_stage2_L1" + type: "Convolution" + bottom: "Mconv1_stage2_L1" + top: "Mconv2_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L1" + type: "ReLU" + bottom: "Mconv2_stage2_L1" + top: "Mconv2_stage2_L1" +} +layer { + name: "Mconv2_stage2_L2" + type: "Convolution" + bottom: "Mconv1_stage2_L2" + top: "Mconv2_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage2_L2" + type: "ReLU" + bottom: "Mconv2_stage2_L2" + top: "Mconv2_stage2_L2" +} +layer { + name: "Mconv3_stage2_L1" + type: "Convolution" + bottom: "Mconv2_stage2_L1" + top: "Mconv3_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L1" + type: "ReLU" + bottom: "Mconv3_stage2_L1" + top: "Mconv3_stage2_L1" +} +layer { + name: "Mconv3_stage2_L2" + type: "Convolution" + bottom: "Mconv2_stage2_L2" + top: "Mconv3_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage2_L2" + type: "ReLU" + bottom: "Mconv3_stage2_L2" + top: "Mconv3_stage2_L2" +} +layer { + name: "Mconv4_stage2_L1" + type: "Convolution" + bottom: "Mconv3_stage2_L1" + top: "Mconv4_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L1" + type: "ReLU" + bottom: "Mconv4_stage2_L1" + top: "Mconv4_stage2_L1" +} +layer { + name: "Mconv4_stage2_L2" + type: "Convolution" + bottom: "Mconv3_stage2_L2" + top: "Mconv4_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage2_L2" + type: "ReLU" + bottom: "Mconv4_stage2_L2" + top: "Mconv4_stage2_L2" +} +layer { + name: "Mconv5_stage2_L1" + type: "Convolution" + bottom: "Mconv4_stage2_L1" + top: "Mconv5_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L1" + type: "ReLU" + bottom: "Mconv5_stage2_L1" + top: "Mconv5_stage2_L1" +} +layer { + name: "Mconv5_stage2_L2" + type: "Convolution" + bottom: "Mconv4_stage2_L2" + top: "Mconv5_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage2_L2" + type: "ReLU" + bottom: "Mconv5_stage2_L2" + top: "Mconv5_stage2_L2" +} +layer { + name: "Mconv6_stage2_L1" + type: "Convolution" + bottom: "Mconv5_stage2_L1" + top: "Mconv6_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L1" + type: "ReLU" + bottom: "Mconv6_stage2_L1" + top: "Mconv6_stage2_L1" +} +layer { + name: "Mconv6_stage2_L2" + type: "Convolution" + bottom: "Mconv5_stage2_L2" + top: "Mconv6_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage2_L2" + type: "ReLU" + bottom: "Mconv6_stage2_L2" + top: "Mconv6_stage2_L2" +} +layer { + name: "Mconv7_stage2_L1" + type: "Convolution" + bottom: "Mconv6_stage2_L1" + top: "Mconv7_stage2_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage2_L2" + type: "Convolution" + bottom: "Mconv6_stage2_L2" + top: "Mconv7_stage2_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage3" + type: "Concat" + bottom: "Mconv7_stage2_L1" + bottom: "Mconv7_stage2_L2" + bottom: "conv4_4_CPM" + top: "concat_stage3" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage3_L1" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L1" + type: "ReLU" + bottom: "Mconv1_stage3_L1" + top: "Mconv1_stage3_L1" +} +layer { + name: "Mconv1_stage3_L2" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage3_L2" + type: "ReLU" + bottom: "Mconv1_stage3_L2" + top: "Mconv1_stage3_L2" +} +layer { + name: "Mconv2_stage3_L1" + type: "Convolution" + bottom: "Mconv1_stage3_L1" + top: "Mconv2_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L1" + type: "ReLU" + bottom: "Mconv2_stage3_L1" + top: "Mconv2_stage3_L1" +} +layer { + name: "Mconv2_stage3_L2" + type: "Convolution" + bottom: "Mconv1_stage3_L2" + top: "Mconv2_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage3_L2" + type: "ReLU" + bottom: "Mconv2_stage3_L2" + top: "Mconv2_stage3_L2" +} +layer { + name: "Mconv3_stage3_L1" + type: "Convolution" + bottom: "Mconv2_stage3_L1" + top: "Mconv3_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L1" + type: "ReLU" + bottom: "Mconv3_stage3_L1" + top: "Mconv3_stage3_L1" +} +layer { + name: "Mconv3_stage3_L2" + type: "Convolution" + bottom: "Mconv2_stage3_L2" + top: "Mconv3_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage3_L2" + type: "ReLU" + bottom: "Mconv3_stage3_L2" + top: "Mconv3_stage3_L2" +} +layer { + name: "Mconv4_stage3_L1" + type: "Convolution" + bottom: "Mconv3_stage3_L1" + top: "Mconv4_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L1" + type: "ReLU" + bottom: "Mconv4_stage3_L1" + top: "Mconv4_stage3_L1" +} +layer { + name: "Mconv4_stage3_L2" + type: "Convolution" + bottom: "Mconv3_stage3_L2" + top: "Mconv4_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage3_L2" + type: "ReLU" + bottom: "Mconv4_stage3_L2" + top: "Mconv4_stage3_L2" +} +layer { + name: "Mconv5_stage3_L1" + type: "Convolution" + bottom: "Mconv4_stage3_L1" + top: "Mconv5_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L1" + type: "ReLU" + bottom: "Mconv5_stage3_L1" + top: "Mconv5_stage3_L1" +} +layer { + name: "Mconv5_stage3_L2" + type: "Convolution" + bottom: "Mconv4_stage3_L2" + top: "Mconv5_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage3_L2" + type: "ReLU" + bottom: "Mconv5_stage3_L2" + top: "Mconv5_stage3_L2" +} +layer { + name: "Mconv6_stage3_L1" + type: "Convolution" + bottom: "Mconv5_stage3_L1" + top: "Mconv6_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L1" + type: "ReLU" + bottom: "Mconv6_stage3_L1" + top: "Mconv6_stage3_L1" +} +layer { + name: "Mconv6_stage3_L2" + type: "Convolution" + bottom: "Mconv5_stage3_L2" + top: "Mconv6_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage3_L2" + type: "ReLU" + bottom: "Mconv6_stage3_L2" + top: "Mconv6_stage3_L2" +} +layer { + name: "Mconv7_stage3_L1" + type: "Convolution" + bottom: "Mconv6_stage3_L1" + top: "Mconv7_stage3_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage3_L2" + type: "Convolution" + bottom: "Mconv6_stage3_L2" + top: "Mconv7_stage3_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage4" + type: "Concat" + bottom: "Mconv7_stage3_L1" + bottom: "Mconv7_stage3_L2" + bottom: "conv4_4_CPM" + top: "concat_stage4" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage4_L1" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L1" + type: "ReLU" + bottom: "Mconv1_stage4_L1" + top: "Mconv1_stage4_L1" +} +layer { + name: "Mconv1_stage4_L2" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu1_stage4_L2" + type: "ReLU" + bottom: "Mconv1_stage4_L2" + top: "Mconv1_stage4_L2" +} +layer { + name: "Mconv2_stage4_L1" + type: "Convolution" + bottom: "Mconv1_stage4_L1" + top: "Mconv2_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L1" + type: "ReLU" + bottom: "Mconv2_stage4_L1" + top: "Mconv2_stage4_L1" +} +layer { + name: "Mconv2_stage4_L2" + type: "Convolution" + bottom: "Mconv1_stage4_L2" + top: "Mconv2_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu2_stage4_L2" + type: "ReLU" + bottom: "Mconv2_stage4_L2" + top: "Mconv2_stage4_L2" +} +layer { + name: "Mconv3_stage4_L1" + type: "Convolution" + bottom: "Mconv2_stage4_L1" + top: "Mconv3_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L1" + type: "ReLU" + bottom: "Mconv3_stage4_L1" + top: "Mconv3_stage4_L1" +} +layer { + name: "Mconv3_stage4_L2" + type: "Convolution" + bottom: "Mconv2_stage4_L2" + top: "Mconv3_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu3_stage4_L2" + type: "ReLU" + bottom: "Mconv3_stage4_L2" + top: "Mconv3_stage4_L2" +} +layer { + name: "Mconv4_stage4_L1" + type: "Convolution" + bottom: "Mconv3_stage4_L1" + top: "Mconv4_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L1" + type: "ReLU" + bottom: "Mconv4_stage4_L1" + top: "Mconv4_stage4_L1" +} +layer { + name: "Mconv4_stage4_L2" + type: "Convolution" + bottom: "Mconv3_stage4_L2" + top: "Mconv4_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu4_stage4_L2" + type: "ReLU" + bottom: "Mconv4_stage4_L2" + top: "Mconv4_stage4_L2" +} +layer { + name: "Mconv5_stage4_L1" + type: "Convolution" + bottom: "Mconv4_stage4_L1" + top: "Mconv5_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L1" + type: "ReLU" + bottom: "Mconv5_stage4_L1" + top: "Mconv5_stage4_L1" +} +layer { + name: "Mconv5_stage4_L2" + type: "Convolution" + bottom: "Mconv4_stage4_L2" + top: "Mconv5_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu5_stage4_L2" + type: "ReLU" + bottom: "Mconv5_stage4_L2" + top: "Mconv5_stage4_L2" +} +layer { + name: "Mconv6_stage4_L1" + type: "Convolution" + bottom: "Mconv5_stage4_L1" + top: "Mconv6_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L1" + type: "ReLU" + bottom: "Mconv6_stage4_L1" + top: "Mconv6_stage4_L1" +} +layer { + name: "Mconv6_stage4_L2" + type: "Convolution" + bottom: "Mconv5_stage4_L2" + top: "Mconv6_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mrelu6_stage4_L2" + type: "ReLU" + bottom: "Mconv6_stage4_L2" + top: "Mconv6_stage4_L2" +} +layer { + name: "Mconv7_stage4_L1" + type: "Convolution" + bottom: "Mconv6_stage4_L1" + top: "Mconv7_stage4_L1" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "Mconv7_stage4_L2" + type: "Convolution" + bottom: "Mconv6_stage4_L2" + top: "Mconv7_stage4_L2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "concat_stage7" + type: "Concat" + bottom: "Mconv7_stage4_L2" + bottom: "Mconv7_stage4_L1" + top: "net_output" + concat_param { + axis: 1 + } +} diff --git a/src/computervision/FaceDetector.cpp b/src/computervision/FaceDetector.cpp deleted file mode 100644 index a628983..0000000 --- a/src/computervision/FaceDetector.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "FaceDetector.h" - - -/* - Author: Pierfrancesco Soffritti https://github.com/PierfrancescoSoffritti -*/ -namespace computervision -{ - Rect getFaceRect(Mat input); - - String faceClassifierFileName = "res/haarcascade_frontalface_alt.xml"; - CascadeClassifier faceCascadeClassifier; - - FaceDetector::FaceDetector(void) { - if (!faceCascadeClassifier.load(faceClassifierFileName)) - throw runtime_error("can't load file " + faceClassifierFileName); - } - - void FaceDetector::removeFaces(Mat input, Mat output) { - vector faces; - Mat frameGray; - - cvtColor(input, frameGray, CV_BGR2GRAY); - equalizeHist(frameGray, frameGray); - - faceCascadeClassifier.detectMultiScale(frameGray, faces, 1.1, 2, 0 | 2, Size(120, 120)); // HAAR_SCALE_IMAGE is 2 - - for (size_t i = 0; i < faces.size(); i++) { - rectangle( - output, - Point(faces[i].x, faces[i].y), - Point(faces[i].x + faces[i].width, faces[i].y + faces[i].height), - Scalar(0, 0, 0), - -1 - ); - } - } - - Rect getFaceRect(Mat input) { - vector faceRectangles; - Mat inputGray; - - cvtColor(input, inputGray, CV_BGR2GRAY); - equalizeHist(inputGray, inputGray); - - faceCascadeClassifier.detectMultiScale(inputGray, faceRectangles, 1.1, 2, 0 | 2, Size(120, 120)); // HAAR_SCALE_IMAGE is 2 - - if (faceRectangles.size() > 0) - return faceRectangles[0]; - else - return Rect(0, 0, 1, 1); - } -} \ No newline at end of file diff --git a/src/computervision/FaceDetector.h b/src/computervision/FaceDetector.h deleted file mode 100644 index 208e051..0000000 --- a/src/computervision/FaceDetector.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -/* - Author: Pierfrancesco Soffritti https://github.com/PierfrancescoSoffritti -*/ - -using namespace cv; -using namespace std; - -namespace computervision -{ - class FaceDetector { - public: - /** - * @brief Constructor for the class FaceDetector, loads training data from a file - * - */ - FaceDetector(void); - /** - * @brief Detects faces on an image and blocks them with a black rectangle - * - * @param input Input image - * @param output Output image - */ - void removeFaces(Mat input, Mat output); - }; -} \ No newline at end of file diff --git a/src/computervision/FingerCount.cpp b/src/computervision/FingerCount.cpp index 590e2a2..bdd1938 100644 --- a/src/computervision/FingerCount.cpp +++ b/src/computervision/FingerCount.cpp @@ -14,6 +14,7 @@ namespace computervision { + FingerCount::FingerCount(void) { color_blue = Scalar(255, 0, 0); color_green = Scalar(0, 255, 0); @@ -35,9 +36,6 @@ namespace computervision if (input_image.channels() != 1) return contours_image; - vector> contours; - vector hierarchy; - findContours(input_image, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE); // we need at least one contour to work @@ -45,7 +43,7 @@ namespace computervision return contours_image; // find the biggest contour (let's suppose it's our hand) - int biggest_contour_index = -1; + biggest_contour_index = -1; double biggest_area = 0.0; for (int i = 0; i < contours.size(); i++) { @@ -156,6 +154,11 @@ namespace computervision return contours_image; } + void FingerCount::DrawHandContours(Mat& image) + { + drawContours(image, contours, biggest_contour_index, color_green, 2, 8, hierarchy); + } + int FingerCount::getAmountOfFingers() { return amount_of_fingers; diff --git a/src/computervision/FingerCount.h b/src/computervision/FingerCount.h index 3319150..4b31c92 100644 --- a/src/computervision/FingerCount.h +++ b/src/computervision/FingerCount.h @@ -31,7 +31,15 @@ namespace computervision */ int getAmountOfFingers(); + void DrawHandContours(Mat& image); + private: + + int biggest_contour_index; + vector> contours; + vector hierarchy; + + // colors to use Scalar color_blue; Scalar color_green; @@ -115,5 +123,7 @@ namespace computervision * @param with_numbers if the numbers should be drawn with the points */ void drawVectorPoints(Mat image, vector points, Scalar color, bool with_numbers); + + }; } \ No newline at end of file diff --git a/src/computervision/HandDetectRegion.cpp b/src/computervision/HandDetectRegion.cpp new file mode 100644 index 0000000..87f3538 --- /dev/null +++ b/src/computervision/HandDetectRegion.cpp @@ -0,0 +1,105 @@ + +#include "HandDetectRegion.h" + +namespace computervision +{ + + HandDetectRegion::HandDetectRegion(std::string id,int x_pos, int y_pos, int width, int height) + { + region_id = id; + start_x_pos = x_pos; + start_y_pos = y_pos; + region_width = width; + region_height = height; + hand_mask_generated = false; + hand_present = false; + } + + void HandDetectRegion::DetectHand(cv::Mat& camera_frame) + { + Mat input_frame = GenerateHandMaskSquare(camera_frame); + frame_out = input_frame.clone(); + + // detect skin color + skin_detector.drawSkinColorSampler(camera_frame,start_x_pos,start_y_pos,region_width,region_height); + + // remove background from image + foreground = background_remover.getForeground(input_frame); + + // detect the hand contours + handMask = skin_detector.getSkinMask(foreground); + + // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. + DrawHandMask(&camera_frame); + + //imshow("output" + region_id, frame_out); + //imshow("foreground" + region_id, foreground); + //imshow("handMask" + region_id, handMask); + /*imshow("handDetection", fingerCountDebug);*/ + + hand_present = hand_calibrator.CheckIfHandPresent(handMask,handcalibration::HandDetectionType::GAME); + //std::string text = (hand_present ? "hand" : "no"); + //cv::putText(camera_frame, text, cv::Point(start_x_pos, start_y_pos), cv::FONT_HERSHEY_COMPLEX, 2.0, cv::Scalar(0, 255, 255), 2); + hand_calibrator.SetHandPresent(hand_present); + + //draw black rectangle behind calibration information text + cv::rectangle(camera_frame, cv::Rect(0, camera_frame.rows - 55, 450, camera_frame.cols), cv::Scalar(0, 0, 0), -1); + + hand_calibrator.DrawBackgroundSkinCalibrated(camera_frame); + + + + } + + cv::Mat HandDetectRegion::GenerateHandMaskSquare(cv::Mat img) + { + cv::Mat mask = cv::Mat::zeros(img.size(), img.type()); + cv::Mat distance_img = cv::Mat::zeros(img.size(), img.type()); + + cv::rectangle(mask, cv::Rect(start_x_pos, start_y_pos, region_width, region_height), cv::Scalar(255, 255, 255), -1); + + img.copyTo(distance_img, mask); + + hand_mask_generated = true; + return distance_img; + } + + bool HandDetectRegion::DrawHandMask(cv::Mat* input) + { + if (!hand_mask_generated) return false; + rectangle(*input, Rect(start_x_pos, start_y_pos, region_width, region_height), (hand_present ? Scalar(0, 255, 0) : Scalar(0,0,255)),2); + return true; + } + + bool HandDetectRegion::IsHandPresent() + { + return hand_present; + } + + void HandDetectRegion::CalibrateBackground() + { + std::cout << "calibrating background " << region_id << std::endl; + background_remover.calibrate(frame_out); + hand_calibrator.SetBackGroundCalibrated(true); + } + void HandDetectRegion::CalibrateSkin() + { + skin_detector.calibrate(frame_out); + hand_calibrator.SetSkinCalibration(true); + } + + std::vector HandDetectRegion::CalculateSkinTresholds() + { + std::cout << "calibrating skin " << region_id << std::endl; + hand_calibrator.SetSkinCalibration(true); + return skin_detector.calibrateAndReturn(frame_out); + } + + void HandDetectRegion::setSkinTresholds(std::vector& tresholds) + { + std::cout << "setting skin " << region_id << std::endl; + skin_detector.setTresholds(tresholds); + hand_calibrator.SetSkinCalibration(true); + } + +} diff --git a/src/computervision/HandDetectRegion.h b/src/computervision/HandDetectRegion.h new file mode 100644 index 0000000..7cc1a9a --- /dev/null +++ b/src/computervision/HandDetectRegion.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include "async/StaticCameraInstance.h" +#include "calibration/HandCalibrator.h" +#include "BackgroundRemover.h" +#include "SkinDetector.h" +#include "FingerCount.h" +namespace computervision +{ + class HandDetectRegion + { + public: + HandDetectRegion(std::string id,int x_pos, int y_pos, int width, int height); + + void SetXPos(int x) { start_x_pos = x; } + void SetYPos(int y) { start_y_pos = y; } + int GetXPos() { return start_x_pos; } + int GetYPos() { return start_y_pos; } + + void SetWidth(int width) { region_width = width; } + void SetHeigth(int height) { region_height = height; } + int GetWidth() { return region_width; } + int GetHeight() { return region_height; } + + cv::Mat GenerateHandMaskSquare(cv::Mat img); + + void DetectHand(cv::Mat& camera_frame); + + bool IsHandPresent(); + + void CalibrateBackground(); + void CalibrateSkin(); + + std::vector CalculateSkinTresholds(); + + void setSkinTresholds(std::vector& tresholds); + + private: + int start_x_pos; + int start_y_pos; + int region_height; + int region_width; + bool hand_mask_generated; + bool hand_present; + cv::Mat frame, frame_out, handMask, foreground, fingerCountDebug; + BackgroundRemover background_remover; + SkinDetector skin_detector; + handcalibration::HandCalibrator hand_calibrator; + std::string region_id; + + bool DrawHandMask(cv::Mat* input); + }; + +} diff --git a/src/computervision/ObjectDetection.cpp b/src/computervision/ObjectDetection.cpp index 62236d2..829953c 100644 --- a/src/computervision/ObjectDetection.cpp +++ b/src/computervision/ObjectDetection.cpp @@ -6,117 +6,142 @@ #include "ObjectDetection.h" #include "BackgroundRemover.h" #include "SkinDetector.h" -#include "FaceDetector.h" #include "FingerCount.h" +#include "async/StaticCameraInstance.h" +#include "calibration/HandCalibrator.h" namespace computervision { - cv::VideoCapture cap(0); - cv::Mat img, imgGray, img2, img2Gray, img3, img4; + cv::Mat img, img_gray, img2, img2_gray, img3, img4; - int handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight; - bool handMaskGenerated = false; + int hand_mask_start_x_pos, hand_mask_start_y_pos, hand_mask_width, hand_mask_height; + bool hand_mask_generated = false; - Mat frame, frameOut, handMask, foreground, fingerCountDebug; - BackgroundRemover backgroundRemover; - SkinDetector skinDetector; - FaceDetector faceDetector; - FingerCount fingerCount; + Mat frame, frame_out, handMask, foreground, fingerCountDebug; + BackgroundRemover background_remover; + SkinDetector skin_detector; + FingerCount finger_count; + handcalibration::HandCalibrator hand_calibrator; + + cv::VideoCapture cap = static_camera::getCap(); ObjectDetection::ObjectDetection() { } - cv::Mat ObjectDetection::readCamera() { + cv::Mat ObjectDetection::ReadCamera() { cap.read(img); return img; } - bool ObjectDetection::detectHand(Mat cameraFrame) + cv::VideoCapture ObjectDetection::GetCap() { - Mat inputFrame = generateHandMaskSquare(cameraFrame); - frameOut = inputFrame.clone(); + return cap; + } + + bool ObjectDetection::DetectHand(Mat camera_frame, bool& hand_present) + { + Mat input_frame = GenerateHandMaskSquare(camera_frame); + frame_out = input_frame.clone(); // detect skin color - skinDetector.drawSkinColorSampler(frameOut); + skin_detector.drawSkinColorSampler(camera_frame); // remove background from image - foreground = backgroundRemover.getForeground(inputFrame); + foreground = background_remover.getForeground(input_frame); // detect the hand contours - handMask = skinDetector.getSkinMask(foreground); + handMask = skin_detector.getSkinMask(foreground); // count the amount of fingers and put the info on the matrix - fingerCountDebug = fingerCount.findFingersCount(handMask, frameOut); + fingerCountDebug = finger_count.findFingersCount(handMask, frame_out); // get the amount of fingers - int fingers_amount = fingerCount.getAmountOfFingers(); + int fingers_amount = finger_count.getAmountOfFingers(); // draw the hand rectangle on the camera input, and draw text showing if the hand is open or closed. - drawHandMaskRect(&cameraFrame); - string hand_text = fingers_amount > 0 ? "open" : "closed"; - putText(cameraFrame,hand_text, Point(10, 75), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 255),3); - imshow("camera", cameraFrame); + DrawHandMask(&camera_frame); + - /* imshow("output", frameOut); + hand_calibrator.SetAmountOfFingers(fingers_amount); + finger_count.DrawHandContours(camera_frame); + hand_calibrator.DrawHandCalibrationText(camera_frame); + imshow("camera", camera_frame); + + + + /*imshow("output", frame_out); imshow("foreground", foreground); imshow("handMask", handMask); imshow("handDetection", fingerCountDebug);*/ + hand_present = hand_calibrator.CheckIfHandPresent(handMask,handcalibration::HandDetectionType::MENU); + hand_calibrator.SetHandPresent(hand_present); + + + int key = waitKey(1); if (key == 98) // b, calibrate the background - backgroundRemover.calibrate(inputFrame); + { + background_remover.calibrate(input_frame); + hand_calibrator.SetBackGroundCalibrated(true); + } else if (key == 115) // s, calibrate the skin color - skinDetector.calibrate(inputFrame); + { + skin_detector.calibrate(input_frame); + hand_calibrator.SetSkinCalibration(true); + + } + return fingers_amount > 0; } - void ObjectDetection::calculateDifference() + void ObjectDetection::CalculateDifference() { cap.read(img); cap.read(img2); - cv::cvtColor(img, imgGray, cv::COLOR_RGBA2GRAY); - cv::cvtColor(img2, img2Gray, cv::COLOR_RGBA2GRAY); + cv::cvtColor(img, img_gray, cv::COLOR_RGBA2GRAY); + cv::cvtColor(img2, img2_gray, cv::COLOR_RGBA2GRAY); - cv::absdiff(imgGray, img2Gray, img3); + cv::absdiff(img_gray, img2_gray, img3); cv::threshold(img3, img4, 50, 170, cv::THRESH_BINARY); imshow("threshold", img4); } - cv::Mat ObjectDetection::generateHandMaskSquare(cv::Mat img) + cv::Mat ObjectDetection::GenerateHandMaskSquare(cv::Mat img) { - handMaskStartXPos = 20; - handMaskStartYPos = img.rows / 5; - handMaskWidth = img.cols / 3; - handMaskHeight = img.cols / 3; + hand_mask_start_x_pos = 20; + hand_mask_start_y_pos = img.rows / 5; + hand_mask_width = img.cols / 3; + hand_mask_height = img.cols / 3; cv::Mat mask = cv::Mat::zeros(img.size(), img.type()); - cv::Mat dstImg = cv::Mat::zeros(img.size(), img.type()); + cv::Mat distance_img = cv::Mat::zeros(img.size(), img.type()); - cv::rectangle(mask, Rect(handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight), Scalar(255, 255, 255), -1); + cv::rectangle(mask, Rect(hand_mask_start_x_pos, hand_mask_start_y_pos, hand_mask_width, hand_mask_height), Scalar(255, 255, 255), -1); - img.copyTo(dstImg, mask); + img.copyTo(distance_img, mask); - handMaskGenerated = true; - return dstImg; + hand_mask_generated = true; + return distance_img; } - bool ObjectDetection::drawHandMaskRect(cv::Mat* input) + bool ObjectDetection::DrawHandMask(cv::Mat* input) { - if (!handMaskGenerated) return false; - rectangle(*input, Rect(handMaskStartXPos, handMaskStartYPos, handMaskWidth, handMaskHeight), Scalar(255, 255, 255)); + if (!hand_mask_generated) return false; + rectangle(*input, Rect(hand_mask_start_x_pos, hand_mask_start_y_pos, hand_mask_width, hand_mask_height), Scalar(255, 255, 255)); return true; } - void ObjectDetection::showWebcam() + void ObjectDetection::ShowWebcam() { imshow("Webcam image", img); } diff --git a/src/computervision/ObjectDetection.h b/src/computervision/ObjectDetection.h index bddf4ba..92fc335 100644 --- a/src/computervision/ObjectDetection.h +++ b/src/computervision/ObjectDetection.h @@ -27,13 +27,13 @@ namespace computervision * @brief Displays an image of the current webcam-footage * */ - void showWebcam(); + void ShowWebcam(); /** * @brief Calculates the difference between two images * and outputs an image that only shows the difference * */ - void calculateDifference(); + void CalculateDifference(); /** * @brief generates the square that will hold the mask in which the hand will be detected. @@ -41,29 +41,51 @@ namespace computervision * @param img the current camear frame * @return a matrix containing the mask */ - cv::Mat generateHandMaskSquare(cv::Mat img); + cv::Mat GenerateHandMaskSquare(cv::Mat img); /** * @brief reads the camera and returns it in a matrix. * * @return the camera frame in a matrix */ - cv::Mat readCamera(); + cv::Mat ReadCamera(); /** * @brief detects a hand based on the given hand mask input frame. * * @param inputFrame the input frame from the camera + * @param hand_present boolean that will hold true if the hand is detected, false if not. * @return true if hand is open, false if hand is closed */ - bool detectHand(cv::Mat cameraFrame); + bool DetectHand(cv::Mat camera_frame, bool& hand_present); /** * @brief draws the hand mask rectangle on the given input matrix. * * @param input the input matrix to draw the rectangle on */ - bool drawHandMaskRect(cv::Mat *input); + bool DrawHandMask(cv::Mat *input); + + /** + * @brief checks if the hand of the user is open. + * + * @return true if the hand is open, false if not. + */ + bool IsHandOpen(); + + + /** + * @brief checks whether the hand is held within the detection square. + * + * @return true if the hand is in the detection square, false if not. + */ + bool IsHandPresent(); + + cv::VideoCapture GetCap(); + + private: + bool is_hand_open; + bool is_hand_present; }; diff --git a/src/computervision/OpenPoseVideo.cpp b/src/computervision/OpenPoseVideo.cpp new file mode 100644 index 0000000..33527a1 --- /dev/null +++ b/src/computervision/OpenPoseVideo.cpp @@ -0,0 +1,108 @@ +#include "OpenPoseVideo.h" + +using namespace std; +using namespace cv; +using namespace cv::dnn; + +namespace computervision +{ +#define MPI + +#ifdef MPI + const int POSE_PAIRS[7][2] = + { + {0,1}, {1,2}, {2,3}, + {3,4}, {1,5}, {5,6}, + {6,7} + }; + + string protoFile = "res/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"; + string weightsFile = "res/pose/mpi/pose_iter_160000.caffemodel"; + + int nPoints = 8; +#endif + +#ifdef COCO + const int POSE_PAIRS[17][2] = + { + {1,2}, {1,5}, {2,3}, + {3,4}, {5,6}, {6,7}, + {1,8}, {8,9}, {9,10}, + {1,11}, {11,12}, {12,13}, + {1,0}, {0,14}, + {14,16}, {0,15}, {15,17} + }; + + string protoFile = "pose/coco/pose_deploy_linevec.prototxt"; + string weightsFile = "pose/coco/pose_iter_440000.caffemodel"; + + int nPoints = 18; +#endif + Net net; + + void OpenPoseVideo::setup() { + net = readNetFromCaffe(protoFile, weightsFile); + + net.setPreferableBackend(DNN_TARGET_CPU); + } + + void OpenPoseVideo::movementSkeleton(Mat& inputImage, std::function&, cv::Mat& poinst_on_image)> f) { + std::cout << "movement skeleton start" << std::endl; + + int inWidth = 368; + int inHeight = 368; + float thresh = 0.01; + + Mat frame; + int frameWidth = inputImage.size().width; + int frameHeight = inputImage.size().height; + + double t = (double)cv::getTickCount(); + std::cout << "reading input image and blob" << std::endl; + + frame = inputImage; + Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false); + + std::cout << "done reading image and blob" << std::endl; + + net.setInput(inpBlob); + + std::cout << "done setting input to net" << std::endl; + Mat output = net.forward(); + std::cout << "time took to set input and forward: " << t << std::endl; + + int H = output.size[2]; + int W = output.size[3]; + + std::cout << "about to find position of boxy parts" << std::endl; + // find the position of the body parts + vector points(nPoints); + for (int n = 0; n < nPoints; n++) + { + // Probability map of corresponding body's part. + Mat probMap(H, W, CV_32F, output.ptr(0, n)); + + Point2f p(-1, -1); + Point maxLoc; + double prob; + minMaxLoc(probMap, 0, &prob, 0, &maxLoc); + if (prob > thresh) + { + p = maxLoc; + p.x *= (float)frameWidth / W; + p.y *= (float)frameHeight / H; + + circle(frame, cv::Point((int)p.x, (int)p.y), 8, Scalar(0, 255, 255), -1); + cv::putText(frame, cv::format("%d", n), cv::Point((int)p.x, (int)p.y), cv::FONT_HERSHEY_COMPLEX, 1.1, cv::Scalar(0, 0, 255), 2); + } + points[n] = p; + } + + cv::putText(frame, cv::format("time taken = %.2f sec", t), cv::Point(50, 50), cv::FONT_HERSHEY_COMPLEX, .8, cv::Scalar(255, 50, 0), 2); + std::cout << "time taken: " << t << std::endl; + //imshow("Output-Keypoints", frame); + //imshow("Output-Skeleton", frame); + std::cout << "about to call points receiving method" << std::endl; + f(points,frame); + } +} \ No newline at end of file diff --git a/src/computervision/OpenPoseVideo.h b/src/computervision/OpenPoseVideo.h new file mode 100644 index 0000000..e05737d --- /dev/null +++ b/src/computervision/OpenPoseVideo.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include +#include + +using namespace cv; + +namespace computervision +{ + class OpenPoseVideo{ + private: + + public: + void movementSkeleton(Mat& inputImage, std::function&, cv::Mat& poinst_on_image)> f); + void setup(); + }; +} diff --git a/src/computervision/SkinDetector.cpp b/src/computervision/SkinDetector.cpp index 088cce0..100f25f 100644 --- a/src/computervision/SkinDetector.cpp +++ b/src/computervision/SkinDetector.cpp @@ -1,4 +1,5 @@ #include "SkinDetector.h" +#include /* Author: Pierfrancesco Soffritti https://github.com/PierfrancescoSoffritti @@ -23,7 +24,7 @@ namespace computervision int frameWidth = input.size().width, frameHeight = input.size().height; int rectangleSize = 25; - Scalar rectangleColor = Scalar(255, 0, 255); + Scalar rectangleColor = Scalar(0, 255, 255); skinColorSamplerRectangle1 = Rect(frameWidth / 5, frameHeight / 2, rectangleSize, rectangleSize); skinColorSamplerRectangle2 = Rect(frameWidth / 5, frameHeight / 3, rectangleSize, rectangleSize); @@ -41,6 +42,29 @@ namespace computervision ); } + void SkinDetector::drawSkinColorSampler(Mat input,int x, int y,int width, int height) { + int frameWidth = width, frameHeight = height; + + int rectangleSize = 25; + Scalar rectangleColor = Scalar(0, 255, 255); + + skinColorSamplerRectangle1 = Rect(frameWidth / 5 + x, frameHeight / 2 + y, rectangleSize, rectangleSize); + skinColorSamplerRectangle2 = Rect(frameWidth / 5 + x, frameHeight / 3 + y, rectangleSize, rectangleSize); + + rectangle( + input, + skinColorSamplerRectangle1, + rectangleColor + ); + + rectangle( + input, + skinColorSamplerRectangle2, + rectangleColor + ); + } + + void SkinDetector::calibrate(Mat input) { Mat hsvInput; @@ -54,6 +78,19 @@ namespace computervision calibrated = true; } + std::vector SkinDetector::calibrateAndReturn(Mat input) + { + Mat hsvInput; + cvtColor(input, hsvInput, CV_BGR2HSV); + + Mat sample1 = Mat(hsvInput, skinColorSamplerRectangle1); + Mat sample2 = Mat(hsvInput, skinColorSamplerRectangle2); + + calibrated = true; + return calculateAndReturnTresholds(sample1, sample2); + + } + void SkinDetector::calculateThresholds(Mat sample1, Mat sample2) { int offsetLowThreshold = 80; int offsetHighThreshold = 30; @@ -75,6 +112,39 @@ namespace computervision //vHighThreshold = 255; } + std::vector SkinDetector::calculateAndReturnTresholds(Mat sample1, Mat sample2) + { + + calculateThresholds(sample1, sample2); + std::vector res; + res.push_back(hLowThreshold); + res.push_back(hHighThreshold); + res.push_back(sLowThreshold); + res.push_back(sHighThreshold); + res.push_back(vLowThreshold); + res.push_back(vHighThreshold); + return res; + } + + void SkinDetector::setTresholds(std::vector& tresholds) + { + if (tresholds.size() != 6) + { + std::cout << "tresholds array not the right size!" << std::endl; + return; + } + + hLowThreshold = tresholds[0]; + hHighThreshold = tresholds[1]; + sLowThreshold = tresholds[2]; + sHighThreshold = tresholds[3]; + vLowThreshold = tresholds[4]; + vHighThreshold = tresholds[5]; + + calibrated = true; + + } + Mat SkinDetector::getSkinMask(Mat input) { Mat skinMask; diff --git a/src/computervision/SkinDetector.h b/src/computervision/SkinDetector.h index c6cf158..02e9dfb 100644 --- a/src/computervision/SkinDetector.h +++ b/src/computervision/SkinDetector.h @@ -24,6 +24,9 @@ namespace computervision */ void drawSkinColorSampler(Mat input); + void drawSkinColorSampler(Mat input, int x, int y, int width, int heigth); + + /* * @brief calibrates the skin color detector with the given input frame * @@ -31,6 +34,10 @@ namespace computervision */ void calibrate(Mat input); + std::vector calibrateAndReturn(Mat input); + + void setTresholds(std::vector& tresholds); + /* * @brief gets the mask for the hand * @@ -63,6 +70,8 @@ namespace computervision */ void calculateThresholds(Mat sample1, Mat sample2); + std::vector calculateAndReturnTresholds(Mat sample1, Mat sample2); + /** * @brief the opening. it generates the structuring element and performs the morphological transformations required to detect the hand. * This needs to be done to get the skin mask. diff --git a/src/computervision/async/StaticCameraInstance.h b/src/computervision/async/StaticCameraInstance.h new file mode 100644 index 0000000..625d478 --- /dev/null +++ b/src/computervision/async/StaticCameraInstance.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace static_camera +{ + + static cv::VideoCapture getCap() + { + static cv::VideoCapture cap(0); + return cap; + } +}; diff --git a/src/computervision/async/async_arm_detection.cpp b/src/computervision/async/async_arm_detection.cpp new file mode 100644 index 0000000..a43b7dc --- /dev/null +++ b/src/computervision/async/async_arm_detection.cpp @@ -0,0 +1,46 @@ +#include +#include "async_arm_detection.h" +#include "../OpenPoseVideo.h" +#include +#include "StaticCameraInstance.h" + + +namespace computervision +{ + AsyncArmDetection::AsyncArmDetection() + { + + } + + void AsyncArmDetection::run_arm_detection(std::function, cv::Mat poinst_on_image)> points_ready_func, OpenPoseVideo op) + { + VideoCapture cap = static_camera::getCap(); + + std::cout << "STARTING THREAD LAMBDA" << std::endl; + /*cv::VideoCapture cap = static_camera::GetCap();*/ + + if (!cap.isOpened()) + { + std::cout << "capture was closed, opening..." << std::endl; + cap.open(0); + } + + while (true) + { + Mat img; + cap.read(img); + op.movementSkeleton(img, points_ready_func); + } + } + + void AsyncArmDetection::start(std::function, cv::Mat poinst_on_image)> points_ready_func, OpenPoseVideo op) + { + + std::cout << "starting function" << std::endl; + + + std::thread async_arm_detect_thread(&AsyncArmDetection::run_arm_detection,this, points_ready_func, op); + + async_arm_detect_thread.detach(); // makes sure the thread is detached from the variable. + } +} diff --git a/src/computervision/async/async_arm_detection.h b/src/computervision/async/async_arm_detection.h new file mode 100644 index 0000000..98fd163 --- /dev/null +++ b/src/computervision/async/async_arm_detection.h @@ -0,0 +1,23 @@ +#pragma once +#include +#include +#include +#include +#include "../OpenPoseVideo.h" +#include "StaticCameraInstance.h" + + +namespace computervision +{ + class AsyncArmDetection + { + public: + AsyncArmDetection(void); + + + void start(std::function, cv::Mat poinst_on_image)>, computervision::OpenPoseVideo op); + private: + void run_arm_detection(std::function, cv::Mat poinst_on_image)> points_ready_func, OpenPoseVideo op); + }; + +} diff --git a/src/computervision/calibration/HandCalibrator.cpp b/src/computervision/calibration/HandCalibrator.cpp new file mode 100644 index 0000000..dcf2911 --- /dev/null +++ b/src/computervision/calibration/HandCalibrator.cpp @@ -0,0 +1,92 @@ + +#include "HandCalibrator.h" +#include + +#define MIN_MENU_HAND_SIZE 10000 +#define MIN_GAME_HAND_SIZE 3000 // todo change +namespace computervision +{ + namespace handcalibration + { + + HandCalibrator::HandCalibrator() + { + + } + + void HandCalibrator::DrawHandCalibrationText(cv::Mat& output_frame) + { + cv::rectangle(output_frame, cv::Rect(0, 0, output_frame.cols, 40), cv::Scalar(0, 0, 0), -1); + cv::putText(output_frame, "Hand calibration", cv::Point(output_frame.cols / 2 - 100, 25), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(18, 219, 65), 2); + cv::putText(output_frame, "press 'b' to calibrate background,then press 's' to calibrate skin tone", cv::Point(5, 35), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(18, 219, 65), 1); + + cv::rectangle(output_frame, cv::Rect(0, output_frame.rows - 80, 450, output_frame.cols), cv::Scalar(0, 0, 0), -1); + + cv::putText(output_frame, "hand in frame:", cv::Point(5, output_frame.rows - 50), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 67, 15, 15), hand_present ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + + DrawBackgroundSkinCalibrated(output_frame); + + if (hand_present) + { + std::string hand_text = fingers_amount > 0 ? "open" : "closed"; + cv::putText(output_frame, hand_text, cv::Point(10, 75), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 255), 3); + } + } + + void HandCalibrator::DrawBackgroundSkinCalibrated(cv::Mat& output_frame) + { + + cv::putText(output_frame, "background calibrated:", cv::Point(5, output_frame.rows - 30), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 47, 15, 15), background_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + + cv::putText(output_frame, "skin color calibrated:", cv::Point(5, output_frame.rows - 10), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 255, 0), 1); + cv::rectangle(output_frame, cv::Rect(420, output_frame.rows - 27, 15, 15), skintone_calibrated ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 0, 255), -1); + } + + void HandCalibrator::SetSkinCalibration(bool val) + { + skintone_calibrated = val; + } + + void HandCalibrator::SetBackGroundCalibrated(bool val) + { + background_calibrated = val; + } + + void HandCalibrator::SetHandPresent(bool val) + { + hand_present = val; + } + + void HandCalibrator::SetAmountOfFingers(int amount) + { + fingers_amount = amount; + } + + bool HandCalibrator::CheckIfHandPresent(cv::Mat input_image, HandDetectionType type) + { + std::vector> points; + cv::findContours(input_image, points, cv::RetrievalModes::RETR_LIST, cv::ContourApproximationModes::CHAIN_APPROX_SIMPLE); + + if (points.size() == 0) return false; + + for (int p = 0; p < points.size(); p++) + { + int area = cv::contourArea(points[p]); + + if (type == handcalibration::HandDetectionType::MENU) + if (area > MIN_MENU_HAND_SIZE) return true; + + if (type == handcalibration::HandDetectionType::GAME) + if (area > MIN_GAME_HAND_SIZE) return true; + } + + return false; + } + + + + + } +} diff --git a/src/computervision/calibration/HandCalibrator.h b/src/computervision/calibration/HandCalibrator.h new file mode 100644 index 0000000..ca71fde --- /dev/null +++ b/src/computervision/calibration/HandCalibrator.h @@ -0,0 +1,76 @@ +#pragma once +#include +#include +#include + +namespace computervision +{ + namespace handcalibration + { + enum class HandDetectionType + { + MENU, + GAME + }; + + class HandCalibrator + { + public: + HandCalibrator(); + + + + /** + * @brief draws the text to show the status of the calibration on the image + * + * @param output_frame the frame to draw on. + */ + void DrawHandCalibrationText(cv::Mat& output_frame); + + /** + * @brief sets the skin calibration variable. + * + * @param val the value to set + */ + void SetSkinCalibration(bool val); + + /** + * @brief sets the background calibration variable. + * + * @param val the value to set + */ + void SetBackGroundCalibrated(bool val); + + /** + * @brief sets the value for if the hand is present. + * + * @param val the value to set. + */ + void SetHandPresent(bool val); + + /** + * @brief checks if the hand is present in the given image + * + * @param input_image the input image to check. + */ + bool CheckIfHandPresent(cv::Mat input_image, HandDetectionType type); + + /** + * @brief sets the amount of fingers that are currently detected. + * + * @param amount the amount of fingers. + */ + void SetAmountOfFingers(int amount); + + void DrawBackgroundSkinCalibrated(cv::Mat& output_frame); + + private: + + bool background_calibrated; + bool skintone_calibrated; + bool hand_present; + int fingers_amount; + }; + + } +} diff --git a/src/main.cpp b/src/main.cpp index cd145fe..c93af64 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #define STB_IMAGE_IMPLEMENTATION #include #include @@ -25,6 +27,10 @@ #include "scenes/startup_Scene.h" #include "computervision/ObjectDetection.h" +//#include "computervision/OpenPoseImage.h" +#include "computervision/OpenPoseVideo.h" + +#include "computervision/async/async_arm_detection.h" #pragma comment(lib, "glfw3.lib") #pragma comment(lib, "glew32s.lib") @@ -32,9 +38,21 @@ static double UpdateDelta(); -static GLFWwindow* window; scene::Scene* current_scene; +static GLFWwindow* window; +bool points_img_available = false; +cv::Mat points_img; + +void retrieve_points(std::vector arm_points, cv::Mat points_on_image) +{ + + std::cout << "got points!!" << std::endl; + std::cout << "points: " << arm_points << std::endl; + points_img = points_on_image; + points_img_available = true; +} + int main(void) { #pragma region OPENGL_SETTINGS diff --git a/src/scenes/in_Game_Scene.cpp b/src/scenes/in_Game_Scene.cpp index 7f53413..0dc070d 100644 --- a/src/scenes/in_Game_Scene.cpp +++ b/src/scenes/in_Game_Scene.cpp @@ -15,11 +15,15 @@ #include #include #include +#include +#include "../computervision/HandDetectRegion.h" +#include "../computervision/ObjectDetection.h" #define MAX_MODEL_DEQUE_SIZE 6 // max amount of models to load at the same time #define UPCOMING_MODEL_AMOUNT 4 // how much models should be loaded in front of us + namespace scene { entities::HouseGenerator* house_generator; @@ -35,6 +39,9 @@ namespace scene std::vector guis; + std::vector regions; + computervision::HandDetectRegion reg_left("left", 0, 0, 150, 150), reg_right("right", 0, 0, 150, 150), reg_up("up", 0, 0, 150, 150); + In_Game_Scene::In_Game_Scene() { shader = new shaders::EntityShader; @@ -59,6 +66,15 @@ namespace scene void load_chunk(int model_pos) { static unsigned int furniture_count = 0; + // set up squares according to size of camera input + cv::Mat camera_frame; + static_camera::getCap().read(camera_frame); // get camera frame to know the width and heigth + reg_left.SetXPos(10); + reg_left.SetYPos(camera_frame.rows / 2 - reg_left.GetHeight()/2); + reg_right.SetXPos(camera_frame.cols - 10 - reg_right.GetWidth()); + reg_right.SetYPos(camera_frame.rows / 2 - reg_right.GetHeight()/2); + reg_up.SetXPos(camera_frame.cols / 2 - reg_up.GetWidth() / 2); + reg_up.SetYPos(10); std::cout << "loading model chunk" << std::endl; if (house_models.size() >= MAX_MODEL_DEQUE_SIZE * furniture_count) @@ -158,14 +174,41 @@ namespace scene } // remember the position at which the new model was added last_model_pos = model_pos; + + update_hand_detection(); } void scene::In_Game_Scene::onKey(GLFWwindow* window, int key, int scancode, int action, int mods) { if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) { + cv::destroyWindow("camera"); return_value = scene::Scenes::STOP; } + + if (glfwGetKey(window, GLFW_KEY_B) == GLFW_PRESS) + { + reg_left.CalibrateBackground(); + reg_right.CalibrateBackground(); + reg_up.CalibrateBackground(); + } + + if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) + { + std::vector tresholds = reg_left.CalculateSkinTresholds(); + reg_right.setSkinTresholds(tresholds); + reg_up.setSkinTresholds(tresholds); + } } + void scene::In_Game_Scene::update_hand_detection() + { + cv::Mat camera_frame; + static_camera::getCap().read(camera_frame); + reg_left.DetectHand(camera_frame); + reg_right.DetectHand(camera_frame); + reg_up.DetectHand(camera_frame); + + cv::imshow("camera", camera_frame); + } } diff --git a/src/scenes/in_Game_Scene.h b/src/scenes/in_Game_Scene.h index eb5f1a8..e4dea16 100644 --- a/src/scenes/in_Game_Scene.h +++ b/src/scenes/in_Game_Scene.h @@ -8,6 +8,7 @@ namespace scene { private: scene::Scenes return_value = scene::Scenes::INGAME; + void update_hand_detection(); public: In_Game_Scene(); diff --git a/src/scenes/startup_Scene.cpp b/src/scenes/startup_Scene.cpp index fe67b0f..a492c18 100644 --- a/src/scenes/startup_Scene.cpp +++ b/src/scenes/startup_Scene.cpp @@ -2,10 +2,13 @@ #include #include #include "startup_Scene.h" +#include "../computervision/ObjectDetection.h" +#include "../computervision/HandDetectRegion.h" +#include namespace scene { - + computervision::ObjectDetection objDetect; scene::Scenes scene::Startup_Scene::start(GLFWwindow *window) { while (return_value == scene::Scenes::STARTUP) @@ -27,7 +30,8 @@ namespace scene void scene::Startup_Scene::update(GLFWwindow* window) { - + bool hand_present; + objDetect.DetectHand(objDetect.ReadCamera(),hand_present); } void scene::Startup_Scene::onKey(GLFWwindow* window, int key, int scancode, int action, int mods) @@ -35,6 +39,7 @@ namespace scene if (glfwGetKey(window, GLFW_KEY_SPACE) == GLFW_PRESS) { return_value = scene::Scenes::INGAME; + cv::destroyWindow("camera"); } } } diff --git a/wk2_fps.vcxproj b/wk2_fps.vcxproj index d8fb3c7..0c15567 100644 --- a/wk2_fps.vcxproj +++ b/wk2_fps.vcxproj @@ -21,9 +21,12 @@ + + - + + @@ -45,11 +48,16 @@ + + + - + + + @@ -73,6 +81,12 @@ + + + + + + 16.0 {A7ECF1BE-DB22-4BF7-BFF6-E3BF72691EE6} @@ -141,6 +155,8 @@ false $(VC_IncludePath);$(WindowsSDK_IncludePath);;C:\opencv\opencv\build\include;C:\opencv\build\include $(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);C:\opencv\opencv\build\x64\vc15\lib;C:\opencv\build\x64\vc15\lib + C:\opencv\build\include\;$(VC_IncludePath);$(WindowsSDK_IncludePath);C:\opencv\opencv\build\include + C:\opencv\build\x64\vc15\lib;$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);C:\opencv\opencv\build\x64\vc15\lib @@ -214,6 +230,7 @@ true $(SolutionDir)lib\glfw-3.3.2\$(Platform);$(SolutionDir)lib\glew-2.1.0\lib\Release\$(Platform);%(AdditionalLibraryDirectories) kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies); opencv_world452.lib;opencv_world452d.lib + opencv_world452.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) diff --git a/wk2_fps.vcxproj.filters b/wk2_fps.vcxproj.filters index 0ab51e8..6f20dba 100644 --- a/wk2_fps.vcxproj.filters +++ b/wk2_fps.vcxproj.filters @@ -1,18 +1,29 @@  - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms - + + + + + + + + + + + + + + + + + + + + + + + @@ -161,8 +172,45 @@ Header Files + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file