4 years ago · 3c7cad69f2
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -19,10 +19,27 @@
 
				  */
			
 
				 
			
 
				 #include "libavutil/avassert.h"
			
 
				+#include "libavutil/thread.h"
			
 
				+#include "libavutil/cpu.h"
			
 
				 #include "dnn_backend_native_layer_conv2d.h"
			
 
				 
			
 
				 #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
			
 
				 
			
 
				+//struct to pass parameters
			
 
				+typedef struct thread_common_param{
			
 
				+    DnnOperand *operands;
			
 
				+    const int32_t *input_operand_indexes;
			
 
				+    int32_t output_operand_index;
			
 
				+    const void *parameters;
			
 
				+    NativeContext *ctx;
			
 
				+    int thread_num;
			
 
				+} thread_common_param;
			
 
				+
			
 
				+typedef struct thread_param{
			
 
				+    thread_common_param *thread_common_param;
			
 
				+    int thread_index;
			
 
				+} thread_param;
			
 
				+
			
 
				 int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
			
 
				 {
			
 
				     ConvolutionalParams *conv_params;
			
@@ -88,17 +105,20 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
 
				     return dnn_size;
			
 
				 }
			
 
				 
			
 
				-int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
			
 
				-                             int32_t output_operand_index, const void *parameters, NativeContext *ctx)
			
 
				+static void * dnn_execute_layer_conv2d_thread(void *threadarg)
			
 
				 {
			
 
				+    //pass parameters
			
 
				+    thread_param *thread_param = (struct thread_param *)threadarg;
			
 
				+    thread_common_param *thread_common_param = thread_param->thread_common_param;
			
 
				+    DnnOperand *operands = thread_common_param->operands;
			
 
				     float *output;
			
 
				-    int32_t input_operand_index = input_operand_indexes[0];
			
 
				+    int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
			
 
				     int number = operands[input_operand_index].dims[0];
			
 
				     int height = operands[input_operand_index].dims[1];
			
 
				     int width = operands[input_operand_index].dims[2];
			
 
				     int channel = operands[input_operand_index].dims[3];
			
 
				     const float *input = operands[input_operand_index].data;
			
 
				-    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)parameters;
			
 
				+    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(thread_common_param->parameters);
			
 
				 
			
 
				     int radius = conv_params->kernel_size >> 1;
			
 
				     int src_linesize = width * conv_params->input_num;
			
@@ -106,7 +126,11 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
 
				     int filter_size = conv_params->kernel_size * filter_linesize;
			
 
				     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
			
 
				 
			
 
				-    DnnOperand *output_operand = &operands[output_operand_index];
			
 
				+    int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
			
 
				+    int thread_start = thread_stride * thread_param->thread_index + pad_size;
			
 
				+    int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
			
 
				+
			
 
				+    DnnOperand *output_operand = &operands[thread_common_param->output_operand_index];
			
 
				     output_operand->dims[0] = number;
			
 
				     output_operand->dims[1] = height - pad_size * 2;
			
 
				     output_operand->dims[2] = width - pad_size * 2;
			
@@ -114,19 +138,21 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
 
				     output_operand->data_type = operands[input_operand_index].data_type;
			
 
				     output_operand->length = calculate_operand_data_length(output_operand);
			
 
				     if (output_operand->length <= 0) {
			
 
				-        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
			
 
				-        return DNN_ERROR;
			
 
				+        av_log(thread_common_param->ctx, AV_LOG_ERROR, "The output data length overflow\n");
			
 
				+        return (void *)DNN_ERROR;
			
 
				     }
			
 
				     output_operand->data = av_realloc(output_operand->data, output_operand->length);
			
 
				     if (!output_operand->data) {
			
 
				-        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
			
 
				-        return DNN_ERROR;
			
 
				+        av_log(thread_common_param->ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
			
 
				+        return (void *)DNN_ERROR;
			
 
				     }
			
 
				+
			
 
				     output = output_operand->data;
			
 
				+    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
			
 
				 
			
 
				     av_assert0(channel == conv_params->input_num);
			
 
				 
			
 
				-    for (int y = pad_size; y < height - pad_size; ++y) {
			
 
				+    for (int y = thread_start; y < thread_end; ++y) {
			
 
				         for (int x = pad_size; x < width - pad_size; ++x) {
			
 
				             for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
			
 
				                 if (conv_params->has_bias)
			
@@ -174,5 +200,64 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
 
				             output += conv_params->output_num;
			
 
				         }
			
 
				     }
			
 
				-    return 0;
			
 
				+    return (void *)DNN_SUCCESS;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
			
 
				+                             int32_t output_operand_index, const void *parameters, NativeContext *ctx)
			
 
				+{
			
 
				+    int thread_num = (ctx->options.conv2d_threads <= 0 || ctx->options.conv2d_threads > av_cpu_count())
			
 
				+        ? (av_cpu_count() + 1) : (ctx->options.conv2d_threads);
			
 
				+#if HAVE_PTHREAD_CANCEL
			
 
				+    pthread_t *thread_id = av_malloc(thread_num * sizeof(pthread_t));
			
 
				+#endif
			
 
				+    thread_param **thread_param = av_malloc(thread_num * sizeof(*thread_param));
			
 
				+    void *res;
			
 
				+    int error_flag = DNN_SUCCESS;
			
 
				+
			
 
				+    //struct used to pass parameters
			
 
				+    thread_common_param thread_common_param;
			
 
				+    thread_common_param.operands = operands;
			
 
				+    thread_common_param.input_operand_indexes = input_operand_indexes;
			
 
				+    thread_common_param.output_operand_index = output_operand_index;
			
 
				+    thread_common_param.parameters = parameters;
			
 
				+    thread_common_param.ctx = ctx;
			
 
				+#if HAVE_PTHREAD_CANCEL
			
 
				+    thread_common_param.thread_num = thread_num;
			
 
				+
			
 
				+    //create threads
			
 
				+    for (int i = 0; i < thread_num; i++){
			
 
				+        thread_param[i] = av_malloc(sizeof(thread_param));
			
 
				+        thread_param[i]->thread_common_param = &thread_common_param;
			
 
				+        thread_param[i]->thread_index = i;
			
 
				+        pthread_create(&thread_id[i], NULL, dnn_execute_layer_conv2d_thread, (void *)thread_param[i]);
			
 
				+    }
			
 
				+
			
 
				+    //join threads, res gets function return
			
 
				+    for (int i = 0; i < thread_num; i++){
			
 
				+        pthread_join(thread_id[i], &res);
			
 
				+        if ((int)res != DNN_SUCCESS)
			
 
				+            error_flag = (int)res;
			
 
				+    }
			
 
				+
			
 
				+    //release memory
			
 
				+    av_free(thread_id);
			
 
				+
			
 
				+    for (int i = 0; i < thread_num; i++){
			
 
				+        av_free(thread_param[i]);
			
 
				+    }
			
 
				+#else
			
 
				+    thread_common_param.thread_num = 1;
			
 
				+    thread_param[0] = av_malloc(sizeof(thread_param));
			
 
				+    thread_param[0]->thread_common_param = &thread_common_param;
			
 
				+    thread_param[0]->thread_index = 0;
			
 
				+    res = dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
			
 
				+    if ((int)res != DNN_SUCCESS)
			
 
				+        error_flag = (int)res;
			
 
				+    av_free(thread_param[0]);
			
 
				+#endif
			
 
				+
			
 
				+    av_free(thread_param);
			
 
				+    return error_flag;
			
 
				 }
			
--- a/tests/dnn/dnn-layer-conv2d-test.c
+++ b/tests/dnn/dnn-layer-conv2d-test.c
@@ -25,6 +25,8 @@
 
				 
			
 
				 #define EPSON 0.00001
			
 
				 
			
 
				+extern const AVClass dnn_native_class;
			
 
				+
			
 
				 static int test_with_same_dilate(void)
			
 
				 {
			
 
				     // the input data and expected data are generated with below python code.
			
@@ -96,6 +98,10 @@ static int test_with_same_dilate(void)
 
				     };
			
 
				     float bias[2] = { -1.6574852, -0.72915393 };
			
 
				 
			
 
				+    NativeContext ctx;
			
 
				+    ctx.class = &dnn_native_class;
			
 
				+    ctx.options.conv2d_threads = 1;
			
 
				+
			
 
				     params.activation = TANH;
			
 
				     params.has_bias = 1;
			
 
				     params.biases = bias;
			
@@ -114,7 +120,7 @@ static int test_with_same_dilate(void)
 
				     operands[1].data = NULL;
			
 
				 
			
 
				     input_indexes[0] = 0;
			
 
				-    dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, NULL);
			
 
				+    dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);
			
 
				 
			
 
				     output = operands[1].data;
			
 
				     for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {
			
@@ -196,6 +202,10 @@ static int test_with_valid(void)
 
				     };
			
 
				     float bias[2] = { -0.4773722, -0.19620377 };
			
 
				 
			
 
				+    NativeContext ctx;
			
 
				+    ctx.class = &dnn_native_class;
			
 
				+    ctx.options.conv2d_threads = 1;
			
 
				+
			
 
				     params.activation = TANH;
			
 
				     params.has_bias = 1;
			
 
				     params.biases = bias;
			
@@ -214,7 +224,7 @@ static int test_with_valid(void)
 
				     operands[1].data = NULL;
			
 
				 
			
 
				     input_indexes[0] = 0;
			
 
				-    dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, NULL);
			
 
				+    dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);
			
 
				 
			
 
				     output = operands[1].data;
			
 
				     for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {