unsharp_opencl.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. /*
  2. * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
  3. * Copyright (C) 2013 Lenny Wang
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * unsharp input video
  24. */
  25. #include "unsharp_opencl.h"
  26. #include "libavutil/common.h"
  27. #include "libavutil/opencl_internal.h"
  28. #define PLANE_NUM 3
  29. #define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
  30. static inline void add_mask_counter(uint32_t *dst, uint32_t *counter1, uint32_t *counter2, int len)
  31. {
  32. int i;
  33. for (i = 0; i < len; i++) {
  34. dst[i] = counter1[i] + counter2[i];
  35. }
  36. }
  37. static int compute_mask(int step, uint32_t *mask)
  38. {
  39. int i, z, ret = 0;
  40. int counter_size = sizeof(uint32_t) * (2 * step + 1);
  41. uint32_t *temp1_counter, *temp2_counter, **counter;
  42. temp1_counter = av_mallocz(counter_size);
  43. if (!temp1_counter) {
  44. ret = AVERROR(ENOMEM);
  45. goto end;
  46. }
  47. temp2_counter = av_mallocz(counter_size);
  48. if (!temp2_counter) {
  49. ret = AVERROR(ENOMEM);
  50. goto end;
  51. }
  52. counter = av_mallocz_array(2 * step + 1, sizeof(uint32_t *));
  53. if (!counter) {
  54. ret = AVERROR(ENOMEM);
  55. goto end;
  56. }
  57. for (i = 0; i < 2 * step + 1; i++) {
  58. counter[i] = av_mallocz(counter_size);
  59. if (!counter[i]) {
  60. ret = AVERROR(ENOMEM);
  61. goto end;
  62. }
  63. }
  64. for (i = 0; i < 2 * step + 1; i++) {
  65. memset(temp1_counter, 0, counter_size);
  66. temp1_counter[i] = 1;
  67. for (z = 0; z < step * 2; z += 2) {
  68. add_mask_counter(temp2_counter, counter[z], temp1_counter, step * 2);
  69. memcpy(counter[z], temp1_counter, counter_size);
  70. add_mask_counter(temp1_counter, counter[z + 1], temp2_counter, step * 2);
  71. memcpy(counter[z + 1], temp2_counter, counter_size);
  72. }
  73. }
  74. memcpy(mask, temp1_counter, counter_size);
  75. end:
  76. av_freep(&temp1_counter);
  77. av_freep(&temp2_counter);
  78. for (i = 0; i < 2 * step + 1; i++) {
  79. av_freep(&counter[i]);
  80. }
  81. av_freep(&counter);
  82. return ret;
  83. }
  84. static int compute_mask_matrix(cl_mem cl_mask_matrix, int step_x, int step_y)
  85. {
  86. int i, j, ret = 0;
  87. uint32_t *mask_matrix, *mask_x, *mask_y;
  88. size_t size_matrix = sizeof(uint32_t) * (2 * step_x + 1) * (2 * step_y + 1);
  89. mask_x = av_mallocz_array(2 * step_x + 1, sizeof(uint32_t));
  90. if (!mask_x) {
  91. ret = AVERROR(ENOMEM);
  92. goto end;
  93. }
  94. mask_y = av_mallocz_array(2 * step_y + 1, sizeof(uint32_t));
  95. if (!mask_y) {
  96. ret = AVERROR(ENOMEM);
  97. goto end;
  98. }
  99. mask_matrix = av_mallocz(size_matrix);
  100. if (!mask_matrix) {
  101. ret = AVERROR(ENOMEM);
  102. goto end;
  103. }
  104. ret = compute_mask(step_x, mask_x);
  105. if (ret < 0)
  106. goto end;
  107. ret = compute_mask(step_y, mask_y);
  108. if (ret < 0)
  109. goto end;
  110. for (j = 0; j < 2 * step_y + 1; j++) {
  111. for (i = 0; i < 2 * step_x + 1; i++) {
  112. mask_matrix[i + j * (2 * step_x + 1)] = mask_y[j] * mask_x[i];
  113. }
  114. }
  115. ret = av_opencl_buffer_write(cl_mask_matrix, (uint8_t *)mask_matrix, size_matrix);
  116. end:
  117. av_freep(&mask_x);
  118. av_freep(&mask_y);
  119. av_freep(&mask_matrix);
  120. return ret;
  121. }
  122. static int generate_mask(AVFilterContext *ctx)
  123. {
  124. UnsharpContext *unsharp = ctx->priv;
  125. int i, ret = 0, step_x[2], step_y[2];
  126. cl_mem mask_matrix[2];
  127. mask_matrix[0] = unsharp->opencl_ctx.cl_luma_mask;
  128. mask_matrix[1] = unsharp->opencl_ctx.cl_chroma_mask;
  129. step_x[0] = unsharp->luma.steps_x;
  130. step_x[1] = unsharp->chroma.steps_x;
  131. step_y[0] = unsharp->luma.steps_y;
  132. step_y[1] = unsharp->chroma.steps_y;
  133. /* use default kernel if any matrix dim larger than 8 due to limited local mem size */
  134. if (step_x[0]>8 || step_x[1]>8 || step_y[0]>8 || step_y[1]>8)
  135. unsharp->opencl_ctx.use_fast_kernels = 0;
  136. else
  137. unsharp->opencl_ctx.use_fast_kernels = 1;
  138. if (!mask_matrix[0] || !mask_matrix[1]) {
  139. av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
  140. return AVERROR(EINVAL);
  141. }
  142. for (i = 0; i < 2; i++) {
  143. ret = compute_mask_matrix(mask_matrix[i], step_x[i], step_y[i]);
  144. if (ret < 0)
  145. return ret;
  146. }
  147. return ret;
  148. }
  149. int ff_opencl_apply_unsharp(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
  150. {
  151. int ret;
  152. AVFilterLink *link = ctx->inputs[0];
  153. UnsharpContext *unsharp = ctx->priv;
  154. cl_int status;
  155. FFOpenclParam kernel1 = {0};
  156. FFOpenclParam kernel2 = {0};
  157. int width = link->w;
  158. int height = link->h;
  159. int cw = FF_CEIL_RSHIFT(link->w, unsharp->hsub);
  160. int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
  161. size_t globalWorkSize1d = width * height + 2 * ch * cw;
  162. size_t globalWorkSize2dLuma[2];
  163. size_t globalWorkSize2dChroma[2];
  164. size_t localWorkSize2d[2] = {16, 16};
  165. if (unsharp->opencl_ctx.use_fast_kernels) {
  166. globalWorkSize2dLuma[0] = (size_t)ROUND_TO_16(width);
  167. globalWorkSize2dLuma[1] = (size_t)ROUND_TO_16(height);
  168. globalWorkSize2dChroma[0] = (size_t)ROUND_TO_16(cw);
  169. globalWorkSize2dChroma[1] = (size_t)(2*ROUND_TO_16(ch));
  170. kernel1.ctx = ctx;
  171. kernel1.kernel = unsharp->opencl_ctx.kernel_luma;
  172. ret = avpriv_opencl_set_parameter(&kernel1,
  173. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
  174. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
  175. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
  176. FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
  177. FF_OPENCL_PARAM_INFO(unsharp->luma.scalebits),
  178. FF_OPENCL_PARAM_INFO(unsharp->luma.halfscale),
  179. FF_OPENCL_PARAM_INFO(in->linesize[0]),
  180. FF_OPENCL_PARAM_INFO(out->linesize[0]),
  181. FF_OPENCL_PARAM_INFO(width),
  182. FF_OPENCL_PARAM_INFO(height),
  183. NULL);
  184. if (ret < 0)
  185. return ret;
  186. kernel2.ctx = ctx;
  187. kernel2.kernel = unsharp->opencl_ctx.kernel_chroma;
  188. ret = avpriv_opencl_set_parameter(&kernel2,
  189. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
  190. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
  191. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
  192. FF_OPENCL_PARAM_INFO(unsharp->chroma.amount),
  193. FF_OPENCL_PARAM_INFO(unsharp->chroma.scalebits),
  194. FF_OPENCL_PARAM_INFO(unsharp->chroma.halfscale),
  195. FF_OPENCL_PARAM_INFO(in->linesize[0]),
  196. FF_OPENCL_PARAM_INFO(in->linesize[1]),
  197. FF_OPENCL_PARAM_INFO(out->linesize[0]),
  198. FF_OPENCL_PARAM_INFO(out->linesize[1]),
  199. FF_OPENCL_PARAM_INFO(link->w),
  200. FF_OPENCL_PARAM_INFO(link->h),
  201. FF_OPENCL_PARAM_INFO(cw),
  202. FF_OPENCL_PARAM_INFO(ch),
  203. NULL);
  204. if (ret < 0)
  205. return ret;
  206. status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
  207. unsharp->opencl_ctx.kernel_luma, 2, NULL,
  208. globalWorkSize2dLuma, localWorkSize2d, 0, NULL, NULL);
  209. status |=clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
  210. unsharp->opencl_ctx.kernel_chroma, 2, NULL,
  211. globalWorkSize2dChroma, localWorkSize2d, 0, NULL, NULL);
  212. if (status != CL_SUCCESS) {
  213. av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
  214. return AVERROR_EXTERNAL;
  215. }
  216. } else { /* use default kernel */
  217. kernel1.ctx = ctx;
  218. kernel1.kernel = unsharp->opencl_ctx.kernel_default;
  219. ret = avpriv_opencl_set_parameter(&kernel1,
  220. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
  221. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
  222. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
  223. FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
  224. FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
  225. FF_OPENCL_PARAM_INFO(unsharp->chroma.amount),
  226. FF_OPENCL_PARAM_INFO(unsharp->luma.steps_x),
  227. FF_OPENCL_PARAM_INFO(unsharp->luma.steps_y),
  228. FF_OPENCL_PARAM_INFO(unsharp->chroma.steps_x),
  229. FF_OPENCL_PARAM_INFO(unsharp->chroma.steps_y),
  230. FF_OPENCL_PARAM_INFO(unsharp->luma.scalebits),
  231. FF_OPENCL_PARAM_INFO(unsharp->chroma.scalebits),
  232. FF_OPENCL_PARAM_INFO(unsharp->luma.halfscale),
  233. FF_OPENCL_PARAM_INFO(unsharp->chroma.halfscale),
  234. FF_OPENCL_PARAM_INFO(in->linesize[0]),
  235. FF_OPENCL_PARAM_INFO(in->linesize[1]),
  236. FF_OPENCL_PARAM_INFO(out->linesize[0]),
  237. FF_OPENCL_PARAM_INFO(out->linesize[1]),
  238. FF_OPENCL_PARAM_INFO(link->h),
  239. FF_OPENCL_PARAM_INFO(link->w),
  240. FF_OPENCL_PARAM_INFO(ch),
  241. FF_OPENCL_PARAM_INFO(cw),
  242. NULL);
  243. if (ret < 0)
  244. return ret;
  245. status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
  246. unsharp->opencl_ctx.kernel_default, 1, NULL,
  247. &globalWorkSize1d, NULL, 0, NULL, NULL);
  248. if (status != CL_SUCCESS) {
  249. av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
  250. return AVERROR_EXTERNAL;
  251. }
  252. }
  253. clFinish(unsharp->opencl_ctx.command_queue);
  254. return av_opencl_buffer_read_image(out->data, unsharp->opencl_ctx.out_plane_size,
  255. unsharp->opencl_ctx.plane_num, unsharp->opencl_ctx.cl_outbuf,
  256. unsharp->opencl_ctx.cl_outbuf_size);
  257. }
  258. int ff_opencl_unsharp_init(AVFilterContext *ctx)
  259. {
  260. int ret = 0;
  261. char build_opts[96];
  262. UnsharpContext *unsharp = ctx->priv;
  263. ret = av_opencl_init(NULL);
  264. if (ret < 0)
  265. return ret;
  266. ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask,
  267. sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1) * (2 * unsharp->luma.steps_y + 1),
  268. CL_MEM_READ_ONLY, NULL);
  269. if (ret < 0)
  270. return ret;
  271. ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask,
  272. sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1) * (2 * unsharp->chroma.steps_y + 1),
  273. CL_MEM_READ_ONLY, NULL);
  274. if (ret < 0)
  275. return ret;
  276. ret = generate_mask(ctx);
  277. if (ret < 0)
  278. return ret;
  279. unsharp->opencl_ctx.plane_num = PLANE_NUM;
  280. unsharp->opencl_ctx.command_queue = av_opencl_get_command_queue();
  281. if (!unsharp->opencl_ctx.command_queue) {
  282. av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'unsharp'\n");
  283. return AVERROR(EINVAL);
  284. }
  285. snprintf(build_opts, 96, "-D LU_RADIUS_X=%d -D LU_RADIUS_Y=%d -D CH_RADIUS_X=%d -D CH_RADIUS_Y=%d",
  286. 2*unsharp->luma.steps_x+1, 2*unsharp->luma.steps_y+1, 2*unsharp->chroma.steps_x+1, 2*unsharp->chroma.steps_y+1);
  287. unsharp->opencl_ctx.program = av_opencl_compile("unsharp", build_opts);
  288. if (!unsharp->opencl_ctx.program) {
  289. av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'unsharp'\n");
  290. return AVERROR(EINVAL);
  291. }
  292. if (unsharp->opencl_ctx.use_fast_kernels) {
  293. if (!unsharp->opencl_ctx.kernel_luma) {
  294. unsharp->opencl_ctx.kernel_luma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_luma", &ret);
  295. if (ret != CL_SUCCESS) {
  296. av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_luma'\n");
  297. return ret;
  298. }
  299. }
  300. if (!unsharp->opencl_ctx.kernel_chroma) {
  301. unsharp->opencl_ctx.kernel_chroma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_chroma", &ret);
  302. if (ret < 0) {
  303. av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_chroma'\n");
  304. return ret;
  305. }
  306. }
  307. }
  308. else {
  309. if (!unsharp->opencl_ctx.kernel_default) {
  310. unsharp->opencl_ctx.kernel_default = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_default", &ret);
  311. if (ret < 0) {
  312. av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_default'\n");
  313. return ret;
  314. }
  315. }
  316. }
  317. return ret;
  318. }
  319. void ff_opencl_unsharp_uninit(AVFilterContext *ctx)
  320. {
  321. UnsharpContext *unsharp = ctx->priv;
  322. av_opencl_buffer_release(&unsharp->opencl_ctx.cl_inbuf);
  323. av_opencl_buffer_release(&unsharp->opencl_ctx.cl_outbuf);
  324. av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask);
  325. av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask);
  326. clReleaseKernel(unsharp->opencl_ctx.kernel_default);
  327. clReleaseKernel(unsharp->opencl_ctx.kernel_luma);
  328. clReleaseKernel(unsharp->opencl_ctx.kernel_chroma);
  329. clReleaseProgram(unsharp->opencl_ctx.program);
  330. unsharp->opencl_ctx.command_queue = NULL;
  331. av_opencl_uninit();
  332. }
  333. int ff_opencl_unsharp_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
  334. {
  335. int ret = 0;
  336. AVFilterLink *link = ctx->inputs[0];
  337. UnsharpContext *unsharp = ctx->priv;
  338. int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
  339. if ((!unsharp->opencl_ctx.cl_inbuf) || (!unsharp->opencl_ctx.cl_outbuf)) {
  340. unsharp->opencl_ctx.in_plane_size[0] = (in->linesize[0] * in->height);
  341. unsharp->opencl_ctx.in_plane_size[1] = (in->linesize[1] * ch);
  342. unsharp->opencl_ctx.in_plane_size[2] = (in->linesize[2] * ch);
  343. unsharp->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
  344. unsharp->opencl_ctx.out_plane_size[1] = (out->linesize[1] * ch);
  345. unsharp->opencl_ctx.out_plane_size[2] = (out->linesize[2] * ch);
  346. unsharp->opencl_ctx.cl_inbuf_size = unsharp->opencl_ctx.in_plane_size[0] +
  347. unsharp->opencl_ctx.in_plane_size[1] +
  348. unsharp->opencl_ctx.in_plane_size[2];
  349. unsharp->opencl_ctx.cl_outbuf_size = unsharp->opencl_ctx.out_plane_size[0] +
  350. unsharp->opencl_ctx.out_plane_size[1] +
  351. unsharp->opencl_ctx.out_plane_size[2];
  352. if (!unsharp->opencl_ctx.cl_inbuf) {
  353. ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_inbuf,
  354. unsharp->opencl_ctx.cl_inbuf_size,
  355. CL_MEM_READ_ONLY, NULL);
  356. if (ret < 0)
  357. return ret;
  358. }
  359. if (!unsharp->opencl_ctx.cl_outbuf) {
  360. ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_outbuf,
  361. unsharp->opencl_ctx.cl_outbuf_size,
  362. CL_MEM_READ_WRITE, NULL);
  363. if (ret < 0)
  364. return ret;
  365. }
  366. }
  367. return av_opencl_buffer_write_image(unsharp->opencl_ctx.cl_inbuf,
  368. unsharp->opencl_ctx.cl_inbuf_size,
  369. 0, in->data, unsharp->opencl_ctx.in_plane_size,
  370. unsharp->opencl_ctx.plane_num);
  371. }