vf_ilpack.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /*
  2. * This file is part of MPlayer.
  3. *
  4. * MPlayer is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * MPlayer is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License along
  15. * with MPlayer; if not, write to the Free Software Foundation, Inc.,
  16. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17. */
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <inttypes.h>
  22. #include "config.h"
  23. #include "mp_msg.h"
  24. #include "cpudetect.h"
  25. #include "img_format.h"
  26. #include "mp_image.h"
  27. #include "vf.h"
  28. #include "libavutil/attributes.h"
  29. #include "libavutil/x86/asm.h"
  30. typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
  31. unsigned char *u, unsigned char *v, int w, int us, int vs);
  32. struct vf_priv_s {
  33. int mode;
  34. pack_func_t *pack[2];
  35. };
  36. static void pack_nn_C(unsigned char *dst, unsigned char *y,
  37. unsigned char *u, unsigned char *v, int w,
  38. int av_unused us, int av_unused vs)
  39. {
  40. int j;
  41. for (j = w/2; j; j--) {
  42. *dst++ = *y++;
  43. *dst++ = *u++;
  44. *dst++ = *y++;
  45. *dst++ = *v++;
  46. }
  47. }
  48. static void pack_li_0_C(unsigned char *dst, unsigned char *y,
  49. unsigned char *u, unsigned char *v, int w, int us, int vs)
  50. {
  51. int j;
  52. for (j = w/2; j; j--) {
  53. *dst++ = *y++;
  54. *dst++ = (u[us+us] + 7*u[0])>>3;
  55. *dst++ = *y++;
  56. *dst++ = (v[vs+vs] + 7*v[0])>>3;
  57. u++; v++;
  58. }
  59. }
  60. static void pack_li_1_C(unsigned char *dst, unsigned char *y,
  61. unsigned char *u, unsigned char *v, int w, int us, int vs)
  62. {
  63. int j;
  64. for (j = w/2; j; j--) {
  65. *dst++ = *y++;
  66. *dst++ = (3*u[us+us] + 5*u[0])>>3;
  67. *dst++ = *y++;
  68. *dst++ = (3*v[vs+vs] + 5*v[0])>>3;
  69. u++; v++;
  70. }
  71. }
  72. #if HAVE_MMX
  73. static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
  74. unsigned char *u, unsigned char *v, int w,
  75. int av_unused us, int av_unused vs)
  76. {
  77. __asm__ volatile (""
  78. ASMALIGN(4)
  79. "1: \n\t"
  80. "movq (%0), %%mm1 \n\t"
  81. "movq (%0), %%mm2 \n\t"
  82. "movq (%1), %%mm4 \n\t"
  83. "movq (%2), %%mm6 \n\t"
  84. "punpcklbw %%mm6, %%mm4 \n\t"
  85. "punpcklbw %%mm4, %%mm1 \n\t"
  86. "punpckhbw %%mm4, %%mm2 \n\t"
  87. "add $8, %0 \n\t"
  88. "add $4, %1 \n\t"
  89. "add $4, %2 \n\t"
  90. "movq %%mm1, (%3) \n\t"
  91. "movq %%mm2, 8(%3) \n\t"
  92. "add $16, %3 \n\t"
  93. "decl %4 \n\t"
  94. "jnz 1b \n\t"
  95. "emms \n\t"
  96. :
  97. : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
  98. : "memory"
  99. );
  100. pack_nn_C(dst, y, u, v, (w&7), 0, 0);
  101. }
  102. #if HAVE_EBX_AVAILABLE
  103. static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
  104. unsigned char *u, unsigned char *v, int w, int us, int vs)
  105. {
  106. __asm__ volatile (""
  107. "push %%"REG_BP" \n\t"
  108. #if ARCH_X86_64
  109. "mov %6, %%"REG_BP" \n\t"
  110. #else
  111. "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
  112. "movl (%%"REG_d"), %%"REG_d" \n\t"
  113. #endif
  114. "pxor %%mm0, %%mm0 \n\t"
  115. ASMALIGN(4)
  116. ".Lli0: \n\t"
  117. "movq (%%"REG_S"), %%mm1 \n\t"
  118. "movq (%%"REG_S"), %%mm2 \n\t"
  119. "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
  120. "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
  121. "punpcklbw %%mm0, %%mm4 \n\t"
  122. "punpcklbw %%mm0, %%mm6 \n\t"
  123. "movq (%%"REG_a"), %%mm3 \n\t"
  124. "movq (%%"REG_b"), %%mm5 \n\t"
  125. "punpcklbw %%mm0, %%mm3 \n\t"
  126. "punpcklbw %%mm0, %%mm5 \n\t"
  127. "paddw %%mm3, %%mm4 \n\t"
  128. "paddw %%mm5, %%mm6 \n\t"
  129. "paddw %%mm3, %%mm4 \n\t"
  130. "paddw %%mm5, %%mm6 \n\t"
  131. "paddw %%mm3, %%mm4 \n\t"
  132. "paddw %%mm5, %%mm6 \n\t"
  133. "paddw %%mm3, %%mm4 \n\t"
  134. "paddw %%mm5, %%mm6 \n\t"
  135. "paddw %%mm3, %%mm4 \n\t"
  136. "paddw %%mm5, %%mm6 \n\t"
  137. "paddw %%mm3, %%mm4 \n\t"
  138. "paddw %%mm5, %%mm6 \n\t"
  139. "paddw %%mm3, %%mm4 \n\t"
  140. "paddw %%mm5, %%mm6 \n\t"
  141. "psrlw $3, %%mm4 \n\t"
  142. "psrlw $3, %%mm6 \n\t"
  143. "packuswb %%mm4, %%mm4 \n\t"
  144. "packuswb %%mm6, %%mm6 \n\t"
  145. "punpcklbw %%mm6, %%mm4 \n\t"
  146. "punpcklbw %%mm4, %%mm1 \n\t"
  147. "punpckhbw %%mm4, %%mm2 \n\t"
  148. "movq %%mm1, (%%"REG_D") \n\t"
  149. "movq %%mm2, 8(%%"REG_D") \n\t"
  150. "movq 8(%%"REG_S"), %%mm1 \n\t"
  151. "movq 8(%%"REG_S"), %%mm2 \n\t"
  152. "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
  153. "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
  154. "punpckhbw %%mm0, %%mm4 \n\t"
  155. "punpckhbw %%mm0, %%mm6 \n\t"
  156. "movq (%%"REG_a"), %%mm3 \n\t"
  157. "movq (%%"REG_b"), %%mm5 \n\t"
  158. "punpckhbw %%mm0, %%mm3 \n\t"
  159. "punpckhbw %%mm0, %%mm5 \n\t"
  160. "paddw %%mm3, %%mm4 \n\t"
  161. "paddw %%mm5, %%mm6 \n\t"
  162. "paddw %%mm3, %%mm4 \n\t"
  163. "paddw %%mm5, %%mm6 \n\t"
  164. "paddw %%mm3, %%mm4 \n\t"
  165. "paddw %%mm5, %%mm6 \n\t"
  166. "paddw %%mm3, %%mm4 \n\t"
  167. "paddw %%mm5, %%mm6 \n\t"
  168. "paddw %%mm3, %%mm4 \n\t"
  169. "paddw %%mm5, %%mm6 \n\t"
  170. "paddw %%mm3, %%mm4 \n\t"
  171. "paddw %%mm5, %%mm6 \n\t"
  172. "paddw %%mm3, %%mm4 \n\t"
  173. "paddw %%mm5, %%mm6 \n\t"
  174. "psrlw $3, %%mm4 \n\t"
  175. "psrlw $3, %%mm6 \n\t"
  176. "packuswb %%mm4, %%mm4 \n\t"
  177. "packuswb %%mm6, %%mm6 \n\t"
  178. "punpcklbw %%mm6, %%mm4 \n\t"
  179. "punpcklbw %%mm4, %%mm1 \n\t"
  180. "punpckhbw %%mm4, %%mm2 \n\t"
  181. "add $16, %%"REG_S" \n\t"
  182. "add $8, %%"REG_a" \n\t"
  183. "add $8, %%"REG_b" \n\t"
  184. "movq %%mm1, 16(%%"REG_D") \n\t"
  185. "movq %%mm2, 24(%%"REG_D") \n\t"
  186. "add $32, %%"REG_D" \n\t"
  187. "decl %%ecx \n\t"
  188. "jnz .Lli0 \n\t"
  189. "emms \n\t"
  190. "pop %%"REG_BP" \n\t"
  191. :
  192. : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
  193. #if ARCH_X86_64
  194. "d" ((x86_reg)us), "r" ((x86_reg)vs)
  195. #else
  196. "d" (&us)
  197. #endif
  198. : "memory"
  199. );
  200. pack_li_0_C(dst, y, u, v, (w&15), us, vs);
  201. }
  202. static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
  203. unsigned char *u, unsigned char *v, int w, int us, int vs)
  204. {
  205. __asm__ volatile (""
  206. "push %%"REG_BP" \n\t"
  207. #if ARCH_X86_64
  208. "mov %6, %%"REG_BP" \n\t"
  209. #else
  210. "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
  211. "movl (%%"REG_d"), %%"REG_d" \n\t"
  212. #endif
  213. "pxor %%mm0, %%mm0 \n\t"
  214. ASMALIGN(4)
  215. ".Lli1: \n\t"
  216. "movq (%%"REG_S"), %%mm1 \n\t"
  217. "movq (%%"REG_S"), %%mm2 \n\t"
  218. "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
  219. "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
  220. "punpcklbw %%mm0, %%mm4 \n\t"
  221. "punpcklbw %%mm0, %%mm6 \n\t"
  222. "movq (%%"REG_a"), %%mm3 \n\t"
  223. "movq (%%"REG_b"), %%mm5 \n\t"
  224. "punpcklbw %%mm0, %%mm3 \n\t"
  225. "punpcklbw %%mm0, %%mm5 \n\t"
  226. "movq %%mm4, %%mm7 \n\t"
  227. "paddw %%mm4, %%mm4 \n\t"
  228. "paddw %%mm7, %%mm4 \n\t"
  229. "movq %%mm6, %%mm7 \n\t"
  230. "paddw %%mm6, %%mm6 \n\t"
  231. "paddw %%mm7, %%mm6 \n\t"
  232. "paddw %%mm3, %%mm4 \n\t"
  233. "paddw %%mm5, %%mm6 \n\t"
  234. "paddw %%mm3, %%mm4 \n\t"
  235. "paddw %%mm5, %%mm6 \n\t"
  236. "paddw %%mm3, %%mm4 \n\t"
  237. "paddw %%mm5, %%mm6 \n\t"
  238. "paddw %%mm3, %%mm4 \n\t"
  239. "paddw %%mm5, %%mm6 \n\t"
  240. "paddw %%mm3, %%mm4 \n\t"
  241. "paddw %%mm5, %%mm6 \n\t"
  242. "psrlw $3, %%mm4 \n\t"
  243. "psrlw $3, %%mm6 \n\t"
  244. "packuswb %%mm4, %%mm4 \n\t"
  245. "packuswb %%mm6, %%mm6 \n\t"
  246. "punpcklbw %%mm6, %%mm4 \n\t"
  247. "punpcklbw %%mm4, %%mm1 \n\t"
  248. "punpckhbw %%mm4, %%mm2 \n\t"
  249. "movq %%mm1, (%%"REG_D") \n\t"
  250. "movq %%mm2, 8(%%"REG_D") \n\t"
  251. "movq 8(%%"REG_S"), %%mm1 \n\t"
  252. "movq 8(%%"REG_S"), %%mm2 \n\t"
  253. "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
  254. "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
  255. "punpckhbw %%mm0, %%mm4 \n\t"
  256. "punpckhbw %%mm0, %%mm6 \n\t"
  257. "movq (%%"REG_a"), %%mm3 \n\t"
  258. "movq (%%"REG_b"), %%mm5 \n\t"
  259. "punpckhbw %%mm0, %%mm3 \n\t"
  260. "punpckhbw %%mm0, %%mm5 \n\t"
  261. "movq %%mm4, %%mm7 \n\t"
  262. "paddw %%mm4, %%mm4 \n\t"
  263. "paddw %%mm7, %%mm4 \n\t"
  264. "movq %%mm6, %%mm7 \n\t"
  265. "paddw %%mm6, %%mm6 \n\t"
  266. "paddw %%mm7, %%mm6 \n\t"
  267. "paddw %%mm3, %%mm4 \n\t"
  268. "paddw %%mm5, %%mm6 \n\t"
  269. "paddw %%mm3, %%mm4 \n\t"
  270. "paddw %%mm5, %%mm6 \n\t"
  271. "paddw %%mm3, %%mm4 \n\t"
  272. "paddw %%mm5, %%mm6 \n\t"
  273. "paddw %%mm3, %%mm4 \n\t"
  274. "paddw %%mm5, %%mm6 \n\t"
  275. "paddw %%mm3, %%mm4 \n\t"
  276. "paddw %%mm5, %%mm6 \n\t"
  277. "psrlw $3, %%mm4 \n\t"
  278. "psrlw $3, %%mm6 \n\t"
  279. "packuswb %%mm4, %%mm4 \n\t"
  280. "packuswb %%mm6, %%mm6 \n\t"
  281. "punpcklbw %%mm6, %%mm4 \n\t"
  282. "punpcklbw %%mm4, %%mm1 \n\t"
  283. "punpckhbw %%mm4, %%mm2 \n\t"
  284. "add $16, %%"REG_S" \n\t"
  285. "add $8, %%"REG_a" \n\t"
  286. "add $8, %%"REG_b" \n\t"
  287. "movq %%mm1, 16(%%"REG_D") \n\t"
  288. "movq %%mm2, 24(%%"REG_D") \n\t"
  289. "add $32, %%"REG_D" \n\t"
  290. "decl %%ecx \n\t"
  291. "jnz .Lli1 \n\t"
  292. "emms \n\t"
  293. "pop %%"REG_BP" \n\t"
  294. :
  295. : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
  296. #if ARCH_X86_64
  297. "d" ((x86_reg)us), "r" ((x86_reg)vs)
  298. #else
  299. "d" (&us)
  300. #endif
  301. : "memory"
  302. );
  303. pack_li_1_C(dst, y, u, v, (w&15), us, vs);
  304. }
  305. #endif /* HAVE_EBX_AVAILABLE */
  306. #endif
  307. static pack_func_t *pack_nn;
  308. static pack_func_t *pack_li_0;
  309. static pack_func_t *pack_li_1;
  310. static void ilpack(unsigned char *dst, unsigned char *src[3],
  311. int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
  312. {
  313. int i;
  314. unsigned char *y, *u, *v;
  315. int ys = srcstride[0], us = srcstride[1], vs = srcstride[2];
  316. int a, b;
  317. y = src[0];
  318. u = src[1];
  319. v = src[2];
  320. pack_nn(dst, y, u, v, w, 0, 0);
  321. y += ys; dst += dststride;
  322. pack_nn(dst, y, u+us, v+vs, w, 0, 0);
  323. y += ys; dst += dststride;
  324. for (i=2; i<h-2; i++) {
  325. a = (i&2) ? 1 : -1;
  326. b = (i&1) ^ ((i&2)>>1);
  327. pack[b](dst, y, u, v, w, us*a, vs*a);
  328. y += ys;
  329. if ((i&3) == 1) {
  330. u -= us;
  331. v -= vs;
  332. } else {
  333. u += us;
  334. v += vs;
  335. }
  336. dst += dststride;
  337. }
  338. pack_nn(dst, y, u, v, w, 0, 0);
  339. y += ys; dst += dststride; u += us; v += vs;
  340. pack_nn(dst, y, u, v, w, 0, 0);
  341. }
  342. static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
  343. {
  344. mp_image_t *dmpi;
  345. // hope we'll get DR buffer:
  346. dmpi=ff_vf_get_image(vf->next, IMGFMT_YUY2,
  347. MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
  348. mpi->w, mpi->h);
  349. ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack);
  350. return ff_vf_next_put_image(vf,dmpi, pts);
  351. }
  352. static int config(struct vf_instance *vf,
  353. int width, int height, int d_width, int d_height,
  354. unsigned int flags, unsigned int outfmt)
  355. {
  356. /* FIXME - also support UYVY output? */
  357. return ff_vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2);
  358. }
  359. static int query_format(struct vf_instance *vf, unsigned int fmt)
  360. {
  361. /* FIXME - really any YUV 4:2:0 input format should work */
  362. switch (fmt) {
  363. case IMGFMT_YV12:
  364. case IMGFMT_IYUV:
  365. case IMGFMT_I420:
  366. return ff_vf_next_query_format(vf,IMGFMT_YUY2);
  367. }
  368. return 0;
  369. }
  370. static int vf_open(vf_instance_t *vf, char *args)
  371. {
  372. vf->config=config;
  373. vf->query_format=query_format;
  374. vf->put_image=put_image;
  375. vf->priv = calloc(1, sizeof(struct vf_priv_s));
  376. vf->priv->mode = 1;
  377. if (args) sscanf(args, "%d", &vf->priv->mode);
  378. pack_nn = pack_nn_C;
  379. pack_li_0 = pack_li_0_C;
  380. pack_li_1 = pack_li_1_C;
  381. #if HAVE_MMX
  382. if(ff_gCpuCaps.hasMMX) {
  383. pack_nn = pack_nn_MMX;
  384. #if HAVE_EBX_AVAILABLE
  385. pack_li_0 = pack_li_0_MMX;
  386. pack_li_1 = pack_li_1_MMX;
  387. #endif
  388. }
  389. #endif
  390. switch(vf->priv->mode) {
  391. case 0:
  392. vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
  393. break;
  394. default:
  395. ff_mp_msg(MSGT_VFILTER, MSGL_WARN,
  396. "ilpack: unknown mode %d (fallback to linear)\n",
  397. vf->priv->mode);
  398. /* Fallthrough */
  399. case 1:
  400. vf->priv->pack[0] = pack_li_0;
  401. vf->priv->pack[1] = pack_li_1;
  402. break;
  403. }
  404. return 1;
  405. }
  406. const vf_info_t ff_vf_info_ilpack = {
  407. "4:2:0 planar -> 4:2:2 packed reinterlacer",
  408. "ilpack",
  409. "Richard Felker",
  410. "",
  411. vf_open,
  412. NULL
  413. };