vf_spp.c

/*
 * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * This implementation is based on an algorithm described in
 * "Aria Nosratinia Embedded Post-Processing for
 * Enhancement of Compressed Images (1999)"
 * (http://citeseer.nj.nec.com/nosratinia99embedded.html)
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>

#include "config.h"

#include "mp_msg.h"
#include "cpudetect.h"

#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"

#undef fprintf
#undef free
#undef malloc

#include "img_format.h"
#include "mp_image.h"
#include "vf.h"
#include "vd_ffmpeg.h"
#include "libvo/fastmemcpy.h"

#define XMIN(a,b) ((a) < (b) ? (a) : (b))

//===========================================================================//
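/* 8x8 ordered-dither matrix (values 0..63): the store_slice*() functions add
   dither[y][x] to each accumulated 16-bit sample before shifting back down to
   8 bits, so the rounding error is distributed spatially. */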
static const uint8_t __attribute__((aligned(8))) dither[8][8]={
{  0,  48,  12,  60,   3,  51,  15,  63, },
{ 32,  16,  44,  28,  35,  19,  47,  31, },
{  8,  56,   4,  52,  11,  59,   7,  55, },
{ 40,  24,  36,  20,  43,  27,  39,  23, },
{  2,  50,  14,  62,   1,  49,  13,  61, },
{ 34,  18,  46,  30,  33,  17,  45,  29, },
{ 10,  58,   6,  54,   9,  57,   5,  53, },
{ 42,  26,  38,  22,  41,  25,  37,  21, },
};
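
/* Block-grid shift offsets: for a pass count of count = 1<<log2_count,
   filter() below uses entries offset[count-1] .. offset[2*count-2] as the
   (x,y) shifts of the 8x8 DCT grid for the individual averaging passes. */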
static const uint8_t offset[127][2]= {
{0,0},
{0,0}, {4,4},
{0,0}, {2,2}, {6,4}, {4,6},
{0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7},

{0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3},
{0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7},

{0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7},
{2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7},
{4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7},
{6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7},

{0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2},
{0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0},
{1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3},
{1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1},
{0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3},
{0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1},
{1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2},
{1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0},
};

struct vf_priv_s {
    int log2_count;
    int qp;
    int mode;
    int mpeg2;
    int temp_stride;
    uint8_t *src;
    int16_t *temp;
    AVCodecContext *avctx;
    DSPContext dsp;
    char *non_b_qp;
};

#define SHIFT 22

static void hardthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
    int i;
    int bias= 0; //FIXME
    unsigned int threshold1, threshold2;

    threshold1= qp*((1<<4) - bias) - 1;
    threshold2= (threshold1<<1);

    memset(dst, 0, 64*sizeof(DCTELEM));
    dst[0]= (src[0] + 4)>>3;

    for(i=1; i<64; i++){
        int level= src[i];
        if(((unsigned)(level+threshold1))>threshold2){
            const int j= permutation[i];
            dst[j]= (level + 4)>>3;
        }
    }
}

static void softthresh_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
    int i;
    int bias= 0; //FIXME
    unsigned int threshold1, threshold2;

    threshold1= qp*((1<<4) - bias) - 1;
    threshold2= (threshold1<<1);

    memset(dst, 0, 64*sizeof(DCTELEM));
    dst[0]= (src[0] + 4)>>3;

    for(i=1; i<64; i++){
        int level= src[i];
        if(((unsigned)(level+threshold1))>threshold2){
            const int j= permutation[i];
            if(level>0)
                dst[j]= (level - threshold1 + 4)>>3;
            else
                dst[j]= (level + threshold1 + 4)>>3;
        }
    }
}

#if HAVE_MMX
static void hardthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
    int bias= 0; //FIXME
    unsigned int threshold1;

    threshold1= qp*((1<<4) - bias) - 1;

    __asm__ volatile(
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
        "movq " #src0 ", %%mm0 \n\t"\
        "movq " #src1 ", %%mm1 \n\t"\
        "movq " #src2 ", %%mm2 \n\t"\
        "movq " #src3 ", %%mm3 \n\t"\
        "psubw %%mm4, %%mm0 \n\t"\
        "psubw %%mm4, %%mm1 \n\t"\
        "psubw %%mm4, %%mm2 \n\t"\
        "psubw %%mm4, %%mm3 \n\t"\
        "paddusw %%mm5, %%mm0 \n\t"\
        "paddusw %%mm5, %%mm1 \n\t"\
        "paddusw %%mm5, %%mm2 \n\t"\
        "paddusw %%mm5, %%mm3 \n\t"\
        "paddw %%mm6, %%mm0 \n\t"\
        "paddw %%mm6, %%mm1 \n\t"\
        "paddw %%mm6, %%mm2 \n\t"\
        "paddw %%mm6, %%mm3 \n\t"\
        "psubusw %%mm6, %%mm0 \n\t"\
        "psubusw %%mm6, %%mm1 \n\t"\
        "psubusw %%mm6, %%mm2 \n\t"\
        "psubusw %%mm6, %%mm3 \n\t"\
        "psraw $3, %%mm0 \n\t"\
        "psraw $3, %%mm1 \n\t"\
        "psraw $3, %%mm2 \n\t"\
        "psraw $3, %%mm3 \n\t"\
        \
        "movq %%mm0, %%mm7 \n\t"\
        "punpcklwd %%mm2, %%mm0 \n\t" /*A*/\
        "punpckhwd %%mm2, %%mm7 \n\t" /*C*/\
        "movq %%mm1, %%mm2 \n\t"\
        "punpcklwd %%mm3, %%mm1 \n\t" /*B*/\
        "punpckhwd %%mm3, %%mm2 \n\t" /*D*/\
        "movq %%mm0, %%mm3 \n\t"\
        "punpcklwd %%mm1, %%mm0 \n\t" /*A*/\
        "punpckhwd %%mm7, %%mm3 \n\t" /*C*/\
        "punpcklwd %%mm2, %%mm7 \n\t" /*B*/\
        "punpckhwd %%mm2, %%mm1 \n\t" /*D*/\
        \
        "movq %%mm0, " #dst0 " \n\t"\
        "movq %%mm7, " #dst1 " \n\t"\
        "movq %%mm3, " #dst2 " \n\t"\
        "movq %%mm1, " #dst3 " \n\t"

        "movd %2, %%mm4 \n\t"
        "movd %3, %%mm5 \n\t"
        "movd %4, %%mm6 \n\t"
        "packssdw %%mm4, %%mm4 \n\t"
        "packssdw %%mm5, %%mm5 \n\t"
        "packssdw %%mm6, %%mm6 \n\t"
        "packssdw %%mm4, %%mm4 \n\t"
        "packssdw %%mm5, %%mm5 \n\t"
        "packssdw %%mm6, %%mm6 \n\t"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate than needed?
    );
    dst[0]= (src[0] + 4)>>3;
}

static void softthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
    int bias= 0; //FIXME
    unsigned int threshold1;

    threshold1= qp*((1<<4) - bias) - 1;

    __asm__ volatile(
#undef REQUANT_CORE
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
        "movq " #src0 ", %%mm0 \n\t"\
        "movq " #src1 ", %%mm1 \n\t"\
        "pxor %%mm6, %%mm6 \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        "pcmpgtw %%mm0, %%mm6 \n\t"\
        "pcmpgtw %%mm1, %%mm7 \n\t"\
        "pxor %%mm6, %%mm0 \n\t"\
        "pxor %%mm7, %%mm1 \n\t"\
        "psubusw %%mm4, %%mm0 \n\t"\
        "psubusw %%mm4, %%mm1 \n\t"\
        "pxor %%mm6, %%mm0 \n\t"\
        "pxor %%mm7, %%mm1 \n\t"\
        "movq " #src2 ", %%mm2 \n\t"\
        "movq " #src3 ", %%mm3 \n\t"\
        "pxor %%mm6, %%mm6 \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        "pcmpgtw %%mm2, %%mm6 \n\t"\
        "pcmpgtw %%mm3, %%mm7 \n\t"\
        "pxor %%mm6, %%mm2 \n\t"\
        "pxor %%mm7, %%mm3 \n\t"\
        "psubusw %%mm4, %%mm2 \n\t"\
        "psubusw %%mm4, %%mm3 \n\t"\
        "pxor %%mm6, %%mm2 \n\t"\
        "pxor %%mm7, %%mm3 \n\t"\
        \
        "paddsw %%mm5, %%mm0 \n\t"\
        "paddsw %%mm5, %%mm1 \n\t"\
        "paddsw %%mm5, %%mm2 \n\t"\
        "paddsw %%mm5, %%mm3 \n\t"\
        "psraw $3, %%mm0 \n\t"\
        "psraw $3, %%mm1 \n\t"\
        "psraw $3, %%mm2 \n\t"\
        "psraw $3, %%mm3 \n\t"\
        \
        "movq %%mm0, %%mm7 \n\t"\
        "punpcklwd %%mm2, %%mm0 \n\t" /*A*/\
        "punpckhwd %%mm2, %%mm7 \n\t" /*C*/\
        "movq %%mm1, %%mm2 \n\t"\
        "punpcklwd %%mm3, %%mm1 \n\t" /*B*/\
        "punpckhwd %%mm3, %%mm2 \n\t" /*D*/\
        "movq %%mm0, %%mm3 \n\t"\
        "punpcklwd %%mm1, %%mm0 \n\t" /*A*/\
        "punpckhwd %%mm7, %%mm3 \n\t" /*C*/\
        "punpcklwd %%mm2, %%mm7 \n\t" /*B*/\
        "punpckhwd %%mm2, %%mm1 \n\t" /*D*/\
        \
        "movq %%mm0, " #dst0 " \n\t"\
        "movq %%mm7, " #dst1 " \n\t"\
        "movq %%mm3, " #dst2 " \n\t"\
        "movq %%mm1, " #dst3 " \n\t"

        "movd %2, %%mm4 \n\t"
        "movd %3, %%mm5 \n\t"
        "packssdw %%mm4, %%mm4 \n\t"
        "packssdw %%mm5, %%mm5 \n\t"
        "packssdw %%mm4, %%mm4 \n\t"
        "packssdw %%mm5, %%mm5 \n\t"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate than needed?
    );
    dst[0]= (src[0] + 4)>>3;
}
#endif

static inline void add_block(int16_t *dst, int stride, DCTELEM block[64]){
    int y;

    for(y=0; y<8; y++){
        *(uint32_t*)&dst[0 + y*stride]+= *(uint32_t*)&block[0 + y*8];
        *(uint32_t*)&dst[2 + y*stride]+= *(uint32_t*)&block[2 + y*8];
        *(uint32_t*)&dst[4 + y*stride]+= *(uint32_t*)&block[4 + y*8];
        *(uint32_t*)&dst[6 + y*stride]+= *(uint32_t*)&block[6 + y*8];
    }
}

static void store_slice_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale){
    int y, x;

/* Rescale the accumulated sum, add the dither value and clamp to 0..255:
   after the >>6, results just outside 0..255 have bit 8 set, and ~(temp>>31)
   yields 0 for negative values and 0xFF... (truncated to 255) for overflows. */
#define STORE(pos) \
    temp= ((src[x + y*src_stride + pos]<<log2_scale) + d[pos])>>6;\
    if(temp & 0x100) temp= ~(temp>>31);\
    dst[x + y*dst_stride + pos]= temp;

    for(y=0; y<height; y++){
        const uint8_t *d= dither[y];
        for(x=0; x<width; x+=8){
            int temp;
            STORE(0);
            STORE(1);
            STORE(2);
            STORE(3);
            STORE(4);
            STORE(5);
            STORE(6);
            STORE(7);
        }
    }
}

#if HAVE_MMX
static void store_slice_mmx(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale){
    int y;

    for(y=0; y<height; y++){
        uint8_t *dst1= dst;
        int16_t *src1= src;
        __asm__ volatile(
            "movq (%3), %%mm3 \n\t"
            "movq (%3), %%mm4 \n\t"
            "movd %4, %%mm2 \n\t"
            "pxor %%mm0, %%mm0 \n\t"
            "punpcklbw %%mm0, %%mm3 \n\t"
            "punpckhbw %%mm0, %%mm4 \n\t"
            "psraw %%mm2, %%mm3 \n\t"
            "psraw %%mm2, %%mm4 \n\t"
            "movd %5, %%mm2 \n\t"
            "1: \n\t"
            "movq (%0), %%mm0 \n\t"
            "movq 8(%0), %%mm1 \n\t"
            "paddw %%mm3, %%mm0 \n\t"
            "paddw %%mm4, %%mm1 \n\t"
            "psraw %%mm2, %%mm0 \n\t"
            "psraw %%mm2, %%mm1 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "movq %%mm0, (%1) \n\t"
            "add $16, %0 \n\t"
            "add $8, %1 \n\t"
            "cmp %2, %1 \n\t"
            " jb 1b \n\t"
            : "+r" (src1), "+r"(dst1)
            : "r"(dst + width), "r"(dither[y]), "g"(log2_scale), "g"(6-log2_scale)
        );
        src += src_stride;
        dst += dst_stride;
    }
//    if(width != mmxw)
//        store_slice_c(dst + mmxw, src + mmxw, dst_stride, src_stride, width - mmxw, log2_scale);
}
#endif

static void (*store_slice)(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale)= store_slice_c;
static void (*requantize)(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation)= hardthresh_c;
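
/* norm_qscale() is called from filter() below but its definition is not part
 * of this excerpt. A minimal sketch of the helper, assuming the usual
 * FF_QSCALE_TYPE_* semantics from libavcodec (normalize each codec's qscale
 * to the MPEG-1 scale that the thresholds above expect): */
static inline int norm_qscale(int qscale, int type){
    switch (type) {
    case FF_QSCALE_TYPE_MPEG1: return qscale;
    case FF_QSCALE_TYPE_MPEG2: return qscale >> 1;
    case FF_QSCALE_TYPE_H264:  return qscale >> 2;
    case FF_QSCALE_TYPE_VP56:  return (63 - qscale + 2) >> 2;
    }
    return qscale;
}
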
static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){
    int x, y, i;
    const int count= 1<<p->log2_count;
    const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15));
    uint64_t __attribute__((aligned(16))) block_align[32];
    DCTELEM *block = (DCTELEM *)block_align;
    DCTELEM *block2= (DCTELEM *)(block_align+16);

    if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
    for(y=0; y<height; y++){
        int index= 8 + 8*stride + y*stride;
        fast_memcpy(p->src + index, src + y*src_stride, width);
        for(x=0; x<8; x++){
            p->src[index         - x - 1]= p->src[index +         x    ];
            p->src[index + width + x    ]= p->src[index + width - x - 1];
        }
    }
    for(y=0; y<8; y++){
        fast_memcpy(p->src + (       7-y)*stride, p->src + (       y+8)*stride, stride);
        fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
    }
    //FIXME (try edge emu)

    for(y=0; y<height+8; y+=8){
        memset(p->temp + (8+y)*stride, 0, 8*stride*sizeof(int16_t));
        for(x=0; x<width+8; x+=8){
            const int qps= 3 + is_luma;
            int qp;

            if(p->qp)
                qp= p->qp;
            else{
                qp= qp_store[ (XMIN(x, width-1)>>qps) + (XMIN(y, height-1)>>qps) * qp_stride];
                qp = FFMAX(1, norm_qscale(qp, p->mpeg2));
            }
            for(i=0; i<count; i++){
                const int x1= x + offset[i+count-1][0];
                const int y1= y + offset[i+count-1][1];
                const int index= x1 + y1*stride;
                p->dsp.get_pixels(block, p->src + index, stride);
                p->dsp.fdct(block);
                requantize(block2, block, qp, p->dsp.idct_permutation);
                p->dsp.idct(block2);
                add_block(p->temp + index, stride, block2);
            }
        }
        if(y)
            store_slice(dst + (y-8)*dst_stride, p->temp + 8 + y*stride, dst_stride, stride, width, XMIN(8, height+8-y), 6-p->log2_count);
    }
#if 0
    for(y=0; y<height; y++){
        for(x=0; x<width; x++){
            if((((x>>6) ^ (y>>6)) & 1) == 0)
                dst[x + y*dst_stride]= p->src[8 + 8*stride + x + y*stride];
            if((x&63) == 0 || (y&63)==0)
                dst[x + y*dst_stride] += 128;
        }
    }
#endif
    //FIXME reorder for better caching
}

static int config(struct vf_instance *vf,
                  int width, int height, int d_width, int d_height,
                  unsigned int flags, unsigned int outfmt){
    int h= (height+16+15)&(~15);

    vf->priv->temp_stride= (width+16+15)&(~15);
    vf->priv->temp= malloc(vf->priv->temp_stride*h*sizeof(int16_t));
    vf->priv->src = malloc(vf->priv->temp_stride*h*sizeof(uint8_t));

    return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
}

static void get_image(struct vf_instance *vf, mp_image_t *mpi){
    if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change
    // ok, we can do pp in-place (or pp disabled):
    vf->dmpi=vf_get_image(vf->next,mpi->imgfmt,
                          mpi->type, mpi->flags | MP_IMGFLAG_READABLE, mpi->width, mpi->height);
    mpi->planes[0]=vf->dmpi->planes[0];
    mpi->stride[0]=vf->dmpi->stride[0];
    mpi->width=vf->dmpi->width;
    if(mpi->flags&MP_IMGFLAG_PLANAR){
        mpi->planes[1]=vf->dmpi->planes[1];
        mpi->planes[2]=vf->dmpi->planes[2];
        mpi->stride[1]=vf->dmpi->stride[1];
        mpi->stride[2]=vf->dmpi->stride[2];
    }
    mpi->flags|=MP_IMGFLAG_DIRECT;
}

static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){
    mp_image_t *dmpi;

    if(!(mpi->flags&MP_IMGFLAG_DIRECT)){
        // no DR, so get a new image! hope we'll get DR buffer:
        dmpi=vf_get_image(vf->next,mpi->imgfmt,
                          MP_IMGTYPE_TEMP,
                          MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
                          mpi->width,mpi->height);
        vf_clone_mpi_attributes(dmpi, mpi);
    }else{
        dmpi=vf->dmpi;
    }

    vf->priv->mpeg2= mpi->qscale_type;
    if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){
        int w = mpi->qstride;
        int h = (mpi->h + 15) >> 4;
        if (!w) {
            w = (mpi->w + 15) >> 4;
            h = 1;
        }
        if(!vf->priv->non_b_qp)
            vf->priv->non_b_qp= malloc(w*h);
        fast_memcpy(vf->priv->non_b_qp, mpi->qscale, w*h);
    }
    if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
        char *qp_tab= vf->priv->non_b_qp;
        if((vf->priv->mode&4) || !qp_tab)
            qp_tab= mpi->qscale;

        if(qp_tab || vf->priv->qp){
            filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0], mpi->w, mpi->h, qp_tab, mpi->qstride, 1);
            filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);
            filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);
        }else{
            memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]);
            memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]);
            memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]);
        }
    }

#if HAVE_MMX
    if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
#endif
#if HAVE_MMX2
    if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
#endif

    return vf_next_put_image(vf,dmpi, pts);
}

static void uninit(struct vf_instance *vf){
    if(!vf->priv) return;

    free(vf->priv->temp);
    vf->priv->temp= NULL;
    free(vf->priv->src);
    vf->priv->src= NULL;
    free(vf->priv->avctx);
    vf->priv->avctx= NULL;
    free(vf->priv->non_b_qp);
    vf->priv->non_b_qp= NULL;

    free(vf->priv);
    vf->priv=NULL;
}

//===========================================================================//

static int query_format(struct vf_instance *vf, unsigned int fmt){
    switch(fmt){
    case IMGFMT_YVU9:
    case IMGFMT_IF09:
    case IMGFMT_YV12:
    case IMGFMT_I420:
    case IMGFMT_IYUV:
    case IMGFMT_CLPL:
    case IMGFMT_Y800:
    case IMGFMT_Y8:
    case IMGFMT_444P:
    case IMGFMT_422P:
    case IMGFMT_411P:
        return vf_next_query_format(vf,fmt);
    }
    return 0;
}

static int control(struct vf_instance *vf, int request, void* data){
    switch(request){
    case VFCTRL_QUERY_MAX_PP_LEVEL:
        return 6;
    case VFCTRL_SET_PP_LEVEL:
        vf->priv->log2_count= *((unsigned int*)data);
        return CONTROL_TRUE;
    }
    return vf_next_control(vf,request,data);
}

static int vf_open(vf_instance_t *vf, char *args){
    int log2c=-1;

    vf->config=config;
    vf->put_image=put_image;
    vf->get_image=get_image;
    vf->query_format=query_format;
    vf->uninit=uninit;
    vf->control= control;
    vf->priv=malloc(sizeof(struct vf_priv_s));
    memset(vf->priv, 0, sizeof(struct vf_priv_s));

    init_avcodec();

    vf->priv->avctx= avcodec_alloc_context();
    dsputil_init(&vf->priv->dsp, vf->priv->avctx);

    vf->priv->log2_count= 3;

    if (args) sscanf(args, "%d:%d:%d", &log2c, &vf->priv->qp, &vf->priv->mode);

    if( log2c >=0 && log2c <=6 )
        vf->priv->log2_count = log2c;

    if(vf->priv->qp < 0)
        vf->priv->qp = 0;

    switch(vf->priv->mode&3){
        default:
        case 0: requantize= hardthresh_c; break;
        case 1: requantize= softthresh_c; break;
    }

#if HAVE_MMX
    if(gCpuCaps.hasMMX){
        store_slice= store_slice_mmx;
        switch(vf->priv->mode&3){
            case 0: requantize= hardthresh_mmx; break;
            case 1: requantize= softthresh_mmx; break;
        }
    }
#endif

    return 1;
}

const vf_info_t vf_info_spp = {
    "simple postprocess",
    "spp",
    "Michael Niedermayer",
    "",
    vf_open,
    NULL
};