vf_filmdint.c 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461
  1. /*
  2. * This file is part of MPlayer.
  3. *
  4. * MPlayer is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * MPlayer is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License along
  15. * with MPlayer; if not, write to the Free Software Foundation, Inc.,
  16. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17. */
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <sys/time.h>
  22. #include "config.h"
  23. #include "mp_msg.h"
  24. #include "cpudetect.h"
  25. #include "img_format.h"
  26. #include "mp_image.h"
  27. #include "vd.h"
  28. #include "vf.h"
  29. #include "cmmx.h"
  30. #include "libvo/fastmemcpy.h"
  /* Sizes the planes[] pointer table in struct vf_priv_s, which has 2*NUM_STORED rows. */
  31. #define NUM_STORED 4
  /*
   * Field/frame type tag; struct vf_priv_s remembers the previous one in
   * prev_type.  The names suggest a position inside a three-field or
   * two-field group, or a plain interlaced frame.
   * NOTE(review): confirm against the code that assigns these values.
   */
  enum pu_field_type_t {
      PU_1ST_OF_3   = 0,
      PU_2ND_OF_3   = 1,
      PU_3RD_OF_3   = 2,
      PU_1ST_OF_2   = 3,
      PU_2ND_OF_2   = 4,
      PU_INTERLACED = 5
  };
  /*
   * Four 16-bit values laid out so the struct maps onto one packed-word
   * 64-bit MMX register; the 8-byte alignment lets it be moved with movq.
   * Word order (lowest first): even, odd, noise, temp.
   */
  struct metrics {
      unsigned short even;
      unsigned short odd;
      unsigned short noise;
      unsigned short temp;
  } __attribute__ ((aligned (8)));
  47. struct frame_stats {
  48. struct metrics tiny, low, high, bigger, twox, max;
  49. struct { unsigned int even, odd, noise, temp; } sad;
  50. unsigned short interlaced_high;
  51. unsigned short interlaced_low;
  52. unsigned short num_blocks;
  53. };
  54. struct vf_priv_s {
  55. unsigned long inframes;
  56. unsigned long outframes;
  57. enum pu_field_type_t prev_type;
  58. unsigned swapped, chroma_swapped;
  59. unsigned luma_only;
  60. unsigned verbose;
  61. unsigned fast;
  62. unsigned long w, h, cw, ch, stride, chroma_stride, nplanes;
  63. unsigned long sad_thres;
  64. unsigned long dint_thres;
  65. unsigned char *memory_allocated;
  66. unsigned char *planes[2*NUM_STORED][4];
  67. unsigned char **old_planes;
  68. unsigned long static_idx;
  69. unsigned long temp_idx;
  70. unsigned long crop_x, crop_y, crop_cx, crop_cy;
  71. unsigned long export_count, merge_count;
  72. unsigned long num_breaks;
  73. unsigned long num_copies;
  74. long in_inc, out_dec, iosync;
  75. long num_fields;
  76. long prev_fields;
  77. long notout;
  78. long mmx2;
  79. unsigned small_bytes[2];
  80. unsigned mmx_temp[2];
  81. struct frame_stats stats[2];
  82. struct metrics thres;
  83. char chflag;
  84. double diff_time, merge_time, decode_time, vo_time, filter_time;
  85. };
  86. #define PPZ { 2000, 2000, 0, 2000 }
  87. #define PPR { 2000, 2000, 0, 2000 }
  88. static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999};
  89. static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999};
  /*
   * Two-argument minimum / maximum, defined only when no header has provided
   * them already.  Either argument may be evaluated twice.
   */
  #if !defined(MIN)
  #define MIN(a,b) ((a) < (b) ? (a) : (b))
  #endif
  #if !defined(MAX)
  #define MAX(a,b) ((b) < (a) ? (a) : (b))
  #endif
  /*
   * String-building helpers for inline assembly (AT&T operand order: the last
   * operand is the destination).  These first definitions use only basic MMX
   * instructions plus the 3DNow! pavgusb; several are redefined further down
   * with single MMX2 instructions.
   */
  /* PDIFFUB: T = |X - Y| per unsigned byte; Y is overwritten with sat(Y - X). */
  96. #define PDIFFUB(X,Y,T) "movq " #X "," #T "\n\t" \
  97. "psubusb " #Y "," #T "\n\t" \
  98. "psubusb " #X "," #Y "\n\t" \
  99. "paddusb " #Y "," #T "\n\t"
  /* PDIFFUBT: Y = |X - Y| per unsigned byte; T is scratch. */
  100. #define PDIFFUBT(X,Y,T) "movq " #X "," #T "\n\t" \
  101. "psubusb " #Y "," #T "\n\t" \
  102. "psubusb " #X "," #Y "\n\t" \
  103. "paddusb " #T "," #Y "\n\t"
  /*
   * PSUMBW: sum the eight bytes of X; the total ends up in the low word of X
   * with the upper bits cleared.  T is scratch and Z must hold zero.
   */
  104. #define PSUMBW(X,T,Z) "movq " #X "," #T "\n\t" \
  105. "punpcklbw " #Z "," #X "\n\t" \
  106. "punpckhbw " #Z "," #T "\n\t" \
  107. "paddw " #T "," #X "\n\t" \
  108. "movq " #X "," #T "\n\t" \
  109. "psllq $32, " #T "\n\t" \
  110. "paddw " #T "," #X "\n\t" \
  111. "movq " #X "," #T "\n\t" \
  112. "psllq $16, " #T "\n\t" \
  113. "paddw " #T "," #X "\n\t" \
  114. "psrlq $48, " #X "\n\t"
  /* PSADBW: Y = sum of absolute byte differences of X and Y (T scratch, Z zero). */
  115. #define PSADBW(X,Y,T,Z) PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z)
  /* PMAXUB / PMAXUW: Y = max(X, Y) per unsigned byte / word, via saturating sub+add. */
  116. #define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t"
  117. #define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t"
  /* PMINUBT: Y = min(X, Y) per unsigned byte; T is scratch. */
  118. #define PMINUBT(X,Y,T) "movq " #Y "," #T "\n\t" \
  119. "psubusb " #X "," #T "\n\t" \
  120. "psubusb " #T "," #Y "\n\t"
  /* PAVGB: Y = byte-wise average of X and Y using the 3DNow! pavgusb instruction. */
  121. #define PAVGB(X,Y) "pavgusb " #X "," #Y "\n\t"
  122. static inline void
  123. get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines,
  124. struct metrics *m)
  125. {
  126. a -= as;
  127. b -= bs;
  128. do {
  129. cmmx_t old_po = *(cmmx_t*)(a );
  130. cmmx_t po = *(cmmx_t*)(b );
  131. cmmx_t e = *(cmmx_t*)(b + bs);
  132. cmmx_t old_o = *(cmmx_t*)(a + 2*as);
  133. cmmx_t o = *(cmmx_t*)(b + 2*bs);
  134. cmmx_t ne = *(cmmx_t*)(b + 3*bs);
  135. cmmx_t old_no = *(cmmx_t*)(a + 4*as);
  136. cmmx_t no = *(cmmx_t*)(b + 4*bs);
  137. cmmx_t qup_old_odd = p31avgb(old_o, old_po);
  138. cmmx_t qup_odd = p31avgb( o, po);
  139. cmmx_t qdown_old_odd = p31avgb(old_o, old_no);
  140. cmmx_t qdown_odd = p31avgb( o, no);
  141. cmmx_t qup_even = p31avgb(ne, e);
  142. cmmx_t qdown_even = p31avgb(e, ne);
  143. cmmx_t temp_up_diff = pdiffub(qdown_even, qup_old_odd);
  144. cmmx_t noise_up_diff = pdiffub(qdown_even, qup_odd);
  145. cmmx_t temp_down_diff = pdiffub(qup_even, qdown_old_odd);
  146. cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd);
  147. cmmx_t odd_diff = pdiffub(o, old_o);
  148. m->odd += psumbw(odd_diff);
  149. m->even += psadbw(e, *(cmmx_t*)(a+as));
  150. temp_up_diff = pminub(temp_up_diff, temp_down_diff);
  151. temp_up_diff = pminub(temp_up_diff, odd_diff);
  152. m->temp += psumbw(temp_up_diff);
  153. noise_up_diff = pminub(noise_up_diff, odd_diff);
  154. noise_up_diff = pminub(noise_up_diff, noise_down_diff);
  155. m->noise += psumbw(noise_up_diff);
  156. a += 2*as;
  157. b += 2*bs;
  158. } while (--lines);
  159. }
  160. static inline void
  161. get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
  162. int lines, struct metrics *m)
  163. {
  164. a -= as;
  165. b -= bs;
  166. do {
  167. cmmx_t old_po = (*(cmmx_t*)(a ) >> 1) & ~SIGN_BITS;
  168. cmmx_t po = (*(cmmx_t*)(b ) >> 1) & ~SIGN_BITS;
  169. cmmx_t old_e = (*(cmmx_t*)(a + as) >> 1) & ~SIGN_BITS;
  170. cmmx_t e = (*(cmmx_t*)(b + bs) >> 1) & ~SIGN_BITS;
  171. cmmx_t old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS;
  172. cmmx_t o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS;
  173. cmmx_t ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS;
  174. cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS;
  175. cmmx_t no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS;
  176. cmmx_t qup_old_odd = p31avgb_s(old_o, old_po);
  177. cmmx_t qup_odd = p31avgb_s( o, po);
  178. cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no);
  179. cmmx_t qdown_odd = p31avgb_s( o, no);
  180. cmmx_t qup_even = p31avgb_s(ne, e);
  181. cmmx_t qdown_even = p31avgb_s(e, ne);
  182. cmmx_t temp_up_diff = pdiffub_s(qdown_even, qup_old_odd);
  183. cmmx_t noise_up_diff = pdiffub_s(qdown_even, qup_odd);
  184. cmmx_t temp_down_diff = pdiffub_s(qup_even, qdown_old_odd);
  185. cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd);
  186. cmmx_t odd_diff = pdiffub_s(o, old_o);
  187. m->odd += psumbw_s(odd_diff) << 1;
  188. m->even += psadbw_s(e, old_e) << 1;
  189. temp_up_diff = pminub_s(temp_up_diff, temp_down_diff);
  190. temp_up_diff = pminub_s(temp_up_diff, odd_diff);
  191. m->temp += psumbw_s(temp_up_diff) << 1;
  192. noise_up_diff = pminub_s(noise_up_diff, odd_diff);
  193. noise_up_diff = pminub_s(noise_up_diff, noise_down_diff);
  194. m->noise += psumbw_s(noise_up_diff) << 1;
  195. a += 2*as;
  196. b += 2*bs;
  197. } while (--lines);
  198. }
  199. static inline void
  200. get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
  201. int lines, struct metrics *m)
  202. {
  203. a -= as;
  204. b -= bs;
  205. do {
  206. cmmx_t old_po = (*(cmmx_t*)(a )>>1) & ~SIGN_BITS;
  207. cmmx_t po = (*(cmmx_t*)(b )>>1) & ~SIGN_BITS;
  208. cmmx_t old_e = (*(cmmx_t*)(a + as)>>1) & ~SIGN_BITS;
  209. cmmx_t e = (*(cmmx_t*)(b + bs)>>1) & ~SIGN_BITS;
  210. cmmx_t old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS;
  211. cmmx_t o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS;
  212. cmmx_t ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS;
  213. cmmx_t down_even = p31avgb_s(e, ne);
  214. cmmx_t up_odd = p31avgb_s(o, po);
  215. cmmx_t up_old_odd = p31avgb_s(old_o, old_po);
  216. cmmx_t odd_diff = pdiffub_s(o, old_o);
  217. cmmx_t temp_diff = pdiffub_s(down_even, up_old_odd);
  218. cmmx_t noise_diff = pdiffub_s(down_even, up_odd);
  219. m->even += psadbw_s(e, old_e) << 1;
  220. m->odd += psumbw_s(odd_diff) << 1;
  221. temp_diff = pminub_s(temp_diff, odd_diff);
  222. noise_diff = pminub_s(noise_diff, odd_diff);
  223. m->noise += psumbw_s(noise_diff) << 1;
  224. m->temp += psumbw_s(temp_diff) << 1;
  225. a += 2*as;
  226. b += 2*bs;
  227. } while (--lines);
  228. }
  229. static inline void
  230. get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s)
  231. {
  232. unsigned two_e = m->even + MAX(m->even , p->thres.even );
  233. unsigned two_o = m->odd + MAX(m->odd , p->thres.odd );
  234. unsigned two_n = m->noise + MAX(m->noise, p->thres.noise);
  235. unsigned two_t = m->temp + MAX(m->temp , p->thres.temp );
  236. unsigned e_big = m->even >= (m->odd + two_o + 1)/2;
  237. unsigned o_big = m->odd >= (m->even + two_e + 1)/2;
  238. unsigned n_big = m->noise >= (m->temp + two_t + 1)/2;
  239. unsigned t_big = m->temp >= (m->noise + two_n + 1)/2;
  240. unsigned e2x = m->even >= two_o;
  241. unsigned o2x = m->odd >= two_e;
  242. unsigned n2x = m->noise >= two_t;
  243. unsigned t2x = m->temp >= two_n;
  244. unsigned ntiny_e = m->even > p->thres.even ;
  245. unsigned ntiny_o = m->odd > p->thres.odd ;
  246. unsigned ntiny_n = m->noise > p->thres.noise;
  247. unsigned ntiny_t = m->temp > p->thres.temp ;
  248. unsigned nlow_e = m->even > 2*p->thres.even ;
  249. unsigned nlow_o = m->odd > 2*p->thres.odd ;
  250. unsigned nlow_n = m->noise > 2*p->thres.noise;
  251. unsigned nlow_t = m->temp > 2*p->thres.temp ;
  252. unsigned high_e = m->even > 4*p->thres.even ;
  253. unsigned high_o = m->odd > 4*p->thres.odd ;
  254. unsigned high_n = m->noise > 4*p->thres.noise;
  255. unsigned high_t = m->temp > 4*p->thres.temp ;
  256. unsigned low_il = !n_big && !t_big && ntiny_n && ntiny_t;
  257. unsigned high_il = !n_big && !t_big && nlow_n && nlow_t;
  258. if (low_il | high_il) {
  259. s->interlaced_low += low_il;
  260. s->interlaced_high += high_il;
  261. } else {
  262. s->tiny.even += ntiny_e;
  263. s->tiny.odd += ntiny_o;
  264. s->tiny.noise += ntiny_n;
  265. s->tiny.temp += ntiny_t;
  266. s->low .even += nlow_e ;
  267. s->low .odd += nlow_o ;
  268. s->low .noise += nlow_n ;
  269. s->low .temp += nlow_t ;
  270. s->high.even += high_e ;
  271. s->high.odd += high_o ;
  272. s->high.noise += high_n ;
  273. s->high.temp += high_t ;
  274. if (m->even >= p->sad_thres) s->sad.even += m->even ;
  275. if (m->odd >= p->sad_thres) s->sad.odd += m->odd ;
  276. if (m->noise >= p->sad_thres) s->sad.noise += m->noise;
  277. if (m->temp >= p->sad_thres) s->sad.temp += m->temp ;
  278. }
  279. s->num_blocks++;
  280. s->max.even = MAX(s->max.even , m->even );
  281. s->max.odd = MAX(s->max.odd , m->odd );
  282. s->max.noise = MAX(s->max.noise, m->noise);
  283. s->max.temp = MAX(s->max.temp , m->temp );
  284. s->bigger.even += e_big ;
  285. s->bigger.odd += o_big ;
  286. s->bigger.noise += n_big ;
  287. s->bigger.temp += t_big ;
  288. s->twox.even += e2x ;
  289. s->twox.odd += o2x ;
  290. s->twox.noise += n2x ;
  291. s->twox.temp += t2x ;
  292. }
  293. static inline struct metrics
  294. block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs,
  295. int lines, struct vf_priv_s *p, struct frame_stats *s)
  296. {
  297. struct metrics tm;
  298. tm.even = tm.odd = tm.noise = tm.temp = 0;
  299. get_metrics_c(a, b, as, bs, lines, &tm);
  300. if (sizeof(cmmx_t) < 8)
  301. get_metrics_c(a+4, b+4, as, bs, lines, &tm);
  302. get_block_stats(&tm, p, s);
  303. return tm;
  304. }
  305. static inline struct metrics
  306. block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
  307. int lines, struct vf_priv_s *p, struct frame_stats *s)
  308. {
  309. struct metrics tm;
  310. tm.even = tm.odd = tm.noise = tm.temp = 0;
  311. get_metrics_fast_c(a, b, as, bs, lines, &tm);
  312. if (sizeof(cmmx_t) < 8)
  313. get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm);
  314. get_block_stats(&tm, p, s);
  315. return tm;
  316. }
  317. static inline struct metrics
  318. block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
  319. int lines, struct vf_priv_s *p, struct frame_stats *s)
  320. {
  321. struct metrics tm;
  322. tm.even = tm.odd = tm.noise = tm.temp = 0;
  323. get_metrics_faster_c(a, b, as, bs, lines, &tm);
  324. if (sizeof(cmmx_t) < 8)
  325. get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm);
  326. get_block_stats(&tm, p, s);
  327. return tm;
  328. }
  /* Non-zero when X and Y agree in their even, odd, temp and noise members. */
  #define MEQ(X,Y) \
      ((X).even  == (Y).even  && \
       (X).odd   == (Y).odd   && \
       (X).temp  == (Y).temp  && \
       (X).noise == (Y).noise)
  /*
   * Shared body of the SIMD block-metric routines.  It expands in a function
   * that has `a`, `b`, `as`, `bs`, `lines` and a 64-bit constant `ones` in
   * scope, and uses whichever PSADBW/PSUMBW/PMINUBT/PAVGB definitions are
   * active at the point of expansion.
   *
   * Like the C kernels it starts one row above the block and advances `a` and
   * `b` by two rows per iteration, `lines` times (`lines` must be >= 1).
   * The four sums are accumulated in mm7 in the word order of struct metrics:
   * word 0 = even, word 1 = odd, word 2 = the "current" (noise) sum,
   * word 3 = the "old" (temp) sum.  mm6 is kept at zero.  The macro leaves
   * the result in mm7 and does not execute emms; the caller must read mm7 and
   * issue emms afterwards.
   */
  330. #define BLOCK_METRICS_TEMPLATE() \
  331. __asm__ volatile("pxor %mm7, %mm7\n\t" /* The result is collected in mm7 */ \
  332. "pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \
  333. ); \
  334. a -= as; \
  335. b -= bs; \
  336. do { \
  337. __asm__ volatile( \
  338. "movq (%0,%2), %%mm0\n\t" \
  339. "movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \
  340. PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \
  341. "paddusw %%mm0, %%mm7\n\t" /* even diff */ \
  342. "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */ \
  343. "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */ \
  344. "movq (%0), %%mm3\n\t" \
  345. "psubusb %4, %%mm3\n\t" \
  346. PAVGB(%%mm0, %%mm3) \
  347. PAVGB(%%mm0, %%mm3) /* mm3 = qup old odd */ \
  348. "movq %%mm0, %%mm5\n\t" \
  349. PSADBW(%%mm2, %%mm0, %%mm4, %%mm6) \
  350. "psllq $16, %%mm0\n\t" \
  351. "paddusw %%mm0, %%mm7\n\t" \
  352. "movq (%1), %%mm4\n\t" \
  353. "lea (%0,%2,2), %0\n\t" \
  354. "lea (%1,%3,2), %1\n\t" \
  355. "psubusb %4, %%mm4\n\t" \
  356. PAVGB(%%mm2, %%mm4) \
  357. PAVGB(%%mm2, %%mm4) /* mm4 = qup odd */ \
  358. PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */ \
  359. "movq (%1,%3), %%mm5\n\t" \
  360. "psubusb %4, %%mm5\n\t" \
  361. PAVGB(%%mm1, %%mm5) \
  362. PAVGB(%%mm5, %%mm1) /* mm1 = qdown even */ \
  363. PAVGB((%1,%3), %%mm5) /* mm5 = qup next even */ \
  364. PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */ \
  365. PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */ \
  366. PMINUBT(%%mm2, %%mm3, %%mm0) /* limit temp to odd diff */ \
  367. PMINUBT(%%mm2, %%mm4, %%mm0) /* limit noise to odd diff */ \
  368. "movq (%1,%3,2), %%mm2\n\t" \
  369. "psubusb %4, %%mm2\n\t" \
  370. PAVGB((%1), %%mm2) \
  371. PAVGB((%1), %%mm2) /* mm2 = qdown odd */ \
  372. "movq (%0,%2,2), %%mm1\n\t" \
  373. "psubusb %4, %%mm1\n\t" \
  374. PAVGB((%0), %%mm1) \
  375. PAVGB((%0), %%mm1) /* mm1 = qdown old odd */ \
  376. PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */ \
  377. PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */ \
  378. PMINUBT(%%mm4, %%mm2, %%mm0) /* current */ \
  379. PMINUBT(%%mm3, %%mm1, %%mm0) /* old */ \
  380. PSUMBW(%%mm2, %%mm0, %%mm6) \
  381. PSUMBW(%%mm1, %%mm0, %%mm6) \
  382. "psllq $32, %%mm2\n\t" \
  383. "psllq $48, %%mm1\n\t" \
  384. "paddusw %%mm2, %%mm7\n\t" \
  385. "paddusw %%mm1, %%mm7\n\t" \
  386. : "=r" (a), "=r" (b) \
  387. : "r"((x86_reg)as), "r"((x86_reg)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
  388. ); \
  389. } while (--lines);
  390. static inline struct metrics
  391. block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
  392. int lines, struct vf_priv_s *p, struct frame_stats *s)
  393. {
  394. struct metrics tm;
  395. #if !HAVE_AMD3DNOW
  396. mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
  397. #else
  398. static const unsigned long long ones = 0x0101010101010101ull;
  399. BLOCK_METRICS_TEMPLATE();
  400. __asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
  401. get_block_stats(&tm, p, s);
  402. #endif
  403. return tm;
  404. }
  /*
   * Switch the assembly helper macros to their single-instruction MMX2 forms
   * for the code that follows.  PDIFFUB, PDIFFUBT and PMAXUW are not
   * redefined and keep their earlier definitions.  The T and Z arguments are
   * accepted for source compatibility; the new PSADBW and PMINUBT ignore
   * them, while the new PSUMBW still uses Z as the psadbw source operand.
   */
  405. #undef PSUMBW
  406. #undef PSADBW
  407. #undef PMAXUB
  408. #undef PMINUBT
  409. #undef PAVGB
  /* psadbw against Z sums the bytes of X (given Z == 0, as the template keeps mm6). */
  410. #define PSUMBW(X,T,Z) "psadbw " #Z "," #X "\n\t"
  411. #define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t"
  412. #define PMAXUB(X,Y) "pmaxub " #X "," #Y "\n\t"
  413. #define PMINUBT(X,Y,T) "pminub " #X "," #Y "\n\t"
  414. #define PAVGB(X,Y) "pavgb " #X "," #Y "\n\t"
  415. static inline struct metrics
  416. block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
  417. int lines, struct vf_priv_s *p, struct frame_stats *s)
  418. {
  419. struct metrics tm;
  420. #if !HAVE_MMX
  421. mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n");
  422. #else
  423. static const unsigned long long ones = 0x0101010101010101ull;
  424. x86_reg interlaced;
  425. x86_reg prefetch_line = (((long)a>>3) & 7) + 10;
  426. #ifdef DEBUG
  427. struct frame_stats ts = *s;
  428. #endif
  429. __asm__ volatile("prefetcht0 (%0,%2)\n\t"
  430. "prefetcht0 (%1,%3)\n\t" :
  431. : "r" (a), "r" (b),
  432. "r" (prefetch_line * as), "r" (prefetch_line * bs));
  433. BLOCK_METRICS_TEMPLATE();
  434. s->num_blocks++;
  435. __asm__ volatile(
  436. "movq %3, %%mm0\n\t"
  437. "movq %%mm7, %%mm1\n\t"
  438. "psubusw %%mm0, %%mm1\n\t"
  439. "movq %%mm1, %%mm2\n\t"
  440. "paddusw %%mm0, %%mm2\n\t"
  441. "paddusw %%mm7, %%mm2\n\t"
  442. "pshufw $0xb1, %%mm2, %%mm3\n\t"
  443. "pavgw %%mm7, %%mm2\n\t"
  444. "pshufw $0xb1, %%mm2, %%mm2\n\t"
  445. "psubusw %%mm7, %%mm2\n\t"
  446. "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */
  447. "psubusw %%mm7, %%mm3\n\t"
  448. "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */
  449. "movq %1, %%mm4\n\t"
  450. "movq %2, %%mm5\n\t"
  451. "psubw %%mm2, %%mm4\n\t"
  452. "psubw %%mm3, %%mm5\n\t"
  453. "movq %%mm4, %1\n\t"
  454. "movq %%mm5, %2\n\t"
  455. "pxor %%mm4, %%mm4\n\t"
  456. "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */
  457. "psubusw %%mm0, %%mm1\n\t"
  458. "pxor %%mm5, %%mm5\n\t"
  459. "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */
  460. "psubusw %%mm0, %%mm1\n\t"
  461. "psubusw %%mm0, %%mm1\n\t"
  462. "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */
  463. "pshufw $0xb1, %%mm2, %%mm0\n\t"
  464. "por %%mm2, %%mm0\n\t" /* 1 if not close */
  465. "punpckhdq %%mm0, %%mm0\n\t"
  466. "movq %%mm4, %%mm2\n\t" /* tttt */
  467. "punpckhdq %%mm5, %%mm2\n\t" /* ttll */
  468. "por %%mm2, %%mm0\n\t"
  469. "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */
  470. "psrlq $16, %%mm0\n\t"
  471. "psrlw $15, %%mm0\n\t"
  472. "movd %%mm0, %0\n\t"
  473. : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox)
  474. : "m" (p->thres)
  475. );
  476. if (interlaced) {
  477. s->interlaced_high += interlaced >> 16;
  478. s->interlaced_low += interlaced;
  479. } else {
  480. __asm__ volatile(
  481. "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
  482. "psubw %%mm0, %%mm4\n\t"
  483. "psubw %%mm0, %%mm5\n\t"
  484. "psubw %%mm0, %%mm1\n\t"
  485. "paddw %0, %%mm4\n\t"
  486. "paddw %1, %%mm5\n\t"
  487. "paddw %2, %%mm1\n\t"
  488. "movq %%mm4, %0\n\t"
  489. "movq %%mm5, %1\n\t"
  490. "movq %%mm1, %2\n\t"
  491. : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
  492. );
  493. __asm__ volatile(
  494. "pshufw $0, %2, %%mm0\n\t"
  495. "psubusw %%mm7, %%mm0\n\t"
  496. "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */
  497. "pand %%mm7, %%mm0\n\t"
  498. "movq %%mm0, %%mm1\n\t"
  499. "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */
  500. "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */
  501. "paddd %0, %%mm0\n\t"
  502. "paddd %1, %%mm1\n\t"
  503. "movq %%mm0, %0\n\t"
  504. "movq %%mm1, %1\n\t"
  505. : "=m" (s->sad.even), "=m" (s->sad.noise)
  506. : "m" (p->sad_thres)
  507. );
  508. }
  509. __asm__ volatile(
  510. "movq %%mm7, (%1)\n\t"
  511. PMAXUW((%0), %%mm7)
  512. "movq %%mm7, (%0)\n\t"
  513. "emms"
  514. : : "r" (&s->max), "r" (&tm), "X" (s->max)
  515. : "memory"
  516. );
  517. #ifdef DEBUG
  518. if (1) {
  519. struct metrics cm;
  520. a -= 7*as;
  521. b -= 7*bs;
  522. cm = block_metrics_c(a, b, as, bs, 4, p, &ts);
  523. if (!MEQ(tm, cm))
  524. mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");
  525. if (s) {
  526. # define CHECK(X) if (!MEQ(s->X, ts.X)) \
  527. mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n");
  528. CHECK(tiny);
  529. CHECK(low);
  530. CHECK(high);
  531. CHECK(sad);
  532. CHECK(max);
  533. }
  534. }
  535. #endif
  536. #endif
  537. return tm;
  538. }
  539. static inline int
  540. dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
  541. long cos, int ds, int ss, int w, int t)
  542. {
  543. #if !HAVE_MMX
  544. mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");
  545. return 0;
  546. #else
  547. unsigned long len = (w+7) >> 3;
  548. int ret;
  549. __asm__ volatile (
  550. "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */
  551. "movd %0, %%mm7 \n\t"
  552. "punpcklbw %%mm7, %%mm7 \n\t"
  553. "punpcklwd %%mm7, %%mm7 \n\t"
  554. "punpckldq %%mm7, %%mm7 \n\t" /* mm7 = threshold */
  555. : /* no output */
  556. : "rm" (t)
  557. );
  558. do {
  559. __asm__ volatile (
  560. "movq (%0), %%mm0\n\t"
  561. "movq (%0,%3,2), %%mm1\n\t"
  562. "movq %%mm0, (%2)\n\t"
  563. "pmaxub %%mm1, %%mm0\n\t"
  564. "pavgb (%0), %%mm1\n\t"
  565. "psubusb %%mm1, %%mm0\n\t"
  566. "paddusb %%mm7, %%mm0\n\t" /* mm0 = max-avg+thr */
  567. "movq (%0,%1), %%mm2\n\t"
  568. "movq (%0,%5), %%mm3\n\t"
  569. "movq %%mm2, %%mm4\n\t"
  570. PDIFFUBT(%%mm1, %%mm2, %%mm5)
  571. PDIFFUBT(%%mm1, %%mm3, %%mm5)
  572. "pminub %%mm2, %%mm3\n\t"
  573. "pcmpeqb %%mm3, %%mm2\n\t" /* b = min */
  574. "pand %%mm2, %%mm4\n\t"
  575. "pandn (%0,%5), %%mm2\n\t"
  576. "por %%mm4, %%mm2\n\t"
  577. "pminub %%mm0, %%mm3\n\t"
  578. "pcmpeqb %%mm0, %%mm3\n\t" /* set to 1s if >= threshold */
  579. "psubb %%mm3, %%mm6\n\t" /* count pixels above thr. */
  580. "pand %%mm3, %%mm1 \n\t"
  581. "pandn %%mm2, %%mm3 \n\t"
  582. "por %%mm3, %%mm1 \n\t" /* avg if >= threshold */
  583. "movq %%mm1, (%2,%4) \n\t"
  584. : /* no output */
  585. : "r" (a), "r" ((x86_reg)bos), "r" ((x86_reg)dst), "r" ((x86_reg)ss), "r" ((x86_reg)ds), "r" ((x86_reg)cos)
  586. );
  587. a += 8;
  588. dst += 8;
  589. } while (--len);
  590. __asm__ volatile ("pxor %%mm7, %%mm7 \n\t"
  591. "psadbw %%mm6, %%mm7 \n\t"
  592. "movd %%mm7, %0 \n\t"
  593. "emms \n\t"
  594. : "=r" (ret)
  595. );
  596. return ret;
  597. #endif
  598. }
  599. static inline int
  600. dint_copy_line(unsigned char *dst, unsigned char *a, long bos,
  601. long cos, int ds, int ss, int w, int t)
  602. {
  603. unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t);
  604. cmmx_t dint_count = 0;
  605. cmmx_t thr;
  606. t |= t << 8;
  607. thr = t | (t << 16);
  608. if (sizeof(cmmx_t) > 4)
  609. thr |= thr << (sizeof(cmmx_t)*4);
  610. do {
  611. cmmx_t e = *(cmmx_t*)a;
  612. cmmx_t ne = *(cmmx_t*)(a+2*ss);
  613. cmmx_t o = *(cmmx_t*)(a+bos);
  614. cmmx_t oo = *(cmmx_t*)(a+cos);
  615. cmmx_t maxe = pmaxub(e, ne);
  616. cmmx_t avge = pavgb(e, ne);
  617. cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */
  618. cmmx_t diffo = pdiffub(avge, o);
  619. cmmx_t diffoo = pdiffub(avge, oo);
  620. cmmx_t diffcmp = pcmpgtub(diffo, diffoo);
  621. cmmx_t bo = ((oo ^ o) & diffcmp) ^ o;
  622. cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo;
  623. cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo);
  624. cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo;
  625. dint_count += above_thr & ONE_BYTES;
  626. *(cmmx_t*)(dst) = e;
  627. *(cmmx_t*)(dst+ds) = bo_or_avg;
  628. a += sizeof(cmmx_t);
  629. dst += sizeof(cmmx_t);
  630. } while (--len);
  631. return psumbw(dint_count);
  632. }
  /*
   * Build one output plane `d` (stride ds, w x h pixels) from the source
   * planes a, b and c, which all use stride ss.
   *
   * Rows are produced in pairs: the first row of a pair comes from `a`, the
   * second is either copied from `b` (threshold >= 128) or chosen per pixel
   * by dint_copy_line / dint_copy_line_mmx2 from `b`, `c` and the average of
   * the neighbouring `a` rows.  A non-zero `field` first emits one row of `b`
   * and shifts the pairing down by one row.  The last one or two rows are
   * always plain copies.
   *
   * Returns the accumulated return values of the per-line routines.
   */
  static int
  dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,
                  unsigned char *c, unsigned long w, unsigned long h,
                  unsigned long ds, unsigned long ss, unsigned long threshold,
                  long field, long mmx2)
  {
      unsigned long replaced = 0;
      /* offsets from a row of `a` to the following row of `b` / `c` */
      long bos = b - a;
      long cos = c - a;

      bos += ss;
      cos += ss;

      if (field) {
          fast_memcpy(d, b, w);
          h--;
          d += ds;
          a += ss;
      }

      for (; h > 2; h -= 2, d += 2*ds, a += 2*ss) {
          if (threshold >= 128) {
              fast_memcpy(d, a, w);
              fast_memcpy(d+ds, a+bos, w);
              continue;
          }
          if (mmx2 == 1) {
              replaced += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
          } else {
              replaced += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);
          }
      }

      fast_memcpy(d, a, w);
      if (h == 2) {
          fast_memcpy(d+ds, a+bos, w);
      }
      return replaced;
  }
  667. static void
  668. copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,
  669. unsigned char **old, unsigned char **new, unsigned long show)
  670. {
  671. unsigned long threshold = 256;
  672. unsigned long field = p->swapped;
  673. unsigned long dint_pixels = 0;
  674. unsigned char **other = old;
  675. if (show >= 12 || !(show & 3))
  676. show >>= 2, other = new, new = old;
  677. if (show <= 2) { /* Single field: de-interlace */
  678. threshold = p->dint_thres;
  679. field ^= show & 1;
  680. old = new;
  681. } else if (show == 3)
  682. old = new;
  683. else
  684. field ^= 1;
  685. dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0],
  686. other[0], p->w, p->h, dmpi->stride[0],
  687. p->stride, threshold, field, p->mmx2);
  688. if (dmpi->flags & MP_IMGFLAG_PLANAR) {
  689. if (p->luma_only)
  690. old = new, other = new;
  691. else
  692. threshold = threshold/2 + 1;
  693. field ^= p->chroma_swapped;
  694. dint_copy_plane(dmpi->planes[1], old[1], new[1],
  695. other[1], p->cw, p->ch, dmpi->stride[1],
  696. p->chroma_stride, threshold, field, p->mmx2);
  697. dint_copy_plane(dmpi->planes[2], old[2], new[2],
  698. other[2], p->cw, p->ch, dmpi->stride[2],
  699. p->chroma_stride, threshold, field, p->mmx2);
  700. }
  701. if (dint_pixels > 0 && p->verbose)
  702. mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);
  703. }
  704. static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,
  705. unsigned char *of, unsigned char *nf,
  706. int w, int h, int os, int ns, int swapped)
  707. {
  708. int i, y;
  709. int align = -(long)nf & 7;
  710. of += align;
  711. nf += align;
  712. w -= align;
  713. if (swapped)
  714. of -= os, nf -= ns;
  715. i = (h*3 >> 7) & ~1;
  716. of += i*os + 8;
  717. nf += i*ns + 8;
  718. h -= i;
  719. w -= 16;
  720. memset(s, 0, sizeof(*s));
  721. for (y = (h-8) >> 3; y; y--) {
  722. if (p->mmx2 == 1) {
  723. for (i = 0; i < w; i += 8)
  724. block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);
  725. } else if (p->mmx2 == 2) {
  726. for (i = 0; i < w; i += 8)
  727. block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);
  728. } else if (p->fast > 3) {
  729. for (i = 0; i < w; i += 8)
  730. block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);
  731. } else if (p->fast > 1) {
  732. for (i = 0; i < w; i += 8)
  733. block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s);
  734. } else {
  735. for (i = 0; i < w; i += 8)
  736. block_metrics_c(of+i, nf+i, os, ns, 4, p, s);
  737. }
  738. of += 8*os;
  739. nf += 8*ns;
  740. }
  741. }
  742. #define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp
  743. static void diff_fields(struct vf_priv_s *p, struct frame_stats *s,
  744. unsigned char **old, unsigned char **new)
  745. {
  746. diff_planes(p, s, old[0], new[0], p->w, p->h,
  747. p->stride, p->stride, p->swapped);
  748. s->sad.even = (s->sad.even * 16ul) / s->num_blocks;
  749. s->sad.odd = (s->sad.odd * 16ul) / s->num_blocks;
  750. s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks;
  751. s->sad.temp = (s->sad.temp * 16ul) / s->num_blocks;
  752. if (p->verbose)
  753. mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, "
  754. "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, "
  755. "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n",
  756. p->inframes, p->chflag, METRICS(s->max), s->num_blocks,
  757. METRICS(s->tiny), METRICS(s->low), METRICS(s->high),
  758. METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad),
  759. s->interlaced_low, s->interlaced_high,
  760. p->iosync / (double) p->in_inc);
  761. }
  762. static const char *parse_args(struct vf_priv_s *p, const char *args)
  763. {
  764. args--;
  765. while (args && *++args &&
  766. (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 ||
  767. sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 ||
  768. sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 ||
  769. sscanf(args, "sad_thres=%lu", &p->sad_thres ) == 1 ||
  770. sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 ||
  771. sscanf(args, "fast=%u", &p->fast ) == 1 ||
  772. sscanf(args, "mmx2=%lu", &p->mmx2 ) == 1 ||
  773. sscanf(args, "luma_only=%u", &p->luma_only ) == 1 ||
  774. sscanf(args, "verbose=%u", &p->verbose ) == 1 ||
  775. sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w,
  776. &p->h, &p->crop_x, &p->crop_y) == 4))
  777. args = strchr(args, '/');
  778. return args;
  779. }
  /*
   * Greatest common divisor by Euclid's algorithm.
   * gcd(0, v) and gcd(v, 0) yield v; gcd(0, 0) yields 0.
   */
  static unsigned long gcd(unsigned long x, unsigned long y)
  {
      unsigned long small = x > y ? y : x;
      unsigned long large = x > y ? x : y;

      while (small != 0) {
          unsigned long rem = large % small;

          large = small;
          small = rem;
      }
      return large;
  }
  792. static void init(struct vf_priv_s *p, mp_image_t *mpi)
  793. {
  794. unsigned long i;
  795. unsigned long plane_size, chroma_plane_size;
  796. unsigned char *plane;
  797. unsigned long cos, los;
  798. p->crop_cx = p->crop_x >> mpi->chroma_x_shift;
  799. p->crop_cy = p->crop_y >> mpi->chroma_y_shift;
  800. if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) {
  801. p->stride = (mpi->w + 15) & ~15;
  802. p->chroma_stride = p->stride >> mpi->chroma_x_shift;
  803. } else {
  804. p->stride = mpi->width;
  805. p->chroma_stride = mpi->chroma_width;
  806. }
  807. p->cw = p->w >> mpi->chroma_x_shift;
  808. p->ch = p->h >> mpi->chroma_y_shift;
  809. p->nplanes = 1;
  810. p->static_idx = 0;
  811. p->temp_idx = 0;
  812. p->old_planes = p->planes[0];
  813. plane_size = mpi->h * p->stride;
  814. chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ?
  815. mpi->chroma_height * p->chroma_stride : 0;
  816. p->memory_allocated =
  817. malloc(NUM_STORED * (plane_size+2*chroma_plane_size) +
  818. 8*p->chroma_stride + 4096);
  819. /* align to page boundary */
  820. plane = p->memory_allocated + (-(long)p->memory_allocated & 4095);
  821. memset(plane, 0, NUM_STORED * plane_size);
  822. los = p->crop_x + p->crop_y * p->stride;
  823. cos = p->crop_cx + p->crop_cy * p->chroma_stride;
  824. for (i = 0; i != NUM_STORED; i++, plane += plane_size) {
  825. p->planes[i][0] = plane;
  826. p->planes[NUM_STORED + i][0] = plane + los;
  827. }
  828. if (mpi->flags & MP_IMGFLAG_PLANAR) {
  829. p->nplanes = 3;
  830. memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size);
  831. for (i = 0; i != NUM_STORED; i++) {
  832. p->planes[i][1] = plane;
  833. p->planes[NUM_STORED + i][1] = plane + cos;
  834. plane += chroma_plane_size;
  835. p->planes[i][2] = plane;
  836. p->planes[NUM_STORED + i][2] = plane + cos;
  837. plane += chroma_plane_size;
  838. }
  839. }
  840. p->out_dec <<= 2;
  841. i = gcd(p->in_inc, p->out_dec);
  842. p->in_inc /= i;
  843. p->out_dec /= i;
  844. p->iosync = 0;
  845. p->num_fields = 3;
  846. }
  /* Current time from gettimeofday(), in seconds with a fractional part. */
  static inline double get_time(void)
  {
      struct timeval now;
      double whole, micro;

      gettimeofday(&now, 0);
      whole = now.tv_sec;
      micro = now.tv_usec;
      return whole + micro * 1e-6;
  }
  853. static void get_image(struct vf_instance *vf, mp_image_t *mpi)
  854. {
  855. struct vf_priv_s *p = vf->priv;
  856. static unsigned char **planes, planes_idx;
  857. if (mpi->type == MP_IMGTYPE_STATIC) return;
  858. if (!p->planes[0][0]) init(p, mpi);
  859. if (mpi->type == MP_IMGTYPE_TEMP ||
  860. (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE)))
  861. planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2));
  862. else
  863. planes_idx = ++p->static_idx % (NUM_STORED/2);
  864. planes = p->planes[planes_idx];
  865. mpi->priv = p->planes[NUM_STORED + planes_idx];
  866. if (mpi->priv == p->old_planes) {
  867. unsigned char **old_planes =
  868. p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)];
  869. my_memcpy_pic(old_planes[0], p->old_planes[0],
  870. p->w, p->h, p->stride, p->stride);
  871. if (mpi->flags & MP_IMGFLAG_PLANAR) {
  872. my_memcpy_pic(old_planes[1], p->old_planes[1],
  873. p->cw, p->ch, p->chroma_stride, p->chroma_stride);
  874. my_memcpy_pic(old_planes[2], p->old_planes[2],
  875. p->cw, p->ch, p->chroma_stride, p->chroma_stride);
  876. }
  877. p->old_planes = old_planes;
  878. p->num_copies++;
  879. }
  880. mpi->planes[0] = planes[0];
  881. mpi->stride[0] = p->stride;
  882. if (mpi->flags & MP_IMGFLAG_PLANAR) {
  883. mpi->planes[1] = planes[1];
  884. mpi->planes[2] = planes[2];
  885. mpi->stride[1] = mpi->stride[2] = p->chroma_stride;
  886. }
  887. mpi->width = p->stride;
  888. mpi->flags |= MP_IMGFLAG_DIRECT;
  889. mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK;
  890. }
  /*
   * Tolerant comparison of x and y.
   *
   * The tolerance is (x + y + err) >> e.  The result is in -2..2: the sign
   * follows x - y, the magnitude is 2 when the difference exceeds the full
   * tolerance, 1 when it exceeds only half of it, and 0 otherwise.
   */
  static inline long
  cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e)
  {
      const long delta = x - y;
      const long full = (x + y + err) >> e;
      const long half = full >> 1;
      long score = 0;

      if (delta > full)
          score++;
      if (delta < -full)
          score--;
      if (delta > half)
          score++;
      if (delta < -half)
          score--;
      return score;
  }
  /*
   * Classify the current frame from its block statistics.
   *
   * p   filter state (rate counters, field counts, previous statistics).
   * s   statistics of the current frame.
   *
   * Returns one of 1, 2, 3, 4, 8, 9 or 10.  put_image() switches on this
   * value; it counts every nonzero result below 8 as a break, while 8 (the
   * initial value of ret, kept by every "goto still") and 9/10 (set just
   * before the "still" label) are not counted.
   *
   * The order of the tests matters: the first one that matches decides.
   */
  900. static unsigned long
  901. find_breaks(struct vf_priv_s *p, struct frame_stats *s)
  902. {
  /* The other of the two p->stats slots, i.e. not the one put_image() uses
   * for the current frame. */
  903. struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
  /* Zero exactly when out_dec == 5*in_inc. */
  904. long notfilm = 5*p->in_inc - p->out_dec;
  /* 1/256 of the block count; used as the unit for most thresholds below. */
  905. unsigned long n = s->num_blocks >> 8;
  /* cmpe() yields -2..2 as a long; it is stored unsigned here, so the
   * comparisons with -1 and -2 below rely on those constants being
   * converted to the same unsigned values. */
  906. unsigned long sad_comb_cmp = cmpe(s->sad.temp, s->sad.noise, 512, 1);
  907. unsigned long ret = 8;
  /* Diagnostic only: does not influence the result. */
  908. if (cmpe(s->sad.temp, s->sad.even, 512, 1) > 0)
  909. mp_msg(MSGT_VFILTER, MSGL_WARN,
  910. "@@@@@@@@ Bottom-first field??? @@@@@@@@\n");
  /* Both the temp and the noise SAD are large: code 3. */
  911. if (s->sad.temp > 1000 && s->sad.noise > 1000)
  912. return 3;
  913. if (s->interlaced_high >= 2*n && s->sad.temp > 256 && s->sad.noise > 256)
  914. return 3;
  915. if (s->high.noise > s->num_blocks/4 && s->sad.noise > 10000 &&
  916. s->sad.noise > 2*s->sad.even && s->sad.noise > 2*ps->sad.odd) {
  917. // Mid-frame scene change
  918. if (s->tiny.temp + s->interlaced_low < n ||
  919. s->low.temp + s->interlaced_high < n/4 ||
  920. s->high.temp + s->interlaced_high < n/8 ||
  921. s->sad.temp < 160)
  922. return 1;
  923. return 3;
  924. }
  925. if (s->high.temp > s->num_blocks/4 && s->sad.temp > 10000 &&
  926. s->sad.temp > 2*ps->sad.odd && s->sad.temp > 2*ps->sad.even) {
  927. // Start frame scene change
  928. if (s->tiny.noise + s->interlaced_low < n ||
  929. s->low.noise + s->interlaced_high < n/4 ||
  930. s->high.noise + s->interlaced_high < n/8 ||
  931. s->sad.noise < 160)
  932. return 2;
  933. return 3;
  934. }
  /* Clear-cut temp-vs-noise SAD comparison (magnitude 2). */
  935. if (sad_comb_cmp == 2)
  936. return 2;
  937. if (sad_comb_cmp == -2)
  938. return 1;
  /* Tests on the "tiny" threshold counters. */
  939. if (s->tiny.odd > 3*MAX(n,s->tiny.even) + s->interlaced_low)
  940. return 1;
  941. if (s->tiny.even > 3*MAX(n,s->tiny.odd)+s->interlaced_low &&
  942. (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
  943. return 4;
  944. if (s->sad.noise < 64 && s->sad.temp < 64 &&
  945. s->low.noise <= n/2 && s->high.noise <= n/4 &&
  946. s->low.temp <= n/2 && s->high.temp <= n/4)
  947. goto still;
  948. if (s->tiny.temp > 3*MAX(n,s->tiny.noise) + s->interlaced_low)
  949. return 2;
  950. if (s->tiny.noise > 3*MAX(n,s->tiny.temp) + s->interlaced_low)
  951. return 1;
  /* The same pattern on the "low" counters, with n/4 as the floor. */
  952. if (s->low.odd > 3*MAX(n/4,s->low.even) + s->interlaced_high)
  953. return 1;
  954. if (s->low.even > 3*MAX(n/4,s->low.odd)+s->interlaced_high &&
  955. s->sad.even > 2*s->sad.odd &&
  956. (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
  957. return 4;
  958. if (s->low.temp > 3*MAX(n/4,s->low.noise) + s->interlaced_high)
  959. return 2;
  960. if (s->low.noise > 3*MAX(n/4,s->low.temp) + s->interlaced_high)
  961. return 1;
  /* Weaker temp-vs-noise SAD comparison (magnitude 1), accepted only when
   * the smaller side is below 64. */
  962. if (sad_comb_cmp == 1 && s->sad.noise < 64)
  963. return 2;
  964. if (sad_comb_cmp == -1 && s->sad.temp < 64)
  965. return 1;
  966. if (s->tiny.odd <= n || (s->tiny.noise <= n/2 && s->tiny.temp <= n/2)) {
  967. if (s->interlaced_low <= n) {
  968. if (p->num_fields == 1)
  969. goto still;
  970. if (s->tiny.even <= n || ps->tiny.noise <= n/2)
  971. /* Still frame */
  972. goto still;
  973. if (s->bigger.even >= 2*MAX(n,s->bigger.odd) + s->interlaced_low)
  974. return 4;
  975. if (s->low.even >= 2*n + s->interlaced_low)
  976. return 4;
  977. goto still;
  978. }
  979. }
  980. if (s->low.odd <= n/4) {
  981. if (s->interlaced_high <= n/4) {
  982. if (p->num_fields == 1)
  983. goto still;
  984. if (s->low.even <= n/4)
  985. /* Still frame */
  986. goto still;
  987. if (s->bigger.even >= 2*MAX(n/4,s->bigger.odd)+s->interlaced_high)
  988. return 4;
  989. if (s->low.even >= n/2 + s->interlaced_high)
  990. return 4;
  991. goto still;
  992. }
  993. }
  /* Last resort: the "bigger" and "twox" counters. */
  994. if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_low)
  995. return 2;
  996. if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_low)
  997. return 1;
  998. if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_high)
  999. return 2;
  1000. if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_high)
  1001. return 1;
  1002. if (s->twox.temp > 2*MAX(n,s->twox.noise) + s->interlaced_high)
  1003. return 2;
  1004. if (s->twox.noise > 2*MAX(n,s->twox.temp) + s->interlaced_high)
  1005. return 1;
  1006. if (s->bigger.even > 2*MAX(n,s->bigger.odd) + s->interlaced_low &&
  1007. s->bigger.temp < n && s->bigger.noise < n)
  1008. return 4;
  1009. if (s->interlaced_low > MIN(2*n, s->tiny.odd))
  1010. return 3;
  /* Nothing matched: ret becomes 10 if sad.temp > sad.noise, else 9. */
  1011. ret = 8 + (1 << (s->sad.temp > s->sad.noise));
  /* From here on the statistics were inconclusive (ret is 8, 9 or 10); the
   * decision is made from the field counts and the rate counters instead. */
  1012. still:
  1013. if (p->num_fields == 1 && p->prev_fields == 3 && notfilm >= 0 &&
  1014. (s->tiny.temp <= s->tiny.noise || s->sad.temp < s->sad.noise+16))
  1015. return 1;
  1016. if (p->notout < p->num_fields && p->iosync > 2*p->in_inc && notfilm < 0)
  1017. notfilm = 0;
  1018. if (p->num_fields < 2 ||
  1019. (p->num_fields == 2 && p->prev_fields == 2 && notfilm < 0))
  1020. return ret;
  /* prev_fields is 2 or 3 and notfilm is zero: choose the result from the
   * sum of the previous and the current field count. */
  1021. if (!notfilm && (p->prev_fields&~1) == 2) {
  1022. if (p->prev_fields + p->num_fields == 5) {
  1023. if (s->tiny.noise <= s->tiny.temp ||
  1024. s->low.noise == 0 || s->low.noise < s->low.temp ||
  1025. s->sad.noise < s->sad.temp+16)
  1026. return 2;
  1027. }
  1028. if (p->prev_fields + p->num_fields == 4) {
  1029. if (s->tiny.temp <= s->tiny.noise ||
  1030. s->low.temp == 0 || s->low.temp < s->low.noise ||
  1031. s->sad.temp < s->sad.noise+16)
  1032. return 1;
  1033. }
  1034. }
  1035. if (p->num_fields > 2 &&
  1036. ps->sad.noise > s->sad.noise && ps->sad.noise > s->sad.temp)
  1037. return 4;
  /* 1 when sad.noise > sad.temp, otherwise 2. */
  1038. return 2 >> (s->sad.noise > s->sad.temp);
  1039. }
  /*
   * Map a small integer to a character for the verbose log: 0 becomes a
   * space, 1..9 become '1'..'9', values above 9 continue with 'a'.  Negative
   * values are not special-cased and land on characters below '0'.
   * X is evaluated more than once.
   */
  1040. #define ITOC(X) (!(X) ? ' ' : (X) + ((X)>9 ? 'a'-10 : '0'))
  /*
   * Per-frame entry point of the filter.
   *
   * Steps, in order:
   *   1. obtain the stored plane set for the incoming frame (either the one
   *      handed out by get_image() or a cropped copy made here);
   *   2. compute the frame statistics, or substitute the preset records
   *      ppzs / pprs (defined elsewhere in the file);
   *   3. get a break code from find_breaks() (the very first frame uses 2);
   *   4. derive show_fields, a 4-bit mask of the fields to output.  Going by
   *      the inline comments below: 1 = odd, 2 = even, 4 = previous odd,
   *      8 = previous even;
   *   5. update the field counters and the iosync / notout accounting;
   *   6. if anything is shown, emit either an EXPORT image pointing at the
   *      stored planes or a TEMP image filled by copy_merge_fields().
   *
   * Returns the result of vf_next_put_image() when a frame is output and 0
   * otherwise.  The pts argument is not used; output frames are passed on
   * with MP_NOPTS_VALUE.
   */
  1041. static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
  1042. {
  1043. mp_image_t *dmpi;
  1044. struct vf_priv_s *p = vf->priv;
  1045. unsigned char **planes, **old_planes;
  /* s: statistics slot for this frame; ps: the other of the two slots. */
  1046. struct frame_stats *s = &p->stats[p->inframes & 1];
  1047. struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
  1048. int swapped = 0;
  1049. const int flags = mpi->fields;
  1050. int breaks, prev;
  1051. int show_fields = 0;
  1052. int dropped_fields = 0;
  1053. double start_time, diff_time;
  1054. char prev_chflag = p->chflag;
  1055. int keep_rate;
  1056. if (!p->planes[0][0]) init(p, mpi);
  1057. old_planes = p->old_planes;
  /* Direct-rendered frame: get_image() left the plane set in mpi->priv. */
  1058. if ((mpi->flags & MP_IMGFLAG_DIRECT) && mpi->priv) {
  1059. planes = mpi->priv;
  1060. mpi->priv = 0;
  1061. } else {
  /* Otherwise copy the cropped input into one of two alternating slots. */
  1062. planes = p->planes[2 + (++p->temp_idx & 1)];
  1063. my_memcpy_pic(planes[0],
  1064. mpi->planes[0] + p->crop_x + p->crop_y * mpi->stride[0],
  1065. p->w, p->h, p->stride, mpi->stride[0]);
  1066. if (mpi->flags & MP_IMGFLAG_PLANAR) {
  1067. my_memcpy_pic(planes[1],
  1068. mpi->planes[1] + p->crop_cx + p->crop_cy * mpi->stride[1],
  1069. p->cw, p->ch, p->chroma_stride, mpi->stride[1]);
  1070. my_memcpy_pic(planes[2],
  1071. mpi->planes[2] + p->crop_cx + p->crop_cy * mpi->stride[2],
  1072. p->cw, p->ch, p->chroma_stride, mpi->stride[2]);
  /* As written, copies are only counted for planar input. */
  1073. p->num_copies++;
  1074. }
  1075. }
  1076. p->old_planes = planes;
  /* chflag: ';' = field order not flagged, '|' = repeat-first-field,
   * ':' = top field first, '.' = ordered but not top first. */
  1077. p->chflag = ';';
  1078. if (flags & MP_IMGFIELD_ORDERED) {
  1079. swapped = !(flags & MP_IMGFIELD_TOP_FIRST);
  1080. p->chflag = (flags & MP_IMGFIELD_REPEAT_FIRST ? '|' :
  1081. flags & MP_IMGFIELD_TOP_FIRST ? ':' : '.');
  1082. }
  1083. p->swapped = swapped;
  1084. start_time = get_time();
  /* Repeat-first frames, and (with bit 0 of p->fast set) the frame right
   * after one, use preset statistics instead of a measurement. */
  1085. if (p->chflag == '|') {
  1086. *s = ppzs;
  1087. p->iosync += p->in_inc;
  1088. } else if ((p->fast & 1) && prev_chflag == '|')
  1089. *s = pprs;
  1090. else
  1091. diff_fields(p, s, old_planes, planes);
  1092. diff_time = get_time();
  1093. p->diff_time += diff_time - start_time;
  1094. breaks = p->inframes ? find_breaks(p, s) : 2;
  1095. p->inframes++;
  1096. keep_rate = 4*p->in_inc == p->out_dec;
  1097. switch (breaks) {
  1098. case 0:
  1099. case 8:
  1100. case 9:
  1101. case 10:
  1102. if (!keep_rate && p->notout < p->num_fields && p->iosync < 2*p->in_inc)
  1103. break;
  1104. if (p->notout < p->num_fields)
  1105. dropped_fields = -2;
  /* fall through */
  1106. case 4:
  1107. if (keep_rate || p->iosync >= -2*p->in_inc)
  1108. show_fields = (4<<p->num_fields)-1;
  1109. break;
  1110. case 3:
  1111. if (keep_rate)
  1112. show_fields = 2;
  1113. else if (p->iosync > 0) {
  1114. if (p->notout >= p->num_fields && p->iosync > 2*p->in_inc) {
  1115. show_fields = 4; /* prev odd only */
  1116. if (p->num_fields > 1)
  1117. show_fields |= 8; /* + prev even */
  1118. } else {
  1119. show_fields = 2; /* even only */
  1120. if (p->notout >= p->num_fields)
  1121. dropped_fields += p->num_fields;
  1122. }
  1123. }
  1124. break;
  1125. case 2:
  1126. if (p->iosync <= -3*p->in_inc) {
  1127. if (p->notout >= p->num_fields)
  1128. dropped_fields = p->num_fields;
  1129. break;
  1130. }
  1131. if (p->num_fields == 1) {
  1132. int prevbreak = ps->sad.noise >= 128;
  1133. if (p->iosync < 4*p->in_inc) {
  1134. show_fields = 3;
  1135. dropped_fields = prevbreak;
  1136. } else {
  1137. show_fields = 4 | (!prevbreak << 3);
  1138. if (p->notout < 1 + p->prev_fields)
  1139. dropped_fields = -!prevbreak;
  1140. }
  1141. break;
  1142. }
  /* fall through: code 2 with more than one pending field is handled
   * like the remaining codes */
  1143. default:
  1144. if (keep_rate)
  1145. show_fields = 3 << (breaks & 1);
  1146. else if (p->notout >= p->num_fields &&
  1147. p->iosync >= (breaks == 1 ? -p->in_inc :
  1148. p->in_inc << (p->num_fields == 1))) {
  1149. show_fields = (1 << (2 + p->num_fields)) - (1<<breaks);
  1150. } else {
  1151. if (p->notout >= p->num_fields)
  1152. dropped_fields += p->num_fields + 2 - breaks;
  1153. if (breaks == 1) {
  1154. if (p->iosync >= 4*p->in_inc)
  1155. show_fields = 6;
  1156. } else if (p->iosync > -3*p->in_inc)
  1157. show_fields = 3; /* odd+even */
  1158. }
  1159. break;
  1160. }
  /* Only the four field bits are meaningful. */
  1161. show_fields &= 15;
  1162. prev = p->prev_fields;
  /* Update the field counters.  Codes below 8 restart the count (code 4 is
   * reduced to 0 when only one field is pending); everything else just adds
   * the two fields of this frame. */
  1163. if (breaks < 8) {
  1164. if (p->num_fields == 1)
  1165. breaks &= ~4;
  1166. if (breaks)
  1167. p->num_breaks++;
  1168. if (breaks == 3)
  1169. p->prev_fields = p->num_fields = 1;
  1170. else if (breaks) {
  1171. p->prev_fields = p->num_fields + (breaks==1) - (breaks==4);
  1172. p->num_fields = breaks - (breaks == 4) + (p->chflag == '|');
  1173. } else
  1174. p->num_fields += 2;
  1175. } else
  1176. p->num_fields += 2;
  /* Rate accounting: every input frame adds 4*in_inc, every output frame
   * subtracts out_dec.  Repeat-first frames add in_inc here in addition to
   * the in_inc already added before the statistics step. */
  1177. p->iosync += 4 * p->in_inc;
  1178. if (p->chflag == '|')
  1179. p->iosync += p->in_inc;
  1180. if (show_fields) {
  1181. p->iosync -= p->out_dec;
  1182. p->notout = !(show_fields & 1) + !(show_fields & 3);
  /* Export the stored planes unchanged when a complete frame (both current
   * or both previous fields) is shown and its noise figures are low;
   * otherwise build the output with copy_merge_fields(). */
  1183. if (((show_fields & 3) == 3 &&
  1184. (s->low.noise + s->interlaced_low < (s->num_blocks>>8) ||
  1185. s->sad.noise < 160)) ||
  1186. ((show_fields & 12) == 12 &&
  1187. (ps->low.noise + ps->interlaced_low < (s->num_blocks>>8) ||
  1188. ps->sad.noise < 160))) {
  1189. p->export_count++;
  /* NOTE(review): the result of vf_get_image() is used without a NULL
   * check here and in the merge branch - confirm it cannot fail. */
  1190. dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT,
  1191. MP_IMGFLAG_PRESERVE|MP_IMGFLAG_READABLE,
  1192. p->w, p->h);
  /* Not both current fields: export the previous frame's planes. */
  1193. if ((show_fields & 3) != 3) planes = old_planes;
  1194. dmpi->planes[0] = planes[0];
  1195. dmpi->stride[0] = p->stride;
  1196. dmpi->width = mpi->width;
  1197. if (mpi->flags & MP_IMGFLAG_PLANAR) {
  1198. dmpi->planes[1] = planes[1];
  1199. dmpi->planes[2] = planes[2];
  1200. dmpi->stride[1] = p->chroma_stride;
  1201. dmpi->stride[2] = p->chroma_stride;
  1202. }
  1203. } else {
  1204. p->merge_count++;
  1205. dmpi = vf_get_image(vf->next, mpi->imgfmt,
  1206. MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
  1207. p->w, p->h);
  1208. copy_merge_fields(p, dmpi, old_planes, planes, show_fields);
  1209. }
  1210. p->outframes++;
  1211. } else
  /* Nothing shown: two more fields are pending output. */
  1212. p->notout += 2;
  1213. if (p->verbose)
  1214. mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu %lu: %x %c %c %lu%s%s%c%s\n",
  1215. p->inframes, p->outframes,
  1216. breaks, breaks<8 && breaks>0 ? (int) p->prev_fields+'0' : ' ',
  1217. ITOC(show_fields),
  1218. p->num_breaks, 5*p->in_inc == p->out_dec && breaks<8 &&
  1219. breaks>0 && ((prev&~1)!=2 || prev+p->prev_fields!=5) ?
  1220. " ######## bad telecine ########" : "",
  1221. dropped_fields ? " ======== dropped ":"", ITOC(dropped_fields),
  /* The trailing marker is printed when exactly one field bit is set. */
  1222. !show_fields || (show_fields & (show_fields-1)) ?
  1223. "" : " @@@@@@@@@@@@@@@@@");
  1224. p->merge_time += get_time() - diff_time;
  /* dmpi is only assigned, and only used, when show_fields is nonzero. */
  1225. return show_fields ? vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE) : 0;
  1226. }
  1227. static int query_format(struct vf_instance *vf, unsigned int fmt)
  1228. {
  1229. /* FIXME - support more formats */
  1230. switch (fmt) {
  1231. case IMGFMT_YV12:
  1232. case IMGFMT_IYUV:
  1233. case IMGFMT_I420:
  1234. case IMGFMT_411P:
  1235. case IMGFMT_422P:
  1236. case IMGFMT_444P:
  1237. return vf_next_query_format(vf, fmt);
  1238. }
  1239. return 0;
  1240. }
  1241. static int config(struct vf_instance *vf,
  1242. int width, int height, int d_width, int d_height,
  1243. unsigned int flags, unsigned int outfmt)
  1244. {
  1245. unsigned long cxm = 0;
  1246. unsigned long cym = 0;
  1247. struct vf_priv_s *p = vf->priv;
  1248. // rounding:
  1249. if(!IMGFMT_IS_RGB(outfmt) && !IMGFMT_IS_BGR(outfmt)){
  1250. switch(outfmt){
  1251. case IMGFMT_444P:
  1252. case IMGFMT_Y800:
  1253. case IMGFMT_Y8:
  1254. break;
  1255. case IMGFMT_YVU9:
  1256. case IMGFMT_IF09:
  1257. cym = 3;
  1258. case IMGFMT_411P:
  1259. cxm = 3;
  1260. break;
  1261. case IMGFMT_YV12:
  1262. case IMGFMT_I420:
  1263. case IMGFMT_IYUV:
  1264. cym = 1;
  1265. default:
  1266. cxm = 1;
  1267. }
  1268. }
  1269. p->chroma_swapped = !!(p->crop_y & (cym+1));
  1270. if (p->w) p->w += p->crop_x & cxm;
  1271. if (p->h) p->h += p->crop_y & cym;
  1272. p->crop_x &= ~cxm;
  1273. p->crop_y &= ~cym;
  1274. if (!p->w || p->w > width ) p->w = width;
  1275. if (!p->h || p->h > height) p->h = height;
  1276. if (p->crop_x + p->w > width ) p->crop_x = 0;
  1277. if (p->crop_y + p->h > height) p->crop_y = 0;
  1278. if(!opt_screen_size_x && !opt_screen_size_y){
  1279. d_width = d_width * p->w/width;
  1280. d_height = d_height * p->h/height;
  1281. }
  1282. return vf_next_config(vf, p->w, p->h, d_width, d_height, flags, outfmt);
  1283. }
  1284. static void uninit(struct vf_instance *vf)
  1285. {
  1286. struct vf_priv_s *p = vf->priv;
  1287. mp_msg(MSGT_VFILTER, MSGL_INFO, "diff_time: %.3f, merge_time: %.3f, "
  1288. "export: %lu, merge: %lu, copy: %lu\n", p->diff_time, p->merge_time,
  1289. p->export_count, p->merge_count, p->num_copies);
  1290. free(p->memory_allocated);
  1291. free(p);
  1292. }
  1293. static int vf_open(vf_instance_t *vf, char *args)
  1294. {
  1295. struct vf_priv_s *p;
  1296. vf->get_image = get_image;
  1297. vf->put_image = put_image;
  1298. vf->config = config;
  1299. vf->query_format = query_format;
  1300. vf->uninit = uninit;
  1301. vf->default_reqs = VFCAP_ACCEPT_STRIDE;
  1302. vf->priv = p = calloc(1, sizeof(struct vf_priv_s));
  1303. p->out_dec = 5;
  1304. p->in_inc = 4;
  1305. p->thres.noise = 128;
  1306. p->thres.even = 128;
  1307. p->sad_thres = 64;
  1308. p->dint_thres = 4;
  1309. p->luma_only = 0;
  1310. p->fast = 3;
  1311. p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
  1312. if (args) {
  1313. const char *args_remain = parse_args(p, args);
  1314. if (args_remain) {
  1315. mp_msg(MSGT_VFILTER, MSGL_FATAL,
  1316. "filmdint: unknown suboption: %s\n", args_remain);
  1317. return 0;
  1318. }
  1319. if (p->out_dec < p->in_inc) {
  1320. mp_msg(MSGT_VFILTER, MSGL_FATAL,
  1321. "filmdint: increasing the frame rate is not supported\n");
  1322. return 0;
  1323. }
  1324. }
  1325. if (p->mmx2 > 2)
  1326. p->mmx2 = 0;
  1327. #if !HAVE_MMX
  1328. p->mmx2 = 0;
  1329. #endif
  1330. #if !HAVE_AMD3DNOW
  1331. p->mmx2 &= 1;
  1332. #endif
  1333. p->thres.odd = p->thres.even;
  1334. p->thres.temp = p->thres.noise;
  1335. p->diff_time = 0;
  1336. p->merge_time = 0;
  1337. return 1;
  1338. }
  1339. const vf_info_t vf_info_filmdint = {
  1340. "Advanced inverse telecine filer",
  1341. "filmdint",
  1342. "Zoltan Hidvegi",
  1343. "",
  1344. vf_open,
  1345. NULL
  1346. };