rgb2rgb_template.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967
  1. /*
  2. * software RGB to RGB converter
  3. * pluralize by software PAL8 to RGB converter
  4. * software YUV to YUV converter
  5. * software YUV to RGB converter
  6. * Written by Nick Kurshev.
  7. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
  8. * lot of big-endian byte order fixes by Alex Beregszaszi
  9. *
  10. * This file is part of Libav.
  11. *
  12. * Libav is free software; you can redistribute it and/or
  13. * modify it under the terms of the GNU Lesser General Public
  14. * License as published by the Free Software Foundation; either
  15. * version 2.1 of the License, or (at your option) any later version.
  16. *
  17. * Libav is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  20. * Lesser General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU Lesser General Public
  23. * License along with Libav; if not, write to the Free Software
  24. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25. */
  26. #include <stddef.h>
  /**
   * Expand packed 3-byte pixels into 4-byte pixels with a constant 255 byte.
   *
   * Little-endian builds copy the three source bytes in order and append 255;
   * big-endian builds emit 255 first and then the three bytes reversed.
   *
   * @param src      packed 3-byte input pixels
   * @param dst      output buffer, receives 4 bytes per input pixel
   * @param src_size number of input bytes; the loop consumes 3 bytes per step
   */
  static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *in = src;
      const uint8_t *const stop = src + src_size;
      uint8_t *out = dst;

      for (; in < stop; in += 3, out += 4) {
  #if HAVE_BIGENDIAN
          /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
          out[0] = 255;
          out[1] = in[2];
          out[2] = in[1];
          out[3] = in[0];
  #else
          out[0] = in[0];
          out[1] = in[1];
          out[2] = in[2];
          out[3] = 255;
  #endif
      }
  }
  /**
   * Reduce 4-byte pixels to packed 3-byte pixels by dropping one byte.
   *
   * Little-endian builds keep bytes 0..2 of every pixel in order and drop
   * byte 3; big-endian builds drop byte 0 and store bytes 1..3 reversed.
   *
   * @param src      4-byte input pixels
   * @param dst      output buffer, receives 3 bytes per input pixel
   * @param src_size number of input bytes; the loop consumes 4 bytes per step
   */
  static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *in = src;
      const uint8_t *const stop = src + src_size;
      uint8_t *out = dst;

      for (; in < stop; in += 4, out += 3) {
  #if HAVE_BIGENDIAN
          /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
          out[2] = in[1];
          out[1] = in[2];
          out[0] = in[3];
  #else
          out[0] = in[0];
          out[1] = in[1];
          out[2] = in[2];
  #endif
      }
  }
  71. /*
  72. original by Strepto/Astral
  73. ported to gcc & bugfixed: A'rpi
  74. MMX2, 3DNOW optimization by Nick Kurshev
  75. 32-bit C version, and and&add trick by Michael Niedermayer
  76. */
  /**
   * Convert 15-bit pixels to 16-bit pixels, two at a time where possible.
   *
   * Bits 5..14 of each 16-bit value are moved up by one position while
   * bits 0..4 stay put. This is done with the "and & add" trick: adding the
   * value masked with 0x7FE0 to the value masked with 0x7FFF doubles the
   * upper field, which is the same as shifting it left by one.
   *
   * @param src      input pixels, read as native 16/32-bit words
   * @param dst      output pixels, written as native 16/32-bit words
   * @param src_size number of input bytes
   */
  static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *in = src;
      uint8_t *out = dst;
      long left = src_size;

      /* two pixels per step while at least four bytes remain */
      for (; left > 3; left -= 4, in += 4, out += 4) {
          const unsigned pair = *(const uint32_t *)in;

          *(uint32_t *)out = (pair & 0x7FFF7FFF) + (pair & 0x7FE07FE0);
      }

      /* one trailing pixel, if any bytes are left */
      if (left > 0) {
          const unsigned short px = *(const uint16_t *)in;

          *(uint16_t *)out = (px & 0x7FFF) + (px & 0x7FE0);
      }
  }
  /**
   * Convert 16-bit pixels to 15-bit pixels, two at a time where possible.
   *
   * Bits 6..15 of each 16-bit value are moved down by one position (bit 5 is
   * discarded) while bits 0..4 are kept unchanged.
   *
   * @param src      input pixels, read as native 16/32-bit words
   * @param dst      output pixels, written as native 16/32-bit words
   * @param src_size number of input bytes
   */
  static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *in = src;
      uint8_t *out = dst;
      long left = src_size;

      /* two pixels per step while at least four bytes remain */
      for (; left > 3; left -= 4, in += 4, out += 4) {
          const uint32_t pair = *(const uint32_t *)in;

          *(uint32_t *)out = ((pair >> 1) & 0x7FE07FE0) | (pair & 0x001F001F);
      }

      /* one trailing pixel, if any bytes are left */
      if (left > 0) {
          const uint16_t px = *(const uint16_t *)in;

          *(uint16_t *)out = ((px >> 1) & 0x7FE0) | (px & 0x001F);
      }
  }
  /**
   * Pack 32-bit pixels into 16-bit 5-6-5 pixels, keeping the field order.
   *
   * For every native 32-bit input word: bits 3..7 land in output bits 0..4,
   * bits 10..15 in output bits 5..10 and bits 19..23 in output bits 11..15.
   *
   * @param src      input pixels, read as native 32-bit words
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes
   */
  static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      uint16_t *out = (uint16_t *)dst;
      long pos;

      for (pos = 0; pos < src_size; pos += 4) {
          const uint32_t px = *(const uint32_t *)(src + pos);
          const uint32_t low5  = (px >> 3) & 0x001F;
          const uint32_t mid6  = (px >> 5) & 0x07E0;
          const uint32_t high5 = (px >> 8) & 0xF800;

          *out++ = low5 | mid6 | high5;
      }
  }
  /**
   * Pack 32-bit pixels into 16-bit 5-6-5 pixels with the outer fields swapped.
   *
   * For every native 32-bit input word: bits 3..7 land in output bits 11..15,
   * bits 10..15 in output bits 5..10 and bits 19..23 in output bits 0..4.
   *
   * @param src      input pixels, read as native 32-bit words
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes
   */
  static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      uint16_t *out = (uint16_t *)dst;
      long pos;

      for (pos = 0; pos < src_size; pos += 4) {
          const uint32_t px = *(const uint32_t *)(src + pos);
          const uint32_t high5 = (px & 0xF8) << 8;
          const uint32_t mid6  = (px >> 5) & 0x07E0;
          const uint32_t low5  = (px >> 19) & 0x001F;

          *out++ = high5 | mid6 | low5;
      }
  }
  /**
   * Pack 32-bit pixels into 15-bit 5-5-5 pixels, keeping the field order.
   *
   * For every native 32-bit input word: bits 3..7 land in output bits 0..4,
   * bits 11..15 in output bits 5..9 and bits 19..23 in output bits 10..14.
   *
   * @param src      input pixels, read as native 32-bit words
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes
   */
  static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      uint16_t *out = (uint16_t *)dst;
      long pos;

      for (pos = 0; pos < src_size; pos += 4) {
          const uint32_t px = *(const uint32_t *)(src + pos);
          const uint32_t low5  = (px >> 3) & 0x001F;
          const uint32_t mid5  = (px >> 6) & 0x03E0;
          const uint32_t high5 = (px >> 9) & 0x7C00;

          *out++ = low5 | mid5 | high5;
      }
  }
  /**
   * Pack 32-bit pixels into 15-bit 5-5-5 pixels with the outer fields swapped.
   *
   * For every native 32-bit input word: bits 3..7 land in output bits 10..14,
   * bits 11..15 in output bits 5..9 and bits 19..23 in output bits 0..4.
   *
   * @param src      input pixels, read as native 32-bit words
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes
   */
  static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      uint16_t *out = (uint16_t *)dst;
      long pos;

      for (pos = 0; pos < src_size; pos += 4) {
          const uint32_t px = *(const uint32_t *)(src + pos);
          const uint32_t high5 = (px & 0xF8) << 7;
          const uint32_t mid5  = (px >> 6) & 0x03E0;
          const uint32_t low5  = (px >> 19) & 0x001F;

          *out++ = high5 | mid5 | low5;
      }
  }
  /**
   * Pack 3-byte pixels into 16-bit 5-6-5 pixels.
   *
   * Byte 0 of each pixel supplies output bits 0..4, byte 1 supplies bits
   * 5..10 and byte 2 supplies bits 11..15 (the top bits of each byte are kept).
   *
   * @param src      packed 3-byte input pixels
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes; the loop consumes 3 bytes per step
   */
  static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *px;
      const uint8_t *const stop = src + src_size;
      uint16_t *out = (uint16_t *)dst;

      for (px = src; px < stop; px += 3) {
          const int low  = px[0] >> 3;
          const int mid  = (px[1] & 0xFC) << 3;
          const int high = (px[2] & 0xF8) << 8;

          *out++ = low | mid | high;
      }
  }
  /**
   * Pack 3-byte pixels into 16-bit 5-6-5 pixels, reversing the byte order.
   *
   * Byte 2 of each pixel supplies output bits 0..4, byte 1 supplies bits
   * 5..10 and byte 0 supplies bits 11..15 (the top bits of each byte are kept).
   *
   * @param src      packed 3-byte input pixels
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes; the loop consumes 3 bytes per step
   */
  static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *px;
      const uint8_t *const stop = src + src_size;
      uint16_t *out = (uint16_t *)dst;

      for (px = src; px < stop; px += 3) {
          const int high = (px[0] & 0xF8) << 8;
          const int mid  = (px[1] & 0xFC) << 3;
          const int low  = px[2] >> 3;

          *out++ = low | mid | high;
      }
  }
  /**
   * Pack 3-byte pixels into 15-bit 5-5-5 pixels.
   *
   * Byte 0 of each pixel supplies output bits 0..4, byte 1 supplies bits
   * 5..9 and byte 2 supplies bits 10..14 (the top five bits of each byte).
   *
   * @param src      packed 3-byte input pixels
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes; the loop consumes 3 bytes per step
   */
  static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *px;
      const uint8_t *const stop = src + src_size;
      uint16_t *out = (uint16_t *)dst;

      for (px = src; px < stop; px += 3) {
          const int low  = px[0] >> 3;
          const int mid  = (px[1] & 0xF8) << 2;
          const int high = (px[2] & 0xF8) << 7;

          *out++ = low | mid | high;
      }
  }
  /**
   * Pack 3-byte pixels into 15-bit 5-5-5 pixels, reversing the byte order.
   *
   * Byte 2 of each pixel supplies output bits 0..4, byte 1 supplies bits
   * 5..9 and byte 0 supplies bits 10..14 (the top five bits of each byte).
   *
   * @param src      packed 3-byte input pixels
   * @param dst      output pixels, written as native 16-bit words
   * @param src_size number of input bytes; the loop consumes 3 bytes per step
   */
  static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint8_t *px;
      const uint8_t *const stop = src + src_size;
      uint16_t *out = (uint16_t *)dst;

      for (px = src; px < stop; px += 3) {
          const int high = (px[0] & 0xF8) << 7;
          const int mid  = (px[1] & 0xF8) << 2;
          const int low  = px[2] >> 3;

          *out++ = low | mid | high;
      }
  }
  /*
   * I use a less accurate approximation here by simply left-shifting the input
   * value and filling the low order bits with zeroes. This method improves PNG
   * compression but this scheme cannot reproduce white exactly, since it does
   * not generate an all-ones maximum value; the net effect is to darken the
   * image slightly.
   *
   * The better method should be "left bit replication":
   *
   *   4 3 2 1 0
   *   ---------
   *   1 1 0 1 1
   *
   *   7 6 5 4 3  2 1 0
   *   ----------------
   *   1 1 0 1 1  1 1 0
   *   |=======|  |===|
   *       |      leftmost bits repeated to fill open bits
   *       |
   *   original bits
   */
  /**
   * Unpack 15-bit 5-5-5 pixels into 3 bytes per pixel.
   *
   * Each 5-bit field is placed in the top five bits of its output byte and
   * the low three bits are left zero. Output byte 0 comes from input bits
   * 0..4, byte 1 from bits 5..9 and byte 2 from bits 10..14.
   *
   * @param src      input pixels, read as native 16-bit words
   * @param dst      output buffer, receives 3 bytes per input pixel
   * @param src_size number of input bytes (src_size / 2 pixels are converted)
   */
  static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const long pixels = src_size / 2;
      uint8_t *out = dst;
      long n;

      for (n = 0; n < pixels; n++, out += 3) {
          const unsigned v = in[n];

          out[0] = (v & 0x001F) << 3;
          out[1] = (v & 0x03E0) >> 2;
          out[2] = (v & 0x7C00) >> 7;
      }
  }
  /**
   * Unpack 16-bit 5-6-5 pixels into 3 bytes per pixel.
   *
   * Each field is placed in the top bits of its output byte and the remaining
   * low bits are left zero. Output byte 0 comes from input bits 0..4, byte 1
   * from bits 5..10 and byte 2 from bits 11..15.
   *
   * @param src      input pixels, read as native 16-bit words
   * @param dst      output buffer, receives 3 bytes per input pixel
   * @param src_size number of input bytes (src_size / 2 pixels are converted)
   */
  static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const long pixels = src_size / 2;
      uint8_t *out = (uint8_t *)dst;
      long n;

      for (n = 0; n < pixels; n++, out += 3) {
          const unsigned v = in[n];

          out[0] = (v & 0x001F) << 3;
          out[1] = (v & 0x07E0) >> 3;
          out[2] = (v & 0xF800) >> 8;
      }
  }
  /*
   * PACK_RGB32: inline-assembly fragment that interleaves four B, G and R
   * samples with an FF byte into two quadwords and stores them at %0 and 8%0.
   *
   * Register contents expected on entry:
   * mm0 = 00 B3 00 B2 00 B1 00 B0
   * mm1 = 00 G3 00 G2 00 G1 00 G0
   * mm2 = 00 R3 00 R2 00 R1 00 R0
   * mm6 = FF FF FF FF FF FF FF FF
   * mm7 = 00 00 00 00 00 00 00 00
   *
   * MOVNTQ is not defined in this part of the file; it is presumably a
   * string-literal macro naming the store instruction -- confirm at the
   * point of use.
   *
   * The final line of the macro deliberately has no trailing backslash, so
   * the definition ends here and cannot absorb whatever line follows it.
   */
  #define PACK_RGB32 \
      "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
      "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
      "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
      "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
      "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
      "movq %%mm0, %%mm3 \n\t" \
      "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
      "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
      MOVNTQ" %%mm0, %0 \n\t" \
      MOVNTQ" %%mm3, 8%0 \n\t"
  /**
   * Unpack 15-bit 5-5-5 pixels into 4 bytes per pixel with a constant 255 byte.
   *
   * Each 5-bit field is placed in the top five bits of a byte. Little-endian
   * builds emit the fields from input bits 0..4, 5..9, 10..14 followed by 255;
   * big-endian builds emit 255 first and the three fields in reverse order.
   *
   * @param src      input pixels, read as native 16-bit words
   * @param dst      output buffer, receives 4 bytes per input pixel
   * @param src_size number of input bytes (src_size / 2 pixels are converted)
   */
  static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const long pixels = src_size / 2;
      uint8_t *out = dst;
      long n;

      for (n = 0; n < pixels; n++, out += 4) {
          const unsigned v = in[n];

  #if HAVE_BIGENDIAN
          out[0] = 255;
          out[1] = (v & 0x7C00) >> 7;
          out[2] = (v & 0x03E0) >> 2;
          out[3] = (v & 0x001F) << 3;
  #else
          out[0] = (v & 0x001F) << 3;
          out[1] = (v & 0x03E0) >> 2;
          out[2] = (v & 0x7C00) >> 7;
          out[3] = 255;
  #endif
      }
  }
  /**
   * Unpack 16-bit 5-6-5 pixels into 4 bytes per pixel with a constant 255 byte.
   *
   * Each field is placed in the top bits of a byte. Little-endian builds emit
   * the fields from input bits 0..4, 5..10, 11..15 followed by 255; big-endian
   * builds emit 255 first and the three fields in reverse order.
   *
   * @param src      input pixels, read as native 16-bit words
   * @param dst      output buffer, receives 4 bytes per input pixel
   * @param src_size number of input bytes (src_size / 2 pixels are converted)
   */
  static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const long pixels = src_size / 2;
      uint8_t *out = dst;
      long n;

      for (n = 0; n < pixels; n++, out += 4) {
          const unsigned v = in[n];

  #if HAVE_BIGENDIAN
          out[0] = 255;
          out[1] = (v & 0xF800) >> 8;
          out[2] = (v & 0x07E0) >> 3;
          out[3] = (v & 0x001F) << 3;
  #else
          out[0] = (v & 0x001F) << 3;
          out[1] = (v & 0x07E0) >> 3;
          out[2] = (v & 0xF800) >> 8;
          out[3] = 255;
  #endif
      }
  }
  /**
   * Swap the two byte lanes selected by mask 0x00ff00ff in every 32-bit word.
   *
   * Each native 32-bit word keeps the bytes under mask 0xff00ff00 and
   * exchanges the byte in bits 0..7 with the byte in bits 16..23.
   *
   * All arithmetic is done on uint32_t: the former signed-int version
   * overflowed on the left shift, narrowed src_size to int, and indexed
   * through pointers biased outside the buffers.
   *
   * @param src      input, read as native 32-bit words
   * @param dst      output, written as native 32-bit words
   * @param src_size number of input bytes; one word is processed for every
   *                 started group of 4 bytes
   */
  static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      long pos;

      for (pos = 0; pos < src_size; pos += 4) {
          const uint32_t word = *(const uint32_t *)(src + pos);
          const uint32_t keep = word & 0xff00ff00u;
          const uint32_t swap = word & 0x00ff00ffu;

          /* the high lane of swap falls off the top of the left shift */
          *(uint32_t *)(dst + pos) = (swap >> 16) | keep | (swap << 16);
      }
  }
  /**
   * Reverse the byte order inside every 3-byte pixel.
   *
   * Byte 0 and byte 2 of each pixel are exchanged and byte 1 is copied. The
   * source byte 2 is read before any store, so src and dst may be the same
   * buffer.
   *
   * Only whole pixels are converted: trailing bytes that do not form a
   * complete 3-byte pixel are neither read nor written. The index is a long
   * so that it matches the type of src_size.
   *
   * @param src      packed 3-byte input pixels
   * @param dst      output buffer, same layout and size as the input
   * @param src_size number of input bytes
   */
  static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
  {
      long i;

      for (i = 0; src_size - i >= 3; i += 3) {
          const uint8_t third = src[i + 2];

          dst[i + 1] = src[i + 1];
          dst[i + 2] = src[i + 0];
          dst[i + 0] = third;
      }
  }
  /**
   * Interleave planar Y, U and V data into packed Y0 U Y1 V groups.
   *
   * Every output group is assembled as one 32-bit word (or two of them in one
   * 64-bit word when HAVE_FAST_64BIT is set) and stored through a word
   * pointer, so dst is accessed as native 32/64-bit words.
   *
   * The samples are widened to uint32_t before shifting. Shifting a promoted
   * (signed) int left by 24 overflows for samples >= 128, and in the 64-bit
   * path the resulting negative value used to be sign-extended into the
   * upper half of the word.
   *
   * @param ysrc             luma plane
   * @param usrc             first chroma plane
   * @param vsrc             second chroma plane
   * @param dst              packed output
   * @param width            luma samples per row; width >> 1 groups are written
   * @param height           number of rows
   * @param lumStride        byte step between luma rows
   * @param chromStride      byte step between chroma rows
   * @param dstStride        byte step between output rows
   * @param vertLumPerChroma luma rows sharing one chroma row; the test
   *                         (y & (n-1)) == n-1 assumes a power of two
   */
  static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                       const uint8_t *vsrc, uint8_t *dst,
                                       long width, long height,
                                       long lumStride, long chromStride,
                                       long dstStride, long vertLumPerChroma)
  {
      long y;
      const int chromWidth = width >> 1;

      for (y = 0; y < height; y++) {
          const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
          int i;
  #if HAVE_FAST_64BIT
          uint64_t *ldst = (uint64_t *) dst;

          /* two groups per step; the first group occupies the low 32 bits */
          for (i = 0; i < chromWidth; i += 2) {
              const uint64_t k = (uint32_t)yc[0]         | ((uint32_t)uc[0] <<  8) |
                                 ((uint32_t)yc[1] << 16) | ((uint32_t)vc[0] << 24);
              const uint64_t l = (uint32_t)yc[2]         | ((uint32_t)uc[1] <<  8) |
                                 ((uint32_t)yc[3] << 16) | ((uint32_t)vc[1] << 24);

              *ldst++ = k | (l << 32);
              yc += 4;
              uc += 2;
              vc += 2;
          }
  #else
          uint32_t *idst = (uint32_t *) dst;

          for (i = 0; i < chromWidth; i++) {
  #if HAVE_BIGENDIAN
              *idst++ = ((uint32_t)yc[0] << 24) | ((uint32_t)uc[0] << 16) |
                        ((uint32_t)yc[1] <<  8) |  (uint32_t)vc[0];
  #else
              *idst++ =  (uint32_t)yc[0]        | ((uint32_t)uc[0] <<  8) |
                        ((uint32_t)yc[1] << 16) | ((uint32_t)vc[0] << 24);
  #endif
              yc += 2;
              uc++;
              vc++;
          }
  #endif
          /* advance chroma only after the last luma row that shares it */
          if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
              usrc += chromStride;
              vsrc += chromStride;
          }
          ysrc += lumStride;
          dst  += dstStride;
      }
  }
  /**
   * Pack planar data whose chroma rows are shared by two luma rows into YUY2.
   *
   * Height should be a multiple of 2 and width should be a multiple of 16.
   * (If this is a problem for anyone then tell me, and I will fix it.)
   */
  static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                  const uint8_t *vsrc, uint8_t *dst,
                                  long width, long height,
                                  long lumStride, long chromStride,
                                  long dstStride)
  {
      /* every chroma row is reused for two consecutive luma rows */
      const long lumaRowsPerChromaRow = 2;

      //FIXME interpolate chroma
      yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                        width, height,
                        lumStride, chromStride, dstStride,
                        lumaRowsPerChromaRow);
  }
  /**
   * Interleave planar Y, U and V data into packed U Y0 V Y1 groups.
   *
   * Every output group is assembled as one 32-bit word (or two of them in one
   * 64-bit word when HAVE_FAST_64BIT is set) and stored through a word
   * pointer, so dst is accessed as native 32/64-bit words.
   *
   * The samples are widened to uint32_t before shifting. Shifting a promoted
   * (signed) int left by 24 overflows for samples >= 128, and in the 64-bit
   * path the resulting negative value used to be sign-extended into the
   * upper half of the word.
   *
   * @param ysrc             luma plane
   * @param usrc             first chroma plane
   * @param vsrc             second chroma plane
   * @param dst              packed output
   * @param width            luma samples per row; width >> 1 groups are written
   * @param height           number of rows
   * @param lumStride        byte step between luma rows
   * @param chromStride      byte step between chroma rows
   * @param dstStride        byte step between output rows
   * @param vertLumPerChroma luma rows sharing one chroma row; the test
   *                         (y & (n-1)) == n-1 assumes a power of two
   */
  static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                       const uint8_t *vsrc, uint8_t *dst,
                                       long width, long height,
                                       long lumStride, long chromStride,
                                       long dstStride, long vertLumPerChroma)
  {
      long y;
      const int chromWidth = width >> 1;

      for (y = 0; y < height; y++) {
          const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
          int i;
  #if HAVE_FAST_64BIT
          uint64_t *ldst = (uint64_t *) dst;

          /* two groups per step; the first group occupies the low 32 bits */
          for (i = 0; i < chromWidth; i += 2) {
              const uint64_t k = (uint32_t)uc[0]         | ((uint32_t)yc[0] <<  8) |
                                 ((uint32_t)vc[0] << 16) | ((uint32_t)yc[1] << 24);
              const uint64_t l = (uint32_t)uc[1]         | ((uint32_t)yc[2] <<  8) |
                                 ((uint32_t)vc[1] << 16) | ((uint32_t)yc[3] << 24);

              *ldst++ = k | (l << 32);
              yc += 4;
              uc += 2;
              vc += 2;
          }
  #else
          uint32_t *idst = (uint32_t *) dst;

          for (i = 0; i < chromWidth; i++) {
  #if HAVE_BIGENDIAN
              *idst++ = ((uint32_t)uc[0] << 24) | ((uint32_t)yc[0] << 16) |
                        ((uint32_t)vc[0] <<  8) |  (uint32_t)yc[1];
  #else
              *idst++ =  (uint32_t)uc[0]        | ((uint32_t)yc[0] <<  8) |
                        ((uint32_t)vc[0] << 16) | ((uint32_t)yc[1] << 24);
  #endif
              yc += 2;
              uc++;
              vc++;
          }
  #endif
          /* advance chroma only after the last luma row that shares it */
          if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
              usrc += chromStride;
              vsrc += chromStride;
          }
          ysrc += lumStride;
          dst  += dstStride;
      }
  }
  /**
   * Pack planar data whose chroma rows are shared by two luma rows into UYVY.
   *
   * Height should be a multiple of 2 and width should be a multiple of 16
   * (If this is a problem for anyone then tell me, and I will fix it.)
   */
  static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                  const uint8_t *vsrc, uint8_t *dst,
                                  long width, long height,
                                  long lumStride, long chromStride,
                                  long dstStride)
  {
      /* every chroma row is reused for two consecutive luma rows */
      const long lumaRowsPerChromaRow = 2;

      //FIXME interpolate chroma
      yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                        width, height,
                        lumStride, chromStride, dstStride,
                        lumaRowsPerChromaRow);
  }
  /**
   * Pack planar data with one chroma row per luma row into UYVY.
   *
   * Width should be a multiple of 16.
   */
  static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     long width, long height,
                                     long lumStride, long chromStride,
                                     long dstStride)
  {
      /* chroma advances together with luma, row by row */
      const long lumaRowsPerChromaRow = 1;

      yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                        width, height,
                        lumStride, chromStride, dstStride,
                        lumaRowsPerChromaRow);
  }
  /**
   * Pack planar data with one chroma row per luma row into YUY2.
   *
   * Width should be a multiple of 16.
   */
  static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     long width, long height,
                                     long lumStride, long chromStride,
                                     long dstStride)
  {
      /* chroma advances together with luma, row by row */
      const long lumaRowsPerChromaRow = 1;

      yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                        width, height,
                        lumStride, chromStride, dstStride,
                        lumaRowsPerChromaRow);
  }
  /**
   * Split packed Y0 U Y1 V data into separate Y, U and V planes.
   *
   * Rows are handled in pairs: the first row of each pair supplies luma and
   * chroma, the second row supplies luma only (its chroma bytes are skipped).
   *
   * Height should be a multiple of 2 and width should be a multiple of 16.
   * (If this is a problem for anyone then tell me, and I will fix it.)
   */
  static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                  uint8_t *udst, uint8_t *vdst,
                                  long width, long height,
                                  long lumStride, long chromStride,
                                  long srcStride)
  {
      const int chromWidth = width >> 1;
      long row;

      for (row = 0; row < height; row += 2) {
          const uint8_t *in = src;
          long c;

          /* first row of the pair: luma and chroma */
          for (c = 0; c < chromWidth; c++, in += 4) {
              ydst[2*c + 0] = in[0];
              udst[c]       = in[1];
              ydst[2*c + 1] = in[2];
              vdst[c]       = in[3];
          }
          ydst += lumStride;
          src  += srcStride;

          /* second row of the pair: luma only */
          in = src;
          for (c = 0; c < chromWidth; c++, in += 4) {
              ydst[2*c + 0] = in[0];
              ydst[2*c + 1] = in[2];
          }
          ydst += lumStride;
          src  += srcStride;

          udst += chromStride;
          vdst += chromStride;
      }
  }
  /**
   * Enlarge a plane to twice its width and height.
   *
   * The first and last output rows are interpolated horizontally from the
   * first and last source rows with 3:1 weights. Every pair of output rows in
   * between blends two adjacent source rows, again with 3:1 weights, using
   * diagonal neighbours for the interior samples.
   *
   * @param src       source plane
   * @param dst       destination plane (2*srcWidth by 2*srcHeight samples)
   * @param srcWidth  source samples per row
   * @param srcHeight number of source rows
   * @param srcStride byte step between source rows
   * @param dstStride byte step between destination rows
   */
  static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
                                long srcHeight, long srcStride, long dstStride)
  {
      const long last = srcWidth - 1;
      long col, row;

      /* first output row: horizontal interpolation only */
      dst[0] = src[0];
      for (col = 0; col < last; col++) {
          dst[2*col + 1] = (3*src[col] +   src[col + 1]) >> 2;
          dst[2*col + 2] = (  src[col] + 3*src[col + 1]) >> 2;
      }
      dst[2*srcWidth - 1] = src[last];
      dst += dstStride;

      /* two output rows for every pair of adjacent source rows */
      for (row = 1; row < srcHeight; row++) {
          const uint8_t *upper = src;
          const uint8_t *lower = src + srcStride;
          uint8_t *out0 = dst;
          uint8_t *out1 = dst + dstStride;

          out0[0] = (3*upper[0] +   lower[0]) >> 2;
          out1[0] = (  upper[0] + 3*lower[0]) >> 2;

          for (col = 0; col < last; col++) {
              out0[2*col + 1] = (3*upper[col]     +   lower[col + 1]) >> 2;
              out1[2*col + 2] = (  upper[col]     + 3*lower[col + 1]) >> 2;
              out1[2*col + 1] = (  upper[col + 1] + 3*lower[col]    ) >> 2;
              out0[2*col + 2] = (3*upper[col + 1] +   lower[col]    ) >> 2;
          }

          out0[2*srcWidth - 1] = (3*upper[last] +   lower[last]) >> 2;
          out1[2*srcWidth - 1] = (  upper[last] + 3*lower[last]) >> 2;

          dst += dstStride*2;
          src += srcStride;
      }

      /* last output row: horizontal interpolation of the last source row */
      dst[0] = src[0];
      for (col = 0; col < last; col++) {
          dst[2*col + 1] = (3*src[col] +   src[col + 1]) >> 2;
          dst[2*col + 2] = (  src[col] + 3*src[col + 1]) >> 2;
      }
      dst[2*srcWidth - 1] = src[last];
  }
  /**
   * Split packed U Y0 V Y1 data into separate Y, U and V planes.
   *
   * Height should be a multiple of 2 and width should be a multiple of 16.
   * (If this is a problem for anyone then tell me, and I will fix it.)
   * Chrominance data is only taken from every second line, others are ignored.
   * FIXME: Write HQ version.
   */
  static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                  uint8_t *udst, uint8_t *vdst,
                                  long width, long height,
                                  long lumStride, long chromStride,
                                  long srcStride)
  {
      const int chromWidth = width >> 1;
      long row;

      for (row = 0; row < height; row += 2) {
          const uint8_t *in = src;
          long c;

          /* first row of the pair: chroma and luma */
          for (c = 0; c < chromWidth; c++, in += 4) {
              udst[c]       = in[0];
              ydst[2*c + 0] = in[1];
              vdst[c]       = in[2];
              ydst[2*c + 1] = in[3];
          }
          ydst += lumStride;
          src  += srcStride;

          /* second row of the pair: luma only */
          in = src;
          for (c = 0; c < chromWidth; c++, in += 4) {
              ydst[2*c + 0] = in[1];
              ydst[2*c + 1] = in[3];
          }
          ydst += lumStride;
          src  += srcStride;

          udst += chromStride;
          vdst += chromStride;
      }
  }
  597. /**
  598. * Height should be a multiple of 2 and width should be a multiple of 2.
  599. * (If this is a problem for anyone then tell me, and I will fix it.)
  600. * Chrominance data is only taken from every second line,
  601. * others are ignored in the C version.
  602. * FIXME: Write HQ version.
  603. */
  604. static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
  605. uint8_t *udst, uint8_t *vdst,
  606. long width, long height,
  607. long lumStride, long chromStride,
  608. long srcStride)
  609. {
  610. long y;
  611. const int chromWidth = width >> 1;
  612. y=0;
  613. for (; y<height; y+=2) {
  614. long i;
  615. for (i=0; i<chromWidth; i++) {
  616. unsigned int b = src[6*i+0];
  617. unsigned int g = src[6*i+1];
  618. unsigned int r = src[6*i+2];
  619. unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
  620. unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
  621. unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
  622. udst[i] = U;
  623. vdst[i] = V;
  624. ydst[2*i] = Y;
  625. b = src[6*i+3];
  626. g = src[6*i+4];
  627. r = src[6*i+5];
  628. Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
  629. ydst[2*i+1] = Y;
  630. }
  631. ydst += lumStride;
  632. src += srcStride;
  633. for (i=0; i<chromWidth; i++) {
  634. unsigned int b = src[6*i+0];
  635. unsigned int g = src[6*i+1];
  636. unsigned int r = src[6*i+2];
  637. unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
  638. ydst[2*i] = Y;
  639. b = src[6*i+3];
  640. g = src[6*i+4];
  641. r = src[6*i+5];
  642. Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
  643. ydst[2*i+1] = Y;
  644. }
  645. udst += chromStride;
  646. vdst += chromStride;
  647. ydst += lumStride;
  648. src += srcStride;
  649. }
  650. }
  /**
   * Interleave two byte planes into one: output byte 2*w comes from src1 and
   * output byte 2*w+1 from src2, row by row.
   *
   * @param src1       plane supplying the even output bytes
   * @param src2       plane supplying the odd output bytes
   * @param dest       output plane, 2*width bytes per row
   * @param width      bytes taken from each source per row
   * @param height     number of rows
   * @param src1Stride byte step between rows of src1
   * @param src2Stride byte step between rows of src2
   * @param dstStride  byte step between rows of dest
   */
  static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                                uint8_t *dest, long width,
                                long height, long src1Stride,
                                long src2Stride, long dstStride)
  {
      long row;

      for (row = 0; row < height; row++) {
          uint8_t *out = dest;
          long col;

          for (col = 0; col < width; col++, out += 2) {
              out[0] = src1[col];
              out[1] = src2[col];
          }
          dest += dstStride;
          src1 += src1Stride;
          src2 += src2Stride;
      }
  }
  /**
   * Enlarge two planes by sample duplication.
   *
   * For each of the two planes, height/2 output rows are produced; output
   * row y is built from source row y>>1, and every source sample is written
   * twice side by side (width/2 source samples per row).
   *
   * @param src1       first source plane
   * @param src2       second source plane
   * @param dst1       first destination plane
   * @param dst2       second destination plane
   * @param width      width/2 source samples are read per row
   * @param height     height/2 output rows are written per plane
   * @param srcStride1 byte step between rows of src1
   * @param srcStride2 byte step between rows of src2
   * @param dstStride1 byte step between rows of dst1
   * @param dstStride2 byte step between rows of dst2
   */
  static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                   uint8_t *dst1, uint8_t *dst2,
                                   long width, long height,
                                   long srcStride1, long srcStride2,
                                   long dstStride1, long dstStride2)
  {
      const long cols = width / 2;
      const long rows = height / 2;
      long row, col;

      /* first plane */
      for (row = 0; row < rows; row++) {
          const uint8_t *in = src1 + srcStride1*(row >> 1);
          uint8_t *out = dst1 + dstStride1*row;

          for (col = 0; col < cols; col++) {
              out[2*col + 1] = in[col];
              out[2*col]     = out[2*col + 1];
          }
      }

      /* second plane */
      for (row = 0; row < rows; row++) {
          const uint8_t *in = src2 + srcStride2*(row >> 1);
          uint8_t *out = dst2 + dstStride2*row;

          for (col = 0; col < cols; col++) {
              out[2*col + 1] = in[col];
              out[2*col]     = out[2*col + 1];
          }
      }
  }
  /**
   * Pack a luma plane and two chroma planes into Y U Y V byte groups.
   *
   * For every output row, width/2 groups of 8 bytes are written. Each group
   * takes four consecutive samples from src1 and one sample each from src2
   * and src3, both of which are written twice. The chroma row used for output
   * row y is y>>2.
   *
   * NOTE(review): each row reads 2*width samples from src1 and writes 4*width
   * bytes; confirm against the callers that this is the intended meaning of
   * width.
   *
   * @param src1       luma plane
   * @param src2       plane written at offsets 1 and 5 of each group
   * @param src3       plane written at offsets 3 and 7 of each group
   * @param dst        packed output
   * @param width      width/2 groups are produced per row
   * @param height     number of output rows
   * @param srcStride1 byte step between rows of src1
   * @param srcStride2 byte step between rows of src2
   * @param srcStride3 byte step between rows of src3
   * @param dstStride  byte step between rows of dst
   */
  static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                    const uint8_t *src3, uint8_t *dst,
                                    long width, long height,
                                    long srcStride1, long srcStride2,
                                    long srcStride3, long dstStride)
  {
      const long groups = width / 2;
      long row;

      for (row = 0; row < height; row++) {
          const uint8_t *luma = src1 + srcStride1*row;
          const uint8_t *cu   = src2 + srcStride2*(row >> 2);
          const uint8_t *cv   = src3 + srcStride3*(row >> 2);
          uint8_t *out = dst + dstStride*row;
          long g;

          for (g = 0; g < groups; g++, luma += 4, out += 8) {
              out[0] = luma[0];
              out[1] = cu[g];
              out[2] = luma[1];
              out[3] = cv[g];
              out[4] = luma[2];
              out[5] = cu[g];
              out[6] = luma[3];
              out[7] = cv[g];
          }
      }
  }
  /**
   * Copy every second byte of src, starting with byte 0, into dst.
   *
   * @param src   input, 2*count bytes are spanned
   * @param dst   output, receives count bytes
   * @param count number of bytes to produce
   */
  static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
  {
      int i;

      for (i = 0; i < count; i++)
          dst[i] = src[2*i];
  }
  /**
   * De-interleave bytes 0 and 2 of every 4-byte group into two outputs.
   *
   * @param src   input, 4 bytes per group
   * @param dst0  receives byte 0 of each group
   * @param dst1  receives byte 2 of each group
   * @param count number of groups
   */
  static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                              int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = src[4*i + 0];
          dst1[i] = src[4*i + 2];
      }
  }
  /**
   * De-interleave bytes 0 and 2 of every 4-byte group, averaging two inputs.
   *
   * Each output byte is (a + b) >> 1 of the corresponding bytes of src0 and
   * src1, i.e. the average rounded down.
   *
   * @param src0  first input, 4 bytes per group
   * @param src1  second input, 4 bytes per group
   * @param dst0  receives the averaged byte 0 of each group
   * @param dst1  receives the averaged byte 2 of each group
   * @param count number of groups
   */
  static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                                 uint8_t *dst0, uint8_t *dst1, int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = (src0[4*i + 0] + src1[4*i + 0]) >> 1;
          dst1[i] = (src0[4*i + 2] + src1[4*i + 2]) >> 1;
      }
  }
  /**
   * De-interleave bytes 1 and 3 of every 4-byte group into two outputs.
   *
   * @param src   input, 4 bytes per group
   * @param dst0  receives byte 1 of each group
   * @param dst1  receives byte 3 of each group
   * @param count number of groups
   */
  static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                             int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = src[4*i + 1];
          dst1[i] = src[4*i + 3];
      }
  }
  /**
   * De-interleave bytes 1 and 3 of every 4-byte group, averaging two inputs.
   *
   * Each output byte is (a + b) >> 1 of the corresponding bytes of src0 and
   * src1, i.e. the average rounded down.
   *
   * @param src0  first input, 4 bytes per group
   * @param src1  second input, 4 bytes per group
   * @param dst0  receives the averaged byte 1 of each group
   * @param dst1  receives the averaged byte 3 of each group
   * @param count number of groups
   */
  static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                                uint8_t *dst0, uint8_t *dst1, int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = (src0[4*i + 1] + src1[4*i + 1]) >> 1;
          dst1[i] = (src0[4*i + 3] + src1[4*i + 3]) >> 1;
      }
  }
  /**
   * Split packed Y U Y V rows into planar Y, U, V with chroma on every
   * second row.
   *
   * Luma is extracted from the even bytes of every row. On odd rows the
   * chroma (odd bytes) of the current and the previous source row are
   * averaged into one chroma row. The chroma width is -((-width) >> 1),
   * which rounds width/2 up when >> on a negative value is an arithmetic
   * shift.
   */
  static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, long width, long height,
                             long lumStride, long chromStride, long srcStride)
  {
      const long chromWidth = -((-width)>>1);
      long row;

      for (row = 0; row < height; row++, src += srcStride, ydst += lumStride) {
          extract_even_c(src, ydst, width);

          if (!(row & 1))
              continue;

          /* odd row: blend chroma with the row above */
          extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
          udst += chromStride;
          vdst += chromStride;
      }
  }
  /**
   * Split packed Y U Y V rows into planar Y, U, V with chroma on every row.
   *
   * Luma is extracted from the even bytes and chroma from the odd bytes of
   * each row. The chroma width is -((-width) >> 1), which rounds width/2 up
   * when >> on a negative value is an arithmetic shift.
   */
  static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, long width, long height,
                             long lumStride, long chromStride, long srcStride)
  {
      const long chromWidth = -((-width)>>1);
      long row;

      for (row = 0; row < height; row++) {
          extract_even_c(src, ydst, width);
          extract_odd2_c(src, udst, vdst, chromWidth);

          src  += srcStride;
          ydst += lumStride;
          udst += chromStride;
          vdst += chromStride;
      }
  }
  /**
   * Split packed U Y V Y rows into planar Y, U, V with chroma on every
   * second row.
   *
   * Luma is extracted from the odd bytes of every row (even bytes of src+1).
   * On odd rows the chroma (even bytes) of the current and the previous
   * source row are averaged into one chroma row. The chroma width is
   * -((-width) >> 1), which rounds width/2 up when >> on a negative value is
   * an arithmetic shift.
   */
  static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, long width, long height,
                             long lumStride, long chromStride, long srcStride)
  {
      const long chromWidth = -((-width)>>1);
      long row;

      for (row = 0; row < height; row++, src += srcStride, ydst += lumStride) {
          extract_even_c(src + 1, ydst, width);

          if (!(row & 1))
              continue;

          /* odd row: blend chroma with the row above */
          extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
          udst += chromStride;
          vdst += chromStride;
      }
  }
  /**
   * Split packed U Y V Y rows into planar Y, U, V with chroma on every row.
   *
   * Luma is extracted from the odd bytes (even bytes of src+1) and chroma
   * from the even bytes of each row. The chroma width is -((-width) >> 1),
   * which rounds width/2 up when >> on a negative value is an arithmetic
   * shift.
   */
  static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, long width, long height,
                             long lumStride, long chromStride, long srcStride)
  {
      const long chromWidth = -((-width)>>1);
      long row;

      for (row = 0; row < height; row++) {
          extract_even_c(src + 1, ydst, width);
          extract_even2_c(src, udst, vdst, chromWidth);

          src  += srcStride;
          ydst += lumStride;
          udst += chromStride;
          vdst += chromStride;
      }
  }
  849. static inline void rgb2rgb_init_c(void)
  850. {
  851. rgb15to16 = rgb15to16_c;
  852. rgb15tobgr24 = rgb15tobgr24_c;
  853. rgb15to32 = rgb15to32_c;
  854. rgb16tobgr24 = rgb16tobgr24_c;
  855. rgb16to32 = rgb16to32_c;
  856. rgb16to15 = rgb16to15_c;
  857. rgb24tobgr16 = rgb24tobgr16_c;
  858. rgb24tobgr15 = rgb24tobgr15_c;
  859. rgb24tobgr32 = rgb24tobgr32_c;
  860. rgb32to16 = rgb32to16_c;
  861. rgb32to15 = rgb32to15_c;
  862. rgb32tobgr24 = rgb32tobgr24_c;
  863. rgb24to15 = rgb24to15_c;
  864. rgb24to16 = rgb24to16_c;
  865. rgb24tobgr24 = rgb24tobgr24_c;
  866. shuffle_bytes_2103 = shuffle_bytes_2103_c;
  867. rgb32tobgr16 = rgb32tobgr16_c;
  868. rgb32tobgr15 = rgb32tobgr15_c;
  869. yv12toyuy2 = yv12toyuy2_c;
  870. yv12touyvy = yv12touyvy_c;
  871. yuv422ptoyuy2 = yuv422ptoyuy2_c;
  872. yuv422ptouyvy = yuv422ptouyvy_c;
  873. yuy2toyv12 = yuy2toyv12_c;
  874. planar2x = planar2x_c;
  875. rgb24toyv12 = rgb24toyv12_c;
  876. interleaveBytes = interleaveBytes_c;
  877. vu9_to_vu12 = vu9_to_vu12_c;
  878. yvu9_to_yuy2 = yvu9_to_yuy2_c;
  879. uyvytoyuv420 = uyvytoyuv420_c;
  880. uyvytoyuv422 = uyvytoyuv422_c;
  881. yuyvtoyuv420 = yuyvtoyuv420_c;
  882. yuyvtoyuv422 = yuyvtoyuv422_c;
  883. }