cabac.h 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758
  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  3. * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file libavcodec/cabac.h
  23. * Context Adaptive Binary Arithmetic Coder.
  24. */
  25. #ifndef AVCODEC_CABAC_H
  26. #define AVCODEC_CABAC_H
  27. #include "bitstream.h"
  28. //#undef NDEBUG
  29. #include <assert.h>
  30. #include "libavutil/x86_cpu.h"
  /* Number of bitstream bits fetched per refill: refill()/refill2() read two
   * bytes when this is 16 and a single byte otherwise. */
  31. #define CABAC_BITS 16
  /* Mask of the low CABAC_BITS bits of CABACContext.low; the decoder calls a
   * refill helper whenever (low & CABAC_MASK) is zero. */
  32. #define CABAC_MASK ((1<<CABAC_BITS)-1)
  /* Checked with #ifndef in get_cabac_inline(), so being defined at all (with
   * any value) selects the branchless decode paths. */
  33. #define BRANCHLESS_CABAC_DECODER 1
  /* Uncommenting this switches renorm_cabac_decoder_once() to its inline-asm body. */
  34. //#define ARCH_X86_DISABLED 1
  /**
   * Shared state of the CABAC encoder and decoder.
   *
   * NOTE: the inline asm in get_cabac_inline() and get_cabac_bypass_sign()
   * addresses low, range and bytestream through hard-coded byte offsets (the
   * LOW, RANGE and BYTE string macros), so members must not be reordered or
   * resized without updating those macros.
   * NOTE(review): those offsets do not account for symCount; enabling
   * STRICT_LIMITS on a 32-bit x86 asm build looks like it would shift the
   * pointer members -- confirm before combining the two.
   */
  35. typedef struct CABACContext{
  /* Lower bound of the coding interval. The encoder compares it against
   * 0x100..0x400, the decoder against range<<(CABAC_BITS+1). */
  36. int low;
  /* Width of the coding interval; renormalisation doubles it while it is < 0x100. */
  37. int range;
  /* Encoder only: bits whose value is still undecided; put_cabac_bit() emits
   * them as the inverse of the next resolved bit. */
  38. int outstanding_count;
  39. #ifdef STRICT_LIMITS
  /* Incremented once per symbol by the put_cabac*() encoder functions. */
  40. int symCount;
  41. #endif
  /* Start of the input; get_cabac_terminate() reports bytestream - bytestream_start. */
  42. const uint8_t *bytestream_start;
  /* Decoder read position, advanced by the refill code. */
  43. const uint8_t *bytestream;
  /* NOTE(review): presumably one past the last input byte; it is set outside
   * this header -- confirm against ff_init_cabac_decoder(). */
  44. const uint8_t *bytestream_end;
  /* Bit writer used by the encoder functions (put_bits/flush_put_bits). */
  45. PutBitContext pb;
  46. }CABACContext;
  /* Lookup tables and out-of-line initialisers; none of them is defined in this header. */
  /* Combined state transition table. The branchless decoder indexes it as
   * (ff_h264_mlps_state+128)[s], where s has been bit-inverted (and is therefore
   * negative) for the LPS case, hence the offset into the middle of the array. */
  47. extern uint8_t ff_h264_mlps_state[4*64];
  /* Indexed as [2*(range&0xC0) + state] by both the encoder and the decoder. */
  48. extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS
  /* Indexed by the current state; yields the state after coding the MPS. */
  49. extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS
  /* Indexed by the current state; yields the state after coding the LPS. */
  50. extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS
  /* Shift counts used for renormalisation: indexed by range or RangeLPS in
   * get_cabac_inline() and by a value derived from low in refill2(). */
  51. extern const uint8_t ff_h264_norm_shift[512];
  /* NOTE(review): the three initialisers below are implemented elsewhere; their
   * exact behaviour cannot be confirmed from this header. */
  52. void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
  53. void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
  54. void ff_init_cabac_states(CABACContext *c);
  55. static inline void put_cabac_bit(CABACContext *c, int b){
  56. put_bits(&c->pb, 1, b);
  57. for(;c->outstanding_count; c->outstanding_count--){
  58. put_bits(&c->pb, 1, 1-b);
  59. }
  60. }
  61. static inline void renorm_cabac_encoder(CABACContext *c){
  62. while(c->range < 0x100){
  63. //FIXME optimize
  64. if(c->low<0x100){
  65. put_cabac_bit(c, 0);
  66. }else if(c->low<0x200){
  67. c->outstanding_count++;
  68. c->low -= 0x100;
  69. }else{
  70. put_cabac_bit(c, 1);
  71. c->low -= 0x200;
  72. }
  73. c->range+= c->range;
  74. c->low += c->low;
  75. }
  76. }
  77. #ifdef TEST
  78. static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
  79. int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];
  80. if(bit == ((*state)&1)){
  81. c->range -= RangeLPS;
  82. *state= ff_h264_mps_state[*state];
  83. }else{
  84. c->low += c->range - RangeLPS;
  85. c->range = RangeLPS;
  86. *state= ff_h264_lps_state[*state];
  87. }
  88. renorm_cabac_encoder(c);
  89. #ifdef STRICT_LIMITS
  90. c->symCount++;
  91. #endif
  92. }
  93. static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
  94. assert(c->range > RangeLPS);
  95. if(!bit){
  96. c->range -= RangeLPS;
  97. }else{
  98. c->low += c->range - RangeLPS;
  99. c->range = RangeLPS;
  100. }
  101. renorm_cabac_encoder(c);
  102. #ifdef STRICT_LIMITS
  103. c->symCount++;
  104. #endif
  105. }
  106. /**
  107. * @param bit 0 -> write zero bit, !=0 write one bit
  108. */
  109. static void put_cabac_bypass(CABACContext *c, int bit){
  110. c->low += c->low;
  111. if(bit){
  112. c->low += c->range;
  113. }
  114. //FIXME optimize
  115. if(c->low<0x200){
  116. put_cabac_bit(c, 0);
  117. }else if(c->low<0x400){
  118. c->outstanding_count++;
  119. c->low -= 0x200;
  120. }else{
  121. put_cabac_bit(c, 1);
  122. c->low -= 0x400;
  123. }
  124. #ifdef STRICT_LIMITS
  125. c->symCount++;
  126. #endif
  127. }
  128. /**
  129. *
  130. * @return the number of bytes written
  131. */
  132. static int put_cabac_terminate(CABACContext *c, int bit){
  133. c->range -= 2;
  134. if(!bit){
  135. renorm_cabac_encoder(c);
  136. }else{
  137. c->low += c->range;
  138. c->range= 2;
  139. renorm_cabac_encoder(c);
  140. assert(c->low <= 0x1FF);
  141. put_cabac_bit(c, c->low>>9);
  142. put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
  143. flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
  144. }
  145. #ifdef STRICT_LIMITS
  146. c->symCount++;
  147. #endif
  148. return (put_bits_count(&c->pb)+7)>>3;
  149. }
  150. /**
  151. * put (truncated) unary binarization.
  152. */
  153. static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
  154. int i;
  155. assert(v <= max);
  156. #if 1
  157. for(i=0; i<v; i++){
  158. put_cabac(c, state, 1);
  159. if(i < max_index) state++;
  160. }
  161. if(truncated==0 || v<max)
  162. put_cabac(c, state, 0);
  163. #else
  164. if(v <= max_index){
  165. for(i=0; i<v; i++){
  166. put_cabac(c, state+i, 1);
  167. }
  168. if(truncated==0 || v<max)
  169. put_cabac(c, state+i, 0);
  170. }else{
  171. for(i=0; i<=max_index; i++){
  172. put_cabac(c, state+i, 1);
  173. }
  174. for(; i<v; i++){
  175. put_cabac(c, state+max_index, 1);
  176. }
  177. if(truncated==0 || v<max)
  178. put_cabac(c, state+max_index, 0);
  179. }
  180. #endif
  181. }
  182. /**
  183. * put unary exp golomb k-th order binarization.
  184. */
  185. static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
  186. int i;
  187. if(v==0)
  188. put_cabac(c, state, 0);
  189. else{
  190. const int sign= v < 0;
  191. if(is_signed) v= FFABS(v);
  192. if(v<max){
  193. for(i=0; i<v; i++){
  194. put_cabac(c, state, 1);
  195. if(i < max_index) state++;
  196. }
  197. put_cabac(c, state, 0);
  198. }else{
  199. int m= 1<<k;
  200. for(i=0; i<max; i++){
  201. put_cabac(c, state, 1);
  202. if(i < max_index) state++;
  203. }
  204. v -= max;
  205. while(v >= m){ //FIXME optimize
  206. put_cabac_bypass(c, 1);
  207. v-= m;
  208. m+= m;
  209. }
  210. put_cabac_bypass(c, 0);
  211. while(m>>=1){
  212. put_cabac_bypass(c, v&m);
  213. }
  214. }
  215. if(is_signed)
  216. put_cabac_bypass(c, sign);
  217. }
  218. }
  219. #endif /* TEST */
  220. static void refill(CABACContext *c){
  221. #if CABAC_BITS == 16
  222. c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  223. #else
  224. c->low+= c->bytestream[0]<<1;
  225. #endif
  226. c->low -= CABAC_MASK;
  227. c->bytestream+= CABAC_BITS/8;
  228. }
  229. #if ! ( ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) )
  230. static void refill2(CABACContext *c){
  231. int i, x;
  232. x= c->low ^ (c->low-1);
  233. i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
  234. x= -CABAC_MASK;
  235. #if CABAC_BITS == 16
  236. x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  237. #else
  238. x+= c->bytestream[0]<<1;
  239. #endif
  240. c->low += x<<i;
  241. c->bytestream+= CABAC_BITS/8;
  242. }
  243. #endif
  244. static inline void renorm_cabac_decoder(CABACContext *c){
  245. while(c->range < 0x100){
  246. c->range+= c->range;
  247. c->low+= c->low;
  248. if(!(c->low & CABAC_MASK))
  249. refill(c);
  250. }
  251. }
  /**
   * Renormalise the decoder by at most one doubling.
   *
   * If range is below 0x100, range and low are both doubled; otherwise they are
   * left alone. Afterwards low is refilled when its low CABAC_BITS bits are all
   * zero.
   *
   * The body is plain C unless ARCH_X86_DISABLED is defined, in which case one
   * of several inline-asm variants is used (the "#elif 1" one is the variant
   * currently selected). The "P3:"/"athlon:" figures on each variant are
   * presumably benchmark timings -- their unit is not stated here.
   */
  252. static inline void renorm_cabac_decoder_once(CABACContext *c){
  253. #ifdef ARCH_X86_DISABLED
  254. int temp;
  255. #if 0
  256. //P3:683 athlon:475
  257. __asm__(
  258. "lea -0x100(%0), %2 \n\t"
  259. "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
  260. "shl %%cl, %0 \n\t"
  261. "shl %%cl, %1 \n\t"
  262. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  263. );
  264. #elif 0
  265. //P3:680 athlon:474
  266. __asm__(
  267. "cmp $0x100, %0 \n\t"
  268. "setb %%cl \n\t" //FIXME 31->63 for x86-64
  269. "shl %%cl, %0 \n\t"
  270. "shl %%cl, %1 \n\t"
  271. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  272. );
  273. #elif 1
  274. int temp2;
  /* Selected variant: cltd turns the sign of (range - 0x100) into an all-ones
   * or all-zero mask in edx; range and low are then increased by themselves
   * ANDed with that mask, i.e. doubled only when range < 0x100. */
  275. //P3:665 athlon:517
  276. __asm__(
  277. "lea -0x100(%0), %%eax \n\t"
  278. "cltd \n\t"
  279. "mov %0, %%eax \n\t"
  280. "and %%edx, %0 \n\t"
  281. "and %1, %%edx \n\t"
  282. "add %%eax, %0 \n\t"
  283. "add %%edx, %1 \n\t"
  284. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  285. );
  286. #elif 0
  287. int temp2;
  288. //P3:673 athlon:509
  289. __asm__(
  290. "cmp $0x100, %0 \n\t"
  291. "sbb %%edx, %%edx \n\t"
  292. "mov %0, %%eax \n\t"
  293. "and %%edx, %0 \n\t"
  294. "and %1, %%edx \n\t"
  295. "add %%eax, %0 \n\t"
  296. "add %%edx, %1 \n\t"
  297. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  298. );
  299. #else
  300. int temp2;
  301. //P3:677 athlon:511
  302. __asm__(
  303. "cmp $0x100, %0 \n\t"
  304. "lea (%0, %0), %%eax \n\t"
  305. "lea (%1, %1), %%edx \n\t"
  306. "cmovb %%eax, %0 \n\t"
  307. "cmovb %%edx, %1 \n\t"
  308. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  309. );
  310. #endif
  311. #else
  312. //P3:675 athlon:476
  /* shift is 1 when range < 0x100 (the subtraction goes negative, so the top
   * bit of the unsigned value is set) and 0 otherwise. */
  313. int shift= (uint32_t)(c->range - 0x100)>>31;
  314. c->range<<= shift;
  315. c->low <<= shift;
  316. #endif
  /* common tail: pull in new input once the low CABAC_BITS bits are used up */
  317. if(!(c->low & CABAC_MASK))
  318. refill(c);
  319. }
  /**
   * Decode one bin with an adaptive context.
   *
   * Three implementations are selected at preprocessing time:
   *  - x86 inline asm with branches (BRANCHLESS_CABAC_DECODER not defined),
   *  - x86 inline asm without branches on the MPS/LPS decision,
   *  - portable C, again in a branching and a branchless flavour.
   *
   * All of them update c->low, c->range, the read position and *state.
   *
   * @param c     decoder context
   * @param state context state byte, updated in place
   * @return the decoded bin, 0 or 1
   */
  320. static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
  321. //FIXME gcc generates duplicate load/stores for c->low and c->range
  /* Byte offsets of CABACContext members, as strings, for use as displacements
   * in the asm operands below. They must be kept in sync with the struct layout.
   * The macros are never #undef'd, and get_cabac_bypass_sign() further down
   * relies on LOW, RANGE and BYTE still being defined. */
  322. #define LOW "0"
  323. #define RANGE "4"
  324. #if ARCH_X86_64
  325. #define BYTESTART "16"
  326. #define BYTE "24"
  327. #define BYTEEND "32"
  328. #else
  329. #define BYTESTART "12"
  330. #define BYTE "16"
  331. #define BYTEEND "20"
  332. #endif
  333. #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
  334. int bit;
  335. #ifndef BRANCHLESS_CABAC_DECODER
  /* Branching asm: the code up to label 1 handles the MPS, label 1 the LPS;
   * both paths join at label 2, where range and low are stored back. */
  336. __asm__ volatile(
  337. "movzbl (%1), %0 \n\t"
  338. "movl "RANGE "(%2), %%ebx \n\t"
  339. "movl "RANGE "(%2), %%edx \n\t"
  340. "andl $0xC0, %%ebx \n\t"
  341. "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
  342. "movl "LOW "(%2), %%ebx \n\t"
  343. //eax:state ebx:low, edx:range, esi:RangeLPS
  344. "subl %%esi, %%edx \n\t"
  345. "movl %%edx, %%ecx \n\t"
  346. "shll $17, %%ecx \n\t"
  347. "cmpl %%ecx, %%ebx \n\t"
  348. " ja 1f \n\t"
  349. #if 1
  350. //athlon:4067 P3:4110
  351. "lea -0x100(%%edx), %%ecx \n\t"
  352. "shr $31, %%ecx \n\t"
  353. "shl %%cl, %%edx \n\t"
  354. "shl %%cl, %%ebx \n\t"
  355. #else
  356. //athlon:4057 P3:4130
  357. "cmp $0x100, %%edx \n\t" //FIXME avoidable
  358. "setb %%cl \n\t"
  359. "shl %%cl, %%edx \n\t"
  360. "shl %%cl, %%ebx \n\t"
  361. #endif
  362. "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx \n\t"
  363. "movb %%cl, (%1) \n\t"
  364. //eax:state ebx:low, edx:range, esi:RangeLPS
  365. "test %%bx, %%bx \n\t"
  366. " jnz 2f \n\t"
  367. "mov "BYTE "(%2), %%"REG_S" \n\t"
  368. "subl $0xFFFF, %%ebx \n\t"
  369. "movzwl (%%"REG_S"), %%ecx \n\t"
  370. "bswap %%ecx \n\t"
  371. "shrl $15, %%ecx \n\t"
  372. "add $2, %%"REG_S" \n\t"
  373. "addl %%ecx, %%ebx \n\t"
  374. "mov %%"REG_S", "BYTE "(%2) \n\t"
  375. "jmp 2f \n\t"
  376. "1: \n\t"
  377. //eax:state ebx:low, edx:range, esi:RangeLPS
  378. "subl %%ecx, %%ebx \n\t"
  379. "movl %%esi, %%edx \n\t"
  380. "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
  381. "shll %%cl, %%ebx \n\t"
  382. "shll %%cl, %%edx \n\t"
  383. "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
  384. "movb %%cl, (%1) \n\t"
  385. "add $1, %0 \n\t"
  386. "test %%bx, %%bx \n\t"
  387. " jnz 2f \n\t"
  388. "mov "BYTE "(%2), %%"REG_c" \n\t"
  389. "movzwl (%%"REG_c"), %%esi \n\t"
  390. "bswap %%esi \n\t"
  391. "shrl $15, %%esi \n\t"
  392. "subl $0xFFFF, %%esi \n\t"
  393. "add $2, %%"REG_c" \n\t"
  394. "mov %%"REG_c", "BYTE "(%2) \n\t"
  395. "leal -1(%%ebx), %%ecx \n\t"
  396. "xorl %%ebx, %%ecx \n\t"
  397. "shrl $15, %%ecx \n\t"
  398. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
  399. "neg %%ecx \n\t"
  400. "add $7, %%ecx \n\t"
  401. "shll %%cl , %%esi \n\t"
  402. "addl %%esi, %%ebx \n\t"
  403. "2: \n\t"
  404. "movl %%edx, "RANGE "(%2) \n\t"
  405. "movl %%ebx, "LOW "(%2) \n\t"
  406. :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used
  407. :"r"(state), "r"(c)
  408. : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
  409. );
  /* eax holds the (possibly incremented) state value; its low bit is the bin */
  410. bit&=1;
  411. #else /* BRANCHLESS_CABAC_DECODER */
  /* BRANCHLESS_GET_CABAC_UPDATE: asm fragment that selects the new range, adjusts
   * low and flips ret for the LPS case without a jump. Two encodings follow, one
   * using cmov and one using mask arithmetic. Several macro parameters are not
   * referenced by the fragment itself. */
  412. #if HAVE_FAST_CMOV
  413. #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
  414. "mov "tmp" , %%ecx \n\t"\
  415. "shl $17 , "tmp" \n\t"\
  416. "cmp "low" , "tmp" \n\t"\
  417. "cmova %%ecx , "range" \n\t"\
  418. "sbb %%ecx , %%ecx \n\t"\
  419. "and %%ecx , "tmp" \n\t"\
  420. "sub "tmp" , "low" \n\t"\
  421. "xor %%ecx , "ret" \n\t"
  422. #else /* HAVE_FAST_CMOV */
  423. #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
  424. "mov "tmp" , %%ecx \n\t"\
  425. "shl $17 , "tmp" \n\t"\
  426. "sub "low" , "tmp" \n\t"\
  427. "sar $31 , "tmp" \n\t" /*lps_mask*/\
  428. "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
  429. "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
  430. "add %%ecx , "range" \n\t" /*new range*/\
  431. "shl $17 , %%ecx \n\t"\
  432. "and "tmp" , %%ecx \n\t"\
  433. "sub %%ecx , "low" \n\t"\
  434. "xor "tmp" , "ret" \n\t"
  435. #endif /* HAVE_FAST_CMOV */
  /* BRANCHLESS_GET_CABAC: complete asm fragment decoding one bin. It loads the
   * state, looks up RangeLPS, applies the update fragment above, renormalises
   * via ff_h264_norm_shift, stores the next state from ff_h264_mlps_state+128
   * and refills low from the bytestream when its low word is zero. The only
   * jump left is the one skipping the refill (label 1). */
  436. #define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
  437. "movzbl "statep" , "ret" \n\t"\
  438. "mov "range" , "tmp" \n\t"\
  439. "and $0xC0 , "range" \n\t"\
  440. "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
  441. "sub "range" , "tmp" \n\t"\
  442. BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
  443. "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
  444. "shl %%cl , "range" \n\t"\
  445. "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
  446. "mov "tmpbyte" , "statep" \n\t"\
  447. "shl %%cl , "low" \n\t"\
  448. "test "lowword" , "lowword" \n\t"\
  449. " jnz 1f \n\t"\
  450. "mov "BYTE"("cabac"), %%"REG_c" \n\t"\
  451. "movzwl (%%"REG_c") , "tmp" \n\t"\
  452. "bswap "tmp" \n\t"\
  453. "shr $15 , "tmp" \n\t"\
  454. "sub $0xFFFF , "tmp" \n\t"\
  455. "add $2 , %%"REG_c" \n\t"\
  456. "mov %%"REG_c" , "BYTE "("cabac") \n\t"\
  457. "lea -1("low") , %%ecx \n\t"\
  458. "xor "low" , %%ecx \n\t"\
  459. "shr $15 , %%ecx \n\t"\
  460. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
  461. "neg %%ecx \n\t"\
  462. "add $7 , %%ecx \n\t"\
  463. "shl %%cl , "tmp" \n\t"\
  464. "add "tmp" , "low" \n\t"\
  465. "1: \n\t"
  /* Instantiate the fragment with range in esi, low in ebx and edx as scratch;
   * range and low are loaded from and stored back to the context around it. */
  466. __asm__ volatile(
  467. "movl "RANGE "(%2), %%esi \n\t"
  468. "movl "LOW "(%2), %%ebx \n\t"
  469. BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
  470. "movl %%esi, "RANGE "(%2) \n\t"
  471. "movl %%ebx, "LOW "(%2) \n\t"
  472. :"=&a"(bit)
  473. :"r"(state), "r"(c)
  474. : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
  475. );
  476. bit&=1;
  477. #endif /* BRANCHLESS_CABAC_DECODER */
  478. #else /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */
  /* Portable C implementation. */
  479. int s = *state;
  480. int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
  481. int bit, lps_mask av_unused;
  /* range now holds the width of the MPS sub-range */
  482. c->range -= RangeLPS;
  483. #ifndef BRANCHLESS_CABAC_DECODER
  /* low below the scaled MPS width means the MPS was coded */
  484. if(c->low < (c->range<<(CABAC_BITS+1))){
  485. bit= s&1;
  486. *state= ff_h264_mps_state[s];
  487. renorm_cabac_decoder_once(c);
  488. }else{
  /* LPS: bit is used as a temporary for the renormalisation shift before it
   * receives the decoded value (the inverse of the state's low bit) */
  489. bit= ff_h264_norm_shift[RangeLPS];
  490. c->low -= (c->range<<(CABAC_BITS+1));
  491. *state= ff_h264_lps_state[s];
  492. c->range = RangeLPS<<bit;
  493. c->low <<= bit;
  494. bit= (s&1)^1;
  495. if(!(c->low & CABAC_MASK)){
  496. refill2(c);
  497. }
  498. }
  499. #else /* BRANCHLESS_CABAC_DECODER */
  /* lps_mask is all ones when low exceeds the scaled MPS width (LPS) and zero
   * otherwise; this relies on >> of a negative int being an arithmetic shift. */
  500. lps_mask= ((c->range<<(CABAC_BITS+1)) - c->low)>>31;
  501. c->low -= (c->range<<(CABAC_BITS+1)) & lps_mask;
  502. c->range += (RangeLPS - c->range) & lps_mask;
  /* for the LPS s becomes ~s: a negative index into the table's lower half,
   * with an inverted low bit */
  503. s^=lps_mask;
  504. *state= (ff_h264_mlps_state+128)[s];
  505. bit= s&1;
  /* lps_mask is reused from here on as the renormalisation shift count */
  506. lps_mask= ff_h264_norm_shift[c->range];
  507. c->range<<= lps_mask;
  508. c->low <<= lps_mask;
  509. if(!(c->low & CABAC_MASK))
  510. refill2(c);
  511. #endif /* BRANCHLESS_CABAC_DECODER */
  512. #endif /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */
  513. return bit;
  514. }
  515. static int av_noinline av_unused get_cabac_noinline(CABACContext *c, uint8_t * const state){
  516. return get_cabac_inline(c,state);
  517. }
  518. static int av_unused get_cabac(CABACContext *c, uint8_t * const state){
  519. return get_cabac_inline(c,state);
  520. }
  521. static int av_unused get_cabac_bypass(CABACContext *c){
  522. #if 0 //not faster
  523. int bit;
  524. __asm__ volatile(
  525. "movl "RANGE "(%1), %%ebx \n\t"
  526. "movl "LOW "(%1), %%eax \n\t"
  527. "shl $17, %%ebx \n\t"
  528. "add %%eax, %%eax \n\t"
  529. "sub %%ebx, %%eax \n\t"
  530. "cltd \n\t"
  531. "and %%edx, %%ebx \n\t"
  532. "add %%ebx, %%eax \n\t"
  533. "test %%ax, %%ax \n\t"
  534. " jnz 1f \n\t"
  535. "movl "BYTE "(%1), %%"REG_b" \n\t"
  536. "subl $0xFFFF, %%eax \n\t"
  537. "movzwl (%%"REG_b"), %%ecx \n\t"
  538. "bswap %%ecx \n\t"
  539. "shrl $15, %%ecx \n\t"
  540. "addl $2, %%"REG_b" \n\t"
  541. "addl %%ecx, %%eax \n\t"
  542. "movl %%"REG_b", "BYTE "(%1) \n\t"
  543. "1: \n\t"
  544. "movl %%eax, "LOW "(%1) \n\t"
  545. :"=&d"(bit)
  546. :"r"(c)
  547. : "%eax", "%"REG_b, "%ecx", "memory"
  548. );
  549. return bit+1;
  550. #else
  551. int range;
  552. c->low += c->low;
  553. if(!(c->low & CABAC_MASK))
  554. refill(c);
  555. range= c->range<<(CABAC_BITS+1);
  556. if(c->low < range){
  557. return 0;
  558. }else{
  559. c->low -= range;
  560. return 1;
  561. }
  562. #endif
  563. }
  564. static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
  565. #if ARCH_X86 && !(defined(PIC) && defined(__GNUC__))
  566. __asm__ volatile(
  567. "movl "RANGE "(%1), %%ebx \n\t"
  568. "movl "LOW "(%1), %%eax \n\t"
  569. "shl $17, %%ebx \n\t"
  570. "add %%eax, %%eax \n\t"
  571. "sub %%ebx, %%eax \n\t"
  572. "cltd \n\t"
  573. "and %%edx, %%ebx \n\t"
  574. "add %%ebx, %%eax \n\t"
  575. "xor %%edx, %%ecx \n\t"
  576. "sub %%edx, %%ecx \n\t"
  577. "test %%ax, %%ax \n\t"
  578. " jnz 1f \n\t"
  579. "mov "BYTE "(%1), %%"REG_b" \n\t"
  580. "subl $0xFFFF, %%eax \n\t"
  581. "movzwl (%%"REG_b"), %%edx \n\t"
  582. "bswap %%edx \n\t"
  583. "shrl $15, %%edx \n\t"
  584. "add $2, %%"REG_b" \n\t"
  585. "addl %%edx, %%eax \n\t"
  586. "mov %%"REG_b", "BYTE "(%1) \n\t"
  587. "1: \n\t"
  588. "movl %%eax, "LOW "(%1) \n\t"
  589. :"+c"(val)
  590. :"r"(c)
  591. : "%eax", "%"REG_b, "%edx", "memory"
  592. );
  593. return val;
  594. #else
  595. int range, mask;
  596. c->low += c->low;
  597. if(!(c->low & CABAC_MASK))
  598. refill(c);
  599. range= c->range<<(CABAC_BITS+1);
  600. c->low -= range;
  601. mask= c->low >> 31;
  602. range &= mask;
  603. c->low += range;
  604. return (val^mask)-mask;
  605. #endif
  606. }
  607. /**
  608. *
  609. * @return the number of bytes read or 0 if no end
  610. */
  611. static int av_unused get_cabac_terminate(CABACContext *c){
  612. c->range -= 2;
  613. if(c->low < c->range<<(CABAC_BITS+1)){
  614. renorm_cabac_decoder_once(c);
  615. return 0;
  616. }else{
  617. return c->bytestream - c->bytestream_start;
  618. }
  619. }
  620. #if 0
  621. /**
  622. * Get (truncated) unary binarization.
  623. */
  624. static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
  625. int i;
  626. for(i=0; i<max; i++){
  627. if(get_cabac(c, state)==0)
  628. return i;
  629. if(i< max_index) state++;
  630. }
  631. return truncated ? max : -1;
  632. }
  633. /**
  634. * get unary exp golomb k-th order binarization.
  635. */
  636. static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
  637. int i, v;
  638. int m= 1<<k;
  639. if(get_cabac(c, state)==0)
  640. return 0;
  641. if(0 < max_index) state++;
  642. for(i=1; i<max; i++){
  643. if(get_cabac(c, state)==0){
  644. if(is_signed && get_cabac_bypass(c)){
  645. return -i;
  646. }else
  647. return i;
  648. }
  649. if(i < max_index) state++;
  650. }
  651. while(get_cabac_bypass(c)){
  652. i+= m;
  653. m+= m;
  654. }
  655. v=0;
  656. while(m>>=1){
  657. v+= v + get_cabac_bypass(c);
  658. }
  659. i += v;
  660. if(is_signed && get_cabac_bypass(c)){
  661. return -i;
  662. }else
  663. return i;
  664. }
  665. #endif /* 0 */
  666. #endif /* AVCODEC_CABAC_H */