t1.c 95 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741
  1. /*
  2. * The copyright in this software is being made available under the 2-clauses
  3. * BSD License, included below. This software may be subject to other third
  4. * party and contributor rights, including patent rights, and no such rights
  5. * are granted under this license.
  6. *
  7. * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
  8. * Copyright (c) 2002-2014, Professor Benoit Macq
  9. * Copyright (c) 2001-2003, David Janssens
  10. * Copyright (c) 2002-2003, Yannick Verschueren
  11. * Copyright (c) 2003-2007, Francois-Olivier Devaux
  12. * Copyright (c) 2003-2014, Antonin Descampe
  13. * Copyright (c) 2005, Herve Drolon, FreeImage Team
  14. * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
  15. * Copyright (c) 2012, Carl Hetherington
  16. * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
  17. * All rights reserved.
  18. *
  19. * Redistribution and use in source and binary forms, with or without
  20. * modification, are permitted provided that the following conditions
  21. * are met:
  22. * 1. Redistributions of source code must retain the above copyright
  23. * notice, this list of conditions and the following disclaimer.
  24. * 2. Redistributions in binary form must reproduce the above copyright
  25. * notice, this list of conditions and the following disclaimer in the
  26. * documentation and/or other materials provided with the distribution.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
  29. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  32. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  33. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  34. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  35. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  36. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  37. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. */
  40. #define OPJ_SKIP_POISON
  41. #include "opj_includes.h"
  42. #ifdef __SSE__
  43. #include <xmmintrin.h>
  44. #endif
  45. #ifdef __SSE2__
  46. #include <emmintrin.h>
  47. #endif
  48. #if (defined(__AVX2__) || defined(__AVX512F__))
  49. #include <immintrin.h>
  50. #endif
  51. #if defined(__GNUC__)
  52. #pragma GCC poison malloc calloc realloc free
  53. #endif
  54. #include "t1_luts.h"
  55. /** @defgroup T1 T1 - Implementation of the tier-1 coding */
  56. /*@{*/
  57. #define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)])
  58. #define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
  59. /* Macros to deal with signed integer with just MSB bit set for
  60. * negative values (smr = signed magnitude representation) */
  61. #define opj_smr_abs(x) (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
  62. #define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
  63. #define opj_to_smr(x) ((x) >= 0 ? (OPJ_UINT32)(x) : ((OPJ_UINT32)(-x) | 0x80000000U))
  64. /** @name Local static functions */
  65. /*@{*/
  66. static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
  67. static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
  68. static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
  69. static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
  70. static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
  71. OPJ_UINT32 s, OPJ_UINT32 stride,
  72. OPJ_UINT32 vsc);
  73. /**
  74. Decode significant pass
  75. */
  76. static INLINE void opj_t1_dec_sigpass_step_raw(
  77. opj_t1_t *t1,
  78. opj_flag_t *flagsp,
  79. OPJ_INT32 *datap,
  80. OPJ_INT32 oneplushalf,
  81. OPJ_UINT32 vsc,
  82. OPJ_UINT32 row);
  83. static INLINE void opj_t1_dec_sigpass_step_mqc(
  84. opj_t1_t *t1,
  85. opj_flag_t *flagsp,
  86. OPJ_INT32 *datap,
  87. OPJ_INT32 oneplushalf,
  88. OPJ_UINT32 row,
  89. OPJ_UINT32 flags_stride,
  90. OPJ_UINT32 vsc);
  91. /**
  92. Encode significant pass
  93. */
  94. static void opj_t1_enc_sigpass(opj_t1_t *t1,
  95. OPJ_INT32 bpno,
  96. OPJ_INT32 *nmsedec,
  97. OPJ_BYTE type,
  98. OPJ_UINT32 cblksty);
  99. /**
  100. Decode significant pass
  101. */
  102. static void opj_t1_dec_sigpass_raw(
  103. opj_t1_t *t1,
  104. OPJ_INT32 bpno,
  105. OPJ_INT32 cblksty);
  106. /**
  107. Encode refinement pass
  108. */
  109. static void opj_t1_enc_refpass(opj_t1_t *t1,
  110. OPJ_INT32 bpno,
  111. OPJ_INT32 *nmsedec,
  112. OPJ_BYTE type);
  113. /**
  114. Decode refinement pass
  115. */
  116. static void opj_t1_dec_refpass_raw(
  117. opj_t1_t *t1,
  118. OPJ_INT32 bpno);
  119. /**
  120. Decode refinement pass
  121. */
  122. static INLINE void opj_t1_dec_refpass_step_raw(
  123. opj_t1_t *t1,
  124. opj_flag_t *flagsp,
  125. OPJ_INT32 *datap,
  126. OPJ_INT32 poshalf,
  127. OPJ_UINT32 row);
  128. static INLINE void opj_t1_dec_refpass_step_mqc(
  129. opj_t1_t *t1,
  130. opj_flag_t *flagsp,
  131. OPJ_INT32 *datap,
  132. OPJ_INT32 poshalf,
  133. OPJ_UINT32 row);
  134. /**
  135. Decode clean-up pass
  136. */
  137. static void opj_t1_dec_clnpass_step(
  138. opj_t1_t *t1,
  139. opj_flag_t *flagsp,
  140. OPJ_INT32 *datap,
  141. OPJ_INT32 oneplushalf,
  142. OPJ_UINT32 row,
  143. OPJ_UINT32 vsc);
  144. /**
  145. Encode clean-up pass
  146. */
  147. static void opj_t1_enc_clnpass(
  148. opj_t1_t *t1,
  149. OPJ_INT32 bpno,
  150. OPJ_INT32 *nmsedec,
  151. OPJ_UINT32 cblksty);
  152. static OPJ_FLOAT64 opj_t1_getwmsedec(
  153. OPJ_INT32 nmsedec,
  154. OPJ_UINT32 compno,
  155. OPJ_UINT32 level,
  156. OPJ_UINT32 orient,
  157. OPJ_INT32 bpno,
  158. OPJ_UINT32 qmfbid,
  159. OPJ_FLOAT64 stepsize,
  160. OPJ_UINT32 numcomps,
  161. const OPJ_FLOAT64 * mct_norms,
  162. OPJ_UINT32 mct_numcomps);
  163. /** Return "cumwmsedec" that should be used to increase tile->distotile */
  164. static double opj_t1_encode_cblk(opj_t1_t *t1,
  165. opj_tcd_cblk_enc_t* cblk,
  166. OPJ_UINT32 orient,
  167. OPJ_UINT32 compno,
  168. OPJ_UINT32 level,
  169. OPJ_UINT32 qmfbid,
  170. OPJ_FLOAT64 stepsize,
  171. OPJ_UINT32 cblksty,
  172. OPJ_UINT32 numcomps,
  173. const OPJ_FLOAT64 * mct_norms,
  174. OPJ_UINT32 mct_numcomps);
  175. /**
  176. Decode 1 code-block
  177. @param t1 T1 handle
  178. @param cblk Code-block coding parameters
  179. @param orient
  180. @param roishift Region of interest shifting value
  181. @param cblksty Code-block style
  182. @param p_manager the event manager
  183. @param p_manager_mutex mutex for the event manager
  184. @param check_pterm whether PTERM correct termination should be checked
  185. */
  186. static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
  187. opj_tcd_cblk_dec_t* cblk,
  188. OPJ_UINT32 orient,
  189. OPJ_UINT32 roishift,
  190. OPJ_UINT32 cblksty,
  191. opj_event_mgr_t *p_manager,
  192. opj_mutex_t* p_manager_mutex,
  193. OPJ_BOOL check_pterm);
  194. /**
  195. Decode 1 HT code-block
  196. @param t1 T1 handle
  197. @param cblk Code-block coding parameters
  198. @param orient
  199. @param roishift Region of interest shifting value
  200. @param cblksty Code-block style
  201. @param p_manager the event manager
  202. @param p_manager_mutex mutex for the event manager
  203. @param check_pterm whether PTERM correct termination should be checked
  204. */
  205. OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
  206. opj_tcd_cblk_dec_t* cblk,
  207. OPJ_UINT32 orient,
  208. OPJ_UINT32 roishift,
  209. OPJ_UINT32 cblksty,
  210. opj_event_mgr_t *p_manager,
  211. opj_mutex_t* p_manager_mutex,
  212. OPJ_BOOL check_pterm);
  213. static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
  214. OPJ_UINT32 w,
  215. OPJ_UINT32 h);
  216. /*@}*/
  217. /*@}*/
  218. /* ----------------------------------------------------------------------- */
  219. static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
  220. {
  221. return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
  222. }
  223. static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
  224. OPJ_UINT32 pfX,
  225. OPJ_UINT32 nfX,
  226. OPJ_UINT32 ci)
  227. {
  228. /*
  229. 0 pfX T1_CHI_THIS T1_LUT_SGN_W
  230. 1 tfX T1_SIGMA_1 T1_LUT_SIG_N
  231. 2 nfX T1_CHI_THIS T1_LUT_SGN_E
  232. 3 tfX T1_SIGMA_3 T1_LUT_SIG_W
  233. 4 fX T1_CHI_(THIS - 1) T1_LUT_SGN_N
  234. 5 tfX T1_SIGMA_5 T1_LUT_SIG_E
  235. 6 fX T1_CHI_(THIS + 1) T1_LUT_SGN_S
  236. 7 tfX T1_SIGMA_7 T1_LUT_SIG_S
  237. */
  238. OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
  239. T1_SIGMA_7);
  240. lu |= (pfX >> (T1_CHI_THIS_I + (ci * 3U))) & (1U << 0);
  241. lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
  242. if (ci == 0U) {
  243. lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
  244. } else {
  245. lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
  246. }
  247. lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
  248. return lu;
  249. }
  250. static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
  251. {
  252. return lut_ctxno_sc[lu];
  253. }
  254. static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
  255. {
  256. OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
  257. OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
  258. return tmp2;
  259. }
  260. static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
  261. {
  262. return lut_spb[lu];
  263. }
  264. static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
  265. {
  266. if (bitpos > 0) {
  267. return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
  268. }
  269. return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
  270. }
  271. static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
  272. {
  273. if (bitpos > 0) {
  274. return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
  275. }
  276. return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
  277. }
  278. #define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
  279. { \
  280. /* east */ \
  281. flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
  282. \
  283. /* mark target as significant */ \
  284. flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
  285. \
  286. /* west */ \
  287. flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
  288. \
  289. /* north-west, north, north-east */ \
  290. if (ci == 0U && !(vsc)) { \
  291. opj_flag_t* north = flagsp - (stride); \
  292. *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
  293. north[-1] |= T1_SIGMA_17; \
  294. north[1] |= T1_SIGMA_15; \
  295. } \
  296. \
  297. /* south-west, south, south-east */ \
  298. if (ci == 3U) { \
  299. opj_flag_t* south = flagsp + (stride); \
  300. *south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
  301. south[-1] |= T1_SIGMA_2; \
  302. south[1] |= T1_SIGMA_0; \
  303. } \
  304. }
  305. static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
  306. OPJ_UINT32 s, OPJ_UINT32 stride,
  307. OPJ_UINT32 vsc)
  308. {
  309. opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
  310. }
  311. /**
  312. Encode significant pass
  313. */
  314. #define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
  315. { \
  316. OPJ_UINT32 v; \
  317. const OPJ_UINT32 ci = (ciIn); \
  318. const OPJ_UINT32 vsc = (vscIn); \
  319. const OPJ_INT32* l_datap = (datapIn); \
  320. opj_flag_t* flagsp = (flagspIn); \
  321. OPJ_UINT32 const flags = *flagsp; \
  322. if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
  323. (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
  324. OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
  325. v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
  326. /* #ifdef DEBUG_ENC_SIG */ \
  327. /* fprintf(stderr, " ctxt1=%d\n", ctxt1); */ \
  328. /* #endif */ \
  329. opj_t1_setcurctx(curctx, ctxt1); \
  330. if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
  331. opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
  332. } else { \
  333. opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
  334. } \
  335. if (v) { \
  336. OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
  337. *flagsp, \
  338. flagsp[-1], flagsp[1], \
  339. ci); \
  340. OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
  341. v = opj_smr_sign(*l_datap); \
  342. *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
  343. (OPJ_UINT32)bpno); \
  344. /* #ifdef DEBUG_ENC_SIG */ \
  345. /* fprintf(stderr, " ctxt2=%d\n", ctxt2); */ \
  346. /* #endif */ \
  347. opj_t1_setcurctx(curctx, ctxt2); \
  348. if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
  349. opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
  350. } else { \
  351. OPJ_UINT32 spb = opj_t1_getspb(lu); \
  352. /* #ifdef DEBUG_ENC_SIG */ \
  353. /* fprintf(stderr, " spb=%d\n", spb); */ \
  354. /* #endif */ \
  355. opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
  356. } \
  357. opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
  358. } \
  359. *flagsp |= T1_PI_THIS << (ci * 3U); \
  360. } \
  361. }
  362. static INLINE void opj_t1_dec_sigpass_step_raw(
  363. opj_t1_t *t1,
  364. opj_flag_t *flagsp,
  365. OPJ_INT32 *datap,
  366. OPJ_INT32 oneplushalf,
  367. OPJ_UINT32 vsc,
  368. OPJ_UINT32 ci)
  369. {
  370. OPJ_UINT32 v;
  371. opj_mqc_t *mqc = &(t1->mqc); /* RAW component */
  372. OPJ_UINT32 const flags = *flagsp;
  373. if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
  374. (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
  375. if (opj_mqc_raw_decode(mqc)) {
  376. v = opj_mqc_raw_decode(mqc);
  377. *datap = v ? -oneplushalf : oneplushalf;
  378. opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
  379. }
  380. *flagsp |= T1_PI_THIS << (ci * 3U);
  381. }
  382. }
  383. #define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
  384. data_stride, ci, mqc, curctx, \
  385. v, a, c, ct, oneplushalf, vsc) \
  386. { \
  387. if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
  388. (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
  389. OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
  390. opj_t1_setcurctx(curctx, ctxt1); \
  391. opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
  392. if (v) { \
  393. OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
  394. flags, \
  395. flagsp[-1], flagsp[1], \
  396. ci); \
  397. OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
  398. OPJ_UINT32 spb = opj_t1_getspb(lu); \
  399. opj_t1_setcurctx(curctx, ctxt2); \
  400. opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
  401. v = v ^ spb; \
  402. data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
  403. opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
  404. } \
  405. flags |= T1_PI_THIS << (ci * 3U); \
  406. } \
  407. }
  408. static INLINE void opj_t1_dec_sigpass_step_mqc(
  409. opj_t1_t *t1,
  410. opj_flag_t *flagsp,
  411. OPJ_INT32 *datap,
  412. OPJ_INT32 oneplushalf,
  413. OPJ_UINT32 ci,
  414. OPJ_UINT32 flags_stride,
  415. OPJ_UINT32 vsc)
  416. {
  417. OPJ_UINT32 v;
  418. opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
  419. opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
  420. 0, ci, mqc, mqc->curctx,
  421. v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
  422. }
  423. static void opj_t1_enc_sigpass(opj_t1_t *t1,
  424. OPJ_INT32 bpno,
  425. OPJ_INT32 *nmsedec,
  426. OPJ_BYTE type,
  427. OPJ_UINT32 cblksty
  428. )
  429. {
  430. OPJ_UINT32 i, k;
  431. OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
  432. opj_flag_t* f = &T1_FLAGS(0, 0);
  433. OPJ_UINT32 const extra = 2;
  434. opj_mqc_t* mqc = &(t1->mqc);
  435. DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
  436. const OPJ_INT32* datap = t1->data;
  437. *nmsedec = 0;
  438. #ifdef DEBUG_ENC_SIG
  439. fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
  440. #endif
  441. for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
  442. const OPJ_UINT32 w = t1->w;
  443. #ifdef DEBUG_ENC_SIG
  444. fprintf(stderr, " k=%d\n", k);
  445. #endif
  446. for (i = 0; i < w; ++i, ++f, datap += 4) {
  447. #ifdef DEBUG_ENC_SIG
  448. fprintf(stderr, " i=%d\n", i);
  449. #endif
  450. if (*f == 0U) {
  451. /* Nothing to do for any of the 4 data points */
  452. continue;
  453. }
  454. opj_t1_enc_sigpass_step_macro(
  455. mqc, curctx, a, c, ct,
  456. f,
  457. &datap[0],
  458. bpno,
  459. one,
  460. nmsedec,
  461. type,
  462. 0, cblksty & J2K_CCP_CBLKSTY_VSC);
  463. opj_t1_enc_sigpass_step_macro(
  464. mqc, curctx, a, c, ct,
  465. f,
  466. &datap[1],
  467. bpno,
  468. one,
  469. nmsedec,
  470. type,
  471. 1, 0);
  472. opj_t1_enc_sigpass_step_macro(
  473. mqc, curctx, a, c, ct,
  474. f,
  475. &datap[2],
  476. bpno,
  477. one,
  478. nmsedec,
  479. type,
  480. 2, 0);
  481. opj_t1_enc_sigpass_step_macro(
  482. mqc, curctx, a, c, ct,
  483. f,
  484. &datap[3],
  485. bpno,
  486. one,
  487. nmsedec,
  488. type,
  489. 3, 0);
  490. }
  491. }
  492. if (k < t1->h) {
  493. OPJ_UINT32 j;
  494. #ifdef DEBUG_ENC_SIG
  495. fprintf(stderr, " k=%d\n", k);
  496. #endif
  497. for (i = 0; i < t1->w; ++i, ++f) {
  498. #ifdef DEBUG_ENC_SIG
  499. fprintf(stderr, " i=%d\n", i);
  500. #endif
  501. if (*f == 0U) {
  502. /* Nothing to do for any of the 4 data points */
  503. datap += (t1->h - k);
  504. continue;
  505. }
  506. for (j = k; j < t1->h; ++j, ++datap) {
  507. opj_t1_enc_sigpass_step_macro(
  508. mqc, curctx, a, c, ct,
  509. f,
  510. &datap[0],
  511. bpno,
  512. one,
  513. nmsedec,
  514. type,
  515. j - k,
  516. (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
  517. }
  518. }
  519. }
  520. UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
  521. }
  522. static void opj_t1_dec_sigpass_raw(
  523. opj_t1_t *t1,
  524. OPJ_INT32 bpno,
  525. OPJ_INT32 cblksty)
  526. {
  527. OPJ_INT32 one, half, oneplushalf;
  528. OPJ_UINT32 i, j, k;
  529. OPJ_INT32 *data = t1->data;
  530. opj_flag_t *flagsp = &T1_FLAGS(0, 0);
  531. const OPJ_UINT32 l_w = t1->w;
  532. one = 1 << bpno;
  533. half = one >> 1;
  534. oneplushalf = one | half;
  535. for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
  536. for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
  537. opj_flag_t flags = *flagsp;
  538. if (flags != 0) {
  539. opj_t1_dec_sigpass_step_raw(
  540. t1,
  541. flagsp,
  542. data,
  543. oneplushalf,
  544. cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
  545. 0U);
  546. opj_t1_dec_sigpass_step_raw(
  547. t1,
  548. flagsp,
  549. data + l_w,
  550. oneplushalf,
  551. OPJ_FALSE, /* vsc */
  552. 1U);
  553. opj_t1_dec_sigpass_step_raw(
  554. t1,
  555. flagsp,
  556. data + 2 * l_w,
  557. oneplushalf,
  558. OPJ_FALSE, /* vsc */
  559. 2U);
  560. opj_t1_dec_sigpass_step_raw(
  561. t1,
  562. flagsp,
  563. data + 3 * l_w,
  564. oneplushalf,
  565. OPJ_FALSE, /* vsc */
  566. 3U);
  567. }
  568. }
  569. }
  570. if (k < t1->h) {
  571. for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
  572. for (j = 0; j < t1->h - k; ++j) {
  573. opj_t1_dec_sigpass_step_raw(
  574. t1,
  575. flagsp,
  576. data + j * l_w,
  577. oneplushalf,
  578. cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
  579. j);
  580. }
  581. }
  582. }
  583. }
  584. #define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
  585. { \
  586. OPJ_INT32 one, half, oneplushalf; \
  587. OPJ_UINT32 i, j, k; \
  588. register OPJ_INT32 *data = t1->data; \
  589. register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
  590. const OPJ_UINT32 l_w = w; \
  591. opj_mqc_t* mqc = &(t1->mqc); \
  592. DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
  593. register OPJ_UINT32 v; \
  594. one = 1 << bpno; \
  595. half = one >> 1; \
  596. oneplushalf = one | half; \
  597. for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
  598. for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
  599. opj_flag_t flags = *flagsp; \
  600. if( flags != 0 ) { \
  601. opj_t1_dec_sigpass_step_mqc_macro( \
  602. flags, flagsp, flags_stride, data, \
  603. l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
  604. opj_t1_dec_sigpass_step_mqc_macro( \
  605. flags, flagsp, flags_stride, data, \
  606. l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
  607. opj_t1_dec_sigpass_step_mqc_macro( \
  608. flags, flagsp, flags_stride, data, \
  609. l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
  610. opj_t1_dec_sigpass_step_mqc_macro( \
  611. flags, flagsp, flags_stride, data, \
  612. l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
  613. *flagsp = flags; \
  614. } \
  615. } \
  616. } \
  617. UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
  618. if( k < h ) { \
  619. for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
  620. for (j = 0; j < h - k; ++j) { \
  621. opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
  622. data + j * l_w, oneplushalf, j, flags_stride, vsc); \
  623. } \
  624. } \
  625. } \
  626. }
  627. static void opj_t1_dec_sigpass_mqc_64x64_novsc(
  628. opj_t1_t *t1,
  629. OPJ_INT32 bpno)
  630. {
  631. opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
  632. }
  633. static void opj_t1_dec_sigpass_mqc_64x64_vsc(
  634. opj_t1_t *t1,
  635. OPJ_INT32 bpno)
  636. {
  637. opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
  638. }
  639. static void opj_t1_dec_sigpass_mqc_generic_novsc(
  640. opj_t1_t *t1,
  641. OPJ_INT32 bpno)
  642. {
  643. opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
  644. t1->w + 2U);
  645. }
  646. static void opj_t1_dec_sigpass_mqc_generic_vsc(
  647. opj_t1_t *t1,
  648. OPJ_INT32 bpno)
  649. {
  650. opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
  651. t1->w + 2U);
  652. }
  653. static void opj_t1_dec_sigpass_mqc(
  654. opj_t1_t *t1,
  655. OPJ_INT32 bpno,
  656. OPJ_INT32 cblksty)
  657. {
  658. if (t1->w == 64 && t1->h == 64) {
  659. if (cblksty & J2K_CCP_CBLKSTY_VSC) {
  660. opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
  661. } else {
  662. opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
  663. }
  664. } else {
  665. if (cblksty & J2K_CCP_CBLKSTY_VSC) {
  666. opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
  667. } else {
  668. opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
  669. }
  670. }
  671. }
  /**
   * Encode refinement pass step for sample `ci` of a flags word.
   *
   * The sample is coded only when its SIGMA bit is set and its PI bit is
   * clear (both tested at bit offset ci * 3 inside `flags`). In that case:
   *  - the contribution returned by opj_t1_getnmsedec_ref() is added to
   *    *nmsedec,
   *  - the bit of |*datap| selected by `one` is encoded, through the bypass
   *    encoder when type == T1_TYPE_RAW and through the MQ encoder otherwise,
   *  - the sample's T1_MU_THIS bit is set in `flagsUpdated`.
   *
   * `flags` is only read; `flagsUpdated` is only written. `mqc`, `curctx`,
   * `a`, `c`, `ct` are forwarded as-is to the encoder macros.
   */
  #define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
  {\
      OPJ_UINT32 v; \
      if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
          const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
          OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
          OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
          *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
                                            (OPJ_UINT32)bpno); \
          v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \
  /* #ifdef DEBUG_ENC_REF */ \
  /* fprintf(stderr, " ctxt=%d\n", ctxt); */ \
  /* #endif */ \
          opj_t1_setcurctx(curctx, ctxt); \
          if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
              opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
          } else { \
              opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
          } \
          flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
      } \
  }
  697. static INLINE void opj_t1_dec_refpass_step_raw(
  698. opj_t1_t *t1,
  699. opj_flag_t *flagsp,
  700. OPJ_INT32 *datap,
  701. OPJ_INT32 poshalf,
  702. OPJ_UINT32 ci)
  703. {
  704. OPJ_UINT32 v;
  705. opj_mqc_t *mqc = &(t1->mqc); /* RAW component */
  706. if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
  707. (T1_SIGMA_THIS << (ci * 3U))) {
  708. v = opj_mqc_raw_decode(mqc);
  709. *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
  710. *flagsp |= T1_MU_THIS << (ci * 3U);
  711. }
  712. }
  /**
   * Decode refinement pass step (MQ coder) for sample `ci` of a flags word.
   *
   * Acts only when the sample's SIGMA bit is set and its PI bit is clear.
   * It then selects the context returned by opj_t1_getctxno_mag(), decodes
   * one symbol into `v`, adjusts data[ci*data_stride] by +poshalf when `v`
   * differs from the "sample is negative" indicator and by -poshalf
   * otherwise, and sets the sample's T1_MU_THIS bit in `flags`.
   *
   * `flags` and `v` must be modifiable lvalues; the arguments are expanded
   * textually and without parentheses.
   */
  #define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                            mqc, curctx, v, a, c, ct, poshalf) \
  { \
      if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
              (T1_SIGMA_THIS << (ci * 3U))) { \
          OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
          opj_t1_setcurctx(curctx, ctxt); \
          opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
          data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
          flags |= T1_MU_THIS << (ci * 3U); \
      } \
  }
  725. static INLINE void opj_t1_dec_refpass_step_mqc(
  726. opj_t1_t *t1,
  727. opj_flag_t *flagsp,
  728. OPJ_INT32 *datap,
  729. OPJ_INT32 poshalf,
  730. OPJ_UINT32 ci)
  731. {
  732. OPJ_UINT32 v;
  733. opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
  734. opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
  735. mqc, mqc->curctx, v, mqc->a, mqc->c,
  736. mqc->ct, poshalf);
  737. }
  738. static void opj_t1_enc_refpass(
  739. opj_t1_t *t1,
  740. OPJ_INT32 bpno,
  741. OPJ_INT32 *nmsedec,
  742. OPJ_BYTE type)
  743. {
  744. OPJ_UINT32 i, k;
  745. const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
  746. opj_flag_t* f = &T1_FLAGS(0, 0);
  747. const OPJ_UINT32 extra = 2U;
  748. opj_mqc_t* mqc = &(t1->mqc);
  749. DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
  750. const OPJ_INT32* datap = t1->data;
  751. *nmsedec = 0;
  752. #ifdef DEBUG_ENC_REF
  753. fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
  754. #endif
  755. for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
  756. #ifdef DEBUG_ENC_REF
  757. fprintf(stderr, " k=%d\n", k);
  758. #endif
  759. for (i = 0; i < t1->w; ++i, f++, datap += 4) {
  760. const OPJ_UINT32 flags = *f;
  761. OPJ_UINT32 flagsUpdated = flags;
  762. #ifdef DEBUG_ENC_REF
  763. fprintf(stderr, " i=%d\n", i);
  764. #endif
  765. if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
  766. /* none significant */
  767. continue;
  768. }
  769. if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
  770. (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
  771. /* all processed by sigpass */
  772. continue;
  773. }
  774. opj_t1_enc_refpass_step_macro(
  775. mqc, curctx, a, c, ct,
  776. flags, flagsUpdated,
  777. &datap[0],
  778. bpno,
  779. one,
  780. nmsedec,
  781. type,
  782. 0);
  783. opj_t1_enc_refpass_step_macro(
  784. mqc, curctx, a, c, ct,
  785. flags, flagsUpdated,
  786. &datap[1],
  787. bpno,
  788. one,
  789. nmsedec,
  790. type,
  791. 1);
  792. opj_t1_enc_refpass_step_macro(
  793. mqc, curctx, a, c, ct,
  794. flags, flagsUpdated,
  795. &datap[2],
  796. bpno,
  797. one,
  798. nmsedec,
  799. type,
  800. 2);
  801. opj_t1_enc_refpass_step_macro(
  802. mqc, curctx, a, c, ct,
  803. flags, flagsUpdated,
  804. &datap[3],
  805. bpno,
  806. one,
  807. nmsedec,
  808. type,
  809. 3);
  810. *f = flagsUpdated;
  811. }
  812. }
  813. if (k < t1->h) {
  814. OPJ_UINT32 j;
  815. const OPJ_UINT32 remaining_lines = t1->h - k;
  816. #ifdef DEBUG_ENC_REF
  817. fprintf(stderr, " k=%d\n", k);
  818. #endif
  819. for (i = 0; i < t1->w; ++i, ++f) {
  820. #ifdef DEBUG_ENC_REF
  821. fprintf(stderr, " i=%d\n", i);
  822. #endif
  823. if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
  824. /* none significant */
  825. datap += remaining_lines;
  826. continue;
  827. }
  828. for (j = 0; j < remaining_lines; ++j, datap ++) {
  829. opj_t1_enc_refpass_step_macro(
  830. mqc, curctx, a, c, ct,
  831. *f, *f,
  832. &datap[0],
  833. bpno,
  834. one,
  835. nmsedec,
  836. type,
  837. j);
  838. }
  839. }
  840. }
  841. UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
  842. }
  843. static void opj_t1_dec_refpass_raw(
  844. opj_t1_t *t1,
  845. OPJ_INT32 bpno)
  846. {
  847. OPJ_INT32 one, poshalf;
  848. OPJ_UINT32 i, j, k;
  849. OPJ_INT32 *data = t1->data;
  850. opj_flag_t *flagsp = &T1_FLAGS(0, 0);
  851. const OPJ_UINT32 l_w = t1->w;
  852. one = 1 << bpno;
  853. poshalf = one >> 1;
  854. for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
  855. for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
  856. opj_flag_t flags = *flagsp;
  857. if (flags != 0) {
  858. opj_t1_dec_refpass_step_raw(
  859. t1,
  860. flagsp,
  861. data,
  862. poshalf,
  863. 0U);
  864. opj_t1_dec_refpass_step_raw(
  865. t1,
  866. flagsp,
  867. data + l_w,
  868. poshalf,
  869. 1U);
  870. opj_t1_dec_refpass_step_raw(
  871. t1,
  872. flagsp,
  873. data + 2 * l_w,
  874. poshalf,
  875. 2U);
  876. opj_t1_dec_refpass_step_raw(
  877. t1,
  878. flagsp,
  879. data + 3 * l_w,
  880. poshalf,
  881. 3U);
  882. }
  883. }
  884. }
  885. if (k < t1->h) {
  886. for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
  887. for (j = 0; j < t1->h - k; ++j) {
  888. opj_t1_dec_refpass_step_raw(
  889. t1,
  890. flagsp,
  891. data + j * l_w,
  892. poshalf,
  893. j);
  894. }
  895. }
  896. }
  897. }
  /**
   * Body of the MQ-coded refinement decoding pass, instantiated by wrapper
   * functions with either literal or run-time dimensions.
   *
   * @param t1            Tier-1 handle (expression of type opj_t1_t*)
   * @param bpno          bit-plane number
   * @param w             code-block width
   * @param h             code-block height
   * @param flags_stride  number of flag entries per flags row (callers in this
   *                      file pass w + 2)
   *
   * Full 4-row stripes are decoded with the MQ coder state held in local
   * variables (DOWNLOAD_MQC_VARIABLES); columns whose flags word is zero are
   * skipped. The state is written back with UPLOAD_MQC_VARIABLES before the
   * trailing rows (h not a multiple of 4) are decoded through
   * opj_t1_dec_refpass_step_mqc(), which operates on the fields of t1->mqc.
   */
  #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
  { \
      OPJ_INT32 one, poshalf; \
      OPJ_UINT32 i, j, k; \
      register OPJ_INT32 *data = t1->data; \
      register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
      const OPJ_UINT32 l_w = w; \
      opj_mqc_t* mqc = &(t1->mqc); \
      DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
      register OPJ_UINT32 v; \
      one = 1 << bpno; \
      poshalf = one >> 1; \
      /* data += 3*l_w: the inner loop already advanced by one row */ \
      /* flagsp += 2: skip the two border entries of the flags row */ \
      for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
          for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
              opj_flag_t flags = *flagsp; \
              if( flags != 0 ) { \
                  opj_t1_dec_refpass_step_mqc_macro( \
                      flags, data, l_w, 0, \
                      mqc, curctx, v, a, c, ct, poshalf); \
                  opj_t1_dec_refpass_step_mqc_macro( \
                      flags, data, l_w, 1, \
                      mqc, curctx, v, a, c, ct, poshalf); \
                  opj_t1_dec_refpass_step_mqc_macro( \
                      flags, data, l_w, 2, \
                      mqc, curctx, v, a, c, ct, poshalf); \
                  opj_t1_dec_refpass_step_mqc_macro( \
                      flags, data, l_w, 3, \
                      mqc, curctx, v, a, c, ct, poshalf); \
                  *flagsp = flags; \
              } \
          } \
      } \
      UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
      if( k < h ) { \
          for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
              for (j = 0; j < h - k; ++j) { \
                  opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
              } \
          } \
      } \
  }
  939. static void opj_t1_dec_refpass_mqc_64x64(
  940. opj_t1_t *t1,
  941. OPJ_INT32 bpno)
  942. {
  943. opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
  944. }
  945. static void opj_t1_dec_refpass_mqc_generic(
  946. opj_t1_t *t1,
  947. OPJ_INT32 bpno)
  948. {
  949. opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
  950. }
  951. static void opj_t1_dec_refpass_mqc(
  952. opj_t1_t *t1,
  953. OPJ_INT32 bpno)
  954. {
  955. if (t1->w == 64 && t1->h == 64) {
  956. opj_t1_dec_refpass_mqc_64x64(t1, bpno);
  957. } else {
  958. opj_t1_dec_refpass_mqc_generic(t1, bpno);
  959. }
  960. }
  /**
   * Encode clean-up pass step for one column of a stripe.
   *
   * @param flagspIn  pointer to the flags word of the column
   * @param datapIn   pointer to the input sample matching index `runlen`
   * @param agg       non-zero when the column was entered through the
   *                  aggregation (run-length) path
   * @param runlen    index of the first sample to process
   * @param lim       one past the index of the last sample to process
   * @param cblksty   code-block style flags (only J2K_CCP_CBLKSTY_VSC is read)
   *
   * When all four SIGMA bits and all four PI bits of the word are set, no
   * symbol is emitted and only the PI bits of samples runlen..3 are cleared.
   * Otherwise each sample ci in [runlen, lim) is visited:
   *  - if agg is set and ci == runlen, the sign is coded directly;
   *  - else, if neither SIGMA nor PI is set for the sample, its bit is coded
   *    in the context given by opj_t1_getctxno_zc(), and the sign is coded
   *    only when that bit is 1;
   *  - the sample's PI bit is cleared and the data pointer advances.
   *
   * NOTE: the expansion references a variable named `t1` (t1->w + 2U) that
   * is not a macro parameter; it must be in scope at the call site.
   */
  #define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
  { \
      OPJ_UINT32 v; \
      OPJ_UINT32 ci; \
      opj_flag_t* const flagsp = (flagspIn); \
      const OPJ_INT32* l_datap = (datapIn); \
      const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
                                T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
      \
      if ((*flagsp & check) == check) { \
          if (runlen == 0) { \
              *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
          } else if (runlen == 1) { \
              *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
          } else if (runlen == 2) { \
              *flagsp &= ~(T1_PI_2 | T1_PI_3); \
          } else if (runlen == 3) { \
              *flagsp &= ~(T1_PI_3); \
          } \
      } \
      else \
      for (ci = runlen; ci < lim; ++ci) { \
          OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
          if ((agg != 0) && (ci == runlen)) { \
              goto_PARTIAL = OPJ_TRUE; \
          } \
          else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
              OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
  /* #ifdef DEBUG_ENC_CLN */ \
  /* printf(" ctxt1=%d\n", ctxt1); */ \
  /* #endif */ \
              opj_t1_setcurctx(curctx, ctxt1); \
              v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
              opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
              if (v) { \
                  goto_PARTIAL = OPJ_TRUE; \
              } \
          } \
          /* Sign coding: sample just became significant */ \
          if( goto_PARTIAL ) { \
              OPJ_UINT32 vsc; \
              OPJ_UINT32 ctxt2, spb; \
              OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                  *flagsp, \
                                  flagsp[-1], flagsp[1], \
                                  ci); \
              *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
                                                (OPJ_UINT32)bpno); \
              ctxt2 = opj_t1_getctxno_sc(lu); \
  /* #ifdef DEBUG_ENC_CLN */ \
  /* printf(" ctxt2=%d\n", ctxt2); */ \
  /* #endif */ \
              opj_t1_setcurctx(curctx, ctxt2); \
              \
              v = opj_smr_sign(*l_datap); \
              spb = opj_t1_getspb(lu); \
  /* #ifdef DEBUG_ENC_CLN */ \
  /* printf(" spb=%d\n", spb); */\
  /* #endif */ \
              opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
              vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
              opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
          } \
          *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
          l_datap ++; \
      } \
  }
  /**
   * Decode clean-up pass step for sample `ci` of a flags word.
   *
   * @param check_flags  when true, the sample is skipped if its SIGMA or PI
   *                     bit is already set in `flags`
   * @param partial      when true, the first decode below is skipped and the
   *                     sign is decoded unconditionally
   *
   * Unless `partial` is set, one symbol is decoded in the context returned
   * by opj_t1_getctxno_zc(); a 0 ends the step. Otherwise a second symbol is
   * decoded in the context from opj_t1_getctxno_sc(), XOR-ed with
   * opj_t1_getspb(), and used as the sign: data[ci*data_stride] becomes
   * -oneplushalf when the result is 1 and +oneplushalf when it is 0. The
   * flags are then updated through opj_t1_update_flags_macro().
   *
   * `flags` and `v` must be modifiable lvalues; the arguments are expanded
   * textually and without parentheses.
   */
  #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
                                        flags, flagsp, flags_stride, data, \
                                        data_stride, ci, mqc, curctx, \
                                        v, a, c, ct, oneplushalf, vsc) \
  { \
      if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
          /* do { } while(0) gives the early exit below a target */ \
          do { \
              if( !partial ) { \
                  OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
                  opj_t1_setcurctx(curctx, ctxt1); \
                  opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                  if( !v ) \
                      break; \
              } \
              { \
                  OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                      flags, flagsp[-1], flagsp[1], \
                                      ci); \
                  opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
                  opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                  v = v ^ opj_t1_getspb(lu); \
                  data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
                  opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
              } \
          } while(0); \
      } \
  }
  1057. static void opj_t1_dec_clnpass_step(
  1058. opj_t1_t *t1,
  1059. opj_flag_t *flagsp,
  1060. OPJ_INT32 *datap,
  1061. OPJ_INT32 oneplushalf,
  1062. OPJ_UINT32 ci,
  1063. OPJ_UINT32 vsc)
  1064. {
  1065. OPJ_UINT32 v;
  1066. opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
  1067. opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
  1068. *flagsp, flagsp, t1->w + 2U, datap,
  1069. 0, ci, mqc, mqc->curctx,
  1070. v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
  1071. }
  1072. static void opj_t1_enc_clnpass(
  1073. opj_t1_t *t1,
  1074. OPJ_INT32 bpno,
  1075. OPJ_INT32 *nmsedec,
  1076. OPJ_UINT32 cblksty)
  1077. {
  1078. OPJ_UINT32 i, k;
  1079. const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
  1080. opj_mqc_t* mqc = &(t1->mqc);
  1081. DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
  1082. const OPJ_INT32* datap = t1->data;
  1083. opj_flag_t *f = &T1_FLAGS(0, 0);
  1084. const OPJ_UINT32 extra = 2U;
  1085. *nmsedec = 0;
  1086. #ifdef DEBUG_ENC_CLN
  1087. printf("enc_clnpass: bpno=%d\n", bpno);
  1088. #endif
  1089. for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
  1090. #ifdef DEBUG_ENC_CLN
  1091. printf(" k=%d\n", k);
  1092. #endif
  1093. for (i = 0; i < t1->w; ++i, f++) {
  1094. OPJ_UINT32 agg, runlen;
  1095. #ifdef DEBUG_ENC_CLN
  1096. printf(" i=%d\n", i);
  1097. #endif
  1098. agg = !*f;
  1099. #ifdef DEBUG_ENC_CLN
  1100. printf(" agg=%d\n", agg);
  1101. #endif
  1102. if (agg) {
  1103. for (runlen = 0; runlen < 4; ++runlen, ++datap) {
  1104. if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
  1105. break;
  1106. }
  1107. }
  1108. opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
  1109. opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
  1110. if (runlen == 4) {
  1111. continue;
  1112. }
  1113. opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
  1114. opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
  1115. opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
  1116. } else {
  1117. runlen = 0;
  1118. }
  1119. opj_t1_enc_clnpass_step_macro(
  1120. mqc, curctx, a, c, ct,
  1121. f,
  1122. datap,
  1123. bpno,
  1124. one,
  1125. nmsedec,
  1126. agg,
  1127. runlen,
  1128. 4U,
  1129. cblksty);
  1130. datap += 4 - runlen;
  1131. }
  1132. }
  1133. if (k < t1->h) {
  1134. const OPJ_UINT32 agg = 0;
  1135. const OPJ_UINT32 runlen = 0;
  1136. #ifdef DEBUG_ENC_CLN
  1137. printf(" k=%d\n", k);
  1138. #endif
  1139. for (i = 0; i < t1->w; ++i, f++) {
  1140. #ifdef DEBUG_ENC_CLN
  1141. printf(" i=%d\n", i);
  1142. printf(" agg=%d\n", agg);
  1143. #endif
  1144. opj_t1_enc_clnpass_step_macro(
  1145. mqc, curctx, a, c, ct,
  1146. f,
  1147. datap,
  1148. bpno,
  1149. one,
  1150. nmsedec,
  1151. agg,
  1152. runlen,
  1153. t1->h - k,
  1154. cblksty);
  1155. datap += t1->h - k;
  1156. }
  1157. }
  1158. UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
  1159. }
  /**
   * Body of the clean-up decoding pass, instantiated by wrapper functions
   * with either literal or run-time dimensions.
   *
   * @param t1            Tier-1 handle (expression of type opj_t1_t*)
   * @param bpno          bit-plane number
   * @param vsc           passed to the step for sample 0 of each column
   *                      (samples 1..3 always receive OPJ_FALSE)
   * @param w             code-block width
   * @param h             code-block height
   * @param flags_stride  number of flag entries per flags row (callers in this
   *                      file pass w + 2)
   *
   * For each column of a full 4-row stripe:
   *  - flags word == 0: one symbol is decoded in the AGG context; 0 means
   *    nothing to do for the column. Otherwise two symbols decoded in the
   *    UNI context form runlen (0..3); the sample at index runlen has its
   *    sign decoded directly (partial step) and the samples after it are
   *    decoded normally through the switch fallthrough.
   *  - flags word != 0: all four samples go through the checked step.
   *  - in both cases the four PI bits of the word are cleared afterwards.
   * The MQ coder state lives in local variables during the stripes and is
   * written back before the trailing rows (h not a multiple of 4), which are
   * decoded through opj_t1_dec_clnpass_step().
   */
  #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
  { \
      OPJ_INT32 one, half, oneplushalf; \
      OPJ_UINT32 runlen; \
      OPJ_UINT32 i, j, k; \
      const OPJ_UINT32 l_w = w; \
      opj_mqc_t* mqc = &(t1->mqc); \
      register OPJ_INT32 *data = t1->data; \
      register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
      DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
      register OPJ_UINT32 v; \
      one = 1 << bpno; \
      half = one >> 1; \
      oneplushalf = one | half; \
      for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
          for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
              opj_flag_t flags = *flagsp; \
              if (flags == 0) { \
                  OPJ_UINT32 partial = OPJ_TRUE; \
                  opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
                  opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                  if (!v) { \
                      continue; \
                  } \
                  opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
                  opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
                  opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                  runlen = (runlen << 1) | v; \
                  /* Enter at sample runlen; only that one is "partial" */ \
                  switch(runlen) { \
                  case 0: \
                      opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
                                          flags, flagsp, flags_stride, data, \
                                          l_w, 0, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc); \
                      partial = OPJ_FALSE; \
                      /* FALLTHRU */ \
                  case 1: \
                      opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                          flags, flagsp, flags_stride, data, \
                                          l_w, 1, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, OPJ_FALSE); \
                      partial = OPJ_FALSE; \
                      /* FALLTHRU */ \
                  case 2: \
                      opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                          flags, flagsp, flags_stride, data, \
                                          l_w, 2, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, OPJ_FALSE); \
                      partial = OPJ_FALSE; \
                      /* FALLTHRU */ \
                  case 3: \
                      opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                          flags, flagsp, flags_stride, data, \
                                          l_w, 3, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, OPJ_FALSE); \
                      break; \
                  } \
              } else { \
                  opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                      flags, flagsp, flags_stride, data, \
                                      l_w, 0, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, vsc); \
                  opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                      flags, flagsp, flags_stride, data, \
                                      l_w, 1, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, OPJ_FALSE); \
                  opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                      flags, flagsp, flags_stride, data, \
                                      l_w, 2, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, OPJ_FALSE); \
                  opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                      flags, flagsp, flags_stride, data, \
                                      l_w, 3, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, OPJ_FALSE); \
              } \
              *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
          } \
      } \
      UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
      if( k < h ) { \
          for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
              for (j = 0; j < h - k; ++j) { \
                  opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
              } \
              *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
          } \
      } \
  }
  1248. static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
  1249. {
  1250. if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
  1251. opj_mqc_t* mqc = &(t1->mqc);
  1252. OPJ_UINT32 v, v2;
  1253. opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
  1254. opj_mqc_decode(v, mqc);
  1255. opj_mqc_decode(v2, mqc);
  1256. v = (v << 1) | v2;
  1257. opj_mqc_decode(v2, mqc);
  1258. v = (v << 1) | v2;
  1259. opj_mqc_decode(v2, mqc);
  1260. v = (v << 1) | v2;
  1261. /*
  1262. if (v!=0xa) {
  1263. opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
  1264. }
  1265. */
  1266. }
  1267. }
  1268. static void opj_t1_dec_clnpass_64x64_novsc(
  1269. opj_t1_t *t1,
  1270. OPJ_INT32 bpno)
  1271. {
  1272. opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
  1273. }
  1274. static void opj_t1_dec_clnpass_64x64_vsc(
  1275. opj_t1_t *t1,
  1276. OPJ_INT32 bpno)
  1277. {
  1278. opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
  1279. }
  1280. static void opj_t1_dec_clnpass_generic_novsc(
  1281. opj_t1_t *t1,
  1282. OPJ_INT32 bpno)
  1283. {
  1284. opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
  1285. t1->w + 2U);
  1286. }
  1287. static void opj_t1_dec_clnpass_generic_vsc(
  1288. opj_t1_t *t1,
  1289. OPJ_INT32 bpno)
  1290. {
  1291. opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
  1292. t1->w + 2U);
  1293. }
  1294. static void opj_t1_dec_clnpass(
  1295. opj_t1_t *t1,
  1296. OPJ_INT32 bpno,
  1297. OPJ_INT32 cblksty)
  1298. {
  1299. if (t1->w == 64 && t1->h == 64) {
  1300. if (cblksty & J2K_CCP_CBLKSTY_VSC) {
  1301. opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
  1302. } else {
  1303. opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
  1304. }
  1305. } else {
  1306. if (cblksty & J2K_CCP_CBLKSTY_VSC) {
  1307. opj_t1_dec_clnpass_generic_vsc(t1, bpno);
  1308. } else {
  1309. opj_t1_dec_clnpass_generic_novsc(t1, bpno);
  1310. }
  1311. }
  1312. opj_t1_dec_clnpass_check_segsym(t1, cblksty);
  1313. }
  1314. static OPJ_FLOAT64 opj_t1_getwmsedec(
  1315. OPJ_INT32 nmsedec,
  1316. OPJ_UINT32 compno,
  1317. OPJ_UINT32 level,
  1318. OPJ_UINT32 orient,
  1319. OPJ_INT32 bpno,
  1320. OPJ_UINT32 qmfbid,
  1321. OPJ_FLOAT64 stepsize,
  1322. OPJ_UINT32 numcomps,
  1323. const OPJ_FLOAT64 * mct_norms,
  1324. OPJ_UINT32 mct_numcomps)
  1325. {
  1326. OPJ_FLOAT64 w1 = 1, w2, wmsedec;
  1327. OPJ_ARG_NOT_USED(numcomps);
  1328. if (mct_norms && (compno < mct_numcomps)) {
  1329. w1 = mct_norms[compno];
  1330. }
  1331. if (qmfbid == 1) {
  1332. w2 = opj_dwt_getnorm(level, orient);
  1333. } else { /* if (qmfbid == 0) */
  1334. const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
  1335. (orient == 3) ? 2 : 1;
  1336. w2 = opj_dwt_getnorm_real(level, orient);
  1337. /* Not sure this is right. But preserves past behaviour */
  1338. stepsize /= (1 << log2_gain);
  1339. }
  1340. wmsedec = w1 * w2 * stepsize * (1 << bpno);
  1341. wmsedec *= wmsedec * nmsedec / 8192.0;
  1342. return wmsedec;
  1343. }
  1344. static OPJ_BOOL opj_t1_allocate_buffers(
  1345. opj_t1_t *t1,
  1346. OPJ_UINT32 w,
  1347. OPJ_UINT32 h)
  1348. {
  1349. OPJ_UINT32 flagssize;
  1350. OPJ_UINT32 flags_stride;
  1351. /* No risk of overflow. Prior checks ensure those assert are met */
  1352. /* They are per the specification */
  1353. assert(w <= 1024);
  1354. assert(h <= 1024);
  1355. assert(w * h <= 4096);
  1356. /* encoder uses tile buffer, so no need to allocate */
  1357. {
  1358. OPJ_UINT32 datasize = w * h;
  1359. if (datasize > t1->datasize) {
  1360. opj_aligned_free(t1->data);
  1361. t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
  1362. if (!t1->data) {
  1363. /* FIXME event manager error callback */
  1364. return OPJ_FALSE;
  1365. }
  1366. t1->datasize = datasize;
  1367. }
  1368. /* memset first arg is declared to never be null by gcc */
  1369. if (t1->data != NULL) {
  1370. memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
  1371. }
  1372. }
  1373. flags_stride = w + 2U; /* can't be 0U */
  1374. flagssize = (h + 3U) / 4U + 2U;
  1375. flagssize *= flags_stride;
  1376. {
  1377. opj_flag_t* p;
  1378. OPJ_UINT32 x;
  1379. OPJ_UINT32 flags_height = (h + 3U) / 4U;
  1380. if (flagssize > t1->flagssize) {
  1381. opj_aligned_free(t1->flags);
  1382. t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
  1383. opj_flag_t));
  1384. if (!t1->flags) {
  1385. /* FIXME event manager error callback */
  1386. return OPJ_FALSE;
  1387. }
  1388. }
  1389. t1->flagssize = flagssize;
  1390. memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
  1391. p = &t1->flags[0];
  1392. for (x = 0; x < flags_stride; ++x) {
  1393. /* magic value to hopefully stop any passes being interested in this entry */
  1394. *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
  1395. }
  1396. p = &t1->flags[((flags_height + 1) * flags_stride)];
  1397. for (x = 0; x < flags_stride; ++x) {
  1398. /* magic value to hopefully stop any passes being interested in this entry */
  1399. *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
  1400. }
  1401. if (h % 4) {
  1402. OPJ_UINT32 v = 0;
  1403. p = &t1->flags[((flags_height) * flags_stride)];
  1404. if (h % 4 == 1) {
  1405. v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
  1406. } else if (h % 4 == 2) {
  1407. v |= T1_PI_2 | T1_PI_3;
  1408. } else if (h % 4 == 3) {
  1409. v |= T1_PI_3;
  1410. }
  1411. for (x = 0; x < flags_stride; ++x) {
  1412. *p++ = v;
  1413. }
  1414. }
  1415. }
  1416. t1->w = w;
  1417. t1->h = h;
  1418. return OPJ_TRUE;
  1419. }
  1420. /* ----------------------------------------------------------------------- */
  1421. /* ----------------------------------------------------------------------- */
  1422. /**
  1423. * Creates a new Tier 1 handle
  1424. * and initializes the look-up tables of the Tier-1 coder/decoder
  1425. * @return a new T1 handle if successful, returns NULL otherwise
  1426. */
  1427. opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
  1428. {
  1429. opj_t1_t *l_t1 = 00;
  1430. l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
  1431. if (!l_t1) {
  1432. return 00;
  1433. }
  1434. l_t1->encoder = isEncoder;
  1435. return l_t1;
  1436. }
  1437. /**
  1438. * Destroys a previously created T1 handle
  1439. *
  1440. * @param p_t1 Tier 1 handle to destroy
  1441. */
  1442. void opj_t1_destroy(opj_t1_t *p_t1)
  1443. {
  1444. if (! p_t1) {
  1445. return;
  1446. }
  1447. if (p_t1->data) {
  1448. opj_aligned_free(p_t1->data);
  1449. p_t1->data = 00;
  1450. }
  1451. if (p_t1->flags) {
  1452. opj_aligned_free(p_t1->flags);
  1453. p_t1->flags = 00;
  1454. }
  1455. opj_free(p_t1->cblkdatabuffer);
  1456. opj_free(p_t1);
  1457. }
  /**
   * Work item handed to opj_t1_clbl_decode_processor(): everything needed to
   * decode one code-block. The processor frees the job itself on its early
   * exit paths.
   */
  typedef struct {
      /* When false, the processor allocates cblk->decoded_data for the result */
      OPJ_BOOL whole_tile_decoding;
      /* Resolution index; resolutions[resno - 1] is used to offset sub-bands */
      OPJ_UINT32 resno;
      /* Code-block to decode */
      opj_tcd_cblk_dec_t* cblk;
      /* Band containing the code-block (bandno, origin, stepsize are read) */
      opj_tcd_band_t* band;
      /* Tile component owning the band (resolutions and output data) */
      opj_tcd_tilecomp_t* tilec;
      /* Coding parameters (cblksty, roishift, qmfbid are read) */
      opj_tccp_t* tccp;
      /* Copied into the T1 handle before decoding */
      OPJ_BOOL mustuse_cblkdatabuffer;
      /* Shared success flag: set to OPJ_FALSE on failure, and checked before */
      /* starting so that work is skipped once it is already false */
      volatile OPJ_BOOL* pret;
      /* Event manager used for error messages */
      opj_event_mgr_t *p_manager;
      /* Optional mutex locked around calls to the event manager */
      opj_mutex_t* p_manager_mutex;
      /* Forwarded to the code-block decoding function */
      OPJ_BOOL check_pterm;
  } opj_t1_cblk_decode_processing_job_t;
  1471. static void opj_t1_destroy_wrapper(void* t1)
  1472. {
  1473. opj_t1_destroy((opj_t1_t*) t1);
  1474. }
  1475. static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
  1476. {
  1477. opj_tcd_cblk_dec_t* cblk;
  1478. opj_tcd_band_t* band;
  1479. opj_tcd_tilecomp_t* tilec;
  1480. opj_tccp_t* tccp;
  1481. OPJ_INT32* OPJ_RESTRICT datap;
  1482. OPJ_UINT32 cblk_w, cblk_h;
  1483. OPJ_INT32 x, y;
  1484. OPJ_UINT32 i, j;
  1485. opj_t1_cblk_decode_processing_job_t* job;
  1486. opj_t1_t* t1;
  1487. OPJ_UINT32 resno;
  1488. OPJ_UINT32 tile_w;
  1489. job = (opj_t1_cblk_decode_processing_job_t*) user_data;
  1490. cblk = job->cblk;
  1491. if (!job->whole_tile_decoding) {
  1492. cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
  1493. cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
  1494. cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
  1495. cblk_w * cblk_h);
  1496. if (cblk->decoded_data == NULL) {
  1497. if (job->p_manager_mutex) {
  1498. opj_mutex_lock(job->p_manager_mutex);
  1499. }
  1500. opj_event_msg(job->p_manager, EVT_ERROR,
  1501. "Cannot allocate cblk->decoded_data\n");
  1502. if (job->p_manager_mutex) {
  1503. opj_mutex_unlock(job->p_manager_mutex);
  1504. }
  1505. *(job->pret) = OPJ_FALSE;
  1506. opj_free(job);
  1507. return;
  1508. }
  1509. /* Zero-init required */
  1510. memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
  1511. } else if (cblk->decoded_data) {
  1512. /* Not sure if that code path can happen, but better be */
  1513. /* safe than sorry */
  1514. opj_aligned_free(cblk->decoded_data);
  1515. cblk->decoded_data = NULL;
  1516. }
  1517. resno = job->resno;
  1518. band = job->band;
  1519. tilec = job->tilec;
  1520. tccp = job->tccp;
  1521. tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
  1522. -
  1523. tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
  1524. if (!*(job->pret)) {
  1525. opj_free(job);
  1526. return;
  1527. }
  1528. t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
  1529. if (t1 == NULL) {
  1530. t1 = opj_t1_create(OPJ_FALSE);
  1531. if (t1 == NULL) {
  1532. opj_event_msg(job->p_manager, EVT_ERROR,
  1533. "Cannot allocate Tier 1 handle\n");
  1534. *(job->pret) = OPJ_FALSE;
  1535. opj_free(job);
  1536. return;
  1537. }
  1538. if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
  1539. opj_event_msg(job->p_manager, EVT_ERROR,
  1540. "Unable to set t1 handle as TLS\n");
  1541. opj_t1_destroy(t1);
  1542. *(job->pret) = OPJ_FALSE;
  1543. opj_free(job);
  1544. return;
  1545. }
  1546. }
  1547. t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
  1548. if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
  1549. if (OPJ_FALSE == opj_t1_ht_decode_cblk(
  1550. t1,
  1551. cblk,
  1552. band->bandno,
  1553. (OPJ_UINT32)tccp->roishift,
  1554. tccp->cblksty,
  1555. job->p_manager,
  1556. job->p_manager_mutex,
  1557. job->check_pterm)) {
  1558. *(job->pret) = OPJ_FALSE;
  1559. opj_free(job);
  1560. return;
  1561. }
  1562. } else {
  1563. if (OPJ_FALSE == opj_t1_decode_cblk(
  1564. t1,
  1565. cblk,
  1566. band->bandno,
  1567. (OPJ_UINT32)tccp->roishift,
  1568. tccp->cblksty,
  1569. job->p_manager,
  1570. job->p_manager_mutex,
  1571. job->check_pterm)) {
  1572. *(job->pret) = OPJ_FALSE;
  1573. opj_free(job);
  1574. return;
  1575. }
  1576. }
  1577. x = cblk->x0 - band->x0;
  1578. y = cblk->y0 - band->y0;
  1579. if (band->bandno & 1) {
  1580. opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
  1581. x += pres->x1 - pres->x0;
  1582. }
  1583. if (band->bandno & 2) {
  1584. opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
  1585. y += pres->y1 - pres->y0;
  1586. }
  1587. datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
  1588. cblk_w = t1->w;
  1589. cblk_h = t1->h;
  1590. if (tccp->roishift) {
  1591. if (tccp->roishift >= 31) {
  1592. for (j = 0; j < cblk_h; ++j) {
  1593. for (i = 0; i < cblk_w; ++i) {
  1594. datap[(j * cblk_w) + i] = 0;
  1595. }
  1596. }
  1597. } else {
  1598. OPJ_INT32 thresh = 1 << tccp->roishift;
  1599. for (j = 0; j < cblk_h; ++j) {
  1600. for (i = 0; i < cblk_w; ++i) {
  1601. OPJ_INT32 val = datap[(j * cblk_w) + i];
  1602. OPJ_INT32 mag = abs(val);
  1603. if (mag >= thresh) {
  1604. mag >>= tccp->roishift;
  1605. datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
  1606. }
  1607. }
  1608. }
  1609. }
  1610. }
  1611. /* Both can be non NULL if for example decoding a full tile and then */
  1612. /* partially a tile. In which case partial decoding should be the */
  1613. /* priority */
  1614. assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
  1615. if (cblk->decoded_data) {
  1616. OPJ_UINT32 cblk_size = cblk_w * cblk_h;
  1617. if (tccp->qmfbid == 1) {
  1618. for (i = 0; i < cblk_size; ++i) {
  1619. datap[i] /= 2;
  1620. }
  1621. } else { /* if (tccp->qmfbid == 0) */
  1622. const float stepsize = 0.5f * band->stepsize;
  1623. i = 0;
  1624. #ifdef __SSE2__
  1625. {
  1626. const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
  1627. for (; i < (cblk_size & ~15U); i += 16) {
  1628. __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
  1629. datap + 0)));
  1630. __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
  1631. datap + 4)));
  1632. __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
  1633. datap + 8)));
  1634. __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
  1635. datap + 12)));
  1636. _mm_store_ps((float*)(datap + 0), _mm_mul_ps(xmm0_data, xmm_stepsize));
  1637. _mm_store_ps((float*)(datap + 4), _mm_mul_ps(xmm1_data, xmm_stepsize));
  1638. _mm_store_ps((float*)(datap + 8), _mm_mul_ps(xmm2_data, xmm_stepsize));
  1639. _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
  1640. datap += 16;
  1641. }
  1642. }
  1643. #endif
  1644. for (; i < cblk_size; ++i) {
  1645. OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
  1646. memcpy(datap, &tmp, sizeof(tmp));
  1647. datap++;
  1648. }
  1649. }
  1650. } else if (tccp->qmfbid == 1) {
  1651. OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
  1652. (OPJ_SIZE_T)x];
  1653. for (j = 0; j < cblk_h; ++j) {
  1654. //positive -> round down aka. (83)/2 = 41.5 -> 41
  1655. //negative -> round up aka. (-83)/2 = -41.5 -> -41
  1656. #if defined(__AVX512F__)
  1657. OPJ_INT32* ptr_in = datap + (j * cblk_w);
  1658. OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
  1659. for (i = 0; i < cblk_w / 16; ++i) {
  1660. __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in));
  1661. const __m512i add_avx = _mm512_srli_epi32(in_avx, 31);
  1662. in_avx = _mm512_add_epi32(in_avx, add_avx);
  1663. _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1));
  1664. ptr_in += 16;
  1665. ptr_out += 16;
  1666. }
  1667. for (i = 0; i < cblk_w % 16; ++i) {
  1668. ptr_out[i] = ptr_in[i] / 2;
  1669. }
  1670. #elif defined(__AVX2__)
  1671. OPJ_INT32* ptr_in = datap + (j * cblk_w);
  1672. OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
  1673. for (i = 0; i < cblk_w / 8; ++i) {
  1674. __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in));
  1675. const __m256i add_avx = _mm256_srli_epi32(in_avx, 31);
  1676. in_avx = _mm256_add_epi32(in_avx, add_avx);
  1677. _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1));
  1678. ptr_in += 8;
  1679. ptr_out += 8;
  1680. }
  1681. for (i = 0; i < cblk_w % 8; ++i) {
  1682. ptr_out[i] = ptr_in[i] / 2;
  1683. }
  1684. #else
  1685. i = 0;
  1686. for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
  1687. OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
  1688. OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
  1689. OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
  1690. OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
  1691. ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
  1692. ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
  1693. ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
  1694. ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
  1695. }
  1696. for (; i < cblk_w; ++i) {
  1697. OPJ_INT32 tmp = datap[(j * cblk_w) + i];
  1698. ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
  1699. }
  1700. #endif
  1701. }
  1702. } else { /* if (tccp->qmfbid == 0) */
  1703. const float stepsize = 0.5f * band->stepsize;
  1704. OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
  1705. tile_w + (OPJ_SIZE_T)x];
  1706. for (j = 0; j < cblk_h; ++j) {
  1707. OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
  1708. for (i = 0; i < cblk_w; ++i) {
  1709. OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
  1710. *tiledp2 = tmp;
  1711. datap++;
  1712. tiledp2++;
  1713. }
  1714. tiledp += tile_w;
  1715. }
  1716. }
  1717. opj_free(job);
  1718. }
  1719. void opj_t1_decode_cblks(opj_tcd_t* tcd,
  1720. volatile OPJ_BOOL* pret,
  1721. opj_tcd_tilecomp_t* tilec,
  1722. opj_tccp_t* tccp,
  1723. opj_event_mgr_t *p_manager,
  1724. opj_mutex_t* p_manager_mutex,
  1725. OPJ_BOOL check_pterm
  1726. )
  1727. {
  1728. opj_thread_pool_t* tp = tcd->thread_pool;
  1729. OPJ_UINT32 resno, bandno, precno, cblkno;
  1730. #ifdef DEBUG_VERBOSE
  1731. OPJ_UINT32 codeblocks_decoded = 0;
  1732. printf("Enter opj_t1_decode_cblks()\n");
  1733. #endif
  1734. for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
  1735. opj_tcd_resolution_t* res = &tilec->resolutions[resno];
  1736. for (bandno = 0; bandno < res->numbands; ++bandno) {
  1737. opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
  1738. for (precno = 0; precno < res->pw * res->ph; ++precno) {
  1739. opj_tcd_precinct_t* precinct = &band->precincts[precno];
  1740. if (!opj_tcd_is_subband_area_of_interest(tcd,
  1741. tilec->compno,
  1742. resno,
  1743. band->bandno,
  1744. (OPJ_UINT32)precinct->x0,
  1745. (OPJ_UINT32)precinct->y0,
  1746. (OPJ_UINT32)precinct->x1,
  1747. (OPJ_UINT32)precinct->y1)) {
  1748. for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
  1749. opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
  1750. if (cblk->decoded_data) {
  1751. #ifdef DEBUG_VERBOSE
  1752. printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
  1753. cblk->x0, cblk->y0, resno, bandno);
  1754. #endif
  1755. opj_aligned_free(cblk->decoded_data);
  1756. cblk->decoded_data = NULL;
  1757. }
  1758. }
  1759. continue;
  1760. }
  1761. for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
  1762. opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
  1763. opj_t1_cblk_decode_processing_job_t* job;
  1764. if (!opj_tcd_is_subband_area_of_interest(tcd,
  1765. tilec->compno,
  1766. resno,
  1767. band->bandno,
  1768. (OPJ_UINT32)cblk->x0,
  1769. (OPJ_UINT32)cblk->y0,
  1770. (OPJ_UINT32)cblk->x1,
  1771. (OPJ_UINT32)cblk->y1)) {
  1772. if (cblk->decoded_data) {
  1773. #ifdef DEBUG_VERBOSE
  1774. printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
  1775. cblk->x0, cblk->y0, resno, bandno);
  1776. #endif
  1777. opj_aligned_free(cblk->decoded_data);
  1778. cblk->decoded_data = NULL;
  1779. }
  1780. continue;
  1781. }
  1782. if (!tcd->whole_tile_decoding) {
  1783. OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
  1784. OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
  1785. if (cblk->decoded_data != NULL) {
  1786. #ifdef DEBUG_VERBOSE
  1787. printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
  1788. cblk->x0, cblk->y0, resno, bandno);
  1789. #endif
  1790. continue;
  1791. }
  1792. if (cblk_w == 0 || cblk_h == 0) {
  1793. continue;
  1794. }
  1795. #ifdef DEBUG_VERBOSE
  1796. printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
  1797. cblk->x0, cblk->y0, resno, bandno);
  1798. #endif
  1799. }
  1800. job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
  1801. sizeof(opj_t1_cblk_decode_processing_job_t));
  1802. if (!job) {
  1803. *pret = OPJ_FALSE;
  1804. return;
  1805. }
  1806. job->whole_tile_decoding = tcd->whole_tile_decoding;
  1807. job->resno = resno;
  1808. job->cblk = cblk;
  1809. job->band = band;
  1810. job->tilec = tilec;
  1811. job->tccp = tccp;
  1812. job->pret = pret;
  1813. job->p_manager_mutex = p_manager_mutex;
  1814. job->p_manager = p_manager;
  1815. job->check_pterm = check_pterm;
  1816. job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
  1817. opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
  1818. #ifdef DEBUG_VERBOSE
  1819. codeblocks_decoded ++;
  1820. #endif
  1821. if (!(*pret)) {
  1822. return;
  1823. }
  1824. } /* cblkno */
  1825. } /* precno */
  1826. } /* bandno */
  1827. } /* resno */
  1828. #ifdef DEBUG_VERBOSE
  1829. printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
  1830. #endif
  1831. return;
  1832. }
  1833. static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
  1834. opj_tcd_cblk_dec_t* cblk,
  1835. OPJ_UINT32 orient,
  1836. OPJ_UINT32 roishift,
  1837. OPJ_UINT32 cblksty,
  1838. opj_event_mgr_t *p_manager,
  1839. opj_mutex_t* p_manager_mutex,
  1840. OPJ_BOOL check_pterm)
  1841. {
  1842. opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
  1843. OPJ_INT32 bpno_plus_one;
  1844. OPJ_UINT32 passtype;
  1845. OPJ_UINT32 segno, passno;
  1846. OPJ_BYTE* cblkdata = NULL;
  1847. OPJ_UINT32 cblkdataindex = 0;
  1848. OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
  1849. OPJ_INT32* original_t1_data = NULL;
  1850. mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
  1851. if (!opj_t1_allocate_buffers(
  1852. t1,
  1853. (OPJ_UINT32)(cblk->x1 - cblk->x0),
  1854. (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
  1855. return OPJ_FALSE;
  1856. }
  1857. bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
  1858. if (bpno_plus_one >= 31) {
  1859. if (p_manager_mutex) {
  1860. opj_mutex_lock(p_manager_mutex);
  1861. }
  1862. opj_event_msg(p_manager, EVT_WARNING,
  1863. "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
  1864. bpno_plus_one);
  1865. if (p_manager_mutex) {
  1866. opj_mutex_unlock(p_manager_mutex);
  1867. }
  1868. return OPJ_FALSE;
  1869. }
  1870. passtype = 2;
  1871. opj_mqc_resetstates(mqc);
  1872. opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
  1873. opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
  1874. opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
  1875. if (cblk->corrupted) {
  1876. assert(cblk->numchunks == 0);
  1877. return OPJ_TRUE;
  1878. }
  1879. /* Even if we have a single chunk, in multi-threaded decoding */
  1880. /* the insertion of our synthetic marker might potentially override */
  1881. /* valid codestream of other codeblocks decoded in parallel. */
  1882. if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer &&
  1883. cblk->numchunks > 0)) {
  1884. OPJ_UINT32 i;
  1885. OPJ_UINT32 cblk_len;
  1886. /* Compute whole codeblock length from chunk lengths */
  1887. cblk_len = 0;
  1888. for (i = 0; i < cblk->numchunks; i++) {
  1889. cblk_len += cblk->chunks[i].len;
  1890. }
  1891. /* Allocate temporary memory if needed */
  1892. if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
  1893. cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
  1894. cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
  1895. if (cblkdata == NULL) {
  1896. return OPJ_FALSE;
  1897. }
  1898. t1->cblkdatabuffer = cblkdata;
  1899. memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
  1900. t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
  1901. }
  1902. /* Concatenate all chunks */
  1903. cblkdata = t1->cblkdatabuffer;
  1904. cblk_len = 0;
  1905. for (i = 0; i < cblk->numchunks; i++) {
  1906. memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
  1907. cblk_len += cblk->chunks[i].len;
  1908. }
  1909. } else if (cblk->numchunks == 1) {
  1910. cblkdata = cblk->chunks[0].data;
  1911. } else {
  1912. /* Not sure if that can happen in practice, but avoid Coverity to */
  1913. /* think we will dereference a null cblkdta pointer */
  1914. return OPJ_TRUE;
  1915. }
  1916. /* For subtile decoding, directly decode in the decoded_data buffer of */
  1917. /* the code-block. Hack t1->data to point to it, and restore it later */
  1918. if (cblk->decoded_data) {
  1919. original_t1_data = t1->data;
  1920. t1->data = cblk->decoded_data;
  1921. }
  1922. for (segno = 0; segno < cblk->real_num_segs; ++segno) {
  1923. opj_tcd_seg_t *seg = &cblk->segs[segno];
  1924. /* BYPASS mode */
  1925. type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
  1926. (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
  1927. if (type == T1_TYPE_RAW) {
  1928. opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
  1929. OPJ_COMMON_CBLK_DATA_EXTRA);
  1930. } else {
  1931. opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
  1932. OPJ_COMMON_CBLK_DATA_EXTRA);
  1933. }
  1934. cblkdataindex += seg->len;
  1935. for (passno = 0; (passno < seg->real_num_passes) &&
  1936. (bpno_plus_one >= 1); ++passno) {
  1937. switch (passtype) {
  1938. case 0:
  1939. if (type == T1_TYPE_RAW) {
  1940. opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
  1941. } else {
  1942. opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
  1943. }
  1944. break;
  1945. case 1:
  1946. if (type == T1_TYPE_RAW) {
  1947. opj_t1_dec_refpass_raw(t1, bpno_plus_one);
  1948. } else {
  1949. opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
  1950. }
  1951. break;
  1952. case 2:
  1953. opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
  1954. break;
  1955. }
  1956. if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
  1957. opj_mqc_resetstates(mqc);
  1958. opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
  1959. opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
  1960. opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
  1961. }
  1962. if (++passtype == 3) {
  1963. passtype = 0;
  1964. bpno_plus_one--;
  1965. }
  1966. }
  1967. opq_mqc_finish_dec(mqc);
  1968. }
  1969. if (check_pterm) {
  1970. if (mqc->bp + 2 < mqc->end) {
  1971. if (p_manager_mutex) {
  1972. opj_mutex_lock(p_manager_mutex);
  1973. }
  1974. opj_event_msg(p_manager, EVT_WARNING,
  1975. "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
  1976. (int)(mqc->end - mqc->bp) - 2,
  1977. (int)(mqc->bp - mqc->start),
  1978. (int)(mqc->end - mqc->start));
  1979. if (p_manager_mutex) {
  1980. opj_mutex_unlock(p_manager_mutex);
  1981. }
  1982. } else if (mqc->end_of_byte_stream_counter > 2) {
  1983. if (p_manager_mutex) {
  1984. opj_mutex_lock(p_manager_mutex);
  1985. }
  1986. opj_event_msg(p_manager, EVT_WARNING,
  1987. "PTERM check failure: %d synthesized 0xFF markers read\n",
  1988. mqc->end_of_byte_stream_counter);
  1989. if (p_manager_mutex) {
  1990. opj_mutex_unlock(p_manager_mutex);
  1991. }
  1992. }
  1993. }
  1994. /* Restore original t1->data is needed */
  1995. if (cblk->decoded_data) {
  1996. t1->data = original_t1_data;
  1997. }
  1998. return OPJ_TRUE;
  1999. }
  2000. typedef struct {
  2001. OPJ_UINT32 compno;
  2002. OPJ_UINT32 resno;
  2003. opj_tcd_cblk_enc_t* cblk;
  2004. opj_tcd_tile_t *tile;
  2005. opj_tcd_band_t* band;
  2006. opj_tcd_tilecomp_t* tilec;
  2007. opj_tccp_t* tccp;
  2008. const OPJ_FLOAT64 * mct_norms;
  2009. OPJ_UINT32 mct_numcomps;
  2010. volatile OPJ_BOOL* pret;
  2011. opj_mutex_t* mutex;
  2012. } opj_t1_cblk_encode_processing_job_t;
  2013. /** Procedure to deal with a asynchronous code-block encoding job.
  2014. *
  2015. * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
  2016. * @param tls TLS handle.
  2017. */
  2018. static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls)
  2019. {
  2020. opj_t1_cblk_encode_processing_job_t* job =
  2021. (opj_t1_cblk_encode_processing_job_t*)user_data;
  2022. opj_tcd_cblk_enc_t* cblk = job->cblk;
  2023. const opj_tcd_band_t* band = job->band;
  2024. const opj_tcd_tilecomp_t* tilec = job->tilec;
  2025. const opj_tccp_t* tccp = job->tccp;
  2026. const OPJ_UINT32 resno = job->resno;
  2027. opj_t1_t* t1;
  2028. const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
  2029. OPJ_INT32* OPJ_RESTRICT tiledp;
  2030. OPJ_UINT32 cblk_w;
  2031. OPJ_UINT32 cblk_h;
  2032. OPJ_UINT32 i, j;
  2033. OPJ_INT32 x = cblk->x0 - band->x0;
  2034. OPJ_INT32 y = cblk->y0 - band->y0;
  2035. if (!*(job->pret)) {
  2036. opj_free(job);
  2037. return;
  2038. }
  2039. t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
  2040. if (t1 == NULL) {
  2041. t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
  2042. opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
  2043. }
  2044. if (band->bandno & 1) {
  2045. opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
  2046. x += pres->x1 - pres->x0;
  2047. }
  2048. if (band->bandno & 2) {
  2049. opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
  2050. y += pres->y1 - pres->y0;
  2051. }
  2052. if (!opj_t1_allocate_buffers(
  2053. t1,
  2054. (OPJ_UINT32)(cblk->x1 - cblk->x0),
  2055. (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
  2056. *(job->pret) = OPJ_FALSE;
  2057. opj_free(job);
  2058. return;
  2059. }
  2060. cblk_w = t1->w;
  2061. cblk_h = t1->h;
  2062. tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
  2063. if (tccp->qmfbid == 1) {
  2064. /* Do multiplication on unsigned type, even if the
  2065. * underlying type is signed, to avoid potential
  2066. * int overflow on large value (the output will be
  2067. * incorrect in such situation, but whatever...)
  2068. * This assumes complement-to-2 signed integer
  2069. * representation
  2070. * Fixes https://github.com/uclouvain/openjpeg/issues/1053
  2071. */
  2072. OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
  2073. OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
  2074. /* Change from "natural" order to "zigzag" order of T1 passes */
  2075. for (j = 0; j < (cblk_h & ~3U); j += 4) {
  2076. #if defined(__AVX512F__)
  2077. const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13);
  2078. const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0);
  2079. OPJ_UINT32* ptr = tiledp_u;
  2080. for (i = 0; i < cblk_w / 16; ++i) {
  2081. // INPUT OUTPUT
  2082. // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
  2083. // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
  2084. // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
  2085. // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
  2086. __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
  2087. (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
  2088. __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
  2089. (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
  2090. __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
  2091. (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
  2092. __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
  2093. (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
  2094. __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2);
  2095. __m512i tmp2 = _mm512_unpacklo_epi32(in3, in4);
  2096. __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2);
  2097. __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4);
  2098. in1 = _mm512_unpacklo_epi64(tmp1, tmp2);
  2099. in2 = _mm512_unpacklo_epi64(tmp3, tmp4);
  2100. in3 = _mm512_unpackhi_epi64(tmp1, tmp2);
  2101. in4 = _mm512_unpackhi_epi64(tmp3, tmp4);
  2102. _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1));
  2103. _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3));
  2104. _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2));
  2105. _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4));
  2106. tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3);
  2107. tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4);
  2108. _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1));
  2109. _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2));
  2110. _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1,
  2111. 0x1));
  2112. _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2,
  2113. 0x1));
  2114. _mm256_storeu_si256((__m256i*)(t1data + 48),
  2115. _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3)));
  2116. _mm256_storeu_si256((__m256i*)(t1data + 56),
  2117. _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4)));
  2118. t1data += 64;
  2119. ptr += 16;
  2120. }
  2121. for (i = 0; i < cblk_w % 16; ++i) {
  2122. t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
  2123. t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
  2124. t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
  2125. t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
  2126. t1data += 4;
  2127. ptr += 1;
  2128. }
  2129. #elif defined(__AVX2__)
  2130. OPJ_UINT32* ptr = tiledp_u;
  2131. for (i = 0; i < cblk_w / 8; ++i) {
  2132. // INPUT OUTPUT
  2133. // 00 01 02 03 04 05 06 07 00 10 20 30 01 11 21 31
  2134. // 10 11 12 13 14 15 16 17 02 12 22 32 03 13 23 33
  2135. // 20 21 22 23 24 25 26 27 04 14 24 34 05 15 25 35
  2136. // 30 31 32 33 34 35 36 37 06 16 26 36 07 17 27 37
  2137. __m256i in1 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
  2138. (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
  2139. __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
  2140. (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
  2141. __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
  2142. (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
  2143. __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
  2144. (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
  2145. __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2);
  2146. __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4);
  2147. __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2);
  2148. __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4);
  2149. in1 = _mm256_unpacklo_epi64(tmp1, tmp2);
  2150. in2 = _mm256_unpacklo_epi64(tmp3, tmp4);
  2151. in3 = _mm256_unpackhi_epi64(tmp1, tmp2);
  2152. in4 = _mm256_unpackhi_epi64(tmp3, tmp4);
  2153. _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1));
  2154. _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3));
  2155. _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2));
  2156. _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4));
  2157. _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3,
  2158. 0x31));
  2159. _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4,
  2160. 0x31));
  2161. t1data += 32;
  2162. ptr += 8;
  2163. }
  2164. for (i = 0; i < cblk_w % 8; ++i) {
  2165. t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
  2166. t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
  2167. t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
  2168. t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
  2169. t1data += 4;
  2170. ptr += 1;
  2171. }
  2172. #else
  2173. for (i = 0; i < cblk_w; ++i) {
  2174. t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
  2175. t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
  2176. t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
  2177. t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
  2178. t1data += 4;
  2179. }
  2180. #endif
  2181. }
  2182. if (j < cblk_h) {
  2183. for (i = 0; i < cblk_w; ++i) {
  2184. OPJ_UINT32 k;
  2185. for (k = j; k < cblk_h; k++) {
  2186. t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
  2187. t1data ++;
  2188. }
  2189. }
  2190. }
  2191. } else { /* if (tccp->qmfbid == 0) */
  2192. OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
  2193. OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
  2194. /* Change from "natural" order to "zigzag" order of T1 passes */
  2195. for (j = 0; j < (cblk_h & ~3U); j += 4) {
  2196. for (i = 0; i < cblk_w; ++i) {
  2197. t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
  2198. band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
  2199. t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
  2200. band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
  2201. t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
  2202. band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
  2203. t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
  2204. band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
  2205. t1data += 4;
  2206. }
  2207. }
  2208. if (j < cblk_h) {
  2209. for (i = 0; i < cblk_w; ++i) {
  2210. OPJ_UINT32 k;
  2211. for (k = j; k < cblk_h; k++) {
  2212. t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
  2213. * (1 << T1_NMSEDEC_FRACBITS));
  2214. t1data ++;
  2215. }
  2216. }
  2217. }
  2218. }
  2219. {
  2220. OPJ_FLOAT64 cumwmsedec =
  2221. opj_t1_encode_cblk(
  2222. t1,
  2223. cblk,
  2224. band->bandno,
  2225. job->compno,
  2226. tilec->numresolutions - 1 - resno,
  2227. tccp->qmfbid,
  2228. band->stepsize,
  2229. tccp->cblksty,
  2230. job->tile->numcomps,
  2231. job->mct_norms,
  2232. job->mct_numcomps);
  2233. if (job->mutex) {
  2234. opj_mutex_lock(job->mutex);
  2235. }
  2236. job->tile->distotile += cumwmsedec;
  2237. if (job->mutex) {
  2238. opj_mutex_unlock(job->mutex);
  2239. }
  2240. }
  2241. opj_free(job);
  2242. }
  2243. OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
  2244. opj_tcd_tile_t *tile,
  2245. opj_tcp_t *tcp,
  2246. const OPJ_FLOAT64 * mct_norms,
  2247. OPJ_UINT32 mct_numcomps
  2248. )
  2249. {
  2250. volatile OPJ_BOOL ret = OPJ_TRUE;
  2251. opj_thread_pool_t* tp = tcd->thread_pool;
  2252. OPJ_UINT32 compno, resno, bandno, precno, cblkno;
  2253. opj_mutex_t* mutex = opj_mutex_create();
  2254. tile->distotile = 0;
  2255. for (compno = 0; compno < tile->numcomps; ++compno) {
  2256. opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
  2257. opj_tccp_t* tccp = &tcp->tccps[compno];
  2258. for (resno = 0; resno < tilec->numresolutions; ++resno) {
  2259. opj_tcd_resolution_t *res = &tilec->resolutions[resno];
  2260. for (bandno = 0; bandno < res->numbands; ++bandno) {
  2261. opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
  2262. /* Skip empty bands */
  2263. if (opj_tcd_is_band_empty(band)) {
  2264. continue;
  2265. }
  2266. for (precno = 0; precno < res->pw * res->ph; ++precno) {
  2267. opj_tcd_precinct_t *prc = &band->precincts[precno];
  2268. for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
  2269. opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
  2270. opj_t1_cblk_encode_processing_job_t* job =
  2271. (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
  2272. sizeof(opj_t1_cblk_encode_processing_job_t));
  2273. if (!job) {
  2274. ret = OPJ_FALSE;
  2275. goto end;
  2276. }
  2277. job->compno = compno;
  2278. job->tile = tile;
  2279. job->resno = resno;
  2280. job->cblk = cblk;
  2281. job->band = band;
  2282. job->tilec = tilec;
  2283. job->tccp = tccp;
  2284. job->mct_norms = mct_norms;
  2285. job->mct_numcomps = mct_numcomps;
  2286. job->pret = &ret;
  2287. job->mutex = mutex;
  2288. opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job);
  2289. } /* cblkno */
  2290. } /* precno */
  2291. } /* bandno */
  2292. } /* resno */
  2293. } /* compno */
  2294. end:
  2295. opj_thread_pool_wait_completion(tcd->thread_pool, 0);
  2296. if (mutex) {
  2297. opj_mutex_destroy(mutex);
  2298. }
  2299. return ret;
  2300. }
  2301. /* Returns whether the pass (bpno, passtype) is terminated */
  2302. static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
  2303. OPJ_UINT32 cblksty,
  2304. OPJ_INT32 bpno,
  2305. OPJ_UINT32 passtype)
  2306. {
  2307. /* Is it the last cleanup pass ? */
  2308. if (passtype == 2 && bpno == 0) {
  2309. return OPJ_TRUE;
  2310. }
  2311. if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
  2312. return OPJ_TRUE;
  2313. }
  2314. if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
  2315. /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
  2316. if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
  2317. return OPJ_TRUE;
  2318. }
  2319. /* and beyond terminate all the magnitude refinement passes (in raw) */
  2320. /* and cleanup passes (in MQC) */
  2321. if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
  2322. return OPJ_TRUE;
  2323. }
  2324. }
  2325. return OPJ_FALSE;
  2326. }
  2327. static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
  2328. opj_tcd_cblk_enc_t* cblk,
  2329. OPJ_UINT32 orient,
  2330. OPJ_UINT32 compno,
  2331. OPJ_UINT32 level,
  2332. OPJ_UINT32 qmfbid,
  2333. OPJ_FLOAT64 stepsize,
  2334. OPJ_UINT32 cblksty,
  2335. OPJ_UINT32 numcomps,
  2336. const OPJ_FLOAT64 * mct_norms,
  2337. OPJ_UINT32 mct_numcomps)
  2338. {
  2339. OPJ_FLOAT64 cumwmsedec = 0.0;
  2340. opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
  2341. OPJ_UINT32 passno;
  2342. OPJ_INT32 bpno;
  2343. OPJ_UINT32 passtype;
  2344. OPJ_INT32 nmsedec = 0;
  2345. OPJ_INT32 max;
  2346. OPJ_UINT32 i, j;
  2347. OPJ_BYTE type = T1_TYPE_MQ;
  2348. OPJ_FLOAT64 tempwmsedec;
  2349. OPJ_INT32* datap;
  2350. #ifdef EXTRA_DEBUG
  2351. printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
  2352. cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
  2353. #endif
  2354. mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
  2355. max = 0;
  2356. datap = t1->data;
  2357. for (j = 0; j < t1->h; ++j) {
  2358. const OPJ_UINT32 w = t1->w;
  2359. for (i = 0; i < w; ++i, ++datap) {
  2360. OPJ_INT32 tmp = *datap;
  2361. if (tmp < 0) {
  2362. OPJ_UINT32 tmp_unsigned;
  2363. if (tmp == INT_MIN) {
  2364. /* To avoid undefined behaviour when negating INT_MIN */
  2365. /* but if we go here, it means we have supplied an input */
  2366. /* with more bit depth than we we can really support. */
  2367. /* Cf https://github.com/uclouvain/openjpeg/issues/1432 */
  2368. tmp = INT_MIN + 1;
  2369. }
  2370. max = opj_int_max(max, -tmp);
  2371. tmp_unsigned = opj_to_smr(tmp);
  2372. memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
  2373. } else {
  2374. max = opj_int_max(max, tmp);
  2375. }
  2376. }
  2377. }
  2378. cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
  2379. T1_NMSEDEC_FRACBITS) : 0;
  2380. if (cblk->numbps == 0) {
  2381. cblk->totalpasses = 0;
  2382. return cumwmsedec;
  2383. }
  2384. bpno = (OPJ_INT32)(cblk->numbps - 1);
  2385. passtype = 2;
  2386. opj_mqc_resetstates(mqc);
  2387. opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
  2388. opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
  2389. opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
  2390. opj_mqc_init_enc(mqc, cblk->data);
  2391. for (passno = 0; bpno >= 0; ++passno) {
  2392. opj_tcd_pass_t *pass = &cblk->passes[passno];
  2393. type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
  2394. (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
  2395. /* If the previous pass was terminating, we need to reset the encoder */
  2396. if (passno > 0 && cblk->passes[passno - 1].term) {
  2397. if (type == T1_TYPE_RAW) {
  2398. opj_mqc_bypass_init_enc(mqc);
  2399. } else {
  2400. opj_mqc_restart_init_enc(mqc);
  2401. }
  2402. }
  2403. switch (passtype) {
  2404. case 0:
  2405. opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
  2406. break;
  2407. case 1:
  2408. opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
  2409. break;
  2410. case 2:
  2411. opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
  2412. /* code switch SEGMARK (i.e. SEGSYM) */
  2413. if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
  2414. opj_mqc_segmark_enc(mqc);
  2415. }
  2416. break;
  2417. }
  2418. tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
  2419. stepsize, numcomps, mct_norms, mct_numcomps) ;
  2420. cumwmsedec += tempwmsedec;
  2421. pass->distortiondec = cumwmsedec;
  2422. if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
  2423. /* If it is a terminated pass, terminate it */
  2424. if (type == T1_TYPE_RAW) {
  2425. opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
  2426. } else {
  2427. if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
  2428. opj_mqc_erterm_enc(mqc);
  2429. } else {
  2430. opj_mqc_flush(mqc);
  2431. }
  2432. }
  2433. pass->term = 1;
  2434. pass->rate = opj_mqc_numbytes(mqc);
  2435. } else {
  2436. /* Non terminated pass */
  2437. OPJ_UINT32 rate_extra_bytes;
  2438. if (type == T1_TYPE_RAW) {
  2439. rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
  2440. mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
  2441. } else {
  2442. rate_extra_bytes = 3;
  2443. }
  2444. pass->term = 0;
  2445. pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
  2446. }
  2447. if (++passtype == 3) {
  2448. passtype = 0;
  2449. bpno--;
  2450. }
  2451. /* Code-switch "RESET" */
  2452. if (cblksty & J2K_CCP_CBLKSTY_RESET) {
  2453. opj_mqc_reset_enc(mqc);
  2454. }
  2455. }
  2456. cblk->totalpasses = passno;
  2457. if (cblk->totalpasses) {
  2458. /* Make sure that pass rates are increasing */
  2459. OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
  2460. for (passno = cblk->totalpasses; passno > 0;) {
  2461. opj_tcd_pass_t *pass = &cblk->passes[--passno];
  2462. if (pass->rate > last_pass_rate) {
  2463. pass->rate = last_pass_rate;
  2464. } else {
  2465. last_pass_rate = pass->rate;
  2466. }
  2467. }
  2468. }
  2469. for (passno = 0; passno < cblk->totalpasses; passno++) {
  2470. opj_tcd_pass_t *pass = &cblk->passes[passno];
  2471. /* Prevent generation of FF as last data byte of a pass*/
  2472. /* For terminating passes, the flushing procedure ensured this already */
  2473. assert(pass->rate > 0);
  2474. if (cblk->data[pass->rate - 1] == 0xFF) {
  2475. pass->rate--;
  2476. }
  2477. pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
  2478. }
  2479. #ifdef EXTRA_DEBUG
  2480. printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
  2481. /* Check that there not 0xff >=0x90 sequences */
  2482. if (cblk->totalpasses) {
  2483. OPJ_UINT32 i;
  2484. OPJ_UINT32 len = opj_mqc_numbytes(mqc);
  2485. for (i = 1; i < len; ++i) {
  2486. if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
  2487. printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
  2488. abort();
  2489. }
  2490. }
  2491. }
  2492. #endif
  2493. return cumwmsedec;
  2494. }