jsimd.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076
  1. /*
  2. * jsimd_x86_64.c
  3. *
  4. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  5. * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
  6. * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
  7. *
  8. * Based on the x86 SIMD extension for IJG JPEG library,
  9. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  10. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  11. *
  12. * This file contains the interface between the "normal" portions
  13. * of the library and the SIMD implementations when running on a
  14. * 64-bit x86 architecture.
  15. */
  16. #define JPEG_INTERNALS
  17. #include "../../jinclude.h"
  18. #include "../../jpeglib.h"
  19. #include "../../jsimd.h"
  20. #include "../../jdct.h"
  21. #include "../../jsimddct.h"
  22. #include "../jsimd.h"
  23. #include "jconfigint.h"
  24. /*
  25. * In the PIC cases, we have no guarantee that constants will keep
  26. * their alignment. This macro allows us to verify it at runtime.
  27. */
  28. #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0)
  29. #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
  30. #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
  31. static unsigned int simd_support = (unsigned int)(~0);
  32. static unsigned int simd_huffman = 1;
  33. /*
  34. * Check what SIMD accelerations are supported.
  35. *
  36. * FIXME: This code is racy under a multi-threaded environment.
  37. */
  38. LOCAL(void)
  39. init_simd(void)
  40. {
  41. #ifndef NO_GETENV
  42. char env[2] = { 0 };
  43. #endif
  44. if (simd_support != ~0U)
  45. return;
  46. simd_support = jpeg_simd_cpu_support();
  47. #ifndef NO_GETENV
  48. /* Force different settings through environment variables */
  49. if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
  50. simd_support &= JSIMD_SSE2;
  51. if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
  52. simd_support &= JSIMD_AVX2;
  53. if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
  54. simd_support = 0;
  55. if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
  56. simd_huffman = 0;
  57. #endif
  58. }
  59. GLOBAL(int)
  60. jsimd_can_rgb_ycc(void)
  61. {
  62. init_simd();
  63. /* The code is optimised for these values only */
  64. if (BITS_IN_JSAMPLE != 8)
  65. return 0;
  66. if (sizeof(JDIMENSION) != 4)
  67. return 0;
  68. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  69. return 0;
  70. if ((simd_support & JSIMD_AVX2) &&
  71. IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
  72. return 1;
  73. if ((simd_support & JSIMD_SSE2) &&
  74. IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
  75. return 1;
  76. return 0;
  77. }
  78. GLOBAL(int)
  79. jsimd_can_rgb_gray(void)
  80. {
  81. init_simd();
  82. /* The code is optimised for these values only */
  83. if (BITS_IN_JSAMPLE != 8)
  84. return 0;
  85. if (sizeof(JDIMENSION) != 4)
  86. return 0;
  87. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  88. return 0;
  89. if ((simd_support & JSIMD_AVX2) &&
  90. IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
  91. return 1;
  92. if ((simd_support & JSIMD_SSE2) &&
  93. IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
  94. return 1;
  95. return 0;
  96. }
  97. GLOBAL(int)
  98. jsimd_can_ycc_rgb(void)
  99. {
  100. init_simd();
  101. #ifndef WITH_SANITIZER
  102. /* The code is optimised for these values only */
  103. if (BITS_IN_JSAMPLE != 8)
  104. return 0;
  105. if (sizeof(JDIMENSION) != 4)
  106. return 0;
  107. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  108. return 0;
  109. if ((simd_support & JSIMD_AVX2) &&
  110. IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
  111. return 1;
  112. if ((simd_support & JSIMD_SSE2) &&
  113. IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
  114. return 1;
  115. #endif
  116. return 0;
  117. }
  118. GLOBAL(int)
  119. jsimd_can_ycc_rgb565(void)
  120. {
  121. return 0;
  122. }
  123. GLOBAL(void)
  124. jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  125. JSAMPIMAGE output_buf, JDIMENSION output_row,
  126. int num_rows)
  127. {
  128. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  129. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  130. switch (cinfo->in_color_space) {
  131. case JCS_EXT_RGB:
  132. avx2fct = jsimd_extrgb_ycc_convert_avx2;
  133. sse2fct = jsimd_extrgb_ycc_convert_sse2;
  134. break;
  135. case JCS_EXT_RGBX:
  136. case JCS_EXT_RGBA:
  137. avx2fct = jsimd_extrgbx_ycc_convert_avx2;
  138. sse2fct = jsimd_extrgbx_ycc_convert_sse2;
  139. break;
  140. case JCS_EXT_BGR:
  141. avx2fct = jsimd_extbgr_ycc_convert_avx2;
  142. sse2fct = jsimd_extbgr_ycc_convert_sse2;
  143. break;
  144. case JCS_EXT_BGRX:
  145. case JCS_EXT_BGRA:
  146. avx2fct = jsimd_extbgrx_ycc_convert_avx2;
  147. sse2fct = jsimd_extbgrx_ycc_convert_sse2;
  148. break;
  149. case JCS_EXT_XBGR:
  150. case JCS_EXT_ABGR:
  151. avx2fct = jsimd_extxbgr_ycc_convert_avx2;
  152. sse2fct = jsimd_extxbgr_ycc_convert_sse2;
  153. break;
  154. case JCS_EXT_XRGB:
  155. case JCS_EXT_ARGB:
  156. avx2fct = jsimd_extxrgb_ycc_convert_avx2;
  157. sse2fct = jsimd_extxrgb_ycc_convert_sse2;
  158. break;
  159. default:
  160. avx2fct = jsimd_rgb_ycc_convert_avx2;
  161. sse2fct = jsimd_rgb_ycc_convert_sse2;
  162. break;
  163. }
  164. if (simd_support & JSIMD_AVX2)
  165. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  166. else
  167. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  168. }
  169. GLOBAL(void)
  170. jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  171. JSAMPIMAGE output_buf, JDIMENSION output_row,
  172. int num_rows)
  173. {
  174. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  175. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  176. switch (cinfo->in_color_space) {
  177. case JCS_EXT_RGB:
  178. avx2fct = jsimd_extrgb_gray_convert_avx2;
  179. sse2fct = jsimd_extrgb_gray_convert_sse2;
  180. break;
  181. case JCS_EXT_RGBX:
  182. case JCS_EXT_RGBA:
  183. avx2fct = jsimd_extrgbx_gray_convert_avx2;
  184. sse2fct = jsimd_extrgbx_gray_convert_sse2;
  185. break;
  186. case JCS_EXT_BGR:
  187. avx2fct = jsimd_extbgr_gray_convert_avx2;
  188. sse2fct = jsimd_extbgr_gray_convert_sse2;
  189. break;
  190. case JCS_EXT_BGRX:
  191. case JCS_EXT_BGRA:
  192. avx2fct = jsimd_extbgrx_gray_convert_avx2;
  193. sse2fct = jsimd_extbgrx_gray_convert_sse2;
  194. break;
  195. case JCS_EXT_XBGR:
  196. case JCS_EXT_ABGR:
  197. avx2fct = jsimd_extxbgr_gray_convert_avx2;
  198. sse2fct = jsimd_extxbgr_gray_convert_sse2;
  199. break;
  200. case JCS_EXT_XRGB:
  201. case JCS_EXT_ARGB:
  202. avx2fct = jsimd_extxrgb_gray_convert_avx2;
  203. sse2fct = jsimd_extxrgb_gray_convert_sse2;
  204. break;
  205. default:
  206. avx2fct = jsimd_rgb_gray_convert_avx2;
  207. sse2fct = jsimd_rgb_gray_convert_sse2;
  208. break;
  209. }
  210. if (simd_support & JSIMD_AVX2)
  211. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  212. else
  213. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  214. }
  215. GLOBAL(void)
  216. jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  217. JDIMENSION input_row, JSAMPARRAY output_buf,
  218. int num_rows)
  219. {
  220. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  221. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  222. switch (cinfo->out_color_space) {
  223. case JCS_EXT_RGB:
  224. avx2fct = jsimd_ycc_extrgb_convert_avx2;
  225. sse2fct = jsimd_ycc_extrgb_convert_sse2;
  226. break;
  227. case JCS_EXT_RGBX:
  228. case JCS_EXT_RGBA:
  229. avx2fct = jsimd_ycc_extrgbx_convert_avx2;
  230. sse2fct = jsimd_ycc_extrgbx_convert_sse2;
  231. break;
  232. case JCS_EXT_BGR:
  233. avx2fct = jsimd_ycc_extbgr_convert_avx2;
  234. sse2fct = jsimd_ycc_extbgr_convert_sse2;
  235. break;
  236. case JCS_EXT_BGRX:
  237. case JCS_EXT_BGRA:
  238. avx2fct = jsimd_ycc_extbgrx_convert_avx2;
  239. sse2fct = jsimd_ycc_extbgrx_convert_sse2;
  240. break;
  241. case JCS_EXT_XBGR:
  242. case JCS_EXT_ABGR:
  243. avx2fct = jsimd_ycc_extxbgr_convert_avx2;
  244. sse2fct = jsimd_ycc_extxbgr_convert_sse2;
  245. break;
  246. case JCS_EXT_XRGB:
  247. case JCS_EXT_ARGB:
  248. avx2fct = jsimd_ycc_extxrgb_convert_avx2;
  249. sse2fct = jsimd_ycc_extxrgb_convert_sse2;
  250. break;
  251. default:
  252. avx2fct = jsimd_ycc_rgb_convert_avx2;
  253. sse2fct = jsimd_ycc_rgb_convert_sse2;
  254. break;
  255. }
  256. if (simd_support & JSIMD_AVX2)
  257. avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  258. else
  259. sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  260. }
  261. GLOBAL(void)
  262. jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  263. JDIMENSION input_row, JSAMPARRAY output_buf,
  264. int num_rows)
  265. {
  266. }
  267. GLOBAL(int)
  268. jsimd_can_h2v2_downsample(void)
  269. {
  270. init_simd();
  271. /* The code is optimised for these values only */
  272. if (BITS_IN_JSAMPLE != 8)
  273. return 0;
  274. if (sizeof(JDIMENSION) != 4)
  275. return 0;
  276. if (simd_support & JSIMD_AVX2)
  277. return 1;
  278. if (simd_support & JSIMD_SSE2)
  279. return 1;
  280. return 0;
  281. }
  282. GLOBAL(int)
  283. jsimd_can_h2v1_downsample(void)
  284. {
  285. init_simd();
  286. /* The code is optimised for these values only */
  287. if (BITS_IN_JSAMPLE != 8)
  288. return 0;
  289. if (sizeof(JDIMENSION) != 4)
  290. return 0;
  291. if (simd_support & JSIMD_AVX2)
  292. return 1;
  293. if (simd_support & JSIMD_SSE2)
  294. return 1;
  295. return 0;
  296. }
  297. GLOBAL(void)
  298. jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  299. JSAMPARRAY input_data, JSAMPARRAY output_data)
  300. {
  301. if (simd_support & JSIMD_AVX2)
  302. jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  303. compptr->v_samp_factor,
  304. compptr->width_in_blocks, input_data,
  305. output_data);
  306. else
  307. jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  308. compptr->v_samp_factor,
  309. compptr->width_in_blocks, input_data,
  310. output_data);
  311. }
  312. GLOBAL(void)
  313. jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  314. JSAMPARRAY input_data, JSAMPARRAY output_data)
  315. {
  316. if (simd_support & JSIMD_AVX2)
  317. jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  318. compptr->v_samp_factor,
  319. compptr->width_in_blocks, input_data,
  320. output_data);
  321. else
  322. jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  323. compptr->v_samp_factor,
  324. compptr->width_in_blocks, input_data,
  325. output_data);
  326. }
  327. GLOBAL(int)
  328. jsimd_can_h2v2_upsample(void)
  329. {
  330. init_simd();
  331. /* The code is optimised for these values only */
  332. if (BITS_IN_JSAMPLE != 8)
  333. return 0;
  334. if (sizeof(JDIMENSION) != 4)
  335. return 0;
  336. if (simd_support & JSIMD_AVX2)
  337. return 1;
  338. if (simd_support & JSIMD_SSE2)
  339. return 1;
  340. return 0;
  341. }
  342. GLOBAL(int)
  343. jsimd_can_h2v1_upsample(void)
  344. {
  345. init_simd();
  346. /* The code is optimised for these values only */
  347. if (BITS_IN_JSAMPLE != 8)
  348. return 0;
  349. if (sizeof(JDIMENSION) != 4)
  350. return 0;
  351. if (simd_support & JSIMD_AVX2)
  352. return 1;
  353. if (simd_support & JSIMD_SSE2)
  354. return 1;
  355. return 0;
  356. }
  357. GLOBAL(void)
  358. jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  359. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  360. {
  361. if (simd_support & JSIMD_AVX2)
  362. jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  363. input_data, output_data_ptr);
  364. else
  365. jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  366. input_data, output_data_ptr);
  367. }
  368. GLOBAL(void)
  369. jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  370. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  371. {
  372. if (simd_support & JSIMD_AVX2)
  373. jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  374. input_data, output_data_ptr);
  375. else
  376. jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  377. input_data, output_data_ptr);
  378. }
  379. GLOBAL(int)
  380. jsimd_can_h2v2_fancy_upsample(void)
  381. {
  382. init_simd();
  383. /* The code is optimised for these values only */
  384. if (BITS_IN_JSAMPLE != 8)
  385. return 0;
  386. if (sizeof(JDIMENSION) != 4)
  387. return 0;
  388. if ((simd_support & JSIMD_AVX2) &&
  389. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  390. return 1;
  391. if ((simd_support & JSIMD_SSE2) &&
  392. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  393. return 1;
  394. return 0;
  395. }
  396. GLOBAL(int)
  397. jsimd_can_h2v1_fancy_upsample(void)
  398. {
  399. init_simd();
  400. /* The code is optimised for these values only */
  401. if (BITS_IN_JSAMPLE != 8)
  402. return 0;
  403. if (sizeof(JDIMENSION) != 4)
  404. return 0;
  405. if ((simd_support & JSIMD_AVX2) &&
  406. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  407. return 1;
  408. if ((simd_support & JSIMD_SSE2) &&
  409. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  410. return 1;
  411. return 0;
  412. }
  413. GLOBAL(void)
  414. jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  415. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  416. {
  417. if (simd_support & JSIMD_AVX2)
  418. jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  419. compptr->downsampled_width, input_data,
  420. output_data_ptr);
  421. else
  422. jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  423. compptr->downsampled_width, input_data,
  424. output_data_ptr);
  425. }
  426. GLOBAL(void)
  427. jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  428. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  429. {
  430. if (simd_support & JSIMD_AVX2)
  431. jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  432. compptr->downsampled_width, input_data,
  433. output_data_ptr);
  434. else
  435. jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  436. compptr->downsampled_width, input_data,
  437. output_data_ptr);
  438. }
  439. GLOBAL(int)
  440. jsimd_can_h2v2_merged_upsample(void)
  441. {
  442. init_simd();
  443. /* The code is optimised for these values only */
  444. if (BITS_IN_JSAMPLE != 8)
  445. return 0;
  446. if (sizeof(JDIMENSION) != 4)
  447. return 0;
  448. if ((simd_support & JSIMD_AVX2) &&
  449. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  450. return 1;
  451. if ((simd_support & JSIMD_SSE2) &&
  452. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  453. return 1;
  454. return 0;
  455. }
  456. GLOBAL(int)
  457. jsimd_can_h2v1_merged_upsample(void)
  458. {
  459. init_simd();
  460. /* The code is optimised for these values only */
  461. if (BITS_IN_JSAMPLE != 8)
  462. return 0;
  463. if (sizeof(JDIMENSION) != 4)
  464. return 0;
  465. if ((simd_support & JSIMD_AVX2) &&
  466. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  467. return 1;
  468. if ((simd_support & JSIMD_SSE2) &&
  469. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  470. return 1;
  471. return 0;
  472. }
  473. GLOBAL(void)
  474. jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  475. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  476. {
  477. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  478. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  479. switch (cinfo->out_color_space) {
  480. case JCS_EXT_RGB:
  481. avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
  482. sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
  483. break;
  484. case JCS_EXT_RGBX:
  485. case JCS_EXT_RGBA:
  486. avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
  487. sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
  488. break;
  489. case JCS_EXT_BGR:
  490. avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
  491. sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
  492. break;
  493. case JCS_EXT_BGRX:
  494. case JCS_EXT_BGRA:
  495. avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
  496. sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
  497. break;
  498. case JCS_EXT_XBGR:
  499. case JCS_EXT_ABGR:
  500. avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
  501. sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
  502. break;
  503. case JCS_EXT_XRGB:
  504. case JCS_EXT_ARGB:
  505. avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
  506. sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
  507. break;
  508. default:
  509. avx2fct = jsimd_h2v2_merged_upsample_avx2;
  510. sse2fct = jsimd_h2v2_merged_upsample_sse2;
  511. break;
  512. }
  513. if (simd_support & JSIMD_AVX2)
  514. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  515. else
  516. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  517. }
  518. GLOBAL(void)
  519. jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  520. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  521. {
  522. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  523. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  524. switch (cinfo->out_color_space) {
  525. case JCS_EXT_RGB:
  526. avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
  527. sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
  528. break;
  529. case JCS_EXT_RGBX:
  530. case JCS_EXT_RGBA:
  531. avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
  532. sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
  533. break;
  534. case JCS_EXT_BGR:
  535. avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
  536. sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
  537. break;
  538. case JCS_EXT_BGRX:
  539. case JCS_EXT_BGRA:
  540. avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
  541. sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
  542. break;
  543. case JCS_EXT_XBGR:
  544. case JCS_EXT_ABGR:
  545. avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
  546. sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
  547. break;
  548. case JCS_EXT_XRGB:
  549. case JCS_EXT_ARGB:
  550. avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
  551. sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
  552. break;
  553. default:
  554. avx2fct = jsimd_h2v1_merged_upsample_avx2;
  555. sse2fct = jsimd_h2v1_merged_upsample_sse2;
  556. break;
  557. }
  558. if (simd_support & JSIMD_AVX2)
  559. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  560. else
  561. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  562. }
  563. GLOBAL(int)
  564. jsimd_can_convsamp(void)
  565. {
  566. init_simd();
  567. /* The code is optimised for these values only */
  568. if (DCTSIZE != 8)
  569. return 0;
  570. if (BITS_IN_JSAMPLE != 8)
  571. return 0;
  572. if (sizeof(JDIMENSION) != 4)
  573. return 0;
  574. if (sizeof(DCTELEM) != 2)
  575. return 0;
  576. if (simd_support & JSIMD_AVX2)
  577. return 1;
  578. if (simd_support & JSIMD_SSE2)
  579. return 1;
  580. return 0;
  581. }
  582. GLOBAL(int)
  583. jsimd_can_convsamp_float(void)
  584. {
  585. init_simd();
  586. /* The code is optimised for these values only */
  587. if (DCTSIZE != 8)
  588. return 0;
  589. if (BITS_IN_JSAMPLE != 8)
  590. return 0;
  591. if (sizeof(JDIMENSION) != 4)
  592. return 0;
  593. if (sizeof(FAST_FLOAT) != 4)
  594. return 0;
  595. if (simd_support & JSIMD_SSE2)
  596. return 1;
  597. return 0;
  598. }
  599. GLOBAL(void)
  600. jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
  601. DCTELEM *workspace)
  602. {
  603. if (simd_support & JSIMD_AVX2)
  604. jsimd_convsamp_avx2(sample_data, start_col, workspace);
  605. else
  606. jsimd_convsamp_sse2(sample_data, start_col, workspace);
  607. }
  608. GLOBAL(void)
  609. jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
  610. FAST_FLOAT *workspace)
  611. {
  612. jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
  613. }
  614. GLOBAL(int)
  615. jsimd_can_fdct_islow(void)
  616. {
  617. init_simd();
  618. /* The code is optimised for these values only */
  619. if (DCTSIZE != 8)
  620. return 0;
  621. if (sizeof(DCTELEM) != 2)
  622. return 0;
  623. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
  624. return 1;
  625. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
  626. return 1;
  627. return 0;
  628. }
  629. GLOBAL(int)
  630. jsimd_can_fdct_ifast(void)
  631. {
  632. init_simd();
  633. /* The code is optimised for these values only */
  634. if (DCTSIZE != 8)
  635. return 0;
  636. if (sizeof(DCTELEM) != 2)
  637. return 0;
  638. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
  639. return 1;
  640. return 0;
  641. }
  642. GLOBAL(int)
  643. jsimd_can_fdct_float(void)
  644. {
  645. init_simd();
  646. /* The code is optimised for these values only */
  647. if (DCTSIZE != 8)
  648. return 0;
  649. if (sizeof(FAST_FLOAT) != 4)
  650. return 0;
  651. if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
  652. return 1;
  653. return 0;
  654. }
  655. GLOBAL(void)
  656. jsimd_fdct_islow(DCTELEM *data)
  657. {
  658. if (simd_support & JSIMD_AVX2)
  659. jsimd_fdct_islow_avx2(data);
  660. else
  661. jsimd_fdct_islow_sse2(data);
  662. }
  663. GLOBAL(void)
  664. jsimd_fdct_ifast(DCTELEM *data)
  665. {
  666. jsimd_fdct_ifast_sse2(data);
  667. }
  668. GLOBAL(void)
  669. jsimd_fdct_float(FAST_FLOAT *data)
  670. {
  671. jsimd_fdct_float_sse(data);
  672. }
  673. GLOBAL(int)
  674. jsimd_can_quantize(void)
  675. {
  676. init_simd();
  677. /* The code is optimised for these values only */
  678. if (DCTSIZE != 8)
  679. return 0;
  680. if (sizeof(JCOEF) != 2)
  681. return 0;
  682. if (sizeof(DCTELEM) != 2)
  683. return 0;
  684. if (simd_support & JSIMD_AVX2)
  685. return 1;
  686. if (simd_support & JSIMD_SSE2)
  687. return 1;
  688. return 0;
  689. }
  690. GLOBAL(int)
  691. jsimd_can_quantize_float(void)
  692. {
  693. init_simd();
  694. /* The code is optimised for these values only */
  695. if (DCTSIZE != 8)
  696. return 0;
  697. if (sizeof(JCOEF) != 2)
  698. return 0;
  699. if (sizeof(FAST_FLOAT) != 4)
  700. return 0;
  701. if (simd_support & JSIMD_SSE2)
  702. return 1;
  703. return 0;
  704. }
  705. GLOBAL(void)
  706. jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
  707. {
  708. if (simd_support & JSIMD_AVX2)
  709. jsimd_quantize_avx2(coef_block, divisors, workspace);
  710. else
  711. jsimd_quantize_sse2(coef_block, divisors, workspace);
  712. }
  713. GLOBAL(void)
  714. jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
  715. FAST_FLOAT *workspace)
  716. {
  717. jsimd_quantize_float_sse2(coef_block, divisors, workspace);
  718. }
  719. GLOBAL(int)
  720. jsimd_can_idct_2x2(void)
  721. {
  722. init_simd();
  723. /* The code is optimised for these values only */
  724. if (DCTSIZE != 8)
  725. return 0;
  726. if (sizeof(JCOEF) != 2)
  727. return 0;
  728. if (BITS_IN_JSAMPLE != 8)
  729. return 0;
  730. if (sizeof(JDIMENSION) != 4)
  731. return 0;
  732. if (sizeof(ISLOW_MULT_TYPE) != 2)
  733. return 0;
  734. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  735. return 1;
  736. return 0;
  737. }
  738. GLOBAL(int)
  739. jsimd_can_idct_4x4(void)
  740. {
  741. init_simd();
  742. /* The code is optimised for these values only */
  743. if (DCTSIZE != 8)
  744. return 0;
  745. if (sizeof(JCOEF) != 2)
  746. return 0;
  747. if (BITS_IN_JSAMPLE != 8)
  748. return 0;
  749. if (sizeof(JDIMENSION) != 4)
  750. return 0;
  751. if (sizeof(ISLOW_MULT_TYPE) != 2)
  752. return 0;
  753. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  754. return 1;
  755. return 0;
  756. }
  757. GLOBAL(void)
  758. jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  759. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  760. JDIMENSION output_col)
  761. {
  762. jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  763. }
  764. GLOBAL(void)
  765. jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  766. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  767. JDIMENSION output_col)
  768. {
  769. jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  770. }
  771. GLOBAL(int)
  772. jsimd_can_idct_islow(void)
  773. {
  774. init_simd();
  775. /* The code is optimised for these values only */
  776. if (DCTSIZE != 8)
  777. return 0;
  778. if (sizeof(JCOEF) != 2)
  779. return 0;
  780. if (BITS_IN_JSAMPLE != 8)
  781. return 0;
  782. if (sizeof(JDIMENSION) != 4)
  783. return 0;
  784. if (sizeof(ISLOW_MULT_TYPE) != 2)
  785. return 0;
  786. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
  787. return 1;
  788. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
  789. return 1;
  790. return 0;
  791. }
  792. GLOBAL(int)
  793. jsimd_can_idct_ifast(void)
  794. {
  795. init_simd();
  796. /* The code is optimised for these values only */
  797. if (DCTSIZE != 8)
  798. return 0;
  799. if (sizeof(JCOEF) != 2)
  800. return 0;
  801. if (BITS_IN_JSAMPLE != 8)
  802. return 0;
  803. if (sizeof(JDIMENSION) != 4)
  804. return 0;
  805. if (sizeof(IFAST_MULT_TYPE) != 2)
  806. return 0;
  807. if (IFAST_SCALE_BITS != 2)
  808. return 0;
  809. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
  810. return 1;
  811. return 0;
  812. }
  813. GLOBAL(int)
  814. jsimd_can_idct_float(void)
  815. {
  816. init_simd();
  817. if (DCTSIZE != 8)
  818. return 0;
  819. if (sizeof(JCOEF) != 2)
  820. return 0;
  821. if (BITS_IN_JSAMPLE != 8)
  822. return 0;
  823. if (sizeof(JDIMENSION) != 4)
  824. return 0;
  825. if (sizeof(FAST_FLOAT) != 4)
  826. return 0;
  827. if (sizeof(FLOAT_MULT_TYPE) != 4)
  828. return 0;
  829. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
  830. return 1;
  831. return 0;
  832. }
  833. GLOBAL(void)
  834. jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  835. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  836. JDIMENSION output_col)
  837. {
  838. if (simd_support & JSIMD_AVX2)
  839. jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
  840. output_col);
  841. else
  842. jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
  843. output_col);
  844. }
  845. GLOBAL(void)
  846. jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  847. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  848. JDIMENSION output_col)
  849. {
  850. jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
  851. output_col);
  852. }
  853. GLOBAL(void)
  854. jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  855. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  856. JDIMENSION output_col)
  857. {
  858. jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
  859. output_col);
  860. }
  861. GLOBAL(int)
  862. jsimd_can_huff_encode_one_block(void)
  863. {
  864. init_simd();
  865. #ifndef WITH_SANITIZER
  866. if (DCTSIZE != 8)
  867. return 0;
  868. if (sizeof(JCOEF) != 2)
  869. return 0;
  870. if ((simd_support & JSIMD_SSE2) && simd_huffman &&
  871. IS_ALIGNED_SSE(jconst_huff_encode_one_block))
  872. return 1;
  873. #endif
  874. return 0;
  875. }
  876. GLOBAL(JOCTET *)
  877. jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
  878. int last_dc_val, c_derived_tbl *dctbl,
  879. c_derived_tbl *actbl)
  880. {
  881. return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
  882. dctbl, actbl);
  883. }
  884. GLOBAL(int)
  885. jsimd_can_encode_mcu_AC_first_prepare(void)
  886. {
  887. init_simd();
  888. #ifndef WITH_SANITIZER
  889. if (DCTSIZE != 8)
  890. return 0;
  891. if (sizeof(JCOEF) != 2)
  892. return 0;
  893. if (simd_support & JSIMD_SSE2)
  894. return 1;
  895. #endif
  896. return 0;
  897. }
  898. GLOBAL(void)
  899. jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
  900. const int *jpeg_natural_order_start, int Sl,
  901. int Al, JCOEF *values, size_t *zerobits)
  902. {
  903. jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
  904. Sl, Al, values, zerobits);
  905. }
  906. GLOBAL(int)
  907. jsimd_can_encode_mcu_AC_refine_prepare(void)
  908. {
  909. init_simd();
  910. #ifndef WITH_SANITIZER
  911. if (DCTSIZE != 8)
  912. return 0;
  913. if (sizeof(JCOEF) != 2)
  914. return 0;
  915. if (simd_support & JSIMD_SSE2)
  916. return 1;
  917. #endif
  918. return 0;
  919. }
  920. GLOBAL(int)
  921. jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
  922. const int *jpeg_natural_order_start, int Sl,
  923. int Al, JCOEF *absvalues, size_t *bits)
  924. {
  925. return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
  926. jpeg_natural_order_start,
  927. Sl, Al, absvalues, bits);
  928. }