jsimd.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056
  1. /*
  2. * jsimd_arm64.c
  3. *
  4. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  5. * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
  6. * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
  7. * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
  8. * Copyright (C) 2020, Arm Limited.
  9. *
  10. * Based on the x86 SIMD extension for IJG JPEG library,
  11. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  12. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  13. *
  14. * This file contains the interface between the "normal" portions
  15. * of the library and the SIMD implementations when running on a
  16. * 64-bit Arm architecture.
  17. */
  18. #define JPEG_INTERNALS
  19. #include "../../../jinclude.h"
  20. #include "../../../jpeglib.h"
  21. #include "../../../jsimd.h"
  22. #include "../../../jdct.h"
  23. #include "../../../jsimddct.h"
  24. #include "../../jsimd.h"
  25. #include "jconfigint.h"
  26. #include <ctype.h>
  27. #define JSIMD_FASTLD3 1
  28. #define JSIMD_FASTST3 2
  29. #define JSIMD_FASTTBL 4
  30. static unsigned int simd_support = ~0;
  31. static unsigned int simd_huffman = 1;
  32. static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 |
  33. JSIMD_FASTTBL;
  34. #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  35. #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
  36. LOCAL(int)
  37. check_cpuinfo(char *buffer, const char *field, char *value)
  38. {
  39. char *p;
  40. if (*value == 0)
  41. return 0;
  42. if (strncmp(buffer, field, strlen(field)) != 0)
  43. return 0;
  44. buffer += strlen(field);
  45. while (isspace(*buffer))
  46. buffer++;
  47. /* Check if 'value' is present in the buffer as a separate word */
  48. while ((p = strstr(buffer, value))) {
  49. if (p > buffer && !isspace(*(p - 1))) {
  50. buffer++;
  51. continue;
  52. }
  53. p += strlen(value);
  54. if (*p != 0 && !isspace(*p)) {
  55. buffer++;
  56. continue;
  57. }
  58. return 1;
  59. }
  60. return 0;
  61. }
  62. LOCAL(int)
  63. parse_proc_cpuinfo(int bufsize)
  64. {
  65. char *buffer = (char *)malloc(bufsize);
  66. FILE *fd;
  67. if (!buffer)
  68. return 0;
  69. fd = fopen("/proc/cpuinfo", "r");
  70. if (fd) {
  71. while (fgets(buffer, bufsize, fd)) {
  72. if (!strchr(buffer, '\n') && !feof(fd)) {
  73. /* "impossible" happened - insufficient size of the buffer! */
  74. fclose(fd);
  75. free(buffer);
  76. return 0;
  77. }
  78. if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
  79. check_cpuinfo(buffer, "CPU part", "0xd07"))
  80. /* The Cortex-A53 has a slow tbl implementation. We can gain a few
  81. percent speedup by disabling the use of that instruction. The
  82. speedup on Cortex-A57 is more subtle but still measurable. */
  83. simd_features &= ~JSIMD_FASTTBL;
  84. else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
  85. /* The SIMD version of Huffman encoding is slower than the C version on
  86. Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that
  87. CPU. */
  88. simd_huffman = simd_features = 0;
  89. }
  90. fclose(fd);
  91. }
  92. free(buffer);
  93. return 1;
  94. }
  95. #endif
  96. /*
  97. * Check what SIMD accelerations are supported.
  98. *
  99. * FIXME: This code is racy under a multi-threaded environment.
  100. */
  101. /*
  102. * Armv8 architectures support Neon extensions by default.
  103. * It is no longer optional as it was with Armv7.
  104. */
  105. LOCAL(void)
  106. init_simd(void)
  107. {
  108. #ifndef NO_GETENV
  109. char env[2] = { 0 };
  110. #endif
  111. #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  112. int bufsize = 1024; /* an initial guess for the line buffer size limit */
  113. #endif
  114. if (simd_support != ~0U)
  115. return;
  116. simd_support = 0;
  117. simd_support |= JSIMD_NEON;
  118. #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  119. while (!parse_proc_cpuinfo(bufsize)) {
  120. bufsize *= 2;
  121. if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
  122. break;
  123. }
  124. #endif
  125. #ifndef NO_GETENV
  126. /* Force different settings through environment variables */
  127. if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
  128. simd_support = JSIMD_NEON;
  129. if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
  130. simd_support = 0;
  131. if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
  132. simd_huffman = 0;
  133. if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1"))
  134. simd_features |= JSIMD_FASTLD3;
  135. if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0"))
  136. simd_features &= ~JSIMD_FASTLD3;
  137. if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1"))
  138. simd_features |= JSIMD_FASTST3;
  139. if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0"))
  140. simd_features &= ~JSIMD_FASTST3;
  141. #endif
  142. }
  143. GLOBAL(int)
  144. jsimd_can_rgb_ycc(void)
  145. {
  146. init_simd();
  147. /* The code is optimised for these values only */
  148. if (BITS_IN_JSAMPLE != 8)
  149. return 0;
  150. if (sizeof(JDIMENSION) != 4)
  151. return 0;
  152. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  153. return 0;
  154. if (simd_support & JSIMD_NEON)
  155. return 1;
  156. return 0;
  157. }
  158. GLOBAL(int)
  159. jsimd_can_rgb_gray(void)
  160. {
  161. init_simd();
  162. /* The code is optimised for these values only */
  163. if (BITS_IN_JSAMPLE != 8)
  164. return 0;
  165. if (sizeof(JDIMENSION) != 4)
  166. return 0;
  167. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  168. return 0;
  169. if (simd_support & JSIMD_NEON)
  170. return 1;
  171. return 0;
  172. }
  173. GLOBAL(int)
  174. jsimd_can_ycc_rgb(void)
  175. {
  176. init_simd();
  177. /* The code is optimised for these values only */
  178. if (BITS_IN_JSAMPLE != 8)
  179. return 0;
  180. if (sizeof(JDIMENSION) != 4)
  181. return 0;
  182. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  183. return 0;
  184. if (simd_support & JSIMD_NEON)
  185. return 1;
  186. return 0;
  187. }
  188. GLOBAL(int)
  189. jsimd_can_ycc_rgb565(void)
  190. {
  191. init_simd();
  192. /* The code is optimised for these values only */
  193. if (BITS_IN_JSAMPLE != 8)
  194. return 0;
  195. if (sizeof(JDIMENSION) != 4)
  196. return 0;
  197. if (simd_support & JSIMD_NEON)
  198. return 1;
  199. return 0;
  200. }
  201. GLOBAL(void)
  202. jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  203. JSAMPIMAGE output_buf, JDIMENSION output_row,
  204. int num_rows)
  205. {
  206. void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  207. switch (cinfo->in_color_space) {
  208. case JCS_EXT_RGB:
  209. #ifndef NEON_INTRINSICS
  210. if (simd_features & JSIMD_FASTLD3)
  211. #endif
  212. neonfct = jsimd_extrgb_ycc_convert_neon;
  213. #ifndef NEON_INTRINSICS
  214. else
  215. neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
  216. #endif
  217. break;
  218. case JCS_EXT_RGBX:
  219. case JCS_EXT_RGBA:
  220. neonfct = jsimd_extrgbx_ycc_convert_neon;
  221. break;
  222. case JCS_EXT_BGR:
  223. #ifndef NEON_INTRINSICS
  224. if (simd_features & JSIMD_FASTLD3)
  225. #endif
  226. neonfct = jsimd_extbgr_ycc_convert_neon;
  227. #ifndef NEON_INTRINSICS
  228. else
  229. neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
  230. #endif
  231. break;
  232. case JCS_EXT_BGRX:
  233. case JCS_EXT_BGRA:
  234. neonfct = jsimd_extbgrx_ycc_convert_neon;
  235. break;
  236. case JCS_EXT_XBGR:
  237. case JCS_EXT_ABGR:
  238. neonfct = jsimd_extxbgr_ycc_convert_neon;
  239. break;
  240. case JCS_EXT_XRGB:
  241. case JCS_EXT_ARGB:
  242. neonfct = jsimd_extxrgb_ycc_convert_neon;
  243. break;
  244. default:
  245. #ifndef NEON_INTRINSICS
  246. if (simd_features & JSIMD_FASTLD3)
  247. #endif
  248. neonfct = jsimd_extrgb_ycc_convert_neon;
  249. #ifndef NEON_INTRINSICS
  250. else
  251. neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
  252. #endif
  253. break;
  254. }
  255. neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  256. }
  257. GLOBAL(void)
  258. jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  259. JSAMPIMAGE output_buf, JDIMENSION output_row,
  260. int num_rows)
  261. {
  262. void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  263. switch (cinfo->in_color_space) {
  264. case JCS_EXT_RGB:
  265. neonfct = jsimd_extrgb_gray_convert_neon;
  266. break;
  267. case JCS_EXT_RGBX:
  268. case JCS_EXT_RGBA:
  269. neonfct = jsimd_extrgbx_gray_convert_neon;
  270. break;
  271. case JCS_EXT_BGR:
  272. neonfct = jsimd_extbgr_gray_convert_neon;
  273. break;
  274. case JCS_EXT_BGRX:
  275. case JCS_EXT_BGRA:
  276. neonfct = jsimd_extbgrx_gray_convert_neon;
  277. break;
  278. case JCS_EXT_XBGR:
  279. case JCS_EXT_ABGR:
  280. neonfct = jsimd_extxbgr_gray_convert_neon;
  281. break;
  282. case JCS_EXT_XRGB:
  283. case JCS_EXT_ARGB:
  284. neonfct = jsimd_extxrgb_gray_convert_neon;
  285. break;
  286. default:
  287. neonfct = jsimd_extrgb_gray_convert_neon;
  288. break;
  289. }
  290. neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  291. }
  292. GLOBAL(void)
  293. jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  294. JDIMENSION input_row, JSAMPARRAY output_buf,
  295. int num_rows)
  296. {
  297. void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  298. switch (cinfo->out_color_space) {
  299. case JCS_EXT_RGB:
  300. #ifndef NEON_INTRINSICS
  301. if (simd_features & JSIMD_FASTST3)
  302. #endif
  303. neonfct = jsimd_ycc_extrgb_convert_neon;
  304. #ifndef NEON_INTRINSICS
  305. else
  306. neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
  307. #endif
  308. break;
  309. case JCS_EXT_RGBX:
  310. case JCS_EXT_RGBA:
  311. neonfct = jsimd_ycc_extrgbx_convert_neon;
  312. break;
  313. case JCS_EXT_BGR:
  314. #ifndef NEON_INTRINSICS
  315. if (simd_features & JSIMD_FASTST3)
  316. #endif
  317. neonfct = jsimd_ycc_extbgr_convert_neon;
  318. #ifndef NEON_INTRINSICS
  319. else
  320. neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
  321. #endif
  322. break;
  323. case JCS_EXT_BGRX:
  324. case JCS_EXT_BGRA:
  325. neonfct = jsimd_ycc_extbgrx_convert_neon;
  326. break;
  327. case JCS_EXT_XBGR:
  328. case JCS_EXT_ABGR:
  329. neonfct = jsimd_ycc_extxbgr_convert_neon;
  330. break;
  331. case JCS_EXT_XRGB:
  332. case JCS_EXT_ARGB:
  333. neonfct = jsimd_ycc_extxrgb_convert_neon;
  334. break;
  335. default:
  336. #ifndef NEON_INTRINSICS
  337. if (simd_features & JSIMD_FASTST3)
  338. #endif
  339. neonfct = jsimd_ycc_extrgb_convert_neon;
  340. #ifndef NEON_INTRINSICS
  341. else
  342. neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
  343. #endif
  344. break;
  345. }
  346. neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  347. }
  348. GLOBAL(void)
  349. jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  350. JDIMENSION input_row, JSAMPARRAY output_buf,
  351. int num_rows)
  352. {
  353. jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
  354. output_buf, num_rows);
  355. }
  356. GLOBAL(int)
  357. jsimd_can_h2v2_downsample(void)
  358. {
  359. init_simd();
  360. /* The code is optimised for these values only */
  361. if (BITS_IN_JSAMPLE != 8)
  362. return 0;
  363. if (DCTSIZE != 8)
  364. return 0;
  365. if (sizeof(JDIMENSION) != 4)
  366. return 0;
  367. if (simd_support & JSIMD_NEON)
  368. return 1;
  369. return 0;
  370. }
  371. GLOBAL(int)
  372. jsimd_can_h2v1_downsample(void)
  373. {
  374. init_simd();
  375. /* The code is optimised for these values only */
  376. if (BITS_IN_JSAMPLE != 8)
  377. return 0;
  378. if (DCTSIZE != 8)
  379. return 0;
  380. if (sizeof(JDIMENSION) != 4)
  381. return 0;
  382. if (simd_support & JSIMD_NEON)
  383. return 1;
  384. return 0;
  385. }
  386. GLOBAL(void)
  387. jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  388. JSAMPARRAY input_data, JSAMPARRAY output_data)
  389. {
  390. jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
  391. compptr->v_samp_factor, compptr->width_in_blocks,
  392. input_data, output_data);
  393. }
  394. GLOBAL(void)
  395. jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  396. JSAMPARRAY input_data, JSAMPARRAY output_data)
  397. {
  398. jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
  399. compptr->v_samp_factor, compptr->width_in_blocks,
  400. input_data, output_data);
  401. }
  402. GLOBAL(int)
  403. jsimd_can_h2v2_upsample(void)
  404. {
  405. init_simd();
  406. /* The code is optimised for these values only */
  407. if (BITS_IN_JSAMPLE != 8)
  408. return 0;
  409. if (sizeof(JDIMENSION) != 4)
  410. return 0;
  411. if (simd_support & JSIMD_NEON)
  412. return 1;
  413. return 0;
  414. }
  415. GLOBAL(int)
  416. jsimd_can_h2v1_upsample(void)
  417. {
  418. init_simd();
  419. /* The code is optimised for these values only */
  420. if (BITS_IN_JSAMPLE != 8)
  421. return 0;
  422. if (sizeof(JDIMENSION) != 4)
  423. return 0;
  424. if (simd_support & JSIMD_NEON)
  425. return 1;
  426. return 0;
  427. }
  428. GLOBAL(void)
  429. jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  430. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  431. {
  432. jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
  433. input_data, output_data_ptr);
  434. }
  435. GLOBAL(void)
  436. jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  437. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  438. {
  439. jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
  440. input_data, output_data_ptr);
  441. }
  442. GLOBAL(int)
  443. jsimd_can_h2v2_fancy_upsample(void)
  444. {
  445. init_simd();
  446. /* The code is optimised for these values only */
  447. if (BITS_IN_JSAMPLE != 8)
  448. return 0;
  449. if (sizeof(JDIMENSION) != 4)
  450. return 0;
  451. if (simd_support & JSIMD_NEON)
  452. return 1;
  453. return 0;
  454. }
  455. GLOBAL(int)
  456. jsimd_can_h2v1_fancy_upsample(void)
  457. {
  458. init_simd();
  459. /* The code is optimised for these values only */
  460. if (BITS_IN_JSAMPLE != 8)
  461. return 0;
  462. if (sizeof(JDIMENSION) != 4)
  463. return 0;
  464. if (simd_support & JSIMD_NEON)
  465. return 1;
  466. return 0;
  467. }
  468. GLOBAL(int)
  469. jsimd_can_h1v2_fancy_upsample(void)
  470. {
  471. init_simd();
  472. /* The code is optimised for these values only */
  473. if (BITS_IN_JSAMPLE != 8)
  474. return 0;
  475. if (sizeof(JDIMENSION) != 4)
  476. return 0;
  477. if (simd_support & JSIMD_NEON)
  478. return 1;
  479. return 0;
  480. }
  481. GLOBAL(void)
  482. jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  483. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  484. {
  485. jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
  486. compptr->downsampled_width, input_data,
  487. output_data_ptr);
  488. }
  489. GLOBAL(void)
  490. jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  491. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  492. {
  493. jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
  494. compptr->downsampled_width, input_data,
  495. output_data_ptr);
  496. }
  497. GLOBAL(void)
  498. jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  499. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  500. {
  501. jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
  502. compptr->downsampled_width, input_data,
  503. output_data_ptr);
  504. }
  505. GLOBAL(int)
  506. jsimd_can_h2v2_merged_upsample(void)
  507. {
  508. init_simd();
  509. /* The code is optimised for these values only */
  510. if (BITS_IN_JSAMPLE != 8)
  511. return 0;
  512. if (sizeof(JDIMENSION) != 4)
  513. return 0;
  514. if (simd_support & JSIMD_NEON)
  515. return 1;
  516. return 0;
  517. }
  518. GLOBAL(int)
  519. jsimd_can_h2v1_merged_upsample(void)
  520. {
  521. init_simd();
  522. /* The code is optimised for these values only */
  523. if (BITS_IN_JSAMPLE != 8)
  524. return 0;
  525. if (sizeof(JDIMENSION) != 4)
  526. return 0;
  527. if (simd_support & JSIMD_NEON)
  528. return 1;
  529. return 0;
  530. }
  531. GLOBAL(void)
  532. jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  533. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  534. {
  535. void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  536. switch (cinfo->out_color_space) {
  537. case JCS_EXT_RGB:
  538. neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
  539. break;
  540. case JCS_EXT_RGBX:
  541. case JCS_EXT_RGBA:
  542. neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
  543. break;
  544. case JCS_EXT_BGR:
  545. neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
  546. break;
  547. case JCS_EXT_BGRX:
  548. case JCS_EXT_BGRA:
  549. neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
  550. break;
  551. case JCS_EXT_XBGR:
  552. case JCS_EXT_ABGR:
  553. neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
  554. break;
  555. case JCS_EXT_XRGB:
  556. case JCS_EXT_ARGB:
  557. neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
  558. break;
  559. default:
  560. neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
  561. break;
  562. }
  563. neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  564. }
  565. GLOBAL(void)
  566. jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  567. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  568. {
  569. void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  570. switch (cinfo->out_color_space) {
  571. case JCS_EXT_RGB:
  572. neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
  573. break;
  574. case JCS_EXT_RGBX:
  575. case JCS_EXT_RGBA:
  576. neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
  577. break;
  578. case JCS_EXT_BGR:
  579. neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
  580. break;
  581. case JCS_EXT_BGRX:
  582. case JCS_EXT_BGRA:
  583. neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
  584. break;
  585. case JCS_EXT_XBGR:
  586. case JCS_EXT_ABGR:
  587. neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
  588. break;
  589. case JCS_EXT_XRGB:
  590. case JCS_EXT_ARGB:
  591. neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
  592. break;
  593. default:
  594. neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
  595. break;
  596. }
  597. neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  598. }
  599. GLOBAL(int)
  600. jsimd_can_convsamp(void)
  601. {
  602. init_simd();
  603. /* The code is optimised for these values only */
  604. if (DCTSIZE != 8)
  605. return 0;
  606. if (BITS_IN_JSAMPLE != 8)
  607. return 0;
  608. if (sizeof(JDIMENSION) != 4)
  609. return 0;
  610. if (sizeof(DCTELEM) != 2)
  611. return 0;
  612. if (simd_support & JSIMD_NEON)
  613. return 1;
  614. return 0;
  615. }
  616. GLOBAL(int)
  617. jsimd_can_convsamp_float(void)
  618. {
  619. return 0;
  620. }
  621. GLOBAL(void)
  622. jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
  623. DCTELEM *workspace)
  624. {
  625. jsimd_convsamp_neon(sample_data, start_col, workspace);
  626. }
  627. GLOBAL(void)
  628. jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
  629. FAST_FLOAT *workspace)
  630. {
  631. }
  632. GLOBAL(int)
  633. jsimd_can_fdct_islow(void)
  634. {
  635. init_simd();
  636. /* The code is optimised for these values only */
  637. if (DCTSIZE != 8)
  638. return 0;
  639. if (sizeof(DCTELEM) != 2)
  640. return 0;
  641. if (simd_support & JSIMD_NEON)
  642. return 1;
  643. return 0;
  644. }
  645. GLOBAL(int)
  646. jsimd_can_fdct_ifast(void)
  647. {
  648. init_simd();
  649. /* The code is optimised for these values only */
  650. if (DCTSIZE != 8)
  651. return 0;
  652. if (sizeof(DCTELEM) != 2)
  653. return 0;
  654. if (simd_support & JSIMD_NEON)
  655. return 1;
  656. return 0;
  657. }
  658. GLOBAL(int)
  659. jsimd_can_fdct_float(void)
  660. {
  661. return 0;
  662. }
  663. GLOBAL(void)
  664. jsimd_fdct_islow(DCTELEM *data)
  665. {
  666. jsimd_fdct_islow_neon(data);
  667. }
  668. GLOBAL(void)
  669. jsimd_fdct_ifast(DCTELEM *data)
  670. {
  671. jsimd_fdct_ifast_neon(data);
  672. }
  673. GLOBAL(void)
  674. jsimd_fdct_float(FAST_FLOAT *data)
  675. {
  676. }
  677. GLOBAL(int)
  678. jsimd_can_quantize(void)
  679. {
  680. init_simd();
  681. /* The code is optimised for these values only */
  682. if (DCTSIZE != 8)
  683. return 0;
  684. if (sizeof(JCOEF) != 2)
  685. return 0;
  686. if (sizeof(DCTELEM) != 2)
  687. return 0;
  688. if (simd_support & JSIMD_NEON)
  689. return 1;
  690. return 0;
  691. }
  692. GLOBAL(int)
  693. jsimd_can_quantize_float(void)
  694. {
  695. return 0;
  696. }
  697. GLOBAL(void)
  698. jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
  699. {
  700. jsimd_quantize_neon(coef_block, divisors, workspace);
  701. }
  702. GLOBAL(void)
  703. jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
  704. FAST_FLOAT *workspace)
  705. {
  706. }
  707. GLOBAL(int)
  708. jsimd_can_idct_2x2(void)
  709. {
  710. init_simd();
  711. /* The code is optimised for these values only */
  712. if (DCTSIZE != 8)
  713. return 0;
  714. if (sizeof(JCOEF) != 2)
  715. return 0;
  716. if (BITS_IN_JSAMPLE != 8)
  717. return 0;
  718. if (sizeof(JDIMENSION) != 4)
  719. return 0;
  720. if (sizeof(ISLOW_MULT_TYPE) != 2)
  721. return 0;
  722. if (simd_support & JSIMD_NEON)
  723. return 1;
  724. return 0;
  725. }
  726. GLOBAL(int)
  727. jsimd_can_idct_4x4(void)
  728. {
  729. init_simd();
  730. /* The code is optimised for these values only */
  731. if (DCTSIZE != 8)
  732. return 0;
  733. if (sizeof(JCOEF) != 2)
  734. return 0;
  735. if (BITS_IN_JSAMPLE != 8)
  736. return 0;
  737. if (sizeof(JDIMENSION) != 4)
  738. return 0;
  739. if (sizeof(ISLOW_MULT_TYPE) != 2)
  740. return 0;
  741. if (simd_support & JSIMD_NEON)
  742. return 1;
  743. return 0;
  744. }
  745. GLOBAL(void)
  746. jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  747. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  748. JDIMENSION output_col)
  749. {
  750. jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
  751. }
  752. GLOBAL(void)
  753. jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  754. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  755. JDIMENSION output_col)
  756. {
  757. jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
  758. }
  759. GLOBAL(int)
  760. jsimd_can_idct_islow(void)
  761. {
  762. init_simd();
  763. /* The code is optimised for these values only */
  764. if (DCTSIZE != 8)
  765. return 0;
  766. if (sizeof(JCOEF) != 2)
  767. return 0;
  768. if (BITS_IN_JSAMPLE != 8)
  769. return 0;
  770. if (sizeof(JDIMENSION) != 4)
  771. return 0;
  772. if (sizeof(ISLOW_MULT_TYPE) != 2)
  773. return 0;
  774. if (simd_support & JSIMD_NEON)
  775. return 1;
  776. return 0;
  777. }
  778. GLOBAL(int)
  779. jsimd_can_idct_ifast(void)
  780. {
  781. init_simd();
  782. /* The code is optimised for these values only */
  783. if (DCTSIZE != 8)
  784. return 0;
  785. if (sizeof(JCOEF) != 2)
  786. return 0;
  787. if (BITS_IN_JSAMPLE != 8)
  788. return 0;
  789. if (sizeof(JDIMENSION) != 4)
  790. return 0;
  791. if (sizeof(IFAST_MULT_TYPE) != 2)
  792. return 0;
  793. if (IFAST_SCALE_BITS != 2)
  794. return 0;
  795. if (simd_support & JSIMD_NEON)
  796. return 1;
  797. return 0;
  798. }
  799. GLOBAL(int)
  800. jsimd_can_idct_float(void)
  801. {
  802. return 0;
  803. }
  804. GLOBAL(void)
  805. jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  806. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  807. JDIMENSION output_col)
  808. {
  809. jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
  810. output_col);
  811. }
  812. GLOBAL(void)
  813. jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  814. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  815. JDIMENSION output_col)
  816. {
  817. jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
  818. output_col);
  819. }
  820. GLOBAL(void)
  821. jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  822. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  823. JDIMENSION output_col)
  824. {
  825. }
  826. GLOBAL(int)
  827. jsimd_can_huff_encode_one_block(void)
  828. {
  829. init_simd();
  830. if (DCTSIZE != 8)
  831. return 0;
  832. if (sizeof(JCOEF) != 2)
  833. return 0;
  834. if (simd_support & JSIMD_NEON && simd_huffman)
  835. return 1;
  836. return 0;
  837. }
  838. GLOBAL(JOCTET *)
  839. jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
  840. int last_dc_val, c_derived_tbl *dctbl,
  841. c_derived_tbl *actbl)
  842. {
  843. #ifndef NEON_INTRINSICS
  844. if (simd_features & JSIMD_FASTTBL)
  845. #endif
  846. return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
  847. dctbl, actbl);
  848. #ifndef NEON_INTRINSICS
  849. else
  850. return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
  851. last_dc_val, dctbl, actbl);
  852. #endif
  853. }
  854. GLOBAL(int)
  855. jsimd_can_encode_mcu_AC_first_prepare(void)
  856. {
  857. init_simd();
  858. if (DCTSIZE != 8)
  859. return 0;
  860. if (sizeof(JCOEF) != 2)
  861. return 0;
  862. if (SIZEOF_SIZE_T != 8)
  863. return 0;
  864. if (simd_support & JSIMD_NEON)
  865. return 1;
  866. return 0;
  867. }
  868. GLOBAL(void)
  869. jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
  870. const int *jpeg_natural_order_start, int Sl,
  871. int Al, JCOEF *values, size_t *zerobits)
  872. {
  873. jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
  874. Sl, Al, values, zerobits);
  875. }
  876. GLOBAL(int)
  877. jsimd_can_encode_mcu_AC_refine_prepare(void)
  878. {
  879. init_simd();
  880. if (DCTSIZE != 8)
  881. return 0;
  882. if (sizeof(JCOEF) != 2)
  883. return 0;
  884. if (SIZEOF_SIZE_T != 8)
  885. return 0;
  886. if (simd_support & JSIMD_NEON)
  887. return 1;
  888. return 0;
  889. }
  890. GLOBAL(int)
  891. jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
  892. const int *jpeg_natural_order_start, int Sl,
  893. int Al, JCOEF *absvalues, size_t *bits)
  894. {
  895. return jsimd_encode_mcu_AC_refine_prepare_neon(block,
  896. jpeg_natural_order_start,
  897. Sl, Al, absvalues, bits);
  898. }