ec_base_vsx.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. #ifndef _ERASURE_CODE_PPC64LE_H_
  2. #define _ERASURE_CODE_PPC64LE_H_
  3. #include "erasure_code.h"
  4. #include <altivec.h>
  5. #ifdef __cplusplus
  6. extern "C" {
  7. #endif
  /*
   * Compiler-specific selection of the two primitives used by the VSX kernels:
   *
   *   EC_vec_xl(off, ptr)        - load one vector from the address ptr + off.
   *   EC_vec_permxor(va, vb, vc) - wrapper around the compiler's vpermxor
   *                                builtin.
   *
   * IBM XL and GCC >= 8 map both directly onto compiler builtins. For the
   * older-GCC and fallback branches the control vector passed to the vpermxor
   * builtin is bitwise-complemented first (vec_nor(vc, vc)); note that this
   * expands `vc` twice, so callers must not pass an expression with side
   * effects. Where no suitable load builtin is selected, EC_vec_xl is provided
   * as a small inline-asm helper (lxvd2x followed by xxswapd).
   */
  #if defined(__ibmxl__)
  #define EC_vec_xl(a, b) vec_xl_be(a, b)
  #define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
  #elif defined __GNUC__ && __GNUC__ >= 8
  #define EC_vec_xl(a, b) vec_xl_be(a, b)
  #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
  #elif defined __GNUC__ && __GNUC__ >= 7
  #if defined _ARCH_PWR9
  #define EC_vec_xl(a, b) vec_vsx_ld(a, b)
  #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
  #else
  /**
   * @brief Load one vector from ptr + off using lxvd2x/xxswapd.
   *
   * Declared static inline so that every translation unit including this
   * header gets its own private copy (plain `inline` in a header can produce
   * either an undefined reference or duplicate definitions, depending on the
   * C dialect in use).
   *
   * @param off Byte offset added to ptr to form the load address.
   * @param ptr Base pointer of the data to load.
   * @returns The loaded vector.
   */
  static inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr)
  {
          vector unsigned char vc;

          /*
           * - `off` occupies the RA slot of lxvd2x, where register 0 is read
           *   as the constant zero, so it must use the "b" (base register)
           *   constraint rather than "r". It is widened to long so the whole
           *   64-bit register holds the sign-extended offset.
           * - The "memory" clobber tells the compiler that the asm reads
           *   memory it cannot see through the operands, so pending stores to
           *   the buffer are completed before the load.
           */
          __asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
                               : "=wa"(vc)
                               : "b"((long) off), "r"(ptr)
                               : "memory");
          return vc;
  }
  #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
  #endif
  #else
  #if defined _ARCH_PWR8
  /**
   * @brief Load one vector from ptr + off using lxvd2x/xxswapd.
   *
   * Declared static inline so that every translation unit including this
   * header gets its own private copy (plain `inline` in a header can produce
   * either an undefined reference or duplicate definitions, depending on the
   * C dialect in use).
   *
   * @param off Byte offset added to ptr to form the load address.
   * @param ptr Base pointer of the data to load.
   * @returns The loaded vector.
   */
  static inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr)
  {
          vector unsigned char vc;

          /*
           * - `off` occupies the RA slot of lxvd2x, where register 0 is read
           *   as the constant zero, so it must use the "b" (base register)
           *   constraint rather than "r". It is widened to long so the whole
           *   64-bit register holds the sign-extended offset.
           * - The "memory" clobber tells the compiler that the asm reads
           *   memory it cannot see through the operands, so pending stores to
           *   the buffer are completed before the load.
           */
          __asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
                               : "=wa"(vc)
                               : "b"((long) off), "r"(ptr)
                               : "memory");
          return vc;
  }
  #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
  #else
  #error "This code is only supported on ppc64le."
  #endif
  #endif
  38. /**
  39. * @brief GF(2^8) vector multiply. VSX version.
  40. *
  41. * Does a GF(2^8) multiply across each byte of input source with expanded
  42. * constant and save to destination array. Can be used for erasure coding encode
  43. * and decode update when only one source is available at a time. Function
  44. * requires pre-calculation of a 32 byte constant array based on the input
  45. * coefficients.
  46. * @requires VSX
  47. *
  48. * @param len Length of each vector in bytes.
  49. * @param gftbls Pointer to array of input tables generated from coding
  50. * coefficients in ec_init_tables(). Must be of size 32.
  51. * @param src Pointer to source input array.
  52. * @param dest Pointer to destination data array.
  53. * @returns none
  54. */
  55. void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
  56. /**
  57. * @brief GF(2^8) vector dot product. VSX version.
  58. *
  59. * Does a GF(2^8) dot product across each byte of the input array and a constant
  60. * set of coefficients to produce each byte of the output. Can be used for
  61. * erasure coding encode and decode. Function requires pre-calculation of a
  62. * 32*vlen byte constant array based on the input coefficients.
  63. * @requires VSX
  64. *
  65. * @param len Length of each vector in bytes.
  66. * @param vlen Number of vector sources.
  67. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
  68. * on the array of input coefficients.
  69. * @param src Array of pointers to source inputs.
  70. * @param dest Pointer to destination data array.
  71. * @returns none
  72. */
  73. void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
  74. unsigned char **src, unsigned char *dest);
  75. /**
  76. * @brief GF(2^8) vector dot product with two outputs. VSX version.
  77. *
  78. * Vector dot product optimized to calculate two outputs at a time. Does two
  79. * GF(2^8) dot products across each byte of the input array and two constant
  80. * sets of coefficients to produce each byte of the outputs. Can be used for
  81. * erasure coding encode and decode. Function requires pre-calculation of a
  82. * 2*32*vlen byte constant array based on the two sets of input coefficients.
  83. * @requires VSX
  84. *
  85. * @param len Length of each vector in bytes.
  86. * @param vlen Number of vector sources.
  87. * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
  88. * based on the array of input coefficients.
  89. * @param src Array of pointers to source inputs.
  90. * @param dest Array of pointers to destination data buffers.
  91. * @returns none
  92. */
  93. void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
  94. unsigned char **src, unsigned char **dest);
  95. /**
  96. * @brief GF(2^8) vector dot product with three outputs. VSX version.
  97. *
  98. * Vector dot product optimized to calculate three outputs at a time. Does three
  99. * GF(2^8) dot products across each byte of the input array and three constant
  100. * sets of coefficients to produce each byte of the outputs. Can be used for
  101. * erasure coding encode and decode. Function requires pre-calculation of a
  102. * 3*32*vlen byte constant array based on the three sets of input coefficients.
  103. * @requires VSX
  104. *
  105. * @param len Length of each vector in bytes.
  106. * @param vlen Number of vector sources.
  107. * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
  108. * based on the array of input coefficients.
  109. * @param src Array of pointers to source inputs.
  110. * @param dest Array of pointers to destination data buffers.
  111. * @returns none
  112. */
  113. void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
  114. unsigned char **src, unsigned char **dest);
  115. /**
  116. * @brief GF(2^8) vector dot product with four outputs. VSX version.
  117. *
  118. * Vector dot product optimized to calculate four outputs at a time. Does four
  119. * GF(2^8) dot products across each byte of the input array and four constant
  120. * sets of coefficients to produce each byte of the outputs. Can be used for
  121. * erasure coding encode and decode. Function requires pre-calculation of a
  122. * 4*32*vlen byte constant array based on the four sets of input coefficients.
  123. * @requires VSX
  124. *
  125. * @param len Length of each vector in bytes.
  126. * @param vlen Number of vector sources.
  127. * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
  128. * based on the array of input coefficients.
  129. * @param src Array of pointers to source inputs.
  130. * @param dest Array of pointers to destination data buffers.
  131. * @returns none
  132. */
  133. void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
  134. unsigned char **src, unsigned char **dest);
  135. /**
  136. * @brief GF(2^8) vector dot product with five outputs. VSX version.
  137. *
  138. * Vector dot product optimized to calculate five outputs at a time. Does five
  139. * GF(2^8) dot products across each byte of the input array and five constant
  140. * sets of coefficients to produce each byte of the outputs. Can be used for
  141. * erasure coding encode and decode. Function requires pre-calculation of a
  142. * 5*32*vlen byte constant array based on the five sets of input coefficients.
  143. * @requires VSX
  144. *
  145. * @param len Length of each vector in bytes. Must be >= 16.
  146. * @param vlen Number of vector sources.
  147. * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
  148. * based on the array of input coefficients.
  149. * @param src Array of pointers to source inputs.
  150. * @param dest Array of pointers to destination data buffers.
  151. * @returns none
  152. */
  153. void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
  154. unsigned char **src, unsigned char **dest);
  155. /**
  156. * @brief GF(2^8) vector dot product with six outputs. VSX version.
  157. *
  158. * Vector dot product optimized to calculate six outputs at a time. Does six
  159. * GF(2^8) dot products across each byte of the input array and six constant
  160. * sets of coefficients to produce each byte of the outputs. Can be used for
  161. * erasure coding encode and decode. Function requires pre-calculation of a
  162. * 6*32*vlen byte constant array based on the six sets of input coefficients.
  163. * @requires VSX
  164. *
  165. * @param len Length of each vector in bytes.
  166. * @param vlen Number of vector sources.
  167. * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
  168. * based on the array of input coefficients.
  169. * @param src Array of pointers to source inputs.
  170. * @param dest Array of pointers to destination data buffers.
  171. * @returns none
  172. */
  173. void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
  174. unsigned char **src, unsigned char **dest);
  175. /**
  176. * @brief GF(2^8) vector multiply accumulate. VSX version.
  177. *
  178. * Does a GF(2^8) multiply across each byte of input source with expanded
  179. * constant and add to destination array. Can be used for erasure coding encode
  180. * and decode update when only one source is available at a time. Function
  181. * requires pre-calculation of a 32*vec byte constant array based on the input
  182. * coefficients.
  183. * @requires VSX
  184. *
  185. * @param len Length of each vector in bytes.
  186. * @param vec The number of vector sources or rows in the generator matrix
  187. * for coding.
  188. * @param vec_i The vector index corresponding to the single input source.
  189. * @param gftbls Pointer to array of input tables generated from coding
  190. * coefficients in ec_init_tables(). Must be of size 32*vec.
  191. * @param src Pointer to source input array.
  192. * @param dest Pointer to destination data array.
  193. * @returns none
  194. */
  195. void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  196. unsigned char *dest);
  197. /**
  198. * @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
  199. *
  200. * Does a GF(2^8) multiply across each byte of input source with expanded
  201. * constants and add to destination arrays. Can be used for erasure coding
  202. * encode and decode update when only one source is available at a
  203. * time. Function requires pre-calculation of a 32*vec byte constant array based
  204. * on the input coefficients.
  205. * @requires VSX
  206. *
  207. * @param len Length of each vector in bytes.
  208. * @param vec The number of vector sources or rows in the generator matrix
  209. * for coding.
  210. * @param vec_i The vector index corresponding to the single input source.
  211. * @param gftbls Pointer to array of input tables generated from coding
  212. * coefficients in ec_init_tables(). Must be of size 32*vec.
  213. * @param src Pointer to source input array.
  214. * @param dest Array of pointers to destination input/outputs.
  215. * @returns none
  216. */
  217. void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  218. unsigned char **dest);
  219. /**
  220. * @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
  221. *
  222. * Does a GF(2^8) multiply across each byte of input source with expanded
  223. * constants and add to destination arrays. Can be used for erasure coding
  224. * encode and decode update when only one source is available at a
  225. * time. Function requires pre-calculation of a 32*vec byte constant array based
  226. * on the input coefficients.
  227. * @requires VSX
  228. *
  229. * @param len Length of each vector in bytes.
  230. * @param vec The number of vector sources or rows in the generator matrix
  231. * for coding.
  232. * @param vec_i The vector index corresponding to the single input source.
  233. * @param gftbls Pointer to array of input tables generated from coding
  234. * coefficients in ec_init_tables(). Must be of size 32*vec.
  235. * @param src Pointer to source input array.
  236. * @param dest Array of pointers to destination input/outputs.
  237. * @returns none
  238. */
  239. void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  240. unsigned char **dest);
  241. /**
  242. * @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
  243. *
  244. * Does a GF(2^8) multiply across each byte of input source with expanded
  245. * constants and add to destination arrays. Can be used for erasure coding
  246. * encode and decode update when only one source is available at a
  247. * time. Function requires pre-calculation of a 32*vec byte constant array based
  248. * on the input coefficients.
  249. * @requires VSX
  250. *
  251. * @param len Length of each vector in bytes.
  252. * @param vec The number of vector sources or rows in the generator matrix
  253. * for coding.
  254. * @param vec_i The vector index corresponding to the single input source.
  255. * @param gftbls Pointer to array of input tables generated from coding
  256. * coefficients in ec_init_tables(). Must be of size 32*vec.
  257. * @param src Pointer to source input array.
  258. * @param dest Array of pointers to destination input/outputs.
  259. * @returns none
  260. */
  261. void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  262. unsigned char **dest);
  263. /**
  264. * @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
  265. *
  266. * Does a GF(2^8) multiply across each byte of input source with expanded
  267. * constants and add to destination arrays. Can be used for erasure coding
  268. * encode and decode update when only one source is available at a
  269. * time. Function requires pre-calculation of a 32*vec byte constant array based
  270. * on the input coefficients.
  271. * @requires VSX
  272. *
  273. * @param len Length of each vector in bytes.
  274. * @param vec The number of vector sources or rows in the generator matrix
  275. * for coding.
  276. * @param vec_i The vector index corresponding to the single input source.
  277. * @param gftbls Pointer to array of input tables generated from coding
  278. * coefficients in ec_init_tables(). Must be of size 32*vec.
  279. * @param src Pointer to source input array.
  280. * @param dest Array of pointers to destination input/outputs.
  281. * @returns none
  282. */
  283. void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  284. unsigned char **dest);
  285. /**
  286. * @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
  287. *
  288. * Does a GF(2^8) multiply across each byte of input source with expanded
  289. * constants and add to destination arrays. Can be used for erasure coding
  290. * encode and decode update when only one source is available at a
  291. * time. Function requires pre-calculation of a 32*vec byte constant array based
  292. * on the input coefficients.
  293. * @requires VSX
  294. *
  295. * @param len Length of each vector in bytes.
  296. * @param vec The number of vector sources or rows in the generator matrix
  297. * for coding.
  298. * @param vec_i The vector index corresponding to the single input source.
  299. * @param gftbls Pointer to array of input tables generated from coding
  300. * coefficients in ec_init_tables(). Must be of size 32*vec.
  301. * @param src Pointer to source input array.
  302. * @param dest Array of pointers to destination input/outputs.
  303. * @returns none
  304. */
  305. void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  306. unsigned char **dest);
  307. #ifdef __cplusplus
  308. }
  309. #endif
  310. #endif //_ERASURE_CODE_PPC64LE_H_