erasure_code.h 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. /**********************************************************************
  2. Copyright(c) 2011-2015 Intel Corporation All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions
  5. are met:
  6. * Redistributions of source code must retain the above copyright
  7. notice, this list of conditions and the following disclaimer.
  8. * Redistributions in binary form must reproduce the above copyright
  9. notice, this list of conditions and the following disclaimer in
  10. the documentation and/or other materials provided with the
  11. distribution.
  12. * Neither the name of Intel Corporation nor the names of its
  13. contributors may be used to endorse or promote products derived
  14. from this software without specific prior written permission.
  15. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. **********************************************************************/
  27. #ifndef _ERASURE_CODE_H_
  28. #define _ERASURE_CODE_H_
  29. /**
  30. * @file erasure_code.h
  31. * @brief Interface to functions supporting erasure code encode and decode.
  32. *
  33. * This file defines the interface to optimized functions used in erasure
  34. * codes. Encode and decode of erasures in GF(2^8) are made by calculating the
  35. * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
  36. * set of coefficients. Values for the coefficients are determined by the type
  37. * of erasure code. Using a general dot product means that any sequence of
  38. * coefficients may be used including erasure codes based on random
  39. * coefficients.
  40. * Multiple versions of dot product are supplied to calculate 1-6 output
  41. * vectors in one pass.
  42. * Base GF multiply and divide functions can be sped up by defining
  43. * GF_LARGE_TABLES at the expense of memory size.
  44. *
  45. */
  46. #include "gf_vect_mul.h"
  47. #ifdef __cplusplus
  48. extern "C" {
  49. #endif
  50. /**
  51. * @brief Initialize tables for fast Erasure Code encode and decode.
  52. *
  53. * Generates the expanded tables needed for fast encode or decode for erasure
  54. * codes on blocks of data. 32bytes is generated for each input coefficient.
  55. *
  56. * @param k The number of vector sources or rows in the generator matrix
  57. * for coding.
  58. * @param rows The number of output vectors to concurrently encode/decode.
  59. * @param a Pointer to sets of arrays of input coefficients used to encode
  60. * or decode data.
  61. * @param gftbls Pointer to start of space for concatenated output tables
  62. * generated from input coefficients. Must be of size 32*k*rows.
  63. * @returns none
  64. */
  65. void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
  66. /**
  67. * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version.
  68. *
  69. * Baseline version of ec_encode_data() with same parameters.
  70. */
  71. void ec_init_tables_base(int k, int rows, unsigned char* a, unsigned char* gftbls);
  72. /**
  73. * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
  74. *
  75. * Given a list of source data blocks, generate one or multiple blocks of
  76. * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
  77. * suitable set of coefficients, this function will perform the fast generation
  78. * or decoding of Reed-Solomon type erasure codes.
  79. *
  80. * This function determines what instruction sets are enabled and
  81. * selects the appropriate version at runtime.
  82. *
  83. * @param len Length of each block of data (vector) of source or dest data.
  84. * @param k The number of vector sources or rows in the generator matrix
  85. * for coding.
  86. * @param rows The number of output vectors to concurrently encode/decode.
  87. * @param gftbls Pointer to array of input tables generated from coding
  88. * coefficients in ec_init_tables(). Must be of size 32*k*rows
  89. * @param data Array of pointers to source input buffers.
  90. * @param coding Array of pointers to coded output buffers.
  91. * @returns none
  92. */
  93. void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
  94. unsigned char **coding);
  95. /**
  96. * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
  97. *
  98. * Baseline version of ec_encode_data() with same parameters.
  99. */
  100. void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
  101. unsigned char **dest);
  102. /**
  103. * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version.
  104. *
  105. * Given one source data block, update one or multiple blocks of encoded data as
  106. * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
  107. * coefficients, this function will perform the fast generation or decoding of
  108. * Reed-Solomon type erasure codes from one input source at a time.
  109. *
  110. * This function determines what instruction sets are enabled and selects the
  111. * appropriate version at runtime.
  112. *
  113. * @param len Length of each block of data (vector) of source or dest data.
  114. * @param k The number of vector sources or rows in the generator matrix
  115. * for coding.
  116. * @param rows The number of output vectors to concurrently encode/decode.
  117. * @param vec_i The vector index corresponding to the single input source.
  118. * @param g_tbls Pointer to array of input tables generated from coding
  119. * coefficients in ec_init_tables(). Must be of size 32*k*rows
  120. * @param data Pointer to single input source used to update output parity.
  121. * @param coding Array of pointers to coded output buffers.
  122. * @returns none
  123. */
  124. void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
  125. unsigned char *data, unsigned char **coding);
  126. /**
  127. * @brief Generate update for encode or decode of erasure codes from single source.
  128. *
  129. * Baseline version of ec_encode_data_update().
  130. */
  131. void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
  132. unsigned char *data, unsigned char **dest);
  133. /**
  134. * @brief GF(2^8) vector dot product, runs baseline version.
  135. *
  136. * Does a GF(2^8) dot product across each byte of the input array and a constant
  137. * set of coefficients to produce each byte of the output. Can be used for
  138. * erasure coding encode and decode. Function requires pre-calculation of a
  139. * 32*vlen byte constant array based on the input coefficients.
  140. *
  141. * @param len Length of each vector in bytes. Must be >= 16.
  142. * @param vlen Number of vector sources.
  143. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
  144. * on the array of input coefficients. Only elements 32*CONST*j + 1
  145. * of this array are used, where j = (0, 1, 2...) and CONST is the
  146. * number of elements in the array of input coefficients. The
  147. * elements used correspond to the original input coefficients.
  148. * @param src Array of pointers to source inputs.
  149. * @param dest Pointer to destination data array.
  150. * @returns none
  151. */
  152. void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
  153. unsigned char **src, unsigned char *dest);
  154. /**
  155. * @brief GF(2^8) vector dot product, runs appropriate version.
  156. *
  157. * Does a GF(2^8) dot product across each byte of the input array and a constant
  158. * set of coefficients to produce each byte of the output. Can be used for
  159. * erasure coding encode and decode. Function requires pre-calculation of a
  160. * 32*vlen byte constant array based on the input coefficients.
  161. *
  162. * This function determines what instruction sets are enabled and
  163. * selects the appropriate version at runtime.
  164. *
  165. * @param len Length of each vector in bytes. Must be >= 32.
  166. * @param vlen Number of vector sources.
  167. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
  168. * on the array of input coefficients.
  169. * @param src Array of pointers to source inputs.
  170. * @param dest Pointer to destination data array.
  171. * @returns none
  172. */
  173. void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
  174. unsigned char **src, unsigned char *dest);
  175. /**
  176. * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
  177. *
  178. * Does a GF(2^8) multiply across each byte of input source with expanded
  179. * constant and add to destination array. Can be used for erasure coding encode
  180. * and decode update when only one source is available at a time. Function
  181. * requires pre-calculation of a 32*vec byte constant array based on the input
  182. * coefficients.
  183. *
  184. * This function determines what instruction sets are enabled and selects the
  185. * appropriate version at runtime.
  186. *
  187. * @param len Length of each vector in bytes. Must be >= 64.
  188. * @param vec The number of vector sources or rows in the generator matrix
  189. * for coding.
  190. * @param vec_i The vector index corresponding to the single input source.
  191. * @param gftbls Pointer to array of input tables generated from coding
  192. * coefficients in ec_init_tables(). Must be of size 32*vec.
  193. * @param src Array of pointers to source inputs.
  194. * @param dest Pointer to destination data array.
  195. * @returns none
  196. */
  197. void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  198. unsigned char *dest);
  199. /**
  200. * @brief GF(2^8) vector multiply accumulate, baseline version.
  201. *
  202. * Baseline version of gf_vect_mad() with same parameters.
  203. */
  204. void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
  205. unsigned char *dest);
  206. // x86 only
  207. #if defined(__i386__) || defined(__x86_64__)
  208. /**
  209. * @brief Generate or decode erasure codes on blocks of data.
  210. *
  211. * Arch specific version of ec_encode_data() with same parameters.
  212. * @requires SSE4.1
  213. */
  214. void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
  215. unsigned char **coding);
  216. /**
  217. * @brief Generate or decode erasure codes on blocks of data.
  218. *
  219. * Arch specific version of ec_encode_data() with same parameters.
  220. * @requires AVX
  221. */
  222. void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
  223. unsigned char **coding);
  224. /**
  225. * @brief Generate or decode erasure codes on blocks of data.
  226. *
  227. * Arch specific version of ec_encode_data() with same parameters.
  228. * @requires AVX2
  229. */
  230. void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
  231. unsigned char **coding);
  232. /**
  233. * @brief Generate update for encode or decode of erasure codes from single source.
  234. *
  235. * Arch specific version of ec_encode_data_update() with same parameters.
  236. * @requires SSE4.1
  237. */
  238. void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
  239. unsigned char *data, unsigned char **coding);
  240. /**
  241. * @brief Generate update for encode or decode of erasure codes from single source.
  242. *
  243. * Arch specific version of ec_encode_data_update() with same parameters.
  244. * @requires AVX
  245. */
  246. void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
  247. unsigned char *data, unsigned char **coding);
  248. /**
  249. * @brief Generate update for encode or decode of erasure codes from single source.
  250. *
  251. * Arch specific version of ec_encode_data_update() with same parameters.
  252. * @requires AVX2
  253. */
  254. void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
  255. unsigned char *data, unsigned char **coding);
  256. /**
  257. * @brief GF(2^8) vector dot product.
  258. *
  259. * Does a GF(2^8) dot product across each byte of the input array and a constant
  260. * set of coefficients to produce each byte of the output. Can be used for
  261. * erasure coding encode and decode. Function requires pre-calculation of a
  262. * 32*vlen byte constant array based on the input coefficients.
  263. * @requires SSE4.1
  264. *
  265. * @param len Length of each vector in bytes. Must be >= 16.
  266. * @param vlen Number of vector sources.
  267. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
  268. * on the array of input coefficients.
  269. * @param src Array of pointers to source inputs.
  270. * @param dest Pointer to destination data array.
  271. * @returns none
  272. */
  273. void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  274. unsigned char **src, unsigned char *dest);
  275. /**
  276. * @brief GF(2^8) vector dot product.
  277. *
  278. * Does a GF(2^8) dot product across each byte of the input array and a constant
  279. * set of coefficients to produce each byte of the output. Can be used for
  280. * erasure coding encode and decode. Function requires pre-calculation of a
  281. * 32*vlen byte constant array based on the input coefficients.
  282. * @requires AVX
  283. *
  284. * @param len Length of each vector in bytes. Must be >= 16.
  285. * @param vlen Number of vector sources.
  286. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
  287. * on the array of input coefficients.
  288. * @param src Array of pointers to source inputs.
  289. * @param dest Pointer to destination data array.
  290. * @returns none
  291. */
  292. void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  293. unsigned char **src, unsigned char *dest);
  294. /**
  295. * @brief GF(2^8) vector dot product.
  296. *
  297. * Does a GF(2^8) dot product across each byte of the input array and a constant
  298. * set of coefficients to produce each byte of the output. Can be used for
  299. * erasure coding encode and decode. Function requires pre-calculation of a
  300. * 32*vlen byte constant array based on the input coefficients.
  301. * @requires AVX2
  302. *
  303. * @param len Length of each vector in bytes. Must be >= 32.
  304. * @param vlen Number of vector sources.
  305. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
  306. * on the array of input coefficients.
  307. * @param src Array of pointers to source inputs.
  308. * @param dest Pointer to destination data array.
  309. * @returns none
  310. */
  311. void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  312. unsigned char **src, unsigned char *dest);
  313. /**
  314. * @brief GF(2^8) vector dot product with two outputs.
  315. *
  316. * Vector dot product optimized to calculate two outputs at a time. Does two
  317. * GF(2^8) dot products across each byte of the input array and two constant
  318. * sets of coefficients to produce each byte of the outputs. Can be used for
  319. * erasure coding encode and decode. Function requires pre-calculation of a
  320. * 2*32*vlen byte constant array based on the two sets of input coefficients.
  321. * @requires SSE4.1
  322. *
  323. * @param len Length of each vector in bytes. Must be >= 16.
  324. * @param vlen Number of vector sources.
  325. * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
  326. * based on the array of input coefficients.
  327. * @param src Array of pointers to source inputs.
  328. * @param dest Array of pointers to destination data buffers.
  329. * @returns none
  330. */
  331. void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  332. unsigned char **src, unsigned char **dest);
  333. /**
  334. * @brief GF(2^8) vector dot product with two outputs.
  335. *
  336. * Vector dot product optimized to calculate two outputs at a time. Does two
  337. * GF(2^8) dot products across each byte of the input array and two constant
  338. * sets of coefficients to produce each byte of the outputs. Can be used for
  339. * erasure coding encode and decode. Function requires pre-calculation of a
  340. * 2*32*vlen byte constant array based on the two sets of input coefficients.
  341. * @requires AVX
  342. *
  343. * @param len Length of each vector in bytes. Must be >= 16.
  344. * @param vlen Number of vector sources.
  345. * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
  346. * based on the array of input coefficients.
  347. * @param src Array of pointers to source inputs.
  348. * @param dest Array of pointers to destination data buffers.
  349. * @returns none
  350. */
  351. void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  352. unsigned char **src, unsigned char **dest);
  353. /**
  354. * @brief GF(2^8) vector dot product with two outputs.
  355. *
  356. * Vector dot product optimized to calculate two outputs at a time. Does two
  357. * GF(2^8) dot products across each byte of the input array and two constant
  358. * sets of coefficients to produce each byte of the outputs. Can be used for
  359. * erasure coding encode and decode. Function requires pre-calculation of a
  360. * 2*32*vlen byte constant array based on the two sets of input coefficients.
  361. * @requires AVX2
  362. *
  363. * @param len Length of each vector in bytes. Must be >= 32.
  364. * @param vlen Number of vector sources.
  365. * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
  366. * based on the array of input coefficients.
  367. * @param src Array of pointers to source inputs.
  368. * @param dest Array of pointers to destination data buffers.
  369. * @returns none
  370. */
  371. void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  372. unsigned char **src, unsigned char **dest);
  373. /**
  374. * @brief GF(2^8) vector dot product with three outputs.
  375. *
  376. * Vector dot product optimized to calculate three outputs at a time. Does three
  377. * GF(2^8) dot products across each byte of the input array and three constant
  378. * sets of coefficients to produce each byte of the outputs. Can be used for
  379. * erasure coding encode and decode. Function requires pre-calculation of a
  380. * 3*32*vlen byte constant array based on the three sets of input coefficients.
  381. * @requires SSE4.1
  382. *
  383. * @param len Length of each vector in bytes. Must be >= 16.
  384. * @param vlen Number of vector sources.
  385. * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
  386. * based on the array of input coefficients.
  387. * @param src Array of pointers to source inputs.
  388. * @param dest Array of pointers to destination data buffers.
  389. * @returns none
  390. */
  391. void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  392. unsigned char **src, unsigned char **dest);
  393. /**
  394. * @brief GF(2^8) vector dot product with three outputs.
  395. *
  396. * Vector dot product optimized to calculate three outputs at a time. Does three
  397. * GF(2^8) dot products across each byte of the input array and three constant
  398. * sets of coefficients to produce each byte of the outputs. Can be used for
  399. * erasure coding encode and decode. Function requires pre-calculation of a
  400. * 3*32*vlen byte constant array based on the three sets of input coefficients.
  401. * @requires AVX
  402. *
  403. * @param len Length of each vector in bytes. Must be >= 16.
  404. * @param vlen Number of vector sources.
  405. * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
  406. * based on the array of input coefficients.
  407. * @param src Array of pointers to source inputs.
  408. * @param dest Array of pointers to destination data buffers.
  409. * @returns none
  410. */
  411. void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  412. unsigned char **src, unsigned char **dest);
  413. /**
  414. * @brief GF(2^8) vector dot product with three outputs.
  415. *
  416. * Vector dot product optimized to calculate three outputs at a time. Does three
  417. * GF(2^8) dot products across each byte of the input array and three constant
  418. * sets of coefficients to produce each byte of the outputs. Can be used for
  419. * erasure coding encode and decode. Function requires pre-calculation of a
  420. * 3*32*vlen byte constant array based on the three sets of input coefficients.
  421. * @requires AVX2
  422. *
  423. * @param len Length of each vector in bytes. Must be >= 32.
  424. * @param vlen Number of vector sources.
  425. * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
  426. * based on the array of input coefficients.
  427. * @param src Array of pointers to source inputs.
  428. * @param dest Array of pointers to destination data buffers.
  429. * @returns none
  430. */
  431. void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  432. unsigned char **src, unsigned char **dest);
  433. /**
  434. * @brief GF(2^8) vector dot product with four outputs.
  435. *
  436. * Vector dot product optimized to calculate four outputs at a time. Does four
  437. * GF(2^8) dot products across each byte of the input array and four constant
  438. * sets of coefficients to produce each byte of the outputs. Can be used for
  439. * erasure coding encode and decode. Function requires pre-calculation of a
  440. * 4*32*vlen byte constant array based on the four sets of input coefficients.
  441. * @requires SSE4.1
  442. *
  443. * @param len Length of each vector in bytes. Must be >= 16.
  444. * @param vlen Number of vector sources.
  445. * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
  446. * based on the array of input coefficients.
  447. * @param src Array of pointers to source inputs.
  448. * @param dest Array of pointers to destination data buffers.
  449. * @returns none
  450. */
  451. void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  452. unsigned char **src, unsigned char **dest);
  453. /**
  454. * @brief GF(2^8) vector dot product with four outputs.
  455. *
  456. * Vector dot product optimized to calculate four outputs at a time. Does four
  457. * GF(2^8) dot products across each byte of the input array and four constant
  458. * sets of coefficients to produce each byte of the outputs. Can be used for
  459. * erasure coding encode and decode. Function requires pre-calculation of a
  460. * 4*32*vlen byte constant array based on the four sets of input coefficients.
  461. * @requires AVX
  462. *
  463. * @param len Length of each vector in bytes. Must be >= 16.
  464. * @param vlen Number of vector sources.
  465. * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
  466. * based on the array of input coefficients.
  467. * @param src Array of pointers to source inputs.
  468. * @param dest Array of pointers to destination data buffers.
  469. * @returns none
  470. */
  471. void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  472. unsigned char **src, unsigned char **dest);
  473. /**
  474. * @brief GF(2^8) vector dot product with four outputs.
  475. *
  476. * Vector dot product optimized to calculate four outputs at a time. Does four
  477. * GF(2^8) dot products across each byte of the input array and four constant
  478. * sets of coefficients to produce each byte of the outputs. Can be used for
  479. * erasure coding encode and decode. Function requires pre-calculation of a
  480. * 4*32*vlen byte constant array based on the four sets of input coefficients.
  481. * @requires AVX2
  482. *
  483. * @param len Length of each vector in bytes. Must be >= 32.
  484. * @param vlen Number of vector sources.
  485. * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
  486. * based on the array of input coefficients.
  487. * @param src Array of pointers to source inputs.
  488. * @param dest Array of pointers to destination data buffers.
  489. * @returns none
  490. */
  491. void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  492. unsigned char **src, unsigned char **dest);
  493. /**
  494. * @brief GF(2^8) vector dot product with five outputs.
  495. *
  496. * Vector dot product optimized to calculate five outputs at a time. Does five
  497. * GF(2^8) dot products across each byte of the input array and five constant
  498. * sets of coefficients to produce each byte of the outputs. Can be used for
  499. * erasure coding encode and decode. Function requires pre-calculation of a
  500. * 5*32*vlen byte constant array based on the five sets of input coefficients.
  501. * @requires SSE4.1
  502. *
  503. * @param len Length of each vector in bytes. Must >= 16.
  504. * @param vlen Number of vector sources.
  505. * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
  506. * based on the array of input coefficients.
  507. * @param src Array of pointers to source inputs.
  508. * @param dest Array of pointers to destination data buffers.
  509. * @returns none
  510. */
  511. void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  512. unsigned char **src, unsigned char **dest);
  513. /**
  514. * @brief GF(2^8) vector dot product with five outputs.
  515. *
  516. * Vector dot product optimized to calculate five outputs at a time. Does five
  517. * GF(2^8) dot products across each byte of the input array and five constant
  518. * sets of coefficients to produce each byte of the outputs. Can be used for
  519. * erasure coding encode and decode. Function requires pre-calculation of a
  520. * 5*32*vlen byte constant array based on the five sets of input coefficients.
  521. * @requires AVX
  522. *
  523. * @param len Length of each vector in bytes. Must >= 16.
  524. * @param vlen Number of vector sources.
  525. * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
  526. * based on the array of input coefficients.
  527. * @param src Array of pointers to source inputs.
  528. * @param dest Array of pointers to destination data buffers.
  529. * @returns none
  530. */
  531. void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  532. unsigned char **src, unsigned char **dest);
  533. /**
  534. * @brief GF(2^8) vector dot product with five outputs.
  535. *
  536. * Vector dot product optimized to calculate five outputs at a time. Does five
  537. * GF(2^8) dot products across each byte of the input array and five constant
  538. * sets of coefficients to produce each byte of the outputs. Can be used for
  539. * erasure coding encode and decode. Function requires pre-calculation of a
  540. * 5*32*vlen byte constant array based on the five sets of input coefficients.
  541. * @requires AVX2
  542. *
  543. * @param len Length of each vector in bytes. Must >= 32.
  544. * @param vlen Number of vector sources.
  545. * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
  546. * based on the array of input coefficients.
  547. * @param src Array of pointers to source inputs.
  548. * @param dest Array of pointers to destination data buffers.
  549. * @returns none
  550. */
  551. void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  552. unsigned char **src, unsigned char **dest);
  553. /**
  554. * @brief GF(2^8) vector dot product with six outputs.
  555. *
  556. * Vector dot product optimized to calculate six outputs at a time. Does six
  557. * GF(2^8) dot products across each byte of the input array and six constant
  558. * sets of coefficients to produce each byte of the outputs. Can be used for
  559. * erasure coding encode and decode. Function requires pre-calculation of a
  560. * 6*32*vlen byte constant array based on the six sets of input coefficients.
  561. * @requires SSE4.1
  562. *
  563. * @param len Length of each vector in bytes. Must be >= 16.
  564. * @param vlen Number of vector sources.
  565. * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
  566. * based on the array of input coefficients.
  567. * @param src Array of pointers to source inputs.
  568. * @param dest Array of pointers to destination data buffers.
  569. * @returns none
  570. */
  571. void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
  572. unsigned char **src, unsigned char **dest);
  573. /**
  574. * @brief GF(2^8) vector dot product with six outputs.
  575. *
  576. * Vector dot product optimized to calculate six outputs at a time. Does six
  577. * GF(2^8) dot products across each byte of the input array and six constant
  578. * sets of coefficients to produce each byte of the outputs. Can be used for
  579. * erasure coding encode and decode. Function requires pre-calculation of a
  580. * 6*32*vlen byte constant array based on the six sets of input coefficients.
  581. * @requires AVX
  582. *
  583. * @param len Length of each vector in bytes. Must be >= 16.
  584. * @param vlen Number of vector sources.
  585. * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
  586. * based on the array of input coefficients.
  587. * @param src Array of pointers to source inputs.
  588. * @param dest Array of pointers to destination data buffers.
  589. * @returns none
  590. */
  591. void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
  592. unsigned char **src, unsigned char **dest);
  593. /**
  594. * @brief GF(2^8) vector dot product with six outputs.
  595. *
  596. * Vector dot product optimized to calculate six outputs at a time. Does six
  597. * GF(2^8) dot products across each byte of the input array and six constant
  598. * sets of coefficients to produce each byte of the outputs. Can be used for
  599. * erasure coding encode and decode. Function requires pre-calculation of a
  600. * 6*32*vlen byte constant array based on the six sets of input coefficients.
  601. * @requires AVX2
  602. *
  603. * @param len Length of each vector in bytes. Must be >= 32.
  604. * @param vlen Number of vector sources.
  605. * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
  606. * based on the array of input coefficients.
  607. * @param src Array of pointers to source inputs.
  608. * @param dest Array of pointers to destination data buffers.
  609. * @returns none
  610. */
  611. void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
  612. unsigned char **src, unsigned char **dest);
  613. /**
  614. * @brief GF(2^8) vector multiply accumulate, arch specific version.
  615. *
  616. * Arch specific version of gf_vect_mad() with same parameters.
  617. * @requires SSE4.1
  618. */
  619. void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  620. unsigned char *dest);
  621. /**
  622. * @brief GF(2^8) vector multiply accumulate, arch specific version.
  623. *
  624. * Arch specific version of gf_vect_mad() with same parameters.
  625. * @requires AVX
  626. */
  627. void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  628. unsigned char *dest);
  629. /**
  630. * @brief GF(2^8) vector multiply accumulate, arch specific version.
  631. *
  632. * Arch specific version of gf_vect_mad() with same parameters.
  633. * @requires AVX2
  634. */
  635. void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  636. unsigned char *dest);
  637. /**
  638. * @brief GF(2^8) vector multiply with 2 accumulate. SSE version.
  639. *
  640. * Does a GF(2^8) multiply across each byte of input source with expanded
  641. * constants and add to destination arrays. Can be used for erasure coding
  642. * encode and decode update when only one source is available at a
  643. * time. Function requires pre-calculation of a 32*vec byte constant array based
  644. * on the input coefficients.
  645. * @requires SSE4.1
  646. *
  647. * @param len Length of each vector in bytes. Must be >= 32.
  648. * @param vec The number of vector sources or rows in the generator matrix
  649. * for coding.
  650. * @param vec_i The vector index corresponding to the single input source.
  651. * @param gftbls Pointer to array of input tables generated from coding
  652. * coefficients in ec_init_tables(). Must be of size 32*vec.
  653. * @param src Pointer to source input array.
  654. * @param dest Array of pointers to destination input/outputs.
  655. * @returns none
  656. */
  657. void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  658. unsigned char **dest);
  659. /**
  660. * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse().
  661. * @requires AVX
  662. */
  663. void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  664. unsigned char **dest);
  665. /**
  666. * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse().
  667. * @requires AVX2
  668. */
  669. void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  670. unsigned char **dest);
  671. /**
  672. * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
  673. *
  674. * Does a GF(2^8) multiply across each byte of input source with expanded
  675. * constants and add to destination arrays. Can be used for erasure coding
  676. * encode and decode update when only one source is available at a
  677. * time. Function requires pre-calculation of a 32*vec byte constant array based
  678. * on the input coefficients.
  679. * @requires SSE4.1
  680. *
  681. * @param len Length of each vector in bytes. Must be >= 32.
  682. * @param vec The number of vector sources or rows in the generator matrix
  683. * for coding.
  684. * @param vec_i The vector index corresponding to the single input source.
  685. * @param gftbls Pointer to array of input tables generated from coding
  686. * coefficients in ec_init_tables(). Must be of size 32*vec.
  687. * @param src Pointer to source input array.
  688. * @param dest Array of pointers to destination input/outputs.
  689. * @returns none
  690. */
  691. void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  692. unsigned char **dest);
  693. /**
  694. * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
  695. * @requires AVX
  696. */
  697. void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  698. unsigned char **dest);
  699. /**
  700. * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
  701. * @requires AVX2
  702. */
  703. void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  704. unsigned char **dest);
  705. /**
  706. * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
  707. *
  708. * Does a GF(2^8) multiply across each byte of input source with expanded
  709. * constants and add to destination arrays. Can be used for erasure coding
  710. * encode and decode update when only one source is available at a
  711. * time. Function requires pre-calculation of a 32*vec byte constant array based
  712. * on the input coefficients.
  713. * @requires SSE4.1
  714. *
  715. * @param len Length of each vector in bytes. Must be >= 32.
  716. * @param vec The number of vector sources or rows in the generator matrix
  717. * for coding.
  718. * @param vec_i The vector index corresponding to the single input source.
  719. * @param gftbls Pointer to array of input tables generated from coding
  720. * coefficients in ec_init_tables(). Must be of size 32*vec.
  721. * @param src Pointer to source input array.
  722. * @param dest Array of pointers to destination input/outputs.
  723. * @returns none
  724. */
  725. void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  726. unsigned char **dest);
  727. /**
  728. * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
  729. * @requires AVX
  730. */
  731. void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  732. unsigned char **dest);
  733. /**
  734. * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
  735. * @requires AVX2
  736. */
  737. void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  738. unsigned char **dest);
  739. /**
  740. * @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
  741. * @requires SSE4.1
  742. */
  743. void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  744. unsigned char **dest);
  745. /**
  746. * @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
  747. * @requires AVX
  748. */
  749. void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  750. unsigned char **dest);
  751. /**
  752. * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
  753. * @requires AVX2
  754. */
  755. void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  756. unsigned char **dest);
  757. /**
  758. * @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
  759. * @requires SSE4.1
  760. */
  761. void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  762. unsigned char **dest);
  763. /**
  764. * @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
  765. * @requires AVX
  766. */
  767. void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  768. unsigned char **dest);
  769. /**
  770. * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
  771. * @requires AVX2
  772. */
  773. void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
  774. unsigned char **dest);
  775. #endif
  776. /**********************************************************************
  777. * The remaining are lib support functions used in GF(2^8) operations.
  778. */
  779. /**
  780. * @brief Single element GF(2^8) multiply.
  781. *
  782. * @param a Multiplicand a
  783. * @param b Multiplicand b
  784. * @returns Product of a and b in GF(2^8)
  785. */
  786. unsigned char gf_mul_erasure(unsigned char a, unsigned char b);
  787. /**
  788. * @brief Single element GF(2^8) inverse.
  789. *
  790. * @param a Input element
  791. * @returns Field element b such that a x b = {1}
  792. */
  793. unsigned char gf_inv(unsigned char a);
  794. /**
  795. * @brief Generate a matrix of coefficients to be used for encoding.
  796. *
  797. * Vandermonde matrix example of encoding coefficients where high portion of
  798. * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
  799. * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
  800. * erasure encoding but does not guarantee invertable for every sub matrix. For
  801. * large pairs of m and k it is possible to find cases where the decode matrix
  802. * chosen from sources and parity is not invertable. Users may want to adjust
  803. * for certain pairs m and k. If m and k satisfy one of the following
  804. * inequalities, no adjustment is required:
  805. *
  806. * - k <= 3
  807. * - k = 4, m <= 25
  808. * - k = 5, m <= 10
  809. * - k <= 21, m-k = 4
  810. * - m - k <= 3.
  811. *
  812. * @param a [m x k] array to hold coefficients
  813. * @param m number of rows in matrix corresponding to srcs + parity.
  814. * @param k number of columns in matrix corresponding to srcs.
  815. * @returns none
  816. */
  817. void gf_gen_rs_matrix(unsigned char *a, int m, int k);
  818. /**
  819. * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
  820. *
  821. * Cauchy matrix example of encoding coefficients where high portion of matrix
  822. * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
  823. * i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertable.
  824. *
  825. * @param a [m x k] array to hold coefficients
  826. * @param m number of rows in matrix corresponding to srcs + parity.
  827. * @param k number of columns in matrix corresponding to srcs.
  828. * @returns none
  829. */
  830. void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
  831. /**
  832. * @brief Invert a matrix in GF(2^8)
  833. *
  834. * Attempts to construct an n x n inverse of the input matrix. Returns non-zero
  835. * if singular. Will always destroy input matrix in process.
  836. *
  837. * @param in input matrix, destroyed by invert process
  838. * @param out output matrix such that [in] x [out] = [I] - identity matrix
  839. * @param n size of matrix [nxn]
  840. * @returns 0 successful, other fail on singular input matrix
  841. */
  842. int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
  843. /*************************************************************/
  844. #ifdef __cplusplus
  845. }
  846. #endif
  847. #endif //_ERASURE_CODE_H_