/* hist.c */
  1. /* ******************************************************************
  2. * hist : Histogram functions
  3. * part of Finite State Entropy project
  4. * Copyright (c) Meta Platforms, Inc. and affiliates.
  5. *
  6. * You can contact the author at :
  7. * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
  8. * - Public forum : https://groups.google.com/forum/#!forum/lz4c
  9. *
  10. * This source code is licensed under both the BSD-style license (found in the
  11. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  12. * in the COPYING file in the root directory of this source tree).
  13. * You may select, at your option, one of the above-listed licenses.
  14. ****************************************************************** */
  15. /* --- dependencies --- */
  16. #include "../common/mem.h" /* U32, BYTE, etc. */
  17. #include "../common/debug.h" /* assert, DEBUGLOG */
  18. #include "../common/error_private.h" /* ERROR */
  19. #include "hist.h"
  20. /* --- Error management --- */
  21. unsigned HIST_isError(size_t code) { return ERR_isError(code); }
  22. /*-**************************************************************
  23. * Histogram functions
  24. ****************************************************************/
  25. void HIST_add(unsigned* count, const void* src, size_t srcSize)
  26. {
  27. const BYTE* ip = (const BYTE*)src;
  28. const BYTE* const end = ip + srcSize;
  29. while (ip<end) {
  30. count[*ip++]++;
  31. }
  32. }
  33. unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
  34. const void* src, size_t srcSize)
  35. {
  36. const BYTE* ip = (const BYTE*)src;
  37. const BYTE* const end = ip + srcSize;
  38. unsigned maxSymbolValue = *maxSymbolValuePtr;
  39. unsigned largestCount=0;
  40. ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
  41. if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
  42. while (ip<end) {
  43. assert(*ip <= maxSymbolValue);
  44. count[*ip++]++;
  45. }
  46. while (!count[maxSymbolValue]) maxSymbolValue--;
  47. *maxSymbolValuePtr = maxSymbolValue;
  48. { U32 s;
  49. for (s=0; s<=maxSymbolValue; s++)
  50. if (count[s] > largestCount) largestCount = count[s];
  51. }
  52. return largestCount;
  53. }
/* HIST_checkInput_e :
 * trustInput : assume every symbol fits within *maxSymbolValuePtr (unsafe variants)
 * checkMaxSymbolValue : verify it, and fail with maxSymbolValue_tooSmall otherwise */
typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;

/* HIST_count_parallel_wksp() :
 * store histogram into 4 intermediate tables, recombined at the end.
 * this design makes better use of OoO cpus,
 * and is noticeably faster when some values are heavily repeated.
 * But it needs some additional workspace for intermediate tables.
 * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
 * @return : largest histogram frequency,
 *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
static size_t HIST_count_parallel_wksp(
                                unsigned* count, unsigned* maxSymbolValuePtr,
                                const void* source, size_t sourceSize,
                                HIST_checkInput_e check,
                                U32* const workSpace)
{
    const BYTE* ip = (const BYTE*)source;
    const BYTE* const iend = ip+sourceSize;
    /* size, in bytes, of the caller-visible `count` table */
    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
    unsigned max=0;
    /* 4 independent counting tables : consecutive increments hit different
     * tables, breaking the store-to-load dependency chain on repeated values */
    U32* const Counting1 = workSpace;
    U32* const Counting2 = Counting1 + 256;
    U32* const Counting3 = Counting2 + 256;
    U32* const Counting4 = Counting3 + 256;

    /* safety checks */
    assert(*maxSymbolValuePtr <= 255);
    if (!sourceSize) {
        /* empty input : zeroed histogram, alphabet collapsed to symbol 0 */
        ZSTD_memset(count, 0, countSize);
        *maxSymbolValuePtr = 0;
        return 0;
    }
    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));

    /* by stripes of 16 bytes :
     * `cached` is loaded one word ahead of use, so the next load's latency
     * overlaps with the 4 table updates of the current word.
     * NOTE(review): this first MEM_read32() reads bytes [0..3] even when
     * sourceSize < 4 — confirm all callers guarantee a readable margin there. */
    {   U32 cached = MEM_read32(ip); ip += 4;
        while (ip < iend-15) {
            U32 c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[ c>>24 ]++;
            c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[ c>>24 ]++;
            c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[ c>>24 ]++;
            c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[ c>>24 ]++;
        }
        ip-=4;   /* rewind : the last word was loaded into `cached` but never counted */
    }

    /* finish last symbols, one byte at a time */
    while (ip<iend) Counting1[*ip++]++;

    /* recombine the 4 partial tables into Counting1, tracking the peak frequency */
    {   U32 s;
        for (s=0; s<256; s++) {
            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
            if (Counting1[s] > max) max = Counting1[s];
    }   }

    {   unsigned maxSymbolValue = 255;
        /* sourceSize > 0 guarantees at least one non-zero slot : loop terminates */
        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
        *maxSymbolValuePtr = maxSymbolValue;
        /* only the first countSize bytes are copied back; memmove because
         * `count` is allowed to alias the workspace */
        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
    }
    return (size_t)max;
}
  126. /* HIST_countFast_wksp() :
  127. * Same as HIST_countFast(), but using an externally provided scratch buffer.
  128. * `workSpace` is a writable buffer which must be 4-bytes aligned,
  129. * `workSpaceSize` must be >= HIST_WKSP_SIZE
  130. */
  131. size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
  132. const void* source, size_t sourceSize,
  133. void* workSpace, size_t workSpaceSize)
  134. {
  135. if (sourceSize < 1500) /* heuristic threshold */
  136. return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
  137. if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
  138. if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
  139. return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
  140. }
  141. /* HIST_count_wksp() :
  142. * Same as HIST_count(), but using an externally provided scratch buffer.
  143. * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
  144. size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
  145. const void* source, size_t sourceSize,
  146. void* workSpace, size_t workSpaceSize)
  147. {
  148. if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
  149. if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
  150. if (*maxSymbolValuePtr < 255)
  151. return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
  152. *maxSymbolValuePtr = 255;
  153. return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
  154. }
  155. #ifndef ZSTD_NO_UNUSED_FUNCTIONS
  156. /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
  157. size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
  158. const void* source, size_t sourceSize)
  159. {
  160. unsigned tmpCounters[HIST_WKSP_SIZE_U32];
  161. return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
  162. }
  163. size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
  164. const void* src, size_t srcSize)
  165. {
  166. unsigned tmpCounters[HIST_WKSP_SIZE_U32];
  167. return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
  168. }
  169. #endif