hist.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. /* ******************************************************************
  2. * hist : Histogram functions
  3. * part of Finite State Entropy project
  4. * Copyright (c) Meta Platforms, Inc. and affiliates.
  5. *
  6. * You can contact the author at :
  7. * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
  8. * - Public forum : https://groups.google.com/forum/#!forum/lz4c
  9. *
  10. * This source code is licensed under both the BSD-style license (found in the
  11. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  12. * in the COPYING file in the root directory of this source tree).
  13. * You may select, at your option, one of the above-listed licenses.
  14. ****************************************************************** */
  15. /* --- dependencies --- */
  16. #include "../common/mem.h" /* U32, BYTE, etc. */
  17. #include "../common/debug.h" /* assert, DEBUGLOG */
  18. #include "../common/error_private.h" /* ERROR */
  19. #include "hist.h"
  20. /* --- Error management --- */
  21. unsigned HIST_isError(size_t code) { return ERR_isError(code); }
  22. /*-**************************************************************
  23. * Histogram functions
  24. ****************************************************************/
  25. unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
  26. const void* src, size_t srcSize)
  27. {
  28. const BYTE* ip = (const BYTE*)src;
  29. const BYTE* const end = ip + srcSize;
  30. unsigned maxSymbolValue = *maxSymbolValuePtr;
  31. unsigned largestCount=0;
  32. ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
  33. if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
  34. while (ip<end) {
  35. assert(*ip <= maxSymbolValue);
  36. count[*ip++]++;
  37. }
  38. while (!count[maxSymbolValue]) maxSymbolValue--;
  39. *maxSymbolValuePtr = maxSymbolValue;
  40. { U32 s;
  41. for (s=0; s<=maxSymbolValue; s++)
  42. if (count[s] > largestCount) largestCount = count[s];
  43. }
  44. return largestCount;
  45. }
  46. typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
  47. /* HIST_count_parallel_wksp() :
  48. * store histogram into 4 intermediate tables, recombined at the end.
  49. * this design makes better use of OoO cpus,
  50. * and is noticeably faster when some values are heavily repeated.
  51. * But it needs some additional workspace for intermediate tables.
  52. * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
  53. * @return : largest histogram frequency,
  54. * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
  55. static size_t HIST_count_parallel_wksp(
  56. unsigned* count, unsigned* maxSymbolValuePtr,
  57. const void* source, size_t sourceSize,
  58. HIST_checkInput_e check,
  59. U32* const workSpace)
  60. {
  61. const BYTE* ip = (const BYTE*)source;
  62. const BYTE* const iend = ip+sourceSize;
  63. size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
  64. unsigned max=0;
  65. U32* const Counting1 = workSpace;
  66. U32* const Counting2 = Counting1 + 256;
  67. U32* const Counting3 = Counting2 + 256;
  68. U32* const Counting4 = Counting3 + 256;
  69. /* safety checks */
  70. assert(*maxSymbolValuePtr <= 255);
  71. if (!sourceSize) {
  72. ZSTD_memset(count, 0, countSize);
  73. *maxSymbolValuePtr = 0;
  74. return 0;
  75. }
  76. ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
  77. /* by stripes of 16 bytes */
  78. { U32 cached = MEM_read32(ip); ip += 4;
  79. while (ip < iend-15) {
  80. U32 c = cached; cached = MEM_read32(ip); ip += 4;
  81. Counting1[(BYTE) c ]++;
  82. Counting2[(BYTE)(c>>8) ]++;
  83. Counting3[(BYTE)(c>>16)]++;
  84. Counting4[ c>>24 ]++;
  85. c = cached; cached = MEM_read32(ip); ip += 4;
  86. Counting1[(BYTE) c ]++;
  87. Counting2[(BYTE)(c>>8) ]++;
  88. Counting3[(BYTE)(c>>16)]++;
  89. Counting4[ c>>24 ]++;
  90. c = cached; cached = MEM_read32(ip); ip += 4;
  91. Counting1[(BYTE) c ]++;
  92. Counting2[(BYTE)(c>>8) ]++;
  93. Counting3[(BYTE)(c>>16)]++;
  94. Counting4[ c>>24 ]++;
  95. c = cached; cached = MEM_read32(ip); ip += 4;
  96. Counting1[(BYTE) c ]++;
  97. Counting2[(BYTE)(c>>8) ]++;
  98. Counting3[(BYTE)(c>>16)]++;
  99. Counting4[ c>>24 ]++;
  100. }
  101. ip-=4;
  102. }
  103. /* finish last symbols */
  104. while (ip<iend) Counting1[*ip++]++;
  105. { U32 s;
  106. for (s=0; s<256; s++) {
  107. Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
  108. if (Counting1[s] > max) max = Counting1[s];
  109. } }
  110. { unsigned maxSymbolValue = 255;
  111. while (!Counting1[maxSymbolValue]) maxSymbolValue--;
  112. if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
  113. *maxSymbolValuePtr = maxSymbolValue;
  114. ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
  115. }
  116. return (size_t)max;
  117. }
  118. /* HIST_countFast_wksp() :
  119. * Same as HIST_countFast(), but using an externally provided scratch buffer.
  120. * `workSpace` is a writable buffer which must be 4-bytes aligned,
  121. * `workSpaceSize` must be >= HIST_WKSP_SIZE
  122. */
  123. size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
  124. const void* source, size_t sourceSize,
  125. void* workSpace, size_t workSpaceSize)
  126. {
  127. if (sourceSize < 1500) /* heuristic threshold */
  128. return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
  129. if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
  130. if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
  131. return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
  132. }
  133. /* HIST_count_wksp() :
  134. * Same as HIST_count(), but using an externally provided scratch buffer.
  135. * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
  136. size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
  137. const void* source, size_t sourceSize,
  138. void* workSpace, size_t workSpaceSize)
  139. {
  140. if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
  141. if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
  142. if (*maxSymbolValuePtr < 255)
  143. return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
  144. *maxSymbolValuePtr = 255;
  145. return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
  146. }
  147. #ifndef ZSTD_NO_UNUSED_FUNCTIONS
  148. /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
  149. size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
  150. const void* source, size_t sourceSize)
  151. {
  152. unsigned tmpCounters[HIST_WKSP_SIZE_U32];
  153. return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
  154. }
  155. size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
  156. const void* src, size_t srcSize)
  157. {
  158. unsigned tmpCounters[HIST_WKSP_SIZE_U32];
  159. return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
  160. }
  161. #endif