benchfn.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. /*
  2. * Copyright (c) Meta Platforms, Inc. and affiliates.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* *************************************
  11. * Includes
  12. ***************************************/
  13. #include <stdlib.h> /* malloc, free */
  14. #include <string.h> /* memset */
  15. #include <assert.h> /* assert */
  16. #include "timefn.h" /* UTIL_time_t, UTIL_getTime */
  17. #include "benchfn.h"
  /* *************************************
  *  Constants
  ***************************************/
  /* One second, expressed in two units.
   * SEC_TO_MICRO is not defined in this file (presumably provided by timefn.h). */
  #define TIMELOOP_MICROSEC  SEC_TO_MICRO      /* 1 second */
  #define TIMELOOP_NANOSEC   (1000000000ULL)   /* 1 second */

  /* Postfix size multipliers : written after a count,
   * e.g. `4 KB` expands to `4 *(1 <<10)`. */
  #define KB  *(1 <<10)
  #define MB  *(1 <<20)
  #define GB  *(1U<<30)
  /* *************************************
  *  Debug errors
  ***************************************/
  /* DEBUGOUTPUT() : prints to stderr when DEBUG >= 1, expands to nothing otherwise.
   * Statement-like macros below are wrapped in `do { ... } while (0)`
   * so that `MACRO(...);` behaves as exactly one statement,
   * including inside an unbraced `if (...) MACRO(...); else ...`. */
  #if defined(DEBUG) && (DEBUG >= 1)
  #  include <stdio.h>   /* fprintf */
  #  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
  #  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
  #else
  #  define DEBUGOUTPUT(...)
  #endif

  /* RETURN_QUIET_ERROR() :
   * returns `retValue` from the calling function.
   * The message (printf-style, passed as variadic arguments) is only emitted
   * in debug builds ; otherwise the error is returned without displaying anything. */
  #define RETURN_QUIET_ERROR(retValue, ...) do {          \
      DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);      \
      DEBUGOUTPUT("Error : ");                            \
      DEBUGOUTPUT(__VA_ARGS__);                           \
      DEBUGOUTPUT(" \n");                                 \
      return retValue;                                    \
  } while (0)

  /* CONTROL() : abort execution if condition `c` is not met.
   * Unlike assert(), the check does not depend on NDEBUG. */
  #define CONTROL(c) do {                                 \
      if (!(c)) {                                         \
          DEBUGOUTPUT("error: %s \n", #c);                \
          abort();                                        \
      }                                                   \
  } while (0)
  46. /* *************************************
  47. * Benchmarking an arbitrary function
  48. ***************************************/
  49. int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
  50. {
  51. return outcome.error_tag_never_ever_use_directly == 0;
  52. }
  53. /* warning : this function will stop program execution if outcome is invalid !
  54. * check outcome validity first, using BMK_isValid_runResult() */
  55. BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
  56. {
  57. CONTROL(outcome.error_tag_never_ever_use_directly == 0);
  58. return outcome.internal_never_ever_use_directly;
  59. }
  60. size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
  61. {
  62. CONTROL(outcome.error_tag_never_ever_use_directly != 0);
  63. return outcome.error_result_never_ever_use_directly;
  64. }
  65. static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
  66. {
  67. BMK_runOutcome_t b;
  68. memset(&b, 0, sizeof(b));
  69. b.error_tag_never_ever_use_directly = 1;
  70. b.error_result_never_ever_use_directly = errorResult;
  71. return b;
  72. }
  73. static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
  74. {
  75. BMK_runOutcome_t outcome;
  76. outcome.error_tag_never_ever_use_directly = 0;
  77. outcome.internal_never_ever_use_directly = runTime;
  78. return outcome;
  79. }
  80. /* initFn will be measured once, benchFn will be measured `nbLoops` times */
  81. /* initFn is optional, provide NULL if none */
  82. /* benchFn must return a size_t value that errorFn can interpret */
  83. /* takes # of blocks and list of size & stuff for each. */
  84. /* can report result of benchFn for each block into blockResult. */
  85. /* blockResult is optional, provide NULL if this information is not required */
  86. /* note : time per loop can be reported as zero if run time < timer resolution */
  87. BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
  88. unsigned nbLoops)
  89. {
  90. size_t dstSize = 0;
  91. nbLoops += !nbLoops; /* minimum nbLoops is 1 */
  92. /* init */
  93. { size_t i;
  94. for(i = 0; i < p.blockCount; i++) {
  95. memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
  96. } }
  97. /* benchmark */
  98. { UTIL_time_t const clockStart = UTIL_getTime();
  99. unsigned loopNb, blockNb;
  100. if (p.initFn != NULL) p.initFn(p.initPayload);
  101. for (loopNb = 0; loopNb < nbLoops; loopNb++) {
  102. for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
  103. size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
  104. p.dstBuffers[blockNb], p.dstCapacities[blockNb],
  105. p.benchPayload);
  106. if (loopNb == 0) {
  107. if (p.blockResults != NULL) p.blockResults[blockNb] = res;
  108. if ((p.errorFn != NULL) && (p.errorFn(res))) {
  109. RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
  110. "Function benchmark failed on block %u (of size %u) with error %i",
  111. blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
  112. }
  113. dstSize += res;
  114. } }
  115. } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
  116. { PTime const totalTime = UTIL_clockSpanNano(clockStart);
  117. BMK_runTime_t rt;
  118. rt.nanoSecPerRun = (double)totalTime / nbLoops;
  119. rt.sumOfReturn = dstSize;
  120. return BMK_setValid_runTime(rt);
  121. } }
  122. }
  123. /* ==== Benchmarking any function, providing intermediate results ==== */
  124. struct BMK_timedFnState_s {
  125. PTime timeSpent_ns;
  126. PTime timeBudget_ns;
  127. PTime runBudget_ns;
  128. BMK_runTime_t fastestRun;
  129. unsigned nbLoops;
  130. UTIL_time_t coolTime;
  131. }; /* typedef'd to BMK_timedFnState_t within bench.h */
  132. BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
  133. {
  134. BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
  135. if (r == NULL) return NULL; /* malloc() error */
  136. BMK_resetTimedFnState(r, total_ms, run_ms);
  137. return r;
  138. }
  139. void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
  140. BMK_timedFnState_t*
  141. BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
  142. {
  143. typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
  144. typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
  145. size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
  146. BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
  147. if (buffer == NULL) return NULL;
  148. if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
  149. if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
  150. BMK_resetTimedFnState(r, total_ms, run_ms);
  151. return r;
  152. }
  153. void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
  154. {
  155. if (!total_ms) total_ms = 1 ;
  156. if (!run_ms) run_ms = 1;
  157. if (run_ms > total_ms) run_ms = total_ms;
  158. timedFnState->timeSpent_ns = 0;
  159. timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
  160. timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
  161. timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
  162. timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
  163. timedFnState->nbLoops = 1;
  164. timedFnState->coolTime = UTIL_getTime();
  165. }
  166. /* Tells if nb of seconds set in timedFnState for all runs is spent.
  167. * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
  168. int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
  169. {
  170. return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
  171. }
  /* MIN() : smaller of two values (returns `b` when both compare equal).
   * Beware : the selected argument is evaluated twice. */
  #undef MIN
  #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )

  /* half of TIMELOOP_NANOSEC, i.e. 0.5 seconds */
  #define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)
  175. BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
  176. BMK_benchParams_t p)
  177. {
  178. PTime const runBudget_ns = cont->runBudget_ns;
  179. PTime const runTimeMin_ns = runBudget_ns / 2;
  180. int completed = 0;
  181. BMK_runTime_t bestRunTime = cont->fastestRun;
  182. while (!completed) {
  183. BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
  184. if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
  185. return runResult;
  186. }
  187. { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
  188. double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
  189. cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
  190. /* estimate nbLoops for next run to last approximately 1 second */
  191. if (loopDuration_ns > ((double)runBudget_ns / 50)) {
  192. double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
  193. cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1;
  194. } else {
  195. /* previous run was too short : blindly increase workload by x multiplier */
  196. const unsigned multiplier = 10;
  197. assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
  198. cont->nbLoops *= multiplier;
  199. }
  200. if(loopDuration_ns < (double)runTimeMin_ns) {
  201. /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
  202. assert(completed == 0);
  203. continue;
  204. } else {
  205. if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
  206. bestRunTime = newRunTime;
  207. }
  208. completed = 1;
  209. }
  210. }
  211. } /* while (!completed) */
  212. return BMK_setValid_runTime(bestRunTime);
  213. }