123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- /*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
- /* *************************************
- * Includes
- ***************************************/
- #include <stdlib.h> /* malloc, free */
- #include <string.h> /* memset */
- #include <assert.h> /* assert */
- #include "timefn.h" /* UTIL_time_t, UTIL_getTime */
- #include "benchfn.h"
- /* *************************************
- * Constants
- ***************************************/
/* Duration of one timing loop, expressed in two units.
 * SEC_TO_MICRO is not defined in this file (presumably provided by timefn.h - TODO confirm). */
- #define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
- #define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
/* Postfix size multipliers : writing `64 KB` expands to `64 *(1 <<10)`.
 * note : GB uses an unsigned literal (1U), while KB and MB use plain int. */
- #define KB *(1 <<10)
- #define MB *(1 <<20)
- #define GB *(1U<<30)
- /* *************************************
- * Debug errors
- ***************************************/
/* Debug traces are compiled in only when DEBUG is defined and >= 1.
 * Every statement-like macro below is wrapped in `do { ... } while (0)`,
 * so that it behaves as exactly one statement and must be terminated by `;`.
 * This makes `if (x) MACRO(...); else ...` well-formed,
 * which a bare `{ ... }` block followed by `;` is not. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>   /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
   /* arguments are discarded (never evaluated) when debug traces are disabled */
#  define DEBUGOUTPUT(...)   do { } while (0)
#endif

/* error without displaying :
 * returns `retValue` from the enclosing function.
 * In debug builds, first prints the file / line location and the formatted message to stderr. */
#define RETURN_QUIET_ERROR(retValue, ...)   do {        \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);      \
    DEBUGOUTPUT("Error : ");                            \
    DEBUGOUTPUT(__VA_ARGS__);                           \
    DEBUGOUTPUT(" \n");                                 \
    return retValue;                                    \
} while (0)

/* Abort execution if a condition is not met.
 * In debug builds, the text of the failed condition is printed before abort(). */
#define CONTROL(c)   do { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } while (0)
- /* *************************************
- * Benchmarking an arbitrary function
- ***************************************/
- int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
- {
- return outcome.error_tag_never_ever_use_directly == 0;
- }
/* Returns the run time measurement stored in a successful outcome. */
- /* warning : this function will stop program execution if outcome is invalid !
- * check outcome validity first, using BMK_isSuccessful_runOutcome() */
- BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
- {
- CONTROL(outcome.error_tag_never_ever_use_directly == 0); /* aborts when the outcome is tagged as an error */
- return outcome.internal_never_ever_use_directly;
- }
/* Returns the error value stored in a failed outcome.
 * warning : this function will stop program execution if outcome is NOT an error !
 * check first with BMK_isSuccessful_runOutcome(), which must return 0 for this call to be valid. */
- size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
- {
- CONTROL(outcome.error_tag_never_ever_use_directly != 0); /* aborts when the outcome is tagged as successful */
- return outcome.error_result_never_ever_use_directly;
- }
- static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
- {
- BMK_runOutcome_t b;
- memset(&b, 0, sizeof(b));
- b.error_tag_never_ever_use_directly = 1;
- b.error_result_never_ever_use_directly = errorResult;
- return b;
- }
- static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
- {
- BMK_runOutcome_t outcome;
- outcome.error_tag_never_ever_use_directly = 0;
- outcome.internal_never_ever_use_directly = runTime;
- return outcome;
- }
- /* initFn will be measured once, benchFn will be measured `nbLoops` times */
- /* initFn is optional, provide NULL if none */
- /* benchFn must return a size_t value that errorFn can interpret */
- /* takes # of blocks and list of size & stuff for each. */
- /* can report result of benchFn for each block into blockResult. */
- /* blockResult is optional, provide NULL if this information is not required */
- /* note : time per loop can be reported as zero if run time < timer resolution */
- BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
- unsigned nbLoops)
- {
- size_t dstSize = 0;
- nbLoops += !nbLoops; /* minimum nbLoops is 1 */
- /* init */
- { size_t i;
- for(i = 0; i < p.blockCount; i++) {
- memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
- } }
- /* benchmark */
- { UTIL_time_t const clockStart = UTIL_getTime();
- unsigned loopNb, blockNb;
- if (p.initFn != NULL) p.initFn(p.initPayload);
- for (loopNb = 0; loopNb < nbLoops; loopNb++) {
- for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
- size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
- p.dstBuffers[blockNb], p.dstCapacities[blockNb],
- p.benchPayload);
- if (loopNb == 0) {
- if (p.blockResults != NULL) p.blockResults[blockNb] = res;
- if ((p.errorFn != NULL) && (p.errorFn(res))) {
- RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
- "Function benchmark failed on block %u (of size %u) with error %i",
- blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
- }
- dstSize += res;
- } }
- } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
- { PTime const totalTime = UTIL_clockSpanNano(clockStart);
- BMK_runTime_t rt;
- rt.nanoSecPerRun = (double)totalTime / nbLoops;
- rt.sumOfReturn = dstSize;
- return BMK_setValid_runTime(rt);
- } }
- }
- /* ==== Benchmarking any function, providing intermediate results ==== */
/* State carried across successive BMK_benchTimedFn() calls.
 * All fields are (re)initialized by BMK_resetTimedFnState(). */
- struct BMK_timedFnState_s {
- PTime timeSpent_ns; /* accumulated measured time; compared to timeBudget_ns by BMK_isCompleted_TimedFn() */
- PTime timeBudget_ns; /* total time budget, derived from total_ms */
- PTime runBudget_ns; /* target duration of a single measurement run, derived from run_ms */
- BMK_runTime_t fastestRun; /* seeded with a very large time at reset; read as starting best by BMK_benchTimedFn() */
- unsigned nbLoops; /* nb of loops requested from the next BMK_benchFunction() call; starts at 1 */
- UTIL_time_t coolTime; /* timestamp taken at reset; not read by the code visible in this file */
- }; /* typedef'd to BMK_timedFnState_t within bench.h */
- BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
- {
- BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
- if (r == NULL) return NULL; /* malloc() error */
- BMK_resetTimedFnState(r, total_ms, run_ms);
- return r;
- }
- void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
- BMK_timedFnState_t*
- BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
- {
- typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
- typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
- size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
- BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
- if (buffer == NULL) return NULL;
- if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
- if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
- BMK_resetTimedFnState(r, total_ms, run_ms);
- return r;
- }
- void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
- {
- if (!total_ms) total_ms = 1 ;
- if (!run_ms) run_ms = 1;
- if (run_ms > total_ms) run_ms = total_ms;
- timedFnState->timeSpent_ns = 0;
- timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
- timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
- timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
- timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
- timedFnState->nbLoops = 1;
- timedFnState->coolTime = UTIL_getTime();
- }
- /* Tells if nb of seconds set in timedFnState for all runs is spent.
- * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
- int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
- {
- return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
- }
/* MIN is undefined first, so that a definition inherited from an included header cannot conflict. */
- #undef MIN
- #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) /* note : the selected argument is evaluated twice; pass side-effect-free expressions only */
- #define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */
- BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
- BMK_benchParams_t p)
- {
- PTime const runBudget_ns = cont->runBudget_ns;
- PTime const runTimeMin_ns = runBudget_ns / 2;
- int completed = 0;
- BMK_runTime_t bestRunTime = cont->fastestRun;
- while (!completed) {
- BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
- if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
- return runResult;
- }
- { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
- double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
- cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
- /* estimate nbLoops for next run to last approximately 1 second */
- if (loopDuration_ns > ((double)runBudget_ns / 50)) {
- double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
- cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1;
- } else {
- /* previous run was too short : blindly increase workload by x multiplier */
- const unsigned multiplier = 10;
- assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
- cont->nbLoops *= multiplier;
- }
- if(loopDuration_ns < (double)runTimeMin_ns) {
- /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
- assert(completed == 0);
- continue;
- } else {
- if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
- bestRunTime = newRunTime;
- }
- completed = 1;
- }
- }
- } /* while (!completed) */
- return BMK_setValid_runTime(bestRunTime);
- }
|