123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358 |
- /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
- |*
- |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- |* See https://llvm.org/LICENSE.txt for license information.
- |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- |*
- \*===----------------------------------------------------------------------===*/
- #include <assert.h>
- #include <limits.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "InstrProfiling.h"
- #include "InstrProfilingInternal.h"
- #include "InstrProfilingUtil.h"
- #define INSTR_PROF_VALUE_PROF_DATA
- #define INSTR_PROF_COMMON_API_IMPL
- #define INSTR_PROF_VALUE_PROF_MEMOP_API
- #include "profile/InstrProfData.inc"
- static int hasStaticCounters = 1;
- static int OutOfNodesWarnings = 0;
- static int hasNonDefaultValsPerSite = 0;
- #define INSTR_PROF_MAX_VP_WARNS 10
- #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
- #define INSTR_PROF_VNODE_POOL_SIZE 1024
- #ifndef _MSC_VER
- /* A shared static pool in addition to the vnodes statically
- * allocated by the compiler. */
- COMPILER_RT_VISIBILITY ValueProfNode
- lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
- COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
- #endif
- COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
- INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
- COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
- const char *Str = 0;
- Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
- if (Str && Str[0]) {
- VPMaxNumValsPerSite = atoi(Str);
- hasNonDefaultValsPerSite = 1;
- }
- if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
- VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
- }
- COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
- VPMaxNumValsPerSite = MaxVals;
- hasNonDefaultValsPerSite = 1;
- }
- /* This method is only used in value profiler mock testing. */
- COMPILER_RT_VISIBILITY void
- __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
- uint32_t ValueKind, uint16_t NumValueSites) {
- *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
- }
- /* This method is only used in value profiler mock testing. */
- COMPILER_RT_VISIBILITY const __llvm_profile_data *
- __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
- return Data + 1;
- }
- /* This method is only used in value profiler mock testing. */
- COMPILER_RT_VISIBILITY void *
- __llvm_get_function_addr(const __llvm_profile_data *Data) {
- return Data->FunctionPointer;
- }
- /* Allocate an array that holds the pointers to the linked lists of
- * value profile counter nodes. The number of element of the array
- * is the total number of value profile sites instrumented. Returns
- * 0 if allocation fails.
- */
- static int allocateValueProfileCounters(__llvm_profile_data *Data) {
- uint64_t NumVSites = 0;
- uint32_t VKI;
- /* This function will never be called when value site array is allocated
- statically at compile time. */
- hasStaticCounters = 0;
- /* When dynamic allocation is enabled, allow tracking the max number of
- * values allowd. */
- if (!hasNonDefaultValsPerSite)
- VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
- for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
- NumVSites += Data->NumValueSites[VKI];
- // If NumVSites = 0, calloc is allowed to return a non-null pointer.
- assert(NumVSites > 0 && "NumVSites can't be zero");
- ValueProfNode **Mem =
- (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
- if (!Mem)
- return 0;
- if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
- free(Mem);
- return 0;
- }
- return 1;
- }
- static ValueProfNode *allocateOneNode(void) {
- ValueProfNode *Node;
- if (!hasStaticCounters)
- return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
- /* Early check to avoid value wrapping around. */
- if (CurrentVNode + 1 > EndVNode) {
- if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
- PROF_WARN("Unable to track new values: %s. "
- " Consider using option -mllvm -vp-counters-per-site=<n> to "
- "allocate more"
- " value profile counters at compile time. \n",
- "Running out of static counters");
- }
- return 0;
- }
- Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
- /* Due to section padding, EndVNode point to a byte which is one pass
- * an incomplete VNode, so we need to skip the last incomplete node. */
- if (Node + 1 > EndVNode)
- return 0;
- return Node;
- }
- static COMPILER_RT_ALWAYS_INLINE void
- instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
- uint32_t CounterIndex, uint64_t CountValue) {
- __llvm_profile_data *PData = (__llvm_profile_data *)Data;
- if (!PData)
- return;
- if (!CountValue)
- return;
- if (!PData->Values) {
- if (!allocateValueProfileCounters(PData))
- return;
- }
- ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
- ValueProfNode *PrevVNode = NULL;
- ValueProfNode *MinCountVNode = NULL;
- ValueProfNode *CurVNode = ValueCounters[CounterIndex];
- uint64_t MinCount = UINT64_MAX;
- uint8_t VDataCount = 0;
- while (CurVNode) {
- if (TargetValue == CurVNode->Value) {
- CurVNode->Count += CountValue;
- return;
- }
- if (CurVNode->Count < MinCount) {
- MinCount = CurVNode->Count;
- MinCountVNode = CurVNode;
- }
- PrevVNode = CurVNode;
- CurVNode = CurVNode->Next;
- ++VDataCount;
- }
- if (VDataCount >= VPMaxNumValsPerSite) {
- /* Bump down the min count node's count. If it reaches 0,
- * evict it. This eviction/replacement policy makes hot
- * targets more sticky while cold targets less so. In other
- * words, it makes it less likely for the hot targets to be
- * prematurally evicted during warmup/establishment period,
- * when their counts are still low. In a special case when
- * the number of values tracked is reduced to only one, this
- * policy will guarantee that the dominating target with >50%
- * total count will survive in the end. Note that this scheme
- * allows the runtime to track the min count node in an adaptive
- * manner. It can correct previous mistakes and eventually
- * lock on a cold target that is alread in stable state.
- *
- * In very rare cases, this replacement scheme may still lead
- * to target loss. For instance, out of \c N value slots, \c N-1
- * slots are occupied by luke warm targets during the warmup
- * period and the remaining one slot is competed by two or more
- * very hot targets. If those hot targets occur in an interleaved
- * way, none of them will survive (gain enough weight to throw out
- * other established entries) due to the ping-pong effect.
- * To handle this situation, user can choose to increase the max
- * number of tracked values per value site. Alternatively, a more
- * expensive eviction mechanism can be implemented. It requires
- * the runtime to track the total number of evictions per-site.
- * When the total number of evictions reaches certain threshold,
- * the runtime can wipe out more than one lowest count entries
- * to give space for hot targets.
- */
- if (MinCountVNode->Count <= CountValue) {
- CurVNode = MinCountVNode;
- CurVNode->Value = TargetValue;
- CurVNode->Count = CountValue;
- } else
- MinCountVNode->Count -= CountValue;
- return;
- }
- CurVNode = allocateOneNode();
- if (!CurVNode)
- return;
- CurVNode->Value = TargetValue;
- CurVNode->Count += CountValue;
- uint32_t Success = 0;
- if (!ValueCounters[CounterIndex])
- Success =
- COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
- else if (PrevVNode && !PrevVNode->Next)
- Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
- if (!Success && !hasStaticCounters) {
- free(CurVNode);
- return;
- }
- }
- COMPILER_RT_VISIBILITY void
- __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
- uint32_t CounterIndex) {
- instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
- }
- COMPILER_RT_VISIBILITY void
- __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
- uint32_t CounterIndex,
- uint64_t CountValue) {
- instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
- }
- /*
- * The target values are partitioned into multiple ranges. The range spec is
- * defined in InstrProfData.inc.
- */
- COMPILER_RT_VISIBILITY void
- __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
- uint32_t CounterIndex) {
- // Map the target value to the representative value of its range.
- uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
- __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
- }
- /*
- * A wrapper struct that represents value profile runtime data.
- * Like InstrProfRecord class which is used by profiling host tools,
- * ValueProfRuntimeRecord also implements the abstract interfaces defined in
- * ValueProfRecordClosure so that the runtime data can be serialized using
- * shared C implementation.
- */
- typedef struct ValueProfRuntimeRecord {
- const __llvm_profile_data *Data;
- ValueProfNode **NodesKind[IPVK_Last + 1];
- uint8_t **SiteCountArray;
- } ValueProfRuntimeRecord;
- /* ValueProfRecordClosure Interface implementation. */
- static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
- return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
- }
- static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
- uint32_t S = 0, I;
- const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
- if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
- return 0;
- for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
- S += Record->SiteCountArray[VK][I];
- return S;
- }
- static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
- uint32_t S) {
- const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
- return Record->SiteCountArray[VK][S];
- }
- static ValueProfRuntimeRecord RTRecord;
- static ValueProfRecordClosure RTRecordClosure = {
- &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
- getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
- INSTR_PROF_NULLPTR, /* RemapValueData */
- INSTR_PROF_NULLPTR, /* GetValueForSite, */
- INSTR_PROF_NULLPTR /* AllocValueProfData */
- };
- static uint32_t
- initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
- uint8_t *SiteCountArray[]) {
- unsigned I, J, S = 0, NumValueKinds = 0;
- ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
- RTRecord.Data = Data;
- RTRecord.SiteCountArray = SiteCountArray;
- for (I = 0; I <= IPVK_Last; I++) {
- uint16_t N = Data->NumValueSites[I];
- if (!N)
- continue;
- NumValueKinds++;
- RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
- for (J = 0; J < N; J++) {
- /* Compute value count for each site. */
- uint32_t C = 0;
- ValueProfNode *Site =
- Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
- while (Site) {
- C++;
- Site = Site->Next;
- }
- if (C > UCHAR_MAX)
- C = UCHAR_MAX;
- RTRecord.SiteCountArray[I][J] = C;
- }
- S += N;
- }
- return NumValueKinds;
- }
- static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
- InstrProfValueData *Dst,
- ValueProfNode *StartNode, uint32_t N) {
- unsigned I;
- ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
- for (I = 0; I < N; I++) {
- Dst[I].Value = VNode->Value;
- Dst[I].Count = VNode->Count;
- VNode = VNode->Next;
- }
- return VNode;
- }
- static uint32_t getValueProfDataSizeWrapper(void) {
- return getValueProfDataSize(&RTRecordClosure);
- }
- static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
- return getNumValueDataForSiteRT(&RTRecord, VK, S);
- }
- static VPDataReaderType TheVPDataReader = {
- initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
- getFirstValueProfRecord, getNumValueDataForSiteWrapper,
- getValueProfDataSizeWrapper, getNextNValueData};
- COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
- return &TheVPDataReader;
- }
|