/* (extraction artifact removed: file-listing header and concatenated line numbers) */
  1. /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
  2. |*
  3. |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. |* See https://llvm.org/LICENSE.txt for license information.
  5. |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. |*
  7. \*===----------------------------------------------------------------------===*/
  8. #include <assert.h>
  9. #include <limits.h>
  10. #include <stdio.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include "InstrProfiling.h"
  14. #include "InstrProfilingInternal.h"
  15. #include "InstrProfilingUtil.h"
  16. #define INSTR_PROF_VALUE_PROF_DATA
  17. #define INSTR_PROF_COMMON_API_IMPL
  18. #define INSTR_PROF_VALUE_PROF_MEMOP_API
  19. #include "profile/InstrProfData.inc"
/* Nonzero while value-profile nodes come from the compiler-emitted static
 * pool; cleared once dynamic (heap) allocation is in use. */
static int hasStaticCounters = 1;
/* Count of "out of static counters" warnings emitted so far, capped by
 * INSTR_PROF_MAX_VP_WARNS below. */
static int OutOfNodesWarnings = 0;
/* Set when the per-site value cap was overridden via the environment or
 * lprofSetMaxValsPerSite. */
static int hasNonDefaultValsPerSite = 0;

#define INSTR_PROF_MAX_VP_WARNS 10
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
#define INSTR_PROF_VNODE_POOL_SIZE 1024
#ifndef _MSC_VER
/* A shared static pool in addition to the vnodes statically
 * allocated by the compiler. Placed in the VNodes section so the runtime
 * can locate it via section bounds. */
COMPILER_RT_VISIBILITY ValueProfNode
    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
#endif

/* Upper bound on distinct values tracked per value site; overridable via
 * the LLVM_VP_MAX_NUM_VALS_PER_SITE environment variable or
 * lprofSetMaxValsPerSite. */
COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
  35. COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
  36. const char *Str = 0;
  37. Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
  38. if (Str && Str[0]) {
  39. VPMaxNumValsPerSite = atoi(Str);
  40. hasNonDefaultValsPerSite = 1;
  41. }
  42. if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
  43. VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
  44. }
  45. COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
  46. VPMaxNumValsPerSite = MaxVals;
  47. hasNonDefaultValsPerSite = 1;
  48. }
/* This method is only used in value profiler mock testing.
 * NOTE(review): the write goes through a uint16_t* cast, presumably
 * because NumValueSites is const-qualified in __llvm_profile_data —
 * confirm against the struct definition in InstrProfData.inc. */
COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
                                   uint32_t ValueKind, uint16_t NumValueSites) {
  *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
}
  55. /* This method is only used in value profiler mock testing. */
  56. COMPILER_RT_VISIBILITY const __llvm_profile_data *
  57. __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
  58. return Data + 1;
  59. }
  60. /* This method is only used in value profiler mock testing. */
  61. COMPILER_RT_VISIBILITY void *
  62. __llvm_get_function_addr(const __llvm_profile_data *Data) {
  63. return Data->FunctionPointer;
  64. }
/* Allocate an array that holds the pointers to the linked lists of
 * value profile counter nodes. The number of elements of the array
 * is the total number of value profile sites instrumented. Returns
 * 0 if allocation fails (or if another thread won the race to install
 * its own array).
 */
static int allocateValueProfileCounters(__llvm_profile_data *Data) {
  uint64_t NumVSites = 0;
  uint32_t VKI;

  /* This function will never be called when value site array is allocated
     statically at compile time. */
  hasStaticCounters = 0;
  /* When dynamic allocation is enabled, allow tracking the max number of
   * values allowed. */
  if (!hasNonDefaultValsPerSite)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;

  /* Sum site counts over all value kinds: the list-head array is one flat
   * run, kind by kind, site by site. */
  for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
    NumVSites += Data->NumValueSites[VKI];

  // If NumVSites = 0, calloc is allowed to return a non-null pointer.
  assert(NumVSites > 0 && "NumVSites can't be zero");
  ValueProfNode **Mem =
      (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
  if (!Mem)
    return 0;
  /* Publish the array with a compare-and-swap; if another thread already
   * installed one, free ours and report failure so the caller re-reads. */
  if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
    free(Mem);
    return 0;
  }
  return 1;
}
/* Hand out one ValueProfNode: from the heap once static counters are
 * disabled, otherwise by atomically bumping CurrentVNode within the
 * static pool. Returns 0 when the pool is exhausted. */
static ValueProfNode *allocateOneNode(void) {
  ValueProfNode *Node;

  if (!hasStaticCounters)
    return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));

  /* Early check to avoid value wrapping around. */
  if (CurrentVNode + 1 > EndVNode) {
    if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
      PROF_WARN("Unable to track new values: %s. "
                " Consider using option -mllvm -vp-counters-per-site=<n> to "
                "allocate more"
                " value profile counters at compile time. \n",
                "Running out of static counters");
    }
    return 0;
  }
  /* Atomic bump: concurrent allocators each get a distinct slot. */
  Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
  /* Due to section padding, EndVNode may point to a byte which is one past
   * an incomplete VNode, so we need to skip the last incomplete node. */
  if (Node + 1 > EndVNode)
    return 0;
  return Node;
}
/* Record CountValue occurrences of TargetValue at value site CounterIndex
 * of the function described by Data. Each site keeps a singly linked list
 * of (value, count) nodes, capped at VPMaxNumValsPerSite entries; new
 * nodes are linked in with compare-and-swap so concurrent updaters do not
 * corrupt the list. */
static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
                          uint32_t CounterIndex, uint64_t CountValue) {
  __llvm_profile_data *PData = (__llvm_profile_data *)Data;
  if (!PData)
    return;
  if (!CountValue)
    return;
  /* Lazily allocate the per-site list-head array on first use. */
  if (!PData->Values) {
    if (!allocateValueProfileCounters(PData))
      return;
  }

  ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
  ValueProfNode *PrevVNode = NULL;
  ValueProfNode *MinCountVNode = NULL;
  ValueProfNode *CurVNode = ValueCounters[CounterIndex];
  uint64_t MinCount = UINT64_MAX;

  uint8_t VDataCount = 0;
  /* Walk the site's list: if the value is already tracked, bump its count
   * and return; otherwise remember the lowest-count node for possible
   * eviction and the tail for possible appending. */
  while (CurVNode) {
    if (TargetValue == CurVNode->Value) {
      CurVNode->Count += CountValue;
      return;
    }
    if (CurVNode->Count < MinCount) {
      MinCount = CurVNode->Count;
      MinCountVNode = CurVNode;
    }
    PrevVNode = CurVNode;
    CurVNode = CurVNode->Next;
    ++VDataCount;
  }

  if (VDataCount >= VPMaxNumValsPerSite) {
    /* Bump down the min count node's count. If it reaches 0,
     * evict it. This eviction/replacement policy makes hot
     * targets more sticky while cold targets less so. In other
     * words, it makes it less likely for the hot targets to be
     * prematurely evicted during warmup/establishment period,
     * when their counts are still low. In a special case when
     * the number of values tracked is reduced to only one, this
     * policy will guarantee that the dominating target with >50%
     * total count will survive in the end. Note that this scheme
     * allows the runtime to track the min count node in an adaptive
     * manner. It can correct previous mistakes and eventually
     * lock on a cold target that is already in stable state.
     *
     * In very rare cases, this replacement scheme may still lead
     * to target loss. For instance, out of \c N value slots, \c N-1
     * slots are occupied by lukewarm targets during the warmup
     * period and the remaining one slot is competed by two or more
     * very hot targets. If those hot targets occur in an interleaved
     * way, none of them will survive (gain enough weight to throw out
     * other established entries) due to the ping-pong effect.
     * To handle this situation, user can choose to increase the max
     * number of tracked values per value site. Alternatively, a more
     * expensive eviction mechanism can be implemented. It requires
     * the runtime to track the total number of evictions per-site.
     * When the total number of evictions reaches certain threshold,
     * the runtime can wipe out more than one lowest count entries
     * to give space for hot targets.
     */
    /* NOTE(review): if VPMaxNumValsPerSite is 0, this branch is entered
     * with an empty list and MinCountVNode == NULL — confirm setup code
     * guarantees a positive cap before instrumentation runs. */
    if (MinCountVNode->Count <= CountValue) {
      CurVNode = MinCountVNode;
      CurVNode->Value = TargetValue;
      CurVNode->Count = CountValue;
    } else
      MinCountVNode->Count -= CountValue;

    return;
  }

  /* Room remains at this site: grab a fresh node and fill it in. */
  CurVNode = allocateOneNode();
  if (!CurVNode)
    return;
  CurVNode->Value = TargetValue;
  CurVNode->Count += CountValue;

  uint32_t Success = 0;
  /* Link via CAS either at the empty list head or at the tail's Next
   * pointer; a concurrent writer may win either race. */
  if (!ValueCounters[CounterIndex])
    Success =
        COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
  else if (PrevVNode && !PrevVNode->Next)
    Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);

  /* Lost the race with a heap-allocated node: free it to avoid a leak.
   * (A losing static-pool node is simply left unused.) */
  if (!Success && !hasStaticCounters) {
    free(CurVNode);
    return;
  }
}
  200. COMPILER_RT_VISIBILITY void
  201. __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
  202. uint32_t CounterIndex) {
  203. instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
  204. }
  205. COMPILER_RT_VISIBILITY void
  206. __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
  207. uint32_t CounterIndex,
  208. uint64_t CountValue) {
  209. instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
  210. }
  211. /*
  212. * The target values are partitioned into multiple ranges. The range spec is
  213. * defined in InstrProfData.inc.
  214. */
  215. COMPILER_RT_VISIBILITY void
  216. __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
  217. uint32_t CounterIndex) {
  218. // Map the target value to the representative value of its range.
  219. uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
  220. __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
  221. }
/*
 * A wrapper struct that represents value profile runtime data.
 * Like InstrProfRecord class which is used by profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
 * ValueProfRecordClosure so that the runtime data can be serialized using
 * shared C implementation.
 */
typedef struct ValueProfRuntimeRecord {
  /* Profile record whose value data is being serialized. */
  const __llvm_profile_data *Data;
  /* Per-kind pointer into the flat array of per-site list heads. */
  ValueProfNode **NodesKind[IPVK_Last + 1];
  /* Per-kind, per-site value counts (saturated at UCHAR_MAX), filled by
   * initializeValueProfRuntimeRecord. */
  uint8_t **SiteCountArray;
} ValueProfRuntimeRecord;
  234. /* ValueProfRecordClosure Interface implementation. */
  235. static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
  236. return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
  237. }
  238. static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
  239. uint32_t S = 0, I;
  240. const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  241. if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
  242. return 0;
  243. for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
  244. S += Record->SiteCountArray[VK][I];
  245. return S;
  246. }
  247. static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
  248. uint32_t S) {
  249. const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  250. return Record->SiteCountArray[VK][S];
  251. }
/* Singleton runtime record bound to one __llvm_profile_data at a time by
 * initializeValueProfRuntimeRecord. */
static ValueProfRuntimeRecord RTRecord;
/* Closure wiring RTRecord into the shared C serialization code; callbacks
 * the runtime does not need are left null. Field order must match the
 * ValueProfRecordClosure declaration. */
static ValueProfRecordClosure RTRecordClosure = {
    &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
    INSTR_PROF_NULLPTR, /* RemapValueData */
    INSTR_PROF_NULLPTR, /* GetValueForSite, */
    INSTR_PROF_NULLPTR  /* AllocValueProfData */
};
/* Bind RTRecord to \c Data, fill \c SiteCountArray with the number of
 * values tracked at each site (saturated at UCHAR_MAX), and return the
 * number of value kinds that have at least one instrumented site. */
static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
                                 uint8_t *SiteCountArray[]) {
  unsigned I, J, S = 0, NumValueKinds = 0;
  ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
  RTRecord.Data = Data;
  RTRecord.SiteCountArray = SiteCountArray;
  for (I = 0; I <= IPVK_Last; I++) {
    uint16_t N = Data->NumValueSites[I];
    if (!N)
      continue;
    NumValueKinds++;
    /* S is the running offset of kind I's sites within the flat Values
     * array (see allocateValueProfileCounters). */
    RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
    for (J = 0; J < N; J++) {
      /* Compute value count for each site by walking its node list. */
      uint32_t C = 0;
      ValueProfNode *Site =
          Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
      while (Site) {
        C++;
        Site = Site->Next;
      }
      /* Counts live in a uint8_t array, so saturate rather than wrap. */
      if (C > UCHAR_MAX)
        C = UCHAR_MAX;
      RTRecord.SiteCountArray[I][J] = C;
    }
    S += N;
  }
  return NumValueKinds;
}
  290. static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
  291. InstrProfValueData *Dst,
  292. ValueProfNode *StartNode, uint32_t N) {
  293. unsigned I;
  294. ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
  295. for (I = 0; I < N; I++) {
  296. Dst[I].Value = VNode->Value;
  297. Dst[I].Count = VNode->Count;
  298. VNode = VNode->Next;
  299. }
  300. return VNode;
  301. }
  302. static uint32_t getValueProfDataSizeWrapper(void) {
  303. return getValueProfDataSize(&RTRecordClosure);
  304. }
  305. static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
  306. return getNumValueDataForSiteRT(&RTRecord, VK, S);
  307. }
/* Reader vtable handed to the profile writer; field order must match the
 * VPDataReaderType declaration. */
static VPDataReaderType TheVPDataReader = {
    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    getFirstValueProfRecord, getNumValueDataForSiteWrapper,
    getValueProfDataSizeWrapper, getNextNValueData};
  312. COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
  313. return &TheVPDataReader;
  314. }