InstrProfilingValue.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
  2. |*
  3. |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. |* See https://llvm.org/LICENSE.txt for license information.
  5. |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. |*
  7. \*===----------------------------------------------------------------------===*/
  8. #include <assert.h>
  9. #include <limits.h>
  10. #include <stdio.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include "InstrProfiling.h"
  14. #include "InstrProfilingInternal.h"
  15. #include "InstrProfilingUtil.h"
  16. #define INSTR_PROF_VALUE_PROF_DATA
  17. #define INSTR_PROF_COMMON_API_IMPL
  18. #define INSTR_PROF_VALUE_PROF_MEMOP_API
  19. #include "profile/InstrProfData.inc"
  20. static int hasStaticCounters = 1;
  21. static int OutOfNodesWarnings = 0;
  22. static int hasNonDefaultValsPerSite = 0;
  23. #define INSTR_PROF_MAX_VP_WARNS 10
  24. #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
  25. #define INSTR_PROF_VNODE_POOL_SIZE 1024
  26. #ifndef _MSC_VER
  27. /* A shared static pool in addition to the vnodes statically
  28. * allocated by the compiler. */
  29. COMPILER_RT_VISIBILITY ValueProfNode
  30. lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
  31. COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
  32. #endif
  33. COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
  34. INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
  35. COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
  36. const char *Str = 0;
  37. Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
  38. if (Str && Str[0]) {
  39. VPMaxNumValsPerSite = atoi(Str);
  40. hasNonDefaultValsPerSite = 1;
  41. }
  42. if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
  43. VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
  44. }
  45. COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
  46. VPMaxNumValsPerSite = MaxVals;
  47. hasNonDefaultValsPerSite = 1;
  48. }
  49. /* This method is only used in value profiler mock testing. */
  50. COMPILER_RT_VISIBILITY void
  51. __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
  52. uint32_t ValueKind, uint16_t NumValueSites) {
  53. #ifdef __GNUC__
  54. #pragma GCC diagnostic push
  55. #pragma GCC diagnostic ignored "-Wcast-qual"
  56. #elif defined(__clang__)
  57. #pragma clang diagnostic push
  58. #pragma clang diagnostic ignored "-Wcast-qual"
  59. #endif
  60. *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
  61. #ifdef __GNUC__
  62. #pragma GCC diagnostic pop
  63. #elif defined(__clang__)
  64. #pragma clang diagnostic pop
  65. #endif
  66. }
  67. /* This method is only used in value profiler mock testing. */
  68. COMPILER_RT_VISIBILITY const __llvm_profile_data *
  69. __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
  70. return Data + 1;
  71. }
  72. /* This method is only used in value profiler mock testing. */
  73. COMPILER_RT_VISIBILITY void *
  74. __llvm_get_function_addr(const __llvm_profile_data *Data) {
  75. return Data->FunctionPointer;
  76. }
  77. /* Allocate an array that holds the pointers to the linked lists of
  78. * value profile counter nodes. The number of element of the array
  79. * is the total number of value profile sites instrumented. Returns
  80. * 0 if allocation fails.
  81. */
  82. static int allocateValueProfileCounters(__llvm_profile_data *Data) {
  83. uint64_t NumVSites = 0;
  84. uint32_t VKI;
  85. /* This function will never be called when value site array is allocated
  86. statically at compile time. */
  87. hasStaticCounters = 0;
  88. /* When dynamic allocation is enabled, allow tracking the max number of
  89. * values allowd. */
  90. if (!hasNonDefaultValsPerSite)
  91. VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
  92. for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
  93. NumVSites += Data->NumValueSites[VKI];
  94. // If NumVSites = 0, calloc is allowed to return a non-null pointer.
  95. assert(NumVSites > 0 && "NumVSites can't be zero");
  96. ValueProfNode **Mem =
  97. (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
  98. if (!Mem)
  99. return 0;
  100. if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
  101. free(Mem);
  102. return 0;
  103. }
  104. return 1;
  105. }
  106. static ValueProfNode *allocateOneNode(void) {
  107. ValueProfNode *Node;
  108. if (!hasStaticCounters)
  109. return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
  110. /* Early check to avoid value wrapping around. */
  111. if (CurrentVNode + 1 > EndVNode) {
  112. if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
  113. PROF_WARN("Unable to track new values: %s. "
  114. " Consider using option -mllvm -vp-counters-per-site=<n> to "
  115. "allocate more"
  116. " value profile counters at compile time. \n",
  117. "Running out of static counters");
  118. }
  119. return 0;
  120. }
  121. Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
  122. /* Due to section padding, EndVNode point to a byte which is one pass
  123. * an incomplete VNode, so we need to skip the last incomplete node. */
  124. if (Node + 1 > EndVNode)
  125. return 0;
  126. return Node;
  127. }
  128. static COMPILER_RT_ALWAYS_INLINE void
  129. instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
  130. uint32_t CounterIndex, uint64_t CountValue) {
  131. __llvm_profile_data *PData = (__llvm_profile_data *)Data;
  132. if (!PData)
  133. return;
  134. if (!CountValue)
  135. return;
  136. if (!PData->Values) {
  137. if (!allocateValueProfileCounters(PData))
  138. return;
  139. }
  140. ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
  141. ValueProfNode *PrevVNode = NULL;
  142. ValueProfNode *MinCountVNode = NULL;
  143. ValueProfNode *CurVNode = ValueCounters[CounterIndex];
  144. uint64_t MinCount = UINT64_MAX;
  145. uint8_t VDataCount = 0;
  146. while (CurVNode) {
  147. if (TargetValue == CurVNode->Value) {
  148. CurVNode->Count += CountValue;
  149. return;
  150. }
  151. if (CurVNode->Count < MinCount) {
  152. MinCount = CurVNode->Count;
  153. MinCountVNode = CurVNode;
  154. }
  155. PrevVNode = CurVNode;
  156. CurVNode = CurVNode->Next;
  157. ++VDataCount;
  158. }
  159. if (VDataCount >= VPMaxNumValsPerSite) {
  160. /* Bump down the min count node's count. If it reaches 0,
  161. * evict it. This eviction/replacement policy makes hot
  162. * targets more sticky while cold targets less so. In other
  163. * words, it makes it less likely for the hot targets to be
  164. * prematurally evicted during warmup/establishment period,
  165. * when their counts are still low. In a special case when
  166. * the number of values tracked is reduced to only one, this
  167. * policy will guarantee that the dominating target with >50%
  168. * total count will survive in the end. Note that this scheme
  169. * allows the runtime to track the min count node in an adaptive
  170. * manner. It can correct previous mistakes and eventually
  171. * lock on a cold target that is alread in stable state.
  172. *
  173. * In very rare cases, this replacement scheme may still lead
  174. * to target loss. For instance, out of \c N value slots, \c N-1
  175. * slots are occupied by luke warm targets during the warmup
  176. * period and the remaining one slot is competed by two or more
  177. * very hot targets. If those hot targets occur in an interleaved
  178. * way, none of them will survive (gain enough weight to throw out
  179. * other established entries) due to the ping-pong effect.
  180. * To handle this situation, user can choose to increase the max
  181. * number of tracked values per value site. Alternatively, a more
  182. * expensive eviction mechanism can be implemented. It requires
  183. * the runtime to track the total number of evictions per-site.
  184. * When the total number of evictions reaches certain threshold,
  185. * the runtime can wipe out more than one lowest count entries
  186. * to give space for hot targets.
  187. */
  188. if (MinCountVNode->Count <= CountValue) {
  189. CurVNode = MinCountVNode;
  190. CurVNode->Value = TargetValue;
  191. CurVNode->Count = CountValue;
  192. } else
  193. MinCountVNode->Count -= CountValue;
  194. return;
  195. }
  196. CurVNode = allocateOneNode();
  197. if (!CurVNode)
  198. return;
  199. CurVNode->Value = TargetValue;
  200. CurVNode->Count += CountValue;
  201. uint32_t Success = 0;
  202. if (!ValueCounters[CounterIndex])
  203. Success =
  204. COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
  205. else if (PrevVNode && !PrevVNode->Next)
  206. Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
  207. if (!Success && !hasStaticCounters) {
  208. free(CurVNode);
  209. return;
  210. }
  211. }
  212. COMPILER_RT_VISIBILITY void
  213. __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
  214. uint32_t CounterIndex) {
  215. instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
  216. }
  217. COMPILER_RT_VISIBILITY void
  218. __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
  219. uint32_t CounterIndex,
  220. uint64_t CountValue) {
  221. instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
  222. }
  223. /*
  224. * The target values are partitioned into multiple ranges. The range spec is
  225. * defined in InstrProfData.inc.
  226. */
  227. COMPILER_RT_VISIBILITY void
  228. __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
  229. uint32_t CounterIndex) {
  230. // Map the target value to the representative value of its range.
  231. uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
  232. __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
  233. }
  234. /*
  235. * A wrapper struct that represents value profile runtime data.
  236. * Like InstrProfRecord class which is used by profiling host tools,
  237. * ValueProfRuntimeRecord also implements the abstract interfaces defined in
  238. * ValueProfRecordClosure so that the runtime data can be serialized using
  239. * shared C implementation.
  240. */
  241. typedef struct ValueProfRuntimeRecord {
  242. const __llvm_profile_data *Data;
  243. ValueProfNode **NodesKind[IPVK_Last + 1];
  244. uint8_t **SiteCountArray;
  245. } ValueProfRuntimeRecord;
  246. /* ValueProfRecordClosure Interface implementation. */
  247. static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
  248. return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
  249. }
  250. static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
  251. uint32_t S = 0, I;
  252. const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  253. if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
  254. return 0;
  255. for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
  256. S += Record->SiteCountArray[VK][I];
  257. return S;
  258. }
  259. static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
  260. uint32_t S) {
  261. const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  262. return Record->SiteCountArray[VK][S];
  263. }
  264. static ValueProfRuntimeRecord RTRecord;
  265. static ValueProfRecordClosure RTRecordClosure = {
  266. &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
  267. getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
  268. INSTR_PROF_NULLPTR, /* RemapValueData */
  269. INSTR_PROF_NULLPTR, /* GetValueForSite, */
  270. INSTR_PROF_NULLPTR /* AllocValueProfData */
  271. };
  272. static uint32_t
  273. initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
  274. uint8_t *SiteCountArray[]) {
  275. unsigned I, J, S = 0, NumValueKinds = 0;
  276. ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
  277. RTRecord.Data = Data;
  278. RTRecord.SiteCountArray = SiteCountArray;
  279. for (I = 0; I <= IPVK_Last; I++) {
  280. uint16_t N = Data->NumValueSites[I];
  281. if (!N)
  282. continue;
  283. NumValueKinds++;
  284. RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
  285. for (J = 0; J < N; J++) {
  286. /* Compute value count for each site. */
  287. uint32_t C = 0;
  288. ValueProfNode *Site =
  289. Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
  290. while (Site) {
  291. C++;
  292. Site = Site->Next;
  293. }
  294. if (C > UCHAR_MAX)
  295. C = UCHAR_MAX;
  296. RTRecord.SiteCountArray[I][J] = C;
  297. }
  298. S += N;
  299. }
  300. return NumValueKinds;
  301. }
  302. static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
  303. InstrProfValueData *Dst,
  304. ValueProfNode *StartNode, uint32_t N) {
  305. unsigned I;
  306. ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
  307. for (I = 0; I < N; I++) {
  308. Dst[I].Value = VNode->Value;
  309. Dst[I].Count = VNode->Count;
  310. VNode = VNode->Next;
  311. }
  312. return VNode;
  313. }
  314. static uint32_t getValueProfDataSizeWrapper(void) {
  315. return getValueProfDataSize(&RTRecordClosure);
  316. }
  317. static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
  318. return getNumValueDataForSiteRT(&RTRecord, VK, S);
  319. }
  320. static VPDataReaderType TheVPDataReader = {
  321. initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
  322. getFirstValueProfRecord, getNumValueDataForSiteWrapper,
  323. getValueProfDataSizeWrapper, getNextNValueData};
  324. COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
  325. return &TheVPDataReader;
  326. }