1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2 |*
3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 |* See https://llvm.org/LICENSE.txt for license information.
5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 |*
7 \*===----------------------------------------------------------------------===*/
8
9 #include <assert.h>
10 #include <limits.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14
15 #include "InstrProfiling.h"
16 #include "InstrProfilingInternal.h"
17 #include "InstrProfilingUtil.h"
18
19 #define INSTR_PROF_VALUE_PROF_DATA
20 #define INSTR_PROF_COMMON_API_IMPL
21 #define INSTR_PROF_VALUE_PROF_MEMOP_API
22 #include "profile/InstrProfData.inc"
23
/* Non-zero while value nodes come from the statically allocated pool; cleared
 * by allocateValueProfileCounters(), after which allocateOneNode() uses
 * calloc() instead. */
static int hasStaticCounters = 1;
/* Number of times allocateOneNode() has hit the "out of static counters"
 * path; used to stop printing the warning after INSTR_PROF_MAX_VP_WARNS. */
static int OutOfNodesWarnings = 0;
/* Set to 1 once the per-site limit has been overridden through the
 * environment or lprofSetMaxValsPerSite(). */
static int hasNonDefaultValsPerSite = 0;
/* Upper bound on the number of out-of-nodes warnings that are printed. */
#define INSTR_PROF_MAX_VP_WARNS 10
/* Initial value of VPMaxNumValsPerSite. */
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
/* Number of elements in the lprofValueProfNodes pool below. */
#define INSTR_PROF_VNODE_POOL_SIZE 1024

#ifndef _MSC_VER
/* A shared static pool in addition to the vnodes statically
 * allocated by the compiler. It is placed in the vnodes section named by
 * INSTR_PROF_VNODES_SECT_NAME. */
COMPILER_RT_VISIBILITY ValueProfNode
    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
       COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
#endif

/* Maximum number of distinct values tracked per value site. Once a site holds
 * this many nodes, instrumentTargetValueImpl() applies its eviction policy
 * instead of allocating a new node. */
COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
41
lprofSetupValueProfiler(void)42 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
43 const char *Str = 0;
44 Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
45 if (Str && Str[0]) {
46 VPMaxNumValsPerSite = atoi(Str);
47 hasNonDefaultValsPerSite = 1;
48 }
49 if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
50 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
51 }
52
lprofSetMaxValsPerSite(uint32_t MaxVals)53 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
54 VPMaxNumValsPerSite = MaxVals;
55 hasNonDefaultValsPerSite = 1;
56 }
57
58 /* This method is only used in value profiler mock testing. */
59 COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data * Data,uint32_t ValueKind,uint16_t NumValueSites)60 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
61 uint32_t ValueKind, uint16_t NumValueSites) {
62 *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
63 }
64
65 /* This method is only used in value profiler mock testing. */
66 COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data * Data)67 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
68 return Data + 1;
69 }
70
71 /* This method is only used in value profiler mock testing. */
72 COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data * Data)73 __llvm_get_function_addr(const __llvm_profile_data *Data) {
74 return Data->FunctionPointer;
75 }
76
77 /* Allocate an array that holds the pointers to the linked lists of
78 * value profile counter nodes. The number of element of the array
79 * is the total number of value profile sites instrumented. Returns
80 * 0 if allocation fails.
81 */
82
allocateValueProfileCounters(__llvm_profile_data * Data)83 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
84 uint64_t NumVSites = 0;
85 uint32_t VKI;
86
87 /* This function will never be called when value site array is allocated
88 statically at compile time. */
89 hasStaticCounters = 0;
90 /* When dynamic allocation is enabled, allow tracking the max number of
91 * values allowd. */
92 if (!hasNonDefaultValsPerSite)
93 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
94
95 for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
96 NumVSites += Data->NumValueSites[VKI];
97
98 // If NumVSites = 0, calloc is allowed to return a non-null pointer.
99 assert(NumVSites > 0 && "NumVSites can't be zero");
100 ValueProfNode **Mem =
101 (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
102 if (!Mem)
103 return 0;
104 if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
105 free(Mem);
106 return 0;
107 }
108 return 1;
109 }
110
allocateOneNode(void)111 static ValueProfNode *allocateOneNode(void) {
112 ValueProfNode *Node;
113
114 if (!hasStaticCounters)
115 return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
116
117 /* Early check to avoid value wrapping around. */
118 if (CurrentVNode + 1 > EndVNode) {
119 if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
120 PROF_WARN("Unable to track new values: %s. "
121 " Consider using option -mllvm -vp-counters-per-site=<n> to "
122 "allocate more"
123 " value profile counters at compile time. \n",
124 "Running out of static counters");
125 }
126 return 0;
127 }
128 Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
129 /* Due to section padding, EndVNode point to a byte which is one pass
130 * an incomplete VNode, so we need to skip the last incomplete node. */
131 if (Node + 1 > EndVNode)
132 return 0;
133
134 return Node;
135 }
136
137 static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue,void * Data,uint32_t CounterIndex,uint64_t CountValue)138 instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
139 uint32_t CounterIndex, uint64_t CountValue) {
140 __llvm_profile_data *PData = (__llvm_profile_data *)Data;
141 if (!PData)
142 return;
143 if (!CountValue)
144 return;
145 if (!PData->Values) {
146 if (!allocateValueProfileCounters(PData))
147 return;
148 }
149
150 ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
151 ValueProfNode *PrevVNode = NULL;
152 ValueProfNode *MinCountVNode = NULL;
153 ValueProfNode *CurVNode = ValueCounters[CounterIndex];
154 uint64_t MinCount = UINT64_MAX;
155
156 uint8_t VDataCount = 0;
157 while (CurVNode) {
158 if (TargetValue == CurVNode->Value) {
159 CurVNode->Count += CountValue;
160 return;
161 }
162 if (CurVNode->Count < MinCount) {
163 MinCount = CurVNode->Count;
164 MinCountVNode = CurVNode;
165 }
166 PrevVNode = CurVNode;
167 CurVNode = CurVNode->Next;
168 ++VDataCount;
169 }
170
171 if (VDataCount >= VPMaxNumValsPerSite) {
172 /* Bump down the min count node's count. If it reaches 0,
173 * evict it. This eviction/replacement policy makes hot
174 * targets more sticky while cold targets less so. In other
175 * words, it makes it less likely for the hot targets to be
176 * prematurally evicted during warmup/establishment period,
177 * when their counts are still low. In a special case when
178 * the number of values tracked is reduced to only one, this
179 * policy will guarantee that the dominating target with >50%
180 * total count will survive in the end. Note that this scheme
181 * allows the runtime to track the min count node in an adaptive
182 * manner. It can correct previous mistakes and eventually
183 * lock on a cold target that is alread in stable state.
184 *
185 * In very rare cases, this replacement scheme may still lead
186 * to target loss. For instance, out of \c N value slots, \c N-1
187 * slots are occupied by luke warm targets during the warmup
188 * period and the remaining one slot is competed by two or more
189 * very hot targets. If those hot targets occur in an interleaved
190 * way, none of them will survive (gain enough weight to throw out
191 * other established entries) due to the ping-pong effect.
192 * To handle this situation, user can choose to increase the max
193 * number of tracked values per value site. Alternatively, a more
194 * expensive eviction mechanism can be implemented. It requires
195 * the runtime to track the total number of evictions per-site.
196 * When the total number of evictions reaches certain threshold,
197 * the runtime can wipe out more than one lowest count entries
198 * to give space for hot targets.
199 */
200 if (MinCountVNode->Count <= CountValue) {
201 CurVNode = MinCountVNode;
202 CurVNode->Value = TargetValue;
203 CurVNode->Count = CountValue;
204 } else
205 MinCountVNode->Count -= CountValue;
206
207 return;
208 }
209
210 CurVNode = allocateOneNode();
211 if (!CurVNode)
212 return;
213 CurVNode->Value = TargetValue;
214 CurVNode->Count += CountValue;
215
216 uint32_t Success = 0;
217 if (!ValueCounters[CounterIndex])
218 Success =
219 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
220 else if (PrevVNode && !PrevVNode->Next)
221 Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
222
223 if (!Success && !hasStaticCounters) {
224 free(CurVNode);
225 return;
226 }
227 }
228
/* Record a single occurrence of TargetValue at value site CounterIndex of
 * the function described by Data. Forwards to instrumentTargetValueImpl()
 * with a count of 1. */
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
                                 uint32_t CounterIndex) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
}
/* Record CountValue occurrences of TargetValue at value site CounterIndex of
 * the function described by Data. Forwards all arguments unchanged to
 * instrumentTargetValueImpl(). */
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
                                       uint32_t CounterIndex,
                                       uint64_t CountValue) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
}
240
241 /*
242 * The target values are partitioned into multiple ranges. The range spec is
243 * defined in InstrProfData.inc.
244 */
245 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_memop(uint64_t TargetValue,void * Data,uint32_t CounterIndex)246 __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
247 uint32_t CounterIndex) {
248 // Map the target value to the representative value of its range.
249 uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
250 __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
251 }
252
/*
 * A wrapper struct that represents value profile runtime data.
 * Like InstrProfRecord class which is used by profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
 * ValueProfRecordClosure so that the runtime data can be serialized using
 * shared C implementation.
 */
typedef struct ValueProfRuntimeRecord {
  /* Profile data record of the function currently being processed. */
  const __llvm_profile_data *Data;
  /* Per value kind: pointer to the first list head belonging to that kind
   * inside the function's array of site lists, or null when the function has
   * no such array. Only filled in for kinds that have at least one site. */
  ValueProfNode **NodesKind[IPVK_Last + 1];
  /* Caller-provided storage: SiteCountArray[kind][site] holds the number of
   * nodes at that site, clipped to UCHAR_MAX. */
  uint8_t **SiteCountArray;
} ValueProfRuntimeRecord;
265
266 /* ValueProfRecordClosure Interface implementation. */
267
getNumValueSitesRT(const void * R,uint32_t VK)268 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
269 return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
270 }
271
getNumValueDataRT(const void * R,uint32_t VK)272 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
273 uint32_t S = 0, I;
274 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
275 if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
276 return 0;
277 for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
278 S += Record->SiteCountArray[VK][I];
279 return S;
280 }
281
getNumValueDataForSiteRT(const void * R,uint32_t VK,uint32_t S)282 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
283 uint32_t S) {
284 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
285 return Record->SiteCountArray[VK][S];
286 }
287
/* The single runtime record shared by the reader functions in this file; it
 * is (re)filled by initializeValueProfRuntimeRecord(). */
static ValueProfRuntimeRecord RTRecord;
/* Closure that exposes RTRecord through the ValueProfRecordClosure
 * interface. Only the three size-query callbacks are provided; the remaining
 * entries are left null. */
static ValueProfRecordClosure RTRecordClosure = {
    &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
    INSTR_PROF_NULLPTR, /* RemapValueData */
    INSTR_PROF_NULLPTR, /* GetValueForSite,  */
    INSTR_PROF_NULLPTR  /* AllocValueProfData */
};
296
297 static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data * Data,uint8_t * SiteCountArray[])298 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
299 uint8_t *SiteCountArray[]) {
300 unsigned I, J, S = 0, NumValueKinds = 0;
301 ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
302 RTRecord.Data = Data;
303 RTRecord.SiteCountArray = SiteCountArray;
304 for (I = 0; I <= IPVK_Last; I++) {
305 uint16_t N = Data->NumValueSites[I];
306 if (!N)
307 continue;
308
309 NumValueKinds++;
310
311 RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
312 for (J = 0; J < N; J++) {
313 /* Compute value count for each site. */
314 uint32_t C = 0;
315 ValueProfNode *Site =
316 Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
317 while (Site) {
318 C++;
319 Site = Site->Next;
320 }
321 if (C > UCHAR_MAX)
322 C = UCHAR_MAX;
323 RTRecord.SiteCountArray[I][J] = C;
324 }
325 S += N;
326 }
327 return NumValueKinds;
328 }
329
/* Copy the Value/Count pairs of N consecutive nodes into Dst[0..N-1].
 *
 * Copying starts at StartNode, or at the head of the list for site Site of
 * kind VK in RTRecord when StartNode is null. Returns the node following the
 * last one copied, so that a later call can resume from there. The list is
 * not checked for having at least N nodes.
 */
static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
                                        InstrProfValueData *Dst,
                                        ValueProfNode *StartNode, uint32_t N) {
  ValueProfNode *Cursor = StartNode;
  uint32_t Remaining;

  if (!Cursor)
    Cursor = RTRecord.NodesKind[VK][Site];

  for (Remaining = N; Remaining > 0; --Remaining) {
    Dst->Value = Cursor->Value;
    Dst->Count = Cursor->Count;
    ++Dst;
    Cursor = Cursor->Next;
  }
  return Cursor;
}
342
/* Return the result of getValueProfDataSize() for the shared runtime record,
 * queried through RTRecordClosure. */
static uint32_t getValueProfDataSizeWrapper(void) {
  return getValueProfDataSize(&RTRecordClosure);
}
346
getNumValueDataForSiteWrapper(uint32_t VK,uint32_t S)347 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
348 return getNumValueDataForSiteRT(&RTRecord, VK, S);
349 }
350
/* Table of callbacks handed out by lprofGetVPDataReader(). The entries
 * defined in this file operate on the shared RTRecord;
 * getValueProfRecordHeaderSize and getFirstValueProfRecord are defined
 * outside this file's visible code (presumably in InstrProfData.inc). */
static VPDataReaderType TheVPDataReader = {
    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
    getValueProfDataSizeWrapper,      getNextNValueData};
355
/* Return a pointer to the file-static value profile data reader table. */
COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
  return &TheVPDataReader;
}
359