1 /*
2 Copyright (c) 2009-2020, Intel Corporation
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 */
13 // written by Roman Dementiev
14 // Otto Bruggeman
15 // Thomas Willhalm
16 // Pat Fay
17 // Austen Ott
18 // Jim Harris (FreeBSD)
19
20 /*! \file cpucounters.cpp
21 \brief The bulk of PCM implementation
22 */
23
24 //#define PCM_TEST_FALLBACK_TO_ATOM
25
26 #include <stdio.h>
27 #include <assert.h>
28 #ifdef PCM_EXPORTS
29 // pcm-lib.h includes cpucounters.h
30 #include "PCM-Lib_Win\pcm-lib.h"
31 #else
32 #include "cpucounters.h"
33 #endif
34 #include "msr.h"
35 #include "pci.h"
36 #include "types.h"
37 #include "utils.h"
38 #include "topology.h"
39
40 #if defined (__FreeBSD__) || defined(__DragonFly__)
41 #include <sys/param.h>
42 #include <sys/module.h>
43 #include <sys/types.h>
44 #include <sys/sysctl.h>
45 #include <sys/sem.h>
46 #include <sys/ioccom.h>
47 #include <sys/cpuctl.h>
48 #include <machine/cpufunc.h>
49 #endif
50
51 #ifdef _MSC_VER
52 #include <intrin.h>
53 #include <windows.h>
54 #include <comdef.h>
55 #include <tchar.h>
56 #include "winring0/OlsApiInit.h"
57 #include "PCM_Win/windriver.h"
58 #else
59 #include <pthread.h>
60 #if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707)
61 #include <pthread_np.h>
62 #endif
63 #include <errno.h>
64 #include <sys/time.h>
65 #ifdef __linux__
66 #include <sys/mman.h>
67 #endif
68 #endif
69
70 #include <string.h>
71 #include <limits>
72 #include <map>
73 #include <algorithm>
74 #include <thread>
75 #include <future>
76 #include <functional>
77 #include <queue>
78 #include <condition_variable>
79 #include <mutex>
80 #include <atomic>
81
82 #ifdef __APPLE__
83 #include <sys/types.h>
84 #include <sys/sysctl.h>
85 #include <sys/sem.h>
86 #endif
87
88 namespace pcm {
89
90 #ifdef __APPLE__
91 // convertUnknownToInt is used in the safe sysctl call to convert an unknown size to an int
92 int convertUnknownToInt(size_t size, char* value);
93 #endif
94
95 #undef PCM_DEBUG_TOPOLOGY // debug of topology enumeration routine
96
97 // FreeBSD is much more restrictive about names for semaphores
98 #if defined (__FreeBSD__)
99 #define PCM_INSTANCE_LOCK_SEMAPHORE_NAME "/PCM_inst_lock"
100 #define PCM_NUM_INSTANCES_SEMAPHORE_NAME "/num_PCM_inst"
101 #else
102 #define PCM_INSTANCE_LOCK_SEMAPHORE_NAME "PCM inst lock"
103 #define PCM_NUM_INSTANCES_SEMAPHORE_NAME "Num PCM insts"
104 #endif
105
106 #ifdef _MSC_VER
107
108 HMODULE hOpenLibSys = NULL;
109
110 #ifndef NO_WINRING
// Loads and initializes the WinRing0 driver library (OpenLibSys) used for
// MSR/PCI access on Windows. Returns true on success; on failure resets
// hOpenLibSys to NULL and returns false.
bool PCM::initWinRing0Lib()
{
    const BOOL result = InitOpenLibSys(&hOpenLibSys);

    if (result == FALSE)
    {
        // NOTE(review): hOpenLibSys is an HMODULE; CloseHandle on a module
        // handle looks suspicious (FreeLibrary/DeinitOpenLibSys would be the
        // usual cleanup) — confirm against the OlsApiInit.h contract.
        CloseHandle(hOpenLibSys);
        hOpenLibSys = NULL;
        return false;
    }

    // Build the driver device name from the driver version and restrict who
    // may open it (the driver is powerful: arbitrary MSR/PCI access).
    BYTE major, minor, revision, release;
    GetDriverVersion(&major, &minor, &revision, &release);
    wchar_t buffer[128];
    swprintf_s(buffer, 128, _T("\\\\.\\WinRing0_%d_%d_%d"),(int)major,(int)minor, (int)revision);
    restrictDriverAccess(buffer);

    return true;
}
130 #endif // NO_WINRING
131
// RAII scope lock guarding PCM instance creation/destruction on Windows.
// Uses a named Win32 mutex: "Global\..." serializes across all sessions,
// "Local\..." only within the current session.
class InstanceLock
{
    HANDLE Mutex;

    InstanceLock(); // default construction forbidden
public:
    // Acquires the named mutex (blocking). NOTE(review): the CreateMutex
    // result is not checked; if it returns NULL, WaitForSingleObject is
    // called on a NULL handle — confirm this is acceptable here.
    InstanceLock(const bool global)
    {
        Mutex = CreateMutex(NULL, FALSE,
            global?(L"Global\\Processor Counter Monitor instance create/destroy lock"):(L"Local\\Processor Counter Monitor instance create/destroy lock"));
        // lock
        WaitForSingleObject(Mutex, INFINITE);
    }
    // Releases and closes the mutex on scope exit.
    ~InstanceLock()
    {
        // unlock
        ReleaseMutex(Mutex);
        CloseHandle(Mutex);
    }
};
152 #else // Linux or Apple
153
// Process-local mutex used by InstanceLock(false) (note: "Intance" typo is
// kept — the name is referenced as-is).
pthread_mutex_t processIntanceMutex = PTHREAD_MUTEX_INITIALIZER;

// RAII scope lock guarding PCM instance creation/destruction on POSIX systems.
// global == false: serializes threads within this process via a pthread mutex.
// global == true : serializes across processes via a named POSIX semaphore
// (created world-accessible so different users' PCM instances can cooperate).
class InstanceLock
{
    const char * globalSemaphoreName; // name of the cross-process semaphore
    sem_t * globalSemaphore;          // handle from sem_open (global mode only)
    bool global;                      // which locking mode this instance uses

    InstanceLock(); // default construction forbidden
public:
    InstanceLock(const bool global_) : globalSemaphoreName(PCM_INSTANCE_LOCK_SEMAPHORE_NAME), globalSemaphore(NULL), global(global_)
    {
        if(!global)
        {
            pthread_mutex_lock(&processIntanceMutex);
            return;
        }
        // clear the umask so the semaphore is created with the full 0777 mode
        umask(0);
        while (1)
        {
            //sem_unlink(globalSemaphoreName); // temporary
            globalSemaphore = sem_open(globalSemaphoreName, O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO, 1);
            if (SEM_FAILED == globalSemaphore)
            {
                if (EACCES == errno)
                {
                    std::cerr << "PCM Error, do not have permissions to open semaphores in /dev/shm/. Waiting one second and retrying...\n";
                    sleep(1);
                }
                // NOTE(review): for errno values other than EACCES this loop
                // retries immediately without sleeping or reporting — a
                // persistent failure would spin hot. Confirm intended.
            }
            else
            {
                /*
                if (sem_post(globalSemaphore)) {
                    perror("sem_post error");
                }
                */
                break; // success
            }
        }
        // acquire the cross-process lock
        if (sem_wait(globalSemaphore)) {
            perror("sem_wait error");
        }
    }
    // Releases whichever lock the constructor took.
    ~InstanceLock()
    {
        if(!global)
        {
            pthread_mutex_unlock(&processIntanceMutex);
            return;
        }
        if (sem_post(globalSemaphore)) {
            perror("sem_post error");
        }
    }
};
210 #endif // end of _MSC_VER else
211
212 #if defined(__FreeBSD__)
213 #define cpu_set_t cpuset_t
214 #endif
215
// RAII helper that pins the calling thread to one core for the object's
// lifetime and restores the previous affinity mask in the destructor.
// Running cpuid/MSR accesses on the target core is the "speedup trick":
// it avoids cross-core round-trips.
class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD, Windows
{
    TemporalThreadAffinity(); // forbidden
#if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707)
    cpu_set_t old_affinity; // mask to restore on destruction

public:
    // Pins the thread to core_id; throws on failure when checkStatus is set.
    TemporalThreadAffinity(uint32 core_id, bool checkStatus = true)
    {
        pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity);

        cpu_set_t new_affinity;
        CPU_ZERO(&new_affinity);
        CPU_SET(core_id, &new_affinity);
        const auto res = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &new_affinity);
        if (res != 0 && checkStatus)
        {
            std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n";
            throw std::exception();
        }
    }
    ~TemporalThreadAffinity()
    {
        pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity);
    }
    bool supported() const { return true; }

#elif defined(__linux__)
    // Linux uses dynamically allocated CPU sets so systems with more CPUs
    // than fit in a static cpu_set_t (1024) are handled, up to maxCPUs.
    cpu_set_t * old_affinity;
    static constexpr auto maxCPUs = 8192;
    const size_t set_size; // byte size of an allocated mask for maxCPUs

public:
    // Pins the thread to core_id; throws on failure when checkStatus is set.
    TemporalThreadAffinity(const uint32 core_id, bool checkStatus = true)
        : set_size(CPU_ALLOC_SIZE(maxCPUs))
    {
        old_affinity = CPU_ALLOC(maxCPUs);
        assert(old_affinity);
        pthread_getaffinity_np(pthread_self(), set_size, old_affinity);

        cpu_set_t * new_affinity = CPU_ALLOC(maxCPUs);
        assert(new_affinity);
        CPU_ZERO_S(set_size, new_affinity);
        CPU_SET_S(core_id, set_size, new_affinity);
        const auto res = pthread_setaffinity_np(pthread_self(), set_size, new_affinity);
        CPU_FREE(new_affinity); // freed regardless of success
        if (res != 0 && checkStatus)
        {
            std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n";
            throw std::exception();
        }
    }
    ~TemporalThreadAffinity()
    {
        pthread_setaffinity_np(pthread_self(), set_size, old_affinity);
        CPU_FREE(old_affinity);
    }
    bool supported() const { return true; }
#elif defined(_MSC_VER)
    // Windows: delegate to the ThreadGroupTempAffinity helper (windriver.h).
    ThreadGroupTempAffinity affinity;
public:
    TemporalThreadAffinity(uint32 core, bool checkStatus = true) : affinity(core, checkStatus) {}
    bool supported() const { return true; }
#else // not implemented for OS X; supported() lets callers detect that
public:
    TemporalThreadAffinity(uint32) { }
    TemporalThreadAffinity(uint32, bool) {}
    bool supported() const { return false; }
#endif
};
286
287
288 PCM * PCM::instance = NULL;
289
290 /*
291 static int bitCount(uint64 n)
292 {
293 int count = 0;
294 while (n)
295 {
296 count += static_cast<int>(n & 0x00000001);
297 n >>= static_cast<uint64>(1);
298 }
299 return count;
300 }
301 */
302
getInstance()303 PCM * PCM::getInstance()
304 {
305 // no lock here
306 if (instance) return instance;
307
308 InstanceLock lock(false);
309 if (instance) return instance;
310
311 return instance = new PCM();
312 }
313
extractCoreGenCounterValue(uint64 val)314 uint64 PCM::extractCoreGenCounterValue(uint64 val)
315 {
316 if (canUsePerf) return val;
317
318 if(core_gen_counter_width)
319 return extract_bits(val, 0, core_gen_counter_width-1);
320
321 return val;
322 }
323
extractCoreFixedCounterValue(uint64 val)324 uint64 PCM::extractCoreFixedCounterValue(uint64 val)
325 {
326 if (canUsePerf) return val;
327
328 if(core_fixed_counter_width)
329 return extract_bits(val, 0, core_fixed_counter_width-1);
330
331 return val;
332 }
333
extractUncoreGenCounterValue(uint64 val)334 uint64 PCM::extractUncoreGenCounterValue(uint64 val)
335 {
336 if(uncore_gen_counter_width)
337 return extract_bits(val, 0, uncore_gen_counter_width-1);
338
339 return val;
340 }
341
extractUncoreFixedCounterValue(uint64 val)342 uint64 PCM::extractUncoreFixedCounterValue(uint64 val)
343 {
344 if(uncore_fixed_counter_width)
345 return extract_bits(val, 0, uncore_fixed_counter_width-1);
346
347 return val;
348 }
349
extractQOSMonitoring(uint64 val)350 uint64 PCM::extractQOSMonitoring(uint64 val)
351 {
352 //Check if any of the error bit(63) or Unavailable bit(62) of the IA32_QM_CTR MSR are 1
353 if(val & (3ULL<<62))
354 {
355 // invalid reading
356 return static_cast<uint64>(PCM_INVALID_QOS_MONITORING_DATA);
357 }
358
359 // valid reading
360 return extract_bits(val,0,61);
361 }
extractThermalHeadroom(uint64 val)362 int32 extractThermalHeadroom(uint64 val)
363 {
364 if(val & (1ULL<<31ULL))
365 { // valid reading
366 return static_cast<int32>(extract_bits(val, 16, 22));
367 }
368
369 // invalid reading
370 return static_cast<int32>(PCM_INVALID_THERMAL_HEADROOM);
371 }
372
373
374 uint64 get_frequency_from_cpuid();
375
376
377
378 /* Adding the new version of cpuid with leaf and subleaf as an input */
/* Adding the new version of cpuid with leaf and subleaf as an input */
// Executes CPUID with EAX=leaf, ECX=subleaf and stores EAX..EDX into info.
// On MSVC the intrinsic is used; elsewhere inline assembly issues the
// instruction directly.
void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
{
#ifdef _MSC_VER
    __cpuidex(info.array, leaf, subleaf);
#else
    __asm__ __volatile__ ("cpuid" : \
                          "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
#endif
}
388
// Discovers the core PMU configuration from CPUID leaf 0xA (architectural
// performance monitoring): perfmon version, number and width of the
// general-purpose and fixed counters. Then applies two corrections:
// - TSX force-abort mode (when available) can reduce usable gen counters to 3;
// - on Linux VMs with arch_perfmon, a workaround caps gen counters at 3
//   unless disabled via PCM_NO_AWS_WORKAROUND=1.
void PCM::readCoreCounterConfig(const bool complainAboutMSR)
{
    if (max_cpuid >= 0xa)
    {
        // get counter related info
        PCM_CPUID_INFO cpuinfo;
        pcm_cpuid(0xa, cpuinfo);
        perfmon_version = extract_bits_ui(cpuinfo.array[0], 0, 7);
        core_gen_counter_num_max = extract_bits_ui(cpuinfo.array[0], 8, 15);
        core_gen_counter_width = extract_bits_ui(cpuinfo.array[0], 16, 23);
        if (perfmon_version > 1)
        {
            // EDX reports fixed counter count (bits 4:0) and width (bits 12:5)
            core_fixed_counter_num_max = extract_bits_ui(cpuinfo.array[3], 0, 4);
            core_fixed_counter_width = extract_bits_ui(cpuinfo.array[3], 5, 12);
        }
        else if (1 == perfmon_version)
        {
            // perfmon v1 does not enumerate fixed counters: assume 3,
            // same width as the general-purpose counters
            core_fixed_counter_num_max = 3;
            core_fixed_counter_width = core_gen_counter_width;
        }
        if (isForceRTMAbortModeAvailable())
        {
            uint64 TSXForceAbort = 0;
            if (MSR.empty())
            {
                if (complainAboutMSR)
                {
                    std::cerr << "PCM Error: Can't determine the number of available counters reliably because of no access to MSR.\n";
                }
            }
            else if (MSR[0]->read(MSR_TSX_FORCE_ABORT, &TSXForceAbort) == sizeof(uint64))
            {
                TSXForceAbort &= 1;
                /*
                    TSXForceAbort is 0 (default mode) => the number of useful gen counters is 3
                    TSXForceAbort is 1 => the number of gen counters is unchanged
                */
                if (TSXForceAbort == 0)
                {
                    core_gen_counter_num_max = 3;
                }
            }
            else
            {
                std::cerr << "PCM Error: Can't determine the number of available counters reliably because reading MSR_TSX_FORCE_ABORT failed.\n";
            }
        }
#if defined(__linux__)
        // AWS/vPMU workaround: counter #3 may be virtualized incorrectly,
        // so expose only 3 programmable counters unless opted out.
        const auto env = std::getenv("PCM_NO_AWS_WORKAROUND");
        auto aws_workaround = true;
        if (env != nullptr && std::string(env) == std::string("1"))
        {
            aws_workaround = false;
        }
        if (aws_workaround == true && vm == true && linux_arch_perfmon == true && core_gen_counter_num_max > 3)
        {
            core_gen_counter_num_max = 3;
            std::cerr << "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n";
            std::cerr << "      You can disable the workaround by setting PCM_NO_AWS_WORKAROUND=1 environment variable\n";
        }
#endif
    }
}
452
isFixedCounterSupported(unsigned c)453 bool PCM::isFixedCounterSupported(unsigned c)
454 {
455 if (max_cpuid >= 0xa)
456 {
457 PCM_CPUID_INFO cpuinfo;
458 pcm_cpuid(0xa, cpuinfo);
459 return extract_bits_ui(cpuinfo.reg.ecx, c, c) || (extract_bits_ui(cpuinfo.reg.edx, 4, 0) > c);
460 }
461 return false;
462 }
463
isHWTMAL1Supported() const464 bool PCM::isHWTMAL1Supported() const
465 {
466 static int supported = -1;
467 if (supported < 0)
468 {
469 supported = 0;
470 PCM_CPUID_INFO cpuinfo;
471 pcm_cpuid(1, cpuinfo);
472 if (extract_bits_ui(cpuinfo.reg.ecx, 15, 15) && MSR.size())
473 {
474 uint64 perf_cap;
475 if (MSR[0]->read(MSR_PERF_CAPABILITIES, &perf_cap) == sizeof(uint64))
476 {
477 supported = (int)extract_bits(perf_cap, 15, 15);
478 }
479 }
480 }
481 return supported > 0;
482 }
483
// Reads the microcode update signature into cpu_microcode_level using the
// procedure from the Intel SDM ("Update Signature and Verification" /
// "Determining the Signature"): zero MSR_IA32_BIOS_SIGN_ID, execute CPUID(1)
// (which refreshes the MSR), then read the signature from its upper 32 bits.
void PCM::readCPUMicrocodeLevel()
{
    if (MSR.empty()) return;
    const int ref_core = 0;
    // pin to the reference core so the CPUID and MSR accesses hit the same CPU
    TemporalThreadAffinity affinity(ref_core);
    if (affinity.supported() && isCoreOnline(ref_core))
    {   // see "Update Signature and Verification" and "Determining the Signature"
        // sections in Intel SDM how to read ucode level
        if (MSR[ref_core]->write(MSR_IA32_BIOS_SIGN_ID, 0) == sizeof(uint64))
        {
            PCM_CPUID_INFO cpuinfo;
            pcm_cpuid(1, cpuinfo); // cpuid instructions updates MSR_IA32_BIOS_SIGN_ID
            uint64 result = 0;
            if (MSR[ref_core]->read(MSR_IA32_BIOS_SIGN_ID, &result) == sizeof(uint64))
            {
                // signature is in the high dword
                cpu_microcode_level = result >> 32;
            }
        }
    }
}
504
// Returns the number of programmable (general-purpose) core PMU counters
// as determined by readCoreCounterConfig().
int32 PCM::getMaxCustomCoreEvents()
{
    return core_gen_counter_num_max;
}
509
// Identifies the CPU via CPUID: verifies the vendor is GenuineIntel, decodes
// family/model/stepping (including the extended fields), detects hypervisor
// presence, reads the core counter configuration, and (on Linux) checks the
// arch_perfmon flag in /proc/cpuinfo. Returns false for unsupported CPUs or
// when running on a VM without vPMU (unless PCM_IGNORE_ARCH_PERFMON=1).
bool PCM::detectModel()
{
    char buffer[1024];
    // CPUID(0) returns the 12-byte vendor string split across EBX,EDX,ECX;
    // the union reassembles it in the correct order
    union {
        char cbuf[16];
        int  ibuf[16 / sizeof(int)];
    } buf;
    PCM_CPUID_INFO cpuinfo;
    pcm_cpuid(0, cpuinfo);
    memset(buffer, 0, 1024);
    memset(buf.cbuf, 0, 16);
    buf.ibuf[0] = cpuinfo.array[1];
    buf.ibuf[1] = cpuinfo.array[3];
    buf.ibuf[2] = cpuinfo.array[2];
    if (strncmp(buf.cbuf, "GenuineIntel", 4 * 3) != 0)
    {
        std::cerr << getUnsupportedMessage() << "\n";
        return false;
    }
    max_cpuid = cpuinfo.array[0]; // highest supported standard CPUID leaf

    // CPUID(1): family/model/stepping, combining base and extended fields
    pcm_cpuid(1, cpuinfo);
    cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) | ((cpuinfo.array[0] & 0xf00000) >> 16);
    cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12);
    cpu_stepping = cpuinfo.array[0] & 0x0f;

    // ECX bit 31 is the hypervisor-present bit
    if (cpuinfo.reg.ecx & (1UL << 31UL)) {
        vm = true;
        std::cerr << "Detected a hypervisor/virtualization technology. Some metrics might not be available due to configuration or availability of virtual hardware features.\n";
    }

    readCoreCounterConfig();

    if (cpu_family != 6)
    {
        std::cerr << getUnsupportedMessage() << " CPU Family: " << cpu_family << "\n";
        return false;
    }

    // CPUID(7,0): structured extended feature flags (used below for the
    // speculation-control feature report)
    pcm_cpuid(7, 0, cpuinfo);

#ifdef __linux__
    // Scans /proc/cpuinfo "flags" lines for a given feature flag.
    auto checkLinuxCpuinfoFlag = [](const std::string& flag) -> bool
    {
        std::ifstream linuxCpuinfo("/proc/cpuinfo");
        if (linuxCpuinfo.is_open())
        {
            std::string line;
            while (std::getline(linuxCpuinfo, line))
            {
                auto tokens = split(line, ':');
                if (tokens.size() >= 2 && tokens[0].find("flags") == 0)
                {
                    for (auto curFlag : split(tokens[1], ' '))
                    {
                        if (flag == curFlag)
                        {
                            return true;
                        }
                    }
                }
            }
            linuxCpuinfo.close();
        }
        return false;
    };
    linux_arch_perfmon = checkLinuxCpuinfoFlag("arch_perfmon");
    std::cerr << "Linux arch_perfmon flag  : " << (linux_arch_perfmon ? "yes" : "no") << "\n";
    // a VM without arch_perfmon means the hypervisor does not expose a vPMU;
    // refuse to continue unless the user explicitly overrides
    if (vm == true && linux_arch_perfmon == false)
    {
        std::cerr << "ERROR: vPMU is not enabled in the hypervisor. Please see details in https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/set-up-analysis-target/on-virtual-machine.html \n";
        std::cerr << "       you can force-continue by setting PCM_IGNORE_ARCH_PERFMON=1 environment variable.\n";
        auto env = std::getenv("PCM_IGNORE_ARCH_PERFMON");
        auto ignore_arch_perfmon = false;
        if (env != nullptr && std::string(env) == std::string("1"))
        {
            ignore_arch_perfmon = true;
        }
        if (!ignore_arch_perfmon)
        {
            return false;
        }
    }
#endif

    // report speculation-control capabilities from CPUID(7,0):EDX
    std::cerr << "IBRS and IBPB supported  : " << ((cpuinfo.reg.edx & (1 << 26)) ? "yes" : "no") << "\n";
    std::cerr << "STIBP supported          : " << ((cpuinfo.reg.edx & (1 << 27)) ? "yes" : "no") << "\n";
    std::cerr << "Spec arch caps supported : " << ((cpuinfo.reg.edx & (1 << 29)) ? "yes" : "no") << "\n";

    return true;
}
601
isRDTDisabled() const602 bool PCM::isRDTDisabled() const
603 {
604 static int flag = -1;
605 if (flag < 0)
606 {
607 // flag not yet initialized
608 const char * varname = "PCM_NO_RDT";
609 char* env = nullptr;
610 #ifdef _MSC_VER
611 _dupenv_s(&env, NULL, varname);
612 #else
613 env = std::getenv(varname);
614 #endif
615 if (env != nullptr && std::string(env) == std::string("1"))
616 {
617 std::cout << "Disabling RDT usage because PCM_NO_RDT=1 environment variable is set.\n";
618 flag = 1;
619 }
620 else
621 {
622 flag = 0;
623 }
624 #ifdef _MSC_VER
625 free(env);
626 #endif
627 }
628 return flag > 0;
629 }
630
QOSMetricAvailable() const631 bool PCM::QOSMetricAvailable() const
632 {
633 if (isRDTDisabled()) return false;
634 #ifndef __linux__
635 if (isSecureBoot()) return false;
636 #endif
637 PCM_CPUID_INFO cpuinfo;
638 pcm_cpuid(0x7,0,cpuinfo);
639 return (cpuinfo.reg.ebx & (1<<12))?true:false;
640 }
641
L3QOSMetricAvailable() const642 bool PCM::L3QOSMetricAvailable() const
643 {
644 if (isRDTDisabled()) return false;
645 #ifndef __linux__
646 if (isSecureBoot()) return false;
647 #endif
648 PCM_CPUID_INFO cpuinfo;
649 pcm_cpuid(0xf,0,cpuinfo);
650 return (cpuinfo.reg.edx & (1<<1))?true:false;
651 }
652
L3CacheOccupancyMetricAvailable() const653 bool PCM::L3CacheOccupancyMetricAvailable() const
654 {
655 PCM_CPUID_INFO cpuinfo;
656 if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
657 return false;
658 pcm_cpuid(0xf,0x1,cpuinfo);
659 return (cpuinfo.reg.edx & 1)?true:false;
660 }
661
CoreLocalMemoryBWMetricAvailable() const662 bool PCM::CoreLocalMemoryBWMetricAvailable() const
663 {
664 if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
665 PCM_CPUID_INFO cpuinfo;
666 if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
667 return false;
668 pcm_cpuid(0xf,0x1,cpuinfo);
669 return (cpuinfo.reg.edx & 2)?true:false;
670 }
671
CoreRemoteMemoryBWMetricAvailable() const672 bool PCM::CoreRemoteMemoryBWMetricAvailable() const
673 {
674 if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
675 PCM_CPUID_INFO cpuinfo;
676 if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
677 return false;
678 pcm_cpuid(0xf, 0x1, cpuinfo);
679 return (cpuinfo.reg.edx & 4) ? true : false;
680 }
681
getMaxRMID() const682 unsigned PCM::getMaxRMID() const
683 {
684 unsigned maxRMID = 0;
685 PCM_CPUID_INFO cpuinfo;
686 pcm_cpuid(0xf,0,cpuinfo);
687 maxRMID = (unsigned)cpuinfo.reg.ebx + 1;
688 return maxRMID;
689 }
690
// Initializes RDT monitoring. On Linux it prefers the kernel resctrl driver
// (forced by PCM_USE_RESCTRL=1, or automatically when resctrl is mounted or
// Secure Boot is on). Otherwise it programs RMIDs directly through MSRs:
// each online core gets one RMID per socket (allocated top-down from the
// maximum), written to IA32_PQR_ASSOC and IA32_QM_EVTSEL under the MSR lock.
void PCM::initRDT()
{
    if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
        return;
#ifdef __linux__
    auto env = std::getenv("PCM_USE_RESCTRL");
    if (env != nullptr && std::string(env) == std::string("1"))
    {
        std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because environment variable PCM_USE_RESCTRL=1\n";
        resctrl.init();
        useResctrl = true;
        return;
    }
    if (resctrl.isMounted())
    {
        std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because resctrl driver is mounted.\n";
        resctrl.init();
        useResctrl = true;
        return;
    }
    if (isSecureBoot())
    {
        // direct MSR access is unavailable under Secure Boot
        std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because Secure Boot mode is enabled.\n";
        resctrl.init();
        useResctrl = true;
        return;
    }
#endif
    std::cout << "Initializing RMIDs" << std::endl;
    unsigned maxRMID;
    /* Calculate maximum number of RMID supported by socket */
    maxRMID = getMaxRMID();
    // std::cout << "Maximum RMIDs per socket in the system : " << maxRMID << "\n";
    // next RMID to hand out, per socket (allocated downwards from maxRMID-1)
    std::vector<uint32> rmid(num_sockets);
    for(int32 i = 0; i < num_sockets; i ++)
            rmid[i] = maxRMID - 1;

    /* Associate each core with 1 RMID */
    for(int32 core = 0; core < num_cores; core ++ )
    {
        if(!isCoreOnline(core)) continue;

        uint64 msr_pqr_assoc = 0 ;
        uint64 msr_qm_evtsel = 0 ;
        MSR[core]->lock();
        //Read 0xC8F MSR for each core
        MSR[core]->read(IA32_PQR_ASSOC, &msr_pqr_assoc);
        //std::cout << "initRMID reading IA32_PQR_ASSOC 0x" << std::hex << msr_pqr_assoc << std::dec << "\n";

        //std::cout << "Socket Id : " << topology[core].socket;
        // replace the low 32 bits (RMID field, 10 bits used) keeping CLOS bits
        msr_pqr_assoc &= 0xffffffff00000000ULL;
        msr_pqr_assoc |= (uint64)(rmid[topology[core].socket] & ((1ULL<<10)-1ULL));
        //std::cout << "initRMID writing IA32_PQR_ASSOC 0x" << std::hex << msr_pqr_assoc << std::dec << "\n";
        //Write 0xC8F MSR with new RMID for each core
        MSR[core]->write(IA32_PQR_ASSOC,msr_pqr_assoc);

        // IA32_QM_EVTSEL takes the RMID in bits 41:32; event id filled later
        msr_qm_evtsel = static_cast<uint64>(rmid[topology[core].socket] & ((1ULL<<10)-1ULL));
        msr_qm_evtsel <<= 32 ;
        //Write 0xC8D MSR with new RMID for each core
        //std::cout << "initRMID writing IA32_QM_EVTSEL 0x" << std::hex << msr_qm_evtsel << std::dec << "\n";
        MSR[core]->write(IA32_QM_EVTSEL,msr_qm_evtsel);
        MSR[core]->unlock();

        /* Initializing the memory bandwidth counters */
        if (CoreLocalMemoryBWMetricAvailable())
        {
            // 24-bit raw counters extended to 64-bit in software (1000ms poll)
            memory_bw_local.push_back(std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MBLCounter(MSR[core]), 24, 1000));
            if (CoreRemoteMemoryBWMetricAvailable())
            {
                memory_bw_total.push_back(std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MBTCounter(MSR[core]), 24, 1000));
            }
        }
        rmid[topology[core].socket] --;
        //std::cout << std::flush; // Explicitly flush after each iteration
    }
    /* Get The scaling factor by running CPUID.0xF.0x1 instruction */
    L3ScalingFactor = getL3ScalingFactor();
}
769
// Programs the QoS monitoring event id (low 8 bits of IA32_QM_EVTSEL) on one
// core, preserving the RMID already written there by initRDT(). No-op for
// offline cores.
void PCM::initQOSevent(const uint64 event, const int32 core)
{
    if(!isCoreOnline(core)) return;
    uint64 msr_qm_evtsel = 0 ;
    //Write 0xC8D MSR with the event id
    MSR[core]->read(IA32_QM_EVTSEL, &msr_qm_evtsel);
    //std::cout << "initQOSevent reading IA32_QM_EVTSEL 0x" << std::hex << msr_qm_evtsel << std::dec << "\n";
    // clear the event id field (bits 7:0), keep RMID and reserved bits
    msr_qm_evtsel &= 0xfffffffffffffff0ULL;
    msr_qm_evtsel |= event & ((1ULL<<8)-1ULL);
    //std::cout << "initQOSevent writing IA32_QM_EVTSEL 0x" << std::hex << msr_qm_evtsel << std::dec << "\n";
    MSR[core]->write(IA32_QM_EVTSEL,msr_qm_evtsel);
    //std::cout << std::flush;
}
783
784
// Fills pkgCStateMsr and coreCStateMsr with the per-C-state residency MSR
// addresses for the detected cpu_model. Index = C-state number; 0 means the
// C-state has no residency MSR on that model. The PCM_CSTATE_ARRAY macro
// assigns a static table and ends the switch case (it contains the break),
// so the stacked case labels above each macro invocation all share one table.
void PCM::initCStateSupportTables()
{
#define PCM_PARAM_PROTECT(...) __VA_ARGS__
#define PCM_CSTATE_ARRAY(array_ , val ) \
    { \
        static uint64 tmp[] = val; \
        PCM_COMPILE_ASSERT(sizeof(tmp) / sizeof(uint64) == (static_cast<int>(MAX_C_STATE)+1)); \
        array_ = tmp; \
        break; \
    }

    // fill package C state array
    switch(cpu_model)
    {
        case ATOM:
        case ATOM_2:
        case CENTERTON:
        case AVOTON:
        case BAYTRAIL:
        case CHERRYTRAIL:
        case APOLLO_LAKE:
        case DENVERTON:
        case SNOWRIDGE:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) );
        case NEHALEM_EP:
        case NEHALEM:
        case CLARKDALE:
        case WESTMERE_EP:
        case NEHALEM_EX:
        case WESTMERE_EX:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0, 0, 0}) );
        case SANDY_BRIDGE:
        case JAKETOWN:
        case IVY_BRIDGE:
        case IVYTOWN:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0, 0, 0}) );
        case HASWELL:
        case HASWELL_2:
        case HASWELLX:
        case BDX_DE:
        case BDX:
        case KNL:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0, 0, 0}) );
        case SKX:
        case ICX:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) );
        case HASWELL_ULT:
        case BROADWELL:
        PCM_SKL_PATH_CASES
        case BROADWELL_XEON_E3:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0x630, 0x631, 0x632}) );

        default:
            std::cerr << "PCM error: package C-states support array is not initialized. Package C-states metrics will not be shown.\n";
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }) );
    };

    // fill core C state array
    switch(cpu_model)
    {
        case ATOM:
        case ATOM_2:
        case CENTERTON:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }) );
        case NEHALEM_EP:
        case NEHALEM:
        case CLARKDALE:
        case WESTMERE_EP:
        case NEHALEM_EX:
        case WESTMERE_EX:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0, 0, 0, 0}) );
        case SANDY_BRIDGE:
        case JAKETOWN:
        case IVY_BRIDGE:
        case IVYTOWN:
        case HASWELL:
        case HASWELL_2:
        case HASWELL_ULT:
        case HASWELLX:
        case BDX_DE:
        case BDX:
        case BROADWELL:
        case BROADWELL_XEON_E3:
        case BAYTRAIL:
        case AVOTON:
        case CHERRYTRAIL:
        case APOLLO_LAKE:
        case DENVERTON:
        PCM_SKL_PATH_CASES
        case SNOWRIDGE:
        case ICX:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) );
        case KNL:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0, 0, 0, 0x3FF, 0, 0, 0, 0}) );
        case SKX:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0, 0, 0, 0x3FD, 0, 0, 0, 0}) );
        default:
            std::cerr << "PCM error: core C-states support array is not initialized. Core C-states metrics will not be shown.\n";
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }) );
    };
}
886
887
888 #ifdef __linux__
// Opens a file, first at the given path and, if that fails, at the same
// path prefixed with "/pcm". Returns NULL when neither can be opened.
FILE * tryOpen(const char * path, const char * mode)
{
    FILE * handle = fopen(path, mode);
    if (handle == NULL)
    {
        const std::string prefixed = std::string("/pcm") + path;
        handle = fopen(prefixed.c_str(), mode);
    }
    return handle;
}
898
readSysFS(const char * path,bool silent=false)899 std::string readSysFS(const char * path, bool silent = false)
900 {
901 FILE * f = tryOpen(path, "r");
902 if (!f)
903 {
904 if (silent == false) std::cerr << "ERROR: Can not open " << path << " file.\n";
905 return std::string();
906 }
907 char buffer[1024];
908 if(NULL == fgets(buffer, 1024, f))
909 {
910 if (silent == false) std::cerr << "ERROR: Can not read from " << path << ".\n";
911 fclose(f);
912 return std::string();
913 }
914 fclose(f);
915 return std::string(buffer);
916 }
917
writeSysFS(const char * path,const std::string & value,bool silent=false)918 bool writeSysFS(const char * path, const std::string & value, bool silent = false)
919 {
920 FILE * f = tryOpen(path, "w");
921 if (!f)
922 {
923 if (silent == false) std::cerr << "ERROR: Can not open " << path << " file.\n";
924 return false;
925 }
926 if (fputs(value.c_str(), f) < 0)
927 {
928 if (silent == false) std::cerr << "ERROR: Can not write to " << path << ".\n";
929 fclose(f);
930 return false;
931 }
932 fclose(f);
933 return true;
934 }
935
readMaxFromSysFS(const char * path)936 int readMaxFromSysFS(const char * path)
937 {
938 std::string content = readSysFS(path);
939 const char * buffer = content.c_str();
940 int result = -1;
941 pcm_sscanf(buffer) >> s_expect("0-") >> result;
942 if(result == -1)
943 {
944 pcm_sscanf(buffer) >> result;
945 }
946 return result;
947 }
948
949 constexpr auto perfSlotsPath = "/sys/bus/event_source/devices/cpu/events/slots";
950 constexpr auto perfBadSpecPath = "/sys/bus/event_source/devices/cpu/events/topdown-bad-spec";
951 constexpr auto perfBackEndPath = "/sys/bus/event_source/devices/cpu/events/topdown-be-bound";
952 constexpr auto perfFrontEndPath = "/sys/bus/event_source/devices/cpu/events/topdown-fe-bound";
953 constexpr auto perfRetiringPath = "/sys/bus/event_source/devices/cpu/events/topdown-retiring";
954
perfSupportsTopDown()955 bool perfSupportsTopDown()
956 {
957 static int yes = -1;
958 if (-1 == yes)
959 {
960 const auto slots = readSysFS(perfSlotsPath, true);
961 const auto bad = readSysFS(perfBadSpecPath, true);
962 const auto be = readSysFS(perfBackEndPath, true);
963 const auto fe = readSysFS(perfFrontEndPath, true);
964 const auto ret = readSysFS(perfRetiringPath, true);
965 yes = (slots.size() && bad.size() && be.size() && fe.size() && ret.size()) ? 1 : 0;
966 }
967 return 1 == yes;
968 }
969
970 #endif
971
discoverSystemTopology()972 bool PCM::discoverSystemTopology()
973 {
974 typedef std::map<uint32, uint32> socketIdMap_type;
975 socketIdMap_type socketIdMap;
976
977 PCM_CPUID_INFO cpuid_args;
978 // init constants for CPU topology leaf 0xB
979 // adapted from Topology Enumeration Reference code for Intel 64 Architecture
980 // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
981 int wasCoreReported = 0, wasThreadReported = 0;
982 int subleaf = 0, levelType, levelShift;
983 //uint32 coreSelectMask = 0, smtSelectMask = 0;
984 uint32 smtMaskWidth = 0;
985 //uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
986 uint32 corePlusSMTMaskWidth = 0;
987 uint32 coreMaskWidth = 0;
988
989 {
990 TemporalThreadAffinity aff0(0);
991 do
992 {
993 pcm_cpuid(0xb, subleaf, cpuid_args);
994 if (cpuid_args.array[1] == 0)
995 { // if EBX ==0 then this subleaf is not valid, we can exit the loop
996 break;
997 }
998 levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
999 levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
1000 switch (levelType)
1001 {
1002 case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
1003 smtMaskWidth = levelShift;
1004 wasThreadReported = 1;
1005 break;
1006 case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
1007 corePlusSMTMaskWidth = levelShift;
1008 wasCoreReported = 1;
1009 break;
1010 default:
1011 break;
1012 }
1013 subleaf++;
1014 } while (1);
1015 }
1016
1017 if (wasThreadReported && wasCoreReported)
1018 {
1019 coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
1020 }
1021 else if (!wasCoreReported && wasThreadReported)
1022 {
1023 coreMaskWidth = smtMaskWidth;
1024 }
1025 else
1026 {
1027 std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
1028 return false;
1029 }
1030
1031 uint32 l2CacheMaskShift = 0;
1032 #ifdef PCM_DEBUG_TOPOLOGY
1033 uint32 threadsSharingL2;
1034 #endif
1035 uint32 l2CacheMaskWidth;
1036
1037 pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
1038 l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
1039 #ifdef PCM_DEBUG_TOPOLOGY
1040 threadsSharingL2 = l2CacheMaskWidth;
1041 #endif
1042 for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
1043 {
1044 l2CacheMaskShift++;
1045 }
1046 #ifdef PCM_DEBUG_TOPOLOGY
1047 std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
1048 << " [the most significant bit = " << l2CacheMaskShift << "]\n";
1049 #endif
1050
1051 auto populateEntry = [&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry & entry, const int apic_id)
1052 {
1053 entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
1054 entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
1055 entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
1056 entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
1057 };
1058
1059 #ifdef _MSC_VER
1060 // version for Windows 7 and later version
1061
1062 char * slpi = new char[sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)];
1063 DWORD len = (DWORD)sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX);
1064 BOOL res = GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)slpi, &len);
1065
1066 while (res == FALSE)
1067 {
1068 delete[] slpi;
1069
1070 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
1071 {
1072 slpi = new char[len];
1073 res = GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)slpi, &len);
1074 }
1075 else
1076 {
1077 std::wcerr << "Error in Windows function 'GetLogicalProcessorInformationEx': " <<
1078 GetLastError() << " ";
1079 const TCHAR * strError = _com_error(GetLastError()).ErrorMessage();
1080 if (strError) std::wcerr << strError;
1081 std::wcerr << "\n";
1082 return false;
1083 }
1084 }
1085
1086 char * base_slpi = slpi;
1087 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pi = NULL;
1088
1089 for ( ; slpi < base_slpi + len; slpi += (DWORD)pi->Size)
1090 {
1091 pi = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)slpi;
1092 if (pi->Relationship == RelationProcessorCore)
1093 {
1094 threads_per_core = (pi->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
1095 // std::cout << "thr per core: " << threads_per_core << "\n";
1096 num_cores += threads_per_core;
1097 }
1098 }
1099 // std::cout << std::flush;
1100
1101 num_online_cores = num_cores;
1102
1103 if (num_cores != GetActiveProcessorCount(ALL_PROCESSOR_GROUPS))
1104 {
1105 std::cerr << "Error in processor group size counting: " << num_cores << "!=" << GetActiveProcessorCount(ALL_PROCESSOR_GROUPS) << "\n";
1106 std::cerr << "Make sure your binary is compiled for 64-bit: using 'x64' platform configuration.\n";
1107 return false;
1108 }
1109
1110 for (int i = 0; i < (int)num_cores; i++)
1111 {
1112 ThreadGroupTempAffinity affinity(i);
1113
1114 pcm_cpuid(0xb, 0x0, cpuid_args);
1115
1116 int apic_id = cpuid_args.array[3];
1117
1118 TopologyEntry entry;
1119 entry.os_id = i;
1120
1121 populateEntry(entry, apic_id);
1122
1123 topology.push_back(entry);
1124 socketIdMap[entry.socket] = 0;
1125 }
1126
1127 delete[] base_slpi;
1128
1129 #else
1130 // for Linux, Mac OS, FreeBSD and DragonFlyBSD
1131
1132 TopologyEntry entry;
1133
1134 #ifdef __linux__
1135 num_cores = readMaxFromSysFS("/sys/devices/system/cpu/present");
1136 if(num_cores == -1)
1137 {
1138 std::cerr << "Cannot read number of present cores\n";
1139 return false;
1140 }
1141 ++num_cores;
1142
1143 // open /proc/cpuinfo
1144 FILE * f_cpuinfo = fopen("/proc/cpuinfo", "r");
1145 if (!f_cpuinfo)
1146 {
1147 std::cerr << "Cannot open /proc/cpuinfo file.\n";
1148 return false;
1149 }
1150
1151 // map with key=pkg_apic_id (not necessarily zero based or sequential) and
1152 // associated value=socket_id that should be 0 based and sequential
1153 std::map<int, int> found_pkg_ids;
1154 topology.resize(num_cores);
1155 char buffer[1024];
1156 while (0 != fgets(buffer, 1024, f_cpuinfo))
1157 {
1158 if (strncmp(buffer, "processor", sizeof("processor") - 1) == 0)
1159 {
1160 pcm_sscanf(buffer) >> s_expect("processor\t: ") >> entry.os_id;
1161 //std::cout << "os_core_id: " << entry.os_id << "\n";
1162 TemporalThreadAffinity _(entry.os_id);
1163 pcm_cpuid(0xb, 0x0, cpuid_args);
1164 int apic_id = cpuid_args.array[3];
1165
1166 populateEntry(entry, apic_id);
1167
1168 topology[entry.os_id] = entry;
1169 socketIdMap[entry.socket] = 0;
1170 ++num_online_cores;
1171 }
1172 }
1173 //std::cout << std::flush;
1174 fclose(f_cpuinfo);
1175
1176 // produce debug output similar to Intel MPI cpuinfo
1177 #ifdef PCM_DEBUG_TOPOLOGY
1178 std::cerr << "===== Processor identification =====\n";
1179 std::cerr << "Processor Thread Id. Core Id. Tile Id. Package Id.\n";
1180 std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket;
1181 for(auto it = topology.begin(); it != topology.end(); ++it)
1182 {
1183 std::cerr << std::left << std::setfill(' ')
1184 << std::setw(16) << it->os_id
1185 << std::setw(16) << it->thread_id
1186 << std::setw(16) << it->core_id
1187 << std::setw(16) << it->tile_id
1188 << std::setw(16) << it->socket
1189 << "\n";
1190 if(std::find(core_id_by_socket[it->socket].begin(), core_id_by_socket[it->socket].end(), it->core_id)
1191 == core_id_by_socket[it->socket].end())
1192 core_id_by_socket[it->socket].push_back(it->core_id);
1193 // add socket offset to distinguish cores and tiles from different sockets
1194 os_id_by_core[(it->socket << 15) + it->core_id].push_back(it->os_id);
1195 os_id_by_tile[(it->socket << 15) + it->tile_id].push_back(it->os_id);
1196 }
1197 std::cerr << "===== Placement on packages =====\n";
1198 std::cerr << "Package Id. Core Id. Processors\n";
1199 for(auto pkg = core_id_by_socket.begin(); pkg != core_id_by_socket.end(); ++pkg)
1200 {
1201 auto core_id = pkg->second.begin();
1202 std::cerr << std::left << std::setfill(' ') << std::setw(15) << pkg->first << *core_id;
1203 for(++core_id; core_id != pkg->second.end(); ++core_id)
1204 {
1205 std::cerr << "," << *core_id;
1206 }
1207 std::cerr << "\n";
1208 }
1209 std::cerr << "\n===== Core/Tile sharing =====\n";
1210 std::cerr << "Level Processors\nCore ";
1211 for(auto core = os_id_by_core.begin(); core != os_id_by_core.end(); ++core)
1212 {
1213 auto os_id = core->second.begin();
1214 std::cerr << "(" << *os_id;
1215 for(++os_id; os_id != core->second.end(); ++os_id) {
1216 std::cerr << "," << *os_id;
1217 }
1218 std::cerr << ")";
1219 }
1220 std::cerr << "\nTile / L2$ ";
1221 for(auto core = os_id_by_tile.begin(); core != os_id_by_tile.end(); ++core)
1222 {
1223 auto os_id = core->second.begin();
1224 std::cerr << "(" << *os_id;
1225 for(++os_id; os_id != core->second.end(); ++os_id) {
1226 std::cerr << "," << *os_id;
1227 }
1228 std::cerr << ")";
1229 }
1230 std::cerr << "\n";
1231 #endif // PCM_DEBUG_TOPOLOGY
1232 #elif defined(__FreeBSD__) || defined(__DragonFly__)
1233
1234 size_t size = sizeof(num_cores);
1235 cpuctl_cpuid_args_t cpuid_args_freebsd;
1236 int fd;
1237
1238 if(0 != sysctlbyname("hw.ncpu", &num_cores, &size, NULL, 0))
1239 {
1240 std::cerr << "Unable to get hw.ncpu from sysctl.\n";
1241 return false;
1242 }
1243 num_online_cores = num_cores;
1244
1245 if (modfind("cpuctl") == -1)
1246 {
1247 std::cerr << "cpuctl(4) not loaded.\n";
1248 return false;
1249 }
1250
1251 for (int i = 0; i < num_cores; i++)
1252 {
1253 char cpuctl_name[64];
1254 int apic_id;
1255
1256 snprintf(cpuctl_name, 64, "/dev/cpuctl%d", i);
1257 fd = ::open(cpuctl_name, O_RDWR);
1258
1259 cpuid_args_freebsd.level = 0xb;
1260
1261 ::ioctl(fd, CPUCTL_CPUID, &cpuid_args_freebsd);
1262
1263 apic_id = cpuid_args_freebsd.data[3];
1264
1265 entry.os_id = i;
1266
1267 populateEntry(entry, apic_id);
1268
1269 if (entry.socket == 0 && entry.core_id == 0) ++threads_per_core;
1270
1271 topology.push_back(entry);
1272 socketIdMap[entry.socket] = 0;
1273 }
1274
1275 #else // Getting processor info for Mac OS
1276 #define SAFE_SYSCTLBYNAME(message, ret_value) \
1277 { \
1278 size_t size; \
1279 char *pParam; \
1280 if(0 != sysctlbyname(message, NULL, &size, NULL, 0)) \
1281 { \
1282 std::cerr << "Unable to determine size of " << message << " sysctl return type.\n"; \
1283 return false; \
1284 } \
1285 if(NULL == (pParam = (char *)malloc(size))) \
1286 { \
1287 std::cerr << "Unable to allocate memory for " << message << "\n"; \
1288 return false; \
1289 } \
1290 if(0 != sysctlbyname(message, (void*)pParam, &size, NULL, 0)) \
1291 { \
1292 std::cerr << "Unable to get " << message << " from sysctl.\n"; \
1293 return false; \
1294 } \
1295 ret_value = convertUnknownToInt(size, pParam); \
1296 free(pParam); \
1297 }
1298 // End SAFE_SYSCTLBYNAME
1299
1300 // Using OSXs sysctl to get the number of CPUs right away
1301 SAFE_SYSCTLBYNAME("hw.logicalcpu", num_cores)
1302 num_online_cores = num_cores;
1303
1304 #undef SAFE_SYSCTLBYNAME
1305
1306 // The OSX version needs the MSR handle earlier so that it can build the CPU topology.
1307 // This topology functionality should potentially go into a different KEXT
1308 for(int i = 0; i < num_cores; i++)
1309 {
1310 MSR.push_back(std::make_shared<SafeMsrHandle>(i));
1311 }
1312
1313 TopologyEntry *entries = new TopologyEntry[num_cores];
1314 MSR[0]->buildTopology(num_cores, entries);
1315 for(int i = 0; i < num_cores; i++){
1316 socketIdMap[entries[i].socket] = 0;
1317 if(entries[i].os_id >= 0)
1318 {
1319 if(entries[i].core_id == 0 && entries[i].socket == 0) ++threads_per_core;
1320 topology.push_back(entries[i]);
1321 }
1322 }
1323 delete[] entries;
1324 // End of OSX specific code
1325 #endif // end of ifndef __APPLE__
1326
1327 #endif //end of ifdef _MSC_VER
1328
1329 if(num_cores == 0) {
1330 num_cores = (int32)topology.size();
1331 }
1332 if(num_sockets == 0) {
1333 num_sockets = (int32)(std::max)(socketIdMap.size(), (size_t)1);
1334 }
1335
1336 socketIdMap_type::iterator s = socketIdMap.begin();
1337 for (uint32 sid = 0; s != socketIdMap.end(); ++s)
1338 {
1339 s->second = sid++;
1340 // first is apic id, second is logical socket id
1341 systemTopology->addSocket( s->first, s->second );
1342 }
1343
1344 for (int32 cid = 0; cid < num_cores; ++cid)
1345 {
1346 //std::cerr << "Cid: " << cid << "\n";
1347 systemTopology->addThread( cid, topology[cid] );
1348 }
1349
1350 // All threads are here now so we can set the refCore for a socket
1351 for ( auto socket : systemTopology->sockets() )
1352 socket->setRefCore();
1353
1354 // use map to change apic socket id to the logical socket id
1355 for (int i = 0; (i < (int)num_cores) && (!socketIdMap.empty()); ++i)
1356 {
1357 if(isCoreOnline((int32)i))
1358 topology[i].socket = socketIdMap[topology[i].socket];
1359 }
1360
1361 #if 0
1362 std::cerr << "Number of socket ids: " << socketIdMap.size() << "\n";
1363 std::cerr << "Topology:\nsocket os_id core_id\n";
1364 for (int i = 0; i < num_cores; ++i)
1365 {
1366 std::cerr << topology[i].socket << " " << topology[i].os_id << " " << topology[i].core_id << "\n";
1367 }
1368 #endif
1369 if (threads_per_core == 0)
1370 {
1371 for (int i = 0; i < (int)num_cores; ++i)
1372 {
1373 if (topology[i].socket == topology[0].socket && topology[i].core_id == topology[0].core_id)
1374 ++threads_per_core;
1375 }
1376 assert(threads_per_core != 0);
1377 }
1378 if(num_phys_cores_per_socket == 0 && num_cores == num_online_cores) num_phys_cores_per_socket = num_cores / num_sockets / threads_per_core;
1379 if(num_online_cores == 0) num_online_cores = num_cores;
1380
1381 int32 i = 0;
1382
1383 socketRefCore.resize(num_sockets, -1);
1384 for(i = 0; i < num_cores; ++i)
1385 {
1386 if(isCoreOnline(i))
1387 {
1388 socketRefCore[topology[i].socket] = i;
1389 }
1390 }
1391
1392 num_online_sockets = 0;
1393 for(i = 0; i < num_sockets; ++i)
1394 {
1395 if(isSocketOnline(i))
1396 {
1397 ++num_online_sockets;
1398 }
1399 }
1400
1401 FrontendBoundSlots.resize(num_cores, 0);
1402 BadSpeculationSlots.resize(num_cores, 0);
1403 BackendBoundSlots.resize(num_cores, 0);
1404 RetiringSlots.resize(num_cores, 0);
1405 AllSlotsRaw.resize(num_cores, 0);
1406
1407 #if 0
1408 std::cerr << "Socket reference cores:\n";
1409 for(int32 i=0; i< num_sockets;++i)
1410 {
1411 std::cerr << "socketRefCore[" << i << "]=" << socketRefCore[i] << "\n";
1412 }
1413 #endif
1414
1415 return true;
1416 }
1417
printSystemTopology() const1418 void PCM::printSystemTopology() const
1419 {
1420 if(num_cores == num_online_cores)
1421 {
1422 std::cerr << "Number of physical cores: " << (num_cores/threads_per_core) << "\n";
1423 }
1424
1425 std::cerr << "Number of logical cores: " << num_cores << "\n";
1426 std::cerr << "Number of online logical cores: " << num_online_cores << "\n";
1427
1428 if(num_cores == num_online_cores)
1429 {
1430 std::cerr << "Threads (logical cores) per physical core: " << threads_per_core << "\n";
1431 }
1432 else
1433 {
1434 std::cerr << "Offlined cores: ";
1435 for (int i = 0; i < (int)num_cores; ++i)
1436 if(isCoreOnline((int32)i) == false)
1437 std::cerr << i << " ";
1438 std::cerr << "\n";
1439 }
1440 std::cerr << "Num sockets: " << num_sockets << "\n";
1441 if (num_phys_cores_per_socket > 0)
1442 {
1443 std::cerr << "Physical cores per socket: " << num_phys_cores_per_socket << "\n";
1444 }
1445 std::cerr << "Last level cache slices per socket: " << getMaxNumOfCBoxes() << "\n";
1446 std::cerr << "Core PMU (perfmon) version: " << perfmon_version << "\n";
1447 std::cerr << "Number of core PMU generic (programmable) counters: " << core_gen_counter_num_max << "\n";
1448 std::cerr << "Width of generic (programmable) counters: " << core_gen_counter_width << " bits\n";
1449 if (perfmon_version > 0)
1450 {
1451 std::cerr << "Number of core PMU fixed counters: " << core_fixed_counter_num_max << "\n";
1452 std::cerr << "Width of fixed counters: " << core_fixed_counter_width << " bits\n";
1453 }
1454 if (perfmon_version < 2 && vm == true)
1455 {
1456 std::cerr << "Warning: detected an unsupported virtualized environment: the hypervisor has limited the core PMU (perfmon) version to " << perfmon_version << "\n";
1457 }
1458 }
1459
initMSR()1460 bool PCM::initMSR()
1461 {
1462 #ifndef __APPLE__
1463 try
1464 {
1465 for (int i = 0; i < (int)num_cores; ++i)
1466 {
1467 if ( isCoreOnline( (int32)i ) ) {
1468 MSR.push_back(std::make_shared<SafeMsrHandle>(i));
1469 systemTopology->addMSRHandleToOSThread( MSR.back(), (uint32)i );
1470 } else { // the core is offlined, assign an invalid MSR handle
1471 MSR.push_back(std::make_shared<SafeMsrHandle>());
1472 systemTopology->addMSRHandleToOSThread( MSR.back(), (uint32)i );
1473 }
1474 }
1475 }
1476 catch (...)
1477 {
1478 // failed
1479 MSR.clear();
1480
1481 std::cerr << "Can not access CPUs Model Specific Registers (MSRs).\n";
1482 #ifdef _MSC_VER
1483 std::cerr << "You must have signed msr.sys driver in your current directory and have administrator rights to run this program.\n";
1484 #elif defined(__linux__)
1485 std::cerr << "Try to execute 'modprobe msr' as root user and then\n";
1486 std::cerr << "you also must have read and write permissions for /dev/cpu/*/msr devices (/dev/msr* for Android). The 'chown' command can help.\n";
1487 #elif defined(__FreeBSD__) || defined(__DragonFly__)
1488 std::cerr << "Ensure cpuctl module is loaded and that you have read and write\n";
1489 std::cerr << "permissions for /dev/cpuctl* devices (the 'chown' command can help).\n";
1490 #endif
1491 return false;
1492 }
1493 #endif
1494 return true;
1495 }
1496
detectNominalFrequency()1497 bool PCM::detectNominalFrequency()
1498 {
1499 if (MSR.size())
1500 {
1501 uint64 freq = 0;
1502 MSR[socketRefCore[0]]->read(PLATFORM_INFO_ADDR, &freq);
1503 const uint64 bus_freq = (
1504 cpu_model == SANDY_BRIDGE
1505 || cpu_model == JAKETOWN
1506 || cpu_model == IVYTOWN
1507 || cpu_model == HASWELLX
1508 || cpu_model == BDX_DE
1509 || cpu_model == BDX
1510 || cpu_model == IVY_BRIDGE
1511 || cpu_model == HASWELL
1512 || cpu_model == BROADWELL
1513 || cpu_model == AVOTON
1514 || cpu_model == APOLLO_LAKE
1515 || cpu_model == DENVERTON
1516 || useSKLPath()
1517 || cpu_model == SNOWRIDGE
1518 || cpu_model == KNL
1519 || cpu_model == SKX
1520 || cpu_model == ICX
1521 ) ? (100000000ULL) : (133333333ULL);
1522
1523 nominal_frequency = ((freq >> 8) & 255) * bus_freq;
1524
1525 if(!nominal_frequency)
1526 nominal_frequency = get_frequency_from_cpuid();
1527
1528 if(!nominal_frequency)
1529 {
1530 std::cerr << "Error: Can not detect core frequency.\n";
1531 destroyMSR();
1532 return false;
1533 }
1534
1535 #ifndef PCM_SILENT
1536 std::cerr << "Nominal core frequency: " << nominal_frequency << " Hz\n";
1537 #endif
1538 }
1539
1540 return true;
1541 }
1542
initEnergyMonitoring()1543 void PCM::initEnergyMonitoring()
1544 {
1545 if(packageEnergyMetricsAvailable() && MSR.size())
1546 {
1547 uint64 rapl_power_unit = 0;
1548 MSR[socketRefCore[0]]->read(MSR_RAPL_POWER_UNIT,&rapl_power_unit);
1549 uint64 energy_status_unit = extract_bits(rapl_power_unit,8,12);
1550 if (cpu_model == PCM::CHERRYTRAIL || cpu_model == PCM::BAYTRAIL)
1551 joulesPerEnergyUnit = double(1ULL << energy_status_unit)/1000000.; // (2)^energy_status_unit microJoules
1552 else
1553 joulesPerEnergyUnit = 1./double(1ULL<<energy_status_unit); // (1/2)^energy_status_unit
1554 //std::cout << "MSR_RAPL_POWER_UNIT: " << energy_status_unit << "; Joules/unit " << joulesPerEnergyUnit << "\n";
1555 uint64 power_unit = extract_bits(rapl_power_unit,0,3);
1556 double wattsPerPowerUnit = 1./double(1ULL<<power_unit);
1557
1558 uint64 package_power_info = 0;
1559 MSR[socketRefCore[0]]->read(MSR_PKG_POWER_INFO,&package_power_info);
1560 pkgThermalSpecPower = (int32) (double(extract_bits(package_power_info, 0, 14))*wattsPerPowerUnit);
1561 pkgMinimumPower = (int32) (double(extract_bits(package_power_info, 16, 30))*wattsPerPowerUnit);
1562 pkgMaximumPower = (int32) (double(extract_bits(package_power_info, 32, 46))*wattsPerPowerUnit);
1563
1564 #ifndef PCM_SILENT
1565 std::cerr << "Package thermal spec power: " << pkgThermalSpecPower << " Watt; ";
1566 std::cerr << "Package minimum power: " << pkgMinimumPower << " Watt; ";
1567 std::cerr << "Package maximum power: " << pkgMaximumPower << " Watt;\n";
1568 #endif
1569
1570 int i = 0;
1571
1572 if(energy_status.empty())
1573 for (i = 0; i < (int)num_sockets; ++i)
1574 energy_status.push_back(
1575 std::make_shared<CounterWidthExtender>(
1576 new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[i]], MSR_PKG_ENERGY_STATUS), 32, 10000));
1577
1578 if(dramEnergyMetricsAvailable() && dram_energy_status.empty())
1579 for (i = 0; i < (int)num_sockets; ++i)
1580 dram_energy_status.push_back(
1581 std::make_shared<CounterWidthExtender>(
1582 new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[i]], MSR_DRAM_ENERGY_STATUS), 32, 10000));
1583 }
1584 }
1585
// PCI device ids of the server UBOX0 device, used to locate the UBOX per socket
static const uint32 UBOX0_DEV_IDS[] = {
    0x3451
};

// per-socket location of the UBOX0 PCI device, filled by initSocket2Ubox0Bus();
// presumably (PCI group/segment, bus) — confirm against initSocket2Bus's definition
std::vector<std::pair<uint32, uint32> > socket2UBOX0bus;

// forward declaration; defined elsewhere in this file
void initSocket2Bus(std::vector<std::pair<uint32, uint32> > & socket2bus, uint32 device, uint32 function, const uint32 DEV_IDS[], uint32 devIdsSize);
1593
initSocket2Ubox0Bus()1594 void initSocket2Ubox0Bus()
1595 {
1596 initSocket2Bus(socket2UBOX0bus, SERVER_UBOX0_REGISTER_DEV_ADDR, SERVER_UBOX0_REGISTER_FUNC_ADDR,
1597 UBOX0_DEV_IDS, (uint32)sizeof(UBOX0_DEV_IDS) / sizeof(UBOX0_DEV_IDS[0]));
1598 }
1599
// Initializes uncore monitoring objects:
//  - on servers with PCICFG uncore: one ServerPCICFGUncore per socket;
//  - on clients with MC counters: the integrated memory controller bandwidth counters;
//  - on ICX/SNOWRIDGE: additionally the free-running ServerBW objects (via UBOX0 bus discovery);
//  - finally the uncore PMUs, either through Linux perf or by direct MSR/PCICFG programming.
// Failures are reported to stderr and degrade functionality rather than abort.
void PCM::initUncoreObjects()
{
    if (hasPCICFGUncore() && MSR.size())
    {
        int i = 0;
        bool failed = false;
        try
        {
            for (i = 0; i < (int)num_sockets; ++i)
            {
                server_pcicfg_uncore.push_back(std::make_shared<ServerPCICFGUncore>(i, this));
            }
        }
        catch (std::runtime_error & e)
        {
            std::cerr << e.what() << "\n";
            failed = true;
        }
        catch (...)
        {
            failed = true;
        }
        if (failed)
        {
            // drop partially-constructed per-socket objects; uncore counting is disabled
            server_pcicfg_uncore.clear();
            std::cerr << "Can not access server uncore PCI configuration space. Access to uncore counters (memory and QPI bandwidth) is disabled.\n";
#ifdef _MSC_VER
            std::cerr << "You must have signed msr.sys driver in your current directory and have administrator rights to run this program.\n";
#else
            //std::cerr << "you must have read and write permissions for /proc/bus/pci/7f/10.* and /proc/bus/pci/ff/10.* devices (the 'chown' command can help).\n";
            //std::cerr << "you must have read and write permissions for /dev/mem device (the 'chown' command can help).\n";
            //std::cerr << "you must have read permission for /sys/firmware/acpi/tables/MCFG device (the 'chmod' command can help).\n";
            std::cerr << "You must be root to access server uncore counters in PCM.\n";
#endif
        }
    } else if(hasClientMCCounters() && MSR.size())
    {
        // initialize memory bandwidth counting
        try
        {
            // Tiger Lake needs a dedicated client-bandwidth implementation
            switch (cpu_model)
            {
            case TGL:
                clientBW = std::make_shared<TGLClientBW>();
                break;
            default:
                clientBW = std::make_shared<ClientBW>();
            }
            // 32-bit IMC counters are wrapped to extend their effective width
            clientImcReads = std::make_shared<CounterWidthExtender>(
                new CounterWidthExtender::ClientImcReadsCounter(clientBW), 32, 10000);
            clientImcWrites = std::make_shared<CounterWidthExtender>(
                new CounterWidthExtender::ClientImcWritesCounter(clientBW), 32, 10000);
            clientIoRequests = std::make_shared<CounterWidthExtender>(
                new CounterWidthExtender::ClientIoRequestsCounter(clientBW), 32, 10000);

        } catch(...)
        {
            std::cerr << "Can not read memory controller counter information from PCI configuration space. Access to memory bandwidth counters is not possible.\n";
#ifdef _MSC_VER
            // TODO: add message here
#endif
#ifdef __linux__
            std::cerr << "You must be root to access these SandyBridge/IvyBridge/Haswell counters in PCM. \n";
#endif
        }
    }
    if (cpu_model == ICX || cpu_model == SNOWRIDGE)
    {
        bool failed = false;
        try
        {
            initSocket2Ubox0Bus();
        }
        catch (std::exception & e)
        {
            std::cerr << e.what() << "\n";
            failed = true;
        }
        catch (...)
        {
            failed = true;
        }
        if (failed)
        {
            std::cerr << "Can not read PCI configuration space bus mapping. Access to uncore counters is disabled.\n";
        }
        // create one ServerBW per socket; the bound is the minimum of the three
        // sizes so a partial UBOX/uncore discovery cannot cause out-of-range access
        for (size_t s = 0; s < (size_t)num_sockets && s < socket2UBOX0bus.size() && s < server_pcicfg_uncore.size(); ++s)
        {
            serverBW.push_back(std::make_shared<ServerBW>(server_pcicfg_uncore[s]->getNumMC(), socket2UBOX0bus[s].first, socket2UBOX0bus[s].second));
            // std::cout << " Added serverBW object server_pcicfg_uncore[s]->getNumMC() = " << server_pcicfg_uncore[s]->getNumMC() << std::endl;
        }
        if (socket2UBOX0bus.size() != (size_t)num_sockets)
        {
            std::cerr << "PCM warning: found " << socket2UBOX0bus.size() << " uboxes. Expected " << num_sockets << std::endl;
        }
    }

    // choose the uncore PMU programming backend
    if (useLinuxPerfForUncore())
    {
        initUncorePMUsPerf();
    }
    else
    {
        initUncorePMUsDirect();
    }
}
1706
// Programs uncore PMUs by direct MSR access (no Linux perf): per socket it
// unfreezes the global uncore PMON control, then builds UBOX, PCU, IIO and
// CBo UncorePMU objects from the model-specific register addresses.
// NOTE(review): the UncorePMU constructor argument order appears to be
// (box control, 4x counter control, 4x counter, fixed ctl, fixed ctr,
// filter0, filter1) with empty shared_ptrs for registers a box lacks —
// confirm against the UncorePMU declaration.
void PCM::initUncorePMUsDirect()
{
    for (uint32 s = 0; s < (uint32)num_sockets; ++s)
    {
        auto & handle = MSR[socketRefCore[s]];
        // unfreeze uncore PMUs
        switch (cpu_model)
        {
        case SKX:
            handle->write(MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 61ULL);
            break;
        case HASWELLX:
        case BDX:
            handle->write(MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 29ULL);
            break;
        case IVYTOWN:
            handle->write(IVT_MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 29ULL);
            break;
        }
        // UBOX PMU: Jaketown/Ivytown use a distinct register layout
        if (IVYTOWN == cpu_model || JAKETOWN == cpu_model)
        {
            uboxPMUs.push_back(
                UncorePMU(
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTL1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTR1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UCLK_FIXED_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UCLK_FIXED_CTR_ADDR)
                )
            );
        }
        else
        {
            uboxPMUs.push_back(
                UncorePMU(
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTR_ADDR)
                )
            );
        }
        // PCU (power control unit) PMU: register addresses differ by generation
        switch (cpu_model)
        {
        case IVYTOWN:
        case JAKETOWN:
            pcuPMUs.push_back(
                UncorePMU(
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_BOX_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL1_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL2_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL3_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR1_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR2_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR3_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_BOX_FILTER_ADDR)
                )
            );
            break;
        case BDX_DE:
        case BDX:
        case KNL:
        case HASWELLX:
        case SKX:
        case ICX:
            pcuPMUs.push_back(
                UncorePMU(
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_BOX_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL1_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL2_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL3_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR1_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR2_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR3_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_BOX_FILTER_ADDR)
                )
            );
            break;
        }
    }
    // init IIO addresses
    if (getCPUModel() == PCM::SKX)
    {
        // SKX: IIO stacks at a fixed base with a constant per-stack register stride
        iioPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (int unit = 0; unit < SKX_IIO_STACK_COUNT; ++unit)
            {
                iioPMUs[s][unit] = UncorePMU(
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_UNIT_CTL + SKX_IIO_PM_REG_STEP * unit),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 3),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 3)
                );
            }
        }
    }
    else if (getCPUModel() == PCM::ICX)
    {
        // ICX: per-stack base addresses come from a table, with fixed ctl/ctr offsets
        iioPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (int unit = 0; unit < ICX_IIO_STACK_COUNT; ++unit)
            {
                iioPMUs[s][unit] = UncorePMU(
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit]),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 0),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 1),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 2),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 3),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 0),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 1),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 2),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 3)
                );
            }
        }
    }
    else if (getCPUModel() == PCM::SNOWRIDGE)
    {
        // SNR: same base+stride scheme as SKX but with SNR-specific constants
        iioPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (int unit = 0; unit < SNR_IIO_STACK_COUNT; ++unit)
            {
                iioPMUs[s][unit] = UncorePMU(
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_UNIT_CTL + SNR_IIO_PM_REG_STEP * unit),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 3),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 3)
                );
            }
        }
    }

    if (hasPCICFGUncore() && MSR.size())
    {
        // CBo (LLC slice) PMUs: counters are 48-bit and wrapped in
        // CounterWidthExtender; filter1 exists only on some models
        // (CX_MSR_PMON_BOX_FILTER1 returns 0 when absent)
        cboPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
            {
                const auto filter1MSR = CX_MSR_PMON_BOX_FILTER1(cbo);
                std::shared_ptr<HWRegister> filter1MSRHandle = filter1MSR ? std::make_shared<MSRRegister>(handle, filter1MSR) : std::shared_ptr<HWRegister>();
                cboPMUs[s].push_back(
                    UncorePMU(
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_BOX_CTL(cbo)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 0)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 1)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 2)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 3)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 0)), 48, 5555)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 1)), 48, 5555)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 2)), 48, 5555)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 3)), 48, 5555)),
                        std::shared_ptr<MSRRegister>(),
                        std::shared_ptr<MSRRegister>(),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_BOX_FILTER(cbo)),
                        filter1MSRHandle
                    )
                );
            }
        }
    }
}
1911
1912 #ifdef PCM_USE_PERF
1913 std::vector<int> enumeratePerfPMUs(const std::string & type, int max_id);
1914 void populatePerfPMUs(unsigned socket_, const std::vector<int> & ids, std::vector<UncorePMU> & pmus, bool fixed, bool filter0 = false, bool filter1 = false);
1915 #endif
1916
initUncorePMUsPerf()1917 void PCM::initUncorePMUsPerf()
1918 {
1919 #ifdef PCM_USE_PERF
1920 iioPMUs.resize(num_sockets);
1921 cboPMUs.resize(num_sockets);
1922 for (uint32 s = 0; s < (uint32)num_sockets; ++s)
1923 {
1924 populatePerfPMUs(s, enumeratePerfPMUs("pcu", 100), pcuPMUs, false, true);
1925 populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true);
1926 populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), cboPMUs[s], false, true, true);
1927 populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), cboPMUs[s], false, true, true);
1928 std::vector<UncorePMU> iioPMUVector;
1929 populatePerfPMUs(s, enumeratePerfPMUs("iio", 100), iioPMUVector, false);
1930 for (size_t i = 0; i < iioPMUVector.size(); ++i)
1931 {
1932 iioPMUs[s][i] = iioPMUVector[i];
1933 }
1934 }
1935 #endif
1936 }
1937
1938 #ifdef __linux__
1939
1940 #define PCM_NMI_WATCHDOG_PATH "/proc/sys/kernel/nmi_watchdog"
1941
isNMIWatchdogEnabled()1942 bool isNMIWatchdogEnabled()
1943 {
1944 const auto watchdog = readSysFS(PCM_NMI_WATCHDOG_PATH);
1945 if (watchdog.length() == 0)
1946 {
1947 return false;
1948 }
1949
1950 return (std::atoi(watchdog.c_str()) == 1);
1951 }
1952
disableNMIWatchdog(const bool silent)1953 void disableNMIWatchdog(const bool silent)
1954 {
1955 if (!silent) std::cerr << "Disabling NMI watchdog since it consumes one hw-PMU counter.\n";
1956 writeSysFS(PCM_NMI_WATCHDOG_PATH, "0");
1957 }
1958
enableNMIWatchdog(const bool silent)1959 void enableNMIWatchdog(const bool silent)
1960 {
1961 if (!silent) std::cerr << " Re-enabling NMI watchdog.\n";
1962 writeSysFS(PCM_NMI_WATCHDOG_PATH, "1");
1963 }
1964 #endif
1965
1966 class CoreTaskQueue
1967 {
1968 std::queue<std::packaged_task<void()> > wQueue;
1969 std::mutex m;
1970 std::condition_variable condVar;
1971 std::thread worker;
1972 CoreTaskQueue() = delete;
1973 CoreTaskQueue(CoreTaskQueue &) = delete;
1974 public:
CoreTaskQueue(int32 core)1975 CoreTaskQueue(int32 core) :
1976 worker([=]() {
1977 TemporalThreadAffinity tempThreadAffinity(core, false);
1978 std::unique_lock<std::mutex> lock(m);
1979 while (1) {
1980 while (wQueue.empty()) {
1981 condVar.wait(lock);
1982 }
1983 while (!wQueue.empty()) {
1984 wQueue.front()();
1985 wQueue.pop();
1986 }
1987 }
1988 })
1989 {}
push(std::packaged_task<void ()> & task)1990 void push(std::packaged_task<void()> & task)
1991 {
1992 std::unique_lock<std::mutex> lock(m);
1993 wQueue.push(std::move(task));
1994 condVar.notify_one();
1995 }
1996 };
1997
// PCM constructor.
// Detects the CPU model, discovers system topology, opens MSR handles and
// initializes energy monitoring, uncore PMU objects and RDT; finally spawns
// one CoreTaskQueue worker per core. Every initialization step that fails
// causes an early return, leaving the object in a state where good()
// returns false — callers must check good() before use.
PCM::PCM() :
    cpu_family(-1),
    cpu_model(-1),
    cpu_stepping(-1),
    cpu_microcode_level(-1),
    max_cpuid(-1),
    threads_per_core(0),
    num_cores(0),
    num_sockets(0),
    num_phys_cores_per_socket(0),
    num_online_cores(0),
    num_online_sockets(0),
    core_gen_counter_num_max(0),
    core_gen_counter_num_used(0), // 0 means no core gen counters used
    core_gen_counter_width(0),
    core_fixed_counter_num_max(0),
    core_fixed_counter_num_used(0),
    core_fixed_counter_width(0),
    uncore_gen_counter_num_max(8),
    uncore_gen_counter_num_used(0),
    uncore_gen_counter_width(48),
    uncore_fixed_counter_num_max(1),
    uncore_fixed_counter_num_used(0),
    uncore_fixed_counter_width(48),
    perfmon_version(0),
    perfmon_config_anythread(1),
    nominal_frequency(0),
    max_qpi_speed(0),
    L3ScalingFactor(0),
    pkgThermalSpecPower(-1),
    pkgMinimumPower(-1),
    pkgMaximumPower(-1),
    systemTopology(new SystemRoot(this)), // owned; released in ~PCM()
    allow_multiple_instances(false),
    programmed_pmu(false),
    joulesPerEnergyUnit(0),
#ifdef __linux__
    resctrl(*this),
#endif
    useResctrl(false),
    disable_JKT_workaround(false),
    blocked(false),
    coreCStateMsr(NULL),
    pkgCStateMsr(NULL),
    L2CacheHitRatioAvailable(false),
    L3CacheHitRatioAvailable(false),
    L3CacheMissesAvailable(false),
    L2CacheMissesAvailable(false),
    L2CacheHitsAvailable(false),
    L3CacheHitsNoSnoopAvailable(false),
    L3CacheHitsSnoopAvailable(false),
    L3CacheHitsAvailable(false),
    forceRTMAbortMode(false),
    mode(INVALID_MODE),
    numInstancesSemaphore(NULL),
    canUsePerf(false),
    outfile(NULL),
    backup_ofile(NULL),
    run_state(1),
    needToRestoreNMIWatchdog(false)
{
#ifdef _MSC_VER
    // WARNING: This driver code (msr.sys) is only for testing purposes, not for production use
    Driver drv(Driver::msrLocalPath());
    // drv.stop(); // restart driver (usually not needed)
    if (!drv.start())
    {
        std::wcerr << "Cannot access CPU counters\n";
        std::wcerr << "You must have a signed driver at " << drv.driverPath() << " and have administrator rights to run this program\n";
        return;
    }
#endif

    // Ordered initialization; each step depends on the previous ones and
    // aborts construction on failure (MSR stays empty -> good() == false).
    if(!detectModel()) return;

    if(!checkModel()) return;

    initCStateSupportTables();

    if(!discoverSystemTopology()) return;

    if(!initMSR()) return;

    readCoreCounterConfig(true);

#ifndef PCM_SILENT
    printSystemTopology();
#endif

    if(!detectNominalFrequency()) return;

    showSpecControlMSRs();

    initEnergyMonitoring();

    initUncoreObjects();

    initRDT();

    readCPUMicrocodeLevel();

#ifdef PCM_USE_PERF
    canUsePerf = true;
    // one slot per perf counter, per core; -1 marks "not opened yet"
    std::vector<int> dummy(PERF_MAX_COUNTERS, -1);
    perfEventHandle.resize(num_cores, dummy);
#endif

    // one affinity-pinned worker thread per core for MSR/PMU accesses
    for (int32 i = 0; i < num_cores; ++i)
    {
        coreTaskQueues.push_back(std::make_shared<CoreTaskQueue>(i));
    }
}
2110
enableJKTWorkaround(bool enable)2111 void PCM::enableJKTWorkaround(bool enable)
2112 {
2113 if(disable_JKT_workaround) return;
2114 std::cerr << "Using PCM on your system might have a performance impact as per http://software.intel.com/en-us/articles/performance-impact-when-sampling-certain-llc-events-on-snb-ep-with-vtune\n";
2115 std::cerr << "You can avoid the performance impact by using the option --noJKTWA, however the cache metrics might be wrong then.\n";
2116 if(MSR.size())
2117 {
2118 for(int32 i = 0; i < num_cores; ++i)
2119 {
2120 uint64 val64 = 0;
2121 MSR[i]->read(0x39C, &val64);
2122 if(enable)
2123 val64 |= 1ULL;
2124 else
2125 val64 &= (~1ULL);
2126 MSR[i]->write(0x39C, val64);
2127 }
2128 }
2129 for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
2130 {
2131 if(server_pcicfg_uncore[i].get()) server_pcicfg_uncore[i]->enableJKTWorkaround(enable);
2132 }
2133 }
2134
showSpecControlMSRs()2135 void PCM::showSpecControlMSRs()
2136 {
2137 PCM_CPUID_INFO cpuinfo;
2138 pcm_cpuid(7, 0, cpuinfo);
2139
2140 if (MSR.size())
2141 {
2142 if ((cpuinfo.reg.edx & (1 << 26)) || (cpuinfo.reg.edx & (1 << 27)))
2143 {
2144 uint64 val64 = 0;
2145 MSR[0]->read(MSR_IA32_SPEC_CTRL, &val64);
2146 std::cerr << "IBRS enabled in the kernel : " << ((val64 & 1) ? "yes" : "no") << "\n";
2147 std::cerr << "STIBP enabled in the kernel : " << ((val64 & 2) ? "yes" : "no") << "\n";
2148 }
2149 if (cpuinfo.reg.edx & (1 << 29))
2150 {
2151 uint64 val64 = 0;
2152 MSR[0]->read(MSR_IA32_ARCH_CAPABILITIES, &val64);
2153 std::cerr << "The processor is not susceptible to Rogue Data Cache Load: " << ((val64 & 1) ? "yes" : "no") << "\n";
2154 std::cerr << "The processor supports enhanced IBRS : " << ((val64 & 2) ? "yes" : "no") << "\n";
2155 }
2156 }
2157 }
2158
isCoreOnline(int32 os_core_id) const2159 bool PCM::isCoreOnline(int32 os_core_id) const
2160 {
2161 return (topology[os_core_id].os_id != -1) && (topology[os_core_id].core_id != -1) && (topology[os_core_id].socket != -1);
2162 }
2163
isSocketOnline(int32 socket_id) const2164 bool PCM::isSocketOnline(int32 socket_id) const
2165 {
2166 return socketRefCore[socket_id] != -1;
2167 }
2168
isCPUModelSupported(const int model_)2169 bool PCM::isCPUModelSupported(const int model_)
2170 {
2171 return ( model_ == NEHALEM_EP
2172 || model_ == NEHALEM_EX
2173 || model_ == WESTMERE_EP
2174 || model_ == WESTMERE_EX
2175 || isAtom(model_)
2176 || model_ == SNOWRIDGE
2177 || model_ == CLARKDALE
2178 || model_ == SANDY_BRIDGE
2179 || model_ == JAKETOWN
2180 || model_ == IVY_BRIDGE
2181 || model_ == HASWELL
2182 || model_ == IVYTOWN
2183 || model_ == HASWELLX
2184 || model_ == BDX_DE
2185 || model_ == BDX
2186 || model_ == BROADWELL
2187 || model_ == KNL
2188 || model_ == SKL
2189 || model_ == SKL_UY
2190 || model_ == KBL
2191 || model_ == KBL_1
2192 || model_ == CML
2193 || model_ == ICL
2194 || model_ == RKL
2195 || model_ == TGL
2196 || model_ == SKX
2197 || model_ == ICX
2198 );
2199 }
2200
// Canonicalize the detected cpu_model: several derivative model ids are
// folded into the representative id that the rest of PCM switches on
// (e.g. ICX_D -> ICX, HASWELL_ULT/HASWELL_2 -> HASWELL).
// Returns true when the canonicalized model is supported; otherwise prints
// a diagnostic with the raw model number and brand string and returns false
// (unless the PCM_TEST_FALLBACK_TO_ATOM testing hook is compiled in).
bool PCM::checkModel()
{
    // fold aliases onto their base model
    if (cpu_model == NEHALEM) cpu_model = NEHALEM_EP;
    if (cpu_model == ATOM_2) cpu_model = ATOM;
    if (cpu_model == HASWELL_ULT || cpu_model == HASWELL_2) cpu_model = HASWELL;
    if (cpu_model == BROADWELL_XEON_E3) cpu_model = BROADWELL;
    if (cpu_model == ICX_D) cpu_model = ICX;
    if (cpu_model == CML_1) cpu_model = CML;
    if (cpu_model == ICL_1) cpu_model = ICL;
    if (cpu_model == TGL_1) cpu_model = TGL;

    if(!isCPUModelSupported((int)cpu_model))
    {
        std::cerr << getUnsupportedMessage() << " CPU model number: " << cpu_model << " Brand: \"" << getCPUBrandString().c_str() << "\"\n";
/* FOR TESTING PURPOSES ONLY */
#ifdef PCM_TEST_FALLBACK_TO_ATOM
        std::cerr << "Fall back to ATOM functionality.\n";
        cpu_model = ATOM;
        return true;
#endif
        return false;
    }
    return true;
}
2225
// Release all per-core MSR handles (the vector holds shared_ptr handles, so
// clearing it drops the references). After this call good() returns false.
void PCM::destroyMSR()
{
    MSR.clear();
}
2230
// Destructor: serializes with other PCM instances via InstanceLock, then —
// only if this object is the registered singleton — releases the MSR
// handles, clears the singleton pointer and frees the topology tree
// allocated in the constructor.
PCM::~PCM()
{
    InstanceLock lock(allow_multiple_instances);
    if (instance)
    {
        destroyMSR();
        instance = NULL;
        delete systemTopology;
    }
}
2241
good()2242 bool PCM::good()
2243 {
2244 return !MSR.empty();
2245 }
2246
2247 #ifdef PCM_USE_PERF
// Build a zeroed perf_event_attr template for counting (non-sampling) use.
// The caller must fill in .type and .config before perf_event_open.
// When group == true the event reads as part of a group (PERF_FORMAT_GROUP).
perf_event_attr PCM_init_perf_event_attr(bool group = true)
{
    perf_event_attr attr;
    bzero(&attr, sizeof(perf_event_attr));
    attr.size = sizeof(attr);
    attr.type = -1;   // must be set up later
    attr.config = -1; // must be set up later
    // pure counting mode: no sampling
    attr.sample_period = 0;
    attr.sample_type = 0;
    attr.read_format = group ? PERF_FORMAT_GROUP : 0; /* PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING |
                          PERF_FORMAT_ID | PERF_FORMAT_GROUP ; */
    attr.disabled = 0;
    attr.inherit = 0;
    attr.pinned = 1; // group leader stays pinned on the PMU
    attr.exclusive = 0;
    // count in all privilege modes
    attr.exclude_user = 0;
    attr.exclude_kernel = 0;
    attr.exclude_hv = 0;
    attr.exclude_idle = 0;
    attr.mmap = 0;
    attr.comm = 0;
    attr.freq = 0;
    attr.inherit_stat = 0;
    attr.enable_on_exec = 0;
    attr.task = 0;
    attr.watermark = 0;
    attr.wakeup_events = 0;
    return attr;
}
2277 #endif
2278
// Program the core (and associated uncore) PMUs according to mode_.
// - mode_      : DEFAULT_EVENTS, CUSTOM_CORE_EVENTS or EXT_CUSTOM_CORE_EVENTS
// - parameter_ : event description matching mode_ (may be NULL for defaults)
// - silent     : suppress informational stderr output
// Returns PCM::Success, or an error code (MSRAccessDenied, PMUBusy,
// UnknownError). Handles multi-instance coordination via a named semaphore
// and decides whether the Linux perf API or direct MSR programming is used.
//
// Fix vs. the original version: in both _MSC_VER error paths the message
// text was taken from a *temporary* _com_error whose buffer is freed at the
// end of the statement, leaving strError dangling (the named `error` object
// was constructed but never used). The message is now taken from the named
// object, whose buffer outlives the print.
PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter_, const bool silent)
{
#ifdef __linux__
    // the NMI watchdog occupies one generic hw-PMU counter; free it and
    // remember to restore it later
    if (isNMIWatchdogEnabled())
    {
        disableNMIWatchdog(silent);
        needToRestoreNMIWatchdog = true;
    }
#endif

    if(allow_multiple_instances && (EXT_CUSTOM_CORE_EVENTS == mode_ || CUSTOM_CORE_EVENTS == mode_))
    {
        allow_multiple_instances = false;
        std::cerr << "Warning: multiple PCM instance mode is not allowed with custom events.\n";
    }

    InstanceLock lock(allow_multiple_instances);
    if (MSR.empty()) return PCM::MSRAccessDenied;

    ExtendedCustomCoreEventDescription * pExtDesc = (ExtendedCustomCoreEventDescription *)parameter_;

#ifdef PCM_USE_PERF
    // Decide whether the perf API can be used; each check below can veto it
    // and fall back to direct PMU programming.
    if (!silent) std::cerr << "Trying to use Linux perf events...\n";
    const char * no_perf_env = std::getenv("PCM_NO_PERF");
    if (no_perf_env != NULL && std::string(no_perf_env) == std::string("1"))
    {
        canUsePerf = false;
        if (!silent) std::cerr << "Usage of Linux perf events is disabled through PCM_NO_PERF environment variable. Using direct PMU programming...\n";
    }
    /*
    if(num_online_cores < num_cores)
    {
        canUsePerf = false;
        std::cerr << "PCM does not support using Linux perf API on systems with offlined cores. Falling-back to direct PMU programming.\n";
    }
    */
    else if(PERF_COUNT_HW_MAX <= PCM_PERF_COUNT_HW_REF_CPU_CYCLES)
    {
        canUsePerf = false;
        if (!silent) std::cerr << "Can not use Linux perf because your Linux kernel does not support PERF_COUNT_HW_REF_CPU_CYCLES event. Falling-back to direct PMU programming.\n";
    }
    else if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg)
    {
        canUsePerf = false;
        if (!silent) std::cerr << "Can not use Linux perf because non-standard fixed counter configuration requested. Falling-back to direct PMU programming.\n";
    }
    else if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && (pExtDesc->OffcoreResponseMsrValue[0] || pExtDesc->OffcoreResponseMsrValue[1]))
    {
        const std::string offcore_rsp_format = readSysFS("/sys/bus/event_source/devices/cpu/format/offcore_rsp");
        if (offcore_rsp_format != "config1:0-63\n")
        {
            canUsePerf = false;
            if (!silent) std::cerr << "Can not use Linux perf because OffcoreResponse usage is not supported. Falling-back to direct PMU programming.\n";
        }
    }
    if (isHWTMAL1Supported() == true && perfSupportsTopDown() == false)
    {
        canUsePerf = false;
        if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n";
    }
#endif

    if(allow_multiple_instances)
    {
        //std::cerr << "Checking for other instances of PCM...\n";
#ifdef _MSC_VER

        numInstancesSemaphore = CreateSemaphore(NULL, 0, 1 << 20, L"Global\\Number of running Processor Counter Monitor instances");
        if (!numInstancesSemaphore)
        {
            _com_error error(GetLastError());
            std::wcerr << "Error in Windows function 'CreateSemaphore': " << GetLastError() << " ";
            // use the named _com_error so the message buffer stays valid
            const TCHAR * strError = error.ErrorMessage();
            if (strError) std::wcerr << strError;
            std::wcerr << "\n";
            return PCM::UnknownError;
        }
        LONG prevValue = 0;
        if (!ReleaseSemaphore(numInstancesSemaphore, 1, &prevValue))
        {
            _com_error error(GetLastError());
            std::wcerr << "Error in Windows function 'ReleaseSemaphore': " << GetLastError() << " ";
            // use the named _com_error so the message buffer stays valid
            const TCHAR * strError = error.ErrorMessage();
            if (strError) std::wcerr << strError;
            std::wcerr << "\n";
            return PCM::UnknownError;
        }
        if (prevValue > 0)  // already programmed since another instance exists
        {
            if (!silent) std::cerr << "Number of PCM instances: " << (prevValue + 1) << "\n";
            if (hasPCICFGUncore() && max_qpi_speed==0)
            for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
                if (server_pcicfg_uncore[i].get())
                    max_qpi_speed = (std::max)(server_pcicfg_uncore[i]->computeQPISpeed(socketRefCore[i], cpu_model), max_qpi_speed); // parenthesis to avoid macro expansion on Windows

            reportQPISpeed();
            return PCM::Success;
        }

#else // if linux, apple, freebsd or dragonflybsd
        numInstancesSemaphore = sem_open(PCM_NUM_INSTANCES_SEMAPHORE_NAME, O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO, 0);
        if (SEM_FAILED == numInstancesSemaphore)
        {
            if (EACCES == errno)
                std::cerr << "PCM Error, do not have permissions to open semaphores in /dev/shm/. Clean up them.\n";
            return PCM::UnknownError;
        }
#ifndef __APPLE__
        sem_post(numInstancesSemaphore);
        int curValue = 0;
        sem_getvalue(numInstancesSemaphore, &curValue);
#else //if it is apple
        uint32 curValue = PCM::incrementNumInstances();
        sem_post(numInstancesSemaphore);
#endif // end ifndef __APPLE__

        if (curValue > 1) // already programmed since another instance exists
        {
            if (!silent) std::cerr << "Number of PCM instances: " << curValue << "\n";
            if (hasPCICFGUncore() && max_qpi_speed==0)
            for (int i = 0; i < (int)server_pcicfg_uncore.size(); ++i) {
                if(server_pcicfg_uncore[i].get())
                    max_qpi_speed = std::max(server_pcicfg_uncore[i]->computeQPISpeed(socketRefCore[i],cpu_model), max_qpi_speed);
                reportQPISpeed();
            }
            if(!canUsePerf) return PCM::Success;
        }

#endif // end ifdef _MSC_VER

#ifdef PCM_USE_PERF
        /*
        numInst>1 && canUsePerf==false -> not reachable, already PMU programmed in another PCM instance
        numInst>1 && canUsePerf==true  -> perf programmed in different PCM, is not allowed
        numInst<=1 && canUsePerf==false -> we are first, perf cannot be used, *check* if PMU busy
        numInst<=1 && canUsePerf==true -> we are first, perf will be used, *dont check*, this is now perf business
        */
        if(curValue > 1 && (canUsePerf == true))
        {
            std::cerr << "Running several clients using the same counters is not possible with Linux perf. Recompile PCM without Linux Perf support to allow such usage. \n";
            decrementInstanceSemaphore();
            return PCM::UnknownError;
        }

        if((curValue <= 1) && (canUsePerf == false) && PMUinUse())
        {
            decrementInstanceSemaphore();
            return PCM::PMUBusy;
        }
#else
        if (PMUinUse())
        {
            decrementInstanceSemaphore();
            return PCM::PMUBusy;
        }
#endif
    }
    else
    {
        if((canUsePerf == false) && PMUinUse())
        {
            return PCM::PMUBusy;
        }
    }

    mode = mode_;

    // copy custom event descriptions
    if (mode == CUSTOM_CORE_EVENTS)
    {
        if (!parameter_)
        {
            std::cerr << "PCM Internal Error: data structure for custom event not initialized\n";
            return PCM::UnknownError;
        }
        CustomCoreEventDescription * pDesc = (CustomCoreEventDescription *)parameter_;
        coreEventDesc[0] = pDesc[0];
        coreEventDesc[1] = pDesc[1];
        if (isAtom() == false && cpu_model != KNL)
        {
            coreEventDesc[2] = pDesc[2];
            core_gen_counter_num_used = 3;
            if (core_gen_counter_num_max > 3) {
                coreEventDesc[3] = pDesc[3];
                core_gen_counter_num_used = 4;
            }
        }
        else
            core_gen_counter_num_used = 2;
    }
    else if (mode != EXT_CUSTOM_CORE_EVENTS)
    {
        // default events: select the per-model cache event set and record
        // which derived cache metrics become available
        if (isAtom() || cpu_model == KNL)
        {
            coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
            coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
            coreEventDesc[1].event_number = ARCH_LLC_REFERENCE_EVTNR;
            coreEventDesc[1].umask_value = ARCH_LLC_REFERENCE_UMASK;
            L2CacheHitRatioAvailable = true;
            L2CacheMissesAvailable = true;
            L2CacheHitsAvailable = true;
            core_gen_counter_num_used = 2;
        }
        else
        switch ( cpu_model ) {
            case SNOWRIDGE:
                coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
                coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
                coreEventDesc[1].event_number = ARCH_LLC_REFERENCE_EVTNR;
                coreEventDesc[1].umask_value = ARCH_LLC_REFERENCE_UMASK;
                coreEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR;
                coreEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK;
                coreEventDesc[3].event_number = SKL_MEM_LOAD_RETIRED_L2_HIT_EVTNR;
                coreEventDesc[3].umask_value = SKL_MEM_LOAD_RETIRED_L2_HIT_UMASK;
                L2CacheHitRatioAvailable = true;
                L3CacheHitRatioAvailable = true;
                L3CacheMissesAvailable = true;
                L2CacheMissesAvailable = true;
                L2CacheHitsAvailable = true;
                L3CacheHitsSnoopAvailable = true;
                L3CacheHitsAvailable = true;
                core_gen_counter_num_used = 4;
                break;
            PCM_SKL_PATH_CASES
            case SKX:
            case ICX:
                assert(useSkylakeEvents());
                coreEventDesc[0].event_number = SKL_MEM_LOAD_RETIRED_L3_MISS_EVTNR;
                coreEventDesc[0].umask_value = SKL_MEM_LOAD_RETIRED_L3_MISS_UMASK;
                coreEventDesc[1].event_number = SKL_MEM_LOAD_RETIRED_L3_HIT_EVTNR;
                coreEventDesc[1].umask_value = SKL_MEM_LOAD_RETIRED_L3_HIT_UMASK;
                coreEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR;
                coreEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK;
                coreEventDesc[3].event_number = SKL_MEM_LOAD_RETIRED_L2_HIT_EVTNR;
                coreEventDesc[3].umask_value = SKL_MEM_LOAD_RETIRED_L2_HIT_UMASK;
                if (core_gen_counter_num_max == 3)
                {
                    // only 3 programmable counters: drop the L2-hit event
                    L3CacheHitRatioAvailable = true;
                    L3CacheMissesAvailable = true;
                    L2CacheMissesAvailable = true;
                    L3CacheHitsSnoopAvailable = true;
                    L3CacheHitsAvailable = true;
                    core_gen_counter_num_used = 3;
                    break;
                }
                L2CacheHitRatioAvailable = true;
                L3CacheHitRatioAvailable = true;
                L3CacheMissesAvailable = true;
                L2CacheMissesAvailable = true;
                L2CacheHitsAvailable = true;
                L3CacheHitsSnoopAvailable = true;
                L3CacheHitsAvailable = true;
                core_gen_counter_num_used = 4;
                break;
            case SANDY_BRIDGE:
            case JAKETOWN:
            case IVYTOWN:
            case IVY_BRIDGE:
            case HASWELL:
            case HASWELLX:
            case BROADWELL:
            case BDX_DE:
            case BDX:
                coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
                coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
                coreEventDesc[1].event_number = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_EVTNR;
                coreEventDesc[1].umask_value = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_UMASK;
                coreEventDesc[2].event_number = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_EVTNR;
                coreEventDesc[2].umask_value = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_UMASK;
                coreEventDesc[3].event_number = MEM_LOAD_UOPS_RETIRED_L2_HIT_EVTNR;
                coreEventDesc[3].umask_value = MEM_LOAD_UOPS_RETIRED_L2_HIT_UMASK;
                if (core_gen_counter_num_max == 3)
                {
                    // only 3 programmable counters: drop the L2-hit event
                    L3CacheHitRatioAvailable = true;
                    L3CacheMissesAvailable = true;
                    L2CacheMissesAvailable = true;
                    L3CacheHitsNoSnoopAvailable = true;
                    L3CacheHitsSnoopAvailable = true;
                    L3CacheHitsAvailable = true;
                    core_gen_counter_num_used = 3;
                    break;
                }
                L2CacheHitRatioAvailable = true;
                L3CacheHitRatioAvailable = true;
                L3CacheMissesAvailable = true;
                L2CacheMissesAvailable = true;
                L2CacheHitsAvailable = true;
                L3CacheHitsNoSnoopAvailable = true;
                L3CacheHitsSnoopAvailable = true;
                L3CacheHitsAvailable = true;
                core_gen_counter_num_used = 4;
                break;
            case NEHALEM_EP:
            case WESTMERE_EP:
            case CLARKDALE:
                coreEventDesc[0].event_number = MEM_LOAD_RETIRED_L3_MISS_EVTNR;
                coreEventDesc[0].umask_value = MEM_LOAD_RETIRED_L3_MISS_UMASK;
                coreEventDesc[1].event_number = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_EVTNR;
                coreEventDesc[1].umask_value = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_UMASK;
                coreEventDesc[2].event_number = MEM_LOAD_RETIRED_L2_HITM_EVTNR;
                coreEventDesc[2].umask_value = MEM_LOAD_RETIRED_L2_HITM_UMASK;
                coreEventDesc[3].event_number = MEM_LOAD_RETIRED_L2_HIT_EVTNR;
                coreEventDesc[3].umask_value = MEM_LOAD_RETIRED_L2_HIT_UMASK;
                L2CacheHitRatioAvailable = true;
                L3CacheHitRatioAvailable = true;
                L3CacheMissesAvailable = true;
                L2CacheMissesAvailable = true;
                L2CacheHitsAvailable = true;
                L3CacheHitsNoSnoopAvailable = true;
                L3CacheHitsSnoopAvailable = true;
                L3CacheHitsAvailable = true;
                core_gen_counter_num_used = 4;
                break;
            default:
                assert(!useSkylakeEvents());
                coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
                coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
                coreEventDesc[1].event_number = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_EVTNR;
                coreEventDesc[1].umask_value = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_UMASK;
                coreEventDesc[2].event_number = MEM_LOAD_RETIRED_L2_HITM_EVTNR;
                coreEventDesc[2].umask_value = MEM_LOAD_RETIRED_L2_HITM_UMASK;
                coreEventDesc[3].event_number = MEM_LOAD_RETIRED_L2_HIT_EVTNR;
                coreEventDesc[3].umask_value = MEM_LOAD_RETIRED_L2_HIT_UMASK;
                L2CacheHitRatioAvailable = true;
                L3CacheHitRatioAvailable = true;
                L3CacheMissesAvailable = true;
                L2CacheMissesAvailable = true;
                L2CacheHitsAvailable = true;
                L3CacheHitsNoSnoopAvailable = true;
                L3CacheHitsSnoopAvailable = true;
                L3CacheHitsAvailable = true;
                core_gen_counter_num_used = 4;
        }
    }

    core_fixed_counter_num_used = 3;

    if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->gpCounterCfg)
    {
        core_gen_counter_num_used = pExtDesc->nGPCounters;
    }

    if(cpu_model == JAKETOWN)
    {
        bool enableWA = false;
        for(uint32 i = 0; i< core_gen_counter_num_used; ++i)
        {
            if(coreEventDesc[i].event_number == MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_EVTNR)
                enableWA = true;
        }
        enableJKTWorkaround(enableWA); // this has a performance penalty on memory access
    }

    // sanity-check the requested counter counts against the hardware limits
    if (core_gen_counter_num_used > core_gen_counter_num_max)
    {
        std::cerr << "PCM ERROR: Trying to program " << core_gen_counter_num_used << " general purpose counters with only "
            << core_gen_counter_num_max << " available\n";
        return PCM::UnknownError;
    }
    if (core_fixed_counter_num_used > core_fixed_counter_num_max)
    {
        std::cerr << "PCM ERROR: Trying to program " << core_fixed_counter_num_used << " fixed counters with only "
            << core_fixed_counter_num_max << " available\n";
        return PCM::UnknownError;
    }

    programmed_pmu = true;

    lastProgrammedCustomCounters.clear();
    lastProgrammedCustomCounters.resize(num_cores);
    // Version for linux/windows/freebsd/dragonflybsd
    for (int i = 0; i < (int)num_cores; ++i)
    {
        if (isCoreOnline(i) == false) continue;
        TemporalThreadAffinity tempThreadAffinity(i, false); // speedup trick for Linux

        const auto status = programCoreCounters(i, mode_, pExtDesc, lastProgrammedCustomCounters[i]);
        if (status != PCM::Success)
        {
            return status;
        }

        // program uncore counters

        if (cpu_model == NEHALEM_EP || cpu_model == WESTMERE_EP || cpu_model == CLARKDALE)
        {
            programNehalemEPUncore(i);
        }
        else if (hasBecktonUncore())
        {
            programBecktonUncore(i);
        }
    }

    if (canUsePerf && !silent)
    {
        std::cerr << "Successfully programmed on-core PMU using Linux perf\n";
    }

    if (hasPCICFGUncore())
    {
        // program the server uncore and compute link speeds concurrently
        std::vector<std::future<uint64>> qpi_speeds;
        for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
        {
            server_pcicfg_uncore[i]->program();
            qpi_speeds.push_back(std::async(std::launch::async,
                &ServerPCICFGUncore::computeQPISpeed, server_pcicfg_uncore[i].get(), socketRefCore[i], cpu_model));
        }
        for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
        {
            max_qpi_speed = (std::max)(qpi_speeds[i].get(), max_qpi_speed);
        }

        programCbo();
    }

    if (!silent) reportQPISpeed();

    return PCM::Success;
}
2699
programCoreCounters(const int i,const PCM::ProgramMode mode_,const ExtendedCustomCoreEventDescription * pExtDesc,std::vector<EventSelectRegister> & result)2700 PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
2701 const PCM::ProgramMode mode_,
2702 const ExtendedCustomCoreEventDescription * pExtDesc,
2703 std::vector<EventSelectRegister> & result)
2704 {
2705 // program core counters
2706
2707 result.clear();
2708 FixedEventControlRegister ctrl_reg;
2709 #ifdef PCM_USE_PERF
2710 int leader_counter = -1;
2711 perf_event_attr e = PCM_init_perf_event_attr();
2712 auto programPerfEvent = [this, &e, &leader_counter, &i](const int eventPos, const std::string & eventName) -> bool
2713 {
2714 // if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
2715 if ((perfEventHandle[i][eventPos] = syscall(SYS_perf_event_open, &e, -1,
2716 i /* core id */, leader_counter /* group leader */, 0)) <= 0)
2717 {
2718 std::cerr << "Linux Perf: Error when programming " << eventName << ", error: " << strerror(errno) << "\n";
2719 if (24 == errno)
2720 {
2721 std::cerr << "try executing 'ulimit -n 10000' to increase the limit on the number of open files.\n";
2722 }
2723 else
2724 {
2725 std::cerr << "try running with environment variable PCM_NO_PERF=1\n";
2726 }
2727 decrementInstanceSemaphore();
2728 return false;
2729 }
2730 return true;
2731 };
2732 if (canUsePerf)
2733 {
2734 e.type = PERF_TYPE_HARDWARE;
2735 e.config = PERF_COUNT_HW_INSTRUCTIONS;
2736 if (programPerfEvent(PERF_INST_RETIRED_POS, "INST_RETIRED") == false)
2737 {
2738 return PCM::UnknownError;
2739 }
2740 leader_counter = perfEventHandle[i][PERF_INST_RETIRED_POS];
2741 e.pinned = 0; // all following counter are not leaders, thus need not be pinned explicitly
2742 e.config = PERF_COUNT_HW_CPU_CYCLES;
2743 if (programPerfEvent(PERF_CPU_CLK_UNHALTED_THREAD_POS, "CPU_CLK_UNHALTED_THREAD") == false)
2744 {
2745 return PCM::UnknownError;
2746 }
2747 e.config = PCM_PERF_COUNT_HW_REF_CPU_CYCLES;
2748 if (programPerfEvent(PERF_CPU_CLK_UNHALTED_REF_POS, "CPU_CLK_UNHALTED_REF") == false)
2749 {
2750 return PCM::UnknownError;
2751 }
2752 }
2753 else
2754 #endif
2755 {
2756 // disable counters while programming
2757 MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, 0);
2758 MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);
2759
2760
2761 if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg)
2762 {
2763 ctrl_reg = *(pExtDesc->fixedCfg);
2764 }
2765 else
2766 {
2767 ctrl_reg.value = 0;
2768
2769 ctrl_reg.fields.os0 = 1;
2770 ctrl_reg.fields.usr0 = 1;
2771
2772 ctrl_reg.fields.os1 = 1;
2773 ctrl_reg.fields.usr1 = 1;
2774
2775 ctrl_reg.fields.os2 = 1;
2776 ctrl_reg.fields.usr2 = 1;
2777
2778 if (isFixedCounterSupported(3))
2779 {
2780 ctrl_reg.fields.os3 = 1;
2781 ctrl_reg.fields.usr3 = 1;
2782 }
2783 }
2784
2785 MSR[i]->write(INST_RETIRED_ADDR, 0);
2786 MSR[i]->write(CPU_CLK_UNHALTED_THREAD_ADDR, 0);
2787 MSR[i]->write(CPU_CLK_UNHALTED_REF_ADDR, 0);
2788 MSR[i]->write(IA32_CR_FIXED_CTR_CTRL, ctrl_reg.value);
2789 }
2790
2791 if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc)
2792 {
2793 if (pExtDesc->OffcoreResponseMsrValue[0]) // still need to do also if perf API is used due to a bug in perf
2794 MSR[i]->write(MSR_OFFCORE_RSP0, pExtDesc->OffcoreResponseMsrValue[0]);
2795 if (pExtDesc->OffcoreResponseMsrValue[1])
2796 MSR[i]->write(MSR_OFFCORE_RSP1, pExtDesc->OffcoreResponseMsrValue[1]);
2797 }
2798
2799 auto setEvent = [] (EventSelectRegister & reg, const uint64 event, const uint64 umask)
2800 {
2801 reg.fields.event_select = event;
2802 reg.fields.umask = umask;
2803 reg.fields.usr = 1;
2804 reg.fields.os = 1;
2805 reg.fields.edge = 0;
2806 reg.fields.pin_control = 0;
2807 reg.fields.apic_int = 0;
2808 reg.fields.any_thread = 0;
2809 reg.fields.enable = 1;
2810 reg.fields.invert = 0;
2811 reg.fields.cmask = 0;
2812 reg.fields.in_tx = 0;
2813 reg.fields.in_txcp = 0;
2814 };
2815 EventSelectRegister event_select_reg;
2816 for (uint32 j = 0; j < core_gen_counter_num_used; ++j)
2817 {
2818 if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->gpCounterCfg)
2819 {
2820 event_select_reg = pExtDesc->gpCounterCfg[j];
2821 event_select_reg.fields.enable = 1;
2822 }
2823 else
2824 {
2825 MSR[i]->read(IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value); // read-only also safe for perf
2826
2827 setEvent(event_select_reg, coreEventDesc[j].event_number, coreEventDesc[j].umask_value);
2828 }
2829 result.push_back(event_select_reg);
2830 #ifdef PCM_USE_PERF
2831 if (canUsePerf)
2832 {
2833 e.type = PERF_TYPE_RAW;
2834 e.config = (1ULL << 63ULL) + event_select_reg.value;
2835 if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_0_EVTNR)
2836 e.config1 = pExtDesc->OffcoreResponseMsrValue[0];
2837 if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_1_EVTNR)
2838 e.config1 = pExtDesc->OffcoreResponseMsrValue[1];
2839 if (programPerfEvent(PERF_GEN_EVENT_0_POS + j, std::string("generic event #") + std::to_string(i)) == false)
2840 {
2841 return PCM::UnknownError;
2842 }
2843 }
2844 else
2845 #endif
2846 {
2847 MSR[i]->write(IA32_PMC0 + j, 0);
2848 MSR[i]->write(IA32_PERFEVTSEL0_ADDR + j, event_select_reg.value);
2849 }
2850 }
2851
2852 if (!canUsePerf)
2853 {
2854 // start counting, enable all (4 programmable + 3 fixed) counters
2855 uint64 value = (1ULL << 0) + (1ULL << 1) + (1ULL << 2) + (1ULL << 3) + (1ULL << 32) + (1ULL << 33) + (1ULL << 34);
2856
2857 if (isFixedCounterSupported(3))
2858 {
2859 value |= (1ULL << 35);
2860 MSR[i]->write(TOPDOWN_SLOTS_ADDR, 0);
2861 }
2862
2863 if (isHWTMAL1Supported())
2864 {
2865 value |= (1ULL << 48);
2866 MSR[i]->write(PERF_METRICS_ADDR, 0);
2867 }
2868
2869 if (isAtom() || cpu_model == KNL) // KNL and Atom have 3 fixed + only 2 programmable counters
2870 value = (1ULL << 0) + (1ULL << 1) + (1ULL << 32) + (1ULL << 33) + (1ULL << 34);
2871
2872 for (uint32 j = 0; j < core_gen_counter_num_used; ++j)
2873 {
2874 value |= (1ULL << j); // enable all custom counters (if > 4)
2875 }
2876
2877 MSR[i]->write(IA32_PERF_GLOBAL_OVF_CTRL, value);
2878 MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, value);
2879 }
2880 #ifdef PCM_USE_PERF
2881 else
2882 {
2883 if (isFixedCounterSupported(3) && isHWTMAL1Supported() && perfSupportsTopDown())
2884 {
2885 const auto topDownEvents = { std::make_pair(perfSlotsPath, PERF_TOPDOWN_SLOTS_POS),
2886 std::make_pair(perfBadSpecPath, PERF_TOPDOWN_BADSPEC_POS),
2887 std::make_pair(perfBackEndPath, PERF_TOPDOWN_BACKEND_POS),
2888 std::make_pair(perfFrontEndPath, PERF_TOPDOWN_FRONTEND_POS),
2889 std::make_pair(perfRetiringPath, PERF_TOPDOWN_RETIRING_POS)};
2890 int readPos = core_fixed_counter_num_used + core_gen_counter_num_used;
2891 leader_counter = -1;
2892 for (auto event : topDownEvents)
2893 {
2894 uint64 eventSel = 0, umask = 0;
2895 const auto eventDesc = readSysFS(event.first);
2896 const auto tokens = split(eventDesc, ',');
2897 for (auto token : tokens)
2898 {
2899 if (match(token, "event=", &eventSel)) {}
2900 else if (match(token, "umask=", &umask)) {}
2901 else
2902 {
2903 std::cerr << "ERROR: unknown token " << token << " in event description \"" << eventDesc << "\" from " << event.first << "\n";
2904 decrementInstanceSemaphore();
2905 return PCM::UnknownError;
2906 }
2907 }
2908 EventSelectRegister reg;
2909 setEvent(reg, eventSel, umask);
2910 e.type = PERF_TYPE_RAW;
2911 e.config = (1ULL << 63ULL) + reg.value;
2912 // std::cerr << "Programming perf event " << std::hex << e.config << "\n";
2913 if (programPerfEvent(event.second, std::string("event ") + event.first + " " + eventDesc) == false)
2914 {
2915 return PCM::UnknownError;
2916 }
2917 leader_counter = perfEventHandle[i][PERF_TOPDOWN_SLOTS_POS];
2918 perfTopDownPos[event.second] = readPos++;
2919 }
2920 }
2921 }
2922 #endif
2923 return PCM::Success;
2924 }
2925
reportQPISpeed() const2926 void PCM::reportQPISpeed() const
2927 {
2928 if (!max_qpi_speed) return;
2929
2930 if (hasPCICFGUncore()) {
2931 for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
2932 {
2933 std::cerr << "Socket " << i << "\n";
2934 if(server_pcicfg_uncore[i].get()) server_pcicfg_uncore[i]->reportQPISpeed();
2935 }
2936 } else {
2937 std::cerr << "Max QPI speed: " << max_qpi_speed / (1e9) << " GBytes/second (" << max_qpi_speed / (1e9*getBytesPerLinkTransfer()) << " GT/second)\n";
2938 }
2939
2940 }
2941
programNehalemEPUncore(int32 core)2942 void PCM::programNehalemEPUncore(int32 core)
2943 {
2944
2945 #define CPUCNT_INIT_THE_REST_OF_EVTCNT \
2946 unc_event_select_reg.fields.occ_ctr_rst = 1; \
2947 unc_event_select_reg.fields.edge = 0; \
2948 unc_event_select_reg.fields.enable_pmi = 0; \
2949 unc_event_select_reg.fields.enable = 1; \
2950 unc_event_select_reg.fields.invert = 0; \
2951 unc_event_select_reg.fields.cmask = 0;
2952
2953 uncore_gen_counter_num_used = 8;
2954
2955 UncoreEventSelectRegister unc_event_select_reg;
2956
2957 MSR[core]->read(MSR_UNCORE_PERFEVTSEL0_ADDR, &unc_event_select_reg.value);
2958
2959 unc_event_select_reg.fields.event_select = UNC_QMC_WRITES_FULL_ANY_EVTNR;
2960 unc_event_select_reg.fields.umask = UNC_QMC_WRITES_FULL_ANY_UMASK;
2961
2962 CPUCNT_INIT_THE_REST_OF_EVTCNT
2963
2964 MSR[core]->write(MSR_UNCORE_PERFEVTSEL0_ADDR, unc_event_select_reg.value);
2965
2966
2967 MSR[core]->read(MSR_UNCORE_PERFEVTSEL1_ADDR, &unc_event_select_reg.value);
2968
2969 unc_event_select_reg.fields.event_select = UNC_QMC_NORMAL_READS_ANY_EVTNR;
2970 unc_event_select_reg.fields.umask = UNC_QMC_NORMAL_READS_ANY_UMASK;
2971
2972 CPUCNT_INIT_THE_REST_OF_EVTCNT
2973
2974 MSR[core]->write(MSR_UNCORE_PERFEVTSEL1_ADDR, unc_event_select_reg.value);
2975
2976
2977 MSR[core]->read(MSR_UNCORE_PERFEVTSEL2_ADDR, &unc_event_select_reg.value);
2978 unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2979 unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_IOH_READS_UMASK;
2980 CPUCNT_INIT_THE_REST_OF_EVTCNT
2981 MSR[core]->write(MSR_UNCORE_PERFEVTSEL2_ADDR, unc_event_select_reg.value);
2982
2983 MSR[core]->read(MSR_UNCORE_PERFEVTSEL3_ADDR, &unc_event_select_reg.value);
2984 unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2985 unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_IOH_WRITES_UMASK;
2986 CPUCNT_INIT_THE_REST_OF_EVTCNT
2987 MSR[core]->write(MSR_UNCORE_PERFEVTSEL3_ADDR, unc_event_select_reg.value);
2988
2989 MSR[core]->read(MSR_UNCORE_PERFEVTSEL4_ADDR, &unc_event_select_reg.value);
2990 unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2991 unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_REMOTE_READS_UMASK;
2992 CPUCNT_INIT_THE_REST_OF_EVTCNT
2993 MSR[core]->write(MSR_UNCORE_PERFEVTSEL4_ADDR, unc_event_select_reg.value);
2994
2995 MSR[core]->read(MSR_UNCORE_PERFEVTSEL5_ADDR, &unc_event_select_reg.value);
2996 unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2997 unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_REMOTE_WRITES_UMASK;
2998 CPUCNT_INIT_THE_REST_OF_EVTCNT
2999 MSR[core]->write(MSR_UNCORE_PERFEVTSEL5_ADDR, unc_event_select_reg.value);
3000
3001 MSR[core]->read(MSR_UNCORE_PERFEVTSEL6_ADDR, &unc_event_select_reg.value);
3002 unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
3003 unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_LOCAL_READS_UMASK;
3004 CPUCNT_INIT_THE_REST_OF_EVTCNT
3005 MSR[core]->write(MSR_UNCORE_PERFEVTSEL6_ADDR, unc_event_select_reg.value);
3006
3007 MSR[core]->read(MSR_UNCORE_PERFEVTSEL7_ADDR, &unc_event_select_reg.value);
3008 unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
3009 unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_LOCAL_WRITES_UMASK;
3010 CPUCNT_INIT_THE_REST_OF_EVTCNT
3011 MSR[core]->write(MSR_UNCORE_PERFEVTSEL7_ADDR, unc_event_select_reg.value);
3012
3013
3014 #undef CPUCNT_INIT_THE_REST_OF_EVTCNT
3015
3016 // start uncore counting
3017 uint64 value = 255 + (1ULL << 32); // enable all counters
3018 MSR[core]->write(MSR_UNCORE_PERF_GLOBAL_CTRL_ADDR, value);
3019
3020 // synchronise counters
3021 MSR[core]->write(MSR_UNCORE_PMC0, 0);
3022 MSR[core]->write(MSR_UNCORE_PMC1, 0);
3023 MSR[core]->write(MSR_UNCORE_PMC2, 0);
3024 MSR[core]->write(MSR_UNCORE_PMC3, 0);
3025 MSR[core]->write(MSR_UNCORE_PMC4, 0);
3026 MSR[core]->write(MSR_UNCORE_PMC5, 0);
3027 MSR[core]->write(MSR_UNCORE_PMC6, 0);
3028 MSR[core]->write(MSR_UNCORE_PMC7, 0);
3029 }
3030
// Program the Beckton (Nehalem-EX/Westmere-EX) uncore PMUs on the given core:
// M-Box FVC events, B-Box counter, R-Box QPI traffic counters and the W-Box
// fixed (TSC) counter. Register sequence is order-sensitive: reset first,
// configure, then enable globally at the end.
void PCM::programBecktonUncore(int32 core)
{
    // program Beckton uncore
    // measure QPI speed once, using the reference core of socket 0
    if (core == socketRefCore[0]) computeQPISpeedBeckton((int)core);

    uint64 value = 1 << 29ULL; // reset all counters
    MSR[core]->write(U_MSR_PMON_GLOBAL_CTL, value);

    // FVC (presumably "fixed virtual channel") event selection for both
    // memory-controller boxes; field layout differs between NHM-EX and WSM-EX
    BecktonUncorePMUZDPCTLFVCRegister FVCreg;
    FVCreg.value = 0;
    if (cpu_model == NEHALEM_EX)
    {
        FVCreg.fields.bcmd = 0; // rd_bcmd
        FVCreg.fields.resp = 0; // ack_resp
        FVCreg.fields.evnt0 = 5; // bcmd_match
        FVCreg.fields.evnt1 = 6; // resp_match
        FVCreg.fields.pbox_init_err = 0;
    }
    else
    {
        FVCreg.fields_wsm.bcmd = 0; // rd_bcmd
        FVCreg.fields_wsm.resp = 0; // ack_resp
        FVCreg.fields_wsm.evnt0 = 5; // bcmd_match
        FVCreg.fields_wsm.evnt1 = 6; // resp_match
        FVCreg.fields_wsm.pbox_init_err = 0;
    }
    MSR[core]->write(MB0_MSR_PMU_ZDP_CTL_FVC, FVCreg.value);
    MSR[core]->write(MB1_MSR_PMU_ZDP_CTL_FVC, FVCreg.value);

    // M-Box counter control: counter 0 counts FVC event 0, counter 1 counts
    // FVC event 1, identically on both memory-controller boxes
    BecktonUncorePMUCNTCTLRegister CNTCTLreg;
    CNTCTLreg.value = 0;
    CNTCTLreg.fields.en = 1;
    CNTCTLreg.fields.pmi_en = 0;
    CNTCTLreg.fields.count_mode = 0;
    CNTCTLreg.fields.storage_mode = 0;
    CNTCTLreg.fields.wrap_mode = 1;
    CNTCTLreg.fields.flag_mode = 0;
    CNTCTLreg.fields.inc_sel = 0x0d; // FVC_EV0
    MSR[core]->write(MB0_MSR_PMU_CNT_CTL_0, CNTCTLreg.value);
    MSR[core]->write(MB1_MSR_PMU_CNT_CTL_0, CNTCTLreg.value);
    CNTCTLreg.fields.inc_sel = 0x0e; // FVC_EV1
    MSR[core]->write(MB0_MSR_PMU_CNT_CTL_1, CNTCTLreg.value);
    MSR[core]->write(MB1_MSR_PMU_CNT_CTL_1, CNTCTLreg.value);

    // B-Box counter 1 on both boxes
    value = 1 + ((0x0C) << 1ULL); // enable bit + (event select IMT_INSERTS_WR)
    MSR[core]->write(BB0_MSR_PERF_CNT_CTL_1, value);
    MSR[core]->write(BB1_MSR_PERF_CNT_CTL_1, value);

    MSR[core]->write(MB0_MSR_PERF_GLOBAL_CTL, 3); // enable two counters
    MSR[core]->write(MB1_MSR_PERF_GLOBAL_CTL, 3); // enable two counters

    MSR[core]->write(BB0_MSR_PERF_GLOBAL_CTL, 2); // enable second counter
    MSR[core]->write(BB1_MSR_PERF_GLOBAL_CTL, 2); // enable second counter

    // program R-Box to monitor QPI traffic

    // enable counting on all counters on the left side (port 0-3)
    MSR[core]->write(R_MSR_PMON_GLOBAL_CTL_7_0, 255);
    // ... on the right side (port 4-7)
    MSR[core]->write(R_MSR_PMON_GLOBAL_CTL_15_8, 255);

    // pick the event
    value = (1 << 7ULL) + (1 << 6ULL) + (1 << 2ULL); // count any (incoming) data responses
    MSR[core]->write(R_MSR_PORT0_IPERF_CFG0, value);
    MSR[core]->write(R_MSR_PORT1_IPERF_CFG0, value);
    MSR[core]->write(R_MSR_PORT4_IPERF_CFG0, value);
    MSR[core]->write(R_MSR_PORT5_IPERF_CFG0, value);

    // pick the event
    value = (1ULL << 30ULL); // count null idle flits sent
    MSR[core]->write(R_MSR_PORT0_IPERF_CFG1, value);
    MSR[core]->write(R_MSR_PORT1_IPERF_CFG1, value);
    MSR[core]->write(R_MSR_PORT4_IPERF_CFG1, value);
    MSR[core]->write(R_MSR_PORT5_IPERF_CFG1, value);

    // R-Box counter control value pattern is (enable bit + 2 * config index)

    // choose counter 0 to monitor R_MSR_PORT0_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL0, 1 + 2 * (0));
    // choose counter 1 to monitor R_MSR_PORT1_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL1, 1 + 2 * (6));
    // choose counter 8 to monitor R_MSR_PORT4_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL8, 1 + 2 * (0));
    // choose counter 9 to monitor R_MSR_PORT5_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL9, 1 + 2 * (6));

    // choose counter 2 to monitor R_MSR_PORT0_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL2, 1 + 2 * (1));
    // choose counter 3 to monitor R_MSR_PORT1_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL3, 1 + 2 * (7));
    // choose counter 10 to monitor R_MSR_PORT4_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL10, 1 + 2 * (1));
    // choose counter 11 to monitor R_MSR_PORT5_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL11, 1 + 2 * (7));

    // enable uncore TSC counter (fixed one)
    MSR[core]->write(W_MSR_PMON_GLOBAL_CTL, 1ULL << 31ULL);
    MSR[core]->write(W_MSR_PMON_FIXED_CTR_CTL, 1ULL);

    value = (1 << 28ULL) + 1; // enable all counters
    MSR[core]->write(U_MSR_PMON_GLOBAL_CTL, value);
}
3131
3132 uint64 RDTSC();
3133
computeNominalFrequency()3134 void PCM::computeNominalFrequency()
3135 {
3136 const int ref_core = 0;
3137 uint64 before = 0, after = 0;
3138 MSR[ref_core]->read(IA32_TIME_STAMP_COUNTER, &before);
3139 MySleepMs(1000);
3140 MSR[ref_core]->read(IA32_TIME_STAMP_COUNTER, &after);
3141 nominal_frequency = after-before;
3142 }
getCPUBrandString()3143 std::string PCM::getCPUBrandString()
3144 {
3145 char buffer[sizeof(int)*4*3+1];
3146 PCM_CPUID_INFO * info = (PCM_CPUID_INFO *) buffer;
3147 pcm_cpuid(0x80000002, *info);
3148 ++info;
3149 pcm_cpuid(0x80000003, *info);
3150 ++info;
3151 pcm_cpuid(0x80000004, *info);
3152 buffer[sizeof(int)*4*3] = 0;
3153 std::string result(buffer);
3154 while(result[0]==' ') result.erase(0,1);
3155 std::string::size_type i;
3156 while((i = result.find(" ")) != std::string::npos) result.replace(i,2," "); // remove duplicate spaces
3157 return result;
3158 }
3159
getCPUFamilyModelString()3160 std::string PCM::getCPUFamilyModelString()
3161 {
3162 char buffer[sizeof(int)*4*3+6];
3163 memset(buffer,0,sizeof(buffer));
3164 #ifdef _MSC_VER
3165 sprintf_s(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X",this->cpu_family,this->cpu_model,this->cpu_stepping);
3166 #else
3167 snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X",this->cpu_family,this->cpu_model,this->cpu_stepping);
3168 #endif
3169 std::string result(buffer);
3170 return result;
3171 }
3172
enableForceRTMAbortMode(const bool silent)3173 void PCM::enableForceRTMAbortMode(const bool silent)
3174 {
3175 // std::cout << "enableForceRTMAbortMode(): forceRTMAbortMode=" << forceRTMAbortMode << "\n";
3176 if (!forceRTMAbortMode)
3177 {
3178 if (isForceRTMAbortModeAvailable() && (core_gen_counter_num_max < 4))
3179 {
3180 for (auto m : MSR)
3181 {
3182 const auto res = m->write(MSR_TSX_FORCE_ABORT, 1);
3183 if (res != sizeof(uint64))
3184 {
3185 std::cerr << "Warning: writing 1 to MSR_TSX_FORCE_ABORT failed with error "
3186 << res << " on core " << m->getCoreId() << "\n";
3187 }
3188 }
3189 readCoreCounterConfig(true); // re-read core_gen_counter_num_max from CPUID
3190 if (!silent) std::cerr << "The number of custom counters is now " << core_gen_counter_num_max << "\n";
3191 if (core_gen_counter_num_max < 4)
3192 {
3193 std::cerr << "PCM Warning: the number of custom counters did not increase (" << core_gen_counter_num_max << ")\n";
3194 }
3195 forceRTMAbortMode = true;
3196 }
3197 }
3198 }
3199
// Whether enableForceRTMAbortMode() has successfully programmed
// MSR_TSX_FORCE_ABORT=1 on all cores (cleared again by
// disableForceRTMAbortMode()).
bool PCM::isForceRTMAbortModeEnabled() const
{
    return forceRTMAbortMode;
}
3204
disableForceRTMAbortMode(const bool silent)3205 void PCM::disableForceRTMAbortMode(const bool silent)
3206 {
3207 // std::cout << "disableForceRTMAbortMode(): forceRTMAbortMode=" << forceRTMAbortMode << "\n";
3208 if (forceRTMAbortMode)
3209 {
3210 for (auto m : MSR)
3211 {
3212 const auto res = m->write(MSR_TSX_FORCE_ABORT, 0);
3213 if (res != sizeof(uint64))
3214 {
3215 std::cerr << "Warning: writing 0 to MSR_TSX_FORCE_ABORT failed with error "
3216 << res << " on core " << m->getCoreId() << "\n";
3217 }
3218 }
3219 readCoreCounterConfig(true); // re-read core_gen_counter_num_max from CPUID
3220 if (!silent) std::cerr << "The number of custom counters is now " << core_gen_counter_num_max << "\n";
3221 if (core_gen_counter_num_max != 3)
3222 {
3223 std::cerr << "PCM Warning: the number of custom counters is not 3 (" << core_gen_counter_num_max << ")\n";
3224 }
3225 forceRTMAbortMode = false;
3226 }
3227 }
3228
isForceRTMAbortModeAvailable() const3229 bool PCM::isForceRTMAbortModeAvailable() const
3230 {
3231 PCM_CPUID_INFO info;
3232 pcm_cpuid(7, 0, info); // leaf 7, subleaf 0
3233 return (info.reg.edx & (0x1 << 13)) ? true : false;
3234 }
3235
get_frequency_from_cpuid()3236 uint64 get_frequency_from_cpuid() // from Pat Fay (Intel)
3237 {
3238 double speed=0;
3239 std::string brand = PCM::getCPUBrandString();
3240 if (brand.length() > std::string::size_type(0))
3241 {
3242 std::string::size_type unitsg = brand.find("GHz");
3243 if(unitsg != std::string::npos)
3244 {
3245 std::string::size_type atsign = brand.rfind(' ', unitsg);
3246 if(atsign != std::string::npos)
3247 {
3248 std::istringstream(brand.substr(atsign)) >> speed;
3249 speed *= 1000;
3250 }
3251 }
3252 else
3253 {
3254 std::string::size_type unitsg = brand.find("MHz");
3255 if(unitsg != std::string::npos)
3256 {
3257 std::string::size_type atsign = brand.rfind(' ', unitsg);
3258 if(atsign != std::string::npos)
3259 {
3260 std::istringstream(brand.substr(atsign)) >> speed;
3261 }
3262 }
3263 }
3264 }
3265 return (uint64)(speed * 1000. * 1000.);
3266 }
3267
getSupportedUarchCodenames() const3268 std::string PCM::getSupportedUarchCodenames() const
3269 {
3270 std::ostringstream ostr;
3271 for(int32 i=0; i < static_cast<int32>(PCM::END_OF_MODEL_LIST) ; ++i)
3272 if(isCPUModelSupported((int)i))
3273 ostr << getUArchCodename(i) << ", ";
3274 return std::string(ostr.str().substr(0, ostr.str().length() - 2));
3275 }
3276
getUnsupportedMessage() const3277 std::string PCM::getUnsupportedMessage() const
3278 {
3279 std::ostringstream ostr;
3280 ostr << "Error: unsupported processor. Only Intel(R) processors are supported (Atom(R) and microarchitecture codename " << getSupportedUarchCodenames() << ").";
3281 return std::string(ostr.str());
3282 }
3283
// Measure the maximum QPI link speed on a Beckton (EX) system by counting
// flits sent on R-Box port 0 during a ~200 ms busy-wait window and scaling
// the delta to per-second units; the result is stored in max_qpi_speed.
// NOTE(review): the *8 factor presumably converts flits to bytes/second —
// confirm against the R-Box flit event definition.
void PCM::computeQPISpeedBeckton(int core_nr)
{
    uint64 startFlits = 0;
    // reset all counters
    MSR[core_nr]->write(U_MSR_PMON_GLOBAL_CTL, 1 << 29ULL);

    // enable counting on all counters on the left side (port 0-3)
    MSR[core_nr]->write(R_MSR_PMON_GLOBAL_CTL_7_0, 255);
    // disable on the right side (port 4-7)
    MSR[core_nr]->write(R_MSR_PMON_GLOBAL_CTL_15_8, 0);

    // count flits sent
    MSR[core_nr]->write(R_MSR_PORT0_IPERF_CFG0, 1ULL << 31ULL);

    // choose counter 0 to monitor R_MSR_PORT0_IPERF_CFG0
    MSR[core_nr]->write(R_MSR_PMON_CTL0, 1 + 2 * (0));

    // enable all counters
    MSR[core_nr]->write(U_MSR_PMON_GLOBAL_CTL, (1 << 28ULL) + 1);

    MSR[core_nr]->read(R_MSR_PMON_CTR0, &startFlits);

    const uint64 timerGranularity = 1000000ULL; // mks (microseconds)
    uint64 startTSC = getTickCount(timerGranularity, (uint32) core_nr);
    uint64 endTSC;
    do
    {
        endTSC = getTickCount(timerGranularity, (uint32) core_nr);
    } while (endTSC - startTSC < 200000ULL); // spin for 200 ms

    uint64 endFlits = 0;
    MSR[core_nr]->read(R_MSR_PMON_CTR0, &endFlits);
    // flit delta scaled to a per-second rate over the measured window
    max_qpi_speed = (endFlits - startFlits) * 8ULL * timerGranularity / (endTSC - startTSC);

}
3319
checkCustomCoreProgramming(std::shared_ptr<SafeMsrHandle> msr)3320 uint32 PCM::checkCustomCoreProgramming(std::shared_ptr<SafeMsrHandle> msr)
3321 {
3322 const auto core = msr->getCoreId();
3323 if (size_t(core) >= lastProgrammedCustomCounters.size() || canUsePerf)
3324 {
3325 // checking 'canUsePerf'because corruption detection curently works
3326 // only if perf is not used, see https://github.com/opcm/pcm/issues/106
3327 return 0;
3328 }
3329 uint32 corruptedCountersMask = 0;
3330
3331 for (size_t ctr = 0; ctr < lastProgrammedCustomCounters[core].size(); ++ctr)
3332 {
3333 EventSelectRegister current;
3334 if (msr->read(IA32_PERFEVTSEL0_ADDR + ctr, ¤t.value) != sizeof(current.value))
3335 {
3336 std::cerr << "PCM Error: can not read MSR 0x" << std::hex << (IA32_PERFEVTSEL0_ADDR + ctr) <<
3337 " on core " << std::dec << core << "\n";
3338 continue;
3339 }
3340 if (canUsePerf)
3341 {
3342 current.fields.apic_int = 0; // perf sets this bit
3343 }
3344 if (current.value != lastProgrammedCustomCounters[core][ctr].value)
3345 {
3346 std::cerr << "PCM Error: someone has corrupted custom counter " << ctr << " on core " << core
3347 << " expected value " << lastProgrammedCustomCounters[core][ctr].value << " value read "
3348 << current.value << "\n";
3349
3350 corruptedCountersMask |= (1<<ctr);
3351 }
3352 }
3353 return corruptedCountersMask;
3354 }
3355
// Heuristically detect whether another agent (e.g. the NMI watchdog or a
// different profiler) is already using the core PMU. Returns true when the
// PMU appears busy; PCM then refuses to program it. Known NMI-watchdog
// leftovers are tolerated when needToRestoreNMIWatchdog is set.
bool PCM::PMUinUse()
{
    // follow the "Performance Monitoring Unit Sharing Guide" by P. Irelan and Sh. Kuo
    for (int i = 0; i < (int)num_cores; ++i)
    {
        //std::cout << "Core " << i << " examine registers\n";
        uint64 value = 0;
        if (perfmon_version >= 4)
        {
            // perfmon v4 exposes an explicit in-use bitmask
            MSR[i]->read(MSR_PERF_GLOBAL_INUSE, &value);
            for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
            {
                if (value & (1ULL << j))
                {
                    std::cerr << "WARNING: Custom counter " << j << " is in use. MSR_PERF_GLOBAL_INUSE on core " << i << ": 0x" << std::hex << value << std::dec << "\n";
                    /*
                    Testing MSR_PERF_GLOBAL_INUSE mechanism for a moment. At a later point in time will report BUSY.
                    return true;
                    */
                }
            }
        }

        MSR[i]->read(IA32_CR_PERF_GLOBAL_CTRL, &value);
        // std::cout << "Core " << i << " IA32_CR_PERF_GLOBAL_CTRL is " << std::hex << value << std::dec << "\n";

        EventSelectRegister event_select_reg;
        event_select_reg.value = 0xFFFFFFFFFFFFFFFF;

        // a non-zero event select or an installed PMI indicates another PMU user
        for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
        {
            MSR[i]->read(IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value);

            if (event_select_reg.fields.event_select != 0 || event_select_reg.fields.apic_int != 0)
            {
                std::cerr << "WARNING: Core " << i <<" IA32_PERFEVTSEL" << j << "_ADDR is not zeroed " << event_select_reg.value << "\n";

                if (needToRestoreNMIWatchdog == true && event_select_reg.fields.event_select == 0x3C && event_select_reg.fields.umask == 0)
                {
                    // NMI watchdog did not clear its event, ignore it
                    continue;
                }
                return true;
            }
        }

        FixedEventControlRegister ctrl_reg;
        ctrl_reg.value = 0xffffffffffffffff;

        MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);

        // Check if someone has installed pmi handler on counter overflow.
        // If so, that agent might potentially need to change counter value
        // for the "sample after"-mode messing up PCM measurements
        if(ctrl_reg.fields.enable_pmi0 || ctrl_reg.fields.enable_pmi1 || ctrl_reg.fields.enable_pmi2)
        {
            std::cerr << "WARNING: Core " << i << " fixed ctrl:" << ctrl_reg.value << "\n";
            if (needToRestoreNMIWatchdog == false) // if NMI watchdog did not clear the fields, ignore it
            {
                return true;
            }
        }
#if 0
        // either os=0,usr=0 (not running) or os=1,usr=1 (fits PCM modus) are ok, other combinations are not
        if(ctrl_reg.fields.os0 != ctrl_reg.fields.usr0 ||
           ctrl_reg.fields.os1 != ctrl_reg.fields.usr1 ||
           ctrl_reg.fields.os2 != ctrl_reg.fields.usr2)
        {
            std::cerr << "WARNING: Core " << i << " fixed ctrl:" << ctrl_reg.value << "\n";
            return true;
        }
#endif
    }
    //std::cout << std::flush
    return false;
}
3432
// Map a PCM cpu model id to a human-readable microarchitecture codename.
// Passing a negative cpu_model_param (the default) means "the CPU this PCM
// instance detected", which additionally enables stepping-based
// disambiguation of SKX into Cascade Lake / Cooper Lake.
const char * PCM::getUArchCodename(const int32 cpu_model_param) const
{
    auto cpu_model_ = cpu_model_param;
    if(cpu_model_ < 0)
        cpu_model_ = this->cpu_model ;

    switch(cpu_model_)
    {
        case CENTERTON:
            return "Centerton";
        case BAYTRAIL:
            return "Baytrail";
        case AVOTON:
            return "Avoton";
        case CHERRYTRAIL:
            return "Cherrytrail";
        case APOLLO_LAKE:
            return "Apollo Lake";
        case DENVERTON:
            return "Denverton";
        case SNOWRIDGE:
            return "Snowridge";
        case NEHALEM_EP:
        case NEHALEM:
            return "Nehalem/Nehalem-EP";
        case ATOM:
            return "Atom(tm)";
        case CLARKDALE:
            return "Westmere/Clarkdale";
        case WESTMERE_EP:
            return "Westmere-EP";
        case NEHALEM_EX:
            return "Nehalem-EX";
        case WESTMERE_EX:
            return "Westmere-EX";
        case SANDY_BRIDGE:
            return "Sandy Bridge";
        case JAKETOWN:
            return "Sandy Bridge-EP/Jaketown";
        case IVYTOWN:
            return "Ivy Bridge-EP/EN/EX/Ivytown";
        case HASWELLX:
            return "Haswell-EP/EN/EX";
        case BDX_DE:
            return "Broadwell-DE";
        case BDX:
            return "Broadwell-EP/EX";
        case KNL:
            return "Knights Landing";
        case IVY_BRIDGE:
            return "Ivy Bridge";
        case HASWELL:
            return "Haswell";
        case BROADWELL:
            return "Broadwell";
        case SKL:
            return "Skylake";
        case SKL_UY:
            return "Skylake U/Y";
        case KBL:
            return "Kabylake";
        case KBL_1:
            return "Kabylake/Whiskey Lake";
        case CML:
            return "Comet Lake";
        case ICL:
            return "Icelake";
        case RKL:
            return "Rocket Lake";
        case TGL:
            return "Tiger Lake";
        case SKX:
            if (cpu_model_param >= 0)
            {
                // query for specified cpu_model_param, stepping not provided
                return "Skylake-SP, Cascade Lake-SP";
            }
            // local CPU: refine by stepping
            if (isCLX())
            {
                return "Cascade Lake-SP";
            }
            if (isCPX())
            {
                return "Cooper Lake";
            }
            return "Skylake-SP";
        case ICX:
            return "Icelake-SP";
    }
    return "unknown";
}
3524
cleanupPMU(const bool silent)3525 void PCM::cleanupPMU(const bool silent)
3526 {
3527 #ifdef PCM_USE_PERF
3528 if(canUsePerf)
3529 {
3530 for (int i = 0; i < num_cores; ++i)
3531 for(int c = 0; c < PERF_MAX_COUNTERS; ++c)
3532 ::close(perfEventHandle[i][c]);
3533
3534 return;
3535 }
3536 #endif
3537
3538 // follow the "Performance Monitoring Unit Sharing Guide" by P. Irelan and Sh. Kuo
3539 for (int i = 0; i < (int)num_cores; ++i)
3540 {
3541 // disable generic counters and continue free running counting for fixed counters
3542 MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, (1ULL << 32) + (1ULL << 33) + (1ULL << 34));
3543
3544 for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
3545 {
3546 MSR[i]->write(IA32_PERFEVTSEL0_ADDR + j, 0);
3547 }
3548 }
3549
3550 if(cpu_model == JAKETOWN)
3551 enableJKTWorkaround(false);
3552
3553 #ifndef PCM_SILENT
3554 if (!silent) std::cerr << " Zeroed PMU registers\n";
3555 #endif
3556 }
3557
cleanupUncorePMUs(const bool silent)3558 void PCM::cleanupUncorePMUs(const bool silent)
3559 {
3560 for (auto & sPMUs : iioPMUs)
3561 {
3562 for (auto & pmu : sPMUs)
3563 {
3564 pmu.second.cleanup();
3565 }
3566 }
3567 for (auto & sCBOPMUs : cboPMUs)
3568 {
3569 for (auto & pmu : sCBOPMUs)
3570 {
3571 pmu.cleanup();
3572 }
3573 }
3574 for (auto & pmu : pcuPMUs)
3575 {
3576 pmu.cleanup();
3577 }
3578 for (auto & uncore : server_pcicfg_uncore)
3579 {
3580 uncore->cleanupPMUs();
3581 }
3582 #ifndef PCM_SILENT
3583 if (!silent) std::cerr << " Zeroed uncore PMU registers\n";
3584 #endif
3585 }
3586
resetPMU()3587 void PCM::resetPMU()
3588 {
3589 for (int i = 0; i < (int)MSR.size(); ++i)
3590 {
3591 // disable all counters
3592 MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, 0);
3593
3594 for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
3595 {
3596 MSR[i]->write(IA32_PERFEVTSEL0_ADDR + j, 0);
3597 }
3598
3599
3600 FixedEventControlRegister ctrl_reg;
3601 ctrl_reg.value = 0xffffffffffffffff;
3602
3603 MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);
3604 if ((ctrl_reg.fields.os0 ||
3605 ctrl_reg.fields.usr0 ||
3606 ctrl_reg.fields.enable_pmi0 ||
3607 ctrl_reg.fields.os1 ||
3608 ctrl_reg.fields.usr1 ||
3609 ctrl_reg.fields.enable_pmi1 ||
3610 ctrl_reg.fields.os2 ||
3611 ctrl_reg.fields.usr2 ||
3612 ctrl_reg.fields.enable_pmi2)
3613 != 0)
3614 MSR[i]->write(IA32_CR_FIXED_CTR_CTRL, 0);
3615 }
3616
3617 #ifndef PCM_SILENT
3618 std::cerr << " Zeroed PMU registers\n";
3619 #endif
3620 }
cleanupRDT(const bool silent)3621 void PCM::cleanupRDT(const bool silent)
3622 {
3623 if(!(QOSMetricAvailable() && L3QOSMetricAvailable())) {
3624 return;
3625 }
3626 #ifdef __linux__
3627 if (useResctrl)
3628 {
3629 resctrl.cleanup();
3630 return;
3631 }
3632 #endif
3633
3634 for(int32 core = 0; core < num_cores; core ++ )
3635 {
3636 if(!isCoreOnline(core)) continue;
3637 uint64 msr_pqr_assoc = 0 ;
3638 uint64 msr_qm_evtsel = 0;
3639 int32 rmid = 0;
3640 int32 event = 0;
3641
3642 //Read 0xC8F MSR for each core
3643 MSR[core]->read(IA32_PQR_ASSOC, &msr_pqr_assoc);
3644 msr_pqr_assoc &= 0xffffffff00000000ULL;
3645
3646 //Write 0xC8F MSR with RMID 0
3647 MSR[core]->write(IA32_PQR_ASSOC,msr_pqr_assoc);
3648
3649 msr_qm_evtsel = rmid & ((1ULL<<10)-1ULL) ;
3650 msr_qm_evtsel <<= 32 ;
3651 msr_qm_evtsel |= event & ((1ULL<<8)-1ULL);
3652
3653 //Write Event Id as 0 and RMID 0 to the MSR for each core
3654 MSR[core]->write(IA32_QM_EVTSEL,msr_qm_evtsel);
3655
3656 }
3657
3658
3659 if (!silent) std::cerr << " Freeing up all RMIDs\n";
3660 }
3661
// Redirect std::cout into the given file; the previous stream buffer is
// saved in backup_ofile so restoreOutput() can undo the redirection.
// NOTE(review): 'outfile' is heap-allocated and never deleted —
// restoreOutput() only closes it. A small one-shot leak; consider making
// the member a smart pointer (requires a header change).
void PCM::setOutput(const std::string filename)
{
    outfile = new std::ofstream(filename.c_str());
    backup_ofile = std::cout.rdbuf();
    std::cout.rdbuf(outfile->rdbuf());
}
3668
restoreOutput()3669 void PCM::restoreOutput()
3670 {
3671 // restore cout back to what it was originally
3672 if(backup_ofile)
3673 std::cout.rdbuf(backup_ofile);
3674
3675 // close output file
3676 if(outfile)
3677 outfile->close();
3678 }
3679
cleanup(const bool silent)3680 void PCM::cleanup(const bool silent)
3681 {
3682 InstanceLock lock(allow_multiple_instances);
3683
3684 if (MSR.empty()) return;
3685
3686 if (!silent) std::cerr << "Cleaning up\n";
3687
3688 if (decrementInstanceSemaphore())
3689 cleanupPMU(silent);
3690
3691 disableForceRTMAbortMode(silent);
3692
3693 cleanupUncorePMUs(silent);
3694 cleanupRDT(silent);
3695 #ifdef __linux__
3696 if (needToRestoreNMIWatchdog)
3697 {
3698 enableNMIWatchdog(silent);
3699 needToRestoreNMIWatchdog = false;
3700 }
3701 #endif
3702 }
3703
3704 // hle is only available when cpuid has this:
3705 // HLE: CPUID.07H.EBX.HLE [bit 4] = 1
supportsHLE() const3706 bool PCM::supportsHLE() const
3707 {
3708 PCM_CPUID_INFO info;
3709 pcm_cpuid(7, 0, info); // leaf 7, subleaf 0
3710
3711 return (info.reg.ebx & (0x1 << 4)) ? true : false;
3712 }
3713
3714 // rtm is only available when cpuid has this:
3715 // RTM: CPUID.07H.EBX.RTM [bit 11] = 1
supportsRTM() const3716 bool PCM::supportsRTM() const
3717 {
3718 PCM_CPUID_INFO info;
3719 pcm_cpuid(7, 0, info); // leaf 7, subleaf 0
3720
3721 return (info.reg.ebx & (0x1 << 11)) ? true : false;
3722 }
3723
3724 #ifdef __APPLE__
3725
// Current number of running PCM instances (Apple: forwarded to the MSR
// kernel driver, which keeps the shared count).
uint32 PCM::getNumInstances()
{
    return MSR[0]->getNumInstances();
}
3730
3731
// Increment the shared instance count kept by the MSR kernel driver and
// return the new value.
uint32 PCM::incrementNumInstances()
{
    return MSR[0]->incrementNumInstances();
}
3736
decrementNumInstances()3737 uint32 PCM::decrementNumInstances()
3738 {
3739 return MSR[0]->decrementNumInstances();;
3740 }
3741
// Interpret a raw byte buffer holding an integer of unknown width as int.
// 'size' is the stored width in bytes; 'value' must point to at least 'size'
// readable bytes. Widths wider than int are truncated to int, as before.
// Fix: use memcpy instead of casting the char pointer, which performed
// potentially misaligned reads and violated strict aliasing.
int convertUnknownToInt(size_t size, char* value)
{
    if (sizeof(long) == size && sizeof(long) != sizeof(int))
    {
        long result = 0;
        memcpy(&result, value, sizeof(result));
        return (int)result;
    }
    if (sizeof(long long) == size && sizeof(long long) != sizeof(int))
    {
        long long result = 0;
        memcpy(&result, value, sizeof(result));
        return (int)result;
    }
    // exactly int-sized, or unknown width: guess int (matches prior behavior)
    int result = 0;
    memcpy(&result, value, sizeof(result));
    return result;
}
3762
3763 #endif
3764
// Decrements the global (cross-process) PCM instance counter.
// @return true iff the caller held the last live instance and therefore
//         must perform the core-PMU cleanup.
bool PCM::decrementInstanceSemaphore()
{
    if(allow_multiple_instances == false)
    {
        // single-instance mode: cleanup is needed exactly when we programmed the PMU
        return programmed_pmu;
    }
    bool isLastInstance = false;
    // when decrement was called before program() the numInstancesSemaphore
    // may not be initialized, causing SIGSEGV. This fixes it.
    if(numInstancesSemaphore == NULL)
        return true;

#ifdef _MSC_VER
    // NOTE(review): two consecutive waits -- the first appears to consume this
    // instance's reference, the second probes whether any other instance still
    // holds one; confirm against the matching ReleaseSemaphore in program()
    WaitForSingleObject(numInstancesSemaphore, 0);

    DWORD res = WaitForSingleObject(numInstancesSemaphore, 0);
    if (res == WAIT_TIMEOUT)
    {
        // I have the last instance of monitor

        isLastInstance = true;

        CloseHandle(numInstancesSemaphore);
    }
    else if (res == WAIT_OBJECT_0)
    {
        // another instance exists: return the reference consumed by the probe
        ReleaseSemaphore(numInstancesSemaphore, 1, NULL);

        // std::cerr << "Someone else is running monitor instance, no cleanup needed\n";
    }
    else
    {
        // unknown error
        std::cerr << "ERROR: Bad semaphore. Performed cleanup twice?\n";
    }

#elif __APPLE__
    // macOS: the POSIX semaphore only serializes access to the counter that
    // lives in the MSR kernel driver (see get/decrementNumInstances)
    sem_wait(numInstancesSemaphore);
    uint32 oldValue = PCM::getNumInstances();
    sem_post(numInstancesSemaphore);
    if(oldValue == 0)
    {
        // see same case for linux
        return false;
    }
    sem_wait(numInstancesSemaphore);
    uint32 currValue = PCM::decrementNumInstances();
    sem_post(numInstancesSemaphore);
    if(currValue == 0){
        isLastInstance = true;
    }

#else // if linux
    // Linux: the semaphore value itself is the instance count
    int oldValue = -1;
    sem_getvalue(numInstancesSemaphore, &oldValue);
    if(oldValue == 0)
    {
        // the current value is already zero, somewhere the semaphore has been already decremented (and thus the clean up has been done if needed)
        // that means logically we are do not own the last instance anymore, thus returning false
        return false;
    }
    sem_wait(numInstancesSemaphore);
    int curValue = -1;
    sem_getvalue(numInstancesSemaphore, &curValue);
    if (curValue == 0)
    {
        // I have the last instance of monitor

        isLastInstance = true;

        // std::cerr << "I am the last one\n";
    }
#endif // end ifdef _MSC_VER

    return isLastInstance;
}
3841
getTickCount(uint64 multiplier,uint32 core)3842 uint64 PCM::getTickCount(uint64 multiplier, uint32 core)
3843 {
3844 return (multiplier * getInvariantTSC(CoreCounterState(), getCoreCounterState(core))) / getNominalFrequency();
3845 }
3846
getTickCountRDTSCP(uint64 multiplier)3847 uint64 PCM::getTickCountRDTSCP(uint64 multiplier)
3848 {
3849 return (multiplier*RDTSCP())/getNominalFrequency();
3850 }
3851
getSystemCounterState()3852 SystemCounterState getSystemCounterState()
3853 {
3854 PCM * inst = PCM::getInstance();
3855 SystemCounterState result;
3856 if (inst) result = inst->getSystemCounterState();
3857 return result;
3858 }
3859
getSocketCounterState(uint32 socket)3860 SocketCounterState getSocketCounterState(uint32 socket)
3861 {
3862 PCM * inst = PCM::getInstance();
3863 SocketCounterState result;
3864 if (inst) result = inst->getSocketCounterState(socket);
3865 return result;
3866 }
3867
getCoreCounterState(uint32 core)3868 CoreCounterState getCoreCounterState(uint32 core)
3869 {
3870 PCM * inst = PCM::getInstance();
3871 CoreCounterState result;
3872 if (inst) result = inst->getCoreCounterState(core);
3873 return result;
3874 }
3875
3876 #ifdef PCM_USE_PERF
// Reads all core PMU counter values programmed through the Linux perf
// interface for 'core' into outData.
// outData layout: fixed counters, then general-purpose counters, then (when
// top-down/TMA is supported) the PERF_TOPDOWN_COUNTERS values appended after.
void PCM::readPerfData(uint32 core, std::vector<uint64> & outData)
{
    // reads one perf event group (leader fd + num_counters members) with a
    // single read() syscall; zero-fills outData when the group is not open
    auto readPerfDataHelper = [this](const uint32 core, std::vector<uint64>& outData, const uint32 leader, const uint32 num_counters)
    {
        if (perfEventHandle[core][leader] < 0)
        {
            std::fill(outData.begin(), outData.end(), 0);
            return;
        }
        uint64 data[1 + PERF_MAX_COUNTERS];
        const int32 bytes2read = sizeof(uint64) * (1 + num_counters);
        int result = ::read(perfEventHandle[core][leader], data, bytes2read);
        // data layout: nr counters; counter 0, counter 1, counter 2,...
        if (result != bytes2read)
        {
            std::cerr << "Error while reading perf data. Result is " << result << "\n";
            std::cerr << "Check if you run other competing Linux perf clients.\n";
        }
        else if (data[0] != num_counters)
        {
            // data[0] is the group's member count reported by the kernel
            std::cerr << "Number of counters read from perf is wrong. Elements read: " << data[0] << "\n";
        }
        else
        { // copy all counters, they start from position 1 in data
            std::copy((data + 1), (data + 1) + data[0], outData.begin());
        }
    };
    readPerfDataHelper(core, outData, PERF_GROUP_LEADER_COUNTER, core_fixed_counter_num_used + core_gen_counter_num_used);
    if (isHWTMAL1Supported() && perfSupportsTopDown())
    {
        // top-down counters are a separate perf group; read them into a
        // scratch vector and append after the fixed+general counters
        std::vector<uint64> outTopDownData(outData.size(), 0);
        readPerfDataHelper(core, outTopDownData, PERF_TOPDOWN_GROUP_LEADER_COUNTER, PERF_TOPDOWN_COUNTERS);
        std::copy(outTopDownData.begin(), outTopDownData.begin() + PERF_TOPDOWN_COUNTERS, outData.begin() + core_fixed_counter_num_used + core_gen_counter_num_used);
    }
}
3912 #endif
3913
// Reads the invariant TSC of the core behind 'msr' and accumulates it into
// this counter state.
void BasicCounterState::readAndAggregateTSC(std::shared_ptr<SafeMsrHandle> msr)
{
    uint64 cInvariantTSC = 0;
    PCM * m = PCM::getInstance();
    const auto cpu_model = m->getCPUModel();
    // non-Atom CPUs (and Avoton) read IA32_TIME_STAMP_COUNTER directly
    if(m->isAtom() == false || cpu_model == PCM::AVOTON) msr->read(IA32_TIME_STAMP_COUNTER, &cInvariantTSC);
    else
    {
        // other Atoms: approximate the TSC as wall-clock time x nominal frequency
#ifdef _MSC_VER
        // NOTE(review): GetTickCount() is 32-bit (wraps after ~49.7 days) and
        // the /1000 truncates to whole seconds -- coarse estimate; confirm
        // whether GetTickCount64 would be acceptable here
        cInvariantTSC = ((static_cast<uint64>(GetTickCount()/1000ULL)))*m->getNominalFrequency();
#else
        struct timeval tp;
        gettimeofday(&tp, NULL);
        cInvariantTSC = (double(tp.tv_sec) + tp.tv_usec / 1000000.)*m->getNominalFrequency();
#endif
    }
    InvariantTSC += cInvariantTSC;
}
3932
// Reads all core PMU counters (fixed, general-purpose, top-down/TMA), RDT
// monitoring data, C-state residencies, thermal status and SMI count for the
// core behind 'msr', and accumulates them into this counter state.
// Uses perf when available, otherwise direct MSR reads with an
// overflow-consistent retry loop.
void BasicCounterState::readAndAggregate(std::shared_ptr<SafeMsrHandle> msr)
{
    uint64 cInstRetiredAny = 0, cCpuClkUnhaltedThread = 0, cCpuClkUnhaltedRef = 0;
    uint64 cL3Occupancy = 0;
    uint64 cCustomEvents[PERF_MAX_CUSTOM_COUNTERS] = {0ULL, 0ULL, 0ULL, 0ULL, 0ULL, 0ULL, 0ULL, 0ULL };
    uint64 cCStateResidency[PCM::MAX_C_STATE + 1];
    memset(cCStateResidency, 0, sizeof(cCStateResidency));
    uint64 thermStatus = 0;
    uint64 cSMICount = 0;
    uint64 cFrontendBoundSlots = 0;
    uint64 cBadSpeculationSlots = 0;
    uint64 cBackendBoundSlots = 0;
    uint64 cRetiringSlots = 0;
    uint64 cAllSlotsRaw = 0;
    const int32 core_id = msr->getCoreId();
    TemporalThreadAffinity tempThreadAffinity(core_id); // speedup trick for Linux

    PCM * m = PCM::getInstance();
    const int32 core_gen_counter_num_max = m->getMaxCustomCoreEvents();
    uint64 overflows = 0;

    // counters reprogrammed by a competing agent are reported as corrupted
    const auto corruptedCountersMask = m->checkCustomCoreProgramming(msr);
    // reading core PMU counters
#ifdef PCM_USE_PERF
    if(m->canUsePerf)
    {
        std::vector<uint64> perfData(PERF_MAX_COUNTERS, 0ULL);
        m->readPerfData(msr->getCoreId(), perfData);
        cInstRetiredAny = perfData[PCM::PERF_INST_RETIRED_POS];
        cCpuClkUnhaltedThread = perfData[PCM::PERF_CPU_CLK_UNHALTED_THREAD_POS];
        cCpuClkUnhaltedRef = perfData[PCM::PERF_CPU_CLK_UNHALTED_REF_POS];
        for (int i = 0; i < core_gen_counter_num_max; ++i)
        {
            cCustomEvents[i] = perfData[PCM::PERF_GEN_EVENT_0_POS + i];
        }
        if (m->isHWTMAL1Supported() && perfSupportsTopDown())
        {
            cFrontendBoundSlots = perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_FRONTEND_POS]];
            cBadSpeculationSlots = perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_BADSPEC_POS]];
            cBackendBoundSlots = perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_BACKEND_POS]];
            cRetiringSlots = perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_RETIRING_POS]];
            cAllSlotsRaw = perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_SLOTS_POS]];
            // if (core_id == 0) std::cout << "DEBUG: "<< cAllSlotsRaw << " " << cFrontendBoundSlots << " " << cBadSpeculationSlots << " " << cBackendBoundSlots << " " << cRetiringSlots << std::endl;
        }
    }
    else
#endif
    {
        uint64 overflows_after = 0;

        // re-read all counters until no overflow occurred mid-read, so the
        // overflow status matches the counter values of the same snapshot
        do
        {
            msr->read(IA32_PERF_GLOBAL_STATUS, &overflows); // read overflows
            // std::cerr << "Debug " << core_id << " IA32_PERF_GLOBAL_STATUS: " << overflows << std::endl;

            msr->read(INST_RETIRED_ADDR, &cInstRetiredAny);
            msr->read(CPU_CLK_UNHALTED_THREAD_ADDR, &cCpuClkUnhaltedThread);
            msr->read(CPU_CLK_UNHALTED_REF_ADDR, &cCpuClkUnhaltedRef);
            for (int i = 0; i < core_gen_counter_num_max; ++i)
            {
                msr->read(IA32_PMC0 + i, &cCustomEvents[i]);
            }

            msr->read(IA32_PERF_GLOBAL_STATUS, &overflows_after); // read overflows again
            // std::cerr << "Debug " << core_id << " IA32_PERF_GLOBAL_STATUS: " << overflows << std::endl;

        } while (overflows != overflows_after); // repeat the reading if an overflow happened during the reading

        msr->write(IA32_PERF_GLOBAL_OVF_CTRL, overflows); // clear overflows
        if (m->isHWTMAL1Supported())
        {
            // top-down (TMA level 1): PERF_METRICS holds 8-bit fractions of
            // the slot budget per category; scale each by TOPDOWN_SLOTS and
            // accumulate into the per-core running totals
            uint64 perfMetrics = 0, slots = 0;
            msr->lock();
            msr->read(PERF_METRICS_ADDR, &perfMetrics);
            msr->read(TOPDOWN_SLOTS_ADDR, &slots);
            msr->write(PERF_METRICS_ADDR, 0);
            msr->write(TOPDOWN_SLOTS_ADDR, 0);
            cFrontendBoundSlots = extract_bits(perfMetrics, 16, 23);
            cBadSpeculationSlots = extract_bits(perfMetrics, 8, 15);
            cBackendBoundSlots = extract_bits(perfMetrics, 24, 31);
            cRetiringSlots = extract_bits(perfMetrics, 0, 7);
            const double total = double(cFrontendBoundSlots + cBadSpeculationSlots + cBackendBoundSlots + cRetiringSlots);
            cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots));
            cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots));
            cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots));
            cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots));
            cAllSlotsRaw = m->AllSlotsRaw[core_id] += slots;
            // std::cout << "DEBUG: "<< slots << " " << cFrontendBoundSlots << " " << cBadSpeculationSlots << " " << cBackendBoundSlots << " " << cRetiringSlots << std::endl;
            msr->unlock();
        }
    }

    // mark counters flagged as corrupted with an all-ones sentinel
    for (int i = 0; i < core_gen_counter_num_max; ++i)
    {
        if (corruptedCountersMask & (1<<i)) cCustomEvents[i] = ~0ULL;
    }

    // std::cout << "DEBUG1: " << msr->getCoreId() << " " << cInstRetiredAny << " \n";
    if (m->L3CacheOccupancyMetricAvailable() && m->useResctrl == false)
    {
        // RDT/CMT path via MSRs: select the L3 occupancy event, then read IA32_QM_CTR
        msr->lock();
        uint64 event = 1;
        m->initQOSevent(event, core_id);
        msr->read(IA32_QM_CTR, &cL3Occupancy);
        //std::cout << "readAndAggregate reading IA32_QM_CTR " << std::dec << cL3Occupancy << std::dec << "\n";
        msr->unlock();
    }

    m->readAndAggregateMemoryBWCounters(static_cast<uint32>(core_id), *this);

    readAndAggregateTSC(msr);

    // reading core C state counters
    for(int i=0; i <= (int)(PCM::MAX_C_STATE) ;++i)
        if(m->coreCStateMsr && m->coreCStateMsr[i])
            msr->read(m->coreCStateMsr[i], &(cCStateResidency[i]));

    // reading temperature
    msr->read(MSR_IA32_THERM_STATUS, &thermStatus);

    msr->read(MSR_SMI_COUNT, &cSMICount);

    // accumulate; checked_uint64 carries the per-counter overflow bit from
    // IA32_PERF_GLOBAL_STATUS along with the value
    InstRetiredAny += checked_uint64(m->extractCoreFixedCounterValue(cInstRetiredAny), extract_bits(overflows, 32, 32));
    CpuClkUnhaltedThread += checked_uint64(m->extractCoreFixedCounterValue(cCpuClkUnhaltedThread), extract_bits(overflows, 33, 33));
    CpuClkUnhaltedRef += checked_uint64(m->extractCoreFixedCounterValue(cCpuClkUnhaltedRef), extract_bits(overflows, 34, 34));
    for (int i = 0; i < core_gen_counter_num_max; ++i)
    {
        Event[i] += checked_uint64(m->extractCoreGenCounterValue(cCustomEvents[i]), extract_bits(overflows, i, i));
    }
#ifdef __linux__
    if (m->useResctrl)
    {
        // resctrl reports bytes; convert to KBytes
        L3Occupancy = m->resctrl.getL3OCC(core_id) / 1024;
    }
    else
#endif
    {
        //std::cout << "Scaling Factor " << m->L3ScalingFactor;
        cL3Occupancy = m->extractQOSMonitoring(cL3Occupancy);
        // invalid QoS readings are propagated unscaled as the sentinel value
        L3Occupancy = (cL3Occupancy==PCM_INVALID_QOS_MONITORING_DATA)? PCM_INVALID_QOS_MONITORING_DATA : (uint64)((double)(cL3Occupancy * m->L3ScalingFactor) / 1024.0);
    }
    for(int i=0; i <= int(PCM::MAX_C_STATE);++i)
        CStateResidency[i] += cCStateResidency[i];
    ThermalHeadroom = extractThermalHeadroom(thermStatus);
    SMICount += cSMICount;
    FrontendBoundSlots += cFrontendBoundSlots;
    BadSpeculationSlots += cBadSpeculationSlots;
    BackendBoundSlots += cBackendBoundSlots;
    RetiringSlots += cRetiringSlots;
    AllSlotsRaw += cAllSlotsRaw;
}
4084
programServerUncoreLatencyMetrics(bool enable_pmm)4085 PCM::ErrorCode PCM::programServerUncoreLatencyMetrics(bool enable_pmm)
4086 {
4087 uint32 DDRConfig[4] = {0,0,0,0};
4088
4089 if (enable_pmm == false)
4090 { //DDR is false
4091 if (ICX == cpu_model)
4092 {
4093 DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ occupancy
4094 DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert
4095 DDRConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy
4096 DDRConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Insert
4097
4098 } else {
4099
4100 DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy
4101 DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ Insert
4102 DDRConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy
4103 DDRConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Insert
4104 }
4105 } else {
4106 DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0xe0) + MC_CH_PCI_PMON_CTL_UMASK(1); // PMM RDQ occupancy
4107 DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0xe3) + MC_CH_PCI_PMON_CTL_UMASK(0); // PMM RDQ Insert
4108 DDRConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0xe4) + MC_CH_PCI_PMON_CTL_UMASK(1); // PMM WPQ Occupancy
4109 DDRConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(0xe7) + MC_CH_PCI_PMON_CTL_UMASK(0); // PMM WPQ Insert
4110 }
4111
4112 if (DDRLatencyMetricsAvailable())
4113 {
4114 for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
4115 {
4116 server_pcicfg_uncore[i]->programIMC(DDRConfig);
4117 }
4118 }
4119 return PCM::Success;
4120 }
4121
programServerUncoreMemoryMetrics(const ServerUncoreMemoryMetrics & metrics,int rankA,int rankB)4122 PCM::ErrorCode PCM::programServerUncoreMemoryMetrics(const ServerUncoreMemoryMetrics & metrics, int rankA, int rankB)
4123 {
4124 if(MSR.empty() || server_pcicfg_uncore.empty()) return PCM::MSRAccessDenied;
4125
4126 for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4127 {
4128 server_pcicfg_uncore[i]->programServerUncoreMemoryMetrics(metrics, rankA, rankB);
4129 }
4130
4131 return PCM::Success;
4132 }
4133
// Programs the server PCU (power control unit) PMU and the memory controller
// for power/frequency metrics.
// @param mc_profile  memory-controller event profile, forwarded to program_power_metrics()
// @param pcu_profile selects which PCU event set to program (see switch below)
// @param freq_bands  three frequency band thresholds for the PCU filter, in
//                    100 MHz units; NULL selects the defaults 1.0/2.0/3.0 GHz
// @return MSRAccessDenied when MSR or uncore handles are unavailable, Success otherwise
PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_profile, int * freq_bands)
{
    if(MSR.empty() || server_pcicfg_uncore.empty()) return PCM::MSRAccessDenied;

    uint32 PCUCntConf[4] = {0,0,0,0};

    PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0); // clock ticks

    // counter 0 stays on clock ticks (except where a profile overrides it);
    // counters 1..3 are profile-specific
    switch(pcu_profile)
    {
    case 0:
        PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0xB); // FREQ_BAND0_CYCLES
        PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0xC); // FREQ_BAND1_CYCLES
        PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0xD); // FREQ_BAND2_CYCLES
        break;
    case 1:
        PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x80) + PCU_MSR_PMON_CTL_OCC_SEL(1); // POWER_STATE_OCCUPANCY.C0 using CLOCKTICKS + 8th-bit
        PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x80) + PCU_MSR_PMON_CTL_OCC_SEL(2); // POWER_STATE_OCCUPANCY.C3 using CLOCKTICKS + 8th-bit
        PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x80) + PCU_MSR_PMON_CTL_OCC_SEL(3); // POWER_STATE_OCCUPANCY.C6 using CLOCKTICKS + 8th-bit
        break;
    case 2:
        PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x09); // PROCHOT_INTERNAL_CYCLES
        PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x0A); // PROCHOT_EXTERNAL_CYCLES
        PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES
        break;
    case 3:
        PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES
        PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES
        PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE)
        break;
    case 4: // not supported on SKX and ICX and SNOWRIDGE
        PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES
        PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES
        PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE)
        break;
    case 5:
        // frequency transition statistics; event encodings are model-specific
        if(JAKETOWN == cpu_model)
        {
            PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions
            PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL ; // cycles spent changing frequency
        } else if (IVYTOWN == cpu_model )
        {
            PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions
            PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES
        } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model)
        {
            PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions
            PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES
            if(HASWELLX == cpu_model)
            {
                PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x79) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of UFS transitions
                PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x79)                             ; // UFS transition cycles
            }
        } else
        {
            std::cerr << "ERROR: no frequency transition events defined for CPU model " << cpu_model << "\n";
        }
        break;
    case 6:
        // package C-state transition statistics
        if (IVYTOWN == cpu_model )
        {
            PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions
            PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions
        } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model)
        {
            PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x4E)                             ; // PC1e residenicies (not supported on SKX and ICX and SNOWRIDGE)
            PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX and ICX and SNOWRIDGE)
            PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions
            PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions
        } else
        {
            std::cerr << "ERROR: no package C-state transition events defined for CPU model " << cpu_model << "\n";
        }
        break;
    case 7:
        // UFS (uncore frequency scaling) transition causes
        if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model)
        {
            PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x7E) ; // UFS_TRANSITIONS_PERF_P_LIMIT
            PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x7D) ; // UFS_TRANSITIONS_IO_P_LIMIT
            PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x7A) ; // UFS_TRANSITIONS_UP_RING_TRAFFIC
            PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x7B) ; // UFS_TRANSITIONS_UP_STALL_CYCLES
        } else
        {
            std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n";
        }
        break;
    case 8:
        if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model)
        {
            PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x7C) ; // UFS_TRANSITIONS_DOWN
        } else
        {
            std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n";
        }
        break;
    default:
        // unknown profile: counters 1..3 stay unprogrammed (zero)
        std::cerr << "ERROR: unsupported PCU profile " << pcu_profile << "\n";
    }

    for (auto u : server_pcicfg_uncore)
    {
        u->program_power_metrics(mc_profile);
    }
    uint64 filter = 0;
    if (freq_bands == NULL)
    {
        filter =
            PCU_MSR_PMON_BOX_FILTER_BAND_0(10) + // 1000 MHz
            PCU_MSR_PMON_BOX_FILTER_BAND_1(20) + // 2000 MHz
            PCU_MSR_PMON_BOX_FILTER_BAND_2(30); // 3000 MHz
    }
    else
    {
        filter =
            PCU_MSR_PMON_BOX_FILTER_BAND_0(freq_bands[0]) +
            PCU_MSR_PMON_BOX_FILTER_BAND_1(freq_bands[1]) +
            PCU_MSR_PMON_BOX_FILTER_BAND_2(freq_bands[2]);
    }
    programPCU(PCUCntConf, filter);

    return PCM::Success;
}
4256
programPCU(uint32 * PCUCntConf,const uint64 filter)4257 void PCM::programPCU(uint32* PCUCntConf, const uint64 filter)
4258 {
4259 for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4260 {
4261 if (i >= (int)pcuPMUs.size())
4262 {
4263 continue;
4264 }
4265
4266 uint32 refCore = socketRefCore[i];
4267 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
4268
4269 pcuPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4270
4271 if (pcuPMUs[i].filter[0].get())
4272 {
4273 *pcuPMUs[i].filter[0] = filter;
4274 }
4275
4276 program(pcuPMUs[i], &PCUCntConf[0], &PCUCntConf[4], UNC_PMON_UNIT_CTL_FRZ_EN);
4277 }
4278 }
4279
// Programs PMUs from a raw (type name -> event list) configuration map.
// The "core" PMU is programmed first via the EXT_CUSTOM_CORE_EVENTS path;
// the remaining entries are dispatched by type name to the matching uncore
// programming routine.
// @return MSRAccessDenied without MSR access, UnknownError on a bad config,
//         Success otherwise
PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool silent)
{
    if (MSR.empty()) return PCM::MSRAccessDenied;
    RawPMUConfigs curPMUConfigs = curPMUConfigs_;
    // event at this position may carry extra (filter/MSR) values in first[1..2]
    constexpr auto globalRegPos = 0;
    if (curPMUConfigs.count("core"))
    {
        // need to program core PMU first
        EventSelectRegister regs[PERF_MAX_CUSTOM_COUNTERS];
        PCM::ExtendedCustomCoreEventDescription conf;
        conf.OffcoreResponseMsrValue[0] = 0;
        conf.OffcoreResponseMsrValue[1] = 0;
        FixedEventControlRegister fixedReg;

        auto corePMUConfig = curPMUConfigs["core"];
        if (corePMUConfig.programmable.size() > (size_t)getMaxCustomCoreEvents())
        {
            std::cerr << "ERROR: trying to program " << corePMUConfig.programmable.size() << " core PMU counters, which exceeds the max num possible ("<< getMaxCustomCoreEvents() << ").";
            return PCM::UnknownError;
        }
        size_t c = 0;
        for (; c < corePMUConfig.programmable.size() && c < (size_t)getMaxCustomCoreEvents() && c < PERF_MAX_CUSTOM_COUNTERS; ++c)
        {
            regs[c].value = corePMUConfig.programmable[c].first[0];
        }
        // the first programmable event may carry the offcore response MSR values
        if (globalRegPos < corePMUConfig.programmable.size())
        {
            conf.OffcoreResponseMsrValue[0] = corePMUConfig.programmable[globalRegPos].first[1];
            conf.OffcoreResponseMsrValue[1] = corePMUConfig.programmable[globalRegPos].first[2];
        }
        conf.nGPCounters = (uint32)c;
        conf.gpCounterCfg = regs;
        if (corePMUConfig.fixed.empty())
        {
            conf.fixedCfg = NULL; // default
        }
        else
        {
            // OR together all fixed-counter control values into one register
            fixedReg.value = 0;
            for (auto cfg : corePMUConfig.fixed)
            {
                fixedReg.value |= cfg.first[0];
            }
            conf.fixedCfg = &fixedReg;
        }

        const auto status = program(PCM::EXT_CUSTOM_CORE_EVENTS, &conf, silent);
        if (status != PCM::Success)
        {
            return status;
        }
        curPMUConfigs.erase("core");
    }
    // dispatch the remaining (uncore) PMU configs by type name
    for (auto pmuConfig : curPMUConfigs)
    {
        const auto & type = pmuConfig.first;
        const auto & events = pmuConfig.second;
        if (events.programmable.empty() && events.fixed.empty())
        {
            continue;
        }
        if (events.programmable.size() > ServerUncoreCounterState::maxCounters)
        {
            std::cerr << "ERROR: trying to program " << events.programmable.size() << " core PMU counters, which exceeds the max num possible (" << ServerUncoreCounterState::maxCounters << ").";
            return PCM::UnknownError;
        }
        // some programming routines take 32-bit configs, others 64-bit
        uint32 events32[ServerUncoreCounterState::maxCounters] = { 0,0,0,0 };
        uint64 events64[ServerUncoreCounterState::maxCounters] = { 0,0,0,0 };
        for (size_t c = 0; c < events.programmable.size() && c < ServerUncoreCounterState::maxCounters; ++c)
        {
            events32[c] = (uint32)events.programmable[c].first[0];
            events64[c] = events.programmable[c].first[0];
        }
        if (type == "m3upi")
        {
            for (auto uncore : server_pcicfg_uncore)
            {
                uncore->programM3UPI(events32);
            }
        }
        else if (type == "xpi" || type == "upi" || type == "qpi")
        {
            for (auto uncore : server_pcicfg_uncore)
            {
                uncore->programXPI(events32);
            }
        }
        else if (type == "imc")
        {
            for (auto uncore : server_pcicfg_uncore)
            {
                uncore->programIMC(events32);
            }
        }
        else if (type == "m2m")
        {
            for (auto uncore : server_pcicfg_uncore)
            {
                uncore->programM2M(events64);
            }
        }
        else if (type == "pcu")
        {
            // optional filter value travels in first[1] of the global-position event
            uint64 filter = 0;
            if (globalRegPos < events.programmable.size())
            {
                filter = events.programmable[globalRegPos].first[1];
            }
            programPCU(events32, filter);
        }
        else if (type == "ubox")
        {
            programUBOX(events64);
        }
        else if (type == "cbo" || type == "cha")
        {
            // two optional filters travel in first[1] and first[2]
            uint64 filter0 = 0, filter1 = 0;
            if (globalRegPos < events.programmable.size())
            {
                filter0 = events.programmable[globalRegPos].first[1];
                filter1 = events.programmable[globalRegPos].first[2];
            }
            programCboRaw(events64, filter0, filter1);
        }
        else if (type == "iio")
        {
            programIIOCounters(events64);
        }
        else
        {
            std::cerr << "ERROR: unrecognized PMU type \"" << type << "\"\n";
            return PCM::UnknownError;
        }
    }
    return PCM::Success;
}
4416
freezeServerUncoreCounters()4417 void PCM::freezeServerUncoreCounters()
4418 {
4419 for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4420 {
4421 server_pcicfg_uncore[i]->freezeCounters();
4422 pcuPMUs[i].freeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4423
4424 if (IIOEventsAvailable())
4425 {
4426 for (auto & pmu : iioPMUs[i])
4427 {
4428 pmu.second.freeze(UNC_PMON_UNIT_CTL_RSV);
4429 }
4430 }
4431
4432 const auto refCore = socketRefCore[i];
4433 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
4434 for (auto & pmu : cboPMUs[i])
4435 {
4436 pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4437 }
4438 }
4439 }
unfreezeServerUncoreCounters()4440 void PCM::unfreezeServerUncoreCounters()
4441 {
4442 for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4443 {
4444 server_pcicfg_uncore[i]->unfreezeCounters();
4445 pcuPMUs[i].unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4446
4447 if (IIOEventsAvailable())
4448 {
4449 for (auto & pmu : iioPMUs[i])
4450 {
4451 pmu.second.unfreeze(UNC_PMON_UNIT_CTL_RSV);
4452 }
4453 }
4454
4455 const auto refCore = socketRefCore[i];
4456 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
4457 for (auto & pmu : cboPMUs[i])
4458 {
4459 pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4460 }
4461 }
4462 }
readAndAggregate(std::shared_ptr<SafeMsrHandle> msr)4463 void UncoreCounterState::readAndAggregate(std::shared_ptr<SafeMsrHandle> msr)
4464 {
4465 const auto coreID = msr->getCoreId();
4466 TemporalThreadAffinity tempThreadAffinity(coreID); // speedup trick for Linux
4467
4468 auto pcm = PCM::getInstance();
4469 pcm->readAndAggregatePackageCStateResidencies(msr, *this);
4470 }
4471
getSystemCounterState()4472 SystemCounterState PCM::getSystemCounterState()
4473 {
4474 SystemCounterState result;
4475 if (MSR.size())
4476 {
4477 // read core and uncore counter state
4478 for (int32 core = 0; core < num_cores; ++core)
4479 if ( isCoreOnline( core ) )
4480 result.readAndAggregate(MSR[core]);
4481
4482 for (uint32 s = 0; s < (uint32)num_sockets; s++)
4483 {
4484 if ( isSocketOnline( s ) ) {
4485 readAndAggregateUncoreMCCounters(s, result);
4486 readAndAggregateEnergyCounters(s, result);
4487 }
4488 }
4489
4490 readQPICounters(result);
4491
4492 result.ThermalHeadroom = static_cast<int32>(PCM_INVALID_THERMAL_HEADROOM); // not available for system
4493 }
4494 return result;
4495 }
4496
// Accumulates per-core local/total memory bandwidth (RDT MBM) readings into
// 'result', in MBytes. Uses the Linux resctrl filesystem when enabled,
// otherwise the IA32_QM_CTR-backed handles in memory_bw_local/total.
template <class CounterStateType>
void PCM::readAndAggregateMemoryBWCounters(const uint32 core, CounterStateType & result)
{
#ifdef __linux__
    if (useResctrl)
    {
        // resctrl reports bytes; convert to MBytes
        if (CoreLocalMemoryBWMetricAvailable())
        {
            result.MemoryBWLocal += resctrl.getMBL(core) / (1024*1024);
        }
        if (CoreRemoteMemoryBWMetricAvailable())
        {
            result.MemoryBWTotal += resctrl.getMBT(core) / (1024*1024);
        }
        return;
    }
#endif
    uint64 cMemoryBWLocal = 0;
    uint64 cMemoryBWTotal = 0;

    if(core < memory_bw_local.size())
    {
        cMemoryBWLocal = memory_bw_local[core]->read();
        cMemoryBWLocal = extractQOSMonitoring(cMemoryBWLocal);
        //std::cout << "Read MemoryBWLocal " << cMemoryBWLocal << "\n";
        if(cMemoryBWLocal==PCM_INVALID_QOS_MONITORING_DATA)
            result.MemoryBWLocal = PCM_INVALID_QOS_MONITORING_DATA; // do not accumulate invalid reading
        else
            // scale raw QoS units to MBytes using the CPUID-reported factor
            result.MemoryBWLocal += (uint64)((double)(cMemoryBWLocal * L3ScalingFactor) / (1024.0 * 1024.0));
    }
    if(core < memory_bw_total.size())
    {
        cMemoryBWTotal = memory_bw_total[core]->read();
        cMemoryBWTotal = extractQOSMonitoring(cMemoryBWTotal);
        //std::cout << "Read MemoryBWTotal " << cMemoryBWTotal << "\n";
        if(cMemoryBWTotal==PCM_INVALID_QOS_MONITORING_DATA)
            result.MemoryBWTotal = PCM_INVALID_QOS_MONITORING_DATA; // do not accumulate invalid reading
        else
            result.MemoryBWTotal += (uint64)((double)(cMemoryBWTotal * L3ScalingFactor) / (1024.0 * 1024.0));
    }
    //std::cout << std::flush;
}
4539
template <class CounterStateType>
void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType & result)
{
    // Accumulates uncore memory-controller traffic counters (and, where
    // available, LLC miss latency / HA request counters) for one socket
    // into 'result'. The memory BW source is chosen in this priority:
    //   1) free-running counters (serverBW),
    //   2) per-socket PCICFG uncore PMUs (server_pcicfg_uncore),
    //   3) client-SoC bandwidth interface (clientBW, socket 0 only),
    //   4) legacy uncore MSRs (Nehalem/Westmere EP/EX).
    if (LLCReadMissLatencyMetricsAvailable())
    {
        result.TOROccupancyIAMiss += getCBOCounterState(socket, EventPosition::TOR_OCCUPANCY);
        result.TORInsertsIAMiss += getCBOCounterState(socket, EventPosition::TOR_INSERTS);
        result.UncClocks += getUncoreClocks(socket);
    }

    // Prefer the free-running bandwidth counters when present for this socket
    const bool ReadMCStatsFromServerBW = (socket < serverBW.size());
    if (ReadMCStatsFromServerBW)
    {
        result.UncMCNormalReads += serverBW[socket]->getImcReads();
        result.UncMCFullWrites += serverBW[socket]->getImcWrites();
        if (PMMTrafficMetricsAvailable())
        {
            result.UncPMMReads += serverBW[socket]->getPMMReads();
            result.UncPMMWrites += serverBW[socket]->getPMMWrites();
        }
    }

    if (hasPCICFGUncore())
    {
        if (server_pcicfg_uncore.size() && server_pcicfg_uncore[socket].get())
        {
            // Freeze so that all counters below are read from one consistent snapshot
            server_pcicfg_uncore[socket]->freezeCounters();
            if (ReadMCStatsFromServerBW == false) // avoid double-counting with serverBW above
            {
                result.UncMCNormalReads += server_pcicfg_uncore[socket]->getImcReads();
                result.UncMCFullWrites += server_pcicfg_uncore[socket]->getImcWrites();
            }
            if (localMemoryRequestRatioMetricAvailable())
            {
                // Request counters come from CHA on CHA-based uncores, otherwise from the Home Agent PMU
                if (hasCHA())
                {
                    result.UncHARequests += getCBOCounterState(socket, EventPosition::REQUESTS_ALL);
                    result.UncHALocalRequests += getCBOCounterState(socket, EventPosition::REQUESTS_LOCAL);
                }
                else
                {
                    result.UncHARequests += server_pcicfg_uncore[socket]->getHARequests();
                    result.UncHALocalRequests += server_pcicfg_uncore[socket]->getHALocalRequests();
                }
            }
            if (PMMTrafficMetricsAvailable() && (ReadMCStatsFromServerBW == false))
            {
                result.UncPMMReads += server_pcicfg_uncore[socket]->getPMMReads();
                result.UncPMMWrites += server_pcicfg_uncore[socket]->getPMMWrites();
            }
            if (MCDRAMmemoryTrafficMetricsAvailable())
            {
                result.UncEDCNormalReads += server_pcicfg_uncore[socket]->getEdcReads();
                result.UncEDCFullWrites += server_pcicfg_uncore[socket]->getEdcWrites();
            }
            server_pcicfg_uncore[socket]->unfreezeCounters();
        }
    }
    else if(clientBW.get() && socket == 0)
    {
        // Client (desktop/mobile) parts expose a single-socket bandwidth interface
        result.UncMCNormalReads += clientImcReads->read();
        result.UncMCFullWrites += clientImcWrites->read();
        result.UncMCIORequests += clientIoRequests->read();
    }
    else
    {
        // Legacy path: read raw uncore MSR counters on the socket's reference core
        std::shared_ptr<SafeMsrHandle> msr = MSR[socketRefCore[socket]];
        TemporalThreadAffinity tempThreadAffinity(socketRefCore[socket]); // speedup trick for Linux
        switch (cpu_model)
        {
        case PCM::WESTMERE_EP:
        case PCM::NEHALEM_EP:
        {
            uint64 cUncMCFullWrites = 0;
            uint64 cUncMCNormalReads = 0;
            msr->read(MSR_UNCORE_PMC0, &cUncMCFullWrites);
            msr->read(MSR_UNCORE_PMC1, &cUncMCNormalReads);
            result.UncMCFullWrites += extractUncoreGenCounterValue(cUncMCFullWrites);
            result.UncMCNormalReads += extractUncoreGenCounterValue(cUncMCNormalReads);
        }
        break;
        case PCM::NEHALEM_EX:
        case PCM::WESTMERE_EX:
        {
            // EX parts have two memory boxes (MB0/MB1); sum reads from both
            uint64 cUncMCNormalReads = 0;
            msr->read(MB0_MSR_PMU_CNT_0, &cUncMCNormalReads);
            result.UncMCNormalReads += extractUncoreGenCounterValue(cUncMCNormalReads);
            msr->read(MB1_MSR_PMU_CNT_0, &cUncMCNormalReads);
            result.UncMCNormalReads += extractUncoreGenCounterValue(cUncMCNormalReads);

            uint64 cUncMCFullWrites = 0; // really good approximation of
            msr->read(BB0_MSR_PERF_CNT_1, &cUncMCFullWrites);
            result.UncMCFullWrites += extractUncoreGenCounterValue(cUncMCFullWrites);
            msr->read(BB1_MSR_PERF_CNT_1, &cUncMCFullWrites);
            result.UncMCFullWrites += extractUncoreGenCounterValue(cUncMCFullWrites);
        }
        break;

        default:; // other models: no legacy MSR-based MC counters
        }
    }
}
4642
4643 template <class CounterStateType>
readAndAggregateEnergyCounters(const uint32 socket,CounterStateType & result)4644 void PCM::readAndAggregateEnergyCounters(const uint32 socket, CounterStateType & result)
4645 {
4646 if(socket < (uint32)energy_status.size())
4647 result.PackageEnergyStatus += energy_status[socket]->read();
4648
4649 if (socket < (uint32)dram_energy_status.size())
4650 result.DRAMEnergyStatus += dram_energy_status[socket]->read();
4651 }
4652
4653 template <class CounterStateType>
readAndAggregatePackageCStateResidencies(std::shared_ptr<SafeMsrHandle> msr,CounterStateType & result)4654 void PCM::readAndAggregatePackageCStateResidencies(std::shared_ptr<SafeMsrHandle> msr, CounterStateType & result)
4655 {
4656 // reading package C state counters
4657 uint64 cCStateResidency[PCM::MAX_C_STATE + 1];
4658 memset(cCStateResidency, 0, sizeof(cCStateResidency));
4659
4660 for(int i=0; i <= int(PCM::MAX_C_STATE) ;++i)
4661 if(pkgCStateMsr && pkgCStateMsr[i])
4662 msr->read(pkgCStateMsr[i], &(cCStateResidency[i]));
4663
4664 for (int i = 0; i <= int(PCM::MAX_C_STATE); ++i)
4665 {
4666 atomic_fetch_add((std::atomic<uint64> *)(result.CStateResidency + i), cCStateResidency[i]);
4667 }
4668 }
4669
void PCM::readQPICounters(SystemCounterState & result)
{
    // Reads the inter-socket link (QPI/UPI) traffic counters into 'result'.
    // Three mutually exclusive paths: EX-family MSR PMUs, EP-family
    // estimation from uncore PMCs, or the PCICFG uncore PMUs.
    std::vector<bool> SocketProcessed(num_sockets, false);
    if (cpu_model == PCM::NEHALEM_EX || cpu_model == PCM::WESTMERE_EX)
    {
        for (int32 core = 0; core < num_cores; ++core)
        {
            if(isCoreOnline(core) == false) continue;

            // uncore TSC is read once, from socket 0's reference core
            if(core == socketRefCore[0]) MSR[core]->read(W_MSR_PMON_FIXED_CTR, &(result.uncoreTSC));

            uint32 s = topology[core].socket;

            // read each socket's R-box counters only once, from the first online core found on it
            if (!SocketProcessed[s])
            {
                TemporalThreadAffinity tempThreadAffinity(core); // speedup trick for Linux

                // incoming data responses from QPI link 0
                MSR[core]->read(R_MSR_PMON_CTR1, &(result.incomingQPIPackets[s][0]));
                // incoming data responses from QPI link 1 (yes, from CTR0)
                MSR[core]->read(R_MSR_PMON_CTR0, &(result.incomingQPIPackets[s][1]));
                // incoming data responses from QPI link 2
                MSR[core]->read(R_MSR_PMON_CTR8, &(result.incomingQPIPackets[s][2]));
                // incoming data responses from QPI link 3
                MSR[core]->read(R_MSR_PMON_CTR9, &(result.incomingQPIPackets[s][3]));

                // outgoing idle flits from QPI link 0
                MSR[core]->read(R_MSR_PMON_CTR3, &(result.outgoingQPIFlits[s][0]));
                // outgoing idle flits from QPI link 1 (yes, from CTR0)
                MSR[core]->read(R_MSR_PMON_CTR2, &(result.outgoingQPIFlits[s][1]));
                // outgoing idle flits from QPI link 2
                MSR[core]->read(R_MSR_PMON_CTR10, &(result.outgoingQPIFlits[s][2]));
                // outgoing idle flits from QPI link 3
                MSR[core]->read(R_MSR_PMON_CTR11, &(result.outgoingQPIFlits[s][3]));

                SocketProcessed[s] = true;
            }
        }
    }
    else if ((cpu_model == PCM::NEHALEM_EP || cpu_model == PCM::WESTMERE_EP))
    {
        // EP parts: QPI traffic is estimated from per-socket uncore PMCs;
        // only the 2-socket configuration is supported
        if (num_sockets == 2)
        {
            uint32 SCore[2] = { 0, 0 };
            uint64 Total_Reads[2] = { 0, 0 };
            uint64 Total_Writes[2] = { 0, 0 };
            uint64 IOH_Reads[2] = { 0, 0 };
            uint64 IOH_Writes[2] = { 0, 0 };
            uint64 Remote_Reads[2] = { 0, 0 };
            uint64 Remote_Writes[2] = { 0, 0 };
            uint64 Local_Reads[2] = { 0, 0 };
            uint64 Local_Writes[2] = { 0, 0 };

            // find one core on each socket to read the uncore PMCs from
            while (topology[SCore[0]].socket != 0) ++(SCore[0]);
            while (topology[SCore[1]].socket != 1) ++(SCore[1]);
            for (int s = 0; s < 2; ++s)
            {
                TemporalThreadAffinity tempThreadAffinity(SCore[s]); // speedup trick for Linux

                MSR[SCore[s]]->read(MSR_UNCORE_PMC0, &Total_Writes[s]);
                MSR[SCore[s]]->read(MSR_UNCORE_PMC1, &Total_Reads[s]);
                MSR[SCore[s]]->read(MSR_UNCORE_PMC2, &IOH_Reads[s]);
                MSR[SCore[s]]->read(MSR_UNCORE_PMC3, &IOH_Writes[s]);
                MSR[SCore[s]]->read(MSR_UNCORE_PMC4, &Remote_Reads[s]);
                MSR[SCore[s]]->read(MSR_UNCORE_PMC5, &Remote_Writes[s]);
                MSR[SCore[s]]->read(MSR_UNCORE_PMC6, &Local_Reads[s]);
                MSR[SCore[s]]->read(MSR_UNCORE_PMC7, &Local_Writes[s]);
            }

#if 1
            // compute Remote_Reads differently: derive as total minus all
            // other categories instead of trusting the raw PMC4 value
            for (int s = 0; s < 2; ++s)
            {
                uint64 total = Total_Writes[s] + Total_Reads[s];
                uint64 rem = IOH_Reads[s]
                             + IOH_Writes[s]
                             + Local_Reads[s]
                             + Local_Writes[s]
                             + Remote_Writes[s];
                Remote_Reads[s] = (total > rem) ? (total - rem) : 0;
            }
#endif


            // only an estimation (lower bound) - does not count NT stores correctly
            result.incomingQPIPackets[0][0] = Remote_Reads[1] + Remote_Writes[0];
            result.incomingQPIPackets[0][1] = IOH_Reads[0];
            result.incomingQPIPackets[1][0] = Remote_Reads[0] + Remote_Writes[1];
            result.incomingQPIPackets[1][1] = IOH_Reads[1];
        }
        else
        {
            // for a single socket systems no information is available
            result.incomingQPIPackets[0][0] = 0;
        }
    }
    else if (hasPCICFGUncore())
    {
        for (int32 s = 0; (s < (int32)server_pcicfg_uncore.size()); ++s)
        {
            server_pcicfg_uncore[s]->freezeCounters();
            for (uint32 port = 0; port < (uint32)getQPILinksPerSocket(); ++port)
            {
                // convert data flits to packet (64-byte cache line) units
                result.incomingQPIPackets[s][port] = uint64(double(server_pcicfg_uncore[s]->getIncomingDataFlits(port)) / (64./getDataBytesPerFlit()));
                result.outgoingQPIFlits[s][port] = server_pcicfg_uncore[s]->getOutgoingFlits(port);
                result.TxL0Cycles[s][port] = server_pcicfg_uncore[s]->getUPIL0TxCycles(port);
            }
            server_pcicfg_uncore[s]->unfreezeCounters();
        }
    }
    // end of reading QPI counters
}
4783
4784 template <class CounterStateType>
readPackageThermalHeadroom(const uint32 socket,CounterStateType & result)4785 void PCM::readPackageThermalHeadroom(const uint32 socket, CounterStateType & result)
4786 {
4787 if(packageThermalMetricsAvailable())
4788 {
4789 uint64 val = 0;
4790 MSR[socketRefCore[socket]]->read(MSR_PACKAGE_THERM_STATUS,&val);
4791 result.ThermalHeadroom = extractThermalHeadroom(val);
4792 }
4793 else
4794 result.ThermalHeadroom = PCM_INVALID_THERMAL_HEADROOM; // not available
4795 }
4796
// Explicit template instantiations: these specializations are used from
// topology.cpp, so they must be emitted in this translation unit.
template void PCM::readAndAggregatePackageCStateResidencies(std::shared_ptr<SafeMsrHandle>, UncoreCounterState &);
template void PCM::readAndAggregateUncoreMCCounters<UncoreCounterState>(const uint32, UncoreCounterState&);
template void PCM::readAndAggregateEnergyCounters<UncoreCounterState>(const uint32, UncoreCounterState&);
template void PCM::readPackageThermalHeadroom<SocketCounterState>(const uint32, SocketCounterState &);
4802
getSocketCounterState(uint32 socket)4803 SocketCounterState PCM::getSocketCounterState(uint32 socket)
4804 {
4805 SocketCounterState result;
4806 if (MSR.size())
4807 {
4808 // reading core and uncore counter states
4809 for (int32 core = 0; core < num_cores; ++core)
4810 if (isCoreOnline(core) && (topology[core].socket == int32(socket)))
4811 result.readAndAggregate(MSR[core]);
4812
4813 readAndAggregateUncoreMCCounters(socket, result);
4814
4815 readAndAggregateEnergyCounters(socket, result);
4816
4817 readPackageThermalHeadroom(socket, result);
4818
4819 }
4820 return result;
4821 }
4822
void PCM::getAllCounterStates(SystemCounterState & systemState, std::vector<SocketCounterState> & socketStates, std::vector<CoreCounterState> & coreStates)
{
    // Takes a consistent snapshot of core, socket and system counters.
    // Per-core and per-socket reads are dispatched as tasks onto the
    // per-core task queues (so each read happens on/near its target core)
    // and aggregated after all tasks complete.

    // clear and zero-initialize all inputs
    systemState = SystemCounterState();
    socketStates.clear();
    socketStates.resize(num_sockets);
    coreStates.clear();
    coreStates.resize(num_cores);

    std::vector<std::future<void> > asyncCoreResults;

    for (int32 core = 0; core < num_cores; ++core)
    {
        // read core counters
        if (isCoreOnline(core))
        {
            std::packaged_task<void()> task([this,&coreStates,&socketStates,core]() -> void
                {
                    coreStates[core].readAndAggregate(MSR[core]);
                    socketStates[topology[core].socket].UncoreCounterState::readAndAggregate(MSR[core]); // read package C state counters
                }
            );
            asyncCoreResults.push_back(task.get_future());
            coreTaskQueues[core]->push(task);
        }
        // std::cout << "DEBUG2: " << core << " " << coreStates[core].InstRetiredAny << " \n";
    }
    // std::cout << std::flush;

    // one uncore-read task per socket, queued on that socket's reference core
    for (uint32 s = 0; s < (uint32)num_sockets; ++s)
    {
        int32 refCore = socketRefCore[s];
        if (refCore<0) refCore = 0; // fall back to core 0 when no reference core is known
        std::packaged_task<void()> task([this, s, &socketStates]() -> void
            {
                readAndAggregateUncoreMCCounters(s, socketStates[s]);
                readAndAggregateEnergyCounters(s, socketStates[s]);
                readPackageThermalHeadroom(s, socketStates[s]);
            } );
        asyncCoreResults.push_back(task.get_future());
        coreTaskQueues[refCore]->push(task);
    }

    // QPI counters are read synchronously on this thread, in parallel with the tasks above
    readQPICounters(systemState);

    // wait for all queued reads to finish before aggregating
    for (auto & ar : asyncCoreResults)
        ar.wait();

    for (int32 core = 0; core < num_cores; ++core)
    {   // aggregate core counters into sockets
        if(isCoreOnline(core))
            socketStates[topology[core].socket] += coreStates[core];
    }

    for (int32 s = 0; s < num_sockets; ++s)
    {   // aggregate core counters from sockets into system state and
        // aggregate socket uncore iMC, energy and package C state counters into system
        systemState += socketStates[s];
    }
}
4882
void PCM::getUncoreCounterStates(SystemCounterState & systemState, std::vector<SocketCounterState> & socketStates)
{
    // Lighter-weight variant of getAllCounterStates(): reads only the
    // uncore (iMC, energy, thermal, QPI) counters plus the reference
    // core's TSC on each socket; no per-core counters are read.

    // clear and zero-initialize all inputs
    systemState = SystemCounterState();
    socketStates.clear();
    socketStates.resize(num_sockets);
    std::vector<CoreCounterState> refCoreStates(num_sockets);

    for (uint32 s = 0; s < (uint32)num_sockets; ++s)
    {
        const int32 refCore = socketRefCore[s];
        if(isCoreOnline(refCore))
        {
            refCoreStates[s].readAndAggregateTSC(MSR[refCore]);
        }
        readAndAggregateUncoreMCCounters(s, socketStates[s]);
        readAndAggregateEnergyCounters(s, socketStates[s]);
        readPackageThermalHeadroom(s, socketStates[s]);
    }

    readQPICounters(systemState);

    for (int32 s = 0; s < num_sockets; ++s)
    {
        const int32 refCore = socketRefCore[s];
        if(isCoreOnline(refCore))
        {
            // replicate the reference core's TSC once per online core on the
            // socket, so the socket aggregate matches what full per-core
            // aggregation would have produced
            for(uint32 core=0; core < getNumCores(); ++core)
            {
                if(topology[core].socket == s && isCoreOnline(core))
                    socketStates[s] += refCoreStates[s];
            }
        }
        // aggregate socket uncore iMC, energy counters into system
        systemState += socketStates[s];
    }
}
4920
getCoreCounterState(uint32 core)4921 CoreCounterState PCM::getCoreCounterState(uint32 core)
4922 {
4923 CoreCounterState result;
4924 if (MSR.size()) result.readAndAggregate(MSR[core]);
4925 return result;
4926 }
4927
getNumCores() const4928 uint32 PCM::getNumCores() const
4929 {
4930 return (uint32)num_cores;
4931 }
4932
getNumOnlineCores() const4933 uint32 PCM::getNumOnlineCores() const
4934 {
4935 return (uint32)num_online_cores;
4936 }
4937
getNumSockets() const4938 uint32 PCM::getNumSockets() const
4939 {
4940 return (uint32)num_sockets;
4941 }
4942
getNumOnlineSockets() const4943 uint32 PCM::getNumOnlineSockets() const
4944 {
4945 return (uint32)num_online_sockets;
4946 }
4947
4948
getThreadsPerCore() const4949 uint32 PCM::getThreadsPerCore() const
4950 {
4951 return (uint32)threads_per_core;
4952 }
4953
getSMT() const4954 bool PCM::getSMT() const
4955 {
4956 return threads_per_core > 1;
4957 }
4958
// Returns the nominal (base) core frequency in Hz, as detected at init time.
uint64 PCM::getNominalFrequency() const
{
    return nominal_frequency;
}
4963
// Returns the QoS-monitoring counter scaling factor reported by
// CPUID leaf 0xF, sub-leaf 1 in EBX (upscaling factor to convert
// raw L3 occupancy/bandwidth counts to bytes per the Intel SDM).
uint32 PCM::getL3ScalingFactor() const
{
    PCM_CPUID_INFO cpuinfo;
    pcm_cpuid(0xf,0x1,cpuinfo);

    return (uint32)cpuinfo.reg.ebx;

}
4972
// Heuristically detects whether any logical core is offlined, by comparing
// the number of online cores against the maximum logical core count
// advertised by CPUID leaf 0xB (x2APIC topology enumeration).
bool PCM::isSomeCoreOfflined()
{
    PCM_CPUID_INFO cpuid_args;
    pcm_cpuid(0xB,1,cpuid_args);
    uint32 max_num_lcores_per_socket = cpuid_args.reg.ebx & 0xFFFF;
    uint32 max_num_lcores = max_num_lcores_per_socket * getNumSockets();
    // CPUID still reports both SMT threads when HT is disabled in BIOS,
    // so exactly half the advertised cores being online is not "offlined"
    if(threads_per_core == 1 && (getNumOnlineCores() * 2 == max_num_lcores)) // HT is disabled in the BIOS
    {
        return false;
    }
    return !(getNumOnlineCores() == max_num_lcores);
}
4985
// Takes a full snapshot of all server uncore PMUs for one socket:
// free-running iMC/PMM bandwidth counters, xPI/M3UPI link counters,
// MC/EDC/M2M channel counters, and the MSR-based CBO/IIO/UBOX/PCU
// counters plus thermal headroom, TSC and package C-state residencies.
ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket)
{
    ServerUncoreCounterState result;
    if (socket < serverBW.size() && serverBW[socket].get())
    {
        result.freeRunningCounter[ServerUncoreCounterState::ImcReads] = serverBW[socket]->getImcReads();
        result.freeRunningCounter[ServerUncoreCounterState::ImcWrites] = serverBW[socket]->getImcWrites();
        result.freeRunningCounter[ServerUncoreCounterState::PMMReads] = serverBW[socket]->getPMMReads();
        result.freeRunningCounter[ServerUncoreCounterState::PMMWrites] = serverBW[socket]->getPMMWrites();
    }
    if(server_pcicfg_uncore.size() && server_pcicfg_uncore[socket].get())
    {
        // freeze so that all PCICFG counters are read from one consistent snapshot
        server_pcicfg_uncore[socket]->freezeCounters();
        for(uint32 port=0;port < (uint32)server_pcicfg_uncore[socket]->getNumQPIPorts();++port)
        {
            assert(port < result.xPICounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.xPICounter[port][cnt] = server_pcicfg_uncore[socket]->getQPILLCounter(port, cnt);
            assert(port < result.M3UPICounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.M3UPICounter[port][cnt] = server_pcicfg_uncore[socket]->getM3UPICounter(port, cnt);
        }
        for (uint32 channel = 0; channel < (uint32)server_pcicfg_uncore[socket]->getNumMCChannels(); ++channel)
        {
            assert(channel < result.DRAMClocks.size());
            result.DRAMClocks[channel] = server_pcicfg_uncore[socket]->getDRAMClocks(channel);
            assert(channel < result.MCCounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.MCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getMCCounter(channel, cnt);
        }
        for (uint32 channel = 0; channel < (uint32)server_pcicfg_uncore[socket]->getNumEDCChannels(); ++channel)
        {
            assert(channel < result.MCDRAMClocks.size());
            result.MCDRAMClocks[channel] = server_pcicfg_uncore[socket]->getMCDRAMClocks(channel);
            assert(channel < result.EDCCounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.EDCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getEDCCounter(channel, cnt);
        }
        for (uint32 controller = 0; controller < (uint32)server_pcicfg_uncore[socket]->getNumMC(); ++controller)
        {
            assert(controller < result.M2MCounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.M2MCounter[controller][cnt] = server_pcicfg_uncore[socket]->getM2MCounter(controller, cnt);
        }
        server_pcicfg_uncore[socket]->unfreezeCounters();
    }
    if (MSR.size())
    {
        uint32 refCore = socketRefCore[socket];
        // pin to the reference core so all MSR reads hit the right socket
        TemporalThreadAffinity tempThreadAffinity(refCore);
        for (uint32 cbo = 0; socket < cboPMUs.size() && cbo < cboPMUs[socket].size() && cbo < ServerUncoreCounterState::maxCBOs; ++cbo)
        {
            for (int i = 0; i < ServerUncoreCounterState::maxCounters; ++i)
            {
                result.CBOCounter[cbo][i] = *(cboPMUs[socket][cbo].counterValue[i]);
            }
        }
        for (uint32 stack = 0; socket < iioPMUs.size() && stack < iioPMUs[socket].size() && stack < ServerUncoreCounterState::maxIIOStacks; ++stack)
        {
            for (int i = 0; i < ServerUncoreCounterState::maxCounters; ++i)
            {
                result.IIOCounter[stack][i] = *(iioPMUs[socket][stack].counterValue[i]);
            }
        }
        for (int i = 0; i < 2 && socket < uboxPMUs.size(); ++i)
        {
            result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]);
            // NOTE(review): UncClocks is (re)assigned on each iteration of
            // this 2-iteration loop; the final value is simply the last read
            result.UncClocks = getUncoreClocks(socket);
        }
        for (int i = 0; i < ServerUncoreCounterState::maxCounters && socket < pcuPMUs.size(); ++i)
            result.PCUCounter[i] = *pcuPMUs[socket].counterValue[i];
        // std::cout << "values read: " << result.PCUCounter[0] << " " << result.PCUCounter[1] << " " << result.PCUCounter[2] << " " << result.PCUCounter[3] << "\n";
        uint64 val=0;
        //MSR[refCore]->read(MSR_PKG_ENERGY_STATUS,&val);
        //std::cout << "Energy status: " << val << "\n";
        MSR[refCore]->read(MSR_PACKAGE_THERM_STATUS,&val);
        result.PackageThermalHeadroom = extractThermalHeadroom(val);
        MSR[refCore]->read(IA32_TIME_STAMP_COUNTER, &result.InvariantTSC);
        readAndAggregatePackageCStateResidencies(MSR[refCore], result);
    }
    // std::cout << std::flush;
    readAndAggregateEnergyCounters(socket, result);

    return result;
}
5071
5072 #ifndef _MSC_VER
print_mcfg(const char * path)5073 void print_mcfg(const char * path)
5074 {
5075 int mcfg_handle = ::open(path, O_RDONLY);
5076
5077 if (mcfg_handle < 0)
5078 {
5079 std::cerr << "PCM Error: Cannot open " << path << "\n";
5080 throw std::exception();
5081 }
5082
5083 MCFGHeader header;
5084
5085 ssize_t read_bytes = ::read(mcfg_handle, (void *)&header, sizeof(MCFGHeader));
5086
5087 if(read_bytes == 0)
5088 {
5089 std::cerr << "PCM Error: Cannot read " << path << "\n";
5090 throw std::exception();
5091 }
5092
5093 const unsigned segments = header.nrecords();
5094 header.print();
5095 std::cout << "Segments: " << segments << "\n";
5096
5097 for(unsigned int i=0; i<segments;++i)
5098 {
5099 MCFGRecord record;
5100 read_bytes = ::read(mcfg_handle, (void *)&record, sizeof(MCFGRecord));
5101 if(read_bytes == 0)
5102 {
5103 std::cerr << "PCM Error: Cannot read " << path << " (2)\n";
5104 throw std::exception();
5105 }
5106 std::cout << "Segment " << std::dec << i << " ";
5107 record.print();
5108 }
5109
5110 ::close(mcfg_handle);
5111 }
5112 #endif
5113
5114
// PCI device IDs of integrated memory controller (iMC) devices, used by
// initSocket2Bus() to discover which PCI bus each socket's iMC lives on.
// IDs are grouped by CPU generation — presumably 0x3cb*: JKT/IVT, 0x0E*:
// IVT, 0x2f*: HSX, 0x6f*: BDX, 0x204*: SKX, 0x784*: KNL (verify against
// the corresponding uncore performance monitoring guides).
static const uint32 IMC_DEV_IDS[] = {
    0x03cb0,
    0x03cb1,
    0x03cb4,
    0x03cb5,
    0x0EB4,
    0x0EB5,
    0x0EB0,
    0x0EB1,
    0x0EF4,
    0x0EF5,
    0x0EF0,
    0x0EF1,
    0x2fb0,
    0x2fb1,
    0x2fb4,
    0x2fb5,
    0x2fd0,
    0x2fd1,
    0x2fd4,
    0x2fd5,
    0x6fb0,
    0x6fb1,
    0x6fb4,
    0x6fb5,
    0x6fd0,
    0x6fd1,
    0x6fd4,
    0x6fd5,
    0x2042,
    0x2046,
    0x204a,
    0x7840,
    0x7841,
    0x7842,
    0x7843,
    0x7844,
    0x781f
};
5154
// PCI device IDs of UPI link devices, used for socket-to-bus discovery.
static const uint32 UPI_DEV_IDS[] = {
    0x2058,
    0x3441
};
5159
// PCI device IDs of M2M (mesh-to-memory) devices, used for socket-to-bus discovery.
static const uint32 M2M_DEV_IDS[] = {
    0x2066,
    0x344A
};
5164
// Guards the lazy, one-time population of the socket-to-bus maps below.
Mutex socket2busMutex;
// Per-socket (PCI segment group, bus number) pairs for iMC, UPI and M2M
// devices; filled on first use by initSocket2Bus().
std::vector<std::pair<uint32,uint32> > ServerPCICFGUncore::socket2iMCbus;
std::vector<std::pair<uint32,uint32> > ServerPCICFGUncore::socket2UPIbus;
std::vector<std::pair<uint32,uint32> > ServerPCICFGUncore::socket2M2Mbus;
5169
// Populates 'socket2bus' with one (PCI segment group, bus) pair per matching
// device found, by scanning every bus of every MCFG segment for an Intel
// device at (device, function) whose device ID appears in DEV_IDS.
// The scan is performed at most once (guarded by socket2busMutex); on
// non-Linux platforms a single synthetic segment covering buses 0-0xff
// is scanned instead of parsing the ACPI MCFG table.
void initSocket2Bus(std::vector<std::pair<uint32, uint32> > & socket2bus, uint32 device, uint32 function, const uint32 DEV_IDS[], uint32 devIdsSize)
{
    if (device == PCM_INVALID_DEV_ADDR || function == PCM_INVALID_FUNC_ADDR)
    {
        return;
    }
    Mutex::Scope _(socket2busMutex);
    if(!socket2bus.empty()) return; // already discovered

#ifdef __linux__
    const std::vector<MCFGRecord> & mcfg = PciHandleMM::getMCFGRecords();
#else
    std::vector<MCFGRecord> mcfg;
    MCFGRecord segment;
    segment.PCISegmentGroupNumber = 0;
    segment.startBusNumber = 0;
    segment.endBusNumber = 0xff;
    mcfg.push_back(segment);
#endif

    for(uint32 s = 0; s < (uint32)mcfg.size(); ++s)
        for (uint32 bus = (uint32)mcfg[s].startBusNumber; bus <= (uint32)mcfg[s].endBusNumber; ++bus)
        {
            uint32 value = 0;
            try
            {
                PciHandleType h(mcfg[s].PCISegmentGroupNumber, bus, device, function);
                h.read32(0, &value); // vendor ID (low 16 bits) + device ID (high 16 bits)

            } catch(...)
            {
                // invalid bus:device:function
                continue;
            }
            const uint32 vendor_id = value & 0xffff;
            const uint32 device_id = (value >> 16) & 0xffff;
            if (vendor_id != PCM_INTEL_PCI_VENDOR_ID)
                continue;

            for (uint32 i = 0; i < devIdsSize; ++i)
            {
                // match
                if(DEV_IDS[i] == device_id)
                {
                    // std::cout << "DEBUG: found bus " << std::hex << bus << " with device ID " << device_id << std::dec << "\n";
                    socket2bus.push_back(std::make_pair(mcfg[s].PCISegmentGroupNumber,bus));
                    break;
                }
            }
        }
    //std::cout << std::flush;
}
5222
// Returns the PCI bus number of the given socket by walking the chain of
// CPUBUSNO registers (offset 0x108 of device 5, function 0 on each CPU bus),
// starting at bus 0. Returns -1 if the socket cannot be resolved or the
// bus number would exceed 0xff. May propagate PciHandleType exceptions.
int getBusFromSocket(const uint32 socket)
{
    int cur_bus = 0;
    uint32 cur_socket = 0;
    // std::cout << "socket: " << socket << "\n";
    while(cur_socket <= socket)
    {
        // std::cout << "reading from bus 0x" << std::hex << cur_bus << std::dec << " ";
        PciHandleType h(0, cur_bus, 5, 0);
        uint32 cpubusno = 0;
        h.read32(0x108, &cpubusno); // CPUBUSNO register
        cur_bus = (cpubusno >> 8)& 0x0ff; // bits 15:8 hold the next CPU bus number
        // std::cout << "socket: " << cur_socket << std::hex << " cpubusno: 0x" << std::hex << cpubusno << " " << cur_bus << std::dec << "\n";
        if(socket == cur_socket)
            return cur_bus;
        ++cur_socket;
        ++cur_bus;
        if(cur_bus > 0x0ff)
            return -1;
    }
    //std::cout << std::flush;

    return -1;
}
5247
createIntelPerfMonDevice(uint32 groupnr_,int32 bus_,uint32 dev_,uint32 func_,bool checkVendor)5248 PciHandleType * ServerPCICFGUncore::createIntelPerfMonDevice(uint32 groupnr_, int32 bus_, uint32 dev_, uint32 func_, bool checkVendor)
5249 {
5250 if (PciHandleType::exists(groupnr_, (uint32)bus_, dev_, func_))
5251 {
5252 PciHandleType * handle = new PciHandleType(groupnr_, bus_, dev_, func_);
5253
5254 if(!checkVendor) return handle;
5255
5256 uint32 vendor_id = 0;
5257 handle->read32(PCM_PCI_VENDOR_ID_OFFSET,&vendor_id);
5258 vendor_id &= 0x0ffff;
5259
5260 if(vendor_id == PCM_INTEL_PCI_VENDOR_ID) return handle;
5261
5262 delete handle;
5263 }
5264 return NULL;
5265 }
5266
isSecureBoot() const5267 bool PCM::isSecureBoot() const
5268 {
5269 static int flag = -1;
5270 if (MSR.size() > 0 && flag == -1)
5271 {
5272 // std::cerr << "DEBUG: checking MSR in isSecureBoot\n";
5273 uint64 val = 0;
5274 if (MSR[0]->read(IA32_PERFEVTSEL0_ADDR, &val) != sizeof(val))
5275 {
5276 flag = 0; // some problem with MSR read, not secure boot
5277 }
5278 // read works
5279 if (MSR[0]->write(IA32_PERFEVTSEL0_ADDR, val) != sizeof(val)/* && errno == 1 */) // errno works only on windows
5280 { // write does not work -> secure boot
5281 flag = 1;
5282 }
5283 else
5284 {
5285 flag = 0; // can write MSR -> no secure boot
5286 }
5287 }
5288 return flag == 1;
5289 }
5290
// Decides (once, cached in a function-local static) whether uncore PMUs
// should be programmed through the Linux perf interface instead of direct
// MSR/PCICFG access. Perf is selected when the PCM_USE_UNCORE_PERF=1
// environment variable is set, or when Secure Boot blocks direct MSR
// writes. Without PCM_USE_PERF compiled in, Secure Boot is a hard error.
bool PCM::useLinuxPerfForUncore() const
{
    static int use = -1;
    if (use != -1)
    {
        return 1 == use;
    }
    use = 0;
    bool secureBoot = isSecureBoot();
#ifdef PCM_USE_PERF
    const auto imcIDs = enumeratePerfPMUs("imc", 100);
    std::cout << "INFO: Linux perf interface to program uncore PMUs is " << (imcIDs.empty()?"NOT ":"") << "present\n";
    const char * perf_env = std::getenv("PCM_USE_UNCORE_PERF");
    if (perf_env != NULL && std::string(perf_env) == std::string("1"))
    {
        std::cout << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n";
        use = 1;
    }
    if (secureBoot)
    {
        std::cout << "INFO: Secure Boot detected. Using Linux perf for uncore PMU programming.\n";
        use = 1;
    }
    else
#endif
    // NOTE: when PCM_USE_PERF is defined, this block is the 'else' branch of
    // the secure-boot check above and is therefore skipped under Secure Boot;
    // without PCM_USE_PERF it always runs.
    {
        if (secureBoot)
        {
            std::cerr << "ERROR: Secure Boot detected. Recompile PCM with -DPCM_USE_PERF or disable Secure Boot.\n";
        }
    }
    return 1 == use;
}
5324
// Constructs the server uncore PMU access object for one socket:
// resolves per-model register locations, discovers the socket's PCI buses,
// then initializes either the Linux perf backend or direct PCICFG access,
// and logs a summary of the detected uncore blocks.
ServerPCICFGUncore::ServerPCICFGUncore(uint32 socket_, const PCM * pcm) :
    iMCbus(-1)
    , UPIbus(-1)
    , M2Mbus(-1)
    , groupnr(0)
    , cpu_model(pcm->getCPUModel())
    , qpi_speed(0)
{
    initRegisterLocations(pcm);
    initBuses(socket_, pcm);

    if (pcm->useLinuxPerfForUncore())
    {
        initPerf(socket_, pcm);
    }
    else
    {
        initDirect(socket_, pcm);
    }

    std::cerr << "Socket " << socket_ << ": " <<
        getNumMC() << " memory controllers detected with total number of " << getNumMCChannels() << " channels. " <<
        getNumQPIPorts() << " QPI ports detected." <<
        " " << m2mPMUs.size() << " M2M (mesh to memory) blocks detected."
        " " << haPMUs.size() << " Home Agents detected."
        " " << m3upiPMUs.size() << " M3UPI blocks detected."
        "\n";
}
5353
// Fills the per-uncore-block register-location tables (device/function
// address pairs) for the detected CPU model. The PCM_PCICFG_*_INIT macros
// expand to arch-specific *_REGISTER_DEV_ADDR / *_REGISTER_FUNC_ADDR
// constants and grow the corresponding table to fit the given index.
// Throws std::exception for unsupported models.
void ServerPCICFGUncore::initRegisterLocations(const PCM * pcm)
{
#define PCM_PCICFG_MC_INIT(controller, channel, arch) \
    MCRegisterLocation.resize(controller + 1); \
    MCRegisterLocation[controller].resize(channel + 1); \
    MCRegisterLocation[controller][channel] =  \
        std::make_pair(arch##_MC##controller##_CH##channel##_REGISTER_DEV_ADDR, arch##_MC##controller##_CH##channel##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_QPI_INIT(port, arch) \
    XPIRegisterLocation.resize(port + 1); \
    XPIRegisterLocation[port] = std::make_pair(arch##_QPI_PORT##port##_REGISTER_DEV_ADDR, arch##_QPI_PORT##port##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_M3UPI_INIT(port, arch) \
    M3UPIRegisterLocation.resize(port + 1); \
    M3UPIRegisterLocation[port] = std::make_pair(arch##_M3UPI_PORT##port##_REGISTER_DEV_ADDR, arch##_M3UPI_PORT##port##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_EDC_INIT(controller, clock, arch) \
    EDCRegisterLocation.resize(controller + 1); \
    EDCRegisterLocation[controller] = std::make_pair(arch##_EDC##controller##_##clock##_REGISTER_DEV_ADDR, arch##_EDC##controller##_##clock##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_M2M_INIT(x, arch) \
    M2MRegisterLocation.resize(x + 1); \
    M2MRegisterLocation[x] = std::make_pair(arch##_M2M_##x##_REGISTER_DEV_ADDR, arch##_M2M_##x##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_HA_INIT(x, arch) \
    HARegisterLocation.resize(x + 1); \
    HARegisterLocation[x] = std::make_pair(arch##_HA##x##_REGISTER_DEV_ADDR, arch##_HA##x##_REGISTER_FUNC_ADDR);

    if(cpu_model == PCM::JAKETOWN || cpu_model == PCM::IVYTOWN)
    {
        // 2 iMCs x 4 channels, 3 QPI ports
        PCM_PCICFG_MC_INIT(0, 0, JKTIVT)
        PCM_PCICFG_MC_INIT(0, 1, JKTIVT)
        PCM_PCICFG_MC_INIT(0, 2, JKTIVT)
        PCM_PCICFG_MC_INIT(0, 3, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 0, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 1, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 2, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 3, JKTIVT)

        PCM_PCICFG_QPI_INIT(0, JKTIVT);
        PCM_PCICFG_QPI_INIT(1, JKTIVT);
        PCM_PCICFG_QPI_INIT(2, JKTIVT);
    }
    else if(cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::BDX)
    {
        // 2 iMCs x 4 channels, 3 QPI ports, 2 Home Agents
        PCM_PCICFG_MC_INIT(0, 0, HSX)
        PCM_PCICFG_MC_INIT(0, 1, HSX)
        PCM_PCICFG_MC_INIT(0, 2, HSX)
        PCM_PCICFG_MC_INIT(0, 3, HSX)
        PCM_PCICFG_MC_INIT(1, 0, HSX)
        PCM_PCICFG_MC_INIT(1, 1, HSX)
        PCM_PCICFG_MC_INIT(1, 2, HSX)
        PCM_PCICFG_MC_INIT(1, 3, HSX)

        PCM_PCICFG_QPI_INIT(0, HSX);
        PCM_PCICFG_QPI_INIT(1, HSX);
        PCM_PCICFG_QPI_INIT(2, HSX);

        PCM_PCICFG_HA_INIT(0, HSX);
        PCM_PCICFG_HA_INIT(1, HSX);
    }
    else if(cpu_model == PCM::SKX)
    {
        // SKX family (also covers CLX and CPX; CPX adds 3 extra UPI links)
        PCM_PCICFG_MC_INIT(0, 0, SKX)
        PCM_PCICFG_MC_INIT(0, 1, SKX)
        PCM_PCICFG_MC_INIT(0, 2, SKX)
        PCM_PCICFG_MC_INIT(0, 3, SKX)
        PCM_PCICFG_MC_INIT(1, 0, SKX)
        PCM_PCICFG_MC_INIT(1, 1, SKX)
        PCM_PCICFG_MC_INIT(1, 2, SKX)
        PCM_PCICFG_MC_INIT(1, 3, SKX)

        PCM_PCICFG_QPI_INIT(0, SKX);
        PCM_PCICFG_QPI_INIT(1, SKX);
        PCM_PCICFG_QPI_INIT(2, SKX);

        if (pcm->isCPX())
        {
            PCM_PCICFG_QPI_INIT(3, CPX);
            PCM_PCICFG_QPI_INIT(4, CPX);
            PCM_PCICFG_QPI_INIT(5, CPX);
        }

        PCM_PCICFG_M2M_INIT(0, SKX)
        PCM_PCICFG_M2M_INIT(1, SKX)

        // M3UPI
        if (pcm->isCPX())
        {
            // CPX
            PCM_PCICFG_M3UPI_INIT(0, CPX);
            PCM_PCICFG_M3UPI_INIT(1, CPX);
            PCM_PCICFG_M3UPI_INIT(2, CPX);
            PCM_PCICFG_M3UPI_INIT(3, CPX);
            PCM_PCICFG_M3UPI_INIT(4, CPX);
            PCM_PCICFG_M3UPI_INIT(5, CPX);
        }
        else
        {
            // SKX/CLX
            PCM_PCICFG_M3UPI_INIT(0, SKX);
            PCM_PCICFG_M3UPI_INIT(1, SKX);
            PCM_PCICFG_M3UPI_INIT(2, SKX);
        }
    }
    else if (cpu_model == PCM::ICX)
    {
        PCM_PCICFG_QPI_INIT(0, ICX);
        PCM_PCICFG_QPI_INIT(1, ICX);
        PCM_PCICFG_QPI_INIT(2, ICX);

        PCM_PCICFG_M3UPI_INIT(0, ICX);
        PCM_PCICFG_M3UPI_INIT(1, ICX);
        PCM_PCICFG_M3UPI_INIT(2, ICX);

        PCM_PCICFG_M2M_INIT(0, SERVER)
        PCM_PCICFG_M2M_INIT(1, SERVER)
        PCM_PCICFG_M2M_INIT(2, SERVER)
        PCM_PCICFG_M2M_INIT(3, SERVER)
    }
    else if(cpu_model == PCM::KNL)
    {
        // 2 DDR4 Memory Controllers with 3 channels each
        PCM_PCICFG_MC_INIT(0, 0, KNL)
        PCM_PCICFG_MC_INIT(0, 1, KNL)
        PCM_PCICFG_MC_INIT(0, 2, KNL)
        PCM_PCICFG_MC_INIT(1, 0, KNL)
        PCM_PCICFG_MC_INIT(1, 1, KNL)
        PCM_PCICFG_MC_INIT(1, 2, KNL)

        // 8 MCDRAM (Multi-Channel [Stacked] DRAM) Memory Controllers
        PCM_PCICFG_EDC_INIT(0, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(1, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(2, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(3, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(4, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(5, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(6, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(7, ECLK, KNL)
    }
    else if (cpu_model == PCM::SNOWRIDGE)
    {
        PCM_PCICFG_M2M_INIT(0, SERVER)
        PCM_PCICFG_M2M_INIT(1, SERVER)
        PCM_PCICFG_M2M_INIT(2, SERVER)
        PCM_PCICFG_M2M_INIT(3, SERVER)
    }
    else
    {
        std::cerr << "Error: Uncore PMU for processor with model id " << cpu_model << " is not supported.\n";
        throw std::exception();
    }

#undef PCM_PCICFG_MC_INIT
#undef PCM_PCICFG_QPI_INIT
#undef PCM_PCICFG_M3UPI_INIT
#undef PCM_PCICFG_EDC_INIT
#undef PCM_PCICFG_M2M_INIT
#undef PCM_PCICFG_HA_INIT
}
5514
initBuses(uint32 socket_,const PCM * pcm)5515 void ServerPCICFGUncore::initBuses(uint32 socket_, const PCM * pcm)
5516 {
5517 const uint32 total_sockets_ = pcm->getNumSockets();
5518
5519 if (M2MRegisterLocation.size())
5520 {
5521 initSocket2Bus(socket2M2Mbus, M2MRegisterLocation[0].first, M2MRegisterLocation[0].second, M2M_DEV_IDS, (uint32)sizeof(M2M_DEV_IDS) / sizeof(M2M_DEV_IDS[0]));
5522 if (socket_ < socket2M2Mbus.size())
5523 {
5524 groupnr = socket2M2Mbus[socket_].first;
5525 M2Mbus = socket2M2Mbus[socket_].second;
5526 }
5527 else
5528 {
5529 std::cerr << "PCM error: socket_ " << socket_ << " >= socket2M2Mbus.size() " << socket2M2Mbus.size() << "\n";
5530 }
5531 if (total_sockets_ != socket2M2Mbus.size())
5532 {
5533 std::cerr << "PCM warning: total_sockets_ " << total_sockets_ << " does not match socket2M2Mbus.size() " << socket2M2Mbus.size() << "\n";
5534 }
5535 }
5536
5537 if (MCRegisterLocation.size() > 0 && MCRegisterLocation[0].size() > 0)
5538 {
5539 initSocket2Bus(socket2iMCbus, MCRegisterLocation[0][0].first, MCRegisterLocation[0][0].second, IMC_DEV_IDS, (uint32)sizeof(IMC_DEV_IDS) / sizeof(IMC_DEV_IDS[0]));
5540
5541 if (total_sockets_ == socket2iMCbus.size())
5542 {
5543 if (total_sockets_ == socket2M2Mbus.size() && socket2iMCbus[socket_].first != socket2M2Mbus[socket_].first)
5544 {
5545 std::cerr << "PCM error: mismatching PCICFG group number for M2M and IMC perfmon devices.\n";
5546 M2Mbus = -1;
5547 }
5548 groupnr = socket2iMCbus[socket_].first;
5549 iMCbus = socket2iMCbus[socket_].second;
5550 }
5551 else if (total_sockets_ <= 4)
5552 {
5553 iMCbus = getBusFromSocket(socket_);
5554 if (iMCbus < 0)
5555 {
5556 std::cerr << "Cannot find bus for socket " << socket_ << " on system with " << total_sockets_ << " sockets.\n";
5557 throw std::exception();
5558 }
5559 else
5560 {
5561 std::cerr << "PCM Warning: the bus for socket " << socket_ << " on system with " << total_sockets_ << " sockets could not find via PCI bus scan. Using cpubusno register. Bus = " << iMCbus << "\n";
5562 }
5563 }
5564 else
5565 {
5566 std::cerr << "Cannot find bus for socket " << socket_ << " on system with " << total_sockets_ << " sockets.\n";
5567 throw std::exception();
5568 }
5569 }
5570
5571 #if 1
5572 if (total_sockets_ == 1) {
5573 /*
5574 * For single socket systems, do not worry at all about QPI ports. This
5575 * eliminates QPI LL programming error messages on single socket systems
5576 * with BIOS that hides QPI performance counting PCI functions. It also
5577 * eliminates register programming that is not needed since no QPI traffic
5578 * is possible with single socket systems.
5579 */
5580 return;
5581 }
5582 #endif
5583
5584 #ifdef PCM_NOQPI
5585 return;
5586 #endif
5587
5588 if (PCM::hasUPI(cpu_model))
5589 {
5590 initSocket2Bus(socket2UPIbus, XPIRegisterLocation[0].first, XPIRegisterLocation[0].second, UPI_DEV_IDS, (uint32)sizeof(UPI_DEV_IDS) / sizeof(UPI_DEV_IDS[0]));
5591 if(total_sockets_ == socket2UPIbus.size())
5592 {
5593 UPIbus = socket2UPIbus[socket_].second;
5594 if(groupnr != socket2UPIbus[socket_].first)
5595 {
5596 UPIbus = -1;
5597 std::cerr << "PCM error: mismatching PCICFG group number for UPI and IMC perfmon devices.\n";
5598 }
5599 }
5600 else
5601 {
5602 std::cerr << "PCM error: Did not find UPI perfmon device on every socket in a multisocket system.\n";
5603 }
5604 }
5605 else
5606 {
5607 UPIbus = iMCbus;
5608 }
5609 }
5610
// Create the uncore PMU objects (iMC, M2M, EDC, M3UPI, HA, QPI/UPI link) for one
// socket by direct hardware access: PCICFG perfmon devices and, on ICX/SNOWRIDGE,
// MMIO-mapped memory-controller counter banks.
//
// \param socket_ zero-based socket index
// \param pcm     PCM instance (used for socket count, xPI name, CPU brand string)
// \throws std::exception if no memory controller PMU can be created, or if the
//         QPI/UPI link handles cannot be created
//
// Side effects: fills imcPMUs, m2mPMUs, edcPMUs, m3upiPMUs, haPMUs, xpiPMUs and
// num_imc_channels. Relies on groupnr/iMCbus/M2Mbus/UPIbus set by initBuses().
void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm)
{
    // --- iMC channel PMUs over PCICFG ---
    {
        std::vector<std::shared_ptr<PciHandleType> > imcHandles;

        // Track how many channel handles each controller contributed so that
        // num_imc_channels gets one entry per controller that has any channels.
        auto lastWorkingChannels = imcHandles.size();
        for (auto & ctrl: MCRegisterLocation)
        {
            for (auto & channel : ctrl)
            {
                PciHandleType * handle = createIntelPerfMonDevice(groupnr, iMCbus, channel.first, channel.second, true);
                if (handle) imcHandles.push_back(std::shared_ptr<PciHandleType>(handle));
            }
            if (imcHandles.size() > lastWorkingChannels)
            {
                num_imc_channels.push_back((uint32)(imcHandles.size() - lastWorkingChannels));
            }
            lastWorkingChannels = imcHandles.size();
        }

        for (auto & handle : imcHandles)
        {
            // KNL uses its own MC register layout; all other models share the XPF layout.
            if (cpu_model == PCM::KNL) {
                imcPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR3_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_FIXED_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_FIXED_CTR_ADDR))
                );
            }
            else {
                imcPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR3_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_FIXED_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_FIXED_CTR_ADDR))
                );
            }
        }
    }

    // --- M2M (mesh-to-memory) PMUs over PCICFG ---
    {
        std::vector<std::shared_ptr<PciHandleType> > m2mHandles;

        // M2Mbus < 0 means initBuses() could not locate (or invalidated) the M2M bus.
        if (M2Mbus >= 0)
        {
            for (auto & reg : M2MRegisterLocation)
            {
                PciHandleType * handle = createIntelPerfMonDevice(groupnr, M2Mbus, reg.first, reg.second, true);
                if (handle) m2mHandles.push_back(std::shared_ptr<PciHandleType>(handle));
            }
        }

        for (auto & handle : m2mHandles)
        {
            // ICX/SNOWRIDGE use the SERVER_* M2M register layout; SKX-era parts use SKX_*.
            if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE)
            {
                m2mPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, SERVER_M2M_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR3_ADDR)
                    )
                );
            }
            else
            {
                m2mPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, SKX_M2M_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR3_ADDR)
                    )
                );
            }
        }
    }

    // --- MMIO-based iMC channel PMUs (ICX / SNOWRIDGE only) ---
    int numChannels = 0;

    if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX)
    {
        numChannels = 2;
    }

    if (numChannels > 0)
    {
        initSocket2Ubox0Bus();
        if (socket_ < socket2UBOX0bus.size())
        {
            // One memory BAR per M2M block; each BAR exposes `numChannels` channel PMON banks.
            auto memBars = getServerMemBars((uint32)m2mPMUs.size(), socket2UBOX0bus[socket_].first, socket2UBOX0bus[socket_].second);
            for (auto & memBar : memBars)
            {
                for (int channel = 0; channel < numChannels; ++channel)
                {
                    auto handle = std::make_shared<MMIORange>(memBar + SERVER_MC_CH_PMON_BASE_ADDR + channel * SERVER_MC_CH_PMON_STEP, SERVER_MC_CH_PMON_SIZE, false);
                    imcPMUs.push_back(
                        UncorePMU(
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_BOX_CTL_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL0_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL1_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL2_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL3_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR0_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR1_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR2_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR3_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_FIXED_CTL_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_FIXED_CTR_OFFSET)
                        )
                    );
                }
                num_imc_channels.push_back(numChannels);
            }
        }
        else
        {
            std::cerr << "ERROR: socket " << socket_ << " is not found in socket2UBOX0bus. socket2UBOX0bus.size =" << socket2UBOX0bus.size() << std::endl;
        }
    }

    // Without at least one memory controller PMU nothing useful can be measured.
    if (imcPMUs.empty())
    {
        std::cerr << "PCM error: no memory controllers found.\n";
        throw std::exception();
    }

    // --- EDC (MCDRAM) PMUs: KNL only ---
    if (cpu_model == PCM::KNL)
    {
        std::vector<std::shared_ptr<PciHandleType> > edcHandles;

        for (auto & reg : EDCRegisterLocation)
        {
            PciHandleType * handle = createIntelPerfMonDevice(groupnr, iMCbus, reg.first, reg.second, true);
            if (handle) edcHandles.push_back(std::shared_ptr<PciHandleType>(handle));
        }

        for (auto & handle : edcHandles)
        {
            edcPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR3_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_FIXED_CTL_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_FIXED_CTR_ADDR))
            );
        }
    }

    // --- M3UPI PMUs (on the UPI bus) ---
    std::vector<std::shared_ptr<PciHandleType> > m3upiHandles;
    if (UPIbus >= 0)
    {
        for (auto& reg : M3UPIRegisterLocation)
        {
            PciHandleType* handle = createIntelPerfMonDevice(groupnr, UPIbus, reg.first, reg.second, true);
            if (handle) m3upiHandles.push_back(std::shared_ptr<PciHandleType>(handle));
        }
    }
    for (auto& handle : m3upiHandles)
    {
        if (cpu_model == PCM::ICX)
        {
            m3upiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR3_ADDR)
                )
            );
        }
        else
        {
            // Pre-ICX M3UPI boxes expose only 3 counters: control/counter slot 3
            // is deliberately left as an empty register handle.
            m3upiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_CTL2_ADDR),
                    std::shared_ptr<PCICFGRegister32>(),
                    std::make_shared<PCICFGRegister64>(handle, M3UPI_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, M3UPI_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, M3UPI_PCI_PMON_CTR2_ADDR),
                    std::shared_ptr<PCICFGRegister64>()
                )
            );
        }
    }

    // --- HA (Home Agent) PMUs ---
    {
        std::vector<std::shared_ptr<PciHandleType> > haHandles;
        for (auto & reg : HARegisterLocation)
        {
            auto handle = createIntelPerfMonDevice(groupnr, iMCbus, reg.first, reg.second, true);
            if (handle) haHandles.push_back(std::shared_ptr<PciHandleType>(handle));
        }

        for (auto & handle : haHandles)
        {
            haPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR3_ADDR)
                )
            );
        }
    }

    if (pcm->getNumSockets() == 1) {
        /*
         * For single socket systems, do not worry at all about QPI ports. This
         * eliminates QPI LL programming error messages on single socket systems
         * with BIOS that hides QPI performance counting PCI functions. It also
         * eliminates register programming that is not needed since no QPI traffic
         * is possible with single socket systems.
         */
        xpiPMUs.clear();
        return;
    }

#ifdef PCM_NOQPI
    xpiPMUs.clear();
    std::cerr << getNumMC() << " memory controllers detected with total number of " << imcPMUs.size() << " channels. " <<
        m2mPMUs.size() << " M2M (mesh to memory) blocks detected. "
        << haPMUs.size() << " Home Agents detected. "
        << m3upiPMUs.size() << " M3UPI blocks detected. "
        "\n";
    return;
#endif

    // --- QPI/UPI link-layer PMUs ---
    std::vector<std::shared_ptr<PciHandleType> > qpiLLHandles;
    auto xPI = pcm->xPI();
    try
    {
        for (size_t i = 0; i < XPIRegisterLocation.size(); ++i)
        {
            PciHandleType * handle = createIntelPerfMonDevice(groupnr, UPIbus, XPIRegisterLocation[i].first, XPIRegisterLocation[i].second, true);
            if (handle)
                qpiLLHandles.push_back(std::shared_ptr<PciHandleType>(handle));
            else
            {
                // Links 0 and 1 are expected on all multi-socket parts; a missing
                // third link is only worth reporting on Xeon E7 (which has one).
                if (i == 0 || i == 1)
                {
                    std::cerr << "ERROR: " << xPI << " LL monitoring device (" << std::hex << groupnr << ":" << UPIbus << ":" << XPIRegisterLocation[i].first << ":" <<
                        XPIRegisterLocation[i].second << ") is missing. The " << xPI << " statistics will be incomplete or missing." << std::dec << "\n";
                }
                else if (pcm->getCPUBrandString().find("E7") != std::string::npos) // Xeon E7
                {
                    std::cerr << "ERROR: " << xPI << " LL performance monitoring device for the third " << xPI << " link was not found on " << pcm->getCPUBrandString() <<
                        " processor in socket " << socket_ << ". Possibly BIOS hides the device. The " << xPI << " statistics will be incomplete or missing.\n";
                }
            }
        }
    }
    catch (...)
    {
        std::cerr << "PCM Error: can not create " << xPI << " LL handles.\n";
        throw std::exception();
    }

    for (auto & handle : qpiLLHandles)
    {
        if (cpu_model == PCM::SKX)
        {
            xpiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR3_ADDR)
                )
            );
        }
        else if (cpu_model == PCM::ICX)
        {
            xpiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR3_ADDR)
                )
            );
        }
        else
        {
            xpiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR3_ADDR)
                )
            );
        }
    }
}
5970
5971
5972 #ifdef PCM_USE_PERF
5973 class PerfVirtualDummyUnitControlRegister : public HWRegister
5974 {
5975 uint64 lastValue;
5976 public:
PerfVirtualDummyUnitControlRegister()5977 PerfVirtualDummyUnitControlRegister() : lastValue(0) {}
operator =(uint64 val)5978 void operator = (uint64 val) override
5979 {
5980 lastValue = val;
5981 }
operator uint64()5982 operator uint64 () override
5983 {
5984 return lastValue;
5985 }
5986 };
5987
5988 class PerfVirtualFilterRegister;
5989
5990 class PerfVirtualControlRegister : public HWRegister
5991 {
5992 friend class PerfVirtualCounterRegister;
5993 friend class PerfVirtualFilterRegister;
5994 int fd;
5995 int socket;
5996 int pmuID;
5997 perf_event_attr event;
5998 bool fixed;
close()5999 void close()
6000 {
6001 if (fd >= 0)
6002 {
6003 ::close(fd);
6004 fd = -1;
6005 }
6006 }
6007 public:
PerfVirtualControlRegister(int socket_,int pmuID_,bool fixed_=false)6008 PerfVirtualControlRegister(int socket_, int pmuID_, bool fixed_ = false) :
6009 fd(-1),
6010 socket(socket_),
6011 pmuID(pmuID_),
6012 fixed(fixed_)
6013 {
6014 event = PCM_init_perf_event_attr(false);
6015 event.type = pmuID;
6016 }
operator =(uint64 val)6017 void operator = (uint64 val) override
6018 {
6019 close();
6020 event.config = fixed ? 0xff : val;
6021 const auto core = PCM::getInstance()->socketRefCore[socket];
6022 if ((fd = syscall(SYS_perf_event_open, &event, -1, core, -1, 0)) <= 0)
6023 {
6024 std::cerr << "Linux Perf: Error on programming PMU " << pmuID << ": " << strerror(errno) << "\n";
6025 std::cerr << "config: 0x" << std::hex << event.config << " config1: 0x" << event.config1 << " config2: 0x" << event.config2 << std::dec << "\n";
6026 if (errno == 24) std::cerr << "try executing 'ulimit -n 10000' to increase the limit on the number of open files.\n";
6027 return;
6028 }
6029 }
operator uint64()6030 operator uint64 () override
6031 {
6032 return event.config;
6033 }
~PerfVirtualControlRegister()6034 ~PerfVirtualControlRegister()
6035 {
6036 close();
6037 }
getFD() const6038 int getFD() const { return fd; }
getPMUID() const6039 int getPMUID() const { return pmuID; }
6040 };
6041
6042 class PerfVirtualCounterRegister : public HWRegister
6043 {
6044 std::shared_ptr<PerfVirtualControlRegister> controlReg;
6045 public:
PerfVirtualCounterRegister(const std::shared_ptr<PerfVirtualControlRegister> & controlReg_)6046 PerfVirtualCounterRegister(const std::shared_ptr<PerfVirtualControlRegister> & controlReg_) : controlReg(controlReg_)
6047 {
6048 }
operator =(uint64)6049 void operator = (uint64 /* val */) override
6050 {
6051 // no-op
6052 }
operator uint64()6053 operator uint64 () override
6054 {
6055 uint64 result = 0;
6056 if (controlReg.get() && (controlReg->getFD() >= 0))
6057 {
6058 int status = ::read(controlReg->getFD(), &result, sizeof(result));
6059 if (status != sizeof(result))
6060 {
6061 std::cerr << "PCM Error: failed to read from Linux perf handle " << controlReg->getFD() << " PMU " << controlReg->getPMUID() << "\n";
6062 }
6063 }
6064 return result;
6065 }
6066 };
6067
// Virtual PMU filter register backed by Linux perf: a write is fanned out to
// all four counter control registers of the box by patching one 32-bit half of
// their perf_event_attr.config1 (filter 0 -> low half, filter 1 -> high half).
// Reading returns the last written value.
class PerfVirtualFilterRegister : public HWRegister
{
    uint64 lastValue;  // last value written to this filter
    std::array<std::shared_ptr<PerfVirtualControlRegister>, 4> controlRegs;
    int filterNr;      // which config1 half to patch: 0 = low 32 bits, 1 = high 32 bits
public:
    PerfVirtualFilterRegister(std::array<std::shared_ptr<PerfVirtualControlRegister>, 4> & controlRegs_, int filterNr_) :
        lastValue(0),
        controlRegs(controlRegs_),
        filterNr(filterNr_)
    {
    }
    void operator = (uint64 val) override
    {
        lastValue = val;
        for (auto & ctl: controlRegs)
        {
            // Union-based type punning to replace one 32-bit half of config1
            // in place. NOTE(review): reading a different union member than the
            // one last written is a compiler extension in C++ (well-defined in
            // C); gcc/clang/MSVC all support it. `val` is truncated to 32 bits.
            union {
                uint64 config1;
                uint32 config1HL[2];
            } cvt;
            cvt.config1 = ctl->event.config1;
            cvt.config1HL[filterNr] = val;
            ctl->event.config1 = cvt.config1;
        }
    }
    operator uint64 () override
    {
        return lastValue;
    }
};
6099
enumeratePerfPMUs(const std::string & type,int max_id)6100 std::vector<int> enumeratePerfPMUs(const std::string & type, int max_id)
6101 {
6102 auto getPerfPMUID = [](const std::string & type, int num)
6103 {
6104 int id = -1;
6105 std::ostringstream pmuIDPath(std::ostringstream::out);
6106 pmuIDPath << std::string("/sys/bus/event_source/devices/uncore_") << type;
6107 if (num != -1)
6108 {
6109 pmuIDPath << "_" << num;
6110 }
6111 pmuIDPath << "/type";
6112 const std::string pmuIDStr = readSysFS(pmuIDPath.str().c_str(), true);
6113 if (pmuIDStr.size())
6114 {
6115 id = std::atoi(pmuIDStr.c_str());
6116 }
6117 return id;
6118 };
6119 std::vector<int> ids;
6120 for (int i = -1; i < max_id; ++i)
6121 {
6122 int pmuID = getPerfPMUID(type, i);
6123 if (pmuID > 0)
6124 {
6125 // std::cout << "DEBUG: " << type << " pmu id " << pmuID << " found\n";
6126 ids.push_back(pmuID);
6127 }
6128 }
6129 return ids;
6130 }
6131
populatePerfPMUs(unsigned socket_,const std::vector<int> & ids,std::vector<UncorePMU> & pmus,bool fixed,bool filter0,bool filter1)6132 void populatePerfPMUs(unsigned socket_, const std::vector<int> & ids, std::vector<UncorePMU> & pmus, bool fixed, bool filter0, bool filter1)
6133 {
6134 for (const auto & id : ids)
6135 {
6136 std::array<std::shared_ptr<PerfVirtualControlRegister>, 4> controlRegs = {
6137 std::make_shared<PerfVirtualControlRegister>(socket_, id),
6138 std::make_shared<PerfVirtualControlRegister>(socket_, id),
6139 std::make_shared<PerfVirtualControlRegister>(socket_, id),
6140 std::make_shared<PerfVirtualControlRegister>(socket_, id)
6141 };
6142 std::shared_ptr<PerfVirtualCounterRegister> counterReg0 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[0]);
6143 std::shared_ptr<PerfVirtualCounterRegister> counterReg1 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[1]);
6144 std::shared_ptr<PerfVirtualCounterRegister> counterReg2 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[2]);
6145 std::shared_ptr<PerfVirtualCounterRegister> counterReg3 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[3]);
6146 std::shared_ptr<PerfVirtualControlRegister> fixedControlReg = std::make_shared<PerfVirtualControlRegister>(socket_, id, true);
6147 std::shared_ptr<PerfVirtualCounterRegister> fixedCounterReg = std::make_shared<PerfVirtualCounterRegister>(fixedControlReg);
6148 std::shared_ptr<PerfVirtualFilterRegister> filterReg0 = std::make_shared<PerfVirtualFilterRegister>(controlRegs, 0);
6149 std::shared_ptr<PerfVirtualFilterRegister> filterReg1 = std::make_shared<PerfVirtualFilterRegister>(controlRegs, 1);
6150 pmus.push_back(
6151 UncorePMU(
6152 std::make_shared<PerfVirtualDummyUnitControlRegister>(),
6153 controlRegs[0],
6154 controlRegs[1],
6155 controlRegs[2],
6156 controlRegs[3],
6157 counterReg0,
6158 counterReg1,
6159 counterReg2,
6160 counterReg3,
6161 fixed ? fixedControlReg : std::shared_ptr<HWRegister>(),
6162 fixed ? fixedCounterReg : std::shared_ptr<HWRegister>(),
6163 filter0 ? filterReg0 : std::shared_ptr<HWRegister>(),
6164 filter1 ? filterReg1 : std::shared_ptr<HWRegister>()
6165 )
6166 );
6167 }
6168 }
6169 #endif
6170
initPerf(uint32 socket_,const PCM *)6171 void ServerPCICFGUncore::initPerf(uint32 socket_, const PCM * /*pcm*/)
6172 {
6173 #ifdef PCM_USE_PERF
6174 auto imcIDs = enumeratePerfPMUs("imc", 100);
6175 auto m2mIDs = enumeratePerfPMUs("m2m", 100);
6176 auto haIDs = enumeratePerfPMUs("ha", 100);
6177 auto numMemControllers = std::max(m2mIDs.size(), haIDs.size());
6178 for (size_t i = 0; i < numMemControllers; ++i)
6179 {
6180 const int channelsPerController = imcIDs.size() / numMemControllers;
6181 num_imc_channels.push_back(channelsPerController);
6182 }
6183 populatePerfPMUs(socket_, imcIDs, imcPMUs, true);
6184 populatePerfPMUs(socket_, m2mIDs, m2mPMUs, false);
6185 populatePerfPMUs(socket_, enumeratePerfPMUs("qpi", 100), xpiPMUs, false);
6186 populatePerfPMUs(socket_, enumeratePerfPMUs("upi", 100), xpiPMUs, false);
6187 populatePerfPMUs(socket_, enumeratePerfPMUs("m3upi", 100), m3upiPMUs, false);
6188 populatePerfPMUs(socket_, haIDs, haPMUs, false);
6189 #endif
6190 }
6191
getNumMCChannels(const uint32 controller) const6192 size_t ServerPCICFGUncore::getNumMCChannels(const uint32 controller) const
6193 {
6194 if (controller < num_imc_channels.size())
6195 {
6196 return num_imc_channels[controller];
6197 }
6198 return 0;
6199 }
6200
ServerPCICFGUncore::~ServerPCICFGUncore()
{
    // Intentionally empty: the PMU containers hold their device handles via
    // shared_ptr, so all resources are released automatically.
}
6204
6205
programServerUncoreMemoryMetrics(const ServerUncoreMemoryMetrics & metrics,const int rankA,const int rankB)6206 void ServerPCICFGUncore::programServerUncoreMemoryMetrics(const ServerUncoreMemoryMetrics & metrics, const int rankA, const int rankB)
6207 {
6208 PCM * pcm = PCM::getInstance();
6209 uint32 MCCntConfig[4] = {0,0,0,0};
6210 uint32 EDCCntConfig[4] = {0,0,0,0};
6211 if(rankA < 0 && rankB < 0)
6212 {
6213 auto setEvents2_3 = [&](const uint32 partial_write_event) {
6214 auto noPmem = [&pcm]() -> bool
6215 {
6216 if (pcm->PMMTrafficMetricsAvailable() == false)
6217 {
6218 std::cerr << "PCM Error: PMM/Pmem metrics are not available on your platform\n";
6219 return true;
6220 }
6221 return false;
6222 };
6223 switch (metrics)
6224 {
6225 case PmemMemoryMode:
6226 case PmemMixedMode:
6227 if (noPmem()) return false;
6228 MCCntConfig[EventPosition::PMM_MM_MISS_CLEAN] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor TAGCHK.MISS_CLEAN on counter 2
6229 MCCntConfig[EventPosition::PMM_MM_MISS_DIRTY] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(4); // monitor TAGCHK.MISS_DIRTY on counter 3
6230 break;
6231 case Pmem:
6232 if (noPmem()) return false;
6233 MCCntConfig[EventPosition::PMM_READ] = MC_CH_PCI_PMON_CTL_EVENT(0xe3); // monitor PMM_RDQ_REQUESTS on counter 2
6234 MCCntConfig[EventPosition::PMM_WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0xe7); // monitor PMM_WPQ_REQUESTS on counter 3
6235 break;
6236 case PartialWrites:
6237 MCCntConfig[EventPosition::PARTIAL] = partial_write_event;
6238 break;
6239 default:
6240 std::cerr << "PCM Error: unknown metrics: " << metrics << "\n";
6241 return false;
6242 }
6243 return true;
6244 };
6245 switch(cpu_model)
6246 {
6247 case PCM::KNL:
6248 MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: CAS.RD
6249 MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor reads on counter 1: CAS.WR
6250 EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: RPQ
6251 EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 1: WPQ
6252 break;
6253 case PCM::SNOWRIDGE:
6254 case PCM::ICX:
6255 if (metrics == PmemMemoryMode)
6256 {
6257 MCCntConfig[EventPosition::NM_HIT] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: UNC_M_TAGCHK.HIT
6258 }
6259 else
6260 {
6261 MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0f); // monitor reads on counter 0: CAS_COUNT.RD
6262 MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x30); // monitor writes on counter 1: CAS_COUNT.WR
6263 }
6264 if (setEvents2_3(MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0c)) == false) // monitor partial writes on counter 2: CAS_COUNT.RD_UNDERFILL
6265 {
6266 return;
6267 }
6268 break;
6269 default:
6270 MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(3); // monitor reads on counter 0: CAS_COUNT.RD
6271 MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(12); // monitor writes on counter 1: CAS_COUNT.WR
6272 if (setEvents2_3(MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(2)) == false) // monitor partial writes on counter 2: CAS_COUNT.RD_UNDERFILL
6273 {
6274 return;
6275 }
6276 }
6277 } else {
6278 switch(cpu_model)
6279 {
6280 case PCM::IVYTOWN:
6281 MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // RD_CAS_RANK(rankA) all banks
6282 MCCntConfig[EventPosition::WRITE_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // WR_CAS_RANK(rankA) all banks
6283 MCCntConfig[EventPosition::READ_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // RD_CAS_RANK(rankB) all banks
6284 MCCntConfig[EventPosition::WRITE_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // WR_CAS_RANK(rankB) all banks
6285 break;
6286 case PCM::HASWELLX:
6287 case PCM::BDX_DE:
6288 case PCM::BDX:
6289 case PCM::SKX:
6290 MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(16); // RD_CAS_RANK(rankA) all banks
6291 MCCntConfig[EventPosition::WRITE_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(16); // WR_CAS_RANK(rankA) all banks
6292 MCCntConfig[EventPosition::READ_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(16); // RD_CAS_RANK(rankB) all banks
6293 MCCntConfig[EventPosition::WRITE_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(16); // WR_CAS_RANK(rankB) all banks
6294 break;
6295 case PCM::ICX:
6296 case PCM::SNOWRIDGE:
6297 MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // RD_CAS_RANK(rankA) all banks
6298 MCCntConfig[EventPosition::WRITE_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // WR_CAS_RANK(rankA) all banks
6299 MCCntConfig[EventPosition::READ_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // RD_CAS_RANK(rankB) all banks
6300 MCCntConfig[EventPosition::WRITE_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // WR_CAS_RANK(rankB) all banks
6301 break;
6302 case PCM::KNL:
6303 MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: CAS.RD
6304 MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor reads on counter 1: CAS.WR
6305 EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: RPQ
6306 EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 1: WPQ
6307 break;
6308 default:
6309 std::cerr << "PCM Error: your processor " << pcm->getCPUBrandString() << " model " << cpu_model << " does not support the required performance events \n";
6310 return;
6311 }
6312 }
6313 programIMC(MCCntConfig);
6314 if(cpu_model == PCM::KNL) programEDC(EDCCntConfig);
6315
6316 programM2M();
6317
6318 xpiPMUs.clear(); // no QPI events used
6319 return;
6320 }
6321
// Program the default server uncore monitoring configuration:
// iMC read/write (+ optional PMM) events, EDC events on KNL, M2M events,
// and the per-model QPI/UPI link-layer events.
void ServerPCICFGUncore::program()
{
    PCM * pcm = PCM::getInstance();
    uint32 MCCntConfig[4] = {0, 0, 0, 0};
    uint32 EDCCntConfig[4] = {0, 0, 0, 0};
    switch(cpu_model)
    {
    case PCM::KNL:
        MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: CAS_COUNT.RD
        MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor writes on counter 1: CAS_COUNT.WR
        EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: RPQ
        EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor writes on counter 1: WPQ
        break;
    case PCM::SNOWRIDGE:
    case PCM::ICX:
        MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0f); // monitor reads on counter 0: CAS_COUNT.RD
        MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x30); // monitor writes on counter 1: CAS_COUNT.WR
        break;
    default:
        MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(3); // monitor reads on counter 0: CAS_COUNT.RD
        MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(12); // monitor writes on counter 1: CAS_COUNT.WR
    }

    if (pcm->PMMTrafficMetricsAvailable())
    {
        MCCntConfig[EventPosition::PMM_READ] = MC_CH_PCI_PMON_CTL_EVENT(0xe3); // monitor PMM_RDQ_REQUESTS on counter 2
        MCCntConfig[EventPosition::PMM_WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0xe7); // monitor PMM_WPQ_REQUESTS on counter 3
    }

    programIMC(MCCntConfig);
    if(cpu_model == PCM::KNL) programEDC(EDCCntConfig); // EDC (MCDRAM) PMUs exist only on KNL

    programM2M();

    // QPI/UPI link-layer events differ between UPI-capable models and older QPI models
    uint32 event[4];
    if (PCM::hasUPI(cpu_model))
    {
        // monitor TxL0_POWER_CYCLES
        event[0] = Q_P_PCI_PMON_CTL_EVENT(0x26);
        // monitor RxL_FLITS.ALL_DATA on counter 1
        event[1] = Q_P_PCI_PMON_CTL_EVENT(0x03) + Q_P_PCI_PMON_CTL_UMASK(0xF);
        // monitor TxL_FLITS.NON_DATA+ALL_DATA on counter 2
        event[2] = Q_P_PCI_PMON_CTL_EVENT(0x02) + Q_P_PCI_PMON_CTL_UMASK((0x97|0x0F));
        // monitor UPI CLOCKTICKS
        event[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = Q_P_PCI_PMON_CTL_EVENT(0x01);
    }
    else
    {
        // monitor DRS data received on counter 0: RxL_FLITS_G1.DRS_DATA
        event[0] = Q_P_PCI_PMON_CTL_EVENT(0x02) + Q_P_PCI_PMON_CTL_EVENT_EXT + Q_P_PCI_PMON_CTL_UMASK(8);
        // monitor NCB data received on counter 1: RxL_FLITS_G2.NCB_DATA
        event[1] = Q_P_PCI_PMON_CTL_EVENT(0x03) + Q_P_PCI_PMON_CTL_EVENT_EXT + Q_P_PCI_PMON_CTL_UMASK(4);
        // monitor outgoing data+nondata flits on counter 2: TxL_FLITS_G0.DATA + TxL_FLITS_G0.NON_DATA
        event[2] = Q_P_PCI_PMON_CTL_EVENT(0x00) + Q_P_PCI_PMON_CTL_UMASK(6);
        // monitor QPI clocks
        event[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = Q_P_PCI_PMON_CTL_EVENT(0x14); // QPI clocks (CLOCKTICKS)
    }
    programXPI(event);
    programHA();
}
6382
programXPI(const uint32 * event)6383 void ServerPCICFGUncore::programXPI(const uint32 * event)
6384 {
6385 const uint32 extra = PCM::hasUPI(cpu_model) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN;
6386 for (uint32 i = 0; i < (uint32)xpiPMUs.size(); ++i)
6387 {
6388 // QPI LL PMU
6389
6390 if (xpiPMUs[i].initFreeze(extra,
6391 " Please see BIOS options to enable the export of QPI/UPI performance monitoring devices (devices 8 and 9: function 2).\n")
6392 == false)
6393 {
6394 std::cout << "Link " << (i + 1) << " is disabled\n";
6395 continue;
6396 }
6397
6398 PCM::program(xpiPMUs[i], event, event + 4, extra);
6399 }
6400 cleanupQPIHandles();
6401 }
6402
cleanupQPIHandles()6403 void ServerPCICFGUncore::cleanupQPIHandles()
6404 {
6405 for(auto i = xpiPMUs.begin(); i != xpiPMUs.end(); ++i)
6406 {
6407 if (!i->valid())
6408 {
6409 xpiPMUs.erase(i);
6410 cleanupQPIHandles();
6411 return;
6412 }
6413 }
6414 }
6415
cleanupPMUs()6416 void ServerPCICFGUncore::cleanupPMUs()
6417 {
6418 for (auto & pmu : xpiPMUs)
6419 {
6420 pmu.cleanup();
6421 }
6422 for (auto & pmu : imcPMUs)
6423 {
6424 pmu.cleanup();
6425 }
6426 for (auto & pmu : edcPMUs)
6427 {
6428 pmu.cleanup();
6429 }
6430 for (auto & pmu : m2mPMUs)
6431 {
6432 pmu.cleanup();
6433 }
6434 for (auto & pmu : haPMUs)
6435 {
6436 pmu.cleanup();
6437 }
6438 }
6439
getImcReads()6440 uint64 ServerPCICFGUncore::getImcReads()
6441 {
6442 return getImcReadsForChannels((uint32)0, (uint32)imcPMUs.size());
6443 }
6444
getImcReadsForController(uint32 controller)6445 uint64 ServerPCICFGUncore::getImcReadsForController(uint32 controller)
6446 {
6447 assert(controller < num_imc_channels.size());
6448 uint32 beginChannel = 0;
6449 for (uint32 i = 0; i < controller; ++i)
6450 {
6451 beginChannel += num_imc_channels[i];
6452 }
6453 const uint32 endChannel = beginChannel + num_imc_channels[controller];
6454 return getImcReadsForChannels(beginChannel, endChannel);
6455 }
6456
getImcReadsForChannels(uint32 beginChannel,uint32 endChannel)6457 uint64 ServerPCICFGUncore::getImcReadsForChannels(uint32 beginChannel, uint32 endChannel)
6458 {
6459 uint64 result = 0;
6460 for (uint32 i = beginChannel; i < endChannel && i < imcPMUs.size(); ++i)
6461 {
6462 result += getMCCounter(i, EventPosition::READ);
6463 }
6464 return result;
6465 }
6466
getImcWrites()6467 uint64 ServerPCICFGUncore::getImcWrites()
6468 {
6469 uint64 result = 0;
6470 for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i)
6471 {
6472 result += getMCCounter(i, EventPosition::WRITE);
6473 }
6474
6475 return result;
6476 }
6477
getPMMReads()6478 uint64 ServerPCICFGUncore::getPMMReads()
6479 {
6480 uint64 result = 0;
6481 for (uint32 i = 0; i < (uint32)m2mPMUs.size(); ++i)
6482 {
6483 result += getM2MCounter(i, EventPosition::PMM_READ);
6484 }
6485 return result;
6486 }
6487
getPMMWrites()6488 uint64 ServerPCICFGUncore::getPMMWrites()
6489 {
6490 uint64 result = 0;
6491 for (uint32 i = 0; i < (uint32)m2mPMUs.size(); ++i)
6492 {
6493 result += getM2MCounter(i, EventPosition::PMM_WRITE);
6494 }
6495 return result;
6496 }
6497
getEdcReads()6498 uint64 ServerPCICFGUncore::getEdcReads()
6499 {
6500 uint64 result = 0;
6501
6502 for (auto & pmu: edcPMUs)
6503 {
6504 result += *pmu.counterValue[EventPosition::READ];
6505 }
6506
6507 return result;
6508 }
6509
getEdcWrites()6510 uint64 ServerPCICFGUncore::getEdcWrites()
6511 {
6512 uint64 result = 0;
6513
6514 for (auto & pmu : edcPMUs)
6515 {
6516 result += *pmu.counterValue[EventPosition::WRITE];
6517 }
6518
6519 return result;
6520 }
6521
getIncomingDataFlits(uint32 port)6522 uint64 ServerPCICFGUncore::getIncomingDataFlits(uint32 port)
6523 {
6524 uint64 drs = 0, ncb = 0;
6525
6526 if (port >= (uint32)xpiPMUs.size())
6527 return 0;
6528
6529 if (PCM::hasUPI(cpu_model) == false)
6530 {
6531 drs = *xpiPMUs[port].counterValue[0];
6532 }
6533 ncb = *xpiPMUs[port].counterValue[1];
6534
6535 return drs + ncb;
6536 }
6537
// Number of outgoing (Tx) flits on the given xPI port; counter 2 is
// programmed with a TxL_FLITS event in program().
uint64 ServerPCICFGUncore::getOutgoingFlits(uint32 port)
{
    return getQPILLCounter(port,2);
}
6542
getUPIL0TxCycles(uint32 port)6543 uint64 ServerPCICFGUncore::getUPIL0TxCycles(uint32 port)
6544 {
6545 if (PCM::hasUPI(cpu_model))
6546 return getQPILLCounter(port,0);
6547 return 0;
6548 }
6549
// Program power-related uncore events: xPI L0p/L1 power cycles and clocks,
// plus one of five iMC CKE/self-refresh profiles selected by mc_profile.
// Each CKE profile pairs an "in state" cycle count (INVERT+THRESH) with an
// "entered state" edge count (THRESH+EDGE_DET) for two ranks.
void ServerPCICFGUncore::program_power_metrics(int mc_profile)
{
    uint32 xPIEvents[4] = { 0,0,0,0 };
    // event codes differ between UPI-capable and QPI models
    xPIEvents[ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x27 : 0x0D)); // L0p Tx Cycles (TxL0P_POWER_CYCLES)
    xPIEvents[ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x21 : 0x12)); // L1 Cycles (L1_POWER_CYCLES)
    xPIEvents[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x01 : 0x14)); // QPI/UPI clocks (CLOCKTICKS)

    programXPI(xPIEvents);

    uint32 MCCntConfig[4] = {0,0,0,0};
    unsigned int UNC_M_POWER_CKE_CYCLES = 0x83;
    if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE)
    {
        // ICX/SNR use a different event code for POWER_CKE_CYCLES
        UNC_M_POWER_CKE_CYCLES = 0x47;
    }
    switch(mc_profile)
    {
        case 0: // POWER_CKE_CYCLES.RANK0 and POWER_CKE_CYCLES.RANK1
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(1) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(1) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(2) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(2) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            break;
        case 1: // POWER_CKE_CYCLES.RANK2 and POWER_CKE_CYCLES.RANK3
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(4) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(4) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(8) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(8) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            break;
        case 2: // POWER_CKE_CYCLES.RANK4 and POWER_CKE_CYCLES.RANK5
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x10) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x10) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x20) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x20) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            break;
        case 3: // POWER_CKE_CYCLES.RANK6 and POWER_CKE_CYCLES.RANK7
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x40) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x40) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x80) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x80) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            break;
        case 4: // POWER_SELF_REFRESH
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x43);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x85);
            break;
    }

    programIMC(MCCntConfig);
}
6600
// Program all iMC channel PMUs with the supplied four counter-control
// values and enable (and reset) the fixed counter, which counts DRAM clocks.
void ServerPCICFGUncore::programIMC(const uint32 * MCCntConfig)
{
    // SKX uses the reserved bit in the unit control; other models use FRZ_EN
    // (same per-model split as freezeCounters/unfreezeCounters)
    const uint32 extraIMC = (cpu_model == PCM::SKX)?UNC_PMON_UNIT_CTL_RSV:UNC_PMON_UNIT_CTL_FRZ_EN;

    for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i)
    {
        // imc PMU
        imcPMUs[i].initFreeze(extraIMC);

        // enable fixed counter (DRAM clocks)
        *imcPMUs[i].fixedCounterControl = MC_CH_PCI_PMON_FIXED_CTL_EN;

        // reset it
        *imcPMUs[i].fixedCounterControl = MC_CH_PCI_PMON_FIXED_CTL_EN + MC_CH_PCI_PMON_FIXED_CTL_RST;

        PCM::program(imcPMUs[i], MCCntConfig, MCCntConfig + 4, extraIMC);
    }
}
6619
// Program all EDC channel PMUs (KNL MCDRAM controllers — see the KNL-only
// call site in program()) with the supplied four counter-control values.
void ServerPCICFGUncore::programEDC(const uint32 * EDCCntConfig)
{
    for (uint32 i = 0; i < (uint32)edcPMUs.size(); ++i)
    {
        edcPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);

        // MCDRAM clocks enabled by default
        *edcPMUs[i].fixedCounterControl = EDC_CH_PCI_PMON_FIXED_CTL_EN;

        PCM::program(edcPMUs[i], EDCCntConfig, EDCCntConfig + 4, UNC_PMON_UNIT_CTL_FRZ_EN);
    }
}
6632
// Program the default M2M (mesh-to-memory) event set: near-memory tag hits,
// clockticks and PMM read/write request counts. ICX uses different umask
// encodings (including the extended umask field) than earlier models.
void ServerPCICFGUncore::programM2M()
{
    uint64 cfg[4] = {0, 0, 0, 0};
    switch (cpu_model)
    {
    case PCM::ICX:
        cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x2c) + M2M_PCI_PMON_CTL_UMASK(3);    // UNC_M2M_TAG_HIT.NM_DRD_HIT_* events (CLEAN | DIRTY)
        cfg[EventPosition::M2M_CLOCKTICKS] = 0;                                                      // CLOCKTICKS
        cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x20) + UNC_PMON_CTL_UMASK_EXT(0x07);    // UNC_M2M_IMC_READS.TO_PMM
        cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x80) + UNC_PMON_CTL_UMASK_EXT(0x1C);   // UNC_M2M_IMC_WRITES.TO_PMM
        break;
    default:
        cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x2c) + M2M_PCI_PMON_CTL_UMASK(3);    // UNC_M2M_TAG_HIT.NM_DRD_HIT_* events (CLEAN | DIRTY)
        cfg[EventPosition::M2M_CLOCKTICKS] = 0;                                                      // CLOCKTICKS
        cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x8);     // UNC_M2M_IMC_READS.TO_PMM
        cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x20);   // UNC_M2M_IMC_WRITES.TO_PMM
    }
    programM2M(cfg);
}
6652
programM2M(const uint64 * M2MCntConfig)6653 void ServerPCICFGUncore::programM2M(const uint64* M2MCntConfig)
6654 {
6655 {
6656 for (auto & pmu : m2mPMUs)
6657 {
6658 pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
6659 PCM::program(pmu, M2MCntConfig, M2MCntConfig + 4, UNC_PMON_UNIT_CTL_RSV);
6660 }
6661 }
6662 }
6663
programM3UPI(const uint32 * M3UPICntConfig)6664 void ServerPCICFGUncore::programM3UPI(const uint32* M3UPICntConfig)
6665 {
6666 {
6667 for (auto& pmu : m3upiPMUs)
6668 {
6669 pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
6670 PCM::program(pmu, M3UPICntConfig, M3UPICntConfig + 4, UNC_PMON_UNIT_CTL_RSV);
6671 }
6672 }
6673 }
6674
programHA(const uint32 * config)6675 void ServerPCICFGUncore::programHA(const uint32 * config)
6676 {
6677 for (auto & pmu : haPMUs)
6678 {
6679 pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
6680 PCM::program(pmu, config, config + 4, UNC_PMON_UNIT_CTL_RSV);
6681 }
6682 }
6683
getHARequests()6684 uint64 ServerPCICFGUncore::getHARequests()
6685 {
6686 uint64 result = 0;
6687 for (auto & pmu: haPMUs)
6688 {
6689 result += *pmu.counterValue[PCM::EventPosition::REQUESTS_ALL];
6690 }
6691 return result;
6692 }
6693
getHALocalRequests()6694 uint64 ServerPCICFGUncore::getHALocalRequests()
6695 {
6696 uint64 result = 0;
6697 for (auto & pmu: haPMUs)
6698 {
6699 result += *pmu.counterValue[PCM::EventPosition::REQUESTS_LOCAL];
6700 }
6701 return result;
6702 }
6703
// Program the default home-agent event set: total and local-only request
// counts. With PCM_HA_REQUESTS_READS_ONLY defined, only read requests are
// counted; otherwise reads and writes are combined.
void ServerPCICFGUncore::programHA()
{
    uint32 config[4];
    config[0] = 0;
    config[1] = 0;
#ifdef PCM_HA_REQUESTS_READS_ONLY
    // HA REQUESTS READ: LOCAL + REMOTE
    config[PCM::EventPosition::REQUESTS_ALL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1 + 2));
    // HA REQUESTS READ: LOCAL ONLY
    config[PCM::EventPosition::REQUESTS_LOCAL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1));
#else
    // HA REQUESTS READ+WRITE+REMOTE+LOCAL
    config[PCM::EventPosition::REQUESTS_ALL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1 + 2 + 4 + 8));
    // HA REQUESTS READ+WRITE (LOCAL only)
    config[PCM::EventPosition::REQUESTS_LOCAL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1 + 4));
#endif
    programHA(config);
}
6722
// Freeze all uncore PMU counters by setting the freeze bit in every unit
// control register (SKX keeps the reserved bit set instead of FRZ_EN —
// same per-model split as programIMC/unfreezeCounters).
void ServerPCICFGUncore::freezeCounters()
{
    writeAllUnitControl(UNC_PMON_UNIT_CTL_FRZ + ((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN));
}
6727
writeAllUnitControl(const uint32 value)6728 void ServerPCICFGUncore::writeAllUnitControl(const uint32 value)
6729 {
6730 for (auto& pmuVector : allPMUs)
6731 {
6732 for (auto& pmu : *pmuVector)
6733 {
6734 pmu.writeUnitControl(value);
6735 }
6736 }
6737 }
6738
// Unfreeze all uncore PMU counters by clearing the freeze bit (writes the
// unit control without UNC_PMON_UNIT_CTL_FRZ; SKX uses RSV, others FRZ_EN).
void ServerPCICFGUncore::unfreezeCounters()
{
    writeAllUnitControl((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN);
}
6743
// QPI/UPI clock tick count for the given port (xPI_CLOCKTICKS counter).
uint64 ServerPCICFGUncore::getQPIClocks(uint32 port)
{
    return getQPILLCounter(port, ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS);
}
6748
// L0p Tx power-state cycle count for the given port (programmed in
// program_power_metrics()).
uint64 ServerPCICFGUncore::getQPIL0pTxCycles(uint32 port)
{
    return getQPILLCounter(port, ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES);
}
6753
// L1 power-state cycle count for the given port (programmed in
// program_power_metrics()).
uint64 ServerPCICFGUncore::getQPIL1Cycles(uint32 port)
{
    return getQPILLCounter(port, ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES);
}
6758
getDRAMClocks(uint32 channel)6759 uint64 ServerPCICFGUncore::getDRAMClocks(uint32 channel)
6760 {
6761 uint64 result = 0;
6762
6763 if (channel < (uint32)imcPMUs.size())
6764 result = *(imcPMUs[channel].fixedCounterValue);
6765
6766 // std::cout << "DEBUG: DRAMClocks on channel " << channel << " = " << result << "\n";
6767 return result;
6768 }
6769
getMCDRAMClocks(uint32 channel)6770 uint64 ServerPCICFGUncore::getMCDRAMClocks(uint32 channel)
6771 {
6772 uint64 result = 0;
6773
6774 if (channel < (uint32)edcPMUs.size())
6775 result = *edcPMUs[channel].fixedCounterValue;
6776
6777 // std::cout << "DEBUG: MCDRAMClocks on EDC" << channel << " = " << result << "\n";
6778 return result;
6779 }
6780
getPMUCounter(std::vector<UncorePMU> & pmu,const uint32 id,const uint32 counter)6781 uint64 ServerPCICFGUncore::getPMUCounter(std::vector<UncorePMU> & pmu, const uint32 id, const uint32 counter)
6782 {
6783 uint64 result = 0;
6784
6785 if (id < (uint32)pmu.size() && counter < 4 && pmu[id].counterValue[counter].get() != nullptr)
6786 {
6787 result = *(pmu[id].counterValue[counter]);
6788 }
6789 else
6790 {
6791 //std::cout << "DEBUG: Invalid ServerPCICFGUncore::getPMUCounter(" << id << ", " << counter << ") \n";
6792 }
6793 // std::cout << "DEBUG: ServerPCICFGUncore::getPMUCounter(" << id << ", " << counter << ") = " << result << "\n";
6794 return result;
6795 }
6796
// Read one counter of one iMC channel PMU (0 if out of range).
uint64 ServerPCICFGUncore::getMCCounter(uint32 channel, uint32 counter)
{
    return getPMUCounter(imcPMUs, channel, counter);
}
6801
// Read one counter of one EDC channel PMU (0 if out of range).
uint64 ServerPCICFGUncore::getEDCCounter(uint32 channel, uint32 counter)
{
    return getPMUCounter(edcPMUs, channel, counter);
}
6806
// Read one counter of one M2M box PMU (0 if out of range).
uint64 ServerPCICFGUncore::getM2MCounter(uint32 box, uint32 counter)
{
    return getPMUCounter(m2mPMUs, box, counter);
}
6811
// Read one counter of one QPI/UPI link-layer PMU (0 if out of range).
uint64 ServerPCICFGUncore::getQPILLCounter(uint32 port, uint32 counter)
{
    return getPMUCounter(xpiPMUs, port, counter);
}
6816
// Read one counter of one M3UPI PMU (0 if out of range).
uint64 ServerPCICFGUncore::getM3UPICounter(uint32 port, uint32 counter)
{
    // std::cout << "DEBUG: ServerPCICFGUncore::getM3UPICounter(" << port << ", " << counter << ") = " << getPMUCounter(m3upiPMUs, port, counter) << "\n";
    return getPMUCounter(m3upiPMUs, port, counter);
}
6822
enableJKTWorkaround(bool enable)6823 void ServerPCICFGUncore::enableJKTWorkaround(bool enable)
6824 {
6825 {
6826 PciHandleType reg(groupnr,iMCbus,14,0);
6827 uint32 value = 0;
6828 reg.read32(0x84, &value);
6829 if(enable)
6830 value |= 2;
6831 else
6832 value &= (~2);
6833 reg.write32(0x84, value);
6834 }
6835 {
6836 PciHandleType reg(groupnr,iMCbus,8,0);
6837 uint32 value = 0;
6838 reg.read32(0x80, &value);
6839 if(enable)
6840 value |= 2;
6841 else
6842 value &= (~2);
6843 reg.write32(0x80, value);
6844 }
6845 {
6846 PciHandleType reg(groupnr,iMCbus,9,0);
6847 uint32 value = 0;
6848 reg.read32(0x80, &value);
6849 if(enable)
6850 value |= 2;
6851 else
6852 value &= (~2);
6853 reg.write32(0x80, value);
6854 }
6855 }
6856
6857 #define PCM_MEM_CAPACITY (1024ULL*1024ULL*64ULL) // 64 MByte
6858
initMemTest(ServerPCICFGUncore::MemTestParam & param)6859 void ServerPCICFGUncore::initMemTest(ServerPCICFGUncore::MemTestParam & param)
6860 {
6861 auto & memBufferBlockSize = param.first;
6862 auto & memBuffers = param.second;
6863 #ifdef __linux__
6864 size_t capacity = PCM_MEM_CAPACITY;
6865 char * buffer = (char *)mmap(NULL, capacity, PROT_READ | PROT_WRITE,
6866 MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
6867 if (buffer == MAP_FAILED) {
6868 std::cerr << "ERROR: mmap failed\n";
6869 return;
6870 }
6871 unsigned long long maxNode = (unsigned long long)(readMaxFromSysFS("/sys/devices/system/node/online") + 1);
6872 if (maxNode == 0)
6873 {
6874 std::cerr << "ERROR: max node is 0 \n";
6875 return;
6876 }
6877 if (maxNode >= 63) maxNode = 63;
6878 const unsigned long long nodeMask = (1ULL << maxNode) - 1ULL;
6879 if (0 != syscall(SYS_mbind, buffer, capacity, 3 /* MPOL_INTERLEAVE */,
6880 &nodeMask, maxNode, 0))
6881 {
6882 std::cerr << "ERROR: mbind failed. nodeMask: " << nodeMask << " maxNode: " << maxNode << "\n";
6883 return;
6884 }
6885 memBuffers.push_back((uint64 *)buffer);
6886 memBufferBlockSize = capacity;
6887 #elif defined(_MSC_VER)
6888 ULONG HighestNodeNumber;
6889 if (!GetNumaHighestNodeNumber(&HighestNodeNumber))
6890 {
6891 std::cerr << "ERROR: GetNumaHighestNodeNumber call failed.\n";
6892 return;
6893 }
6894 memBufferBlockSize = 4096;
6895 for (int i = 0; i < PCM_MEM_CAPACITY / memBufferBlockSize; ++i)
6896 {
6897 LPVOID result = VirtualAllocExNuma(
6898 GetCurrentProcess(),
6899 NULL,
6900 memBufferBlockSize,
6901 MEM_RESERVE | MEM_COMMIT,
6902 PAGE_READWRITE,
6903 i % (HighestNodeNumber + 1)
6904 );
6905
6906 if (result == NULL)
6907 {
6908 std::cerr << "ERROR: " << i << " VirtualAllocExNuma failed.\n";
6909 for (auto b : memBuffers)
6910 {
6911 VirtualFree(b, memBufferBlockSize, MEM_RELEASE);
6912 }
6913 memBuffers.clear();
6914 break;
6915 }
6916 else
6917 {
6918 memBuffers.push_back((uint64 *)result);
6919 }
6920 }
6921 #else
6922 std::cerr << "ERROR: memory test is not implemented. QPI/UPI speed and utilization metrics may not be reliable.\n";
6923 #endif
6924 for (auto b : memBuffers)
6925 std::fill(b, b + (memBufferBlockSize / sizeof(uint64)), 0ULL);
6926 }
6927
// Generate memory traffic for the QPI/UPI speed measurement: increment one
// uint64 per 64-byte cache line in every buffer (each line is read and
// written exactly once per call).
void ServerPCICFGUncore::doMemTest(const ServerPCICFGUncore::MemTestParam & param)
{
    const auto & memBufferBlockSize = param.first;
    const auto & memBuffers = param.second;
    // read and write each cache line once
    for (auto b : memBuffers)
        for (unsigned int i = 0; i < memBufferBlockSize / sizeof(uint64); i += 64 / sizeof(uint64))
        {
            (b[i])++;
        }
}
6939
// Free the buffers allocated by initMemTest, using the platform-matching
// deallocation primitive (munmap on Linux, VirtualFree on Windows; no-op
// on FreeBSD where initMemTest allocates nothing).
void ServerPCICFGUncore::cleanupMemTest(const ServerPCICFGUncore::MemTestParam & param)
{
    const auto & memBufferBlockSize = param.first;
    const auto & memBuffers = param.second;
    for (auto b : memBuffers)
    {
#if defined(__linux__)
        munmap(b, memBufferBlockSize);
#elif defined(_MSC_VER)
        VirtualFree(b, memBufferBlockSize, MEM_RELEASE);
#elif defined(__FreeBSD__)
        (void) b; // avoid the unused variable warning
        (void) memBufferBlockSize; // avoid the unused variable warning
#else
#endif
    }
}
6957
// Determine (and cache in qpi_speed) the speed of every QPI/UPI link, then
// return the maximum. On QPI parts the speed is read from the
// QPI_RATE_STATUS PCI register; if that is unavailable (or on UPI parts)
// it is measured: generate memory traffic for ~200 ms and scale the
// observed link clock delta. All ports are probed concurrently via
// std::async; results are cached, so only the first call is expensive.
uint64 ServerPCICFGUncore::computeQPISpeed(const uint32 core_nr, const int cpumodel)
{
    if(qpi_speed.empty())
    {
        PCM * pcm = PCM::getInstance();
        // pin this thread near the socket being measured for the duration
        TemporalThreadAffinity aff(core_nr);
        qpi_speed.resize(getNumQPIPorts());

        auto getSpeed = [&] (size_t i) {
           if (i == 1) return 0ULL; // link 1 should have the same speed as link 0, skip it

           uint64 result = 0;
           // fast path (QPI only): read the rate directly from the PCI config register
           if (PCM::hasUPI(cpumodel) == false && i < XPIRegisterLocation.size())
           {
               PciHandleType reg(groupnr,UPIbus, XPIRegisterLocation[i].first, QPI_PORT0_MISC_REGISTER_FUNC_ADDR);
               uint32 value = 0;
               reg.read32(QPI_RATE_STATUS_ADDR, &value);
               value &= 7; // extract lower 3 bits
               if(value) result = static_cast<uint64>((4000000000ULL + ((uint64)value)*800000000ULL)*2ULL);
           }

           if(result == 0ULL)
           {
               if (PCM::hasUPI(cpumodel) == false)
                   std::cerr << "Warning: QPI_RATE_STATUS register is not available on port " << i << ". Computing QPI speed using a measurement loop.\n";

               // compute qpi speed
               const uint64 timerGranularity = 1000000ULL; // mks

               MemTestParam param;
               initMemTest(param);
               uint64 startClocks = getQPIClocks((uint32)i);
               uint64 startTSC = pcm->getTickCount(timerGranularity, core_nr);
               uint64 endTSC;
               do
               {
                    doMemTest(param);
                    endTSC = pcm->getTickCount(timerGranularity, core_nr);
               } while (endTSC - startTSC < 200000ULL); // spin for 200 ms

               uint64 endClocks = getQPIClocks((uint32)i);
               cleanupMemTest(param);

               // scale observed link clocks to bytes/second
               result = (uint64(double(endClocks - startClocks) * PCM::getBytesPerLinkCycle(cpumodel) * double(timerGranularity) / double(endTSC - startTSC)));
               if(cpumodel == PCM::HASWELLX || cpumodel == PCM::BDX) /* BDX_DE does not have QPI. */{
                  result /=2; // HSX runs QPI clocks with doubled speed
               }
           }
           return result;
         };
         // probe all ports concurrently, then collect the results
         std::vector<std::future<uint64> > getSpeedsAsync;
         for (size_t i = 0; i < getNumQPIPorts(); ++i) {
             getSpeedsAsync.push_back(std::async(std::launch::async, getSpeed, i));
         }
         for (size_t i = 0; i < getNumQPIPorts(); ++i) {
             qpi_speed[i] = (i==1)? qpi_speed[0] : getSpeedsAsync[i].get(); // link 1 does not have own speed register, it runs with the speed of link 0
         }
        if (PCM::hasUPI(cpumodel))
        {
            // check the speed of link 3
            if(qpi_speed.size() == 3 && qpi_speed[2] == 0)
            {
               std::cerr << "UPI link 3 is disabled\n";
               qpi_speed.resize(2);
               xpiPMUs.resize(2);
            }
        }
    }
    if(!qpi_speed.empty())
    {
        return *std::max_element(qpi_speed.begin(),qpi_speed.end());
    }
    else
    {
        return 0;
    }
}
7033
reportQPISpeed() const7034 void ServerPCICFGUncore::reportQPISpeed() const
7035 {
7036 PCM * m = PCM::getInstance();
7037 std::cerr.precision(1);
7038 std::cerr << std::fixed;
7039 for (uint32 i = 0; i < (uint32)qpi_speed.size(); ++i)
7040 std::cerr << "Max QPI link " << i << " speed: " << qpi_speed[i] / (1e9) << " GBytes/second (" << qpi_speed[i] / (1e9 * m->getBytesPerLinkTransfer()) << " GT/second)\n";
7041 }
7042
// MSR address of counter register #Ctr of CBo/CHA #Cbo for the current
// cpu model; returns 0 on unsupported models.
uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const
{
    if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
    {
        return JKT_C0_MSR_PMON_CTR0 + ((JKTIVT_CBO_MSR_STEP)*Cbo) + Ctr;

    } else if(HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
    {
        return HSX_C0_MSR_PMON_CTR0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctr;
    }
    else if (ICX == cpu_model || SNOWRIDGE == cpu_model)
    {
        // ICX/SNR: per-CHA base address plus a fixed counter offset
        return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTR0_OFFSET + Ctr;
    }
    return 0;
}
7059
// MSR address of the box filter register of CBo/CHA #Cbo for the current
// cpu model; returns 0 on unsupported models.
uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const
{
    if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
    {
        return JKT_C0_MSR_PMON_BOX_FILTER + ((JKTIVT_CBO_MSR_STEP)*Cbo);

    } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
    {
        return HSX_C0_MSR_PMON_BOX_FILTER + ((HSX_CBO_MSR_STEP)*Cbo);
    } else if (KNL == cpu_model)
    {
        // NOTE(review): this returns the KNL CHA box *control* address here —
        // verify against the KNL uncore documentation whether that is intended
        return KNL_CHA0_MSR_PMON_BOX_CTL + ((KNL_CHA_MSR_STEP)*Cbo);
    }
    else if (ICX == cpu_model)
    {
        return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_BOX_FILTER_OFFSET;
    }

    return 0;
}
7080
// MSR address of the second box filter register (FILTER1) of CBo #Cbo for
// the current cpu model; returns 0 on models without a FILTER1 register.
uint64 PCM::CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const
{
    if(IVYTOWN == cpu_model)
    {
        return IVT_C0_MSR_PMON_BOX_FILTER1 + ((JKTIVT_CBO_MSR_STEP)*Cbo);

    } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
    {
        return HSX_C0_MSR_PMON_BOX_FILTER1 + ((HSX_CBO_MSR_STEP)*Cbo);
    }
    return 0;
}
7093
// MSR address of event control register #Ctl of CBo/CHA #Cbo for the
// current cpu model; returns 0 on unsupported models.
uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const
{
    if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
    {
        return JKT_C0_MSR_PMON_CTL0 + ((JKTIVT_CBO_MSR_STEP)*Cbo) + Ctl;

    } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
    {
        return HSX_C0_MSR_PMON_CTL0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctl;
    }
    else if (ICX == cpu_model || SNOWRIDGE == cpu_model)
    {
        // ICX/SNR: per-CHA base address plus a fixed control offset
        return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTL0_OFFSET + Ctl;
    }
    return 0;
}
7110
CX_MSR_PMON_BOX_CTL(uint32 Cbo) const7111 uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const
7112 {
7113 if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
7114 {
7115 return JKT_C0_MSR_PMON_BOX_CTL + ((JKTIVT_CBO_MSR_STEP)*Cbo);
7116
7117 } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
7118 {
7119 return HSX_C0_MSR_PMON_BOX_CTL + ((HSX_CBO_MSR_STEP)*Cbo);
7120 } else if (KNL == cpu_model)
7121 {
7122 return KNL_CHA0_MSR_PMON_BOX_CTRL + ((KNL_CHA_MSR_STEP)*Cbo);
7123 }
7124 else if (ICX == cpu_model)
7125 {
7126 return ICX_CHA_MSR_PMON_BOX_CTL[Cbo];
7127 }
7128 else if (SNOWRIDGE == cpu_model)
7129 {
7130 return SNR_CHA_MSR_PMON_BOX_CTL[Cbo];
7131 }
7132 return 0;
7133 }
7134
// Returns the number of CBo/CHA PMON units per socket for the detected CPU
// model. The value is computed once and memoized in a function-local static.
// NOTE(review): the cache variable is written without synchronization; if
// the first call can race between threads, confirm callers serialize it.
uint32 PCM::getMaxNumOfCBoxes() const
{
    static int num = -1;
    if (num >= 0)
    {
        // Already computed on a previous call.
        return (uint32)num;
    }
    if (KNL == cpu_model || SKX == cpu_model || ICX == cpu_model)
    {
        /*
         *  on KNL two physical cores share CHA.
         *  The number of CHAs in the processor is stored in bits 5:0
         *  of NCUPMONConfig [0x702] MSR.
         */
        uint64 val;
        uint32 refCore = socketRefCore[0];
        uint32 NCUPMONConfig = 0x702;
        // NOTE(review): the read status is ignored; `val` would be used
        // uninitialized if the MSR read fails — confirm this is acceptable.
        MSR[refCore]->read(NCUPMONConfig, &val);
        num = (uint32)(val & 63);
    }
    else if (SNOWRIDGE == cpu_model)
    {
        // On SNR one CHA is shared by four physical cores.
        num = (uint32)num_phys_cores_per_socket / 4;
    }
    else
    {
        /*
         *  on other supported CPUs there is one CBox per physical core. This calculation will get us
         *  the number of physical cores per socket which is the expected
         *  value to be returned.
         */
        num = (uint32)num_phys_cores_per_socket;
    }
    return num;
}
7170
getMaxNumOfIIOStacks() const7171 uint32 PCM::getMaxNumOfIIOStacks() const
7172 {
7173 if (iioPMUs.size() > 0)
7174 {
7175 return (uint32)iioPMUs[0].size();
7176 }
7177 return 0;
7178 }
7179
// Programs the CBo/CHA opcode filter register(s) of `pmu` for the detected
// CPU model.
//
// Parameters:
//   opc0 - primary opcode to match
//   pmu  - the CBo/CHA PMU whose filter register(s) are written
//   nc_  - SKX only: match non-coherent requests when non-zero
//   opc1 - SKX only: secondary opcode to match
//   loc  - SKX only: match locally-homed requests when non-zero
//   rem  - SKX only: match remotely-homed requests when non-zero
//
// Throws std::exception for models with no implementation here.
void PCM::programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint32 nc_, const uint32 opc1, const uint32 loc, const uint32 rem)
{
    if(JAKETOWN == cpu_model)
    {
        // JKT has a single filter register holding the opcode field.
        *pmu.filter[0] = JKT_CBO_MSR_PMON_BOX_FILTER_OPC(opc0);

    } else if(IVYTOWN == cpu_model || HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model)
    {
        // IVT..BDX moved the opcode field to the second filter register.
        *pmu.filter[1] = IVTHSX_CBO_MSR_PMON_BOX_FILTER1_OPC(opc0);
    } else if(SKX == cpu_model)
    {
        // SKX FILTER1 packs two opcodes plus locality/coherence match bits;
        // NM and NOT_NM are both set so near-memory state does not filter.
        *pmu.filter[1] = SKX_CHA_MSR_PMON_BOX_FILTER1_OPC0(opc0) +
                SKX_CHA_MSR_PMON_BOX_FILTER1_OPC1(opc1) +
                (rem?SKX_CHA_MSR_PMON_BOX_FILTER1_REM(1):0ULL) +
                (loc?SKX_CHA_MSR_PMON_BOX_FILTER1_LOC(1):0ULL) +
                SKX_CHA_MSR_PMON_BOX_FILTER1_NM(1) +
                SKX_CHA_MSR_PMON_BOX_FILTER1_NOT_NM(1) +
                (nc_?SKX_CHA_MSR_PMON_BOX_FILTER1_NC(1):0ULL);
    }
    else
    {
        std::cerr << "ERROR: programCboOpcodeFilter function is not implemented for cpu model " << cpu_model << std::endl;
        throw std::exception();
    }
}
7205
programIIOCounters(uint64 rawEvents[4],int IIOStack)7206 void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack)
7207 {
7208 std::vector<int32> IIO_units;
7209 if (IIOStack == -1)
7210 {
7211 int stacks_count;
7212 switch (getCPUModel())
7213 {
7214 case PCM::ICX:
7215 stacks_count = ICX_IIO_STACK_COUNT;
7216 break;
7217 case PCM::SNOWRIDGE:
7218 stacks_count = SNR_IIO_STACK_COUNT;
7219 break;
7220 case PCM::SKX:
7221 default:
7222 stacks_count = SKX_IIO_STACK_COUNT;
7223 break;
7224 }
7225 IIO_units.reserve(stacks_count);
7226 for (int stack = 0; stack < stacks_count; ++stack) {
7227 IIO_units.push_back(stack);
7228 }
7229 }
7230 else
7231 IIO_units.push_back(IIOStack);
7232
7233 for (int32 i = 0; (i < num_sockets) && MSR.size() && iioPMUs.size(); ++i)
7234 {
7235 uint32 refCore = socketRefCore[i];
7236 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7237
7238 for (const auto & unit: IIO_units)
7239 {
7240 if (iioPMUs[i].count(unit) == 0)
7241 {
7242 std::cerr << "IIO PMU unit (stack) " << unit << " is not found \n";
7243 continue;
7244 }
7245 auto & pmu = iioPMUs[i][unit];
7246 pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
7247
7248 program(pmu, &rawEvents[0], &rawEvents[4], UNC_PMON_UNIT_CTL_RSV);
7249 }
7250 }
7251 }
7252
// Translates a PCIe event group into CBo/CHA event programming for the
// detected CPU model and programs it via programCbo().
// On ICX/SNR the group entries are already raw event encodings; on SKX and
// older models a TOR_INSERTS event (0x35) is synthesized and the group's
// filter bits select the umask/opcode filtering.
void PCM::programPCIeEventGroup(eventGroup_t &eventGroup)
{
    assert(eventGroup.size() > 0);
    uint64 events[4] = {0};
    uint64 umask[4] = {0};

    switch (cpu_model)
    {
    case PCM::ICX:
    case PCM::SNOWRIDGE:
        // Raw encodings: copy the group directly into the event slots.
        for (uint32 idx = 0; idx < eventGroup.size(); ++idx)
            events[idx] = eventGroup[idx];
        programCbo(events);
        break;
    case PCM::SKX:
        //JKT through CLX generations allow programming only one required event at a time.
        // NC bit selects IRQ (core-originated) vs PRQ (PCIe-originated) TOR inserts.
        if (eventGroup[0] & SKX_CHA_MSR_PMON_BOX_FILTER1_NC(1))
            umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_IRQ(1));
        else
            umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_PRQ(1));

        // RSV bit selects hit vs miss TOR inserts.
        if (eventGroup[0] & SKX_CHA_MSR_PMON_BOX_FILTER1_RSV(1))
            umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_HIT(1));
        else
            umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_MISS(1));

        events[0] += CBO_MSR_PMON_CTL_EVENT(0x35) + CBO_MSR_PMON_CTL_UMASK(umask[0]); // TOR_INSERTS
        programCbo(events, SKX_CHA_MSR_PMON_BOX_GET_OPC0(eventGroup[0]),
                SKX_CHA_MSR_PMON_BOX_GET_NC(eventGroup[0]));
        break;
    case PCM::BDX_DE:
    case PCM::BDX:
    case PCM::KNL:
    case PCM::HASWELLX:
    case PCM::IVYTOWN:
    case PCM::JAKETOWN:
        // Pre-SKX: single TOR_INSERTS event; FLT selects the umask, TID
        // optionally enables thread-id filtering (filter value 0x3e).
        events[0] = CBO_MSR_PMON_CTL_EVENT(0x35);
        events[0] += BDX_CBO_MSR_PMON_BOX_GET_FLT(eventGroup[0]) ? CBO_MSR_PMON_CTL_UMASK(0x3) : CBO_MSR_PMON_CTL_UMASK(1);
        events[0] += BDX_CBO_MSR_PMON_BOX_GET_TID(eventGroup[0]) ? CBO_MSR_PMON_CTL_TID_EN : 0ULL;

        programCbo(events, BDX_CBO_MSR_PMON_BOX_GET_OPC0(eventGroup[0]),
                0, BDX_CBO_MSR_PMON_BOX_GET_TID(eventGroup[0]) ? 0x3e : 0ULL);
        break;
    }
}
7298
// Programs every CBo/CHA PMU on every socket with the given events and
// (model-dependent) opcode/TID filters, then zeroes the counter values.
//
// Parameters:
//   events                - array of ServerUncoreCounterState::maxCounters
//                           raw event control values
//   opCode                - opcode filter (pre-ICX models only)
//   nc_                   - non-coherent filter bit (SKX)
//   llc_lookup_tid_filter - when non-zero on HSX..SKX, written to filter[0]
//   loc, rem              - local/remote home filter bits (SKX)
void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc_, const uint32 llc_lookup_tid_filter, const uint32 loc, const uint32 rem)
{
    for (size_t i = 0; (i < cboPMUs.size()) && MSR.size(); ++i)
    {
        uint32 refCore = socketRefCore[i];
        TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux

        for(uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
        {
            cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);

            // ICX/SNR have no opcode filter register to program.
            if (ICX != cpu_model && SNOWRIDGE != cpu_model)
                programCboOpcodeFilter(opCode, cboPMUs[i][cbo], nc_, 0, loc, rem);

            if((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0)
                *cboPMUs[i][cbo].filter[0] = llc_lookup_tid_filter;

            PCM::program(cboPMUs[i][cbo], events, events + ServerUncoreCounterState::maxCounters, UNC_PMON_UNIT_CTL_FRZ_EN);

            // Start each counter from zero.
            for (int c = 0; c < ServerUncoreCounterState::maxCounters; ++c)
            {
                *cboPMUs[i][cbo].counterValue[c] = 0;
            }
        }
    }
}
7325
programCboRaw(const uint64 * events,const uint64 filter0,const uint64 filter1)7326 void PCM::programCboRaw(const uint64* events, const uint64 filter0, const uint64 filter1)
7327 {
7328 for (size_t i = 0; (i < cboPMUs.size()) && MSR.size(); ++i)
7329 {
7330 uint32 refCore = socketRefCore[i];
7331 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7332
7333 for (uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
7334 {
7335 cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
7336
7337 if (cboPMUs[i][cbo].filter[0].get())
7338 {
7339 *cboPMUs[i][cbo].filter[0] = filter0;
7340 }
7341
7342 if (cboPMUs[i][cbo].filter[1].get())
7343 {
7344 *cboPMUs[i][cbo].filter[1] = filter1;
7345 }
7346
7347 PCM::program(cboPMUs[i][cbo], events, events + 4, UNC_PMON_UNIT_CTL_FRZ_EN);
7348
7349 for (int c = 0; c < 4; ++c)
7350 {
7351 *cboPMUs[i][cbo].counterValue[c] = 0;
7352 }
7353 }
7354 }
7355 }
7356
programUBOX(const uint64 * events)7357 void PCM::programUBOX(const uint64* events)
7358 {
7359 for (size_t s = 0; (s < uboxPMUs.size()) && MSR.size(); ++s)
7360 {
7361 uint32 refCore = socketRefCore[s];
7362 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7363
7364 *uboxPMUs[s].fixedCounterControl = UCLK_FIXED_CTL_EN;
7365
7366 PCM::program(uboxPMUs[s], events, events + 2, 0);
7367 }
7368 }
7369
getCBOCounterState(const uint32 socket_,const uint32 ctr_)7370 uint64 PCM::getCBOCounterState(const uint32 socket_, const uint32 ctr_)
7371 {
7372 uint64 result = 0;
7373
7374 const uint32 refCore = socketRefCore[socket_];
7375 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7376
7377 for(auto & pmu: cboPMUs[socket_])
7378 {
7379 result += *pmu.counterValue[ctr_];
7380 }
7381 return result;
7382 }
7383
getUncoreClocks(const uint32 socket_)7384 uint64 PCM::getUncoreClocks(const uint32 socket_)
7385 {
7386 uint64 result = 0;
7387 if (socket_ < uboxPMUs.size())
7388 {
7389 result = *uboxPMUs[socket_].fixedCounterValue;
7390 }
7391 return result;
7392 }
7393
getPCIeCounterState(const uint32 socket_,const uint32 ctr_)7394 PCIeCounterState PCM::getPCIeCounterState(const uint32 socket_, const uint32 ctr_)
7395 {
7396 PCIeCounterState result;
7397 result.data = getCBOCounterState(socket_, ctr_);
7398 return result;
7399 }
7400
getPCIeCounterData(const uint32 socket_,const uint32 ctr_)7401 uint64 PCM::getPCIeCounterData(const uint32 socket_, const uint32 ctr_)
7402 {
7403 return getCBOCounterState(socket_, ctr_);
7404 }
7405
// Fills `events` with the TOR_OCCUPANCY/TOR_INSERTS event encodings used to
// derive LLC read-miss latency, and sets `opCode` to the model-specific
// opcode filter value. No-op when the metric is unavailable on this model.
void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode)
{
    if (LLCReadMissLatencyMetricsAvailable() == false)
    {
        return;
    }
    uint64 umask = 3ULL; // MISS_OPCODE (default for pre-SKX models)
    switch (cpu_model)
    {
        case ICX:
        case SNOWRIDGE:
            umask = 1ULL;
            break;
        case SKX:
            umask = (uint64)(SKX_CHA_TOR_INSERTS_UMASK_IRQ(1)) + (uint64)(SKX_CHA_TOR_INSERTS_UMASK_MISS(1));
            break;
    }

    // ICX/SNR encode the request-type selection in an extended umask field;
    // other models leave it zero.
    uint64 umask_ext = 0;
    switch (cpu_model)
    {
        case ICX:
            umask_ext = 0xC817FE;
            break;
        case SNOWRIDGE:
            umask_ext = 0xC827FE;
            break;
    }

    const uint64 all_umasks = CBO_MSR_PMON_CTL_UMASK(umask) + UNC_PMON_CTL_UMASK_EXT(umask_ext);
    events[EventPosition::TOR_OCCUPANCY] = CBO_MSR_PMON_CTL_EVENT(0x36) + all_umasks; // TOR_OCCUPANCY (must be on counter 0)
    events[EventPosition::TOR_INSERTS] = CBO_MSR_PMON_CTL_EVENT(0x35) + all_umasks; // TOR_INSERTS

    // Opcode filter: demand data read (model-specific encoding).
    opCode = (SKX == cpu_model) ? 0x202 : 0x182;
}
7441
programCbo()7442 void PCM::programCbo()
7443 {
7444 uint64 events[ServerUncoreCounterState::maxCounters];
7445 std::fill(events, events + ServerUncoreCounterState::maxCounters, 0);
7446 uint32 opCode = 0;
7447
7448 initLLCReadMissLatencyEvents(events, opCode);
7449 initCHARequestEvents(events);
7450
7451 programCbo(events, opCode);
7452
7453 programUBOX(nullptr);
7454 }
7455
// Fills `config` with CHA REQUESTS (event 0x50) encodings used to compute
// the local-memory request ratio. No-op when the metric is unavailable or
// the model has no CHA. The PCM_HA_REQUESTS_READS_ONLY build flag narrows
// the umask to read requests only.
void PCM::initCHARequestEvents(uint64 * config)
{
    if (localMemoryRequestRatioMetricAvailable() && hasCHA())
    {
#ifdef PCM_HA_REQUESTS_READS_ONLY
        // HA REQUESTS READ: LOCAL + REMOTE
        config[EventPosition::REQUESTS_ALL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1 + 2));
        // HA REQUESTS READ: LOCAL ONLY
        config[EventPosition::REQUESTS_LOCAL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1));
#else
        // HA REQUESTS READ+WRITE+REMOTE+LOCAL
        config[EventPosition::REQUESTS_ALL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1 + 2 + 4 + 8));
        // HA REQUESTS READ+WRITE (LOCAL only)
        config[EventPosition::REQUESTS_LOCAL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1 + 4));
#endif
    }
}
7473
// Constructs a width extender over `raw_counter_`: takes an initial reading
// and spawns a watchdog thread that re-reads the counter every
// `watchdog_delay_ms_` milliseconds (via this->read()) so that hardware
// counter wrap-arounds are observed before they are lost.
// NOTE(review): the watchdog thread loops forever and captures `this` by
// reference; there is no stop mechanism visible here — see the destructor
// note about deleting a joinable std::thread.
CounterWidthExtender::CounterWidthExtender(AbstractRawCounter * raw_counter_, uint64 counter_width_, uint32 watchdog_delay_ms_) : raw_counter(raw_counter_), counter_width(counter_width_), watchdog_delay_ms(watchdog_delay_ms_)
{
    last_raw_value = (*raw_counter)();
    extended_value = last_raw_value;
    //std::cout << "Initial Value " << extended_value << "\n";
    UpdateThread = new std::thread(
        [&]() {
            while (1)
            {
                MySleepMs(static_cast<int>(this->watchdog_delay_ms));
                /* uint64 dummy = */ this->read();
            }
        }
    );
}
// Destroys the extender and its raw counter.
// NOTE(review): `UpdateThread` is a std::thread that is never joined or
// detached and whose loop never exits; deleting a joinable std::thread
// invokes std::terminate(). A safe fix needs a stop flag/condition checked
// by the watchdog loop plus a join() here — requires a class member not
// visible in this file chunk; confirm and fix in the class declaration.
CounterWidthExtender::~CounterWidthExtender()
{
    delete UpdateThread;
    if (raw_counter) delete raw_counter;
}
7494
cleanup()7495 void UncorePMU::cleanup()
7496 {
7497 for (int i = 0; i < 4; ++i)
7498 {
7499 if (counterControl[i].get()) *counterControl[i] = 0;
7500 }
7501 if (unitControl.get()) *unitControl = 0;
7502 if (fixedCounterControl.get()) *fixedCounterControl = 0;
7503 }
7504
// Freezes the PMU by writing the unit control register with `extra`
// (model-specific enable/reserved bits) plus the freeze bit.
void UncorePMU::freeze(const uint32 extra)
{
    *unitControl = extra + UNC_PMON_UNIT_CTL_FRZ;
}
7509
// Unfreezes the PMU by writing the unit control register with `extra`
// only (freeze bit cleared).
void UncorePMU::unfreeze(const uint32 extra)
{
    *unitControl = extra;
}
7514
// Writes `extra` (freeze-enable/reserved bits) to the unit control register
// and then freezes the PMU.
//
// When `xPICheckMsg` is non-null, the first write is read back: if the
// valid bits do not match, the PMU is considered absent (e.g. a UPI/QPI
// link that is not present) and this PMU is disabled by nulling its unit
// control handle; returns false in that case, true otherwise.
bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg)
{
    // freeze enable
    *unitControl = extra;
    if (xPICheckMsg)
    {
        // Read-back check: a missing unit will not retain the written bits.
        if ((extra & UNC_PMON_UNIT_CTL_VALID_BITS_MASK) != ((*unitControl) & UNC_PMON_UNIT_CTL_VALID_BITS_MASK))
        {
            unitControl = nullptr; // disable this PMU for all later accesses
            return false;
        }
    }
    // freeze
    *unitControl = extra + UNC_PMON_UNIT_CTL_FRZ;

#ifdef PCM_UNCORE_PMON_BOX_CHECK_STATUS
    // Optional paranoia: verify the freeze actually took effect.
    const uint64 val = *unitControl;
    if ((val & UNC_PMON_UNIT_CTL_VALID_BITS_MASK) != (extra + UNC_PMON_UNIT_CTL_FRZ))
    {
        std::cerr << "ERROR: PMU counter programming seems not to work. PMON_BOX_CTL=0x" << std::hex << val << " needs to be =0x" << (UNC_PMON_UNIT_CTL_FRZ_EN + UNC_PMON_UNIT_CTL_FRZ) << "\n";
        if (xPICheckMsg)
        {
            std::cerr << xPICheckMsg;
        }
    }
#endif
    return true;
}
7543
// Resets all counter values of the PMU (while frozen) and then unfreezes
// it, so counting restarts from zero. `extra` carries the model-specific
// enable/reserved bits to keep set in the unit control register.
void UncorePMU::resetUnfreeze(const uint32 extra)
{
    // reset counter values
    *unitControl = extra + UNC_PMON_UNIT_CTL_FRZ + UNC_PMON_UNIT_CTL_RST_COUNTERS;

    // unfreeze counters
    *unitControl = extra;
}
7552
getIIOCounterState(int socket,int IIOStack,int counter)7553 IIOCounterState PCM::getIIOCounterState(int socket, int IIOStack, int counter)
7554 {
7555 IIOCounterState result;
7556 result.data = 0;
7557 if (socket < (int)iioPMUs.size() && iioPMUs[socket].count(IIOStack) > 0)
7558 {
7559 result.data = *iioPMUs[socket][IIOStack].counterValue[counter];
7560 }
7561 return result;
7562 }
7563
getIIOCounterStates(int socket,int IIOStack,IIOCounterState * result)7564 void PCM::getIIOCounterStates(int socket, int IIOStack, IIOCounterState * result)
7565 {
7566 uint32 refCore = socketRefCore[socket];
7567 TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7568
7569 for (int c = 0; c < 4; ++c) {
7570 result[c] = getIIOCounterState(socket, IIOStack, c);
7571 }
7572 }
7573
// Fills conf.OffcoreResponseMsrValue[0]/[1] with the model-specific
// OFFCORE_RESPONSE MSR encodings that count local-DRAM ([0]) and
// remote-DRAM ([1]) accesses, for NUMA traffic attribution.
// Throws UnsupportedProcessorException for models without known encodings.
void PCM::setupCustomCoreEventsForNuma(PCM::ExtendedCustomCoreEventDescription& conf) const
{
    switch (this->getCPUModel())
    {
    case PCM::WESTMERE_EX:
        // OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM:  Offcore requests satisfied by the local DRAM
        conf.OffcoreResponseMsrValue[0] = 0x40FF;
        // OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM: Offcore requests satisfied by a remote DRAM
        conf.OffcoreResponseMsrValue[1] = 0x20FF;
        break;
    case PCM::JAKETOWN:
    case PCM::IVYTOWN:
        // OFFCORE_RESPONSE.*.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x780400000 | 0x08FFF;
        // OFFCORE_RESPONSE.*.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x7ff800000 | 0x08FFF;
        break;
    case PCM::HASWELLX:
        // OFFCORE_RESPONSE.*.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x600400000 | 0x08FFF;
        // OFFCORE_RESPONSE.*.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x63f800000 | 0x08FFF;
        break;
    case PCM::BDX:
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x0604008FFF;
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x067BC08FFF;
        break;
    case PCM::SKX:
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS_LOCAL_DRAM.ANY_SNOOP
        conf.OffcoreResponseMsrValue[0] = 0x3FC0008FFF | (1 << 26);
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS_REMOTE_(HOP0,HOP1,HOP2P)_DRAM.ANY_SNOOP
        conf.OffcoreResponseMsrValue[1] = 0x3FC0008FFF | (1 << 27) | (1 << 28) | (1 << 29);
        break;
    case PCM::ICX:
        std::cout << "INFO: Monitored accesses include demand + L2 cache prefetcher, code read and RFO.\n";
        // OCR.READS_TO_CORE.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x0104000477;
        // OCR.READS_TO_CORE.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x0730000477;
        break;
    default:
        throw UnsupportedProcessorException();
    }
}
7620
7621 } // namespace pcm
7622